-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
44 lines (36 loc) · 1.27 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
import pymongo
from selenium import webdriver
from bs4 import BeautifulSoup
# Module-level MongoDB handles shared by the helpers in this file.
# Client for the MongoDB server at localhost:27017.
client = pymongo.MongoClient('mongodb://localhost:27017/')
# Database that holds the collections used below.
catcher_db = client['truck_catcher_db']
# Counter documents read and incremented by get_next_id().
id_collect = catcher_db['id_collect']
# Records written by except_handler().
error_collection = catcher_db['error_collection']
def get_html(url):
    """Load *url* in a Chrome browser and return the page as parsed HTML.

    A fresh Chrome WebDriver is started for every call and is always shut
    down again before returning, so no browser process is left behind even
    when loading the page fails.

    Args:
        url: Address of the page to load.

    Returns:
        A ``BeautifulSoup`` tree built from the browser's page source with
        the ``html.parser`` backend.

    Raises:
        Whatever the WebDriver raises while starting Chrome or loading the
        page; the browser is still closed in that case.
    """
    print(url)
    option = webdriver.ChromeOptions()
    # Headless mode is intentionally left disabled; enable the next line to
    # turn it on.
    # option.add_argument('headless')
    option.add_argument('process-per-site')
    # NOTE(review): newer Selenium releases spell this keyword ``options``;
    # confirm against the installed Selenium version before changing it.
    driver = webdriver.Chrome(chrome_options=option)
    try:
        # Element lookups may wait up to 60 seconds before giving up.
        driver.implicitly_wait(60)
        driver.get(url)
        html_str = driver.page_source
    finally:
        # Without this every call would leak a Chrome/chromedriver process.
        driver.quit()
    return BeautifulSoup(html_str, 'html.parser')
def get_next_id(collect_name):
    """Return the next sequence number for *collect_name*.

    The counter lives in ``id_collect`` as a document whose ``_id`` is
    *collect_name* and whose ``sequence_value`` holds the last number
    handed out. An existing counter is incremented by one and the new
    value is returned; if no counter exists yet, one is created with the
    value 0 and 0 is returned.

    Args:
        collect_name: Key of the counter document (its ``_id``).

    Returns:
        The sequence number after the increment, or 0 on the first call
        for a given *collect_name*.
    """
    # find_one_and_update replaces the deprecated find_and_modify call;
    # ReturnDocument.AFTER asks for the document as it looks after the $inc.
    counter = id_collect.find_one_and_update(
        {"_id": collect_name},
        {"$inc": {"sequence_value": 1}},
        return_document=pymongo.ReturnDocument.AFTER,
    )
    if counter is not None:
        # $inc guarantees the field exists on the returned document.
        return counter["sequence_value"]

    # No counter yet: start the sequence at 0.
    id_collect.insert_one({'_id': collect_name, 'sequence_value': 0})
    return 0
def except_handler(url, target_coll):
    """Store a record for *url* in ``error_collection``.

    Args:
        url: Stored under the ``"url"`` key of the record.
        target_coll: Stored under the ``"collection"`` key of the record.

    The record is always written with ``"version"`` set to 0.
    """
    record = {"url": url, "collection": target_coll, "version": 0}
    error_collection.insert_one(record)