-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.py
60 lines (48 loc) · 1.49 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# -*- coding: utf-8 -*-
#https://github.com/NikolaiT/GoogleScraper
from GoogleScraper import scrape_with_config, GoogleSearchError
# Scraper settings; the config.cfg file documents the possible values.
keywordname = 'bits'
config = {
    'SCRAPING': dict(
        # use_own_ip='True',
        keyword=keywordname,
        search_engines='google, yahoo',
        num_pages_for_keyword=3,
        num_results_per_page=10,
        verbosity=0,
        scrape_method='http',
    ),
}
# Run the scrape. If it fails there is no result object to inspect, so report
# the error and stop here rather than continuing with `search` unbound (any
# later use of it would only fail with a NameError).
try:
    search = scrape_with_config(config)
except GoogleSearchError as e:
    print(e)
    raise SystemExit(1)
# Let's inspect what we got: append every link of every result page to a
# per-engine text file, "<keyword>_yahoo.txt" or "<keyword>_google.txt".
# Each row is tab-separated:
#   page_number * 100 + rank, running index for that engine, title, snippet
# NOTE(review): the nesting below was reconstructed from a de-indented copy of
# this script; confirm that the snippet is meant to be written even when
# page_number is None (in which case the row has no leading columns).
counters = {'yahoo': 1, 'google': 1}  # next running index per output file
for serp in search.serps:
    # Anything that is not Yahoo is written to the Google file.
    engine = 'yahoo' if serp.search_engine_name == "yahoo" else 'google'
    # Other serp attributes available for inspection: scrape_method,
    # page_number, requested_at, num_results, ...
    # `with` closes the file after each result page, so handles are not
    # leaked and data is flushed; the explicit encoding keeps non-ASCII
    # titles/snippets from failing under a non-UTF-8 locale.
    with open(keywordname + '_' + engine + '.txt', 'a', encoding='utf-8') as f:
        for link in serp.links:
            if serp.page_number is not None:
                # Guard a missing title the same way the snippet is guarded.
                title = link.title if link.title is not None else ''
                f.write('\t'.join((
                    str(serp.page_number * 100 + link.rank),
                    str(counters[engine]),
                    title,
                )) + '\t')
                counters[engine] += 1
            if link.snippet is None:
                f.write(' \n')
            else:
                f.write(str(link.snippet) + '\n')