-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbrowse_games.py
74 lines (56 loc) · 1.74 KB
/
browse_games.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env python
import urllib
from bs4 import BeautifulSoup
import sys
# Page of the BoardGameGeek "browse boardgame" listing to scrape, taken from
# the first command-line argument.
if len(sys.argv) < 2:
    # Fail with a usage hint instead of a bare IndexError traceback.
    sys.exit("usage: browse_games.py PAGE")
PAGE = sys.argv[1]

# Download the raw HTML of the requested listing page.
f = urllib.urlopen("https://www.boardgamegeek.com/browse/boardgame/page/%s" % (PAGE))
try:
    s = f.read()
finally:
    # Release the connection even when the read fails part-way through.
    f.close()

# Name the parser explicitly: without it bs4 picks whichever parser happens
# to be installed, so the parsed tree can differ from machine to machine.
soup = BeautifulSoup(s, "html.parser")
# Extract collection table
table = soup.find("table", attrs={"class": "collection_table"})
if table is None:
    # find() returns None when nothing matches; stop here with a clear
    # message rather than failing later on table.find_all(...).
    sys.exit("no table with class 'collection_table' found on page %s" % PAGE)
# Ranking column: the whitespace-stripped text of every "collection_rank"
# cell, in document order.
rank = []
for cell in table.find_all("td", attrs={"class": "collection_rank"}):
    rank.append(cell.get_text().strip())
# Thumbnail column: the "src" of the first <img> in each
# "collection_thumbnail" cell, or "N/A" for cells that contain no image.
figures = []
for cell in table.find_all("td", attrs={"class": "collection_thumbnail"}):
    img = cell.find("img")
    if img is None:
        figures.append("N/A")
    else:
        figures.append(img["src"])
# Name column: one (game href, game name, year) tuple per
# "collection_objectname" cell.
object_data = []
for cell in table.find_all("td", attrs={"class": "collection_objectname"}):
    link = cell.find("a")
    year_tag = cell.find("span")
    if year_tag is None:
        # No <span> in the cell: use the placeholder for the year.
        year = "N/A"
    else:
        # Drop the parentheses surrounding the span text.
        year = year_tag.get_text().strip("()")
    object_data.append((link["href"], link.get_text(), year))
# The "collection_bggrating" cells are consumed in groups of three: geek
# rating, average rating, number of voters.  Collect their stripped text in
# a single pass and de-interleave by position, instead of scanning the table
# three separate times.
bggrating_text = [
    t.get_text().strip()
    for t in table.find_all("td", attrs={"class": "collection_bggrating"})
]
# Extract geek ratings (cells 0, 3, 6, ...)
geek_ratings = bggrating_text[0::3]
# Extract average ratings (cells 1, 4, 7, ...)
avg_ratings = bggrating_text[1::3]
# Extract number of voters (cells 2, 5, 8, ...)
num_voters = bggrating_text[2::3]
# Extract shop ref: the href of the first link in each "collection_shop"
# cell.  A cell without any link yields "N/A" (the same placeholder used for
# a missing thumbnail or year) instead of crashing on None["href"].
market = []
for cell in table.find_all("td", attrs={"class": "collection_shop"}):
    shop_link = cell.find("a")
    market.append(shop_link["href"] if shop_link is not None else "N/A")
# Print all info as a tab-separated table, one game per line.
columns = (rank, figures, object_data, geek_ratings, avg_ratings, num_voters, market)

# Every column is scraped independently, so unequal lengths mean the rows no
# longer line up.  Report that up front instead of hitting an IndexError in
# the middle of the output or silently ignoring surplus entries.
if len(set(len(col) for col in columns)) > 1:
    sys.exit(
        "scraped columns have different lengths: %s"
        % ", ".join(str(len(col)) for col in columns)
    )

for r, figure, (href, name, year), geek, avg, voters, shop in zip(*columns):
    fields = (r, figure, href, name, year, geek, avg, voters, shop)
    # Encode to UTF-8 bytes before printing (Python 2 script) -- presumably so
    # non-ASCII game names survive when stdout is piped; confirm if porting.
    # NOTE(review): strip() acts on the whole line, so an empty first or last
    # field would lose its separating tab and shift the columns.
    line = u"\t".join(fields).encode('utf-8').strip()
    # Parenthesised single argument: identical to `print line` under Python 2.
    print(line)