-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape_political_compass.py
40 lines (31 loc) · 1.13 KB
/
scrape_political_compass.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os

import requests
from bs4 import BeautifulSoup
def fetch_questions(page_url, carried_ec=0, carried_soc=0, page_num=1, timeout=30):
    """Fetch the question texts shown on one page of the politicalcompass.org test.

    Submits the test form with the first (index 0) option selected for every
    question, which advances the server to render the requested page, then
    scrapes each question's <legend> text.

    Args:
        page_url: URL of the test form endpoint.
        carried_ec: Economic score carried over from previous pages
            (0 is fine when we only want the question text).
        carried_soc: Social score carried over from previous pages.
        page_num: 1-based page number to request.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        List of question strings found on the page, in display order.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        ValueError: If the expected form is missing from the response HTML.
    """
    data = {
        'page': page_num,
        'carried_ec': carried_ec,
        'carried_soc': carried_soc,
        'populated': '',
    }
    # Answer every question on the page with the first option so the form
    # validates and the server serves the requested page's content.
    # NOTE(review): assumes each page has exactly 6 questions (p1..p6) — the
    # last page may differ; verify against the live form.
    for i in range(1, 7):
        data[f'p{i}'] = 0

    # timeout prevents the request from hanging indefinitely on a stalled server
    response = requests.post(page_url, data=data, timeout=timeout)
    response.raise_for_status()  # fail loudly on 4xx/5xx instead of parsing an error page

    soup = BeautifulSoup(response.text, 'html.parser')
    form = soup.find('form', {'method': 'POST'})
    if form is None:
        # Give a clear error instead of an opaque AttributeError below.
        raise ValueError(f"No POST form found on page {page_num} of {page_url}")

    # Each question lives in a fieldset with this class; its legend is the text.
    fieldset_blocks = form.find_all('fieldset', {'class': 'b1 pa2 mb1'})
    return [block.find('legend').text.strip() for block in fieldset_blocks]
url = "https://www.politicalcompass.org/test/en"

# Collect the questions from all six pages of the test.
# NOTE(review): carried_ec/carried_soc stay at their 0 defaults for every
# page — the site may expect real carried scores to serve later pages;
# confirm the scraped pages are correct.
all_questions = []
for page in range(1, 7):
    all_questions.extend(fetch_questions(url, page_num=page))

# Ensure the output directory exists so the open() below doesn't fail
# with FileNotFoundError on a fresh checkout.
os.makedirs("data", exist_ok=True)

with open("data/political_compass_questions.txt", "w", encoding="utf-8") as f:
    # One question per line.
    f.writelines(question + "\n" for question in all_questions)