|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +#Imports and dependencies |
| 4 | + |
| 5 | +import requests |
| 6 | +from bs4 import BeautifulSoup |
| 7 | +import re |
| 8 | +import csv |
| 9 | + |
# Matches a single HTML tag on one line; compiled once instead of per child node.
_TAG_PATTERN = re.compile(r'\<.*?>')


def _fetch_soup(session, url, timeout):
    """Download *url* through *session* and return the parsed HTML document."""
    response = session.get(url, timeout=timeout)
    # Fail loudly on an HTTP error rather than scraping an error page.
    response.raise_for_status()
    return BeautifulSoup(response.text, "html.parser")


def _extract_description(question_soup):
    """Return the problem text of a question page with each tag replaced by a space."""
    problem = question_soup.find("div", attrs={"class": "problem_content"})
    if problem is None:
        # The page has no problem body; record an empty description.
        return ''
    return ''.join(_TAG_PATTERN.sub(' ', str(child)) for child in problem.children)


def Euler(start=1, pages=15, output_path='Project_Euler.csv', timeout=30):
    """Scrape the Project Euler archive into a CSV file.

    One row is written per problem link found in the archive listing:
    the problem number (the text after the last '=' in the link's href),
    the link text, and the problem description with HTML tags replaced
    by spaces. A header row is written first.

    Args:
        start: Number of the first archive page to fetch.
        pages: How many consecutive archive pages to fetch, beginning
            at ``start``.
        output_path: Path of the CSV file to create or overwrite.
        timeout: Seconds allowed for each HTTP request.

    Raises:
        requests.HTTPError: If any page responds with an HTTP error status.
    """
    # UTF-8 is set explicitly so non-ASCII problem text does not depend
    # on the platform's default encoding.
    with open(output_path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["Problem Number", "Name", "Description"])

        # One session lets the many requests to the same host share a connection.
        with requests.Session() as session:
            for page in range(start, pages + start):
                # The page number is appended to the archive URL.
                page_url = "https://projecteuler.net/archives;page=" + str(page)
                soup = _fetch_soup(session, page_url, timeout)

                # The problem links are located inside the problems table.
                table = soup.find('table', attrs={"id": "problems_table"})
                if table is None:
                    # No listing on this page; nothing to record.
                    continue

                # href=True skips anchors that carry no link target.
                for link in table.find_all('a', href=True):
                    href = link['href']
                    question_url = "https://projecteuler.net/" + href
                    question_number = href.split('=')[-1]
                    # get_text() also copes with anchors containing nested tags.
                    question_name = link.get_text()

                    question_soup = _fetch_soup(session, question_url, timeout)
                    description = _extract_description(question_soup)

                    writer.writerow([question_number, question_name, description])
| 58 | + |
# Run the scraper; the call is unguarded, so it also executes on import.
Euler()
0 commit comments