import requests
from bs4 import BeautifulSoup

- #The content is written into a text file
- file = open("Medium_article_content.txt", "w")
-
- #The URL of the article is entered here
- page_url = input("Enter the URL of the Medium Article ")
-
- #Based on the response got from the URL, the content is loaded into response
- response = requests.get(page_url)
-
- #Beautiful Soup is a library used for web scraping and parsing the contents of a web page
- #Here an HTML parser is used to parse the content embedded in the HTML tags
- soup = BeautifulSoup(response.text, "html.parser")
-
- #The content of the article is stored in the <article> tag
- for line in soup.find('article').find('div'):
-
-     #All the content is essentially stored between <p> tags
-     for content in line.find_all('p'):
-
-         #contents are written into a file
-         file.write(content.text + '\n')
-
- file.close()
+ def article_download():
+
+     #The content is written into a text file
+     file = open("Medium_article_content.txt", "w")
+
+     #The URL of the article is entered here
+     page_url = input("Enter the URL of the Medium Article ")
+
+     #Send a browser-like User-Agent header; to find your own, search "my user agent" in a browser
+     headers = {"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0'}
+
+     #Based on the response got from the URL, the content is loaded into response
+     response = requests.get(page_url, headers=headers)
+
+     #Beautiful Soup is a library used for web scraping and parsing the contents of a web page
+     #Here an HTML parser is used to parse the content embedded in the HTML tags
+     soup = BeautifulSoup(response.text, "html.parser")
+
+     #The content of the article is stored in the <article> tag
+     for line in soup.find('article').find('div'):
+
+         #All the content is essentially stored between <p> tags
+         for content in line.find_all('p'):
+
+             #contents are written into a file
+             file.write(content.text + '\n')
+
+     file.close()
+     print("Content downloaded")
+
+ if __name__ == "__main__":
+     article_download()
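For reference, a minimal sketch (not part of the commit) of the same download flow with basic error handling added: the response status is checked before parsing, the output file is managed with a context manager, and a missing <article> tag raises a clear error. The function name download_article, the timeout value, and the default output filename are assumptions made for this example.

import requests
from bs4 import BeautifulSoup

def download_article(page_url, out_path="Medium_article_content.txt"):
    # Browser-like User-Agent so the request is not served a blocked/minimal page
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0"}
    response = requests.get(page_url, headers=headers, timeout=30)
    response.raise_for_status()  # stop early on 4xx/5xx responses
    soup = BeautifulSoup(response.text, "html.parser")
    article = soup.find("article")
    if article is None:
        raise ValueError("No <article> tag found on the page")
    # Write every paragraph of the article to the output file
    with open(out_path, "w", encoding="utf-8") as file:
        for content in article.find_all("p"):
            file.write(content.text + "\n")

if __name__ == "__main__":
    download_article(input("Enter the URL of the Medium Article "))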