Skip to content
This repository was archived by the owner on Nov 30, 2022. It is now read-only.

Commit 65967e8

Browse files
author
namrun
committed
Updated script
1 parent 63360f9 commit 65967e8

File tree

1 file changed

+26
-16
lines changed

1 file changed

+26
-16
lines changed

Web-Scraping/medium-article-downloader.py

+26-16
Original file line numberDiff line numberDiff line change
@@ -5,32 +5,42 @@
55
import requests
66
from bs4 import BeautifulSoup
77

8-
#The content is written into a text file
98

10-
file = open("Medium_article_content.txt", "w")
9+
def article_download():
    """Download the text of a Medium article into a local file.

    Prompts the user for a Medium article URL, fetches the page with a
    browser-like User-Agent header, parses the HTML with BeautifulSoup,
    and writes every paragraph found under the <article> tag to
    ``Medium_article_content.txt``.
    """
    #The URL of the article is entered here
    page_url = input("Enter the URL of the Medium Article ")

    #A browser-like User-Agent so the site serves the full page instead of
    #blocking the default requests user agent
    headers = {"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0'}

    #BUG FIX: the second positional argument of requests.get() is `params`,
    #not `headers` — passing the dict positionally sent the User-Agent as
    #query-string parameters and never set the header. Use the keyword.
    response = requests.get(page_url, headers=headers)

    #Beautiful soup is a library used for web scraping and parsing the contents of a web page
    #Here a html parser is used to parse through the content embedded in the html tags
    soup = BeautifulSoup(response.text, "html.parser")

    #Use a context manager so the file is closed even if parsing raises
    #(e.g. when soup.find('article') returns None for a non-article page).
    with open("Medium_article_content.txt", "w") as file:
        #The content of the article is stored in the <article> tag
        for line in soup.find('article').find('div'):
            #All the content is essentially stored between <p> tags
            for content in line.find_all('p'):
                #contents are written into a file
                file.write(content.text + '\n')

    print("Content downloaded")


if __name__ == "__main__":
    article_download()

0 commit comments

Comments
 (0)