Skip to content
This repository was archived by the owner on Nov 30, 2022. It is now read-only.

Commit dd64bb4

Browse files
committed
All necessary changes made
1 parent bdbba90 commit dd64bb4

File tree

4 files changed

+58
-35
lines changed

4 files changed

+58
-35
lines changed
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
# Unnecessary Files
22

3-
app/__pycahce__/
3+
app/__pycahce__/
4+
.idea

Web-Scraping/Medium-Articles-Details-Scrapping/README.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,33 @@ This script will scrap details about medium articles published in a date range i
1111
- Open the run.py to add the dictionary of urls, date range and number of random dates.
1212
- Save the file.
1313
- Run the command: python run.py
14+
15+
# About the Scrap class
16+
A Scrapper to get details about medium articles published in a date range in a Publication by selecting random dates.
17+
18+
Attributes
19+
----------
20+
urls_dict : dict
21+
key-value pairs of the publication name with link. Example:
22+
urls_dict={"The Startup":"https://medium.com/swlh"}
23+
24+
start_date : str
25+
starting date of the search. Default: 2020-01-01
26+
27+
end_date : str
28+
ending date of the search. Default: 2020-08-01
29+
30+
year : int
31+
year in which search has to be done. Default: 2020
32+
33+
number: int
34+
number of random dates you want to pick. Default: 10
35+
36+
Methods
37+
-------
38+
scrap():
39+
Scrapping process will be initiated by this method.
40+
41+
dataframe():
42+
Returns the dataframe object.
43+

Web-Scraping/Medium-Articles-Details-Scrapping/app/__init__.py

Lines changed: 1 addition & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -6,37 +6,8 @@
66
import itertools
77
import time
88

9-
class Scrap:
10-
"""
11-
A Scrapper to get details about medium articles published in a date range in a Publication by selecting random dates.
12-
13-
Attributes
14-
----------
15-
urls_dict : dict
16-
key-value pairs of the publication name with link. Example:
17-
urls_dict={"The Startup":"https://medium.com/swlh"}
18-
19-
start_date : str
20-
starting date of the search. Default: 2020-01-01
21-
22-
end_date : str
23-
ending date of the search. Default: 2020-08-01
24-
25-
year : int
26-
year in which search has to be done. Default: 2020
279

28-
number: int
29-
number of random dates you want to pick. Default: 10
30-
31-
Methods
32-
-------
33-
scrap():
34-
Scrapping process will be initiated by this method.
35-
36-
dataframe():
37-
Returns the dataframe object.
38-
39-
"""
10+
class Scrap:
4011

4112
def __init__(self, urls_dict, start_date='2020-01-01', end_date='2020-08-01', number=10, year=2020):
4213
self.urls = urls_dict
Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,31 @@
11
from app import Scrap
22

3+
print('-----------------')
4+
pub_name = input('Enter the comma separated list of publication names(The Startup, Medium ...): ').split(',')
5+
pub_link = input('Enter the comma separated links of publications (https://medium.com/swlh, https://towardsdatascience.com ...): ').split(',')
36

4-
a = Scrap(urls_dict={"Towards Data Science": "https://towardsdatascience.com",
5-
"The Startup":"https://medium.com/swlh",
6-
}, number=50,
7-
start_date='2019-01-01', end_date='2019-08-01',year=2019)
7+
if len(pub_name) != len(pub_link):
8+
print('Please Enter links of all publications!')
9+
10+
pub_dict = {i: j for i, j in zip(pub_name, pub_link)}
11+
12+
choice = input("The default information passed is:\nNumber=5\nstart_date='2019-01-01'\nend_date='2019-08-01'\nyear=2019\n\nDo you want to change it? (Y/N): ")
13+
14+
if choice == 'Y':
15+
s_date = input("Enter new start date in format (YYYY-MM-DD): ")
16+
e_date = input("Enter new end date in format (YYYY-MM-DD): ")
17+
new_year = int(input("Enter year: "))
18+
num = int(input("Enter number of random samples: "))
19+
else:
20+
s_date = '2019-01-01'
21+
e_date = '2019-08-01'
22+
new_year = 2020
23+
num = 5
24+
25+
print('Process started ...')
26+
a = Scrap(urls_dict=pub_dict, number=num, start_date=s_date, end_date=e_date, year=new_year)
827
a.scrap()
928
a.dataframe().to_csv('results.csv')
1029
print(a.dataframe())
30+
print('-----------------')
31+
print('Process ended... Thanks for using!')

0 commit comments

Comments
 (0)