From 2a35441c26a41a9859373045812cece8f3222743 Mon Sep 17 00:00:00 2001
From: GudlaArunKumar <arunkumarakansha@gmail.com>
Date: Sun, 16 Aug 2020 11:29:41 +0530
Subject: [PATCH 1/2] Added Scraping Hacker news website script

---
 Web-Scraping/ScrappingHackerNewsWebsite.py | 54 ++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 Web-Scraping/ScrappingHackerNewsWebsite.py

diff --git a/Web-Scraping/ScrappingHackerNewsWebsite.py b/Web-Scraping/ScrappingHackerNewsWebsite.py
new file mode 100644
index 00000000..feaa8231
--- /dev/null
+++ b/Web-Scraping/ScrappingHackerNewsWebsite.py
@@ -0,0 +1,54 @@
+'''
+Scrapes the first 2 pages of the Hacker News website and lists the tech news stories (articles)
+that have more than 100 upvotes. The user can just click on a story link to read the article.
+'''
+
+'''
+The program uses the requests module to get web data from a URL and the BeautifulSoup module to parse
+that web data as HTML using the html parser.
+Install the requests and BeautifulSoup (bs4) modules before executing!
+'''
+
+import requests
+from bs4 import BeautifulSoup
+import pprint  # Built-in Python module used to pretty-print the final output
+
+
+response1 = requests.get("https://news.ycombinator.com/news")   # Response for the first page of the website
+response2 = requests.get("https://news.ycombinator.com/news?p=2")  # Response for the second page of the website
+
+response1_html_parser = BeautifulSoup(response1.text,'html.parser') # Parse the received web data with the html parser
+response2_html_parser = BeautifulSoup(response2.text,'html.parser')
+
+linksInPage1 = response1_html_parser.select('.storylink') # Story links are selected by the CSS class "storylink"
+linksInPage2 = response2_html_parser.select('.storylink')
+
+votesInPage1 = response1_html_parser.select('.subtext') # Votes are read later from the "score" element inside each "subtext" element
+votesInPage2 = response2_html_parser.select('.subtext')
+
+
+mega_link = linksInPage1 + linksInPage2  # Combined story links of both pages (page 1 first)
+#print(mega_link)
+mega_votes = votesInPage1 + votesInPage2  # Combined "subtext" elements of both pages (page 1 first)
+
+def sorted_stories_list(hackerNewsList):
+    """Return a new list of the story dicts sorted in decreasing
+       order of their 'votes' value (most upvoted first)."""
+    return sorted(hackerNewsList,key=lambda x:x['votes'],reverse=True)
+
+def create_custom_hackernews(mega_link,mega_votes):
+    hackerNews =[]  # Stories with more than 100 points, as {'title', 'link', 'votes'} dicts
+    for index,item in enumerate(mega_link):  # NOTE(review): pairs mega_link[index] with mega_votes[index]; assumes both lists line up - confirm
+        title = mega_link[index].getText()  # Title of the story (news item)
+        href = mega_link[index].get('href',None) # Link of the story; defaults to None when no href is present
+        vote = mega_votes[index].select('.score') # Points are in a "score" element inside "subtext"; the result is empty when no score is present
+        if len(vote): #To check if class "score" exists or not
+            points = int(vote[0].getText().replace(' points', ''))
+            if points > 100:  # To get votes/points more than 100
+                hackerNews.append({'title': title, 'link': href,'votes':points})
+
+    return sorted_stories_list(hackerNews)
+
+if __name__ == '__main__':
+    # Pretty-print the title, link and vote count of every selected story
+    pprint.pprint(create_custom_hackernews(mega_link,mega_votes))
\ No newline at end of file

From eb92b6b1be9455fd68cb1702fb07bc7db3deccbf Mon Sep 17 00:00:00 2001
From: GudlaArunKumar <arunkumarakansha@gmail.com>
Date: Sun, 16 Aug 2020 17:02:32 +0530
Subject: [PATCH 2/2] Added Readme as per PR review

---
 Web-Scraping/ScrappingHackerNewsWebsite/README.md           | 6 ++++++
 .../ScrappingHackerNewsWebsite.py                           | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 Web-Scraping/ScrappingHackerNewsWebsite/README.md
 rename Web-Scraping/{ => ScrappingHackerNewsWebsite}/ScrappingHackerNewsWebsite.py (99%)

diff --git a/Web-Scraping/ScrappingHackerNewsWebsite/README.md b/Web-Scraping/ScrappingHackerNewsWebsite/README.md
new file mode 100644
index 00000000..9d2ced2c
--- /dev/null
+++ b/Web-Scraping/ScrappingHackerNewsWebsite/README.md
@@ -0,0 +1,6 @@
+# Scraping Hacker news Website
+
+Scrapes the first 2 pages of the Hacker News website so the user can read tech news (articles) that have more than 100 upvotes, with the help of the Requests and
+Beautiful Soup modules. The user can just click on a story link to read the article.
+
+Link to the Hacker News website - https://news.ycombinator.com/
\ No newline at end of file
diff --git a/Web-Scraping/ScrappingHackerNewsWebsite.py b/Web-Scraping/ScrappingHackerNewsWebsite/ScrappingHackerNewsWebsite.py
similarity index 99%
rename from Web-Scraping/ScrappingHackerNewsWebsite.py
rename to Web-Scraping/ScrappingHackerNewsWebsite/ScrappingHackerNewsWebsite.py
index feaa8231..bb66926c 100644
--- a/Web-Scraping/ScrappingHackerNewsWebsite.py
+++ b/Web-Scraping/ScrappingHackerNewsWebsite/ScrappingHackerNewsWebsite.py
@@ -45,7 +45,7 @@ def create_custom_hackernews(mega_link,mega_votes):
         if len(vote): #To check if class "score" exists or not
             points = int(vote[0].getText().replace(' points', ''))
             if points > 100:  # To get votes/points more than 100
-                hackerNews.append({'title': title, 'link': href,'votes':points})
+                hackerNews.append({'title': title, 'link': href,'votes': points})
 
     return sorted_stories_list(hackerNews)