Skip to content

Commit a104e87

Browse files
authored
Add files via upload
1 parent d8a23f1 commit a104e87

File tree

1 file changed

+27
-0
lines changed

1 file changed

+27
-0
lines changed

Diff for: DecodeAWebPage.py

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import requests #Getting html from a site
2+
from bs4 import BeautifulSoup #parses that disgustingness from requests (ty bs)
3+
4+
# Use the BeautifulSoup and requests Python packages to print out a list
5+
# of all the article titles on the New York Times homepage.
6+
# https://www.nytimes.com/
7+
8+
# I poked around a bit and even looked in the comments at some of the videos
9+
# but nothing worked, so I went to the solution on this one. 4 chilis, not
10+
# so disappointed in myself for finally breaking on a problem. I did add
11+
# comments so I would understand it.
12+
13+
def clean_title(text):
    """Return *text* on a single line with surrounding whitespace removed.

    Newlines inside the headline are replaced by spaces before trimming,
    matching how the original script tidied link text.
    """
    return text.replace("\n", " ").strip()


def extract_titles(soup, css_class="story-heading"):
    """Collect headline strings from a parsed page.

    Args:
        soup: A parsed ``BeautifulSoup`` document.
        css_class: CSS class that marks a headline element.

    Returns:
        A list of non-empty, cleaned headline strings in document order.
    """
    headings = soup.find_all(class_=css_class)
    if not headings:
        # The author's notes say the class lookup comes back empty, so try
        # plain heading tags instead of giving up.
        # NOTE(review): h2/h3 as headline containers is a guess about the
        # live markup -- confirm against the actual page.
        headings = soup.find_all(["h2", "h3"])

    titles = []
    for heading in headings:
        # Prefer the link text when the heading wraps an <a>; otherwise use
        # the heading's own text. get_text() also copes with headings that
        # are empty or whose first child is a tag, where the old
        # contents[0].strip() would raise.
        node = heading.a if heading.a else heading
        title = clean_title(node.get_text())
        if title:
            titles.append(title)
    return titles


def main(base_url="https://www.nytimes.com"):
    """Fetch *base_url* and print one article title per line.

    Returns:
        0 when at least one title was printed, 1 when none were found.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            non-success HTTP status (via ``raise_for_status``).
    """
    import sys  # local import: only needed for the diagnostic below

    # Without a timeout requests can wait forever on a stalled connection.
    response = requests.get(base_url, timeout=10)
    # Fail loudly instead of parsing an error page as if it were the homepage.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, features="html.parser")
    titles = extract_titles(soup)
    if not titles:
        # Say so explicitly rather than exiting with no output at all.
        print("No article titles found at %s" % base_url, file=sys.stderr)
        return 1

    for title in titles:
        print(title)
    return 0


if __name__ == '__main__':
    raise SystemExit(main())
25+
26+
# MORAL OF THE CODE
27+
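# The tutorial solution doesn't work as written: nothing on the page carries the "story-heading" class, so that lookup returns an empty list.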

0 commit comments

Comments
 (0)