Skip to content

Commit 3738588

Browse files
appledora and cclauss authored
Improved readability of web_programming/get_imdbtop.py and added documentations with doctests (TheAlgorithms#4855)
* improved readability of the existing method by reformatting, adding documentations with doctests.
* improved readability of the existing method by reformatting, adding documentations with doctests.
* fixed typo in test
* added doctest to parse dictionary method
* added doctest to parse dictionary method
* Changed return type, removed print() from method and implemented doctests as suggested
* Fixed doctest error, removed print() from method, created new script as suggested
* Update get_imdbtop.py
* Fix typo discovered by codespell
* return ()

Co-authored-by: Christian Clauss <[email protected]>
1 parent 1522617 commit 3738588

File tree

1 file changed

+45
-12
lines changed

1 file changed

+45
-12
lines changed

web_programming/get_imdbtop.py

+45-12
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,53 @@
1+
import bs4
12
import requests
2-
from bs4 import BeautifulSoup
33

44

5-
def imdb_top(imdb_top_n):
5+
def get_movie_data_from_soup(soup: "bs4.element.Tag") -> dict[str, str]:
    """Build a dict describing one movie from its parsed search-result element.

    Args:
        soup: A single parsed element for one movie (one item of a
            ``find_all`` result, not the whole result set).

    Returns:
        A dict with the keys ``name`` (text of the link inside the ``<h3>``),
        ``genre`` (stripped text of the ``<span class="genre">``), ``rating``
        (text of the first ``<strong>``) and ``page_link`` (the first link's
        ``href`` appended to ``https://www.imdb.com``).

    Raises:
        AttributeError: If one of the expected child elements is missing,
            because the lookup then yields ``None``.
    """
    return {
        "name": soup.h3.a.text,
        # The genre text is surrounded by whitespace in the markup, so strip it.
        "genre": soup.find("span", class_="genre").text.strip(),
        "rating": soup.strong.text,
        # The href is site-relative; prefix the host to get an absolute URL.
        "page_link": f"https://www.imdb.com{soup.a.get('href')}",
    }
12+
13+
14+
def get_imdb_top_movies(num_movies: int = 5) -> tuple:
15+
"""Get the top num_movies most highly rated movies from IMDB and
16+
return a tuple of dicts describing each movie's name, genre, rating, and URL.
17+
18+
Args:
19+
num_movies: The number of movies to get. Defaults to 5.
20+
21+
Returns:
22+
A tuple of dicts containing information about the top n movies.
23+
24+
>>> len(get_imdb_top_movies(5))
25+
5
26+
>>> len(get_imdb_top_movies(-3))
27+
0
28+
>>> len(get_imdb_top_movies(4.99999))
29+
4
30+
"""
31+
num_movies = int(float(num_movies))
32+
if num_movies < 1:
33+
return ()
634
base_url = (
7-
f"https://www.imdb.com/search/title?title_type="
8-
f"feature&sort=num_votes,desc&count={imdb_top_n}"
35+
"https://www.imdb.com/search/title?title_type="
36+
f"feature&sort=num_votes,desc&count={num_movies}"
37+
)
38+
source = bs4.BeautifulSoup(requests.get(base_url).content, "html.parser")
39+
return tuple(
40+
get_movie_data_from_soup(movie)
41+
for movie in source.find_all("div", class_="lister-item mode-advanced")
942
)
10-
source = BeautifulSoup(requests.get(base_url).content, "html.parser")
11-
for m in source.findAll("div", class_="lister-item mode-advanced"):
12-
print("\n" + m.h3.a.text) # movie's name
13-
print(m.find("span", attrs={"class": "genre"}).text) # genre
14-
print(m.strong.text) # movie's rating
15-
print(f"https://www.imdb.com{m.a.get('href')}") # movie's page link
16-
print("*" * 40)
1743

1844

1945
if __name__ == "__main__":
20-
imdb_top(input("How many movies would you like to see? "))
46+
import json
47+
48+
num_movies = int(input("How many movies would you like to see? "))
49+
print(
50+
", ".join(
51+
json.dumps(movie, indent=4) for movie in get_imdb_top_movies(num_movies)
52+
)
53+
)

0 commit comments

Comments
 (0)