|
| 1 | +import bs4 |
1 | 2 | import requests
|
2 |
| -from bs4 import BeautifulSoup |
3 | 3 |
|
4 | 4 |
|
5 |
| -def imdb_top(imdb_top_n): |
| 5 | +def get_movie_data_from_soup(soup: bs4.element.ResultSet) -> dict[str, str]: |
| 6 | + return { |
| 7 | + "name": soup.h3.a.text, |
| 8 | + "genre": soup.find("span", class_="genre").text.strip(), |
| 9 | + "rating": soup.strong.text, |
| 10 | + "page_link": f"https://www.imdb.com{soup.a.get('href')}", |
| 11 | + } |
| 12 | + |
| 13 | + |
| 14 | +def get_imdb_top_movies(num_movies: int = 5) -> tuple: |
| 15 | + """Get the num_movies most-voted feature films from IMDB and |
| 16 | + return a tuple of dicts describing each movie's name, genre, rating, and URL. |
| 17 | +
|
| 18 | + Args: |
| 19 | + num_movies: The number of movies to get. Defaults to 5. |
| 20 | +
|
| 21 | + Returns: |
| 22 | + A tuple of dicts, one per movie, with its name, genre, rating, and page link. |
| 23 | +
|
| 24 | + >>> len(get_imdb_top_movies(5)) |
| 25 | + 5 |
| 26 | + >>> len(get_imdb_top_movies(-3)) |
| 27 | + 0 |
| 28 | + >>> len(get_imdb_top_movies(4.99999)) |
| 29 | + 4 |
| 30 | + """ |
| 31 | + num_movies = int(float(num_movies)) |
| 32 | + if num_movies < 1: |
| 33 | + return () |
6 | 34 | base_url = (
|
7 |
| - f"https://www.imdb.com/search/title?title_type=" |
8 |
| - f"feature&sort=num_votes,desc&count={imdb_top_n}" |
| 35 | + "https://www.imdb.com/search/title?title_type=" |
| 36 | + f"feature&sort=num_votes,desc&count={num_movies}" |
| 37 | + ) |
| 38 | + source = bs4.BeautifulSoup(requests.get(base_url).content, "html.parser") |
| 39 | + return tuple( |
| 40 | + get_movie_data_from_soup(movie) |
| 41 | + for movie in source.find_all("div", class_="lister-item mode-advanced") |
9 | 42 | )
|
10 |
| - source = BeautifulSoup(requests.get(base_url).content, "html.parser") |
11 |
| - for m in source.findAll("div", class_="lister-item mode-advanced"): |
12 |
| - print("\n" + m.h3.a.text) # movie's name |
13 |
| - print(m.find("span", attrs={"class": "genre"}).text) # genre |
14 |
| - print(m.strong.text) # movie's rating |
15 |
| - print(f"https://www.imdb.com{m.a.get('href')}") # movie's page link |
16 |
| - print("*" * 40) |
17 | 43 |
|
18 | 44 |
|
19 | 45 | if __name__ == "__main__":
|
20 |
| - imdb_top(input("How many movies would you like to see? ")) |
| 46 | + import json |
| 47 | + |
| 48 | + num_movies = int(input("How many movies would you like to see? ")) |
| 49 | + print( |
| 50 | + ", ".join( |
| 51 | + json.dumps(movie, indent=4) for movie in get_imdb_top_movies(num_movies) |
| 52 | + ) |
| 53 | + ) |
0 commit comments