Skip to content

Commit 850ea72

Browse files
authored
Merge pull request #28 from ssiddhantsharma/patch-3
Create imdb.py
2 parents b4bb9e5 + 3562d1b commit 850ea72

File tree

1 file changed

+87
-0
lines changed

1 file changed

+87
-0
lines changed

Automation/src/imdb_scraper/imdb.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
from urllib.parse import quote_plus

import requests #for imdb movie requests.
from bs4 import BeautifulSoup #BeautifulSoup for HTML parsing.
5+
6+
# Ask the user which title to look up.
print('Enter movie/Tv series name')
movie = input()
print()

# Build the IMDb search URL. The title is percent-encoded so that spaces and
# reserved characters such as '&' or '#' stay inside the q= parameter instead
# of truncating or splitting the query string.
url = 'http://www.imdb.com/find?ref_=nv_sr_fn&q=' + quote_plus(movie) + '&s=all'
13+
14+
def get_title(movie_url):
    """Fetch an IMDb title page and return the title text from its heading.

    Args:
        movie_url: URL of the title page to download.

    Returns:
        The text of the ``<h1>`` inside the first ``div.title_wrapper``
        element, with trailing whitespace removed, or ``None`` when the page
        has no such element or the element has no ``<h1>``.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    # A timeout stops a stalled connection from blocking the script forever.
    response = requests.get(movie_url, timeout=10)
    soup = BeautifulSoup(response.text, 'lxml')

    # Only the first matching wrapper is considered.
    wrapper = soup.find('div', {'class': 'title_wrapper'})
    if wrapper is None:
        return None

    heading = wrapper.find('h1')
    if heading is None:
        return None
    return heading.text.rstrip()
27+
# Download the search results page; the timeout keeps a stalled connection
# from hanging the script forever.
source_code = requests.get(url, timeout=10)
plain_text = source_code.text
soup = BeautifulSoup(plain_text, 'lxml')

# Use the first search hit that actually carries a link to a title page.
movie_page = None
for td in soup.find_all('td', {'class': 'result_text'}):
    link = td.find('a')
    if link is not None and link.get('href'):
        movie_page = 'http://www.imdb.com' + link['href']
        break

# Without a hit there is nothing to look up; stop with a readable message
# instead of failing later on an undefined movie_page.
if movie_page is None:
    raise SystemExit('No results found for "' + movie + '"')

# get_title may return None when the page has no recognisable heading; fall
# back to the text the user typed so later string concatenation still works.
movie_name = get_title(movie_page) or movie
44+
45+
def get_movie_data(movie_url):
    """Fetch an IMDb title page and print its rating and summary.

    Prints the text of every ``div.ratingValue`` element, prefixed with the
    module-level ``movie_name``, followed by a blank line, and then the text
    of every ``div.summary_text`` element. Everything is written to standard
    output; nothing is returned.

    Args:
        movie_url: URL of the title page to download.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    # A timeout stops a stalled connection from blocking the script forever.
    response = requests.get(movie_url, timeout=10)
    soup = BeautifulSoup(response.text, 'lxml')

    for rating_div in soup.find_all('div', {'class': 'ratingValue'}):
        # The label and the rating are printed on the same line.
        print('Imdb rating of the movie/Tv Series "' + movie_name + '" is: ', end='')
        print(rating_div.text)

    print()

    for summary_div in soup.find_all('div', {'class': 'summary_text'}):
        print('Summary of the movie/Tv series:')
        # Leading whitespace in the summary text is dropped before printing.
        print(summary_div.text.lstrip())
70+
71+
# Print the rating and summary for the title page resolved above.
get_movie_data(movie_page)

# Disabled genre-printing snippet, kept as an inert string literal; it is
# never executed.
# NOTE(review): if re-enabled as written it looks like it would fail, because
# it calls findAll on the whole result list (print_genre) rather than on each
# div -- confirm and fix before turning it back on.
'''print_genre = soup.findAll('div',{'class':'subtext'})
for div in print_genre:
    for genre in print_genre.findAll('a'):
        print(genre.text, end = ' |') #showing genre.
print()
'''

0 commit comments

Comments
 (0)