1
- import requests
1
+ from requests_html import HTMLSession
2
2
from bs4 import BeautifulSoup as bs
3
3
4
+ # init session
5
+ session = HTMLSession ()
6
+
4
7
5
8
def _digits_to_int(text):
    """Return the integer spelled by the digits found in *text*.

    Every non-digit character (thousands separators, words such as
    "views" or "likes") is discarded. ``None``, empty text and text that
    contains no digit at all yield 0 rather than the ``ValueError`` that
    ``int("")`` would raise.
    """
    digits = "".join(c for c in (text or "") if c.isdigit())
    return int(digits) if digits else 0


def get_video_info(url):
    """Download the video page at *url* and extract its metadata.

    The page is fetched through the module-level ``session``, its
    Javascript is executed, and the rendered HTML is parsed with
    BeautifulSoup.

    Returns a dict with the keys ``title``, ``views`` (int),
    ``description``, ``date_published``, ``duration``, ``tags``
    (comma-separated str), ``likes`` (int), ``dislikes`` (int) and
    ``channel`` (a dict with ``name``, ``url`` and ``subscribers``).

    Raises ``AttributeError`` when an expected element is missing from the
    rendered page (``soup.find`` returns ``None``), and ``IndexError`` when
    fewer than two like/dislike labels are found.
    """
    # download HTML code
    response = session.get(url)
    # execute Javascript so the dynamically inserted elements exist
    response.html.render(sleep=1)
    # create beautiful soup object to parse the rendered HTML
    soup = bs(response.html.html, "html.parser")

    result = {}
    # video title
    result["title"] = soup.find("h1").text.strip()
    # video views (converted to integer)
    result["views"] = _digits_to_int(
        soup.find("span", attrs={"class": "view-count"}).text
    )
    # video description
    result["description"] = soup.find(
        "yt-formatted-string", {"class": "content"}
    ).text
    # date published; the first character of the element text is dropped
    # (presumably a leading separator glyph -- TODO confirm)
    result["date_published"] = soup.find("div", {"id": "date"}).text[1:]
    # duration of the video
    result["duration"] = soup.find("span", {"class": "ytp-time-duration"}).text
    # video tags, joined into one comma-separated string
    result["tags"] = ", ".join(
        meta.attrs.get("content")
        for meta in soup.find_all("meta", {"property": "og:video:tag"})
    )
    # likes and dislikes: the counts are read from the aria-label of the
    # first and second matching element respectively
    toggle_labels = soup.find_all(
        "yt-formatted-string",
        {"id": "text", "class": "ytd-toggle-button-renderer"},
    )
    result["likes"] = _digits_to_int(toggle_labels[0].attrs.get("aria-label"))
    result["dislikes"] = _digits_to_int(toggle_labels[1].attrs.get("aria-label"))

    # channel details
    channel_tag = soup.find(
        "yt-formatted-string", {"class": "ytd-channel-name"}
    ).find("a")
    # channel name
    channel_name = channel_tag.text
    # channel URL (the href is appended to the site root)
    channel_url = f"https://www.youtube.com{channel_tag['href']}"
    # number of subscribers as str
    channel_subscribers = soup.find(
        "yt-formatted-string", {"id": "owner-sub-count"}
    ).text.strip()
    result["channel"] = {
        "name": channel_name,
        "url": channel_url,
        "subscribers": channel_subscribers,
    }
    return result
34
46
@@ -46,10 +58,12 @@ def get_video_info(url):
46
58
# print in nice format
47
59
print (f"Title: { data ['title' ]} " )
48
60
print (f"Views: { data ['views' ]} " )
49
- print (f"\n Description: { data ['description' ]} \n " )
50
- print (data ['date_published' ])
61
+ print (f"Published at: { data ['date_published' ]} " )
62
+ print (f"Video Duration: { data ['duration' ]} " )
63
+ print (f"Video tags: { data ['tags' ]} " )
51
64
print (f"Likes: { data ['likes' ]} " )
52
65
print (f"Dislikes: { data ['dislikes' ]} " )
66
+ print (f"\n Description: { data ['description' ]} \n " )
53
67
print (f"\n Channel Name: { data ['channel' ]['name' ]} " )
54
68
print (f"Channel URL: { data ['channel' ]['url' ]} " )
55
69
print (f"Channel Subscribers: { data ['channel' ]['subscribers' ]} " )
0 commit comments