import json
import csv
import urllib2
import re
import string

api_key = 'AIzaSyC4C3gzSSErzmc2FeUTleQqZGzw8-z-d6w'
# AIzaSyCrFWiPfGcb5IsyS-wpAMk6eaNdMaC8pXs
# AIzaSyDlZR2UhwQXeGw2IhCRnpoZB8LHZkagwI4
# AIzaSyCXqjs2ZPb0PQReIWiENMAAkSx0_tvd4nk
# AIzaSyCsE91PTD-XjTU3O_IZpY0PvVom2tw4Dr8
# AIzaSyArrhkh49b2GNlC8UdLodq3uSpKzcgdzeg
# AIzaSyCPcAKC74SzgQB8MSXKcPO6zIoVfqwlOig
# AIzaSyDBkoHdD1Iw6HooMhMoObbHFCXHFSwKzIU
# AIzaSyC4C3gzSSErzmc2FeUTleQqZGzw8-z-d6w

url = 'https://www.googleapis.com/youtube/v3/videos?part=snippet,statistics,recordingDetails&id='
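# Each request below is url + videoId + '&key=' + api_key; the API answers with a
# JSON document whose 'items' list can come back empty (e.g. the video is gone),
# in which case a placeholder row is written. The commented-out keys above appear
# to be spares to swap in by hand once the active key's quota runs out.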

# strip punctuation
regex = re.compile('[%s]' % re.escape(string.punctuation))

# id, title, description, like count, dislike count, location, tags
with open('tempList.txt', 'rb') as f:
    l = [line.rstrip('\n').split(',') for line in f]  # rstrip keeps the newline out of the description field
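# tempList.txt is expected to hold one video per line as 'videoId,title,description'
# (ASCII, punctuation already stripped): the format produced by the one-off
# conversion below, which read a JSON listing (items with 'id.videoId' and
# 'snippet' fields) and has since been commented out.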

# data = json.load(f)
# l = []
# t = open('tempList.txt', 'wb')

# for item in data['items']:
#     l.append([item['id']['videoId'], item['snippet']['title'], item['snippet']['description']])
#     t.write('%s,' % item['id']['videoId'])
#     try:
#         t.write('%s,' % regex.sub('', item['snippet']['title'].encode('utf8').decode('unicode_escape').encode('ascii','ignore')))
#     except:
#         print('title missing')
#         t.write(',')
#     try:
#         t.write('%s\n' % regex.sub('', item['snippet']['description'].encode('utf8').decode('unicode_escape').encode('ascii','ignore')))
#     except:
#         print('description missing')
#         t.write('\n')

# t.close()

with open('videoStats.csv', 'wb') as c:
    writer = csv.writer(c)
    writer.writerow(['Id', 'Title', 'Description', 'LikeCount', 'DislikeCount', 'Location (latitude, longitude)', 'Tags (; delimited string)'])

    for vid in l:
        try:
            stats = json.load(urllib2.urlopen(url + vid[0] + '&key=' + api_key))
            print(vid[0])
        except:
            # quota exhausted (or request failed): report where we stopped so the
            # run can be resumed with a fresh key, then stop the loop
            print('API key ran out')
            print(l.index(vid))
            break

        if stats['items'] == []:
            # video not returned by the API: keep the cached title/description, zero counts
            writer.writerow([vid[0], vid[1].encode('utf8'), vid[2].encode('utf8'), 0, 0, '', ''])
            continue

        s = stats['items'][0]
        LC = 0
        DC = 0
        loc = ''
        tags = ''

        if 'likeCount' in s['statistics']:
            LC = s['statistics']['likeCount']
        if 'dislikeCount' in s['statistics']:
            DC = s['statistics']['dislikeCount']
        if 'latitude' in s.get('recordingDetails', {}).get('location', {}):
            loc = str(s['recordingDetails']['location']['latitude']) + ';' + str(s['recordingDetails']['location']['longitude'])
        if 'tags' in s['snippet']:
            t = s['snippet']['tags']

            # drop URLs and punctuation from each tag before joining
            for i in range(len(t)):
                t[i] = re.sub(r'http\S+|www\.\S+', '', t[i])
                t[i] = regex.sub('', t[i])

            tags = ';'.join(t)

        # same cleanup for the cached title and description
        title = re.sub(r'http\S+|www\.\S+', '', vid[1])
        descr = re.sub(r'http\S+|www\.\S+', '', vid[2])

        title = regex.sub('', title)
        descr = regex.sub('', descr)

        writer.writerow([vid[0], title.encode('utf8').decode('unicode_escape').encode('ascii', 'ignore'), descr.encode('utf8').decode('unicode_escape').encode('ascii', 'ignore'), LC, DC, loc, tags.encode('utf8').decode('unicode_escape').encode('ascii', 'ignore')])
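
# Usage sketch (an assumption, not stated in the original): run the script under
# Python 2 (it uses urllib2) from the directory containing tempList.txt; it writes
# videoStats.csv with one row per video id. If the key's quota runs out mid-run,
# the printed index shows how far it got; swap in one of the spare keys above and
# trim tempList.txt before re-running.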