Skip to content

Commit

Permalink
[instagram] fix extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
soimort committed Jul 1, 2022
1 parent a47960f commit d661c95
Showing 1 changed file with 34 additions and 44 deletions.
78 changes: 34 additions & 44 deletions src/you_get/extractors/instagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,60 +10,50 @@ def instagram_download(url, output_dir='.', merge=True, info_only=False, **kwarg

vid = r1(r'instagram.com/\w+/([^/]+)', url)
description = r1(r'<meta property="og:title" content="([^"]*)"', cont) or \
r1(r'<title>\s([^<]*)</title>', cont) # with logged-in cookies
r1(r'<title>([^<]*)</title>', cont) # with logged-in cookies
title = "{} [{}]".format(description.replace("\n", " "), vid)

stream = r1(r'<meta property="og:video" content="([^"]*)"', cont)
if stream:
_, ext, size = url_info(stream)

print_info(site_info, title, ext, size)
if not info_only:
download_urls([stream], title, ext, size, output_dir, merge=merge)
else:
data = re.search(r'window\._sharedData\s*=\s*(.*);</script>', cont)
try:
info = json.loads(data.group(1))
post = info['entry_data']['PostPage'][0]
assert post['items']
except:
# with logged-in cookies
data = re.search(r'window\.__additionalDataLoaded\(\'[^\']+\',(.*)\);</script>', cont)
if data is not None:
log.e('[Warning] Cookies needed.')
post = json.loads(data.group(1))

for item in post['items']:
code = item['code']
carousel_media = item.get('carousel_media') or [item]
for i, media in enumerate(carousel_media):
title = '%s [%s]' % (code, i)
image_url = media['image_versions2']['candidates'][0]['url']
ext = image_url.split('?')[0].split('.')[-1]
size = int(get_head(image_url)['Content-Length'])
appId = r1(r'"appId":"(\d+)"', cont)
media_id = r1(r'"media_id":"(\d+)"', cont)

api_url = 'https://i.instagram.com/api/v1/media/%s/info/' % media_id
try:
api_cont = get_content(api_url, headers={**fake_headers, **{'x-ig-app-id': appId}})
except:
log.wtf('[Error] Please specify a cookie file.')
post = json.loads(api_cont)

for item in post['items']:
code = item['code']
carousel_media = item.get('carousel_media') or [item]
for i, media in enumerate(carousel_media):
title = '%s [%s]' % (code, i)
image_url = media['image_versions2']['candidates'][0]['url']
ext = image_url.split('?')[0].split('.')[-1]
size = int(get_head(image_url)['Content-Length'])

print_info(site_info, title, ext, size)
if not info_only:
download_urls(urls=[image_url],
title=title,
ext=ext,
total_size=size,
output_dir=output_dir)

# download videos (if any)
if 'video_versions' in media:
video_url = media['video_versions'][0]['url']
ext = video_url.split('?')[0].split('.')[-1]
size = int(get_head(video_url)['Content-Length'])

print_info(site_info, title, ext, size)
if not info_only:
download_urls(urls=[image_url],
download_urls(urls=[video_url],
title=title,
ext=ext,
total_size=size,
output_dir=output_dir)

# download videos (if any)
if 'video_versions' in media:
video_url = media['video_versions'][0]['url']
ext = video_url.split('?')[0].split('.')[-1]
size = int(get_head(video_url)['Content-Length'])

print_info(site_info, title, ext, size)
if not info_only:
download_urls(urls=[video_url],
title=title,
ext=ext,
total_size=size,
output_dir=output_dir)

# Site label that instagram_download passes to print_info() as its first argument.
site_info = "Instagram.com"
# Module-level alias exposing instagram_download under the name `download`.
download = instagram_download
# Playlist entry point produced by playlist_not_supported(), which is defined
# outside this view; presumably it reports that Instagram playlists are
# unsupported -- TODO confirm against the helper's definition.
download_playlist = playlist_not_supported('instagram')

0 comments on commit d661c95

Please sign in to comment.