Skip to content

Commit d878429

Browse files
committed
Add lazy-loading support to the image-downloading tutorial
1 parent 14f8bb5 commit d878429

File tree

2 files changed

+6
-5
lines changed

2 files changed

+6
-5
lines changed

web-scraping/download-images/download_images.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ def download(url, pathname):
5757
# progress bar, changing the unit to bytes instead of iteration (default by tqdm)
5858
progress = tqdm(response.iter_content(1024), f"Downloading {filename}", total=file_size, unit="B", unit_scale=True, unit_divisor=1024)
5959
with open(filename, "wb") as f:
60-
for data in progress:
60+
for data in progress.iterable:
6161
# write data read to the file
6262
f.write(data)
6363
# update the progress bar manually

web-scraping/download-images/download_images_js.py

+5-4
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,14 @@ def get_all_images(url):
2323
session = HTMLSession()
2424
# make the HTTP request and retrieve response
2525
response = session.get(url)
26-
# execute Javascript
27-
response.html.render()
26+
# execute Javascript with a timeout of 20 seconds
27+
response.html.render(timeout=20)
2828
# construct the soup parser
2929
soup = bs(response.html.html, "html.parser")
3030
urls = []
3131
for img in tqdm(soup.find_all("img"), "Extracting images"):
32-
img_url = img.attrs.get("src") or img.attrs.get("data-src")
32+
img_url = img.attrs.get("src") or img.attrs.get("data-src") or img.attrs.get("data-original")
33+
print(img_url)
3334
if not img_url:
3435
# if img does not contain src attribute, just skip
3536
continue
@@ -68,7 +69,7 @@ def download(url, pathname):
6869
# progress bar, changing the unit to bytes instead of iteration (default by tqdm)
6970
progress = tqdm(response.iter_content(1024), f"Downloading {filename}", total=file_size, unit="B", unit_scale=True, unit_divisor=1024)
7071
with open(filename, "wb") as f:
71-
for data in progress:
72+
for data in progress.iterable:
7273
# write data read to the file
7374
f.write(data)
7475
# update the progress bar manually

0 commit comments

Comments
 (0)