Update the PDF image extractor for the new PyMuPDF API: getImageList() -> get_images(), extractImage() -> extract_image()

x4nth055 · x4nth055 · commit e840928637a2 · 2022-09-20T17:55:39.000+01:00
diff --git a/web-scraping/pdf-image-extractor/pdf_image_extractor.py b/web-scraping/pdf-image-extractor/pdf_image_extractor.py
@@ -10,17 +10,18 @@
 for page_index in range(len(pdf_file)):
     # get the page itself
     page = pdf_file[page_index]
-    image_list = page.getImageList()
+    # get image list
+    image_list = page.get_images()
     # printing number of images found in this page
     if image_list:
         print(f"[+] Found a total of {len(image_list)} images in page {page_index}")
     else:
         print("[!] No images found on page", page_index)
-    for image_index, img in enumerate(page.getImageList(), start=1):
+    for image_index, img in enumerate(image_list, start=1):
         # get the XREF of the image
         xref = img[0]
         # extract the image bytes
-        base_image = pdf_file.extractImage(xref)
+        base_image = pdf_file.extract_image(xref)
         image_bytes = base_image["image"]
         # get the image extension
         image_ext = base_image["ext"]