Commit eb553a5

committed
add download images tutorial & fixed web-scraping topic
1 parent b5bfe53 · commit eb553a5

13 files changed: +133 −3 lines changed

Diff for: README.md (+4 −3)

```diff
@@ -40,7 +40,8 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy
 - [How to Download Files in Python](https://www.thepythoncode.com/article/download-files-python). ([code](general/file-downloader))

 ### [Web Scraping](https://www.thepythoncode.com/topic/web-scraping)
-- [How to Access Wikipedia in Python](https://www.thepythoncode.com/article/access-wikipedia-python). ([code](general/wikipedia-extractor))
-- [How to Extract YouTube Data in Python](https://www.thepythoncode.com/article/get-youtube-data-python). ([code](general/youtube-extractor))
-- [How to Extract Weather Data from Google in Python](https://www.thepythoncode.com/article/extract-weather-data-python). ([code](general/weather-extractor))
+- [How to Access Wikipedia in Python](https://www.thepythoncode.com/article/access-wikipedia-python). ([code](web-scraping/wikipedia-extractor))
+- [How to Extract YouTube Data in Python](https://www.thepythoncode.com/article/get-youtube-data-python). ([code](web-scraping/youtube-extractor))
+- [How to Extract Weather Data from Google in Python](https://www.thepythoncode.com/article/extract-weather-data-python). ([code](web-scraping/weather-extractor))
+- [How to Download All Images from a Web Page in Python](https://www.thepythoncode.com/article/download-web-page-images-python). ([code](web-scraping/download-images))
```

Diff for: web-scraping/download-images/README.md (+26, new file)

# [How to Download All Images from a Web Page in Python](https://www.thepythoncode.com/article/download-web-page-images-python)
To run this:
- `pip3 install -r requirements.txt`
- To see the available options:
    ```
    python download_images.py --help
    ```
    **Output:**
    ```
    usage: download_images.py [-h] [-p PATH] url

    This script downloads all images from a web page

    positional arguments:
      url                   The URL of the web page you want to download images from

    optional arguments:
      -h, --help            show this help message and exit
      -p PATH, --path PATH  The directory you want to store your images in; the
                            default is the domain of the URL passed
    ```
- If you want to download all images from https://www.thepythoncode.com/topic/web-scraping, for example:
    ```
    python download_images.py https://www.thepythoncode.com/topic/web-scraping
    ```
    A new folder `www.thepythoncode.com` will be created automatically, containing all the images of that web page.
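The folder name in that last step is not arbitrary: when `-p/--path` is omitted, the script falls back to the network-location part of the URL as the directory name. A minimal sketch of that fallback, using only the standard library (the URL below is the one from the example above):

```python
from urllib.parse import urlparse

# when no --path is given, the script uses the URL's domain as the folder name
url = "https://www.thepythoncode.com/topic/web-scraping"
default_folder = urlparse(url).netloc
print(default_folder)  # www.thepythoncode.com
```

This is why scraping any page under the same domain reuses the same output folder unless `--path` is passed explicitly.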

Diff for: web-scraping/download-images/download_images.py (+100, new file)

```python
import requests
import os
from tqdm import tqdm
from bs4 import BeautifulSoup as bs
from urllib.parse import urljoin, urlparse


def is_absolute(url):
    """
    Determines whether a `url` is absolute.
    """
    return bool(urlparse(url).netloc)


def is_valid(url):
    """
    Checks whether `url` is a valid URL.
    """
    parsed = urlparse(url)
    return bool(parsed.netloc) and bool(parsed.scheme)


def get_all_images(url):
    """
    Returns all image URLs found on a single `url`.
    """
    soup = bs(requests.get(url).content, "html.parser")
    urls = []
    for img in tqdm(soup.find_all("img"), "Extracting images"):
        img_url = img.attrs.get("src")
        if not img_url:
            # if img does not contain a src attribute, just skip it
            continue
        if not is_absolute(img_url):
            # if img has a relative URL, make it absolute by joining it with the page URL
            img_url = urljoin(url, img_url)
        # remove query strings from URLs like '/hsts-pixel.gif?c=3.2.5'
        try:
            pos = img_url.index("?")
            img_url = img_url[:pos]
        except ValueError:
            pass
        # finally, keep the URL only if it is valid
        if is_valid(img_url):
            urls.append(img_url)
    return urls


def download(url, pathname):
    """
    Downloads a file given a URL and puts it in the folder `pathname`.
    """
    # if the path doesn't exist, create that directory
    if not os.path.isdir(pathname):
        os.makedirs(pathname)
    # download the body of the response by chunks, not immediately
    response = requests.get(url, stream=True)
    # get the total file size
    file_size = int(response.headers.get("Content-Length", 0))
    # build the file name from the last part of the URL
    filename = os.path.join(pathname, url.split("/")[-1])
    # progress bar, changing the unit to bytes instead of iterations (tqdm's default)
    progress = tqdm(response.iter_content(1024), f"Downloading {filename}", total=file_size,
                    unit="B", unit_scale=True, unit_divisor=1024)
    with open(filename, "wb") as f:
        for data in progress:
            # write the chunk read to the file
            f.write(data)
            # update the progress bar manually
            progress.update(len(data))


def main(url, path):
    # get all image URLs on the page
    imgs = get_all_images(url)
    for img in imgs:
        # download each image into `path`
        download(img, path)


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description="This script downloads all images from a web page")
    parser.add_argument("url", help="The URL of the web page you want to download images from")
    parser.add_argument("-p", "--path",
                        help="The directory you want to store your images in; the default is the domain of the URL passed")

    args = parser.parse_args()
    url = args.url
    path = args.path

    if not path:
        # if the path isn't specified, use the domain name of the URL as the folder name
        path = urlparse(url).netloc

    main(url, path)
```

Diff for: web-scraping/download-images/requirements.txt (+3, new file)

```
requests
bs4
tqdm
```
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
