scrapfly
diff --git a/‎.github/workflows/test_scrapers.yaml
Lines changed: 12 additions & 0 deletions b/‎.github/workflows/test_scrapers.yaml
Lines changed: 12 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 17 additions & 0 deletions b/‎README.md
Lines changed: 17 additions & 0 deletions
diff --git a/‎youtube-scraper/README.md
Lines changed: 54 additions & 0 deletions b/‎youtube-scraper/README.md
Lines changed: 54 additions & 0 deletions
diff --git a/‎youtube-scraper/pyproject.toml
Lines changed: 34 additions & 0 deletions b/‎youtube-scraper/pyproject.toml
Lines changed: 34 additions & 0 deletions
@@ -158,6 +158,18 @@ jobs:
             test: test_page_scraping
           - project_dir: yelp-scraper
             test: test_search_scraping
+          - project_dir: youtube-scraper
+            test: test_video_scraping
+          - project_dir: youtube-scraper
+            test: test_comment_scraping
+          - project_dir: youtube-scraper
+            test: test_channel_scraping
+          - project_dir: youtube-scraper
+            test: test_channel_videos_scraping
+          - project_dir: youtube-scraper
+            test: test_search_scraping
+          - project_dir: youtube-scraper
+            test: test_shorts_scraping
           - project_dir: immoscout24-scraper
             test: test_search_scraping
           - project_dir: immoscout24-scraper
 
@@ -565,6 +565,23 @@ Below is the list of available web scrapers for the supported domains along with
     <td><img src="https://img.shields.io/badge/Yelp_scraper-success-brightgreen" alt="Yelp-scraper-status"></td>
 </tr>
 
+<tr>
+    <td><a href="/youtube-scraper/">YouTube.com</a></td>
+    <td></td>
+    <td>
+    <ul>
+        <li><a href="./youtube-scraper/results/search.json">Search</a></li>
+        <li><a href="./youtube-scraper/results/channels.json">Channel metadata</a></li>
+        <li><a href="./youtube-scraper/results/channel_videos.json">Channel videos</a></li>
+        <li><a href="./youtube-scraper/results/videos.json">Video metadata</a></li>
+        <li><a href="./youtube-scraper/results/comments.json">Video comments</a></li>
+        <li><a href="./youtube-scraper/results/shorts.json">Shorts' metadata</a></li>
+    </ul>
+    </td>
+    <td><img src="https://img.shields.io/badge/YouTube_scraper-success-brightgreen" alt="YouTube-scraper-status"></td>
+</tr>
+
+
 <tr>
     <td><a href="/zillow-scraper/">Zillow.com</a></td>
     <td><a href="https://scrapfly.io/blog/how-to-scrape-zillow/">How to Scrape Zillow Real Estate Property Data in Python</a></td>
 
@@ -0,0 +1,54 @@
+# YouTube.com Scraper
+
+This scraper uses [scrapfly.io](https://scrapfly.io/) and Python to scrape public YouTube.com data: videos, channels, comments, search results, and shorts.
+
+Full tutorial
+
+The scraping code is located in the `youtube.py` file. It's fully documented and simplified for educational purposes. The example code for running the scraper can be found in the `run.py` file.
+
+This scraper scrapes:
+- YouTube video metadata
+- YouTube video comments
+- YouTube channel metadata
+- YouTube channel videos
+- YouTube search
+- YouTube shorts metadata
+
+For output examples, see the `./results` directory.
+
+
+## Fair Use Disclaimer
+
+Note that this code is provided free of charge as is, and Scrapfly does __not__ provide free web scraping support or consultation. For any bugs, see the issue tracker.
+
+## Setup and Use
+
+This YouTube.com scraper uses __Python 3.10__ with the [scrapfly-sdk](https://pypi.org/project/scrapfly-sdk/) package, which is used to scrape and parse YouTube's data.
+
+0. Ensure you have __Python 3.10__ and the [Poetry Python package manager](https://python-poetry.org/docs/#installation) installed on your system.
+1. Retrieve your Scrapfly API key from <https://scrapfly.io/dashboard> and set the `SCRAPFLY_KEY` environment variable:
+    ```shell
+    $ export SCRAPFLY_KEY="YOUR SCRAPFLY KEY"
+    ```
+2. Clone and install Python environment:
+    ```shell
+    $ git clone https://github.com/scrapfly/scrapfly-scrapers.git
+    $ cd scrapfly-scrapers/youtube-scraper
+    $ poetry install
+    ```
+3. Run example scrape:
+    ```shell
+    $ poetry run python run.py
+    ```
+4. Run tests:
+    ```shell
+    $ poetry install --with dev
+    $ poetry run pytest test.py
+    # or specific scraping areas
+    $ poetry run pytest test.py -k test_video_scraping
+    $ poetry run pytest test.py -k test_comment_scraping
+    $ poetry run pytest test.py -k test_channel_scraping
+    $ poetry run pytest test.py -k test_channel_videos_scraping
+    $ poetry run pytest test.py -k test_search_scraping
+    $ poetry run pytest test.py -k test_shorts_scraping
+    ```
@@ -0,0 +1,34 @@
+[tool.poetry]
+name = "scrapfly-youtube"
+version = "0.1.0"
+description = "demo web scraper for YouTube.com using Scrapfly"
+authors = ["Mazen Ramadan <[email protected]>"]
+license = "NPOSL-3.0"
+readme = "README.md"
+
+[tool.poetry.dependencies]
+python = "^3.10"
+scrapfly-sdk = {extras = ["all"], version = "^0.8.5"}
+loguru = "^0.7.0"
+
+[tool.poetry.group.dev.dependencies]
+black = "^23.3.0"
+ruff = "^0.0.269"
+cerberus = "^1.3.4"
+pytest = "^7.3.1"
+pytest-asyncio = "^0.21.0"
+pytest-rerunfailures = "^14.0"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.pytest.ini_options]
+python_files = "test.py"
+
+[tool.black]
+line-length = 120
+target-version = ['py37', 'py38', 'py39', 'py310', 'py311']
+
+[tool.ruff]
+line-length = 120