From ed07f790cf0bc05872c512862e15c39bdcef6485 Mon Sep 17 00:00:00 2001
From: Sparsh Manni <105648172+sparshmanni@users.noreply.github.com>
Date: Tue, 31 Oct 2023 23:47:32 +0530
Subject: [PATCH] Create Web-scrap.py

---
 Python Projects/Web-scrap.py | 46 ++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 Python Projects/Web-scrap.py

diff --git a/Python Projects/Web-scrap.py b/Python Projects/Web-scrap.py
new file mode 100644
index 00000000..50a30f59
--- /dev/null
+++ b/Python Projects/Web-scrap.py	
@@ -0,0 +1,46 @@
+# Required third-party packages:
+#   pip install requests
+#   pip install beautifulsoup4
+
+import requests
+from bs4 import BeautifulSoup
+
+def extract_text_and_save(url):
+    """Fetch `url` and save the page's text content to 'extracted_text.txt'."""
+    try:
+        response = requests.get(url, timeout=10)  # requests never times out by default
+    except requests.RequestException as exc:  # DNS failure, bad URL, timeout, ...
+        print(f"Failed to retrieve the page. Error: {exc}")
+        return
+    if response.status_code != 200:
+        print(f"Failed to retrieve the page. Status code: {response.status_code}")
+        return
+    with open('extracted_text.txt', 'w', encoding='utf-8') as file:
+        file.write(BeautifulSoup(response.text, 'html.parser').get_text())
+    print("Text data extracted and saved to 'extracted_text.txt'.")
+
+def extract_urls_and_save(url):
+    """Fetch `url` and save each <a href> value, one per line, to 'extracted_urls.txt'."""
+    try:
+        response = requests.get(url, timeout=10)  # requests never times out by default
+    except requests.RequestException as exc:  # DNS failure, bad URL, timeout, ...
+        print(f"Failed to retrieve the page. Error: {exc}")
+        return
+    if response.status_code != 200:
+        print(f"Failed to retrieve the page. Status code: {response.status_code}")
+        return
+    soup = BeautifulSoup(response.text, 'html.parser')
+    with open('extracted_urls.txt', 'w', encoding='utf-8') as file:
+        file.writelines(link['href'] + '\n' for link in soup.find_all('a', href=True))
+    print("URLs extracted and saved to 'extracted_urls.txt'.")
+
+# Main program: prompt for a URL and an extraction mode, then dispatch.
+url = input("Enter the URL you want to scrape: ").strip()
+# Normalize once so stray whitespace or capitals ("Text ") are still accepted.
+choice = input("Do you want to extract 'text' or 'urls'? ").strip().lower()
+if choice == 'text':
+    extract_text_and_save(url)
+elif choice == 'urls':
+    extract_urls_and_save(url)
+else:
+    print("Invalid choice. Please choose 'text' or 'urls'.")