"""Scrape all hyperlinks from a web page and save them to an Excel file.

Prompts the user for a URL, downloads the page, extracts every anchor
tag's ``href`` value, and writes the links (one per row) to a
timestamped ``.xls`` workbook in the current directory.
"""

from datetime import datetime
from urllib.error import URLError
from urllib.request import Request, urlopen

from bs4 import BeautifulSoup
from xlwt import Workbook


def _fetch_links(url):
    """Return a list of href values from all anchor tags on *url*.

    Raises URLError (network/HTTP failure) or ValueError (malformed URL).
    """
    # Some servers reject urllib's default User-Agent, so present a
    # browser-like one instead.
    request = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    html_data = urlopen(request).read()
    soup = BeautifulSoup(html_data, 'html.parser')
    # soup('a') is shorthand for soup.find_all('a'); keep only anchors
    # that actually carry an href attribute.
    return [tag['href'] for tag in soup('a') if tag.has_attr('href')]


def _save_links(links):
    """Write *links*, one per row, to a workbook named after the current time."""
    wb = Workbook()
    sheet = wb.add_sheet('Links')
    for row, link in enumerate(links):
        sheet.write(row, 0, link)
    # Time-stamp the file name so repeated runs don't overwrite each other.
    current_time = datetime.now().strftime("%H-%M-%S")
    wb.save('links for ' + current_time + '.xls')


def main():
    """Prompt for a URL, scrape its links, and export them to Excel."""
    # Re-prompt until the user enters a non-empty URL.
    url = input("Enter site to get links\n ")
    while not url:
        url = input("Enter site to get links\n ")

    links = []
    try:
        links = _fetch_links(url)
    except (URLError, ValueError):
        # Narrow handling: bad URL syntax (ValueError) or a network/HTTP
        # failure (URLError, which includes HTTPError). The original bare
        # except also swallowed KeyboardInterrupt and programming errors.
        print("Please check the URL properly")

    if not links:
        print("No links to fetch")
    else:
        _save_links(links)
        print("Done writing data to excel sheet")


if __name__ == "__main__":
    main()