-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathcheck-url.py
executable file
·82 lines (71 loc) · 2.72 KB
/
check-url.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python
# Copyright 2018 Jose Delarosa
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script fetches the URL given on the command line and either prints
# basic information about the response or reports why the request failed.
# It seems to work even when going through a proxy server, but it times out
# on some sites; the cause is unknown.
import sys, os, urllib2
from urllib2 import Request, urlopen, URLError
def usage(me):
    """Print a one-line usage message and terminate the script.

    Args:
        me: Program name to show in the message.

    Raises:
        SystemExit: Always, with exit status 0.
    """
    print("Usage: %s <url>" % (me,))
    # Use sys.exit rather than the bare exit() helper: exit() is injected by
    # the site module for interactive sessions and is not available when the
    # interpreter is started with -S.
    sys.exit(0)
# some sanity checking
def verifyurl(url):
    """Exit with an error unless *url* starts with an http or https scheme.

    The prefix is compared case-insensitively because URL schemes are not
    case-sensitive, so "HTTP://host" is accepted just like "http://host".

    Args:
        url: URL string to check.

    Raises:
        SystemExit: With status 1 when *url* starts with neither prefix.
    """
    str1 = "http://"   # http protocol
    str2 = "https://"  # https protocol
    # startswith accepts a tuple, so both prefixes are tested in one call.
    if not url.lower().startswith((str1, str2)):
        print("URL does not start with \"%s\" or \"%s\"" % (str1, str2))
        sys.exit(1)
def getresp(url):
    """Open *url* and return the response object from urllib2.urlopen.

    Args:
        url: URL to open.

    Returns:
        The object returned by urllib2.urlopen(url).

    Raises:
        SystemExit: With status 1 if the server returns an HTTP error, the
            request cannot be completed, or the user interrupts it.
    """
    try:
        return urllib2.urlopen(url)
    except urllib2.HTTPError as e:
        # HTTPError is a subclass of URLError and also carries a "reason",
        # so it must be caught first; otherwise HTTP status failures are
        # reported as connection failures and the code is never shown.
        print("The server couldn't fulfill request. Code:  %s" % (e.code,))
    except URLError as e:
        print("Couldn't connect to server. Reason: %s" % (e.reason,))
    except KeyboardInterrupt:  # let's add this for when it hangs
        print("Aborted by user.")
    # Every handled failure ends here, so there is no path that falls
    # through to returning an unassigned response.
    sys.exit(1)
def printinfo(resp):
    """Print the URL, status code, headers and body lines of a response.

    Args:
        resp: Response object providing geturl(), info(), read() and a
            ``code`` attribute (main passes the result of getresp).
    """
    # info() returns a dictionary-like object describing the page fetched,
    # particularly the headers sent by the server.
    headers = resp.info()
    print("URL:  %s" % (resp.geturl(),))    # Get the URL
    print("Return code:  %s" % (resp.code,))  # Get the code
    print("Headers:  %s" % (headers,))
    # Use .get() rather than [] so a response that lacks one of these
    # headers prints None instead of failing on the lookup.
    print("Date:  %s" % (headers.get('date'),))
    print("Server:  %s" % (headers.get('server'),))
    html = resp.read()
    print("Length : %s" % (len(html),))     # Get only the length
    # read() has already consumed the body, so iterating over resp again
    # would yield nothing; walk the lines of the data we already hold.
    # rstrip strips trailing whitespace before printing the output.
    for line in html.splitlines():
        print(line.rstrip())
def main():
    """Check the URL given as the first command-line argument.

    Shows the usage message when no argument is supplied, validates the URL
    prefix, fetches the URL and prints information about the response.
    """
    if len(sys.argv) < 2:  # Check args
        usage(sys.argv[0])
    url = sys.argv[1]
    verifyurl(url)
    resp = getresp(url)  # get 'response' from URL
    try:
        printinfo(resp)
    finally:
        # Release the connection even if printing the response fails.
        resp.close()


# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# 2013.11.06 13:55:28 - JD