1616import ConfigParser
1717
1818# Keep in mind: if you use non-ASCII characters in defaults, change those values from str ("") objects to unicode (u"") objects
19- # Except user_agent - this shoud stay str object ("")
20- # And dont 't forget to specify correct source file encoding
19+ # Except user_agent - this should stay str object ("")
20+ # And don't forget to specify the correct source file encoding
2121
2222# When current locale encoding can't be determined, fallback_encoding is used
2323# So it's actually better to set proper POSIX locale environment variables, instead of changing this default
@@ -49,7 +49,7 @@ def ProcessHostsFile(domain_tree, section, url, file, keep, encoding):
4949 hosts_encoding = encoding
5050
5151 if url :
52- SafePrint (u"Dowloading {}..." .format (section ))
52+ SafePrint (u"Downloading {}..." .format (section ))
5353
5454 # Reasons behind converting back and forth to UTF-8:
5555 # urllib2.quote and urllib.quote_plus choke on non-ASCII characters in unicode objects (any kind of str objects are ok)
@@ -104,7 +104,7 @@ def ProcessHostsFile(domain_tree, section, url, file, keep, encoding):
104104 for line in hosts .readlines ():
105105 if not re .match (white_pattern , line ):
106106 line_match = block_pattern .match (line )
107- line_items = line_match and line_match .group (2 ) is not None and line_match .group (2 ).strip ().split ()
107+ line_items = line_match and line_match .group (4 ) is not None and line_match .group (4 ).strip ().split ()
108108 if line_items :
109109 prc_count += 1
110110 for alias , hostname in enumerate (line_items ):
@@ -133,6 +133,12 @@ def ProcessHostsFile(domain_tree, section, url, file, keep, encoding):
133133
134134 if not keep : os .remove (hosts_path )
135135
def GetConfigBoolean(config, section, option):
    """Read a configuration option as a boolean, also accepting numbers.

    The option is first parsed with ``config.getfloat``; any value that
    parses as a number is truthy unless it equals zero. Only when that
    parsing fails with ValueError is the option handed to
    ``config.getboolean`` instead.

    config  -- parser object providing getfloat() and getboolean()
    section -- name of the section holding the option
    option  -- name of the option to read

    Returns a bool. Errors raised by ``config.getboolean`` (and any
    non-ValueError error from ``config.getfloat``) propagate to the caller.
    """
    try:
        numeric_value = config.getfloat(section, option)
    except ValueError:
        # Not a number: fall back to the parser's own boolean handling
        return config.getboolean(section, option)
    return bool(numeric_value)
def GetTimestamp(dt):
    """Format dt as "Mon DD HH:MM:SS YYYY".

    The month abbreviation is looked up in the module-level rfc3164_months
    table by dt.month, and the day of month is right-aligned in a
    two-character field padded with a space.

    dt -- object exposing .month and .day and supporting strftime-style
          format specs (presumably a datetime.datetime; confirm with callers)
    """
    month_abbr = rfc3164_months[dt.month - 1]
    return "{0} {1.day: >2} {1:%H:%M:%S %Y}".format(month_abbr, dt)
138144
@@ -145,7 +151,9 @@ def GetTimestamp(dt):
145151 SafePrint (u"Licensed under BSD 2-Clause License" );
146152 exit (1 )
147153
148- block_pattern = re .compile ("^\s*(0\.0\.0\.0|127\.\d{1,3}\.\d{1,3}\.\d{1,3}|::1|::)\s+([\w\s.-]+)#?" , re .UNICODE )
# Block pattern conforms (in a sane way) to RFC 4291 and ID draft-main-ipaddr-text-rep-02
# Capture groups of block_pattern:
#   1 - the whole address (all-zero IPv4, 127.x.x.x, or an all-zero / loopback IPv6 form)
#   2 - last repeated "zeros plus colon" part of an IPv6 address (None for IPv4)
#   3 - final IPv6 part (None for IPv4)
#   4 - the hostnames following the address, up to an optional "#"
# Patterns are raw strings so that regex escapes (\s, \d, \w, \.) are not treated as string escapes

block_pattern = re.compile(r"^\s*(0+\.0+\.0+\.0+|127\.\d+\.\d+\.\d+|(0{0,4}:){1,7}(0{0,4}|0{0,3}1))\s+([\w\s.-]+)#?", re.UNICODE)
# Matches comment-only lines and empty / whitespace-only lines
white_pattern = re.compile(r"^\s*#.*$|^\s*$")
# Splits text of the form <group 1>'<word characters or dashes>'<group 2>
encoding_pattern = re.compile(r"^([^']+)'[\w-]*'(.+)")
# English month abbreviations, indexed by month number minus one
rfc3164_months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
@@ -177,7 +185,7 @@ def GetTimestamp(dt):
177185 for section in config .sections ():
178186 action_file .write (u"# {}" .format (config .get (section , "Url" ) or config .get (section , "File" )) + os .linesep )
179187 try :
180- ProcessHostsFile (domain_tree , section , config .get (section , "Url" ), config .get (section , "File" ), config . getboolean ( section , "Keep" ), config .get (section , "Encoding" ))
188+ ProcessHostsFile (domain_tree , section , config .get (section , "Url" ), config .get (section , "File" ), GetConfigBoolean ( config , section , "Keep" ), config .get (section , "Encoding" ))
181189 except UnicodeError as e :
182190 SafePrint (u"Codec error ({}): {}" .format (e .encoding , e .message or e .reason ))
183191 except urllib2 .HTTPError as e :
0 commit comments