16
16
import ConfigParser
17
17
18
18
# Keep in mind: if you use non-ASCII characters in defaults, you should change their object type from str ("") to unicode (u"")
19
- # Except user_agent - this shoud stay str object ("")
20
- # And dont 't forget to specify correct source file encoding
19
+ # Except user_agent - this should stay str object ("")
20
+ # And don't forget to specify the correct source file encoding
21
21
22
22
# When current locale encoding can't be determined, fallback_encoding is used
23
23
# So it's actually better to set proper POSIX locale environment variables, instead of changing this default
@@ -49,7 +49,7 @@ def ProcessHostsFile(domain_tree, section, url, file, keep, encoding):
49
49
hosts_encoding = encoding
50
50
51
51
if url :
52
- SafePrint (u"Dowloading {}..." .format (section ))
52
+ SafePrint (u"Downloading {}..." .format (section ))
53
53
54
54
# Reasons behind converting back and forth to UTF-8:
55
55
# urllib2.quote and urllib.quote_plus choke on non-ASCII characters in unicode objects (any kind of str objects are ok)
@@ -104,7 +104,7 @@ def ProcessHostsFile(domain_tree, section, url, file, keep, encoding):
104
104
for line in hosts .readlines ():
105
105
if not re .match (white_pattern , line ):
106
106
line_match = block_pattern .match (line )
107
- line_items = line_match and line_match .group (2 ) is not None and line_match .group (2 ).strip ().split ()
107
+ line_items = line_match and line_match .group (4 ) is not None and line_match .group (4 ).strip ().split ()
108
108
if line_items :
109
109
prc_count += 1
110
110
for alias , hostname in enumerate (line_items ):
@@ -133,6 +133,12 @@ def ProcessHostsFile(domain_tree, section, url, file, keep, encoding):
133
133
134
134
if not keep : os .remove (hosts_path )
135
135
136
def GetConfigBoolean(config, section, option):
    """Read *option* from *section* of *config* as a boolean.

    Numeric values are accepted in addition to the keywords understood by
    the parser's own ``getboolean``: the value is first read with
    ``getfloat`` and any non-zero number counts as True.  If the value is
    not a number, the lookup is delegated to ``getboolean``.

    Args:
        config: parser object providing ``getfloat`` and ``getboolean``.
        section: name of the section to read from.
        option: name of the option to read.

    Returns:
        The boolean interpretation of the option value.

    Raises:
        ValueError: propagated from ``getboolean`` when the value is
            neither a number nor a boolean keyword it recognises.
    """
    try:
        numeric_value = config.getfloat(section, option)
    except ValueError:
        # Not a number - fall back to the parser's boolean keywords
        return config.getboolean(section, option)
    return bool(numeric_value)
141
+
136
142
def GetTimestamp(dt):
    """Format *dt* as "<Mon> <day> <HH:MM:SS> <YYYY>".

    The month abbreviation is taken from the module-level ``rfc3164_months``
    list (indexed by ``dt.month - 1``) rather than from ``strftime``.
    The day of month is right-aligned and space-padded to two characters.

    Args:
        dt: object exposing ``month`` and ``day`` attributes and supporting
            strftime-style ``format`` specs (presumably a
            ``datetime.datetime`` - confirm against callers).

    Returns:
        The formatted timestamp string.
    """
    month_name = rfc3164_months[dt.month - 1]
    return "{0} {1.day: >2} {1:%H:%M:%S %Y}".format(month_name, dt)
138
144
@@ -145,7 +151,9 @@ def GetTimestamp(dt):
145
151
SafePrint (u"Licensed under BSD 2-Clause License" );
146
152
exit (1 )
147
153
148
- block_pattern = re .compile ("^\s*(0\.0\.0\.0|127\.\d{1,3}\.\d{1,3}\.\d{1,3}|::1|::)\s+([\w\s.-]+)#?" , re .UNICODE )
154
+ # Block pattern conforms (in a sane way) to RFC 4291 and ID draft-main-ipaddr-text-rep-02
155
+
156
+ block_pattern = re .compile ("^\s*(0+\.0+\.0+\.0+|127\.\d+\.\d+\.\d+|(0{0,4}:){1,7}(0{0,4}|0{0,3}1))\s+([\w\s.-]+)#?" , re .UNICODE )
149
157
white_pattern = re .compile ("^\s*#.*$|^\s*$" )
150
158
encoding_pattern = re .compile ("^([^']+)'[\w-]*'(.+)" )
151
159
rfc3164_months = ["Jan" , "Feb" , "Mar" , "Apr" , "May" , "Jun" , "Jul" , "Aug" , "Sep" , "Oct" , "Nov" , "Dec" ]
@@ -177,7 +185,7 @@ def GetTimestamp(dt):
177
185
for section in config .sections ():
178
186
action_file .write (u"# {}" .format (config .get (section , "Url" ) or config .get (section , "File" )) + os .linesep )
179
187
try :
180
- ProcessHostsFile (domain_tree , section , config .get (section , "Url" ), config .get (section , "File" ), config . getboolean ( section , "Keep" ), config .get (section , "Encoding" ))
188
+ ProcessHostsFile (domain_tree , section , config .get (section , "Url" ), config .get (section , "File" ), GetConfigBoolean ( config , section , "Keep" ), config .get (section , "Encoding" ))
181
189
except UnicodeError as e :
182
190
SafePrint (u"Codec error ({}): {}" .format (e .encoding , e .message or e .reason ))
183
191
except urllib2 .HTTPError as e :
0 commit comments