Skip to content

Commit 12d6621

Browse files
committed
Updated IP addresses and boolean values parsing
1 parent 6bf7812 commit 12d6621

File tree

2 files changed

+19
-10
lines changed

2 files changed

+19
-10
lines changed

README

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,10 +107,11 @@ current directory under filename 'hosts'.
107107

108108
By default, after the script finishes processing a hosts file, it deletes it. To
109109
control this behavior 'Keep' variable is used - if it translates to True
110-
(values '1', 'yes', 'true' and 'on'), file won't be deleted after being
111-
processed. If it translates to False (values '0', 'no', 'false' and 'off') or
112-
variable is omitted - default action takes place and file becomes deleted. In
113-
this example, hosts file won't be deleted after being processed.
110+
(values 'yes', 'true', 'on' and non-zero decimals), file won't be deleted after
111+
being processed. If it translates to False (values 'no', 'false', 'off' and
112+
zero equivalents) or variable is omitted - default action takes place and file
113+
is deleted. In this example, the hosts file won't be deleted after being
114+
processed.
114115

115116
Hosts files can have a wide variety of encodings. So, when reading a hosts file,
116117
script uses encoding specified in Content-Type header or in current locale, if

adhosts2privoxy.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616
import ConfigParser
1717

1818
# Keep in mind, if you use non-ASCII characters in defaults, you should change their object type from str ("") to unicode (u"") one
19-
# Except user_agent - this shoud stay str object ("")
20-
# And dont't forget to specify correct source file encoding
19+
# Except user_agent - this should stay str object ("")
20+
# And don't forget to specify correct source file encoding
2121

2222
# When current locale encoding can't be determined, fallback_encoding is used
2323
# So it's actually better to set proper POSIX locale environment variables, instead of changing this default
@@ -49,7 +49,7 @@ def ProcessHostsFile(domain_tree, section, url, file, keep, encoding):
4949
hosts_encoding = encoding
5050

5151
if url:
52-
SafePrint(u"Dowloading {}...".format(section))
52+
SafePrint(u"Downloading {}...".format(section))
5353

5454
# Reasons behind converting back and forth to UTF-8:
5555
# urllib2.quote and urllib.quote_plus choke on non-ASCII characters in unicode objects (any kind of str objects are ok)
@@ -104,7 +104,7 @@ def ProcessHostsFile(domain_tree, section, url, file, keep, encoding):
104104
for line in hosts.readlines():
105105
if not re.match(white_pattern, line):
106106
line_match = block_pattern.match(line)
107-
line_items = line_match and line_match.group(2) is not None and line_match.group(2).strip().split()
107+
line_items = line_match and line_match.group(4) is not None and line_match.group(4).strip().split()
108108
if line_items:
109109
prc_count += 1
110110
for alias, hostname in enumerate(line_items):
@@ -133,6 +133,12 @@ def ProcessHostsFile(domain_tree, section, url, file, keep, encoding):
133133

134134
if not keep: os.remove(hosts_path)
135135

136+
def GetConfigBoolean(config, section, option):
    """Read a config option as a boolean, accepting numbers as well as keywords.

    The option is first parsed with config.getfloat(); the truthiness of that
    number is returned, so any non-zero value yields True and any zero
    equivalent yields False. If the value is not numeric, parsing is delegated
    to config.getboolean(), and whatever that call raises for an unrecognized
    value propagates to the caller.
    """
    try:
        numeric_value = config.getfloat(section, option)
    except ValueError:
        # Not a number - let the parser's own keyword handling decide
        return config.getboolean(section, option)
    return bool(numeric_value)
141+
136142
def GetTimestamp(dt):
    """Format dt as '<month> <day> <HH:MM:SS> <year>'.

    The month abbreviation is looked up in rfc3164_months by dt.month, the day
    is right-aligned in a two-character field padded with a space, and the
    time and year come from dt's own '%H:%M:%S %Y' formatting.
    dt is presumably a datetime.datetime - confirm against callers.
    """
    # dt.month is 1-based, the lookup table is 0-based
    month_abbr = rfc3164_months[dt.month - 1]
    return "{0} {1.day: >2} {1:%H:%M:%S %Y}".format(month_abbr, dt)
138144

@@ -145,7 +151,9 @@ def GetTimestamp(dt):
145151
SafePrint(u"Licensed under BSD 2-Clause License");
146152
exit(1)
147153

148-
block_pattern = re.compile("^\s*(0\.0\.0\.0|127\.\d{1,3}\.\d{1,3}\.\d{1,3}|::1|::)\s+([\w\s.-]+)#?", re.UNICODE)
154+
# Block pattern conforms (in a sane way) to RFC 4291 and ID draft-main-ipaddr-text-rep-02
155+
156+
block_pattern = re.compile("^\s*(0+\.0+\.0+\.0+|127\.\d+\.\d+\.\d+|(0{0,4}:){1,7}(0{0,4}|0{0,3}1))\s+([\w\s.-]+)#?", re.UNICODE)
149157
white_pattern = re.compile("^\s*#.*$|^\s*$")
150158
encoding_pattern = re.compile("^([^']+)'[\w-]*'(.+)")
151159
rfc3164_months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
@@ -177,7 +185,7 @@ def GetTimestamp(dt):
177185
for section in config.sections():
178186
action_file.write(u"# {}".format(config.get(section, "Url") or config.get(section, "File")) + os.linesep)
179187
try:
180-
ProcessHostsFile(domain_tree, section, config.get(section, "Url"), config.get(section, "File"), config.getboolean(section, "Keep"), config.get(section, "Encoding"))
188+
ProcessHostsFile(domain_tree, section, config.get(section, "Url"), config.get(section, "File"), GetConfigBoolean(config, section, "Keep"), config.get(section, "Encoding"))
181189
except UnicodeError as e:
182190
SafePrint(u"Codec error ({}): {}".format(e.encoding, e.message or e.reason))
183191
except urllib2.HTTPError as e:

0 commit comments

Comments
 (0)