Skip to content

Commit b029407

Browse files
viskey98, afeena
authored and committed
Update doc for css validation (#123)
* update doc for css validation * pep8 style coding * minor change * pep8 coding update #2 * pep8 coding update #3 * pep8 coding update #4 * pep8 coding update #5
1 parent 031a7a9 commit b029407

File tree

4 files changed

+33
-26
lines changed

4 files changed

+33
-26
lines changed

clone.py

+11-9
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,15 @@
3434
class Cloner(object):
3535
def __init__(self, root, max_depth, css_validate):
3636
self.visited_urls = []
37-
self.root, self.error_page = self.add_scheme(root)
37+
self.root, self.error_page = self.add_scheme(root)
3838
self.max_depth = max_depth
3939
self.moved_root = None
4040
if len(self.root.host) < 4:
4141
sys.exit('invalid taget {}'.format(self.root.host))
4242
self.target_path = '/opt/snare/pages/{}'.format(self.root.host)
4343

4444
if not os.path.exists(self.target_path):
45-
os.mkdir(self.target_path)
45+
os.mkdir(self.target_path)
4646
self.css_validate = css_validate
4747
self.new_urls = Queue()
4848
self.meta = {}
@@ -152,7 +152,7 @@ async def get_body(self, session):
152152
response = await session.get(current_url, headers={'Accept': 'text/html'})
153153
content_type = response.content_type
154154
data = await response.read()
155-
155+
156156
except (aiohttp.ClientError, asyncio.TimeoutError) as client_error:
157157
self.logger.error(client_error)
158158
else:
@@ -165,8 +165,8 @@ async def get_body(self, session):
165165
data = str(soup).encode()
166166
with open(os.path.join(self.target_path, hash_name), 'wb') as index_fh:
167167
index_fh.write(data)
168-
if content_type == 'text/css':
169-
css = cssutils.parseString(data, validate=self.css_validate)
168+
if content_type == 'text/css':
169+
css = cssutils.parseString(data, validate=self.css_validate)
170170
for carved_url in cssutils.getUrls(css):
171171
if carved_url.startswith('data'):
172172
continue
@@ -191,15 +191,16 @@ async def run(self):
191191
session = aiohttp.ClientSession()
192192
try:
193193
await self.new_urls.put((self.root, 0))
194-
await self.new_urls.put((self.error_page,0))
194+
await self.new_urls.put((self.error_page, 0))
195195
await self.get_body(session)
196196
except KeyboardInterrupt:
197197
raise
198198
finally:
199199
with open(os.path.join(self.target_path, 'meta.json'), 'w') as mj:
200200
json.dump(self.meta, mj)
201201
await session.close()
202-
202+
203+
203204
def str_to_bool(v):
204205
if v.lower() == 'true':
205206
return True
@@ -208,6 +209,7 @@ def str_to_bool(v):
208209
else:
209210
raise argparse.ArgumentTypeError('Boolean value expected')
210211

212+
211213
def main():
212214
if os.getuid() != 0:
213215
print('Clone has to be run as root!')
@@ -226,7 +228,7 @@ def main():
226228
if args.log_path:
227229
log_err = args.log_path + "clone.err"
228230
else:
229-
log_err = "/opt/snare/clone.err"
231+
log_err = "/opt/snare/clone.err"
230232
logger.Logger.create_clone_logger(log_err, __package__)
231233
print("Error logs will be stored in {}\n".format(log_err))
232234
try:
@@ -244,6 +246,6 @@ def main():
244246
/ / / / / / / // |/ / __/ / /_/ /
245247
/ /___ / /____ / /_/ // /| / /___/ _, _/
246248
/_____//______//_____//_/ |_/_____/_/ |_|
247-
249+
248250
""")
249251
main()

docs/source/cloner.rst

+1
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ Description
1212

1313
* **target** -- url of website to be cloned
1414
* **max--depth** -- maximum depth of the web-pages desired to be cloned (optional), default: full depth of the site
15+
* **css--validate** -- set whether css validation is required (optional), default: None

logger.py

+11-9
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,51 @@
11
import logging
22
import logging.handlers
33

4+
45
class LevelFilter(logging.Filter):
56
"""Filters (lets through) all messages with level < LEVEL"""
67

78
def __init__(self, level):
89
self.level = level
910

1011
def filter(self, record):
11-
return record.levelno < self.level # "<" instead of "<=": since logger.setLevel is inclusive, this should be exclusive
12+
return record.levelno < self.level
13+
# "<" instead of "<=": since logger.setLevel is inclusive, this should be exclusive
14+
1215

1316
class Logger:
14-
17+
1518
@staticmethod
1619
def create_logger(debug_filename, err_filename, logger_name):
1720
logger = logging.getLogger(logger_name)
1821
logger.setLevel(logging.DEBUG)
1922
logger.propagate = False
2023
formatter = logging.Formatter(
21-
fmt='%(asctime)s %(levelname)s:%(name)s:%(funcName)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
22-
24+
fmt='%(asctime)s %(levelname)s:%(name)s:%(funcName)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
25+
2326
# ERROR log to 'snare.err'
2427
error_log_handler = logging.handlers.RotatingFileHandler(err_filename, encoding='utf-8')
2528
error_log_handler.setLevel(logging.ERROR)
2629
error_log_handler.setFormatter(formatter)
2730
logger.addHandler(error_log_handler)
28-
31+
2932
# DEBUG log to 'snare.log'
3033
debug_log_handler = logging.handlers.RotatingFileHandler(debug_filename, encoding='utf-8')
3134
debug_log_handler.setLevel(logging.DEBUG)
3235
debug_log_handler.setFormatter(formatter)
3336
max_level_filter = LevelFilter(logging.ERROR)
3437
debug_log_handler.addFilter(max_level_filter)
3538
logger.addHandler(debug_log_handler)
36-
39+
3740
return logger
38-
41+
3942
@staticmethod
4043
def create_clone_logger(err_filename, logger_name):
4144
logger = logging.getLogger(logger_name)
4245
formatter = logging.Formatter(
43-
fmt='%(asctime)s %(levelname)s:%(name)s:%(funcName)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
46+
fmt='%(asctime)s %(levelname)s:%(name)s:%(funcName)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
4447
# ERROR log to 'clone.err'
4548
error_log_handler = logging.handlers.RotatingFileHandler(err_filename, encoding='utf-8')
4649
error_log_handler.setLevel(logging.ERROR)
4750
error_log_handler.setFormatter(formatter)
4851
logger.addHandler(error_log_handler)
49-

snare.py

+10-8
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def __init__(self, meta, run_args, debug=False, keep_alive=75, **kwargs):
5555
self.dir = '/opt/snare/pages/{}'.format(run_args.page_dir)
5656

5757
self.meta = meta
58-
58+
5959
self.logger = logging.getLogger(__name__)
6060

6161
self.sroute = StaticRoute(
@@ -223,9 +223,11 @@ async def parse_tanner_response(self, requested_name, detection):
223223
content = None
224224
status_code = 200
225225
headers = {}
226-
p = re.compile('/+') # Creating a regex object for the pattern of multiple contiguous forward slashes
227-
requested_name = p.sub('/', requested_name) # Substituting all occurrences of the pattern with single forward slash
228-
226+
# Creating a regex object for the pattern of multiple contiguous forward slashes
227+
p = re.compile('/+')
228+
# Substituting all occurrences of the pattern with single forward slash
229+
requested_name = p.sub('/', requested_name)
230+
229231
if detection['type'] == 1:
230232
query_start = requested_name.find('?')
231233
if query_start != -1:
@@ -235,7 +237,7 @@ async def parse_tanner_response(self, requested_name, detection):
235237
requested_name = self.run_args.index_page
236238
try:
237239
if requested_name[-1] == '/':
238-
requested_name = requested_name[:-1]
240+
requested_name = requested_name[:-1]
239241
requested_name = unquote(requested_name)
240242
file_name = self.meta[requested_name]['hash']
241243
content_type = self.meta[requested_name]['content_type']
@@ -248,7 +250,7 @@ async def parse_tanner_response(self, requested_name, detection):
248250
with open(path, 'rb') as fh:
249251
content = fh.read()
250252
content = await self.handle_html_content(content)
251-
253+
252254
else:
253255
path = os.path.join(self.dir, file_name)
254256
if os.path.isfile(path):
@@ -464,9 +466,9 @@ async def check_tanner():
464466
base_page_path = '/opt/snare/pages/'
465467
config = configparser.ConfigParser()
466468
config.read(os.path.join(base_path, args.config))
467-
469+
468470
log_debug = args.log_dir + "snare.log"
469-
log_err = args.log_dir + "snare.err"
471+
log_err = args.log_dir + "snare.err"
470472
logger.Logger.create_logger(log_debug, log_err, __package__)
471473

472474
if args.list_pages:

0 commit comments

Comments
 (0)