From f770409cb8f21bf2daeb8b580a92f56417a6c4ba Mon Sep 17 00:00:00 2001 From: Angelo Dell'Aera Date: Wed, 21 Jun 2017 16:49:44 +0200 Subject: [PATCH 1/2] Multiple improvements - Code layout changes (PEP-8 coding style) - Python 3 compatibility - Parser class initialization refactoring - Compile the regexps with the IGNORECASE flag --- bin/iocp | 20 +- iocp/Output.py | 184 +++++++++-------- iocp/Parser.py | 506 +++++++++++++++++++++++----------------------- iocp/Whitelist.py | 12 -- iocp/__init__.py | 2 +- requirements.txt | 2 + setup.py | 40 ++-- 7 files changed, 382 insertions(+), 384 deletions(-) delete mode 100644 iocp/Whitelist.py diff --git a/bin/iocp b/bin/iocp index 8047f6f..f53b5ac 100755 --- a/bin/iocp +++ b/bin/iocp @@ -40,14 +40,14 @@ import argparse from iocp import Parser if __name__ == "__main__": - argparser = argparse.ArgumentParser() - argparser.add_argument('PATH', action='store', help='File/directory/URL to report(s)') - argparser.add_argument('-p', dest='INI', default=None, help='Pattern file') - argparser.add_argument('-i', dest='INPUT_FORMAT', default='pdf', help='Input format (pdf/txt/html)') - argparser.add_argument('-o', dest='OUTPUT_FORMAT', default='csv', help='Output format (csv/tsv/json/yara/netflow)') - argparser.add_argument('-d', dest='DEDUP', action='store_true', default=False, help='Deduplicate matches') - argparser.add_argument('-l', dest='LIB', default='pdfminer', help='PDF parsing library (pypdf2/pdfminer)') - args = argparser.parse_args() + argparser = argparse.ArgumentParser() + argparser.add_argument('PATH', action='store', help='File/directory/URL to report(s)') + argparser.add_argument('-p', dest='INI', default=None, help='Pattern file') + argparser.add_argument('-i', dest='INPUT_FORMAT', default='pdf', help='Input format (pdf/txt/html)') + argparser.add_argument('-o', dest='OUTPUT_FORMAT', default='csv', help='Output format (csv/tsv/json/yara/netflow)') + argparser.add_argument('-d', dest='DEDUP', action='store_true', default=False, help='Deduplicate matches') + argparser.add_argument('-l', dest='LIB', default='pdfminer', help='PDF parsing library (pypdf2/pdfminer)') + args = argparser.parse_args() - parser = Parser.Parser(args.INI, args.INPUT_FORMAT, args.DEDUP, args.LIB, args.OUTPUT_FORMAT) - parser.parse(args.PATH) \ No newline at end of file + parser = Parser.Parser(args.INI, args.INPUT_FORMAT, args.DEDUP, args.LIB, args.OUTPUT_FORMAT) + parser.parse(args.PATH) diff --git a/iocp/Output.py b/iocp/Output.py index 93b109b..dbebe80 100644 --- a/iocp/Output.py +++ b/iocp/Output.py @@ -7,112 +7,120 @@ OUTPUT_FORMATS = ('csv', 'tsv', 'json', 'yara', 'netflow', ) + def getHandler(output_format): - output_format = output_format.lower() - if output_format not in OUTPUT_FORMATS: - print("[WARNING] Invalid output format specified... using CSV") - output_format = 'csv' + output_format = output_format.lower() + if output_format not in OUTPUT_FORMATS: + print("[WARNING] Invalid output format specified... using CSV") + output_format = 'csv' + + handler_format = "OutputHandler_" + output_format + handler_class = getattr(sys.modules[__name__], handler_format) - handler_format = "OutputHandler_" + output_format - handler_class = getattr(sys.modules[__name__], handler_format) + return handler_class() - return handler_class() class OutputHandler(object): - def print_match(self, fpath, page, name, match): - pass + def print_match(self, fpath, page, name, match): + pass + + def print_header(self, fpath): + pass - def print_header(self, fpath): - pass + def print_footer(self, fpath): + pass - def print_footer(self, fpath): - pass + def print_error(self, fpath, exception): + print("[ERROR] %s" % (exception)) - def print_error(self, fpath, exception): - print("[ERROR] %s" % (exception)) class OutputHandler_csv(OutputHandler): - def __init__(self): - self.csv_writer = csv.writer(sys.stdout) + def __init__(self): + self.csv_writer = csv.writer(sys.stdout) - def print_match(self, fpath, page, name, match): - self.csv_writer.writerow((fpath, page, name, match)) + def print_match(self, fpath, page, name, match): + self.csv_writer.writerow((fpath, page, name, match)) + + def print_error(self, fpath, exception): + self.csv_writer.writerow((fpath, '0', 'error', exception)) - def print_error(self, fpath, exception): - self.csv_writer.writerow((fpath, '0', 'error', exception)) class OutputHandler_tsv(OutputHandler): - def __init__(self): - self.csv_writer = csv.writer(sys.stdout, delimiter = '\t') + def __init__(self): + self.csv_writer = csv.writer(sys.stdout, delimiter = '\t') + + def print_match(self, fpath, page, name, match): + self.csv_writer.writerow((fpath, page, name, match)) - def print_match(self, fpath, page, name, match): - self.csv_writer.writerow((fpath, page, name, match)) + def print_error(self, fpath, exception): + self.csv_writer.writerow((fpath, '0', 'error', exception)) - def print_error(self, fpath, exception): - self.csv_writer.writerow((fpath, '0', 'error', exception)) class OutputHandler_json(OutputHandler): - def print_match(self, fpath, page, name, match): - data = { - 'path' : fpath, - 'file' : os.path.basename(fpath), - 'page' : page, - 'type' : name, - 'match': match - } - - print(json.dumps(data)) - - def print_error(self, fpath, exception): - data = { - 'path' : fpath, - 'file' : os.path.basename(fpath), - 'type' : 'error', - 'exception' : exception - } - - print(json.dumps(data)) + def print_match(self, fpath, page, name, match): + data = { + 'path' : fpath, + 'file' : os.path.basename(fpath), + 'page' : page, + 'type' : name, + 'match': match + } + + print(json.dumps(data)) + + def print_error(self, fpath, exception): + data = { + 'path' : fpath, + 'file' : os.path.basename(fpath), + 'type' : 'error', + 'exception' : exception + } + + print(json.dumps(data)) + class OutputHandler_yara(OutputHandler): - def __init__(self): - self.rule_enc = ''.join(chr(c) if chr(c).isupper() or chr(c).islower() or chr(c).isdigit() else '_' for c in range(256)) - - def print_match(self, fpath, page, name, match): - if name in self.cnt: - self.cnt[name] += 1 - else: - self.cnt[name] = 1 - - string_id = "$%s%d" % (name, self.cnt[name]) - self.sids.append(string_id) - string_value = match.replace('\\', '\\\\') - print("\t\t%s = \"%s\"" % (string_id, string_value)) - - def print_header(self, fpath): - rule_name = os.path.splitext(os.path.basename(fpath))[0].translate(self.rule_enc) - - print("rule %s" % (rule_name)) - print("{") - print("\tstrings:") - - self.cnt = {} - self.sids = [] - - def print_footer(self, fpath): - cond = ' or '.join(self.sids) - - print("\tcondition:") - print("\t\t" + cond) - print("}") - + def __init__(self): + self.rule_enc = ''.join(chr(c) if chr(c).isupper() or chr(c).islower() or chr(c).isdigit() else '_' for c in range(256)) + + def print_match(self, fpath, page, name, match): + if name in self.cnt: + self.cnt[name] += 1 + else: + self.cnt[name] = 1 + + string_id = "$%s%d" % (name, self.cnt[name]) + self.sids.append(string_id) + string_value = match.replace('\\', '\\\\') + print("\t\t%s = \"%s\"" % (string_id, string_value)) + + def print_header(self, fpath): + rule_name = os.path.splitext(os.path.basename(fpath))[0].translate(self.rule_enc) + + print("rule %s" % (rule_name)) + print("{") + print("\tstrings:") + + self.cnt = {} + self.sids = [] + + def print_footer(self, fpath): + cond = ' or '.join(self.sids) + + print("\tcondition:") + print("\t\t" + cond) + print("}") + + class OutputHandler_netflow(OutputHandler): - def __init__(self): - print "host 255.255.255.255" - - def print_match(self, fpath, page, name, match): - data = { - 'type' : name, - 'match': match - } - if data["type"] == "IP": - print " or host %s " % data["match"] + def __init__(self): + print "host 255.255.255.255" + + def print_match(self, fpath, page, name, match): + data = { + 'type' : name, + 'match': match + } + + if data["type"] == "IP": + print " or host %s " % data["match"] diff --git a/iocp/Parser.py b/iocp/Parser.py index f8d084a..d5475fc 100644 --- a/iocp/Parser.py +++ b/iocp/Parser.py @@ -35,274 +35,280 @@ # ################################################################################################### -import os import sys +import os import fnmatch import glob import re -try: - import configparser as ConfigParser -except ImportError: - import ConfigParser -try: - from StringIO import StringIO -except ImportError: - from io import StringIO +import six.moves.configparser as ConfigParser +from six import StringIO # Import optional third-party libraries IMPORTS = [] + try: - from PyPDF2 import PdfFileReader - IMPORTS.append('pypdf2') + from PyPDF2 import PdfFileReader + IMPORTS.append('pypdf2') except ImportError: - pass + pass + try: - from pdfminer.pdfpage import PDFPage - from pdfminer.pdfinterp import PDFResourceManager - from pdfminer.converter import TextConverter - from pdfminer.pdfinterp import PDFPageInterpreter - from pdfminer.layout import LAParams - IMPORTS.append('pdfminer') + from pdfminer.pdfpage import PDFPage + from pdfminer.pdfinterp import PDFResourceManager + from pdfminer.converter import TextConverter + from pdfminer.pdfinterp import PDFPageInterpreter + from pdfminer.layout import LAParams + IMPORTS.append('pdfminer') except ImportError: - pass + pass + try: - from bs4 import BeautifulSoup - IMPORTS.append('beautifulsoup') + from bs4 import BeautifulSoup + IMPORTS.append('beautifulsoup') except ImportError: - pass + pass + try: - import requests - IMPORTS.append('requests') + import requests + IMPORTS.append('requests') except ImportError: - pass + pass # Import project source files import iocp from iocp import Output + class Parser(object): - patterns = {} - defang = {} - - def __init__(self, patterns_ini=None, input_format='pdf', dedup=False, library='pdfminer', output_format='csv', output_handler=None): - basedir = iocp.get_basedir() - - if patterns_ini is None: - patterns_ini = os.path.join(basedir, 'data/patterns.ini') - self.load_patterns(patterns_ini) - - wldir = os.path.join(basedir, 'data/whitelists') - self.whitelist = self.load_whitelists(wldir) - - self.dedup = dedup - if output_handler: - self.handler = output_handler - else: - self.handler = Output.getHandler(output_format) - - self.ext_filter = "*." + input_format - parser_format = "parse_" + input_format - try: - self.parser_func = getattr(self, parser_format) - except AttributeError: - e = 'Selected parser format is not supported: %s' % (input_format) - raise NotImplementedError(e) - - self.library = library - if input_format == 'pdf': - if library not in IMPORTS: - e = 'Selected PDF parser library not found: %s' % (library) - raise ImportError(e) - elif input_format == 'html': - if 'beautifulsoup' not in IMPORTS: - e = 'HTML parser library not found: BeautifulSoup' - raise ImportError(e) - - def load_patterns(self, fpath): - config = ConfigParser.ConfigParser() - with open(fpath) as f: - config.readfp(f) - - for ind_type in config.sections(): - try: - ind_pattern = config.get(ind_type, 'pattern') - except: - continue - - if ind_pattern: - ind_regex = re.compile(ind_pattern) - self.patterns[ind_type] = ind_regex - - try: - ind_defang = config.get(ind_type, 'defang') - except: - continue - - if ind_defang: - self.defang[ind_type] = True - - def load_whitelists(self, fpath): - whitelist = {} - - searchdir = os.path.join(fpath, "whitelist_*.ini") - fpaths = glob.glob(searchdir) - for fpath in fpaths: - t = os.path.splitext(os.path.split(fpath)[1])[0].split('_',1)[1] - patterns = [line.strip() for line in open(fpath)] - whitelist[t] = [re.compile(p) for p in patterns] - - return whitelist - - def is_whitelisted(self, ind_match, ind_type): - try: - for w in self.whitelist[ind_type]: - if w.findall(ind_match): - return True - except KeyError as e: - pass - return False - - def parse_page(self, fpath, data, page_num): - for ind_type, ind_regex in self.patterns.items(): - matches = ind_regex.findall(data) - - for ind_match in matches: - if isinstance(ind_match, tuple): - ind_match = ind_match[0] - - if self.is_whitelisted(ind_match, ind_type): - continue - - if ind_type in self.defang: - ind_match = re.sub(r'\[\.\]', '.', ind_match) - - if self.dedup: - if (ind_type, ind_match) in self.dedup_store: - continue - - self.dedup_store.add((ind_type, ind_match)) - - self.handler.print_match(fpath, page_num, ind_type, ind_match) - - def parse_pdf_pypdf2(self, f, fpath): - try: - pdf = PdfFileReader(f, strict = False) - - if self.dedup: - self.dedup_store = set() - - self.handler.print_header(fpath) - page_num = 0 - for page in pdf.pages: - page_num += 1 - - data = page.extractText() - - self.parse_page(fpath, data, page_num) - self.handler.print_footer(fpath) - except (KeyboardInterrupt, SystemExit): - raise - - def parse_pdf_pdfminer(self, f, fpath): - try: - laparams = LAParams() - laparams.all_texts = True - rsrcmgr = PDFResourceManager() - pagenos = set() - - if self.dedup: - self.dedup_store = set() - - self.handler.print_header(fpath) - page_num = 0 - for page in PDFPage.get_pages(f, pagenos, check_extractable=True): - page_num += 1 - - retstr = StringIO() - device = TextConverter(rsrcmgr, retstr, codec='utf-8', laparams=laparams) - interpreter = PDFPageInterpreter(rsrcmgr, device) - interpreter.process_page(page) - data = retstr.getvalue() - retstr.close() - - self.parse_page(fpath, data, page_num) - self.handler.print_footer(fpath) - except (KeyboardInterrupt, SystemExit): - raise - - def parse_pdf(self, f, fpath): - parser_format = "parse_pdf_" + self.library - try: - self.parser_func = getattr(self, parser_format) - except AttributeError: - e = 'Selected PDF parser library is not supported: %s' % (self.library) - raise NotImplementedError(e) - - self.parser_func(f, fpath) - - def parse_txt(self, f, fpath): - try: - if self.dedup: - self.dedup_store = set() - - data = f.read() - self.handler.print_header(fpath) - self.parse_page(fpath, data, 1) - self.handler.print_footer(fpath) - except (KeyboardInterrupt, SystemExit): - raise - - def parse_html(self, f, fpath): - try: - if self.dedup: - self.dedup_store = set() - - data = f.read() - soup = BeautifulSoup(data) - html = soup.findAll(text=True) - - text = u'' - for elem in html: - if elem.parent.name in ['style', 'script', '[document]', 'head', 'title']: - continue - elif re.match('', unicode(elem)): - continue - else: - text += unicode(elem) - - self.handler.print_header(fpath) - self.parse_page(fpath, text, 1) - self.handler.print_footer(fpath) - except (KeyboardInterrupt, SystemExit): - raise - - def parse(self, path): - try: - if path.startswith('http://') or path.startswith('https://'): - if 'requests' not in IMPORTS: - e = 'HTTP library not found: requests' - raise ImportError(e) - headers = { 'User-Agent': 'Mozilla/5.0 Gecko Firefox' } - r = requests.get(path, headers=headers) - r.raise_for_status() - f = StringIO(r.content) - self.parser_func(f, path) - return - elif os.path.isfile(path): - with open(path, 'rb') as f: - self.parser_func(f, path) - return - elif os.path.isdir(path): - for walk_root, walk_dirs, walk_files in os.walk(path): - for walk_file in fnmatch.filter(walk_files, self.ext_filter): - fpath = os.path.join(walk_root, walk_file) - with open(fpath, 'rb') as f: - self.parser_func(f, fpath) - return - - e = 'File path is not a file, directory or URL: %s' % (path) - raise IOError(e) - except (KeyboardInterrupt, SystemExit): - raise - except Exception as e: - self.handler.print_error(path, e) \ No newline at end of file + patterns = {} + defang = {} + + def __init__(self, patterns_ini = None, input_format = 'pdf', dedup = False, library = 'pdfminer', output_format = 'csv', output_handler = None): + self.__init_patterns(patterns_ini) + self.__init_whitelist() + self.__init_dedup(dedup) + self.__init_output_handler(output_format, output_handler) + self.__init_parser(input_format) + self.__init_library(library, input_format) + + def __init_patterns(self, patterns_ini): + if patterns_ini is None: + patterns_ini = os.path.join(iocp.get_basedir(), 'data/patterns.ini') + + self.load_patterns(patterns_ini) + + def __init_whitelist(self): + wldir = os.path.join(iocp.get_basedir(), 'data/whitelists') + self.whitelist = self.load_whitelists(wldir) + + def __init_dedup(self, dedup): + self.dedup = dedup + + if dedup: + self.dedup_store = set() + + def __init_output_handler(self, output_format, output_handler): + self.handler = output_handler if output_handler else Output.getHandler(output_format) + + def __init_parser(self, input_format): + self.ext_filter = "*.{}".format(input_format) + parser_format = "parse_{}".format(input_format) + + self.parser_func = getattr(self, parser_format, None) + if not self.parser_func: + print('Selected parser format is not supported: {}'.format(input_format)) + sys.exit(-1) + + def __init_library(self, library, input_format): + self.library = library + + if input_format in ('pdf', ) and library not in IMPORTS: + print('PDF parser library not found: {}'.format(library)) + sys.exit(-1) + + if input_format in ('html', ) and 'beautifulsoup' not in IMPORTS: + print('HTML parser library not found: BeautifulSoup') + sys.exit(-1) + + def load_patterns(self, fpath): + config = ConfigParser.ConfigParser() + + with open(fpath) as f: + config.readfp(f) + + for ind_type in config.sections(): + try: + ind_pattern = config.get(ind_type, 'pattern') + except ConfigParser.NoOptionError: + continue + + if ind_pattern: + ind_regex = re.compile(ind_pattern, flags = re.IGNORECASE) + self.patterns[ind_type] = ind_regex + + try: + ind_defang = config.get(ind_type, 'defang') + except ConfigParser.NoOptionError: + continue + + if ind_defang: + self.defang[ind_type] = True + + def load_whitelists(self, fpath): + whitelist = {} + + searchdir = os.path.join(fpath, "whitelist_*.ini") + fpaths = glob.glob(searchdir) + for fpath in fpaths: + t = os.path.splitext(os.path.split(fpath)[1])[0].split('_', 1)[1] + patterns = [line.strip() for line in open(fpath)] + whitelist[t] = [re.compile(p, flags = re.IGNORECASE) for p in patterns] + + return whitelist + + def is_whitelisted(self, ind_match, ind_type): + try: + for w in self.whitelist[ind_type]: + if w.findall(ind_match): + return True + except KeyError: + pass + + return False + + def parse_page(self, fpath, data, page_num): + for ind_type, ind_regex in self.patterns.items(): + matches = ind_regex.findall(data) + + for ind_match in matches: + if isinstance(ind_match, tuple): + ind_match = ind_match[0] + + if self.is_whitelisted(ind_match, ind_type): + continue + + if ind_type in self.defang: + ind_match = re.sub(r'\[\.\]', '.', ind_match) + + if self.dedup: + if (ind_type, ind_match) in self.dedup_store: + continue + + self.dedup_store.add((ind_type, ind_match)) + + self.handler.print_match(fpath, page_num, ind_type, ind_match) + + def parse_pdf_pypdf2(self, f, fpath): + try: + pdf = PdfFileReader(f, strict = False) + + self.handler.print_header(fpath) + page_num = 0 + for page in pdf.pages: + page_num += 1 + + data = page.extractText() + + self.parse_page(fpath, data, page_num) + + self.handler.print_footer(fpath) + except (KeyboardInterrupt, SystemExit): + raise + + def parse_pdf_pdfminer(self, f, fpath): + try: + laparams = LAParams() + laparams.all_texts = True + rsrcmgr = PDFResourceManager() + pagenos = set() + + self.handler.print_header(fpath) + page_num = 0 + + for page in PDFPage.get_pages(f, pagenos, check_extractable=True): + page_num += 1 + + retstr = StringIO() + device = TextConverter(rsrcmgr, retstr, codec='utf-8', laparams=laparams) + interpreter = PDFPageInterpreter(rsrcmgr, device) + interpreter.process_page(page) + data = retstr.getvalue() + retstr.close() + + self.parse_page(fpath, data, page_num) + + self.handler.print_footer(fpath) + except (KeyboardInterrupt, SystemExit): + raise + + def parse_pdf(self, f, fpath): + parser_format = "parse_pdf_" + self.library + + self.parser_func = getattr(self, parser_format, None) + if not self.parser_func: + e = 'Selected PDF parser library is not supported: {}'.format(self.library) + raise NotImplementedError(e) + + self.parser_func(f, fpath) + + def parse_txt(self, f, fpath): + try: + data = f.read() + self.handler.print_header(fpath) + self.parse_page(fpath, data, 1) + self.handler.print_footer(fpath) + except (KeyboardInterrupt, SystemExit): + raise + + def parse_html(self, f, fpath): + try: + data = f.read() + soup = BeautifulSoup(data, "lxml") + html = soup.findAll(text = True) + + text = u'' + for elem in html: + if elem.parent.name in ['style', 'script', '[document]', 'head', 'title']: + continue + elif re.match('', unicode(elem)): + continue + else: + text += unicode(elem) + + self.handler.print_header(fpath) + self.parse_page(fpath, text, 1) + self.handler.print_footer(fpath) + except (KeyboardInterrupt, SystemExit): + raise + + def parse(self, path): + try: + if path.startswith('http://') or path.startswith('https://'): + if 'requests' not in IMPORTS: + e = 'HTTP library not found: requests' + raise ImportError(e) + + headers = {'User-Agent': 'Mozilla/5.0 Gecko Firefox'} + r = requests.get(path, headers = headers) + r.raise_for_status() + f = StringIO(r.content) + self.parser_func(f, path) + return + if os.path.isfile(path): + with open(path, 'rb') as f: + self.parser_func(f, path) + return + if os.path.isdir(path): + for walk_root, walk_dirs, walk_files in os.walk(path): + for walk_file in fnmatch.filter(walk_files, self.ext_filter): + fpath = os.path.join(walk_root, walk_file) + with open(fpath, 'rb') as f: + self.parser_func(f, fpath) + return + + e = 'File path is not a file, directory or URL: %s' % (path) + raise IOError(e) + except (KeyboardInterrupt, SystemExit): + raise + except Exception as e: + self.handler.print_error(path, e) diff --git a/iocp/Whitelist.py b/iocp/Whitelist.py deleted file mode 100644 index 5f12d31..0000000 --- a/iocp/Whitelist.py +++ /dev/null @@ -1,12 +0,0 @@ -import os -import glob -import re - -class WhiteList(dict): - def __init__(self, basedir): - searchdir = os.path.join(basedir, "whitelists/whitelist_*.ini") - fpaths = glob.glob(searchdir) - for fpath in fpaths: - t = os.path.splitext(os.path.split(fpath)[1])[0].split('_',1)[1] - patterns = [line.strip() for line in open(fpath)] - self[t] = [re.compile(p) for p in patterns] \ No newline at end of file diff --git a/iocp/__init__.py b/iocp/__init__.py index 31fcb29..b703061 100644 --- a/iocp/__init__.py +++ b/iocp/__init__.py @@ -5,4 +5,4 @@ _IOCP_ROOT = os.path.abspath(os.path.dirname(__file__)) def get_basedir(): - return _IOCP_ROOT \ No newline at end of file + return _IOCP_ROOT diff --git a/requirements.txt b/requirements.txt index 5ff7a2e..9d0c62d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,5 @@ beautifulsoup4>=4.4.1 pdfminer>=20140328 PyPDF2>=1.26.0 requests>=2.10.0 +six>=1.10.0 +lxml>=3.8.0 diff --git a/setup.py b/setup.py index 2186826..5bdaa82 100755 --- a/setup.py +++ b/setup.py @@ -1,28 +1,22 @@ #!/usr/bin/env python -import os from setuptools import setup setup( - name = "ioc_parser", - version = "0.9.1", - author = "Armin Buescher", - author_email = "armin.buescher@googlemail.com", - scripts=['bin/iocp'], - description = ("Tool to extract indicators of compromise from security reports"), - license = "MIT", - url = "https://github.com/armbues/ioc_parser", - packages=['iocp'], - include_package_data=True, - classifiers=[ - "Development Status :: 4 - Beta", - "Topic :: Security", - "License :: OSI Approved :: MIT License", - ], - install_requires=[ - "pdfminer", - "PyPDF2", - "requests", - "beautifulsoup4" - ], -) \ No newline at end of file + name = "ioc_parser", + version = "0.9.1", + author = "Armin Buescher", + author_email = "armin.buescher@googlemail.com", + scripts = ['bin/iocp'], + description = ("Tool to extract indicators of compromise from security reports"), + license = "MIT", + url = "https://github.com/armbues/ioc_parser", + packages = ['iocp'], + include_package_data = True, + classifiers = [ + "Development Status :: 4 - Beta", + "Topic :: Security", + "License :: OSI Approved :: MIT License", + ], + install_requires= open("requirements.txt").read().splitlines(), +) From 28ba90ef3d044352780b78440c3be91d9104137c Mon Sep 17 00:00:00 2001 From: Floyd Hightower Date: Wed, 18 Oct 2017 10:29:19 -0400 Subject: [PATCH 2/2] Fixing remaining python2 print statements --- iocp/Output.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iocp/Output.py b/iocp/Output.py index dbebe80..1e7e26a 100644 --- a/iocp/Output.py +++ b/iocp/Output.py @@ -114,7 +114,7 @@ def print_footer(self, fpath): class OutputHandler_netflow(OutputHandler): def __init__(self): - print "host 255.255.255.255" + print("host 255.255.255.255") def print_match(self, fpath, page, name, match): data = { @@ -123,4 +123,4 @@ def print_match(self, fpath, page, name, match): } if data["type"] == "IP": - print " or host %s " % data["match"] + print(" or host %s " % data["match"])