From 8a64310a90401790effa1b2bca5d4c55cf992b54 Mon Sep 17 00:00:00 2001 From: John Van de Meulebrouck Brendgard Date: Mon, 12 Nov 2018 17:29:56 +0100 Subject: [PATCH] Support the use of custom config as requested in #9 --- catch_phishing.py | 31 +++++--- external.yaml | 14 ++++ requirements.txt | 1 + suspicious.py | 180 ---------------------------------------------- suspicious.yaml | 178 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 216 insertions(+), 188 deletions(-) create mode 100644 external.yaml delete mode 100644 suspicious.py create mode 100644 suspicious.yaml diff --git a/catch_phishing.py b/catch_phishing.py index df0c983..8068d6e 100755 --- a/catch_phishing.py +++ b/catch_phishing.py @@ -10,14 +10,14 @@ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. import re + import certstream -import tqdm import entropy -from tld import get_tld +import tqdm +import yaml from Levenshtein import distance from termcolor import colored, cprint - -from suspicious import keywords, tlds +from tld import get_tld from confusables import unconfuse @@ -39,7 +39,7 @@ def score_domain(domain): int: the score of `domain`. """ score = 0 - for t in tlds: + for t in suspicious['tlds']: if domain.endswith(t): score += 20 @@ -70,12 +70,12 @@ def score_domain(domain): score += 10 # Testing keywords - for word in keywords.keys(): + for word in suspicious['keywords']: if word in domain: - score += keywords[word] + score += suspicious['keywords'][word] # Testing Levenshtein distance for strong keywords (>= 70 points) (ie. paypol) - for key in [k for (k,s) in keywords.items() if s >= 70]: + for key in [k for (k,s) in suspicious['keywords'].items() if s >= 70]: # Removing too generic keywords (ie. mail.domain.com) for word in [w for w in words_in_domain if w not in ['email', 'mail', 'cloud']]: if distance(str(word), str(key)) == 1: @@ -131,4 +131,19 @@ def callback(message, context): if __name__ == '__main__': + with open('suspicious.yaml', 'r') as f: + suspicious = yaml.safe_load(f) + + with open('external.yaml', 'r') as f: + external = yaml.safe_load(f) + + if external['override_suspicious.yaml'] is True: + suspicious = external + else: + if external['keywords'] is not None: + suspicious['keywords'].update(external['keywords']) + + if external['tlds'] is not None: + suspicious['tlds'].update(external['tlds']) + certstream.listen_for_events(callback, url=certstream_url) diff --git a/external.yaml b/external.yaml new file mode 100644 index 0000000..928be67 --- /dev/null +++ b/external.yaml @@ -0,0 +1,14 @@ +# Change to true if you want to override suspicious.yaml +# and only use your own config in this file. +override_suspicious.yaml: false + +keywords: +# Add your own keywords here or override the score +# for the ones found in suspicious.yaml, e.g.: +# 'myownkeyword': 50 +# 'appleid': 0 + +tlds: +# Add your own TLDs here, e.g.: +# '.nu': +# '.se': diff --git a/requirements.txt b/requirements.txt index 307d732..b3422a7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ tqdm==4.19.4 tld==0.7.9 python_Levenshtein==0.12.0 websocket-client==0.48.0 +PyYAML==3.13 diff --git a/suspicious.py b/suspicious.py deleted file mode 100644 index fd4fa7b..0000000 --- a/suspicious.py +++ /dev/null @@ -1,180 +0,0 @@ -keywords = { -# Generic suspicious - 'login': 25, - 'log-in': 25, - 'sign-in': 25, - 'signin': 25, - 'account': 25, - 'verification': 25, - 'verify': 25, - 'webscr': 25, - 'password': 25, - 'credential': 25, - 'support': 25, - 'activity': 25, - 'security': 25, - 'update': 25, - 'authentication': 25, - 'authenticate': 25, - 'authorize': 25, - 'wallet': 25, - 'alert': 25, - 'purchase': 25, - 'transaction': 25, - 'recover': 25, - 'unlock': 25, - 'confirm': 20, - 'live': 15, - 'office': 15, - 'service': 15, - 'manage': 15, - 'portal': 15, - 'invoice': 15, - 'secure': 10, - 'customer': 10, - 'client': 10, - 'bill': 10, - 'online': 10, - 'safe': 10, - 'form': 10, - -# Apple iCloud - 'appleid': 70, - 'icloud': 60, - 'iforgot': 60, - 'itunes': 50, - 'apple':30, - -# Email - 'outlook': 60, - 'office365': 50, - 'microsoft': 60, - 'windows': 30, - 'protonmail': 70, - 'tutanota': 60, - 'hotmail': 60, - 'gmail': 70, - 'google': 70, - 'outlook': 60, - 'yahoo': 60, - 'google': 60, - 'yandex': 60, - -# Social Media - 'twitter': 60, - 'facebook': 60, - 'tumblr': 60, - 'reddit': 60, - 'youtube': 40, # some false positives - 'linkedin': 60, - 'instagram': 60, - 'flickr': 60, - 'whatsapp': 60, - -# Cryptocurrency - 'localbitcoin': 70, - 'poloniex': 60, - 'coinhive': 70, - 'bithumb': 60, - 'kraken': 50, # some false positives - 'bitstamp': 60, - 'bittrex': 60, - 'blockchain': 70, - 'bitflyer': 60, - 'coinbase': 60, - 'hitbtc': 60, - 'lakebtc': 60, - 'bitfinex': 60, - 'bitconnect': 60, - 'coinsbank': 60, - -# Bank/money - 'paypal': 70, - 'moneygram': 60, - 'westernunion': 60, - 'bankofamerica': 60, - 'wellsfargo': 60, - 'citigroup': 60, - 'santander': 60, - 'morganstanley': 60, - 'barclays': 50, - 'hsbc': 50, - 'scottrade': 60, - 'ameritrade': 60, - 'merilledge': 60, - 'bank': 15, - -# Ecommerce - 'amazon': 60, - 'overstock': 60, - 'alibaba': 60, - 'aliexpress': 60, - 'leboncoin': 70, - -# Other - 'netflix': 70, - 'skype': 60, - 'github': 60, - 'onedrive': 60, - 'dropbox': 60, - -# Miscellaneous & SE tricks - 'cgi-bin': 50, - '.com-': 20, - '-com.': 20, - '.net-': 20, - '.org-': 20, - '.com-': 20, - '.net.': 20, - '.org.': 20, - '.com.': 20, - '.gov-': 30, - '.gov.': 30, - '.gouv-': 40, - '-gouv-': 40, - '.gouv.': 40, - # FR specific - 'suivi': 50, - 'laposte': 50, -} - -tlds = [ - '.ga', - '.gq', - '.ml', - '.cf', - '.tk', - '.xyz', - '.pw', - '.cc', - '.club', - '.work', - '.top', - '.support', - '.bank', - '.info', - '.study', - '.party', - '.click', - '.country', - '.stream', - '.gdn', - '.mom', - '.xin', - '.kim', - '.men', - '.loan', - '.download', - '.racing', - '.online', - '.center', - '.ren', - '.gb', - '.win', - '.review', - '.vip', - '.party', - '.tech', - '.science', - '.business' -] diff --git a/suspicious.yaml b/suspicious.yaml new file mode 100644 index 0000000..2b23dd7 --- /dev/null +++ b/suspicious.yaml @@ -0,0 +1,178 @@ +keywords: +# Generic suspicious + 'login': 25 + 'log-in': 25 + 'sign-in': 25 + 'signin': 25 + 'account': 25 + 'verification': 25 + 'verify': 25 + 'webscr': 25 + 'password': 25 + 'credential': 25 + 'support': 25 + 'activity': 25 + 'security': 25 + 'update': 25 + 'authentication': 25 + 'authenticate': 25 + 'authorize': 25 + 'wallet': 25 + 'alert': 25 + 'purchase': 25 + 'transaction': 25 + 'recover': 25 + 'unlock': 25 + 'confirm': 20 + 'live': 15 + 'office': 15 + 'service': 15 + 'manage': 15 + 'portal': 15 + 'invoice': 15 + 'secure': 10 + 'customer': 10 + 'client': 10 + 'bill': 10 + 'online': 10 + 'safe': 10 + 'form': 10 + +# Apple iCloud + 'appleid': 70 + 'icloud': 60 + 'iforgot': 60 + 'itunes': 50 + 'apple': 30 + +# Email + 'outlook': 60 + 'office365': 50 + 'microsoft': 60 + 'windows': 30 + 'protonmail': 70 + 'tutanota': 60 + 'hotmail': 60 + 'gmail': 70 + 'google': 70 + 'outlook': 60 + 'yahoo': 60 + 'google': 60 + 'yandex': 60 + +# Social Media + 'twitter': 60 + 'facebook': 60 + 'tumblr': 60 + 'reddit': 60 + 'youtube': 40 # some false positives + 'linkedin': 60 + 'instagram': 60 + 'flickr': 60 + 'whatsapp': 60 + +# Cryptocurrency + 'localbitcoin': 70 + 'poloniex': 60 + 'coinhive': 70 + 'bithumb': 60 + 'kraken': 50 # some false positives + 'bitstamp': 60 + 'bittrex': 60 + 'blockchain': 70 + 'bitflyer': 60 + 'coinbase': 60 + 'hitbtc': 60 + 'lakebtc': 60 + 'bitfinex': 60 + 'bitconnect': 60 + 'coinsbank': 60 + +# Bank/money + 'paypal': 70 + 'moneygram': 60 + 'westernunion': 60 + 'bankofamerica': 60 + 'wellsfargo': 60 + 'citigroup': 60 + 'santander': 60 + 'morganstanley': 60 + 'barclays': 50 + 'hsbc': 50 + 'scottrade': 60 + 'ameritrade': 60 + 'merilledge': 60 + 'bank': 15 + +# Ecommerce + 'amazon': 60 + 'overstock': 60 + 'alibaba': 60 + 'aliexpress': 60 + 'leboncoin': 70 + +# Other + 'netflix': 70 + 'skype': 60 + 'github': 60 + 'onedrive': 60 + 'dropbox': 60 + +# Miscellaneous & SE tricks + 'cgi-bin': 50 + '.com-': 20 + '-com.': 20 + '.net-': 20 + '.org-': 20 + '.com-': 20 + '.net.': 20 + '.org.': 20 + '.com.': 20 + '.gov-': 30 + '.gov.': 30 + '.gouv-': 40 + '-gouv-': 40 + '.gouv.': 40 + # FR specific + 'suivi': 50 + 'laposte': 50 + +tlds: + '.ga': + '.gq': + '.ml': + '.cf': + '.tk': + '.xyz': + '.pw': + '.cc': + '.club': + '.work': + '.top': + '.support': + '.bank': + '.info': + '.study': + '.party': + '.click': + '.country': + '.stream': + '.gdn': + '.mom': + '.xin': + '.kim': + '.men': + '.loan': + '.download': + '.racing': + '.online': + '.center': + '.ren': + '.gb': + '.win': + '.review': + '.vip': + '.party': + '.tech': + '.science': + '.business':