diff --git a/.gitignore b/.gitignore
index e5cf1673..b92e7ba2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,9 @@
 modules/__pycache__/
+.cache
 .*.swp
+.ropeproject/
+modules/.ropeproject/
+tests/__pycache__/
+modules/__init__.py
+.idea/
+tests/.ropeproject/
diff --git a/.travis.yml b/.travis.yml
index 7ecb9805..0f74ecd8 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,12 +6,10 @@ python:
 - "3.5"
 # command to install dependencies
 install:
- - cd tests
 - sudo apt-get -y install python3-pip
- - pip3 install bs4
+ - pip3 install -r requirements.txt
+ - cd tests
 script:
- - python3 test_getemails.py
- - python3 test_getweblinks.py
- - python3 test_savetofile.py
+ - pytest
 notifications:
   slack: dedsec-inside:24NHg47gypeVR3DWnEncRq7c
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e87d3b70..5eff5a06 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,18 +2,28 @@
 --------------------
 All notable changes to this project will be documented in this file.
 
-## 1.2.0 | (Currently in Development)
+## 1.2.0 | Present (Stable)
+
+### Changed
+* Major code improvements
+* PEP 8 standard
+* Tests
+* Library changes
 
 ### Added
+* Documentation
 * Save to JSON
 * Test case for Save to JSON
 
-## 1.1.0 - July 6, 2017 - Present
+## 1.1.0 - July 6, 2017 - Nov 16
 
 ### Added
 * Updater
 * Version checker
+* Refactored code to meet PEP 8 requirements
+* Refactored code to meet Google Style Docstrings for documentation
+* Fixed error occurring while using the -i flag
 
 ## 1.0.0 - Jun 28, 2017 - July 5
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index a232ad11..b3f17ece 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -34,7 +34,7 @@ This Code of Conduct applies both within project spaces and in public spaces whe
 
 ## Enforcement
 
-Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at dedsecinside@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
+Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at thepsnappz@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
 
 Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
diff --git a/README.md b/README.md
index 5cd3f9d7..9f3ae36a 100755
--- a/README.md
+++ b/README.md
@@ -1,44 +1,44 @@
-████████╗ ██████╗ ██████╗ ██████╗  ██████╗ ████████╗
-╚══██╔══╝██╔═══██╗██╔══██╗ ██╔══██╗██╔═████╗╚══██╔══╝
-   ██║   ██║   ██║██████╔╝ ██████╔╝██║██╔██║   ██║
-   ██║   ██║   ██║██╔══██╗ ██╔══██╗████╔╝██║   ██║
-   ██║   ╚██████╔╝██║  ██║ ██████╔╝╚██████╔╝   ██║
-   ╚═╝    ╚═════╝ ╚═╝  ╚═╝ ╚═════╝  ╚═════╝    ╚═╝
-
-
-              `.`     `
-          ``.:.--.`
-       .-+++/-`
-     `+sso:`
-   `` /yy+.
-  -+.oho.
-  o../+y
- -s.-/:y:`
- .:o+-`--::oo/-`
- `/o+:.```---///oss+-
- .+o:.``...`-::-+++++sys-
- :y/```....``--::-yooooosh+
- -h-``--.```..-:-::ssssssssd+
- h:``:.``....`--:-++hsssyyyym.
-.d.`/.``--.```:--//odyyyyyyym/
-`d.`+``:.```.--/-+/smyyhhhhhm:
- os`./`/````/`-/:+oydhhhhhhdh`
- `so.-/-:``./`.//osmddddddmd.
-  /s/-/:/.`/..+/ydmdddddmo`
-   `:oosso/:+/syNmddmdy/.
-      `-/++oosyso+/.`
-
-
-██████╗ ███████╗██████╗ ███████╗██████╗  ██████╗ ██╗███╗   ██╗███████╗██╗██████╗ ███████╗
-██╔══██╗██╔════╝██╔══██╗██╔════╝╚════██╗██╔════╝ ██║████╗  ██║██╔════╝██║██╔══██╗██╔════╝
-██║  ██║█████╗  ██║  ██║███████╗ █████╔╝██║      ██║██╔██╗ ██║███████╗██║██║  ██║█████╗
-██║  ██║██╔══╝  ██║  ██║╚════██║ ╚═══██╗██║      ██║██║╚██╗██║╚════██║██║██║  ██║██╔══╝
-██████╔╝███████╗██████╔╝███████║██████╔╝╚██████╗ ██║██║ ╚████║███████║██║██████╔╝███████╗
-╚═════╝ ╚══════╝╚═════╝ ╚══════╝╚═════╝  ╚═════╝ ╚═╝╚═╝  ╚═══╝╚══════╝╚═╝╚═════╝ ╚══════╝
+        ████████╗ ██████╗ ██████╗ ██████╗  ██████╗ ████████╗
+        ╚══██╔══╝██╔═══██╗██╔══██╗ ██╔══██╗██╔═████╗╚══██╔══╝
+           ██║   ██║   ██║██████╔╝ ██████╔╝██║██╔██║   ██║
+           ██║   ██║   ██║██╔══██╗ ██╔══██╗████╔╝██║   ██║
+           ██║   ╚██████╔╝██║  ██║ ██████╔╝╚██████╔╝   ██║
+           ╚═╝    ╚═════╝ ╚═╝  ╚═╝ ╚═════╝  ╚═════╝    ╚═╝
+
+
+                      `.`     `
+                  ``.:.--.`
+               .-+++/-`
+             `+sso:`
+           `` /yy+.
+          -+.oho.
+          o../+y
+         -s.-/:y:`
+         .:o+-`--::oo/-`
+         `/o+:.```---///oss+-
+         .+o:.``...`-::-+++++sys-
+         :y/```....``--::-yooooosh+
+        -h-``--.```..-:-::ssssssssd+
+        h:``:.``....`--:-++hsssyyyym.
+        .d.`/.``--.```:--//odyyyyyyym/
+        `d.`+``:.```.--/-+/smyyhhhhhm:
+        os`./`/````/`-/:+oydhhhhhhdh`
+        `so.-/-:``./`.//osmddddddmd.
+         /s/-/:/.`/..+/ydmdddddmo`
+          `:oosso/:+/syNmddmdy/.
+             `-/++oosyso+/.`
+
+
+        ██████╗ ███████╗██████╗ ███████╗██████╗  ██████╗ ██╗███╗   ██╗███████╗██╗██████╗ ███████╗
+        ██╔══██╗██╔════╝██╔══██╗██╔════╝╚════██╗██╔════╝ ██║████╗  ██║██╔════╝██║██╔══██╗██╔════╝
+        ██║  ██║█████╗  ██║  ██║███████╗ █████╔╝██║      ██║██╔██╗ ██║███████╗██║██║  ██║█████╗
+        ██║  ██║██╔══╝  ██║  ██║╚════██║ ╚═══██╗██║      ██║██║╚██╗██║╚════██║██║██║  ██║██╔══╝
+        ██████╔╝███████╗██████╔╝███████║██████╔╝╚██████╗ ██║██║ ╚████║███████║██║██████╔╝███████╗
+        ╚═════╝ ╚══════╝╚═════╝ ╚══════╝╚═════╝  ╚═════╝ ╚═╝╚═╝  ╚═══╝╚══════╝╚═╝╚═════╝ ╚══════╝
@@ -46,9 +46,9 @@
 ## A Python web crawler for the Deep and Dark Web.
 
 [](https://travis-ci.org/DedSecInside/TorBoT)
-[](https://blockchain.info/address/14st7SzDbQZuu8fpQ74x477WoRJ7gpHFaj)
-[](http://forthebadge.com)
-[](http://forthebadge.com)
+[](https://blockchain.info/address/14st7SzDbQZuu8fpQ74x477WoRJ7gpHFaj)
+[]()
+[]()
 
 ### Working Procedure/Basic Plan
 
@@ -65,29 +65,34 @@ the following steps:
 8. After all URLs are processed, return the most relevant page.
 
 ### Features
-1. Crawls Tor links (.onion) only.
-2. Returns Page title and address.
-3. Cache links so that there won't be duplicate links.
+1. Crawls Tor links (.onion). (Completed)
+2. Returns page title and address with a short description about the site. (Not Started)
+3. Save links to database. (Not Started)
+4. Get emails from site. (Completed)
+5. Save crawl info to JSON file. (Completed)
+6. Crawl custom domains. (Completed)
+7. Check if the link is live. (Not Started)
+8. Built-in Updater. (Completed)
 ...(will be updated)
 
 ## Contribute
 Contributions to this project are always welcome.
-To add a new feature fork this repository and give a pull request when your new feature is tested and complete.
+To add a new feature, fork the dev branch and open a pull request once your new feature is tested and complete.
 If it's a new module, it should be put inside the modules directory and imported into the main file.
 The branch name should be your new feature name, in the format <Feature>_<Version>. For example, Feature_FasterCrawl_1.0.
 Contributor names will be added to the list below. :D
 
 ## Dependencies
 1. Tor
-2. Python 3.x (Make sure pip3 is there)
-3. Python Stem Module
-4. urllib
-5. Beautiful Soup 4
-6. Socket
-7. Sock
-8. Argparse
-9. Stem module
-10. Git
+2. Python 3.x (Make sure pip3 is installed)
+3. requests
+4. Beautiful Soup 4
+5. Socket
+6. Sock
+7. Argparse
+8. Git
+9. termcolor
+10. tldextract
 
 ## Basic setup
 Before you run torBot, make sure the following things are done properly:
 
 * Run the tor service
 `sudo service tor start`
 
-* Set a password for tor
-`tor --hash-password "my_password"`
-
-* Give the password inside torbot.py
-`from stem.control import Controller
-with Controller.from_port(port = 9051) as controller:
-    controller.authenticate("your_password_hash")
-    controller.signal(Signal.NEWNYM)`
+* Make sure that your torrc is configured to SOCKS_PORT localhost:9050
 
-`python3 torBot.py`
-`usage: torBot.py [-h] [-q] [-u URL] [-m] [-e EXTENSION] [-l]
+`python3 torBot.py` (or use the `-h`/`--help` argument)
+
+`usage: torBot.py [-h] [-v] [--update] [-q] [-u URL] [-s] [-m] [-e EXTENSION]
+                  [-l] [-i]
 
 optional arguments:
-  -h, --help            show this help message and exit
-  -q, --quiet
-  -u URL, --url URL     Specifiy a website link to crawl
+  -h, --help            Show this help message and exit
+  -v, --version         Show current version of TorBot.
+  --update              Update TorBot to the latest stable version
+  -q, --quiet           Prevent header from displaying
+  -u URL, --url URL     Specify a website link to crawl, currently returns
+                        links on that page
+  -s, --save            Save results to a file in json format
   -m, --mail            Get e-mail addresses from the crawled sites
   -e EXTENSION, --extension EXTENSION
                         Specify additional website extensions for the list
                         (.com or .org, etc.)
-  -l, --live            Check if websites are live or not (slow)`
+  -l, --live            Check if websites are live or not (slow)
+  -i, --info            Info displays basic info of the scanned site (very
+                        slow)`
+
+* NOTE: The flags listed under -u URL, --url URL require the -u flag to be passed as well.
 
 Read more about torrc here : [Torrc](https://github.com/DedSecInside/TorBoT/blob/master/Tor.md)
 
 ## TO-DO
-A TO-DO list will be added here as soon as its complete.
+- [ ] Implement A\* Search for webcrawler
 
 ### Have ideas?
 If you have new ideas that are worth implementing, mention them by starting a new issue with the title [FEATURE_REQUEST].
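
> Editor's sketch (not part of this patch): a quick way to sanity-check the torrc setup described above is to route a request through the SOCKS port manually. This assumes Tor is listening on localhost:9050 and that requests and PySocks (both pinned in requirements.txt) are installed.

    import requests

    # 'socks5h' (as opposed to 'socks5') makes PySocks resolve hostnames
    # through Tor as well, which is what .onion addresses require.
    proxies = {
        'http': 'socks5h://127.0.0.1:9050',
        'https': 'socks5h://127.0.0.1:9050',
    }

    # check.torproject.org reports whether the request really arrived via Tor.
    resp = requests.get('https://check.torproject.org/', proxies=proxies, timeout=30)
    print('Congratulations' in resp.text)  # True when traffic is routed through Tor
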
@@ -133,7 +140,11 @@ GNU Public License
 
 - [X] [P5N4PPZ](https://github.com/PSNAppz) - Owner
 - [X] [agrepravin](https://github.com/agrepravin) - Contributor, Reviewer
-- [X] [y-mehta](https://github.com/y-mehta) - Contributer
+- [X] [y-mehta](https://github.com/y-mehta) - Contributor
+- [X] [Manfredi Martorana](https://github.com/Agostinelli) - Contributor
+- [X] [KingAkeem](https://github.com/KingAkeem) - Contributor
+- [X] [Evan Sia Wai Suan](https://github.com/waisuan) - New Contributor
+
 
diff --git a/Resources And Samples/torConnectionExample.py b/Resources And Samples/torConnectionExample.py
deleted file mode 100644
index 4dbf4eb2..00000000
--- a/Resources And Samples/torConnectionExample.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# tor connect example code
-# author: James Campbell
-# date: 2015 05 17
-# date updated: 2016 09 18 confirmed working (make sure to not use privoxy settings or will break) using python3
-
-import urllib
-import urllib.request # had to add for python 3.4 -jc
-import socks
-import socket
-#import socket
-import argparse
-import random
-import sys
-
-
-# terminal arguments parser globals - do not change
-parser = argparse.ArgumentParser()
-parser.add_argument('-o', action='store', dest='onion',
-                    help='put in onion site to load (with http & quotes)') # set -o to accept onion address
-results = parser.parse_args()
-
-# Global Vars
-onionsite = 'http://3g2upl4pq6kufc4m.onion' # set the default onion site to visit to test, in this case DuckDuckGo
-if results.onion != None: # if search terms set in terminal then change from default to that
-    onionsite = results.onion # set from argparse above in globals section
-
-#TOR SETUP GLOBAL Vars
-SOCKS_PORT = 9050 # TOR proxy port that is default from torrc, change to whatever torrc is configured to
-
-socks.set_default_proxy(socks.SOCKS5, "127.0.0.1",SOCKS_PORT)
-socket.socket = socks.socksocket
-
-# Perform DNS resolution through the socket
-def getaddrinfo(*args):
-    return [(socket.AF_INET, socket.SOCK_STREAM, 6, '', (args[0], args[1]))]
-socket.getaddrinfo = getaddrinfo
-
-# test connect to DuckDuckGo .onion site
-headers = {'User-Agent': 'JAMES CAMPBELL jamescampbell.us SEARCH BOT! I FOUND YOU!!!!'
-}
-#print ('trying request now...')
-req = urllib.request.Request(onionsite,None,headers)
-print (req)
-response = urllib.request.urlopen(req) # new python 3 code -jc
-print (response)
-status = 'loaded successfully'
-try:
-    sitehtml = response.read()
-    print (sitehtml)
-except urllib.error.URLError as e:
-    html = e.read().decode("utf8", 'ignore')
-    #html = e.partial
-    status = 'failed reading'
-    html = 'none'
-    currenturl = 'none'
-print (status)
-exit()
diff --git a/modules/__init__.py b/modules/__init__.py
deleted file mode 100644
index eadbd6aa..00000000
--- a/modules/__init__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from .bcolors import *
-from .getemails import *
-from .getweblinks import *
-from .pagereader import *
-from .updater import *
-from .savefile import *
-
-
-__all__ = (bcolors.__all__ + getemails.__all__ + getweblinks.__all__ + pagereader.__all__ + updater.__all__ + savefile.__all__ )
diff --git a/modules/__init__.pyc b/modules/__init__.pyc
deleted file mode 100644
index c8bbeb83..00000000
Binary files a/modules/__init__.pyc and /dev/null differ
diff --git a/modules/bcolors.py b/modules/bcolors.py
index 2b5ab4ef..78b05842 100644
--- a/modules/bcolors.py
+++ b/modules/bcolors.py
@@ -1,16 +1,14 @@
-__all__ = ['Bcolors']
-
-
 class Bcolors:
-    HEADER = '\033[95m'
-    OKBLUE = '\033[94m'
-    OKGREEN = '\033[92m'
-    WARNING = '\033[93m'
-    FAIL = '\033[91m'
-    ENDC = '\033[0m'
-    BOLD = '\033[1m'
-    UNDERLINE = '\033[4m'
-    WHITE = '\033[97m'
-    On_Black = '\033[40m'
-    On_Red = '\033[41m'
-
+
+    def __init__(self):
+        self.HEADER = '\033[95m'
+        self.OKBLUE = '\033[94m'
+        self.OKGREEN = '\033[92m'
+        self.WARNING = '\033[93m'
+        self.FAIL = '\033[91m'
+        self.ENDC = '\033[0m'
+        self.BOLD = '\033[1m'
+        self.UNDERLINE = '\033[4m'
+        self.WHITE = '\033[97m'
+        self.On_Black = '\033[40m'
+        self.On_Red = '\033[41m'
diff --git a/modules/bcolors.pyc b/modules/bcolors.pyc
deleted file mode 100644
index bae21949..00000000
Binary files a/modules/bcolors.pyc and /dev/null differ
diff --git a/modules/getemails.py b/modules/getemails.py
index eb07ab25..4b9c03dc 100644
--- a/modules/getemails.py
+++ b/modules/getemails.py
@@ -1,35 +1,41 @@
-import sys
-import os
-sys.path.append(os.path.abspath('../'))
 from modules.bcolors import Bcolors
-from modules.savefile import saveJson
-import bs4
+from bs4 import BeautifulSoup
 
-__all__ = ['getMails']
 
-"""Get all emails from the website"""
+def getMails(soup):
+
+    """
+    Searches the <a> tags for links, then checks whether a link contains the
+    substring 'mailto', indicating that it's an email. If it is determined
+    to be an email, the link is split and the username is appended to
+    the list.
+
+    Args:
+        soup: BeautifulSoup instance that will be used for parsing
+
+    Returns:
+        emails: list of email IDs
+    """
+    b_colors = Bcolors()
+
+    if isinstance(type(soup), type(BeautifulSoup)):
-
-def getMails(soup,save=0):
-    _soup_instance = bs4.BeautifulSoup
-    if isinstance(type(soup), type(_soup_instance)):
         emails = []
-        for link in soup.find_all('a'):
-            email_link = link.get('href')
-            if email_link != None:
-                if 'mailto' in email_link:
-                    """Split email address on"""
-                    email_addr = email_link.split(':')
+        links = soup.find_all('a')
+        for ref in links:
+            url = ref.get('href')
+            if url and 'mailto' in url:
+                """Split email address on ':'"""
+                email_addr = url.split(':')
+                if (len(email_addr) > 1):
                     emails.append(email_addr[1])
-                else:
-                    pass
+
         """Pretty print output as below"""
-        print ('')
-        print (Bcolors.OKGREEN+'Mails Found - '+Bcolors.ENDC+str(len(emails)))
+        print('')
+        print(b_colors.OKGREEN+'Mails Found - '+b_colors.ENDC+str(len(emails)))
         print ('-------------------------------')
-        for mail in emails:
-            print (mail)
-        if save:
-            saveJson("Extracted-Mail-IDs",emails)
-        return ''
+
+        return emails
+
     else:
-        raise(Bcolors.FAIL+'Method parameter is not of instance bs4.BeautifulSoup'+Bcolors.ENDC)
+        raise Exception('Method parameter is not of instance BeautifulSoup')
diff --git a/modules/getemails.pyc b/modules/getemails.pyc
deleted file mode 100644
index e1605aec..00000000
Binary files a/modules/getemails.pyc and /dev/null differ
diff --git a/modules/getweblinks.py b/modules/getweblinks.py
index ce65b5f4..21471f0e 100644
--- a/modules/getweblinks.py
+++ b/modules/getweblinks.py
@@ -1,79 +1,112 @@
-import sys
-import os
-sys.path.append(os.path.abspath('../'))
-from modules.savefile import saveJson
-import urllib.request
+import re
+import requests
+import tldextract
+
+from bs4 import BeautifulSoup
 from modules.bcolors import Bcolors
-import bs4
-import time
-import threading
-import http
+from requests.exceptions import ConnectionError, HTTPError
+
+
+def valid_url(url, extensions=False):
+    """Checks for any valid url using regular expression matching
+
+    Matches all possible url patterns with the url that is passed and
+    returns True if it is a url and returns False if it is not.
+
+    Args:
+        url: string representing url to be checked
+
+    Returns:
+        bool: True if valid url format and False if not
+    """
+    pattern = r"^https?:\/\/(www\.)?([a-z,A-Z,0-9]*)\.([a-z, A-Z]+)(.*)"
+    regex = re.compile(pattern)
+    if not extensions:
+        if regex.match(url):
+            return True
+        return False
+
+    parts = tldextract.extract(url)
+    valid_sites = list()
+    for ext in extensions:
+        if regex.match(url) and '.'+parts.suffix in ext:
+            valid_sites.append(url)
+    return valid_sites
+
+
+def valid_onion_url(url):
+    """Checks for valid onion url using regular expression matching
+
+    Only matches onion urls
+
+    Args:
+        url: string representing url to be checked
+
+    Returns:
+        bool: True if valid onion url format, False if not
+    """
+    pattern = r"^https?:\/\/(www\.)?([a-z,A-Z,0-9]*)\.onion/(.*)"
+    regex = re.compile(pattern)
+    if regex.match(url):
+        return True
+    return False
 
-__all__ = ['getLinks']
 
+def get_link_status(link, colors):
+    """Generator that yields links as they come
+
+    Uses a head request because it uses less bandwidth than get, and the timeout is
+    set to 10 seconds, after which the link is automatically declared dead.
+
+    Args:
+        link: link to be tested
+        colors: object containing colors for link
+
+    Yields:
+        string: link with either no color or red, which indicates failure
+    """
 
-def link_status(web,out_queue,index):
-    link_live = False
-    out_queue[index] = web + " is_live = False "
     try:
-        urllib.request.urlopen(web)
-        link_live = True
-        out_queue[index] = web + " is_live = True "
-        print(web)
-    except urllib.error.HTTPError as e:
-        print(Bcolors.On_Red+web+Bcolors.ENDC)
-    except urllib.error.URLError as e:
-        print(Bcolors.On_Red+web+Bcolors.ENDC)
-    except http.client.RemoteDisconnected as e:
-        print(Bcolors.On_Red+web+Bcolors.ENDC)
-    return
-
-
-"""Get all onion links from the website"""
-def getLinks(soup,ext,live=0,save=0):
-    _soup_instance = bs4.BeautifulSoup
-    extensions = []
-    if ext:
-        for e in ext:
-            extensions.append(e)
-    if isinstance(type(soup), type(_soup_instance)):
+        resp = requests.head(link, timeout=10)
+        resp.raise_for_status()
+        yield '\t'+link
+    except (ConnectionError, HTTPError):
+        yield '\t'+colors.On_Red+link+colors.ENDC
+
+
+def getLinks(soup, ext=False, live=False):
+    """
+    Searches through all <a> (hyperlink) tags and stores them in a
+    list, then validates whether the url is formatted correctly.
+
+    Args:
+        soup: BeautifulSoup instance currently being used.
+
+    Returns:
+        websites: List of websites that were found
+    """
+    b_colors = Bcolors()
+    if isinstance(soup, BeautifulSoup):
         websites = []
-        start_time = time.time()
-        for link in soup.find_all('a'):
-            web_link = link.get('href')
-            if web_link != None:
-                if ('http' in web_link or 'https' in web_link):
-                    if ext:
-                        for exten in extensions:
-                            if web_link.endswith(exten):
-                                websites.append(web_link)
-                    else:
-                        websites.append(web_link)
+
+        links = soup.find_all('a')
+        for ref in links:
+            url = ref.get('href')
+            if ext:
+                if url and valid_url(url, ext):
+                    websites.append(url)
             else:
-                pass
+                if url and valid_onion_url(url):
+                    websites.append(url)
+
         """Pretty print output as below"""
-        print ('')
-        print (Bcolors.OKGREEN+'Websites Found - '+Bcolors.ENDC+str(len(websites)))
-        print ('-------------------------------')
-        if live:
-            threads = []
-            result = [{} for x in websites]
-            for web in websites:
-                t = threading.Thread(target=link_status, args=(web,result,websites.index(web)))
-                t.start()
-                threads.append(t)
-            try:
-                for t in threads:
-                    t.join()
-                if save:
-                    saveJson("Live-Onion-Links",result)
-            except:
-                pass
-        else:
-            for web in websites:
-                print(web)
-            if save:
-                saveJson("Onion-Links",websites)
-        return websites
+        print(''.join((b_colors.OKGREEN,
+              'Websites Found - ', b_colors.ENDC, str(len(websites)))))
+        print('------------------------------------')
+
+        for link in websites:
+            print(next(get_link_status(link, b_colors)))
+        return websites
+
     else:
-        raise('Method parameter is not of instance bs4.BeautifulSoup')
+        raise(Exception('Method parameter is not of instance BeautifulSoup'))
diff --git a/modules/info.py b/modules/info.py
new file mode 100644
index 00000000..a861cd54
--- /dev/null
+++ b/modules/info.py
@@ -0,0 +1,73 @@
+import requests
+
+from urllib.parse import urlsplit
+from termcolor import cprint
+
+
+def executeAll(target):
+    try:
+        get_robots_txt(target)
+    except Exception:
+        cprint("No robots.txt file Found!", "blue")
+    try:
+        get_dot_git(target)
+    except Exception:
+        cprint("Error !", "red")
+    try:
+        get_dot_svn(target)
+    except Exception:
+        cprint("Error", "red")
+    try:
+        get_dot_htaccess(target)
+    except Exception:
+        cprint("Error", "red")
+
+
+def get_robots_txt(target):
+    cprint("[*]Checking for Robots.txt", 'yellow')
+    url = target
+    target = "{0.scheme}://{0.netloc}/".format(urlsplit(url))
+    r = requests.get(target + "/robots.txt")
+    cprint(r.text, 'blue')
+
+
+def get_dot_git(target):
+    cprint("[*]Checking for .git folder", 'yellow')
+    url = target
+    target = "{0.scheme}://{0.netloc}/".format(urlsplit(url))
+    req = requests.get(target + "/.git/")
+    r = req.status_code
+    if r == 200:
+        cprint("Alert!", 'red')
+        cprint(".git folder exposed publicly", 'red')
+    else:
+        cprint("NO .git folder found", 'blue')
+
+
+def get_dot_svn(target):
+    cprint("[*]Checking for .svn folder", 'yellow')
+    url = target
+    target = "{0.scheme}://{0.netloc}/".format(urlsplit(url))
+    req = requests.get(target + "/.svn/entries")
+    r = req.status_code
+    if r == 200:
+        cprint("Alert!", 'red')
+        cprint(".SVN folder exposed publicly", 'red')
+    else:
+        cprint("NO .SVN folder found", 'blue')
+
+
+def get_dot_htaccess(target):
+    cprint("[*]Checking for .htaccess", 'yellow')
+    url = target
+    target = "{0.scheme}://{0.netloc}/".format(urlsplit(url))
+    req = requests.get(target + "/.htaccess")
+    statcode = req.status_code
+    if statcode == 403:
+        cprint("403 Forbidden", 'blue')
+    elif statcode == 200:
+        cprint("Alert!!", 'blue')
+        cprint(".htaccess file found!", 'blue')
+    else:
+        cprint("Status code", 'blue')
+        cprint(statcode)
diff --git a/modules/pagereader.py b/modules/pagereader.py
index 5783d4e3..66862f25 100644
--- a/modules/pagereader.py
+++ b/modules/pagereader.py
@@ -1,17 +1,65 @@
-import urllib.request
+import requests
+
 from bs4 import BeautifulSoup
 from modules.bcolors import Bcolors
+from requests.exceptions import ConnectionError, HTTPError
+from sys import exit
+
+
+def connection_msg(site):
+    yield "Attempting to connect to {site}".format(site=site)
+
+
+def readPage(site, extension=False):
+    headers = {'User-Agent':
+               'TorBot - Onion crawler | www.github.com/DedSecInside/TorBot'}
+    attempts_left = 3
+    err = " "
+    while attempts_left:
+        try:
+            if not extension:
+                print(next(connection_msg(site)))
+                response = requests.get(site, headers=headers)
+                print("Connection successful.")
+                page = BeautifulSoup(response.text, 'html.parser')
+                return page
+            if extension and attempts_left == 3:
+                print(next(connection_msg('https://'+site)))
+                response = requests.get('https://'+site, headers=headers)
+                print("Connection successful.")
+                page = BeautifulSoup(response.text, 'html.parser')
+                return page
+            if extension and attempts_left == 2:
+                print(next(connection_msg('http://'+site)))
+                response = requests.get('http://'+site, headers=headers)
+                print("Connection successful.")
+                page = BeautifulSoup(response.text, 'html.parser')
+                return page
+            if extension and attempts_left == 1:
+                msg = ''.join(("There has been an {err} while attempting to ",
+                               "connect to {site}.")).format(err=err, site=site)
+                exit(msg)
+
+        except (HTTPError, ConnectionError) as e:
+            attempts_left -= 1
+            err = e
+
+    if isinstance(err, HTTPError):
+        raise Exception("There has been an HTTP error after three attempts.")
+    if isinstance(err, ConnectionError):
+        raise Exception("There has been a connection error after three attempts.")
+
+
+def get_ip():
+    """Returns the user's Tor IP address
 
-__all__ = ['readPage']
+    https://check.torproject.org/ tells you if you are using Tor and it
+    displays your IP address, which we scrape and return
+    """
 
-def readPage(site,printIP=0):
+    b_colors = Bcolors()
+    page = readPage('https://check.torproject.org/')
+    pg = page.find('strong')
+    ip_addr = pg.renderContents()
 
-    headers = {'User-Agent': 'TorBot - Onion crawler | www.github.com/DedSecInside/TorBot' }
-    req = urllib.request.Request(site,None,headers)
-    response = urllib.request.urlopen(req)
-    page = BeautifulSoup(response.read(),'html.parser')
-    if printIP:
-        pg = page.find('strong')
-        IP = pg.renderContents()
-        print(Bcolors.WARNING+Bcolors.BOLD+IP.decode("utf-8")+Bcolors.ENDC)
-    return page
+    return b_colors.WARNING+b_colors.BOLD+ip_addr.decode("utf-8")+b_colors.ENDC
diff --git a/modules/savefile.py b/modules/savefile.py
index 82c295d2..95375a4b 100644
--- a/modules/savefile.py
+++ b/modules/savefile.py
@@ -1,21 +1,24 @@
 import json
 import time
 
-__all__ = ['saveJson']
 
-# open the file "TorBoT-Export" in write ("a") mode
-def saveJson(datatype,data):
-    "function_docstring"
-    timestr = time.strftime("%Y%m%d-%H%M%S")
-    #Json File Creation
-    file = open("TorBoT-Export-"+datatype+timestr+".json", "a")
-    #Store data in Json format
-    output = {datatype : data}
-    #Dump output to file
-    json.dump(output, file, indent=2)
-    file.close()
-    print("\nData will be saved with a File Name :"+ "TorBoT-Export-"+datatype+timestr+".json")
-    return
+def saveJson(datatype, data):
+    """
+    Creates a json file and stores the json
+
+    Args:
+        datatype: the type of the object being passed
+        data: data that is being stored with the object
+    """
+    timestr = time.strftime("%Y%m%d-%H%M%S")
+    file_name = "TorBot-Export-"+datatype+timestr+".json"
+    # Json file creation
+    with open(file_name, "w+") as f:
+        # Store data in Json format
+        output = {datatype: data}
+        # Dump output to file
+        json.dump(output, f, indent=2)
+
+    print("\nData will be saved with a File Name :", file_name)
+    return file_name
diff --git a/modules/updater.py b/modules/updater.py
index d3cdb214..2b662e8d 100644
--- a/modules/updater.py
+++ b/modules/updater.py
@@ -1,27 +1,48 @@
 import subprocess
 
-__all__ = ['updateTor']
 
 def updateTor():
-    print("Checking for latest stable release")
-    isGit = subprocess.Popen("git branch",stdout=subprocess.PIPE,shell=True)
-    (output,err)=isGit.communicate()
-    branch = output[2:8].decode("utf-8")
-    if branch == 'master':
-        update = subprocess.Popen(["git","pull","origin","master"],stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
-        update_out = update.stdout.read()
-        if update_out[85:104].decode("utf-8") == 'Already up-to-date.' :
-            print("TorBot Already up-to-date.")
-        else:
-            print("TorBot has succesfully updated to latest stable version.")
-    else:
-        subprocess.Popen(["git","init"],stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
-        subprocess.Popen(["git","remote","add","origin","https://github.com/DedSecInside/TorBoT.git"],stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
-        update = subprocess.Popen(["git","pull","origin","master"],stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
-        update_out = update.stdout.read()
-        if update_out[85:104].decode("utf-8") == 'Already up-to-date.' :
-            print("TorBot Already up-to-date.")
-        else:
-            print("TorBot has succesfully updated to latest stable version.")
-    return 0
+
+    """
+    Currently updates TorBot by calling git commands through subprocess;
+    not a great method, and it will be replaced in the future.
+
+    """
+
+    print("Checking for latest stable release")
+    isGit = subprocess.Popen(
+        ["git", "branch"],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT)
+    output = isGit.stdout.read()
+    branch = output[2:8].decode("utf-8")
+    print(branch)
+    if branch == 'master':
+        update = subprocess.Popen(
+            ["git", "pull", "origin", "master"],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT)
+        update_out = update.stdout.read()
+        if update_out[90:109].decode("utf-8") == 'Already up-to-date.':
+            print("TorBot is already up-to-date.")
+        else:
+            print("TorBot has successfully updated to latest stable version.")
+    else:
+        subprocess.Popen(
+            ["git", "init"],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT)
+        subprocess.Popen(
+            ["git", "remote", "add", "origin",
+             "https://github.com/DedSecInside/TorBoT.git"],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT)
+        update = subprocess.Popen(
+            ["git", "pull", "origin", "dev"],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT)
+        update_out = update.stdout.read()
+        if update_out[90:109].decode("utf-8") == 'Already up-to-date.':
+            print("TorBot is already up-to-date.")
+        else:
+            print("TorBot has successfully updated to latest stable version.")
diff --git a/requirements.txt b/requirements.txt
index 56192eaf..a5882a6c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
 beautifulsoup4==4.6.0
 PySocks==1.6.7
-stem==1.5.4
+termcolor==1.1.0
+requests==2.18.4
+tldextract==2.2.0
\ No newline at end of file
diff --git a/sample.py b/sample.py
deleted file mode 100644
index 782c0739..00000000
--- a/sample.py
+++ /dev/null
@@ -1,15 +0,0 @@
-import sys
-if __name__ == '__main__':
-    from stem.control import Controller
-    with Controller.from_port(port = 9051) as controller:
-        controller.authenticate() # controller.authenticate("yourpassphrase")
-        bytes_read = controller.get_info("traffic/read")
-        bytes_written = controller.get_info("traffic/written")
-        print("My Tor relay has read %s bytes and written %s." % (bytes_read, bytes_written))
-
-        if not controller:
-            sys.exit(1) # unable to get a connection
-
-        print ("Connection is Working Properly")
-        controller.close()
-
diff --git a/tests/test_getemails.py b/tests/test_getemails.py
index 49057cff..cb8a73d5 100644
--- a/tests/test_getemails.py
+++ b/tests/test_getemails.py
@@ -1,24 +1,20 @@
 import sys
 import os
-import unittest
-from io import StringIO
-sys.path.append(os.path.abspath('../modules'))
-import getemails
-from bcolors import Bcolors
-import pagereader
 
-soup = pagereader.readPage('http://www.whatsmyip.net/')
+PACKAGE_PARENT = '..'
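+# The path shim below locates the repository root relative to this test file
+# and prepends it to sys.path, so 'from modules import ...' resolves when the
+# tests are invoked from the tests/ directory.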
+SCRIPT_DIR = os.path.dirname(os.path.realpath(
+    os.path.join(os.getcwd(), os.path.expanduser(__file__))))
 
-class getMailsTestCase(unittest.TestCase):
-
-    def setUp(self):
-        self.held, sys.stdout = sys.stdout, StringIO()
-
-    def test_print_emails(self):
-        data = "\n"+Bcolors.OKGREEN+"Mails Found - "+Bcolors.ENDC+"1\n-------------------------------\nadvertise@provaz.eu\n"
-        getemails.getMails(soup)
-        self.assertEqual(sys.stdout.getvalue(),data)
-
+sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT)))
+
+from modules import pagereader, getemails
+
+
+def test_get_emails_successful():
+    soup = pagereader.readPage('https://www.helloaddress.com/')
+    test_emails = ["hello@helloaddress.com"]
+    emails = getemails.getMails(soup)
+    assert emails == test_emails
 
 if __name__ == '__main__':
-    unittest.main()
+    test_get_emails_successful()
\ No newline at end of file
diff --git a/tests/test_getweblinks.py b/tests/test_getweblinks.py
index 762f97b1..2e7d0cff 100644
--- a/tests/test_getweblinks.py
+++ b/tests/test_getweblinks.py
@@ -1,27 +1,25 @@
+#!/usr/bin/env python
+
 import sys
 import os
-import unittest
-from io import StringIO
-sys.path.append(os.path.abspath('../modules'))
-import getweblinks
-from bcolors import Bcolors
-import pagereader
 
+PACKAGE_PARENT = '..'
+SCRIPT_DIR = os.path.dirname(os.path.realpath(
+    os.path.join(os.getcwd(), os.path.expanduser(__file__))))
+
+sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT)))
+from modules import getweblinks, pagereader
 
-soup = pagereader.readPage('http://www.whatsmyip.net/')
+
+def test_get_links_successful():
+    soup = pagereader.readPage('http://www.whatsmyip.net/')
+    data = ['http://aff.ironsocket.com/SH7L',
+            'http://aff.ironsocket.com/SH7L',
+            'http://wsrs.net/',
+            'http://cmsgear.com/']
 
-class getLinksTestCase(unittest.TestCase):
-
-    def setUp(self):
-        self.held, sys.stdout = sys.stdout, StringIO()
-        self.maxDiff=None
-
-    def test_print_links(self):
-        #data = "\nWebsites Found - 7\n-------------------------------\nhttp://ads.wsrs.net/www/delivery/ck.php?n=MyIP856a6b4\nhttp://ads.wsrs.net/www/delivery/ck.php?n=MyIPbf5d683\nhttp://aff.ironsocket.com/SH7L\nhttp://aff.ironsocket.com/SH7L\nhttp://ads.wsrs.net/www/delivery/ck.php?n=MyIPdb5f512\nhttp://wsrs.net/\nhttp://cmsgear.com/\n"
-        data = "\n"+Bcolors.OKGREEN+"Websites Found - "+Bcolors.ENDC+"1\n-------------------------------\nhttp://cmsgear.com/\n"
-        ext = ['.com/']
-        getweblinks.getLinks(soup,ext)
-        self.assertEqual(sys.stdout.getvalue(),data)
+    result = getweblinks.getLinks(soup, ext=['.com', '.net'])
+    assert result == data
 
 if __name__ == '__main__':
-    unittest.main()
+    test_get_links_successful()
\ No newline at end of file
diff --git a/tests/test_savetofile.py b/tests/test_savetofile.py
index 42e232c6..2c6c369d 100644
--- a/tests/test_savetofile.py
+++ b/tests/test_savetofile.py
@@ -1,29 +1,40 @@
 import sys
 import os
-import unittest
-from io import StringIO
-sys.path.append(os.path.abspath('../modules'))
-import getweblinks
-from bcolors import Bcolors
-import pagereader
-import time
-
-soup = pagereader.readPage('http://www.whatsmyip.net/')
-timestr = time.strftime("%Y%m%d-%H%M%S")
-
-class getLinksTestCase(unittest.TestCase):
-
-    def setUp(self):
-        self.held, sys.stdout = sys.stdout, StringIO()
-        self.maxDiff=None
-
-    def test_save_links(self):
-        data = "\n"+Bcolors.OKGREEN+"Websites Found - "+Bcolors.ENDC+"1\n-------------------------------\nhttp://cmsgear.com/\n\nData will be saved with a File Name :"+ "TorBoT-Export-Onion-Links"+timestr+".json\n"
-        ext = ['.com/']
-        getweblinks.getLinks(soup,ext,0,1)
-        self.assertEqual(sys.stdout.getvalue(),data)
+import json
 
+PACKAGE_PARENT = '..'
+SCRIPT_DIR = os.path.dirname(os.path.realpath(
+    os.path.join(os.getcwd(), os.path.expanduser(__file__))))
+sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT)))
 
-if __name__ == '__main__':
-    unittest.main()
+from modules import savefile
+
+
+def test_save_links_successful():
+    mock_data = ['http://aff.ironsocket.com/SH7L',
+                 'http://aff.ironsocket.com/SH7L',
+                 'http://wsrs.net/',
+                 'http://cmsgear.com/']
+    try:
+        file_name = savefile.saveJson('Links', mock_data)
+        mock_output = {'Links': mock_data}
+
+        with open('test_file.json', 'w+') as test_file:
+            json.dump(mock_output, test_file, indent=2)
+
+        os.chdir(os.getcwd())
+        assert os.path.isfile(file_name) is True
+        mock_file = open(file_name, 'r')
+        test_file = open('test_file.json', 'r')
+        mock_data = mock_file.read()
+        test_data = test_file.read()
+
+    finally:
+        os.remove(file_name)
+        os.remove('test_file.json')
+
+    assert mock_data == test_data
+
+if __name__ == '__main__':
+    test_save_links_successful()
\ No newline at end of file
diff --git a/torBot.py b/torBot.py
index a28be0a8..39e4fe44 100644
--- a/torBot.py
+++ b/torBot.py
@@ -1,124 +1,189 @@
-#from modules.getemails import getMails
-#from modules.getweblinks import getLinks
-#from modules.pagereader import readPage
-#from modules.bcolors import bcolors
-#from modules.updater import updateTor
-
-from modules import *
-
+import argparse
 import socket
 import socks
-import argparse
+from modules import (bcolors, getemails, pagereader, getweblinks, updater,
+                     info, savefile)
+
+
+LOCALHOST = "127.0.0.1"
+PORT = 9050
+# TorBot VERSION
+__VERSION = "1.2"
+
+
+def connect(address, port):
+    """ Establishes connection to the given port
+
+    Assumes the port is bound to localhost; if the host that the port is
+    bound to changes, update the address accordingly
+
+    Args:
+        address: address that the port is bound to
+        port: port to establish the connection on
+    """
+    socks.set_default_proxy(socks.PROXY_TYPE_SOCKS5, address, port)
+    socket.socket = socks.socksocket  # Monkey-patch our socket to the tor socket
+
+    def getaddrinfo(*args):
+        """
+        Overloads the socket function from the std socket library.
+        Check the socket.getaddrinfo() documentation to understand the parameters.
+        Simple description below:
+        argument - explanation (actual value)
+        socket.AF_INET - the type of address the socket can speak to (IPV4)
+        socket.SOCK_STREAM - creates a stream connection rather than packets
+        6 - protocol being used is TCP
+        Last two arguments should be a tuple containing the address and port
+        """
+        return [(socket.AF_INET, socket.SOCK_STREAM, 6,
+                 '', (args[0], args[1]))]
+    socket.getaddrinfo = getaddrinfo
 
-from stem import Signal
-from stem.control import Controller
-
-with Controller.from_port(port = 9051) as controller:
-    controller.authenticate("16:3BEA46EB6C489B90608A65120BD7CF0C7BA709513AB8ACF212B9537183")
-    controller.signal(Signal.NEWNYM)
-
-#TorBot VERSION
-_VERSION_ = "1.0.1"
-#TOR SETUP GLOBAL Vars
-SOCKS_PORT = 9050 # TOR proxy port that is default from torrc, change to whatever torrc is configured to
-socks.set_default_proxy(socks.SOCKS5, "127.0.0.1",SOCKS_PORT)
-socket.socket = socks.socksocket
-
-# Perform DNS resolution through the socket
-def getaddrinfo(*args):
-    return [(socket.AF_INET, socket.SOCK_STREAM, 6, '', (args[0], args[1]))]
-
-socket.getaddrinfo = getaddrinfo
 
 def header():
-    """ Display the header of TorBot """
-    print("#######################################################")
-    print( "MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWWMMMMMMMMMMMMM")
-    print( "MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWWMMMMMMMMMMMMMM")
-    print( "MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWNXNWWWWWMMMMMMMMMM")
-    print( "MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWWWX0KXXKKXWMMMMMMMMMMM")
-    print( "MMMMMMMMMMMMMMMMMMMMMMMMMMMMMWNNKOkOOkOXWMMMMMMMMMMMMM")
-    print( "MMMMMMMMMMMMMMMMMMMMMMMMMMMMNX0kdodoxKWMMMMMMMMMMMMMMM")
-    print( "MMMMMMMMMMMMMMMMMMMMMMMMMMMW0doccloONMWWMMMMMMMMMMMMMM")
-    print( "MMMMMMMMMMMMMMMMMMMMMMMMMMMKl;;:cxKWMMMMMMMMMMMMMMMMMM")
-    print( "MMMMMMMMMMMMMMMMMMMMMMWKOXNx;,,cONMMMMMMMMMMMMMMMMMMMM")
-    print( "MMMMMMMMMMMMMMMMMMMMMMMXdxKk:',lONMMMM"+Bcolors.FAIL + " D3DSEC "+Bcolors.WHITE+"MMMMMMMM")
-    print( "MMMMMMMMMMMMMMMMMMMMMMMMOo0NOdxc,kMMMM"+Bcolors.FAIL + " INS1DE "+Bcolors.WHITE+"MMMMMMMM")
-    print( "MMMMMMMMMMMMMMMMMMMMMMMMOcONOxkx;dWMMMMMMMMMMMMMMMMMMM")
-    print( "MMMMMMMMMMMMMMMMMMMMMMNkcdXXOkxkd:oXMMMMMMMMMMMMMMMMMM")
-    print( "MMMMMMMMMMMMMMMMMMMNOoclONNX00OkOxc:lkXWMMMMMMMMMMMMMM")
-    print( "MMMMMMMMMMMMMMMMN0olld0NWNNX0O00kxkxl:;ckXMWWMMMMMMMMM")
-    print( "MMMMMMMMMMMWMMNxccd0NNNNNXNXOkOK0dodxdo:,;o0WMMMMMMMMM")
-    print( "MMMMMMMMMMMMNk:ckXNNWNXXXXNXOOOOK0oloooooc,'oKMMMMMMMM")
-    print( "MMMMMMMMMMMXc;xXNNNXKXXXNNWKOO0Ok0x:clllllc:.,OWMMMMMM")
-    print( "MMMMMMMMMMX:;0WNNX00XNNNNNNKOO0KkkOc,ccccccc:.'OWMMMMM")
-    print( "MMMMMMMMMNl,ONNN0OXNNNNNXXNKOkOK0xkl':c::::::;.;KMMMMM")
-    print( "MMMMMMMMM0,lNWXO0NNNNXKKXXNXO0Ok0Oxl',:;;;;;;;..dMMMMM")
-    print( "MMMMMMMMMk,xWNOONNNX00XNNNWKOO0OkOxc'';;,,,,,,'.cMMMMM")
-    print( "MMMMMMMMMx,xWKkKWNXOKNWNNNX0xxOKxxx:..,,,,,''''.cMMMMM")
-    print( "MMMMMMMMM0,oWXkOWXOKNNNNN00Xkdx0kdd;..,'''''''..oMMMMM")
-    print( "MMMMMMMMMNl;0W0kKKkKWNNN0ONNOxdOOll,..'''......,0MMMMM")
-    print( "MMMMMMMMMMK::KN0kKOkNNWXk0WX0kdxkc:............xWMMMMM")
-    print( "MMMMMMMMMMMKl:kX0k0kONWNOONX0koxd:,..........'kWMMMMMM")
-    print( "MMMMMMMMMMMMNxccxOkkxkKWKx0NOoooc'..........lKWMMMMMMM")
-    print( "MMMMMMMMMMMMMWNklccclldk0OxOdcc;. .......;oKWWMMMMMMMM")
-    print( "MMMMMMMMMMMMMMMMWXOdl:::;cc;'... ..',:lx0NMMMMMMMMMMMM")
-    print( "MMMMMMMMMMMMMMMMMMMMMNKOkxddolloodk0XWMMMMMMMMMMMMMMMM")
-    print(Bcolors.FAIL+Bcolors.BOLD)
-    print( "           __  ____  ____  __  ______         ")
-    print( "          / /_/ __ \/ __ \/ /_ ____/_  __/    ")
-    print( "         / __/ / / / /_/ / __ \/ __ \/ /      ")
-    print( "        / /_/ /_/ / _, _/ /_/ / /_/ / /       ")
-    print( "        \__/\____/_/ |_/_.___/\____/_/  V"+_VERSION_)
-    print(Bcolors.FAIL+Bcolors.On_Black)
-    print("#######################################################")
-    print("#  TorBot - A python Tor Crawler                      #")
-    print("#  GitHub : https://github.com/DedsecInside/TorBot    #")
-    print("#######################################################")
-    print(Bcolors.FAIL + "LICENSE: GNU Public License" + Bcolors.ENDC)
-    print()
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("-v","--version",action="store_true",help="Show current version of TorBot.")
-    parser.add_argument("--update",action="store_true",help="Update TorBot to the latest stable version")
-    parser.add_argument("-q","--quiet",action="store_true")
-    parser.add_argument("-u","--url",help="Specifiy a website link to crawl")
-    parser.add_argument("-s","--save",action="store_true", help="Save results in a file")
-    parser.add_argument("-m","--mail",action="store_true", help="Get e-mail addresses from the crawled sites")
-    parser.add_argument("-e","--extension",action='append',dest='extension',default=[],help="Specifiy additional website extensions to the list(.com or .org etc)")
-    parser.add_argument("-l","--live",action="store_true",help="Check if websites are live or not (slow)")
-    args = parser.parse_args()
-    if args.version :
-        print("TorBot Version:"+_VERSION_)
-        exit()
-    if args.update:
-        updateTor()
-        exit()
-
-    if args.quiet == 0:
-        header()
-    print ("Tor Ip Address :")
-    link = args.url
-    ext = 0
-    live = 0
-    save=0
-    live = args.live
-    ext = args.extension
-    save = args.save
-    a = readPage("https://check.torproject.org/",1)
-    if link:
-        b = readPage(link)
-    else:
-        b = readPage("http://torlinkbgs6aabns.onion/")
-    if args.mail:
-        getMails(b,save)
-    getLinks(b,ext,live,save)
-    print ("\n\n")
-    return 0
+    """
+    Prints out header ASCII art
+    """
+
+    b_color = bcolors.Bcolors()
+    D3DSEC = b_color.FAIL + " D3DSEC " + b_color.WHITE
+    INS1DE = b_color.FAIL + " INS1DE " + b_color.WHITE
+
+    header = r"""
+    {WHITE}
+    ######################################################
+    MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWWMMMMMMMMMMMMM
+    MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWWMMMMMMMMMMMMMM
+    MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWNXNWWWWWMMMMMMMMMM
+    MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWWWX0KXXKKXWMMMMMMMMMMM
+    MMMMMMMMMMMMMMMMMMMMMMMMMMMMMWNNKOkOOkOXWMMMMMMMMMMMMM
+    MMMMMMMMMMMMMMMMMMMMMMMMMMMMNX0kdodoxKWMMMMMMMMMMMMMMM
+    MMMMMMMMMMMMMMMMMMMMMMMMMMMW0doccloONMWWMMMMMMMMMMMMMM
+    MMMMMMMMMMMMMMMMMMMMMMMMMMMKl;;:cxKWMMMMMMMMMMMMMMMMMM
+    MMMMMMMMMMMMMMMMMMMMMMWKOXNx;,,cONMMMMMMMMMMMMMMMMMMMM
+    MMMMMMMMMMMMMMMMMMMMMMMXdxKk:',lONMMMM{D3DSEC}MMMMMMMM
+    MMMMMMMMMMMMMMMMMMMMMMMMOo0NOdxc,kMMMM{INS1DE}MMMMMMMM
+    MMMMMMMMMMMMMMMMMMMMMMMMOcONOxkx;dWMMMMMMMMMMMMMMMMMMM
+    MMMMMMMMMMMMMMMMMMMMMMNkcdXXOkxkd:oXMMMMMMMMMMMMMMMMMM
+    MMMMMMMMMMMMMMMMMMMNOoclONNX00OkOxc:lkXWMMMMMMMMMMMMMM
+    MMMMMMMMMMMMMMMMN0olld0NWNNX0O00kxkxl:;ckXMWWMMMMMMMMM
+    MMMMMMMMMMMWMMNxccd0NNNNNXNXOkOK0dodxdo:,;o0WMMMMMMMMM
+    MMMMMMMMMMMMNk:ckXNNWNXXXXNXOOOOK0oloooooc,'oKMMMMMMMM
+    MMMMMMMMMMMXc;xXNNNXKXXXNNWKOO0Ok0x:clllllc:.,OWMMMMMM
+    MMMMMMMMMMX:;0WNNX00XNNNNNNKOO0KkkOc,ccccccc:.'OWMMMMM
+    MMMMMMMMMNl,ONNN0OXNNNNNXXNKOkOK0xkl':c::::::;.;KMMMMM
+    MMMMMMMMM0,lNWXO0NNNNXKKXXNXO0Ok0Oxl',:;;;;;;;..dMMMMM
+    MMMMMMMMMk,xWNOONNNX00XNNNWKOO0OkOxc'';;,,,,,,'.cMMMMM
+    MMMMMMMMMx,xWKkKWNXOKNWNNNX0xxOKxxx:..,,,,,''''.cMMMMM
+    MMMMMMMMM0,oWXkOWXOKNNNNN00Xkdx0kdd;..,'''''''..oMMMMM
+    MMMMMMMMMNl;0W0kKKkKWNNN0ONNOxdOOll,..'''......,0MMMMM
+    MMMMMMMMMMK::KN0kKOkNNWXk0WX0kdxkc:............xWMMMMM
+    MMMMMMMMMMMKl:kX0k0kONWNOONX0koxd:,..........'kWMMMMMM
+    MMMMMMMMMMMMNxccxOkkxkKWKx0NOoooc'..........lKWMMMMMMM
+    MMMMMMMMMMMMMWNklccclldk0OxOdcc;. .......;oKWWMMMMMMMM
+    MMMMMMMMMMMMMMMMWXOdl:::;cc;'... ..',:lx0NMMMMMMMMMMMM
+    MMMMMMMMMMMMMMMMMMMMMNKOkxddolloodk0XWMMMMMMMMMMMMMMMM
+    {FAIL}
+    {BOLD}
+           __  ____  ____  __  ______
+          / /_/ __ \/ __ \/ /_ ____/_  __/
+         / __/ / / / /_/ / __ \/ __ \/ /
+        / /_/ /_/ / _, _/ /_/ / /_/ / /
+        \__/\____/_/ |_/_____/\____/_/  V{VERSION}
+    {FAIL}
+    {On_Black}
+    #######################################################
+    #  TorBot - A python Tor Crawler                      #
+    #  GitHub : https://github.com/DedsecInside/TorBot    #
+    #  Help : use -h for help text                        #
+    #######################################################
+    {FAIL}
+    "LICENSE: GNU Public License"
+    {END}""".format(
+        D3DSEC=D3DSEC, INS1DE=INS1DE, FAIL=b_color.FAIL,
+        BOLD=b_color.BOLD, VERSION=__VERSION, END=b_color.ENDC,
+        On_Black=b_color.On_Black, WHITE=b_color.WHITE
+    )
+    print(header)
+
+
+def main(conn=False):
+
+    if conn:
+        connect(LOCALHOST, PORT)
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-v", "--version",
+                        action="store_true",
+                        help="Show current version of TorBot.")
+    parser.add_argument("--update",
+                        action="store_true",
+                        help="Update TorBot to the latest stable version")
+    parser.add_argument("-q", "--quiet",
+                        action="store_true")
+    parser.add_argument("-u", "--url",
+                        help="Specify a website link to crawl")
+    parser.add_argument("-s", "--save",
+                        action="store_true",
+                        help="Save results in a file")
+    parser.add_argument("-m", "--mail",
+                        action="store_true",
+                        help="Get e-mail addresses from the crawled sites")
+    parser.add_argument("-e", "--extension",
+                        action='append',
+                        dest='extension',
+                        default=[],
+                        help=' '.join(("Specify additional website extensions",
+                                       "for the list (.com, .org, etc.)")))
+    parser.add_argument("-l", "--live",
+                        action="store_true",
+                        help="Check if websites are live or not (slow)")
+    parser.add_argument("-i", "--info",
+                        action="store_true",
+                        help=' '.join(("Info displays basic info of the",
+                                       "scanned site (very slow)")))
+    args = parser.parse_args()
+
+    link = args.url
+
+    # If the flag is -v, --update, or -q/--quiet, the user only runs that
+    # operation because these are single flags only
+    if args.version:
+        print("TorBot Version:" + __VERSION)
+        exit()
+    if args.update:
+        updater.updateTor()
+        exit()
+    if not args.quiet:
+        header()
+    # If the url flag is set, check for an accompanying flag. Only one
+    # additional flag can be set with the -u/--url flag
+    if args.url:
+        print("Tor IP Address :", pagereader.get_ip())
+        html_content = pagereader.readPage(link, args.extension)
+        # -m/--mail
+        if args.mail:
+            emails = getemails.getMails(html_content)
+            print(emails)
+            if args.save:
+                savefile.saveJson('Emails', emails)
+        # -i/--info
+        elif args.info:
+            info.executeAll(link)
+            if args.save:
+                print('Nothing to save.\n')
+        else:
+            links = getweblinks.getLinks(soup=html_content,
+                                         live=args.live,
+                                         ext=args.extension)
+            if args.save:
+                savefile.saveJson("Links", links)
+    else:
+        print("usage: torBot.py [-h] [-v] [--update] [-q] [-u URL] [-s] [-m] [-e EXTENSION] [-l] [-i]")
+
+    print("\n\n")
+
 
 if __name__ == '__main__':
-    try:
-        main()
-    except KeyboardInterrupt:
-        print("Interrupt received! Exiting cleanly...")
+
+    try:
+        main(conn=True)
+
+    except KeyboardInterrupt:
+        print("Interrupt received! Exiting cleanly...")
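
> Editor's sketch (not part of this patch): the new valid_onion_url() helper in modules/getweblinks.py is a natural target for the pytest suite that this patch introduces. A possible additional test, assuming the regex shown above is the one that ships:

    import re

    # Same pattern as valid_onion_url() in modules/getweblinks.py above.
    ONION_PATTERN = r"^https?:\/\/(www\.)?([a-z,A-Z,0-9]*)\.onion/(.*)"


    def test_valid_onion_url_pattern():
        regex = re.compile(ONION_PATTERN)
        # The old default seed in torBot.py is a well-formed onion URL.
        assert regex.match('http://torlinkbgs6aabns.onion/')
        # Clearnet URLs do not match.
        assert not regex.match('http://cmsgear.com/')
        # Note the pattern requires a trailing slash after the domain.
        assert not regex.match('http://torlinkbgs6aabns.onion')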