forked from bitcrackcyber/rurasort
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrurasort.py
379 lines (325 loc) · 14 KB
/
rurasort.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
#!/usr/bin/python
"""
rurasort.py 1.8
author: Dimitri Fousekis (@rurapenthe0)
Licensed under the GNU General Public License Version 2 (GNU GPL v2),
available at: http://www.gnu.org/licenses/gpl-2.0.txt
(C) 2015 Dimitri Fousekis (@rurapenthe0)
TODO:
Please send a tweet to @rurapenthe0 with any suggestions or comments.
THANKS:
Assistance with certain operations or input provided by: @m3g4tr0n, g0tmi1k, and atom (@hashcat)
To use this script, simply call it from the command-line with your relevant option.
NOTE : *** IT IS RECOMMENDED YOU RUN ONLY ONE ENGINE/PARAM AT A TIME, ENSURE OUTPUT IS CORRECT THEN USE THE NEXT ONE. **
"""
#import our required libraries
import sys, io, argparse,re,unicodedata
from bs4 import BeautifulSoup
filelist=[]
#command-line parameters
cmdparams = argparse.ArgumentParser(description="RuraSort - Wordlist management tool by RuraPenthe. Output goes to stdout.")
cmdparams.add_argument("--maxlen", help="filter out words over a certain max length",dest="maxlen", type=int)
cmdparams.add_argument("--maxtrim",help="trim words over a certain max length", dest="maxtrim", type=int)
cmdparams.add_argument("--digit-trim", help="trim all digits from beginning and end of words", dest="digit_trim", action="store_true")
cmdparams.add_argument("--special-trim", help="trim all special characters from beginning and end of words", dest="special_trim", action="store_true")
cmdparams.add_argument("--dup-remove", help="remove duplicate words within words eg: hellohello -> hello", dest="dup_remove", action="store_true")
cmdparams.add_argument("--no-sentence", help="de-sentenceify the line by removing allspacesbetweenwords", dest="no_sentence", action="store_true")
cmdparams.add_argument("--lower", help="change word to all lower case", dest="lower", action="store_true")
cmdparams.add_argument("--infile", help="specify the wordlist to be used", dest="infile")
cmdparams.add_argument("--wordify", help="convert all input sentences into separate words", dest="wordify", action="store_true")
cmdparams.add_argument("--no-numbers", help="ignore/delete words that are all numeric", dest="no_numbers", action="store_true")
cmdparams.add_argument("--minlen", help="filter out words below a certain min length",dest="minlen", type=int)
cmdparams.add_argument("--detab", help="remove tabs or space from beginning of words",dest="detab", action="store_true")
cmdparams.add_argument("--dup-sense", help="Senses if more than <specified>%% of a word is duplicate chars and removes the word", dest="sense",type=int)
cmdparams.add_argument("--hash-remove", help="Filters word candidates that are actually hashes", dest="hashfilter", action="store_true")
cmdparams.add_argument("--email-sort",help="Converts email addresses to username and domain as separate words", dest="emailsort", action="store_true")
cmdparams.add_argument("--email-split",nargs=2, help="Extracts email addresses to username and domain and appends to <user wordlist> and <domain wordlist>",dest="emailsplit",metavar='<user/domain.txt>')
cmdparams.add_argument("--dewebify",action="store_true",help="Extracts words from an HTML specified by --infile and outputs a plain wordlist.", dest="dewebify")
cmdparams.add_argument("--noutf8",help="Only output non UTF-8 characters. Works with --dewebify only",dest="noutf8",action="store_true")
args = cmdparams.parse_args()
# Hash Detection Engine. These engines are from hashfind.py, and detect various hash types
# this engine checks the string candidate for a match to valid hash types and filters it out.
def Hashfilter(inputstring):
#Init values
hash_present = False
hash_done = False
# We want the loop to end immediately if we find a match, otherwise its wasting resources.
while (not hash_present) or (not hash_done):
# MD5 Check (includes NTLM, MD4, MD2)
results = re.search(r'(^|[^a-fA-F0-9])[a-fA-F0-9]{32}([^a-fA-F0-9]|\$)',inputstring,re.M|re.I)
if results: hash_present = True
# mySQL (old hashes)
results = re.search(r'[0-7][0-9a-f]{7}[0-7][0-9a-f]{7}',inputstring,re.M|re.I)
if results: hash_present = True
# Joomla Check
results = re.search(r'([0-9a-zA-Z]{32}):(\w{32})',inputstring)
if results: hash_present = True
# vBulletin Check
results = re.search(r'([0-9a-zA-Z]{32}):(\S{3,32})',inputstring,re.M|re.I)
if results: hash_present = True
# PHBB3 Check
results = re.search(r'\$H\$\S{31}',inputstring,re.M|re.I)
if results: hash_present = True
# Wordpress MD5
results = re.search('\$P\$\S{31}',inputstring,re.M)
if results: hash_present = True
# Drupal Check
results = re.search(r'\$S\$\S{52}',inputstring,re.M|re.I)
if results: hash_present = True
# Unix MD5 old Check
results = re.search(r'\$1\$\w{8}\S{22}',inputstring,re.M|re.I)
if results: hash_present = True
# SHA512Crypt Check
results = re.search(r'\$6\$\w{8}\S{86}',inputstring,re.M|re.I)
if results: hash_present = True
# SHA1 Check
results = re.search(r'(^|[^a-fA-F0-9])[a-fA-F0-9]{40}([^a-fA-F0-9]|$)',inputstring,re.M|re.I)
if results: hash_present = True
# SHA512 check
results = re.search(r'(^|[^a-fA-F0-9])[a-fA-F0-9]{128}([^a-fA-F0-9]|$)',inputstring,re.M|re.I)
if results: hash_present = True
# SHA256 Check
results = re.search(r'(^|[^a-fA-F0-9])[a-fA-F0-9]{64}([^a-fA-F0-9]|$)',inputstring,re.M|re.I)
if results: hash_present = True
# SHA384 Check
results = re.search(r'(^|[^a-fA-F0-9])[a-fA-F0-9]{96}([^a-fA-F0-9]|$)',inputstring,re.M|re.I)
if results: hash_present = True
# Blowfish Check
results = re.search(r'\$2a\$10\$\S{53}',inputstring,re.M|re.I)
if results: hash_present = True
# MD5 APR Check
results = re.search(r'\$apr1\$\w{8}\S{22}',inputstring,re.M|re.I)
if results: hash_present = True
# Sun MD5 Check
results = re.search(r'\$md5\$rounds\=904\$\w{16}\S{23}',inputstring,re.M|re.I)
if results: hash_present = True
# SHA256Unix Check
results = re.search(r'\$5\$\w{8}\$\S{43}',inputstring,re.M|re.I)
if results: hash_present = True
# AIX SHA256 Check
results = re.search(r'\{ssha256\}06\$\S{16}\$\S{43}',inputstring,re.M|re.I)
if results: hash_present = True
# AIX SHA512 Check
results = re.search(r'\{ssha512\}06\$\S{16}\$\S{86}',inputstring,re.M|re.I)
if results: hash_present = True
# AIX SHA1 Check
results = re.search(r'\{ssha1\}06\$\S{16}\$\S{27}',inputstring,re.M|re.I)
if results: hash_present = True
# OSX Hash Check
results = re.search(r'\$ml\$\w{5}\$\w{64}\$\w{128}',inputstring,re.M|re.I)
if results: hash_present = True
# HMAC SHA1 Check
results = re.search(r'([0-9a-fA-F]{130}):(\w{40})',inputstring,re.M|re.I)
if results: hash_present = True
# Cisco 8 Check
results = re.search(r'\$8\$\S{14}\$\S{43}',inputstring,re.M|re.I)
if results: hash_present = True
# Cisco 9 Check
results = re.search(r'\$9\$\S{14}\$\S{43}',inputstring,re.M|re.I)
if results: hash_present = True
# DJango SHA256 Check
results = re.search(r'pbkdf2_sha256\$20000\$\S{57}',inputstring,re.M|re.I)
if results: hash_present = True
# DJango SHA1 Check
results = re.search(r'sha1\$\w{5}\$\w{40}',inputstring,re.M|re.I)
if results: hash_present = True
# MSSQL 2005 Check
results = re.search(r'(0x\w{52})',inputstring,re.M|re.I)
if results: hash_present = True
# MSSQL 2000 Check
results = re.search(r'(0x\w{92})',inputstring,re.M|re.I)
if results: hash_present = True
# IP Address Check
results = re.search(r'([0-9]{1,3}[\.]){3}[0-9]{1,3}',inputstring)
if results: hash_present = True
hash_done = True
if hash_present: return ""
else: return inputstring
# Filter Engines. The Engines process a particular string, depending on whether the command-line requested such or not.
#Email filter engine
def Emailsort(inputstring):
resultset = []
results = re.search('[a-zA-Z0-9.#?$*_-]+@[a-zA-Z0-9.#?$*_-]+',inputstring,re.M|re.I)
if results:
find_positional = inputstring.find('@')
resultset.append(inputstring[0:find_positional])
resultset.append(inputstring[find_positional+1:len(inputstring)])
return resultset
else:
resultset.append(inputstring)
return resultset
#Email split engine
def Emailsplit(inputstring):
global user_file_handler
global domain_file_handler
resultset = []
results = re.search('[a-zA-Z0-9.#?$*_-]+@[a-zA-Z0-9.#?$*_-]+',inputstring,re.M|re.I)
if results:
find_positional = inputstring.find('@')
user_file_handler.write(inputstring[0:find_positional]+'\r\n')
domain_file_handler.write(inputstring[find_positional+1:len(inputstring)].rstrip()+'\r\n')
return ""
else: return inputstring
# De-Webify Engine
def Dewebify(filename):
try:
filehandler = open('rurasort.jnk','r')
junklist2 = filehandler.read().rstrip().split()
except:
sys.stderr.write('[-] Error! I cannot open the junkfile - is it there? \r\n')
sys.exit(-1)
filehandler = open(filename,'r')
htmls = filehandler.read()
response = ''.join(BeautifulSoup(htmls,"html.parser").findAll(text=True))
sys.stderr.write('[+] Parsing complete next phase: Removing whitespace...\r\n')
wordlist = []
templist = []
wordlist = response.lower().split()
for items in wordlist:
templist.append(items.lstrip().rstrip())
sys.stderr.write('[+] Whitespace removal complete. Next phase: Ignoring > Length 20...\r\n')
wordlist = []
wordlist = templist
templist= []
for items in wordlist:
if len(items) < 21: templist.append(items)
sys.stderr.write('[+] Length check complete. Removing duplicates...\r\n')
wordlist = []
wordlist = templist
templist = []
for items in wordlist:
if items not in templist: templist.append(items)
sys.stderr.write('[+] Duplicates removed. Removing special chars...\r\n')
wordlist = []
wordlist = templist
templist = []
for items in wordlist:
templist.append(Special_Trim(items))
sys.stderr.write('[+] Special chars removed. Removing known junk from junkfile...\r\n')
wordlist = []
wordlist = templist
templist = []
for items in wordlist:
canaccept = True
for checkitem in junklist2:
if checkitem in items.lower(): canaccept=False
if canaccept: templist.append(items)
sys.stderr.write('[+] Junk removed. Ignoring < Length 4 ...\r\n')
wordlist = []
wordlist = templist
templist = []
for items in wordlist:
if len(items.rstrip()) > 3: templist.append(items)
sys.stderr.write('[+] All done, printing output...\r\n')
for items in templist:
if args.noutf8:
if (unicodedata.category(items[1]) != 'Ll') and (unicodedata.category(items[1]) != 'Nd'): print items.encode('utf-8')
else: print items.encode('utf-8')
sys.stderr.write('[+] All done.\r\n')
#print response
#Duplicate sense engine
def Sense(inputstring):
if args.sense < 50:
print "[-] Warning, 50% or less duplicate sensing may produce unsatisfactory results. Edit the code if you want to override"
sys.exit(1)
keepstring = True
sense_dict=[]
total_chars = len(inputstring)
sense_dict = list(inputstring)
for candidate in sense_dict:
char_count = inputstring.count(candidate)
average = (char_count / float(total_chars)) * 100
if average >= args.sense:
keepstring = False
break
if keepstring: return inputstring
#De-Tab Engine
def Detab(inputstring):
return inputstring.lstrip()
#Maxlen Engine
def Maxtrim(inputstring):
return inputstring[:args.maxtrim]
def Maxlen(inputstring):
if len(inputstring) > args.maxlen: return ''
else: return inputstring
#Minlen Engine
def Minlen(inputstring):
if len(inputstring) > args.minlen: return inputstring
#Digit Trim Engine
def Digit_Trim(inputstring):
#left-side process
newstring = inputstring.lstrip('1,2,3,4,5,6,7,8,9,0')
#right-side process
newstring = newstring.rstrip('1,2,3,4,5,6,7,8,9,0')
return newstring
#Special Trim Engine
def Special_Trim(inputstring):
newstring = inputstring.lstrip("!\"#$%&'()*+,-./:;?@[\]^_`{|}~")
return newstring.rstrip("!\"#$%&'()*+,-./:;?@[\]^_`{|}~")
#Duplicate Remover Engine
def Dup_Remove(inputstring):
holding = inputstring[:len(inputstring)/2]
if inputstring.count(holding) > 1: inputstring=inputstring.replace(holding,'')+holding
return inputstring
#No-Sentence Engine
def DeSentenceify(inputstring):
return inputstring.replace(' ','')
#Lower-case Engine
def Lower(inputstring):
return inputstring.lower()
#Date-Remover Engine
#TBA
#Wordify Engine
def Wordify(inputstring):
return inputstring.split()
#Binary Remove Engine
def Bin_Remove(inputstring):
print ""
#No Numbers Engine
def No_Numbers(inputstring):
#First remove any spaces, we dont want them.
inputstring.replace(' ','')
if inputstring.isdigit(): return ""
else: return inputstring
def main():
#Main Starts Here
#Lets get our file open and begin processing
if args.dewebify:
Dewebify(args.infile)
sys.exit(0)
if not args.infile:
print "[-] I need an input file with --infile! or try --help for help."
sys.exit()
try:
for words in open(args.infile):
words=words.rstrip('\n')
if args.maxlen: words = Maxlen(words)
if args.minlen: words = Minlen(words)
if args.digit_trim: words = Digit_Trim(words)
if args.special_trim: words = Special_Trim(words)
if args.dup_remove: words = Dup_Remove(words)
if args.no_sentence: words = DeSentenceify(words)
if args.lower: words = Lower(words)
if args.no_numbers: word = No_Numbers(words)
if args.detab: words = Detab(words)
if args.maxtrim: words = Maxtrim(words)
if args.sense: words = Sense(words)
if args.hashfilter: words = Hashfilter(words)
if args.emailsplit: words = Emailsplit(words)
if args.wordify:
for items in Wordify(words): print items
if args.emailsort:
for items in Emailsort(words): print items
try:
if len(words) > 0:
if (not args.wordify) and (not args.emailsort):
print words
except: continue
except IOError, error: print error.args[1]+" : "+args.infile
try:
if args.emailsplit:
user_file_handler = open(args.emailsplit[0],'a')
domain_file_handler = open(args.emailsplit[1],'a')
except IOError, error: print"[-] Problem during Email Split file handling:" +error.args[1]
if __name__ == "__main__":
main()