13
13
print ("[!]Error: You have to install BeautifulSoup module." )
14
14
exit ()
15
15
16
+ import os
16
17
import re
17
18
import math
18
19
import sys
@@ -61,8 +62,7 @@ class GitPrey(object):
61
62
def __init__(self, keyword):
    """
    Set up a GitPrey scanner for one set of search key words.

    :param keyword: Key words string used to build GitHub code-search queries
    """
    # Key words searched for on GitHub
    self.keyword = keyword
    # Code-search URL template; {page} and {keyword} are filled in per request
    self.search_url = "https://github.com/search?o=desc&p={page}&q={keyword}&ref=searchresults&s=indexed&type=Code&utf8=%E2%9C%93"
    # Desktop-browser User-Agent so GitHub serves its normal HTML pages
    self.headers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36"}
    # Session cookies; empty until login populates them elsewhere
    self.cookies = ""
68
def search_project (self ):
@@ -72,7 +72,7 @@ def search_project(self):
72
72
"""
73
73
unique_project_list = []
74
74
self .__auto_login (USER_NAME , PASSWORD )
75
- info_print ('[*] Searching projects hard...' )
75
+ info_print ('[*] Searching hard for projects ...' )
76
76
77
77
# Get unique project list of first page searched results
78
78
total_progress = SCAN_DEEP [SEARCH_LEVEL - 1 ]
@@ -105,16 +105,16 @@ def __page_project_list(page_html):
105
105
:returns: Project list of per page
106
106
"""
107
107
cur_par_html = BeautifulSoup (page_html , "lxml" )
108
- project_info = cur_par_html .select ("a.text-bold " )
109
- page_project = [project .text for project in project_info ]
108
+ project_info = cur_par_html .select ("a.link-gray " )
109
+ page_project = [project .text . strip () for project in project_info ]
110
110
return page_project
111
111
112
112
def sensitive_info_query (self , project_string , mode ):
113
113
"""
114
114
Search sensitive information and sensitive file from projects
115
115
:param project_string: Key words string for querying
116
116
:param mode: Searching mode within "content" or "filename"
117
- :returns: None
117
+ :returns: Code segments or file lists
118
118
"""
119
119
if mode == "content" :
120
120
# Output code line with sensitive key words like username.
@@ -132,16 +132,17 @@ def sensitive_info_query(self, project_string, mode):
132
132
if mode == "filename" :
133
133
# Search project according to file path.
134
134
path_sig_list = self .__pattern_db_list (PATH_DB )
135
- path_string = " filename:" + " filename:" .join (path_sig_list ) + project_string
135
+ path_string = "filename:" + " filename:" .join (path_sig_list ) + project_string
136
136
repo_file_dic = self .__file_name_inspect (path_string , print_mode = 1 )
137
137
return repo_file_dic
138
138
139
139
def __file_content_inspect (self , project_string , file_pattern , project_pattern ):
140
140
"""
141
141
Check sensitive code in particular project
142
- :param content_query_string: Content string for searching
143
- :param info_sig_match: information signature match regular
144
- :returns: None
142
+ :param project_string: Projects for searching
143
+ :param file_pattern: File string for searching
144
+ :param project_pattern: Content signature match regular
145
+ :returns: Code segments
145
146
"""
146
147
query_string = " OR " .join (project_pattern )
147
148
repo_file_dic = self .__file_name_inspect (query_string + project_string + file_pattern )
@@ -169,15 +170,16 @@ def __file_name_inspect(self, file_query_string, print_mode=0):
169
170
"""
170
171
Inspect sensitive file in particular project
171
172
:param file_query_string: File string for searching
172
- :returns: None
173
+ :param print_mode: 1 means print file, 0 means print code
174
+ :returns: Files lists
173
175
"""
174
176
page_num = 1
175
177
repo_file_dic = {}
176
178
while page_num <= SCAN_DEEP [SEARCH_LEVEL - 1 ]:
177
179
check_url = self .search_url .format (page = page_num , keyword = file_query_string )
178
180
page_html = self .__get_page_html (check_url )
179
181
project_html = BeautifulSoup (page_html , 'lxml' )
180
- repo_list = project_html .select ('div .min-width-0 > a:nth-of-type(2) ' )
182
+ repo_list = project_html .select ('a[data-hydro-click-hmac] ' )
181
183
if not repo_list :
182
184
break
183
185
# Handle file links for each project
@@ -205,7 +207,7 @@ def __pattern_db_list(file_path):
205
207
:returns: Signature item list
206
208
"""
207
209
item_list = []
208
- with open (file_path , 'r' ) as pattern_file :
210
+ with open (os . path . join ( os . path . dirname ( __file__ ), file_path ) , 'r' ) as pattern_file :
209
211
item_line = pattern_file .readline ()
210
212
while item_line :
211
213
item_list .append (item_line .strip ())
@@ -239,7 +241,7 @@ def __auto_login(self, username, password):
239
241
for item in input_items :
240
242
post_data [item .get ('name' )] = item .get ('value' )
241
243
post_data ['login' ], post_data ['password' ] = username , password
242
- login_request .post ("https://github.com/session" , data = post_data , headers = self .headers )
244
+ login_request .post ("https://github.com/session" , data = post_data , cookies = login_html . cookies , headers = self .headers )
243
245
self .cookies = login_request .cookies
244
246
if self .cookies ['logged_in' ] == 'no' :
245
247
error_print ('[!] Error: Login Github failed, please check account in config file.' )
@@ -280,18 +282,16 @@ def is_keyword_valid(keyword):
280
282
def init ():
281
283
"""
282
284
Initialize GitPrey with module inspection and input inspection
283
- :return: None
285
+ :return: Key words
284
286
"""
285
287
if not importlib .util .find_spec ('lxml' ):
286
- error_print ('[!]Error: You have to install lxml module.' )
288
+ error_print ('[!] Error: You have to install lxml module.' )
287
289
exit ()
288
290
289
291
# Get command parameters for searching level and key words
290
292
parser = argparse .ArgumentParser (description = "Searching sensitive file and content in GitHub." )
291
- parser .add_argument ("-l" , "--level" , type = int , choices = range (1 , 6 ), default = 1 , metavar = "level" ,
292
- help = "Set search level within 1~5, default is 1." )
293
- parser .add_argument ("-k" , "--keywords" , metavar = "keywords" , required = True ,
294
- help = "Set key words to search projects." )
293
+ parser .add_argument ("-l" , "--level" , type = int , choices = range (1 , 6 ), default = 1 , metavar = "level" , help = "Set search level within 1~5, default is 1." )
294
+ parser .add_argument ("-k" , "--keywords" , metavar = "keywords" , required = True , help = "Set key words to search projects." )
295
295
args = parser .parse_args ()
296
296
297
297
SEARCH_LEVEL = args .level if args .level else 1
@@ -309,7 +309,6 @@ def init():
309
309
310
310
return key_words
311
311
312
-
313
312
def project_miner (key_words ):
314
313
"""
315
314
Search projects for content and path inspection later.
@@ -323,6 +322,9 @@ def project_miner(key_words):
323
322
project_info_output = "\n [*] Found {num} public projects related to the key words.\n "
324
323
info_print (project_info_output .format (num = len (total_project_list )))
325
324
325
+ if (len (total_project_list ) == 0 ):
326
+ exit (0 )
327
+
326
328
# Join all projects to together to search
327
329
repo_string = " repo:" + " repo:" .join (total_project_list )
328
330
0 commit comments