2
2
import os
3
3
import re
4
4
import json
5
+ import logging
6
+ logging .basicConfig (level = logging .DEBUG )
5
7
6
8
from pathlib import Path
7
9
from mdutils import MdUtils
@@ -16,7 +18,7 @@ class Messages:
16
18
def map_severity_to_sarif (severity : str ) -> str :
17
19
"""
18
20
Map Socket severity levels to SARIF levels (GitHub code scanning).
19
-
21
+
20
22
'low' -> 'note'
21
23
'medium' or 'middle' -> 'warning'
22
24
'high' or 'critical' -> 'error'
@@ -39,115 +41,89 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str)
39
41
Supports:
40
42
1) JSON-based manifest files (package-lock.json, Pipfile.lock, composer.lock)
41
43
- Locates a dictionary entry with the matching package & version
42
- - Does a rough line-based search to find the actual line in the raw text
43
- 2) Text-based (requirements.txt, package.json, yarn.lock, etc.)
44
- - Uses compiled regex patterns to detect a match line by line
44
+ - Searches the raw text for the key
45
+ 2) Text-based (requirements.txt, package.json, yarn.lock, pnpm-lock.yaml, etc.)
46
+ - Uses regex patterns to detect a match line by line
45
47
"""
46
- # Extract just the file name to detect manifest type
47
48
file_type = Path (manifest_file ).name
49
+ logging .debug ("Processing file for line lookup: %s" , manifest_file )
48
50
49
- # ----------------------------------------------------
50
- # 1) JSON-based manifest files
51
- # ----------------------------------------------------
52
51
if file_type in ["package-lock.json" , "Pipfile.lock" , "composer.lock" ]:
53
52
try :
54
- # Read entire file so we can parse JSON and also do raw line checks
55
53
with open (manifest_file , "r" , encoding = "utf-8" ) as f :
56
54
raw_text = f .read ()
57
-
58
- # Attempt JSON parse
55
+ logging .debug ("Read %d characters from %s" , len (raw_text ), manifest_file )
59
56
data = json .loads (raw_text )
60
-
61
- # In practice, you may need to check data["dependencies"], data["default"], etc.
62
- # This is an example approach.
63
57
packages_dict = (
64
58
data .get ("packages" )
65
59
or data .get ("default" )
66
60
or data .get ("dependencies" )
67
61
or {}
68
62
)
69
-
63
+ logging . debug ( "Found package keys in %s: %s" , manifest_file , list ( packages_dict . keys ()))
70
64
found_key = None
71
65
found_info = None
72
- # Locate a dictionary entry whose 'version' matches
73
66
for key , value in packages_dict .items ():
74
- # For NPM package-lock, keys might look like "node_modules/axios"
75
67
if key .endswith (packagename ) and "version" in value :
76
68
if value ["version" ] == packageversion :
77
69
found_key = key
78
70
found_info = value
79
71
break
80
-
81
72
if found_key and found_info :
82
- # Search lines to approximate the correct line number
83
- needle_key = f'"{ found_key } ":' # e.g. "node_modules/axios":
84
- needle_version = f'"version": "{ packageversion } "'
73
+ needle_key = f'"{ found_key } ":'
85
74
lines = raw_text .splitlines ()
86
- best_line = 1
87
- snippet = None
88
-
75
+ logging .debug ("Total lines in %s: %d" , manifest_file , len (lines ))
89
76
for i , line in enumerate (lines , start = 1 ):
90
- if (needle_key in line ) or (needle_version in line ):
91
- best_line = i
92
- snippet = line .strip ()
93
- break # On first match, stop
94
-
95
- # If we found an approximate line, return it; else fallback to line 1
96
- if best_line > 0 and snippet :
97
- return best_line , snippet
98
- else :
99
- return 1 , f'"{ found_key } ": { found_info } '
77
+ if needle_key in line :
78
+ logging .debug ("Found match at line %d in %s: %s" , i , manifest_file , line .strip ())
79
+ return i , line .strip ()
80
+ return 1 , f'"{ found_key } ": { found_info } '
100
81
else :
101
82
return 1 , f"{ packagename } { packageversion } (not found in { manifest_file } )"
102
-
103
- except ( FileNotFoundError , json . JSONDecodeError ):
83
+ except ( FileNotFoundError , json . JSONDecodeError ) as e :
84
+ logging . error ( "Error reading %s: %s" , manifest_file , e )
104
85
return 1 , f"Error reading { manifest_file } "
105
86
106
- # ----------------------------------------------------
107
- # 2) Text-based / line-based manifests
108
- # ----------------------------------------------------
109
- # Define a dictionary of patterns for common manifest types
110
- search_patterns = {
111
- "package.json" : rf'"{ packagename } ":\s*"{ packageversion } "' ,
112
- "yarn.lock" : rf'{ packagename } @{ packageversion } ' ,
113
- "pnpm-lock.yaml" : rf'"{ re .escape (packagename )} "\s*:\s*\{{[^}}]*"version":\s*"{ re .escape (packageversion )} "' ,
114
- "requirements.txt" : rf'^{ re .escape (packagename )} \s*(?:==|===|!=|>=|<=|~=|\s+)?\s*{ re .escape (packageversion )} (?:\s*;.*)?$' ,
115
- "pyproject.toml" : rf'{ packagename } \s*=\s*"{ packageversion } "' ,
116
- "Pipfile" : rf'"{ packagename } "\s*=\s*"{ packageversion } "' ,
117
- "go.mod" : rf'require\s+{ re .escape (packagename )} \s+{ re .escape (packageversion )} ' ,
118
- "go.sum" : rf'{ re .escape (packagename )} \s+{ re .escape (packageversion )} ' ,
119
- "pom.xml" : rf'<artifactId>{ re .escape (packagename )} </artifactId>\s*<version>{ re .escape (packageversion )} </version>' ,
120
- "build.gradle" : rf'implementation\s+"{ re .escape (packagename )} :{ re .escape (packageversion )} "' ,
121
- "Gemfile" : rf'gem\s+"{ re .escape (packagename )} ",\s*"{ re .escape (packageversion )} "' ,
122
- "Gemfile.lock" : rf'\s+{ re .escape (packagename )} \s+\({ re .escape (packageversion )} \)' ,
123
- ".csproj" : rf'<PackageReference\s+Include="{ re .escape (packagename )} "\s+Version="{ re .escape (packageversion )} "\s*/>' ,
124
- ".fsproj" : rf'<PackageReference\s+Include="{ re .escape (packagename )} "\s+Version="{ re .escape (packageversion )} "\s*/>' ,
125
- "paket.dependencies" : rf'nuget\s+{ re .escape (packagename )} \s+{ re .escape (packageversion )} ' ,
126
- "Cargo.toml" : rf'{ re .escape (packagename )} \s*=\s*"{ re .escape (packageversion )} "' ,
127
- "build.sbt" : rf'"{ re .escape (packagename )} "\s*%\s*"{ re .escape (packageversion )} "' ,
128
- "Podfile" : rf'pod\s+"{ re .escape (packagename )} ",\s*"{ re .escape (packageversion )} "' ,
129
- "Package.swift" : rf'\.package\(name:\s*"{ re .escape (packagename )} ",\s*url:\s*".*?",\s*version:\s*"{ re .escape (packageversion )} "\)' ,
130
- "mix.exs" : rf'\{{:{ re .escape (packagename )} ,\s*"{ re .escape (packageversion )} "\}}' ,
131
- "composer.json" : rf'"{ re .escape (packagename )} ":\s*"{ re .escape (packageversion )} "' ,
132
- "conanfile.txt" : rf'{ re .escape (packagename )} /{ re .escape (packageversion )} ' ,
133
- "vcpkg.json" : rf'"{ re .escape (packagename )} ":\s*"{ re .escape (packageversion )} "' ,
134
- }
135
-
136
- # If no specific pattern is found for this file name, fallback to a naive approach
137
- searchstring = search_patterns .get (file_type , rf'{ re .escape (packagename )} .*{ re .escape (packageversion )} ' )
87
+ # For pnpm-lock.yaml, use a special regex pattern.
88
+ if file_type .lower () == "pnpm-lock.yaml" :
89
+ searchstring = rf'^\s*/{ re .escape (packagename )} /{ re .escape (packageversion )} :'
90
+ else :
91
+ search_patterns = {
92
+ "package.json" : rf'"{ packagename } ":\s*"[\^~]?{ re .escape (packageversion )} "' ,
93
+ "yarn.lock" : rf'{ packagename } @{ packageversion } ' ,
94
+ "requirements.txt" : rf'^{ re .escape (packagename )} \s*(?:==|===|!=|>=|<=|~=|\s+)?\s*{ re .escape (packageversion )} (?:\s*;.*)?$' ,
95
+ "pyproject.toml" : rf'{ packagename } \s*=\s*"{ re .escape (packageversion )} "' ,
96
+ "Pipfile" : rf'"{ packagename } "\s*=\s*"{ re .escape (packageversion )} "' ,
97
+ "go.mod" : rf'require\s+{ re .escape (packagename )} \s+{ re .escape (packageversion )} ' ,
98
+ "go.sum" : rf'{ re .escape (packagename )} \s+{ re .escape (packageversion )} ' ,
99
+ "pom.xml" : rf'<artifactId>{ re .escape (packagename )} </artifactId>\s*<version>{ re .escape (packageversion )} </version>' ,
100
+ "build.gradle" : rf'implementation\s+"{ re .escape (packagename )} :{ re .escape (packageversion )} "' ,
101
+ "Gemfile" : rf'gem\s+"{ re .escape (packagename )} ",\s*"{ re .escape (packageversion )} "' ,
102
+ "Gemfile.lock" : rf'\s+{ re .escape (packagename )} \s+\({ re .escape (packageversion )} \)' ,
103
+ ".csproj" : rf'<PackageReference\s+Include="{ re .escape (packagename )} "\s+Version="{ re .escape (packageversion )} "\s*/>' ,
104
+ ".fsproj" : rf'<PackageReference\s+Include="{ re .escape (packagename )} "\s+Version="{ re .escape (packageversion )} "\s*/>' ,
105
+ "paket.dependencies" : rf'nuget\s+{ re .escape (packagename )} \s+{ re .escape (packageversion )} ' ,
106
+ "Cargo.toml" : rf'{ re .escape (packagename )} \s*=\s*"{ re .escape (packageversion )} "' ,
107
+ "build.sbt" : rf'"{ re .escape (packagename )} "\s*%\s*"{ re .escape (packageversion )} "' ,
108
+ "Podfile" : rf'pod\s+"{ re .escape (packagename )} ",\s*"{ re .escape (packageversion )} "' ,
109
+ "Package.swift" : rf'\.package\(name:\s*"{ re .escape (packagename )} ",\s*url:\s*".*?",\s*version:\s*"{ re .escape (packageversion )} "\)' ,
110
+ "mix.exs" : rf'\{{:{ re .escape (packagename )} ,\s*"{ re .escape (packageversion )} "\}}' ,
111
+ "composer.json" : rf'"{ re .escape (packagename )} ":\s*"{ re .escape (packageversion )} "' ,
112
+ "conanfile.txt" : rf'{ re .escape (packagename )} /{ re .escape (packageversion )} ' ,
113
+ "vcpkg.json" : rf'"{ re .escape (packagename )} ":\s*"{ re .escape (packageversion )} "' ,
114
+ }
115
+ searchstring = search_patterns .get (file_type , rf'{ re .escape (packagename )} .*{ re .escape (packageversion )} ' )
138
116
117
+ logging .debug ("Using search pattern for %s: %s" , file_type , searchstring )
139
118
try :
140
- # Read file lines and search for a match
141
119
with open (manifest_file , 'r' , encoding = "utf-8" ) as file :
142
120
lines = [line .rstrip ("\n " ) for line in file ]
121
+ logging .debug ("Total lines in %s: %d" , manifest_file , len (lines ))
143
122
for line_number , line_content in enumerate (lines , start = 1 ):
144
- # For Python conditional dependencies, ignore everything after first ';'
145
123
line_main = line_content .split (";" , 1 )[0 ].strip ()
146
-
147
- # Use a case-insensitive regex search
148
124
if re .search (searchstring , line_main , re .IGNORECASE ):
125
+ logging .debug ("Match found at line %d in %s: %s" , line_number , manifest_file , line_content .strip ())
149
126
return line_number , line_content .strip ()
150
-
151
127
except FileNotFoundError :
152
128
return 1 , f"{ manifest_file } not found"
153
129
except Exception as e :
@@ -181,7 +157,6 @@ def get_manifest_type_url(manifest_file: str, pkg_name: str, pkg_version: str) -
181
157
"composer.json" : "composer" ,
182
158
"vcpkg.json" : "vcpkg" ,
183
159
}
184
-
185
160
file_type = Path (manifest_file ).name
186
161
url_prefix = manifest_to_url_prefix .get (file_type , "unknown" )
187
162
return f"https://socket.dev/{ url_prefix } /package/{ pkg_name } /alerts/{ pkg_version } "
@@ -191,29 +166,33 @@ def create_security_comment_sarif(diff) -> dict:
191
166
"""
192
167
Create SARIF-compliant output from the diff report, including dynamic URL generation
193
168
based on manifest type and improved <br/> formatting for GitHub SARIF display.
169
+
170
+ This function now:
171
+ - Processes every alert in diff.new_alerts.
172
+ - For alerts with multiple manifest files, generates an individual SARIF result for each file.
173
+ - Appends the manifest file name to the rule ID and name to make each result unique.
174
+ - Does NOT fall back to 'requirements.txt' if no manifest file is provided.
175
+ - Adds detailed logging to validate our assumptions.
176
+
194
177
"""
195
- scan_failed = False
196
178
if len (diff .new_alerts ) == 0 :
197
179
for alert in diff .new_alerts :
198
- alert : Issue
199
180
if alert .error :
200
- scan_failed = True
201
181
break
182
+
202
183
sarif_data = {
203
184
"$schema" : "https://json.schemastore.org/sarif-2.1.0.json" ,
204
185
"version" : "2.1.0" ,
205
- "runs" : [
206
- {
207
- "tool" : {
208
- "driver" : {
209
- "name" : "Socket Security" ,
210
- "informationUri" : "https://socket.dev" ,
211
- "rules" : []
212
- }
213
- },
214
- "results" : []
215
- }
216
- ]
186
+ "runs" : [{
187
+ "tool" : {
188
+ "driver" : {
189
+ "name" : "Socket Security" ,
190
+ "informationUri" : "https://socket.dev" ,
191
+ "rules" : []
192
+ }
193
+ },
194
+ "results" : []
195
+ }]
217
196
}
218
197
219
198
rules_map = {}
@@ -222,60 +201,77 @@ def create_security_comment_sarif(diff) -> dict:
222
201
for alert in diff .new_alerts :
223
202
pkg_name = alert .pkg_name
224
203
pkg_version = alert .pkg_version
225
- rule_id = f"{ pkg_name } =={ pkg_version } "
204
+ base_rule_id = f"{ pkg_name } =={ pkg_version } "
226
205
severity = alert .severity
227
206
228
- # Generate the correct URL for the alert based on manifest type
229
- introduced_list = alert .introduced_by
230
- manifest_file = introduced_list [0 ][1 ] if introduced_list and isinstance (introduced_list [0 ], list ) else alert .manifests or "requirements.txt"
231
- socket_url = Messages .get_manifest_type_url (manifest_file , pkg_name , pkg_version )
232
-
233
- # Prepare descriptions with <br/> replacements
234
- short_desc = f"{ alert .props .get ('note' , '' )} <br/><br/>Suggested Action:<br/>{ alert .suggestion } <br/><a href=\" { socket_url } \" >{ socket_url } </a>"
235
- full_desc = "{} - {}" .format (alert .title , alert .description .replace ('\r \n ' , '<br/>' ))
236
-
237
- # Identify the line and snippet in the manifest file
238
- line_number , line_content = Messages .find_line_in_file (pkg_name , pkg_version , manifest_file )
239
- if line_number < 1 :
240
- line_number = 1 # Ensure SARIF compliance
241
-
242
- # Create the rule if not already defined
243
- if rule_id not in rules_map :
244
- rules_map [rule_id ] = {
245
- "id" : rule_id ,
246
- "name" : f"{ pkg_name } =={ pkg_version } " ,
247
- "shortDescription" : {"text" : f"Alert generated for { rule_id } by Socket Security" },
248
- "fullDescription" : {"text" : full_desc },
249
- "helpUri" : socket_url ,
250
- "defaultConfiguration" : {
251
- "level" : Messages .map_severity_to_sarif (severity )
252
- },
253
- }
207
+ logging .debug ("Alert %s - introduced_by: %s, manifests: %s" , base_rule_id , alert .introduced_by , getattr (alert , 'manifests' , None ))
208
+ manifest_files = []
209
+ if alert .introduced_by and isinstance (alert .introduced_by , list ):
210
+ for entry in alert .introduced_by :
211
+ if isinstance (entry , (list , tuple )) and len (entry ) >= 2 :
212
+ files = [f .strip () for f in entry [1 ].split (";" ) if f .strip ()]
213
+ manifest_files .extend (files )
214
+ elif isinstance (entry , str ):
215
+ manifest_files .extend ([m .strip () for m in entry .split (";" ) if m .strip ()])
216
+ elif hasattr (alert , 'manifests' ) and alert .manifests :
217
+ manifest_files = [mf .strip () for mf in alert .manifests .split (";" ) if mf .strip ()]
218
+
219
+ logging .debug ("Alert %s - extracted manifest_files: %s" , base_rule_id , manifest_files )
220
+ if not manifest_files :
221
+ logging .error ("Alert %s: No manifest file found; cannot determine file location." , base_rule_id )
222
+ continue
223
+
224
+ logging .debug ("Alert %s - using manifest_files for processing: %s" , base_rule_id , manifest_files )
225
+
226
+ # Create an individual SARIF result for each manifest file.
227
+ for mf in manifest_files :
228
+ logging .debug ("Alert %s - Processing manifest file: %s" , base_rule_id , mf )
229
+ socket_url = Messages .get_manifest_type_url (mf , pkg_name , pkg_version )
230
+ line_number , line_content = Messages .find_line_in_file (pkg_name , pkg_version , mf )
231
+ if line_number < 1 :
232
+ line_number = 1
233
+ logging .debug ("Alert %s: Manifest %s, line %d: %s" , base_rule_id , mf , line_number , line_content )
234
+
235
+ # Create a unique rule id and name by appending the manifest file.
236
+ unique_rule_id = f"{ base_rule_id } ({ mf } )"
237
+ rule_name = f"Alert { base_rule_id } ({ mf } )"
238
+
239
+ short_desc = (f"{ alert .props .get ('note' , '' )} <br/><br/>Suggested Action:<br/>{ alert .suggestion } "
240
+ f"<br/><a href=\" { socket_url } \" >{ socket_url } </a>" )
241
+ full_desc = "{} - {}" .format (alert .title , alert .description .replace ('\r \n ' , '<br/>' ))
242
+
243
+ if unique_rule_id not in rules_map :
244
+ rules_map [unique_rule_id ] = {
245
+ "id" : unique_rule_id ,
246
+ "name" : rule_name ,
247
+ "shortDescription" : {"text" : rule_name },
248
+ "fullDescription" : {"text" : full_desc },
249
+ "helpUri" : socket_url ,
250
+ "defaultConfiguration" : {
251
+ "level" : Messages .map_severity_to_sarif (severity )
252
+ },
253
+ }
254
254
255
- # Add the SARIF result
256
- result_obj = {
257
- "ruleId" : rule_id ,
258
- "message" : {"text" : short_desc },
259
- "locations" : [
260
- {
255
+ result_obj = {
256
+ "ruleId" : unique_rule_id ,
257
+ "message" : {"text" : short_desc },
258
+ "locations" : [{
261
259
"physicalLocation" : {
262
- "artifactLocation" : {"uri" : manifest_file },
260
+ "artifactLocation" : {"uri" : mf },
263
261
"region" : {
264
262
"startLine" : line_number ,
265
263
"snippet" : {"text" : line_content },
266
264
},
267
265
}
268
- }
269
- ],
270
- }
271
- results_list .append (result_obj )
266
+ }]
267
+ }
268
+ results_list .append (result_obj )
272
269
273
- # Attach rules and results
274
270
sarif_data ["runs" ][0 ]["tool" ]["driver" ]["rules" ] = list (rules_map .values ())
275
271
sarif_data ["runs" ][0 ]["results" ] = results_list
276
272
277
273
return sarif_data
278
-
274
+
279
275
@staticmethod
280
276
def create_security_comment_json (diff : Diff ) -> dict :
281
277
scan_failed = False
0 commit comments