Skip to content

Commit c3c6670

Browse files
committed
parser-json-sarif: expand relative paths coming from GCC
Resolves: #209 Closes: #210
1 parent 92b64a4 commit c3c6670

5 files changed

+299
-0
lines changed

src/lib/parser-json-sarif.cc

+45
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ struct SarifTreeDecoder::Private {
2929
void readToolInfo(TScanProps *pScanProps, const pt::ptree *toolNode);
3030

3131
std::string singleChecker = "UNKNOWN_SARIF_WARNING";
32+
std::string pwd;
33+
const RE reFileUrl = RE("^file://");
3234
const RE reCwe = RE("^CWE-([0-9]+)$");
3335
const RE reVersion = RE("^([0-9][0-9.]+).*$");
3436
const RE reRuleId =
@@ -156,6 +158,22 @@ void SarifTreeDecoder::readScanProps(
156158
const pt::ptree *toolNode;
157159
if (findChildOf(&toolNode, run0, "tool"))
158160
d->readToolInfo(pDst, toolNode);
161+
162+
// read PWD so that we can reconstruct absolute paths later on
163+
const pt::ptree *uriBase, *pwdNode, *uriNode;
164+
if (findChildOf(&uriBase, run0, "originalUriBaseIds")
165+
&& findChildOf(&pwdNode, *uriBase, "PWD")
166+
&& findChildOf(&uriNode, *pwdNode, "uri"))
167+
{
168+
// remove the "file://" prefix
169+
const auto &pwd = uriNode->data();
170+
d->pwd = boost::regex_replace(pwd, d->reFileUrl, "");
171+
// FIXME: Should we check whether d->pwd begins with '/'?
172+
173+
// make sure that d->pwd ends with '/'
174+
if (!d->pwd.empty() && *d->pwd.rbegin() != '/')
175+
d->pwd += '/';
176+
}
159177
}
160178

161179
void SarifTreeDecoder::readRoot(const pt::ptree *runs)
@@ -321,6 +339,32 @@ static int sarifCweFromDefNode(const pt::ptree &defNode)
321339
return 0;
322340
}
323341

342+
/// Make the relative file names of all events in `*pDef` absolute.
///
/// `pwd` is prepended verbatim (no path separator is inserted here) to each
/// non-empty `fileName` that begins with neither '/' nor '<'.  If `pwd` is
/// empty, the defect is left untouched.
static void expandRelativePaths(Defect *pDef, const std::string &pwd)
{
    // nothing to do when no PWD info was provided
    if (pwd.empty())
        return;

    for (DefEvent &evt : pDef->events) {
        std::string &path = evt.fileName;

        // no file path to expand
        if (path.empty())
            continue;

        // keep absolute paths and placeholders (<unknown> and the like) as is
        const char first = path.front();
        if ('/' == first || '<' == first)
            continue;

        // relative path --> prepend `pwd`
        path.insert(0, pwd);
    }
}
367+
324368
bool SarifTreeDecoder::readNode(Defect *def)
325369
{
326370
// move the iterator after we get the current position
@@ -388,6 +432,7 @@ bool SarifTreeDecoder::readNode(Defect *def)
388432
if (findChildOf(&relatedLocs, defNode, "relatedLocations"))
389433
sarifReadComments(def, *relatedLocs);
390434

435+
expandRelativePaths(def, d->pwd);
391436
d->digger.inferLangFromChecker(def);
392437
d->digger.inferToolFromChecker(def);
393438

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
--mode=json
+209
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
{
2+
"$schema": "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
3+
"version": "2.1.0",
4+
"runs": [
5+
{
6+
"tool": {
7+
"driver": {
8+
"name": "GNU C17",
9+
"fullName": "GNU C17 (GCC) version 14.2.1 20240912 (Red Hat 14.2.1-4) (x86_64-redhat-linux)",
10+
"version": "14.2.1 20240912 (Red Hat 14.2.1-4)",
11+
"informationUri": "https://gcc.gnu.org/gcc-14/",
12+
"rules": [
13+
{
14+
"id": "-Wanalyzer-null-dereference",
15+
"helpUri": "https://gcc.gnu.org/onlinedocs/gcc-14.2.0/gcc/Static-Analyzer-Options.html#index-Wanalyzer-null-dereference"
16+
}
17+
]
18+
}
19+
},
20+
"taxonomies": [
21+
{
22+
"name": "CWE",
23+
"version": "4.7",
24+
"organization": "MITRE",
25+
"shortDescription": {
26+
"text": "The MITRE Common Weakness Enumeration"
27+
},
28+
"taxa": [
29+
{
30+
"id": "476",
31+
"helpUri": "https://cwe.mitre.org/data/definitions/476.html"
32+
}
33+
]
34+
}
35+
],
36+
"invocations": [
37+
{
38+
"executionSuccessful": true,
39+
"toolExecutionNotifications": []
40+
}
41+
],
42+
"originalUriBaseIds": {
43+
"PWD": {
44+
"uri": "file:///home/kdudka/"
45+
}
46+
},
47+
"artifacts": [
48+
{
49+
"location": {
50+
"uri": "xxx.c",
51+
"uriBaseId": "PWD"
52+
},
53+
"contents": {
54+
"text": "int main()\n{\n int *p = 0;\n return *p;\n}\n"
55+
},
56+
"sourceLanguage": "c"
57+
}
58+
],
59+
"results": [
60+
{
61+
"ruleId": "-Wanalyzer-null-dereference",
62+
"taxa": [
63+
{
64+
"id": "476",
65+
"toolComponent": {
66+
"name": "cwe"
67+
}
68+
}
69+
],
70+
"properties": {
71+
"gcc/analyzer/saved_diagnostic/sm": "malloc",
72+
"gcc/analyzer/saved_diagnostic/enode": 4,
73+
"gcc/analyzer/saved_diagnostic/snode": 1,
74+
"gcc/analyzer/saved_diagnostic/sval": "(int *)0B",
75+
"gcc/analyzer/saved_diagnostic/state": "null",
76+
"gcc/analyzer/saved_diagnostic/idx": 0
77+
},
78+
"level": "warning",
79+
"message": {
80+
"text": "dereference of NULL \u2018p\u2019"
81+
},
82+
"locations": [
83+
{
84+
"physicalLocation": {
85+
"artifactLocation": {
86+
"uri": "xxx.c",
87+
"uriBaseId": "PWD"
88+
},
89+
"region": {
90+
"startLine": 4,
91+
"startColumn": 12,
92+
"endColumn": 14
93+
},
94+
"contextRegion": {
95+
"startLine": 4,
96+
"snippet": {
97+
"text": " return *p;\n"
98+
}
99+
}
100+
},
101+
"logicalLocations": [
102+
{
103+
"name": "main",
104+
"fullyQualifiedName": "main",
105+
"decoratedName": "main",
106+
"kind": "function"
107+
}
108+
]
109+
}
110+
],
111+
"codeFlows": [
112+
{
113+
"threadFlows": [
114+
{
115+
"id": "main",
116+
"locations": [
117+
{
118+
"properties": {
119+
"gcc/analyzer/checker_event/emission_id": "(1)",
120+
"gcc/analyzer/checker_event/kind": "EK_STATE_CHANGE"
121+
},
122+
"location": {
123+
"physicalLocation": {
124+
"artifactLocation": {
125+
"uri": "xxx.c",
126+
"uriBaseId": "PWD"
127+
},
128+
"region": {
129+
"startLine": 3,
130+
"startColumn": 10,
131+
"endColumn": 11
132+
},
133+
"contextRegion": {
134+
"startLine": 3,
135+
"snippet": {
136+
"text": " int *p = 0;\n"
137+
}
138+
}
139+
},
140+
"logicalLocations": [
141+
{
142+
"name": "main",
143+
"fullyQualifiedName": "main",
144+
"decoratedName": "main",
145+
"kind": "function"
146+
}
147+
],
148+
"message": {
149+
"text": "\u2018p\u2019 is NULL"
150+
}
151+
},
152+
"kinds": [
153+
"release",
154+
"memory"
155+
],
156+
"nestingLevel": 1,
157+
"executionOrder": 1
158+
},
159+
{
160+
"properties": {
161+
"gcc/analyzer/checker_event/emission_id": "(2)",
162+
"gcc/analyzer/checker_event/kind": "EK_WARNING"
163+
},
164+
"location": {
165+
"physicalLocation": {
166+
"artifactLocation": {
167+
"uri": "xxx.c",
168+
"uriBaseId": "PWD"
169+
},
170+
"region": {
171+
"startLine": 4,
172+
"startColumn": 12,
173+
"endColumn": 14
174+
},
175+
"contextRegion": {
176+
"startLine": 4,
177+
"snippet": {
178+
"text": " return *p;\n"
179+
}
180+
}
181+
},
182+
"logicalLocations": [
183+
{
184+
"name": "main",
185+
"fullyQualifiedName": "main",
186+
"decoratedName": "main",
187+
"kind": "function"
188+
}
189+
],
190+
"message": {
191+
"text": "dereference of NULL \u2018p\u2019"
192+
}
193+
},
194+
"kinds": [
195+
"danger"
196+
],
197+
"nestingLevel": 1,
198+
"executionOrder": 2
199+
}
200+
]
201+
}
202+
]
203+
}
204+
]
205+
}
206+
]
207+
}
208+
]
209+
}
+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
{
2+
"scan": {
3+
"analyzer-version-gcc": "14.2.1"
4+
},
5+
"defects": [
6+
{
7+
"checker": "GCC_ANALYZER_WARNING",
8+
"cwe": 476,
9+
"language": "c/c++",
10+
"tool": "gcc-analyzer",
11+
"key_event_idx": 0,
12+
"events": [
13+
{
14+
"file_name": "/home/kdudka/xxx.c",
15+
"line": 4,
16+
"column": 12,
17+
"h_size": 2,
18+
"event": "warning[-Wanalyzer-null-dereference]",
19+
"message": "dereference of NULL ‘p’",
20+
"verbosity_level": 0
21+
},
22+
{
23+
"file_name": "/home/kdudka/xxx.c",
24+
"line": 3,
25+
"column": 10,
26+
"h_size": 1,
27+
"event": "release_memory",
28+
"message": "‘p’ is NULL",
29+
"verbosity_level": 1
30+
},
31+
{
32+
"file_name": "/home/kdudka/xxx.c",
33+
"line": 4,
34+
"column": 12,
35+
"h_size": 2,
36+
"event": "danger",
37+
"message": "dereference of NULL ‘p’",
38+
"verbosity_level": 1
39+
}
40+
]
41+
}
42+
]
43+
}

tests/csgrep/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -172,4 +172,5 @@ test_csgrep("0125-sarif-parser-bom" )
172172
test_csgrep("0126-cov-parser-imp-flag" )
173173
test_csgrep("0127-cov-writer-noloc" )
174174
test_csgrep("0128-cov-parser-noloc" )
175+
test_csgrep("0129-sarif-gcc-pwd" )
175176
test_csgrep("0131-unicontrol-perl-man-page" )

0 commit comments

Comments
 (0)