Skip to content

Commit 1708d5e

Browse files
committed
csfilter-kfp: script to filter known false positives
Resolves: https://issues.redhat.com/browse/OSH-736
1 parent 007e64e commit 1708d5e

17 files changed

+51598
-4
lines changed

CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ cmake_minimum_required(VERSION 3.15)
1919
project(csdiff CXX)
2020
enable_testing()
2121

22-
# C/C++ sources
22+
# source code
2323
add_subdirectory(src)
2424

2525
# regression tests

make-srpm.sh

+1
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ make version.cc
202202
%doc README
203203
%license COPYING
204204
%{_bindir}/csdiff
205+
%{_bindir}/csfilter-kfp
205206
%{_bindir}/csgrep
206207
%{_bindir}/cshtml
207208
%{_bindir}/cslinker

src/CMakeLists.txt

+5
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,11 @@ install(TARGETS
8282
cstrans-df-run
8383
DESTINATION ${CMAKE_INSTALL_BINDIR})
8484

85+
# install the csfilter-kfp script
86+
install(PROGRAMS
87+
csfilter-kfp
88+
DESTINATION ${CMAKE_INSTALL_BINDIR})
89+
8590
# optionally build statically linked csgrep-static
8691
option(CSGREP_STATIC "Set to ON to build the csgrep-static executable" OFF)
8792
if(CSGREP_STATIC)

src/csfilter-kfp

+238
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright (C) 2024 Red Hat, Inc.
4+
#
5+
# This file is part of csdiff.
6+
#
7+
# csdiff is free software: you can redistribute it and/or modify
8+
# it under the terms of the GNU General Public License as published by
9+
# the Free Software Foundation, either version 3 of the License, or
10+
# any later version.
11+
#
12+
# csdiff is distributed in the hope that it will be useful,
13+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
# GNU General Public License for more details.
16+
#
17+
# You should have received a copy of the GNU General Public License
18+
# along with csdiff. If not, see <http://www.gnu.org/licenses/>.
19+
20+
import argparse
21+
import os
22+
import re
23+
import shlex
24+
import subprocess
25+
import sys
26+
27+
28+
# Fallback source of known false positives: if neither --kfp-dir nor --kfp-git-url
# is specified, the data installed by the known-false-positives RPM package is used.
DEFAULT_KFP_DIR = "/usr/share/csmock/known-false-positives.d"
DEFAULT_KFP_JSON = "/usr/share/csmock/known-false-positives.js"
31+
32+
33+
def construct_init_cmd(args):
    """Return the bash preamble of the generated filtering script.

    The preamble enables strict error handling, creates a temporary directory
    whose path is exported as ``${td}`` and registers its removal on shell
    exit.  If ``args.verbose`` is true, XTRACE mode is switched on last, so
    only the commands appended after the preamble are traced.

    :param args: parsed command-line arguments (only ``verbose`` is read)
    :return: newline-terminated shell code
    """
    lines = [
        # make bash exit on error
        'set -e',

        # make bash propagate exit code from piped commands
        'set -o pipefail',

        # make bash expand empty globs
        'shopt -s nullglob',

        # create a temporary directory with an automatic destructor; the
        # assignment is kept apart from `export` because `export td=$(...)`
        # yields the exit status of `export` and would hide a failure of
        # mktemp from `set -e`
        'td=$(mktemp --directory --tmpdir tmp-csfilter-kfp.XXXXXXXXXX)',
        'export td',
        'trap "rm -fr \'${td}\'" EXIT',
    ]

    if args.verbose:
        # run shell in XTRACE mode
        lines.append('set -x')

    return ''.join(f'{line}\n' for line in lines)
52+
53+
54+
def construct_git_cmd(kfp_git_url):
    """Return shell code that clones the KFP git repository into ``${td}/kfp``.

    ``kfp_git_url`` is either a plain clone URL or a clone URL followed by
    ``#`` and a revision written in lowercase hexadecimal digits.  Without a
    revision, a shallow clone of the default branch is made.  With a revision,
    a full clone is made and hard-reset to that revision.

    :param kfp_git_url: clone URL, optionally suffixed by ``#<revision>``
    :return: newline-terminated shell code
    """
    # split kfp_git_url into the clone URL and (optional) revision; the whole
    # string has to match, otherwise a fragment like `#feature` would be
    # silently truncated to its leading hexadecimal digits (`fea`)
    m = re.fullmatch(r"(.*)#([0-9a-f]+)", kfp_git_url)
    if m is None:
        # shallow clone of the default branch
        url = shlex.quote(kfp_git_url)
        return f'git clone --depth 1 {url} "${{td}}/kfp"\n'

    # checkout a specific revision (rev consists of [0-9a-f] only, no quoting needed)
    url = shlex.quote(m.group(1))
    rev = m.group(2)
    return f'git clone {url} "${{td}}/kfp"\n' \
           f'git -C "${{td}}/kfp" reset -q --hard {rev}\n'
67+
68+
69+
def construct_prep_cmd(args):
    """Return shell code that makes the KFP data available under ``${td}``.

    The source is chosen in this order: ``args.kfp_git_url`` (git clone),
    ``args.kfp_dir`` (symlink to the directory), the system-wide installation
    referred to by ``DEFAULT_KFP_DIR``/``DEFAULT_KFP_JSON``.  Unless the
    pre-built system-wide JSON file is used, the generated code also builds
    ``${td}/kfp.json`` from the per-project ``ignore.err`` and
    ``true-positives-ignore.err`` files.

    :param args: parsed command-line arguments (``kfp_git_url`` and ``kfp_dir`` are read)
    :return: newline-terminated shell code
    :raises RuntimeError: if no source of known false positives is available
    """
    # check which KFP will be used
    have_kfp_json = False
    if args.kfp_git_url:
        # clone git repo
        cmd = construct_git_cmd(args.kfp_git_url)
    elif args.kfp_dir:
        # symlink an absolute path to the directory
        kfp_abs = shlex.quote(os.path.realpath(args.kfp_dir))
        cmd = f'ln -s {kfp_abs} "${{td}}/kfp"\n'
    elif os.path.isfile(DEFAULT_KFP_JSON):
        # create symlinks to the known-false-positives RPM package installed on the system
        cmd = f'ln -s "{DEFAULT_KFP_DIR}" "${{td}}/kfp"\n' \
              f'ln -s "{DEFAULT_KFP_JSON}" "${{td}}/kfp.json"\n'
        have_kfp_json = True
    else:
        raise RuntimeError("no source of KFP specified, please use --kfp-dir or --kfp-git-url"
                           " (or install the known-false-positives RPM package)")

    if not have_kfp_json:
        # create all-in-one kfp.json file from files in ${td}/kfp
        # NOTE(review): empty.err presumably guarantees that csgrep gets at least
        # one input file when both globs expand to nothing (nullglob) -- confirm
        cmd += 'touch "${td}/empty.err"\n'
        cmd += '(cd "${td}/kfp" && csgrep --mode=json --remove-duplicates "${td}/empty.err"'
        cmd += ' */ignore.err */true-positives-ignore.err >"${td}/kfp.json")\n'

    return cmd
95+
96+
97+
def construct_path_filter(args):
    """Return the body of the ``path_filter`` shell function.

    Without ``args.project_nvr`` the body is a plain ``cat``.  Otherwise the
    package name is derived from the NVR and the body reads
    ``${td}/kfp/<package>/exclude-paths.txt`` at run-time, joins its lines
    into one alternation of regular expressions and passes it to
    ``csgrep --invert-match --path``.  If no expression is read, the body
    falls back to ``cat``.

    :param args: parsed command-line arguments (only ``project_nvr`` is read)
    :return: newline-terminated shell code
    :raises RuntimeError: if the derived package name contains characters
        other than ASCII letters, digits, ``-`` and ``_``
    """
    if args.project_nvr is None:
        # TODO: read project_nvr from scan properties if available
        return ' cat\n'

    # cut off the `-version-release` or `-version` suffix to obtain package name where `version` can be
    # a number optionally prefixed by `v` or a full-size SHA1 hash encoded in lowercase as, for example,
    # in `project-koku-koku-cbe5e5c3355c1e140aa1cca7377aebe09d8d8466`
    proj = re.sub(r"-(([v]?[0-9][^-]*)|([0-9a-f]{40}))(-[0-9][^-]*)?$", "", args.project_nvr)

    # validate the resulting project name; fullmatch is used because `$` would
    # also accept a name followed by a newline, which would end up in the script
    if not re.fullmatch(r"[A-Za-z0-9_-]+", proj):
        raise RuntimeError(f"invalid project name: {proj}")

    # generate a script that will construct the filter at run-time
    cmd = f' ep="${{td}}/kfp/{proj}/exclude-paths.txt"\n'
    cmd += ' re=\n'

    # `read -r` keeps backslashes of the regular expressions (e.g. `\.py$`) intact
    cmd += ' while read -r line; do\n'
    cmd += ' re="${re}|(${line})"\n'

    # skip comments and blank lines; a blank line would otherwise contribute `()`,
    # which matches every path and thereby excludes all findings
    cmd += ' done < <(grep -Esv \'^(#|[[:space:]]*$)\' "$ep")\n'

    cmd += ' if test -n "$re"; then\n'
    cmd += ' csgrep --mode=json --invert-match --path="${re#|}"\n'
    cmd += ' else\n'
    cmd += ' cat\n'
    cmd += ' fi\n'
    return cmd
123+
124+
125+
def construct_filter_cmd(args):
    """Return the complete bash script that filters known false positives.

    The script consists of the preamble, the preparation of KFP data, the
    conversion of the input to ``${td}/input.json``, the definition of the
    ``path_filter`` shell function, the ``csdiff | path_filter`` pipeline
    writing ``${td}/output.json``, the optional recording of excluded
    findings and the final export in plain-text or JSON format.

    :param args: parsed command-line arguments
    :return: newline-terminated shell code
    :raises RuntimeError: propagated from the helpers that construct the
        individual parts of the script
    """
    # set shell options and create a temporary directory ${td}
    cmd = construct_init_cmd(args)

    # prepare the KFP data from the specified source
    cmd += construct_prep_cmd(args)

    # read the whole input into a JSON file
    cmd += 'csgrep --mode=json'
    if args.input_file:
        input_file = shlex.quote(args.input_file)
        cmd += f' {input_file}'
    cmd += ' >"${td}/input.json"\n'

    # define path-based filter
    path_filter = construct_path_filter(args)
    cmd += f'path_filter() {{\n{path_filter}}}\n'

    # exclude individual findings
    cmd += 'csdiff --show-internal "${td}/kfp.json" "${td}/input.json"'

    # exclude paths in the scan results (redirect target quoted like all other uses of ${td})
    cmd += ' | path_filter >"${td}/output.json"\n'

    if args.record_excluded:
        # record excluded findings to the specified file
        excluded_file = shlex.quote(args.record_excluded)
        cmd += 'csdiff "${td}/output.json" "${td}/input.json"'
        cmd += f' >{excluded_file}\n'

    if not args.json_output:
        # export plain-text format
        cmd += 'csgrep "${td}/output.json"\n'
        return cmd

    # export JSON format
    cmd += 'csgrep --mode=json "${td}/output.json"'

    # optionally record the source of known-false-positives
    if args.kfp_dir:
        kfp_dir = shlex.quote(args.kfp_dir)
        cmd += f' --set-scan-prop=known-false-positives-dir:{kfp_dir}'
    elif args.kfp_git_url:
        kfp_git_url = shlex.quote(args.kfp_git_url)
        cmd += f' --set-scan-prop=known-false-positives-git-url:{kfp_git_url}'
    cmd += '\n'

    return cmd
173+
174+
175+
def main():
    """Parse command-line arguments and run (or print) the filtering script.

    With ``--dry-run`` the generated bash script is printed and the process
    exits successfully.  Otherwise the script is executed by ``/bin/bash``
    and a non-zero exit status of the shell becomes the exit status of this
    process.  Invalid arguments are reported through ``parser.error()``.
    """
    # initialize argument parser
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "input_file", nargs="?",
        help="optional name of the input file (standard input is used by default)")

    # source of known-false-positives
    kfp_source = parser.add_mutually_exclusive_group()
    kfp_source.add_argument(
        "--kfp-dir",
        help="known false positives directory")
    kfp_source.add_argument(
        "--kfp-git-url",
        help="known false positives git URL (optionally taking a revision delimited by #)")

    parser.add_argument(
        "--project-nvr",
        help="Name-Version-Release (NVR) of the scanned project, used to find path exclusions")

    parser.add_argument(
        "--record-excluded",
        help="file to store all excluded findings to")

    parser.add_argument(
        "--json-output", action="store_true", default=(not sys.stdout.isatty()),
        help="produce JSON output (default if stdout is not connected to a terminal)")

    parser.add_argument(
        "-v", "--verbose", action="store_true",
        help="run shell in XTRACE mode while executing the filtering script")

    parser.add_argument(
        "-n", "--dry-run", action="store_true",
        help="do not execute anything, only print the shell script that would be executed")

    # parse command-line arguments
    args = parser.parse_args()

    # if --kfp-dir is used, check that a directory was given
    if args.kfp_dir and not os.path.isdir(args.kfp_dir):
        parser.error(f"'{args.kfp_dir}' given to --kfp-dir is not a directory")

    # construct the command to filter
    try:
        cmd = construct_filter_cmd(args)
    except RuntimeError as e:
        # parser.error() prints the message and terminates the process
        parser.error(str(e))

    if args.dry_run:
        # print the command and exit successfully
        print(cmd, end='')
        sys.exit(0)

    # run the command
    proc = subprocess.run(cmd, shell=True, check=False, executable='/bin/bash')
    if proc.returncode < 0:
        # bash was terminated by signal N (returncode == -N); report it the way
        # shells do (128 + N) instead of passing a negative status to sys.exit()
        sys.exit(128 - proc.returncode)
    if proc.returncode != 0:
        sys.exit(proc.returncode)
235+
236+
237+
# run main() only when executed as a script, not when imported as a module
if __name__ == "__main__":
    main()

tests/CMakeLists.txt

+4-2
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,12 @@ set(diffcmd "diff -up")
2626

2727
# eliminate csdiff's version string in the output
2828
set(jsfilter "sed -e 's|\"version\": \"[^\"]*\"|\"version\": \"\"|g'")
29+
set(jsfilter "${jsfilter} -e 's|${CMAKE_SOURCE_DIR}/tests/csfilter-kfp/|\$PROJECT_ROOT/tests/csfilter-kfp/|'")
2930

3031
macro(add_test_wrap test_name cmd)
3132
add_test("${test_name}" bash -c "${cmd}")
32-
set_tests_properties(${test_name} PROPERTIES
33-
ENVIRONMENT "PROJECT_ROOT=${CMAKE_SOURCE_DIR}")
33+
set_tests_properties(${test_name} PROPERTIES ENVIRONMENT
34+
"PATH=${CMAKE_BINARY_DIR}/src:$ENV{PATH};PROJECT_ROOT=${CMAKE_SOURCE_DIR}")
3435

3536
set_tests_properties(${test_name} PROPERTIES COST ${test_cost})
3637
math(EXPR test_cost "${test_cost} - 1")
@@ -45,6 +46,7 @@ endmacro()
4546
set(test_cost 1048576)
4647

4748
add_subdirectory(csdiff)
49+
add_subdirectory(csfilter-kfp)
4850
add_subdirectory(csgrep)
4951
add_subdirectory(cshtml)
5052
add_subdirectory(cslinker)

tests/csfilter-kfp/0001-args.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
--kfp-dir "$PROJECT_ROOT/tests/csfilter-kfp/0001-kfp" --project-nvr project-koku-koku-cbe5e5c3355c1e140aa1cca7377aebe09d8d8466
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
.*/test/.*
2+
.*/tests/.*
3+
.*testing/.*
4+
.*/[^/]*test_[^/]*\.py$
5+
.*docker-compose.*
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
Error: SNYK_CODE_WARNING (CWE-89):
2+
project-koku-koku-5c7647f/koku/masu/api/db_performance/dbp_views.py:483:24: error[python/Sqli]: Unsanitized input from the HTTP request body flows into execute, where it is used in an SQL query. This may result in an SQL Injection vulnerability.
3+
# 481| with DBPerformanceStats(get_identity_username(request), CONFIGURATOR) as dbp:
4+
# 482| try:
5+
# 483|-> data = dbp.explain_sql(query_params["sql_statement"])
6+
# 484| except ProgrammingError as e:
7+
# 485| data = {"query_plan": f"{type(e).__name__}: {str(e)}"}
8+
# dbp.explain_sql parses and sanitizes the inputted query params. The query is then run through a read-only db connection.
9+
10+
Error: IDENTIFIER_TYPO (CWE-688):
11+
project-koku-koku-30de2cf/koku/api/settings/tags/mapping/utils.py:96: identifier_typo: Using "provider__uuid" appears to be a typo:
12+
* Identifier "provider__uuid" is only known to be referenced here, or in copies of this code.
13+
* Identifier "provider_uuid" is referenced elsewhere at least 216 times.
14+
project-koku-koku-30de2cf/koku/api/settings/tags/mapping/utils.py:96: remediation: Should identifier "provider__uuid" be replaced by "provider_uuid"?
15+
project-koku-koku-30de2cf/dev/scripts/trino_query.py:11: identifier_use: Example 1: Using identifier "provider_uuid" (2 total uses in this function).
16+
project-koku-koku-30de2cf/koku/api/provider/models.py:224: identifier_use: Example 2: Using identifier "provider_uuid".
17+
project-koku-koku-30de2cf/koku/api/report/ocp/query_handler.py:147: identifier_use: Example 3: Using identifier "provider_uuid".
18+
project-koku-koku-30de2cf/koku/api/test_utils.py:303: identifier_use: Example 4: Using identifier "provider_uuid".
19+
project-koku-koku-30de2cf/koku/cost_models/cost_model_manager.py:123: identifier_use: Example 5: Using identifier "provider_uuid".
20+
# 94| provider_uuids = (
21+
# 95| OCPUsageReportPeriod.objects.filter(cluster_id__in=clusters, report_period_start=start_date)
22+
# 96|-> .values_list("provider__uuid", flat=True)
23+
# 97| .distinct()
24+
# 98| )
25+
# This is Django syntax to query through a foreign key.
26+
27+
Error: SNYK_CODE_WARNING (CWE-89):
28+
project-koku-koku-cf77b7a/koku/masu/api/trino.py:56:13: error[python/Sqli]: Unsanitized input from the HTTP request body flows into execute, where it is used in an SQL query. This may result in an SQL Injection vulnerability.
29+
# 54| ) as conn:
30+
# 55| cur = conn.cursor()
31+
# 56|-> cur.execute(query)
32+
# 57| cols = [des[0] for des in cur.description]
33+
# 58| rows = cur.fetchall()
34+
# this trino connection has been converted to a READONLY connection

0 commit comments

Comments
 (0)