Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retrieve SARIF errors and warnings correctly #4837

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
202 changes: 99 additions & 103 deletions megalinter/Linter.py
Original file line number Diff line number Diff line change
Expand Up @@ -1412,89 +1412,10 @@ def get_sarif_arguments(self):

# Find number of errors in linter stdout log
def get_total_number_errors(self, stdout: str):
total_errors = 0
total_errors = self.get_result_count(
stdout, "error", "cli_lint_errors_count", "cli_lint_errors_regex"
)

# Count using SARIF output file
if self.output_sarif is True:
try:
# SARIF is in MegaLinter named file
if self.sarif_output_file is not None and os.path.isfile(
self.sarif_output_file
):
with open(
self.sarif_output_file, "r", encoding="utf-8"
) as sarif_file:
sarif_output = yaml.safe_load(sarif_file)
# SARIF is in default output file
elif self.sarif_default_output_file is not None and os.path.isfile(
self.sarif_default_output_file
):
with open(
self.sarif_default_output_file, "r", encoding="utf-8"
) as sarif_file:
sarif_output = yaml.safe_load(sarif_file)
# SARIF is in stdout
else:
# SARIF is in stdout
sarif_output = yaml.safe_load(stdout)
if "results" in sarif_output["runs"][0]:
# Get number of results
total_errors = len(sarif_output["runs"][0]["results"])
# Append number of invocation config notifications (other type of errors, not in result)
if "invocations" in sarif_output["runs"][0]:
for invocation in sarif_output["runs"][0]["invocations"]:
if "toolConfigurationNotifications" in invocation:
total_errors += len(
invocation["toolConfigurationNotifications"]
)
# If we got here, we should have found a number of errors from SARIF output
if total_errors == 0:
logging.warning(
"Unable to get total errors from SARIF output.\nSARIF:"
+ str(sarif_output)
)
return total_errors
except Exception as e:
total_errors = 1
logging.error(
"Error while getting total errors from SARIF output.\nError:"
+ str(e)
+ "\nstdout: "
+ stdout
)
return total_errors
# Get number with a single regex. Used when linter prints out Found _ errors
elif self.cli_lint_errors_count == "regex_number":
reg = self.get_regex(self.cli_lint_errors_regex)
m = re.search(reg, utils.normalize_log_string(stdout))
if m:
total_errors = int(m.group(1))
# Count the number of occurrences of a regex corresponding to an error in linter log (parses linter log)
elif self.cli_lint_errors_count == "regex_count":
reg = self.get_regex(self.cli_lint_errors_regex)
total_errors = len(re.findall(reg, utils.normalize_log_string(stdout)))
# Sum of all numbers found in linter logs with a regex. Found when each file prints out total number of errors
elif self.cli_lint_errors_count == "regex_sum":
reg = self.get_regex(self.cli_lint_errors_regex)
matches = re.findall(reg, utils.normalize_log_string(stdout))
total_errors = sum(int(m) for m in matches)
# Count all lines of the linter log
elif self.cli_lint_errors_count == "total_lines":
total_errors = sum(
not line.isspace() and line != "" for line in stdout.splitlines()
)
# Count number of results in sarif format
elif self.cli_lint_errors_count == "sarif":
sarif = None
sarif_stdout = utils.find_json_in_stdout(stdout)
try:
sarif = json.loads(sarif_stdout)
except ValueError as e:
logging.warning(f"Unable to parse sarif ({str(e)}):" + stdout)
if sarif and sarif["runs"] and sarif["runs"][0]["results"]:
total_errors = len(sarif["runs"][0]["results"])
else:
logging.warning("Unable to find results in :" + stdout)
# Return result if found, else default value according to status
if total_errors > 0:
return total_errors
Expand All @@ -1512,28 +1433,10 @@ def get_total_number_errors(self, stdout: str):

# Find number of warnings in linter stdout log
def get_total_number_warnings(self, stdout: str):
total_warnings = None
total_warnings = self.get_result_count(
stdout, "warning", "cli_lint_warnings_count", "cli_lint_warnings_regex"
)

# Get number with a single regex.
if self.cli_lint_warnings_count == "regex_number":
reg = self.get_regex(self.cli_lint_warnings_regex)
m = re.search(reg, utils.normalize_log_string(stdout))
if m:
total_warnings = int(m.group(1))
# Count the number of occurrences of a regex corresponding to an error in linter log (parses linter log)
elif self.cli_lint_warnings_count == "regex_count":
reg = self.get_regex(self.cli_lint_warnings_regex)
total_warnings = len(re.findall(reg, utils.normalize_log_string(stdout)))
# Sum of all numbers found in linter logs with a regex. Found when each file prints out total number of errors
elif self.cli_lint_warnings_count == "regex_sum":
reg = self.get_regex(self.cli_lint_warnings_regex)
matches = re.findall(reg, utils.normalize_log_string(stdout))
total_warnings = sum(int(m) for m in matches)
# Count all lines of the linter log
elif self.cli_lint_warnings_count == "total_lines":
total_warnings = sum(
not line.isspace() and line != "" for line in stdout.splitlines()
)
if self.cli_lint_warnings_count is not None and total_warnings is None:
logging.warning(
f"Unable to get number of warnings with {self.cli_lint_warnings_count} "
Expand All @@ -1545,6 +1448,99 @@ def get_total_number_warnings(self, stdout: str):

return total_warnings

# Find number of results by level in linter stdout log
def get_result_count(
    self, stdout: str, level: str, count_property: str, regex_property: str
):
    """Count the results of a given severity level reported by the linter.

    When ``self.output_sarif`` is True the count is delegated to
    ``get_sarif_result_count``. Otherwise the counting mode is read from the
    attribute named by ``count_property`` and applied to ``stdout``.

    Args:
        stdout: Raw linter output.
        level: Severity to count, compared against the SARIF ``level`` of
            each result (callers in this file pass "error" or "warning").
        count_property: Name of the attribute on ``self`` that holds the
            counting mode (``regex_number``, ``regex_count``, ``regex_sum``,
            ``total_lines`` or ``sarif``).
        regex_property: Name of the attribute on ``self`` that holds the
            regex used by the ``regex_*`` modes.

    Returns:
        The number of results found, or 0 when nothing matched or the
        counting mode is not one of the recognized values.
    """
    # Count using SARIF output file
    if self.output_sarif is True:
        return self.get_sarif_result_count(stdout, level)

    total_result = 0
    count_mode = getattr(self, count_property)

    if count_mode in ("regex_number", "regex_count", "regex_sum"):
        # Always resolve the regex through regex_property: reading
        # cli_lint_errors_regex directly would make warning counts use the
        # errors pattern.
        reg = self.get_regex(getattr(self, regex_property))
        normalized_stdout = utils.normalize_log_string(stdout)
        if count_mode == "regex_number":
            # Single number captured by the regex.
            # Used when linter prints out Found _ errors/warnings
            m = re.search(reg, normalized_stdout)
            if m:
                total_result = int(m.group(1))
        elif count_mode == "regex_count":
            # Number of occurrences of a regex corresponding to
            # an error or warning in linter log (parses linter log)
            total_result = len(re.findall(reg, normalized_stdout))
        else:
            # Sum of all numbers found in linter logs with a regex.
            # Used when each file prints out its own total of errors or warnings
            total_result = sum(
                int(match) for match in re.findall(reg, normalized_stdout)
            )
    # Count all non-blank lines of the linter log
    elif count_mode == "total_lines":
        total_result = sum(
            not line.isspace() and line != "" for line in stdout.splitlines()
        )
    # Count number of results in sarif format
    elif count_mode == "sarif":
        sarif = None
        sarif_stdout = utils.find_json_in_stdout(stdout)
        try:
            sarif = json.loads(sarif_stdout)
        except ValueError as e:
            logging.warning(f"Unable to parse sarif ({str(e)}):" + stdout)
        runs = sarif.get("runs") if isinstance(sarif, dict) else None
        if runs:
            # "results" is optional on a run and "level" is optional on a
            # result; SARIF 2.1.0 treats a missing level as "warning".
            total_result = sum(
                1
                for run in runs
                for result in run.get("results") or []
                if result.get("level", "warning") == level
            )
        else:
            logging.warning("Unable to find results in:" + stdout)
    return total_result

def get_sarif_result_count(self, stdout: str, level: str):
    """Count the SARIF results whose level equals ``level``.

    The SARIF document is read from ``self.sarif_output_file`` if that file
    exists, else from ``self.sarif_default_output_file`` if that file exists,
    else it is parsed from ``stdout``.

    Args:
        stdout: Raw linter output, used as the SARIF source when no SARIF
            file is found, and echoed in the error log on failure.
        level: Severity to count, compared against each result's ``level``.

    Returns:
        The number of matching results across all runs. Returns 1 if the
        SARIF document cannot be read or parsed (the failure is logged).
    """
    total_result = 0

    try:
        # The MegaLinter-named file takes precedence over the linter's default
        # output file; the generator is lazy so the second path is only
        # inspected when the first one is unusable.
        sarif_path = next(
            (
                candidate
                for candidate in (
                    self.sarif_output_file,
                    self.sarif_default_output_file,
                )
                if candidate is not None and os.path.isfile(candidate)
            ),
            None,
        )
        if sarif_path is not None:
            with open(sarif_path, "r", encoding="utf-8") as sarif_file:
                sarif_output = yaml.safe_load(sarif_file)
        else:
            # SARIF is in stdout
            sarif_output = yaml.safe_load(stdout)

        for run in sarif_output["runs"]:
            # "results" is optional on a run and "level" is optional on a
            # result; SARIF 2.1.0 treats a missing level as "warning".
            # Indexing them directly would raise KeyError on valid documents
            # and be reported below as a bogus count of 1.
            for result in run.get("results") or []:
                if result.get("level", "warning") == level:
                    total_result += 1

        # If we got here, we should have found a number of results from SARIF output
        if total_result == 0:
            logging.warning(
                f"Unable to get total {level}s from SARIF output.\nSARIF:"
                + str(sarif_output)
            )
        return total_result
    except Exception as e:
        # Best effort: an unreadable or malformed SARIF document is logged
        # and reported as a single result rather than aborting the run.
        total_result = 1
        logging.error(
            f"Error while getting total {level}s from SARIF output.\nError:"
            + str(e)
            + "\nstdout: "
            + stdout
        )
        return total_result

# Build the CLI command to get linter version (can be overridden if --version is not the way to get the version)
def build_version_command(self):
cmd = [*self.cli_executable_version]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ linters:
cli_help_arg_name: --help
cli_version_arg_name: --version
cli_lint_errors_count: sarif
cli_lint_warnings_count: sarif
test_folder: devskim
examples:
- "devskim analyze --source-code ."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@
},
"cli_lint_errors_count": {
"$id": "#/properties/linters/items/properties/cli_lint_errors_count",
"description": "Defines how to count errors from log file. regex_number, regex_count, regex_sum, or total_lines",
"description": "Defines how to count errors from log file. regex_number, regex_count, regex_sum, total_lines or sarif",
"enum": [
"regex_number",
"regex_count",
Expand All @@ -632,7 +632,8 @@
"regex_number",
"regex_count",
"regex_sum",
"total_lines"
"total_lines",
"sarif"
],
"title": "Lint errors count mode",
"type": "string"
Expand Down Expand Up @@ -714,18 +715,20 @@
},
"cli_lint_warnings_count": {
"$id": "#/properties/linters/items/properties/cli_lint_warnings_count",
"description": "Defines how to count warnings from log file. regex_number, regex_count, regex_sum, or total_lines",
"description": "Defines how to count warnings from log file. regex_number, regex_count, regex_sum, total_lines or sarif",
"enum": [
"regex_number",
"regex_count",
"regex_sum",
"total_lines"
"total_lines",
"sarif"
],
"examples": [
"regex_number",
"regex_count",
"regex_sum",
"total_lines"
"total_lines",
"sarif"
],
"title": "Lint warnings count mode",
"type": "string"
Expand Down
10 changes: 9 additions & 1 deletion megalinter/utilstest.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,7 +598,7 @@ def test_linter_report_sarif(linter, test_self):
len(sarif_content["runs"]) > 0,
f"Empty runs list in {tmp_sarif_file_name}",
)
# Check number of errors is ok
# Check number of errors and warnings is ok
for linter in mega_linter.linters:
if (
linter.output_sarif is True
Expand All @@ -616,6 +616,14 @@ def test_linter_report_sarif(linter, test_self):
+ f"SARIF:{str(sarif_content)}",
)

if linter.cli_lint_warnings_count is not None:
test_self.assertTrue(
linter.total_number_warnings > 1,
f"Missing multiple sarif warnings in {linter.name}"
+ f" ({linter.total_number_warnings})\n"
+ f"SARIF:{str(sarif_content)}",
)


def assert_is_skipped(skipped_item, output, test_self):
test_self.assertRegex(
Expand Down
Loading