Skip to content

Commit bd3e94e

Browse files
committed
clean up some code
1 parent 8540440 commit bd3e94e

File tree

4 files changed

+65
-123
lines changed

4 files changed

+65
-123
lines changed

Dockerfile.gh

+1-1
Original file line number | Diff line number | Diff line change
@@ -23,4 +23,4 @@ RUN pip install -r requirements.txt
2323

2424
COPY . .
2525

26-
CMD [ "python3", "run_json.py", "--mode=check", "--splunk", "--err_file=err_biased_lang.log" ]
26+
CMD [ "python3", "run_json.py", "--splunk", "--err_file=err_biased_lang.log" ]

README.md

-5
Original file line number | Diff line number | Diff line change
@@ -76,8 +76,6 @@ Below is a list of arguments you can pass to the CLI tool.
7676
Note: For the additional arguments you find in `run_json.py` that aren't listed below, they are for internal use.
7777

7878
- **`--path=`** [_**required**_] absolute path to the directory
79-
- **`--mode=`** [_**required**_] `check` to scan for bias language
80-
- **`--verbose`** enables explicit logging (only applicable for check mode)
8179
- **`--err_file=`** sends any error messages to a log file of your choice, in addition to the console
8280
- **`--splunk`** [_**splunk_required**_] not available yet
8381
- **`--splunk_token=`** [_**splunk_required**_] not available yet
@@ -101,13 +99,10 @@ python3 run_json.py --mode=check --path=/user/jdoe/git/myProject
10199
#### biased-language-summary.json
102100

103101
`biased-language-summary.json` contains a summary of which files contain which biased words.
104-
(With `--verbose`, this output is capable of line-by-line reporting instead of a summary. The GitLab CI uses the summarized version.)
105102

106103
```sh
107104
{
108105
"terms_found": "true" | "false",
109-
"mode": "check",
110-
"verbose": "true" | "false",
111106
"total_lines_matched": "295",
112107
"total_files_matched": "54",
113108
"total_words_matched": "449",

run_json.py

+64-100
Original file line number | Diff line number | Diff line change
@@ -15,18 +15,14 @@
1515
'''
1616
This version of the script is intended to produce a JSON output and used in a CI environment
1717
like GitHub Actions or GitLab CI.
18-
19-
Note: This Python script only functions in check mode.
2018
'''
2119

2220
import argparse
2321
import constants
2422
import hashlib
2523
import json
2624
import os
27-
import re
2825
import sys
29-
import requests
3026
from subprocess import getoutput
3127
from copy import copy
3228
from tools.event2splunk import Event2Splunk
@@ -47,8 +43,6 @@ def build_args_dict(args=None):
4743
parser = argparse.ArgumentParser()
4844
parser.add_argument('--path')
4945
parser.add_argument('--url')
50-
parser.add_argument('--mode')
51-
parser.add_argument('--verbose', action='store_true')
5246
parser.add_argument('--err_file')
5347
parser.add_argument('--splunk', action='store_true')
5448
parser.add_argument('--h_endpoint')
@@ -64,14 +58,6 @@ def build_args_dict(args=None):
6458
raise Exception('No path specified')
6559
if path.endswith('/'):
6660
path = path[:-1]
67-
if args.mode == 'check':
68-
mode = 'check'
69-
elif args.mode == 'fix':
70-
raise Exception(
71-
'Fix mode for JSON output is not yet supported. Please use --mode=check or use standard output with fix mode instead.')
72-
if not mode:
73-
raise Exception(
74-
'Invalid mode specified. Please specify --mode=check or --mode=fix.')
7561
if args.err_file:
7662
if not os.path.exists(args.err_file):
7763
sys.stdout.write('%sWarning: no file "%s" for error logs found. Defaulting to "%s". %s\n' % (
@@ -83,8 +69,6 @@ def build_args_dict(args=None):
8369
return {
8470
'path': path,
8571
'url': args.url or os.environ.get('GITHUB_URL'),
86-
'mode': mode,
87-
'is_verbose': args.verbose,
8872
'splunk_flag': args.splunk,
8973
'err_file': args.err_file,
9074
'h_endpoint': args.h_endpoint,
@@ -102,7 +86,7 @@ def build_args_dict(args=None):
10286
'''
10387

10488

105-
def process_word_occurrences(results, batch_info, biased_word, is_verbose, path, splunk_flag):
89+
def process_word_occurrences(results, batch_info, biased_word, path, splunk_flag):
10690
json_result, report, events = {'biased_word': biased_word}, [], []
10791
files, lines = [], []
10892

@@ -147,12 +131,6 @@ def process_word_occurrences(results, batch_info, biased_word, is_verbose, path,
147131
'fingerprint': hashlib.md5(string.encode('utf-8')).hexdigest()
148132
}
149133

150-
if is_verbose:
151-
# add to json_result
152-
lines.append({'line': line, 'location': location})
153-
# add to code quality report
154-
occurrence['line'] = line
155-
# code quality report
156134
report.append(occurrence)
157135
# code quality events - additional details if Splunking
158136
if splunk_flag:
@@ -192,7 +170,7 @@ def process_biased_word_line(line, occurrences, code_quality_report, splunk_even
192170
# the data summary entry will always be there, hence the > 1
193171
if len(rg_results) > 1:
194172
json_results, word_report, events = process_word_occurrences(
195-
rg_results, batch_info, biased_word, args['is_verbose'], args['path'], args['splunk_flag'])
173+
rg_results, batch_info, biased_word, args['path'], args['splunk_flag'])
196174
terms_found = True
197175

198176
# add to code quality output and to Splunkable events list
@@ -207,7 +185,6 @@ def process_biased_word_line(line, occurrences, code_quality_report, splunk_even
207185

208186
return terms_found, copy_occurrences
209187

210-
211188
def main(args, logger):
212189
main_timer = TimeFunction('main', logger)
213190
main_timer.start()
@@ -223,82 +200,69 @@ def main(args, logger):
223200
args['path'], constants.EXCLUDE_FILE, constants.RGIGNORE_FILE)
224201
lines = open_csv('word_list.csv')
225202

226-
if args['mode'] == 'check':
227-
occurrences = {'biased_words': [],
228-
'mode': args['mode'], 'verbose': args['is_verbose']}
229-
code_quality_report, splunk_events = [], []
230-
terms_found = False
231-
232-
# Generate JSON
233-
for line in lines:
234-
terms_found, occurrences = process_biased_word_line(
235-
line, occurrences, code_quality_report, splunk_events, args, batch_info, terms_found, logger)
236-
237-
occurrences['terms_found'] = terms_found
238-
239-
# every JSON in the codeclimate array is a line found
240-
occurrences['total_lines_matched'] = len(code_quality_report)
241-
# dedupes the files and accounts for all words for total count
242-
all_files_matched = []
243-
occurrences['total_words_matched'] = 0
244-
for word in occurrences['biased_words']:
245-
if word in occurrences and len(occurrences[word]) > 0:
246-
occurrences['total_words_matched'] += occurrences[word]['num_matched_words']
247-
all_files_matched = list(
248-
set(all_files_matched) | set(occurrences[word]['files']))
249-
occurrences['total_files_matched'] = len(all_files_matched)
250-
251-
# print output to console
252-
print(json.dumps(occurrences, indent=2))
253-
254-
write_file(constants.SUMMARY_FILENAME, occurrences)
255-
write_file(constants.CODECLIMATE_FILENAME, code_quality_report)
256-
# final error check for check mode
257-
if terms_found:
258-
error_message = '%sError: %sBiased Lang Linter%s found biased words. Replacement(s) required. 🚨\nSee JSON output for details on what to replace. 🕵🏽‍♀️ %s\n' % (
259-
c['red'], c['lightmagenta'], c['red'], c['nc'])
260-
sys.stderr.write(error_message)
261-
if args['err_file']:
262-
with open(args['err_file'], 'w') as errfile:
263-
errfile.write(error_message)
264-
if args['splunk_flag']:
265-
# Splunk the code quality report
266-
# If ran in GitHub, call endpoint to Splunk data
267-
if args['github_repo']:
268-
# TODO: Call endpoint to post data to Splunk instance
269-
print('Posting data to Splunk')
270-
else:
271-
send_codeclimate_batch(constants.CODECLIMATE_FILENAME, splunk_events,
272-
repo_name, source_type, event2splunk)
273-
send_codeclimate_batch(constants.CODECLIMATE_FILENAME, splunk_events,
274-
repo_name, source_type, pz_event2splunk)
203+
# if args['mode'] == 'check':
204+
occurrences = {'biased_words': []}
205+
code_quality_report, splunk_events = [], []
206+
terms_found = False
207+
208+
# Generate JSON
209+
for line in lines:
210+
terms_found, occurrences = process_biased_word_line(
211+
line, occurrences, code_quality_report, splunk_events, args, batch_info, terms_found, logger)
212+
213+
occurrences['terms_found'] = terms_found
214+
215+
# every JSON in the codeclimate array is a line found
216+
occurrences['total_lines_matched'] = len(code_quality_report)
217+
# dedupes the files and accounts for all words for total count
218+
all_files_matched = []
219+
occurrences['total_words_matched'] = 0
220+
for word in occurrences['biased_words']:
221+
if word in occurrences and len(occurrences[word]) > 0:
222+
occurrences['total_words_matched'] += occurrences[word]['num_matched_words']
223+
all_files_matched = list(
224+
set(all_files_matched) | set(occurrences[word]['files']))
225+
occurrences['total_files_matched'] = len(all_files_matched)
226+
227+
# print output to console
228+
print(json.dumps(occurrences, indent=2))
229+
230+
write_file(constants.SUMMARY_FILENAME, occurrences)
231+
write_file(constants.CODECLIMATE_FILENAME, code_quality_report)
232+
err_file = args['err_file']
233+
# final error check for check mode
234+
if not terms_found:
235+
sys.stdout.write('%sBiased Lang Linter %sfound no biased words! 🎉%s\n' % (
236+
c['lightmagenta'], c['green'], c['nc']))
237+
else:
238+
error_message = '%sError: %sBiased Lang Linter%s found biased words. Replacement(s) required. 🚨\nSee JSON output for details on what to replace. 🕵🏽‍♀️ %s\n' % (
239+
c['red'], c['lightmagenta'], c['red'], c['nc'])
240+
sys.stderr.write(error_message)
241+
if err_file:
242+
with open(err_file, 'w') as errfile:
243+
errfile.write(error_message)
275244

276-
else:
277-
sys.stdout.write('%sBiased Lang Linter %sfound no biased words! 🎉%s\n' % (
278-
c['lightmagenta'], c['green'], c['nc']))
279-
280-
if args['splunk_flag']:
281-
# Splunk the summarized JSON
282-
occurrences['content'] = constants.SUMMARY_FILENAME
283-
occurrences.update(batch_info)
284-
occurrences['total_lines'] = get_line_count(args['path'], excluded)
285-
occurrences['run_time'] = main_timer.stop()
286-
# If ran in GitHub, call endpoint to Splunk data
287-
if args['github_repo']:
288-
# TODO: Call endpoint to post data to Splunk instance
289-
print('Splunking occurrences data from GitHub!')
290-
else:
291-
event2splunk.post_event(payload=occurrences,
292-
source=repo_name, sourcetype=source_type)
293-
event2splunk.close(filename=constants.SUMMARY_FILENAME)
294-
pz_event2splunk.post_event(
295-
payload=occurrences, source=repo_name, sourcetype=source_type)
296-
pz_event2splunk.close(filename=constants.SUMMARY_FILENAME)
297-
# For GitHub Actions to provide error annotations
298-
err_file = args['err_file']
299-
if os.path.exists(err_file) and args['github_repo']:
300-
print(f'{err_file} file found, exiting(1)')
301-
sys.exit(1)
245+
if args['splunk_flag']:
246+
# Splunk the summarized JSON
247+
occurrences['content'] = constants.SUMMARY_FILENAME
248+
occurrences.update(batch_info)
249+
occurrences['total_lines'] = get_line_count(args['path'], excluded)
250+
occurrences['run_time'] = main_timer.stop()
251+
if not args['github_repo']:
252+
send_codeclimate_batch(constants.CODECLIMATE_FILENAME, splunk_events,
253+
repo_name, source_type, event2splunk)
254+
send_codeclimate_batch(constants.CODECLIMATE_FILENAME, splunk_events,
255+
repo_name, source_type, pz_event2splunk)
256+
event2splunk.post_event(payload=occurrences,
257+
source=repo_name, sourcetype=source_type)
258+
event2splunk.close(filename=constants.SUMMARY_FILENAME)
259+
pz_event2splunk.post_event(
260+
payload=occurrences, source=repo_name, sourcetype=source_type)
261+
pz_event2splunk.close(filename=constants.SUMMARY_FILENAME)
262+
# For GitHub Actions to provide error annotations
263+
if os.path.exists(err_file) and args['github_repo']:
264+
print(f'{err_file} file found, exiting(1)')
265+
sys.exit(1)
302266

303267

304268
if __name__ == '__main__':

utils/utils.py

-17
Original file line number | Diff line number | Diff line change
@@ -67,23 +67,6 @@ def get_colors():
6767
}
6868

6969

70-
def get_colors_sh():
71-
return {
72-
'text': {
73-
'yellow': '\\033[0;33m',
74-
'green': '\\033[0;32m',
75-
'red': '\\033[0;31m',
76-
'lightmagenta': '\\033[0;95m',
77-
'orange': '\\033[38;5;172m',
78-
'nc': '\\033[0m'
79-
},
80-
'underline': {
81-
'cyan': '\\033[4;36m',
82-
'lightmagenta': '\\033[4;95m'
83-
}
84-
}
85-
86-
8770
def get_batch_info():
8871
year = str(datetime.now().year)
8972
timezone_offset = time.strftime('%z', time.gmtime())

0 commit comments

Comments (0)