Skip to content

Commit bd3e94e

Browse files
committed
clean up some code
1 parent 8540440 commit bd3e94e

File tree

4 files changed

+65
-123
lines changed

4 files changed

+65
-123
lines changed

Dockerfile.gh

+1-1
Original file line number | Diff line number | Diff line change
@@ -23,4 +23,4 @@ RUN pip install -r requirements.txt
2323

2424
COPY . .
2525

26-
CMD [ "python3", "run_json.py", "--mode=check", "--splunk", "--err_file=err_biased_lang.log" ]
26+
CMD [ "python3", "run_json.py", "--splunk", "--err_file=err_biased_lang.log" ]

README.md

-5
Original file line number | Diff line number | Diff line change
@@ -76,8 +76,6 @@ Below is a list of arguments you can pass to the CLI tool.
7676
Note: For the additional arguments you find in `run_json.py` that aren't listed below, they are for internal use.
7777

7878
- **`--path=`** [_**required**_] absolute path to the directory
79-
- **`--mode=`** [_**required**_] `check` to scan for bias language
80-
- **`--verbose`** enables explicit logging (only applicable for check mode)
8179
- **`--err_file=`** sends any error messages to a log file of your choice, in addition to the console
8280
- **`--splunk`** [_**splunk_required**_] not available yet
8381
- **`--splunk_token=`** [_**splunk_required**_] not available yet
@@ -101,13 +99,10 @@ python3 run_json.py --mode=check --path=/user/jdoe/git/myProject
10199
#### biased-language-summary.json
102100

103101
`biased-language-summary.json` contains a summary of which files contain which biased words.
104-
(With `--verbose`, this output is capable of line-by-line reporting instead of a summary. The GitLab CI uses the summarized version.)
105102

106103
```sh
107104
{
108105
"terms_found": "true" | "false",
109-
"mode": "check",
110-
"verbose": "true" | "false",
111106
"total_lines_matched": "295",
112107
"total_files_matched": "54",
113108
"total_words_matched": "449",

run_json.py

+64-100
Original file line number | Diff line number | Diff line change
@@ -15,18 +15,14 @@
1515
'''
1616
This version of the script is intended to produce a JSON output and used in a CI environment
1717
like GitHub Actions or GitLab CI.
18-
19-
Note: This Python script only functions in check mode.
2018
'''
2119

2220
import argparse
2321
import constants
2422
import hashlib
2523
import json
2624
import os
27-
import re
2825
import sys
29-
import requests
3026
from subprocess import getoutput
3127
from copy import copy
3228
from tools.event2splunk import Event2Splunk
@@ -47,8 +43,6 @@ def build_args_dict(args=None):
4743
parser = argparse.ArgumentParser()
4844
parser.add_argument('--path')
4945
parser.add_argument('--url')
50-
parser.add_argument('--mode')
51-
parser.add_argument('--verbose', action='store_true')
5246
parser.add_argument('--err_file')
5347
parser.add_argument('--splunk', action='store_true')
5448
parser.add_argument('--h_endpoint')
@@ -64,14 +58,6 @@ def build_args_dict(args=None):
6458
raise Exception('No path specified')
6559
if path.endswith('/'):
6660
path = path[:-1]
67-
if args.mode == 'check':
68-
mode = 'check'
69-
elif args.mode == 'fix':
70-
raise Exception(
71-
'Fix mode for JSON output is not yet supported. Please use --mode=check or use standard output with fix mode instead.')
72-
if not mode:
73-
raise Exception(
74-
'Invalid mode specified. Please specify --mode=check or --mode=fix.')
7561
if args.err_file:
7662
if not os.path.exists(args.err_file):
7763
sys.stdout.write('%sWarning: no file "%s" for error logs found. Defaulting to "%s". %s\n' % (
@@ -83,8 +69,6 @@ def build_args_dict(args=None):
8369
return {
8470
'path': path,
8571
'url': args.url or os.environ.get('GITHUB_URL'),
86-
'mode': mode,
87-
'is_verbose': args.verbose,
8872
'splunk_flag': args.splunk,
8973
'err_file': args.err_file,
9074
'h_endpoint': args.h_endpoint,
@@ -102,7 +86,7 @@ def build_args_dict(args=None):
10286
'''
10387

10488

105-
def process_word_occurrences(results, batch_info, biased_word, is_verbose, path, splunk_flag):
89+
def process_word_occurrences(results, batch_info, biased_word, path, splunk_flag):
10690
json_result, report, events = {'biased_word': biased_word}, [], []
10791
files, lines = [], []
10892

@@ -147,12 +131,6 @@ def process_word_occurrences(results, batch_info, biased_word, is_verbose, path,
147131
'fingerprint': hashlib.md5(string.encode('utf-8')).hexdigest()
148132
}
149133

150-
if is_verbose:
151-
# add to json_result
152-
lines.append({'line': line, 'location': location})
153-
# add to code quality report
154-
occurrence['line'] = line
155-
# code quality report
156134
report.append(occurrence)
157135
# code quality events - additional details if Splunking
158136
if splunk_flag:
@@ -192,7 +170,7 @@ def process_biased_word_line(line, occurrences, code_quality_report, splunk_even
192170
# the data summary entry will always be there, hence the > 1
193171
if len(rg_results) > 1:
194172
json_results, word_report, events = process_word_occurrences(
195-
rg_results, batch_info, biased_word, args['is_verbose'], args['path'], args['splunk_flag'])
173+
rg_results, batch_info, biased_word, args['path'], args['splunk_flag'])
196174
terms_found = True
197175

198176
# add to code quality output and to Splunkable events list
@@ -207,7 +185,6 @@ def process_biased_word_line(line, occurrences, code_quality_report, splunk_even
207185

208186
return terms_found, copy_occurrences
209187

210-
211188
def main(args, logger):
212189
main_timer = TimeFunction('main', logger)
213190
main_timer.start()
@@ -223,82 +200,69 @@ def main(args, logger):
223200
args['path'], constants.EXCLUDE_FILE, constants.RGIGNORE_FILE)
224201
lines = open_csv('word_list.csv')
225202

226-
if args['mode'] == 'check':
227-
occurrences = {'biased_words': [],
228-
'mode': args['mode'], 'verbose': args['is_verbose']}
229-
code_quality_report, splunk_events = [], []
230-
terms_found = False
231-
232-
# Generate JSON
233-
for line in lines:
234-
terms_found, occurrences = process_biased_word_line(
235-
line, occurrences, code_quality_report, splunk_events, args, batch_info, terms_found, logger)
236-
237-
occurrences['terms_found'] = terms_found
238-
239-
# every JSON in the codeclimate array is a line found
240-
occurrences['total_lines_matched'] = len(code_quality_report)
241-
# dedupes the files and accounts for all words for total count
242-
all_files_matched = []
243-
occurrences['total_words_matched'] = 0
244-
for word in occurrences['biased_words']:
245-
if word in occurrences and len(occurrences[word]) > 0:
246-
occurrences['total_words_matched'] += occurrences[word]['num_matched_words']
247-
all_files_matched = list(
248-
set(all_files_matched) | set(occurrences[word]['files']))
249-
occurrences['total_files_matched'] = len(all_files_matched)
250-
251-
# print output to console
252-
print(json.dumps(occurrences, indent=2))
253-
254-
write_file(constants.SUMMARY_FILENAME, occurrences)
255-
write_file(constants.CODECLIMATE_FILENAME, code_quality_report)
256-
# final error check for check mode
257-
if terms_found:
258-
error_message = '%sError: %sBiased Lang Linter%s found biased words. Replacement(s) required. 🚨\nSee JSON output for details on what to replace. 🕵🏽‍♀️ %s\n' % (
259-
c['red'], c['lightmagenta'], c['red'], c['nc'])
260-
sys.stderr.write(error_message)
261-
if args['err_file']:
262-
with open(args['err_file'], 'w') as errfile:
263-
errfile.write(error_message)
264-
if args['splunk_flag']:
265-
# Splunk the code quality report
266-
# If ran in GitHub, call endpoint to Splunk data
267-
if args['github_repo']:
268-
# TODO: Call endpoint to post data to Splunk instance
269-
print('Posting data to Splunk')
270-
else:
271-
send_codeclimate_batch(constants.CODECLIMATE_FILENAME, splunk_events,
272-
repo_name, source_type, event2splunk)
273-
send_codeclimate_batch(constants.CODECLIMATE_FILENAME, splunk_events,
274-
repo_name, source_type, pz_event2splunk)
203+
# if args['mode'] == 'check':
204+
occurrences = {'biased_words': []}
205+
code_quality_report, splunk_events = [], []
206+
terms_found = False
207+
208+
# Generate JSON
209+
for line in lines:
210+
terms_found, occurrences = process_biased_word_line(
211+
line, occurrences, code_quality_report, splunk_events, args, batch_info, terms_found, logger)
212+
213+
occurrences['terms_found'] = terms_found
214+
215+
# every JSON in the codeclimate array is a line found
216+
occurrences['total_lines_matched'] = len(code_quality_report)
217+
# dedupes the files and accounts for all words for total count
218+
all_files_matched = []
219+
occurrences['total_words_matched'] = 0
220+
for word in occurrences['biased_words']:
221+
if word in occurrences and len(occurrences[word]) > 0:
222+
occurrences['total_words_matched'] += occurrences[word]['num_matched_words']
223+
all_files_matched = list(
224+
set(all_files_matched) | set(occurrences[word]['files']))
225+
occurrences['total_files_matched'] = len(all_files_matched)
226+
227+
# print output to console
228+
print(json.dumps(occurrences, indent=2))
229+
230+
write_file(constants.SUMMARY_FILENAME, occurrences)
231+
write_file(constants.CODECLIMATE_FILENAME, code_quality_report)
232+
err_file = args['err_file']
233+
# final error check for check mode
234+
if not terms_found:
235+
sys.stdout.write('%sBiased Lang Linter %sfound no biased words! 🎉%s\n' % (
236+
c['lightmagenta'], c['green'], c['nc']))
237+
else:
238+
error_message = '%sError: %sBiased Lang Linter%s found biased words. Replacement(s) required. 🚨\nSee JSON output for details on what to replace. 🕵🏽‍♀️ %s\n' % (
239+
c['red'], c['lightmagenta'], c['red'], c['nc'])
240+
sys.stderr.write(error_message)
241+
if err_file:
242+
with open(err_file, 'w') as errfile:
243+
errfile.write(error_message)
275244

276-
else:
277-
sys.stdout.write('%sBiased Lang Linter %sfound no biased words! 🎉%s\n' % (
278-
c['lightmagenta'], c['green'], c['nc']))
279-
280-
if args['splunk_flag']:
281-
# Splunk the summarized JSON
282-
occurrences['content'] = constants.SUMMARY_FILENAME
283-
occurrences.update(batch_info)
284-
occurrences['total_lines'] = get_line_count(args['path'], excluded)
285-
occurrences['run_time'] = main_timer.stop()
286-
# If ran in GitHub, call endpoint to Splunk data
287-
if args['github_repo']:
288-
# TODO: Call endpoint to post data to Splunk instance
289-
print('Splunking occurrences data from GitHub!')
290-
else:
291-
event2splunk.post_event(payload=occurrences,
292-
source=repo_name, sourcetype=source_type)
293-
event2splunk.close(filename=constants.SUMMARY_FILENAME)
294-
pz_event2splunk.post_event(
295-
payload=occurrences, source=repo_name, sourcetype=source_type)
296-
pz_event2splunk.close(filename=constants.SUMMARY_FILENAME)
297-
# For GitHub Actions to provide error annotations
298-
err_file = args['err_file']
299-
if os.path.exists(err_file) and args['github_repo']:
300-
print(f'{err_file} file found, exiting(1)')
301-
sys.exit(1)
245+
if args['splunk_flag']:
246+
# Splunk the summarized JSON
247+
occurrences['content'] = constants.SUMMARY_FILENAME
248+
occurrences.update(batch_info)
249+
occurrences['total_lines'] = get_line_count(args['path'], excluded)
250+
occurrences['run_time'] = main_timer.stop()
251+
if not args['github_repo']:
252+
send_codeclimate_batch(constants.CODECLIMATE_FILENAME, splunk_events,
253+
repo_name, source_type, event2splunk)
254+
send_codeclimate_batch(constants.CODECLIMATE_FILENAME, splunk_events,
255+
repo_name, source_type, pz_event2splunk)
256+
event2splunk.post_event(payload=occurrences,
257+
source=repo_name, sourcetype=source_type)
258+
event2splunk.close(filename=constants.SUMMARY_FILENAME)
259+
pz_event2splunk.post_event(
260+
payload=occurrences, source=repo_name, sourcetype=source_type)
261+
pz_event2splunk.close(filename=constants.SUMMARY_FILENAME)
262+
# For GitHub Actions to provide error annotations
263+
if os.path.exists(err_file) and args['github_repo']:
264+
print(f'{err_file} file found, exiting(1)')
265+
sys.exit(1)
302266

303267

304268
if __name__ == '__main__':

utils/utils.py

-17
Original file line number | Diff line number | Diff line change
@@ -67,23 +67,6 @@ def get_colors():
6767
}
6868

6969

70-
def get_colors_sh():
71-
return {
72-
'text': {
73-
'yellow': '\\033[0;33m',
74-
'green': '\\033[0;32m',
75-
'red': '\\033[0;31m',
76-
'lightmagenta': '\\033[0;95m',
77-
'orange': '\\033[38;5;172m',
78-
'nc': '\\033[0m'
79-
},
80-
'underline': {
81-
'cyan': '\\033[4;36m',
82-
'lightmagenta': '\\033[4;95m'
83-
}
84-
}
85-
86-
8770
def get_batch_info():
8871
year = str(datetime.now().year)
8972
timezone_offset = time.strftime('%z', time.gmtime())

0 commit comments

Comments (0)