import hashlib
from flask import request

from assemblyline.common.isotime import now_as_iso
from assemblyline.remote.datatypes.lock import Lock
from assemblyline_ui.api.base import api_login, make_api_response, make_subapi_blueprint
from assemblyline_ui.config import CLASSIFICATION, STORAGE

SUB_API = 'safelist'
safelist_api = make_subapi_blueprint(SUB_API, api_version=4)
safelist_api._doc = "Perform operations on safelisted hashes"

# Hex digest lengths accepted as safelist keys (md5, sha1, sha256).
# The lookup/enable/delete endpoints reject any other length, so the
# add endpoints must not store entries under keys of another length.
_VALID_HASH_LENGTHS = (32, 40, 64)


class InvalidSafehash(Exception):
    """Raised when a safelist entry cannot be merged into an existing one."""


def _get_best_hash(hashes):
    """Return the key to use for an entry: sha256, else sha1, else md5, else None.

    Unlike nested ``dict.get(key, fallback)`` calls, a hash that is present but
    null/empty does not prevent falling back to the next available hash.
    """
    if not isinstance(hashes, dict):
        return None
    return hashes.get('sha256') or hashes.get('sha1') or hashes.get('md5') or None


def _is_valid_hash(qhash):
    """Tell if qhash is a string whose length matches an md5, sha1 or sha256 hex digest."""
    return isinstance(qhash, str) and len(qhash) in _VALID_HASH_LENGTHS


def _merge_safe_hashes(new, old):
    """Merge the safelist entry ``new`` into the already stored entry ``old``.

    ``old`` is updated in place and returned; ``new`` is left untouched.
    The merge keeps the highest classification, refreshes the ``updated``
    timestamp, adds the new hashes, merges the type specific data and merges
    the sources by name (reasons of a source that already exists are
    de-duplicated).

    Raises:
        InvalidSafehash: if the types differ, no source is provided, a source
            changes type, or the data is malformed in any other way.
    """
    try:
        # Check if hash types match
        if new['type'] != old['type']:
            raise InvalidSafehash(f"Safe hash type mismatch: {new['type']} != {old['type']}")

        # Use max classification
        old['classification'] = CLASSIFICATION.max_classification(old['classification'], new['classification'])

        # Update updated time
        old['updated'] = now_as_iso()

        # Update hashes
        old['hashes'].update(new['hashes'])

        # Update type specific info
        if old['type'] == 'file':
            # The stored value may be an explicit None, which setdefault() would keep
            old_file = old.get('file') or {}
            # Work on a copy so the caller's data is not modified
            new_file = dict(new.get('file') or {})
            new_names = new_file.pop('name', None) or []

            old_names = old_file.get('name')
            if old_names is None:
                if new_names:
                    old_file['name'] = list(new_names)
            else:
                for name in new_names:
                    if name not in old_names:
                        old_names.append(name)

            old_file.update(new_file)
            old['file'] = old_file
        elif old['type'] == 'tag':
            old['tag'] = new['tag']

        # Merge sources
        src_map = {x['name']: x for x in new['sources']}
        if not src_map:
            raise InvalidSafehash("No valid source found")

        old_src_map = {x['name']: x for x in old['sources']}
        for name, src in src_map.items():
            src_cl = src.get('classification', None)
            if src_cl:
                old['classification'] = CLASSIFICATION.max_classification(old['classification'], src_cl)

            if name not in old_src_map:
                old_src_map[name] = src
            else:
                old_src = old_src_map[name]
                if old_src['type'] != src['type']:
                    raise InvalidSafehash(f"Source {name} has a type conflict: {old_src['type']} != {src['type']}")

                for reason in src['reason']:
                    if reason not in old_src['reason']:
                        old_src['reason'].append(reason)

        # Store a real list: a dict view is neither indexable nor serializable
        old['sources'] = list(old_src_map.values())
        return old
    except Exception as e:
        # Every failure (including the InvalidSafehash raised above) is reported
        # with the same prefix; the original error stays attached as the cause.
        raise InvalidSafehash(f"Invalid data provided: {str(e)}") from e


@safelist_api.route("/", methods=["PUT", "POST"])
@api_login(require_type=['user', 'signature_importer'], allow_readonly=False, required_priv=["W"])
def add_or_update_hash(**kwargs):
    """
    Add a hash in the safelist if it does not exist or update its list of sources if it does

    Arguments:
    None

    Data Block:
    {
     "classification": "TLP:W",    # Classification of the safe hash (Computed for the mix of sources) - Optional
     "enabled": true,              # Is the safe hash enabled or not
     "file": {                     # Information about the file - Only used in file mode
       "name": ["file.txt"],         # Possible names for the file
       "size": 12345,                # Size of the file
       "type": "document/text"       # Type of the file
     },
     "hashes": {                   # Information about the safe hash - At least one hash required
       "md5": "123...321",           # MD5 hash of the safe hash
       "sha1": "1234...4321",        # SHA1 hash of the safe hash
       "sha256": "12345....54321"    # SHA256 of the safe hash
     },
     "sources": [                  # List of sources for why the file is safelisted, deduped on name - Required
       {"classification": "TLP:W",   # Classification of the source (default: TLP:W) - Optional
        "name": "NSRL",              # Name of external source or user who safelisted it - Required
        "reason": [                  # List of reasons why the source is safelisted - Required
          "Found as test.txt on default windows 10 CD",
          "Found as install.txt on default windows XP CD"
        ],
        "type": "external"},         # Type of source (external or user) - Required
       {"classification": "TLP:W",
        "name": "admin",
        "reason": ["We've seen this file many times and it leads to False positives"],
        "type": "user"}
     ],
     "tag": {                      # Tag information - Only used in tag mode
       "type": "network.url",        # Type of tag
       "value": "google.ca"          # Value of the tag
     },
     "type": "tag"                 # Type of safelist hash (tag or file)
    }

    Result example:
    {
     "success": true,         # Was the hash successfully added
     "op": "add"              # Was it added to the system or updated
    }
    """
    # Load data
    data = request.json
    if not data or not isinstance(data, dict):
        return make_api_response({}, "No data provided", 400)
    user = kwargs['user']

    sl_type = data.get('type', None)
    if sl_type is None:
        return make_api_response(None, "Safelist type not found", 400)

    # Set defaults
    data.setdefault('classification', CLASSIFICATION.UNRESTRICTED)
    hashes = data.setdefault('hashes', {})
    if not isinstance(hashes, dict):
        return make_api_response(None, "No valid hash found", 400)

    if sl_type == 'tag':
        tag_data = data.get('tag', None)
        if tag_data is None or 'type' not in tag_data or 'value' not in tag_data:
            return make_api_response(None, "Tag data not found", 400)

        # Tags are keyed on the hashes of "<tag type>: <tag value>"
        hashed_value = f"{tag_data['type']}: {tag_data['value']}".encode('utf8')
        hashes['md5'] = hashlib.md5(hashed_value).hexdigest()
        hashes['sha1'] = hashlib.sha1(hashed_value).hexdigest()
        hashes['sha256'] = hashlib.sha256(hashed_value).hexdigest()
        data.pop('file', None)

    elif sl_type == 'file':
        data.pop('tag', None)
        data.setdefault('file', {})

    data['added'] = data['updated'] = now_as_iso()

    # Find the best hash to use for the key and validate its length
    qhash = _get_best_hash(hashes)
    if not _is_valid_hash(qhash):
        return make_api_response(None, "No valid hash found", 400)

    # Validate sources
    sources = data.get('sources', None)
    if not isinstance(sources, list) or not sources:
        return make_api_response({}, "No valid source found", 400)

    src_map = {}
    for src in sources:
        if not isinstance(src, dict) or 'name' not in src or 'type' not in src:
            return make_api_response({}, "Invalid source found: name and type are required", 400)

        if src['type'] == 'user':
            if src['name'] != user['uname']:
                return make_api_response(
                    {}, f"You cannot add a source for another user. {src['name']} != {user['uname']}", 400)
        else:
            if 'signature_importer' not in user['type']:
                return make_api_response(
                    {}, "You do not have sufficient priviledges to add an external source.", 403)

        src_cl = src.get('classification', None)
        if src_cl:
            data['classification'] = CLASSIFICATION.max_classification(data['classification'], src_cl)

        src_map[src['name']] = src

    with Lock(f'add_or_update-safelist-{qhash}', 30):
        old = STORAGE.safelist.get_if_exists(qhash, as_obj=False)
        if old:
            try:
                # Save data to the DB
                STORAGE.safelist.save(qhash, _merge_safe_hashes(data, old))
                return make_api_response({'success': True, "op": "update"})
            except InvalidSafehash as e:
                return make_api_response({}, str(e), 400)
        else:
            try:
                # Sources are de-duplicated on name; store them as a real list
                data['sources'] = list(src_map.values())
                STORAGE.safelist.save(qhash, data)
                return make_api_response({'success': True, "op": "add"})
            except Exception as e:
                return make_api_response({}, f"Invalid data provided: {str(e)}", 400)


@safelist_api.route("/add_update_many/", methods=["POST", "PUT"])
@api_login(audit=False, required_priv=['W'], allow_readonly=False, require_type=['signature_importer'])
def add_update_many_hashes(**_):
    """
    Add or Update a list of the safe hashes

    Variables:
    None

    Arguments:
    None

    Data Block (REQUIRED):
    [                             # List of Safe hash blocks
     {
      "classification": "TLP:W",    # Classification of the safe hash (Computed for the mix of sources) - Optional
      "enabled": true,              # Is the safe hash enabled or not
      "file": {                     # Information about the file - Only used in file mode
        "name": ["file.txt"],         # Possible names for the file
        "size": 12345,                # Size of the file
        "type": "document/text"       # Type of the file
      },
      "hashes": {                   # Information about the safe hash - At least one hash required
        "md5": "123...321",           # MD5 hash of the safe hash
        "sha1": "1234...4321",        # SHA1 hash of the safe hash
        "sha256": "12345....54321"    # SHA256 of the safe hash
      },
      "sources": [                  # List of sources for why the file is safelisted, deduped on name - Required
        {"classification": "TLP:W",   # Classification of the source (default: TLP:W) - Optional
         "name": "NSRL",              # Name of external source or user who safelisted it - Required
         "reason": [                  # List of reasons why the source is safelisted - Required
           "Found as test.txt on default windows 10 CD",
           "Found as install.txt on default windows XP CD"
         ],
         "type": "external"},         # Type of source (external or user) - Required
        {"classification": "TLP:W",
         "name": "admin",
         "reason": ["We've seen this file many times and it leads to False positives"],
         "type": "user"}
      ],
      "tag": {                      # Tag information - Only used in tag mode
        "type": "network.url",        # Type of tag
        "value": "google.ca"          # Value of the tag
      },
      "type": "tag"                 # Type of safelist hash (tag or file)
     }
     ...
    ]

    Result example:
    {"success": 23,          # Number of hashes that succeeded
     "errors": []}           # List of hashes that failed
    """
    data = request.json

    if not isinstance(data, list):
        return make_api_response("", "Could not get the list of hashes", 400)

    new_data = {}
    for hash_data in data:
        # A block without a type or a usable hash cannot be keyed nor merged
        if not isinstance(hash_data, dict) or hash_data.get('type', None) is None:
            return make_api_response("", f"Invalid hash block: {str(hash_data)}", 400)

        # Set a classification if None
        hash_data.setdefault('classification', CLASSIFICATION.UNRESTRICTED)
        if hash_data['type'] == 'tag':
            hash_data.pop('file', None)
        elif hash_data['type'] == 'file':
            hash_data.pop('tag', None)

        # Find the hash used for the key
        key = _get_best_hash(hash_data.get('hashes', None))
        if not _is_valid_hash(key):
            return make_api_response("", f"Invalid hash block: {str(hash_data)}", 400)

        # Save the new hash_block
        new_data[key] = hash_data

    # Get already existing hashes
    old_data = STORAGE.safelist.multiget(list(new_data), as_dictionary=True, as_obj=False,
                                         error_on_missing=False)

    # Build the bulk plan, merging each new block with what is already stored
    plan = STORAGE.safelist.get_bulk_plan()
    for key, val in new_data.items():
        # Blocks that do not exist yet are merged into an empty entry of the same type
        old_val = old_data.get(key, {'classification': CLASSIFICATION.UNRESTRICTED,
                                     'hashes': {}, 'sources': [], 'type': val['type']})

        # Add upsert operation
        try:
            plan.add_upsert_operation(key, _merge_safe_hashes(val, old_val))
        except InvalidSafehash as e:
            return make_api_response("", str(e), 400)

    if not plan.empty:
        # Execute plan
        res = STORAGE.safelist.bulk(plan)
        return make_api_response({"success": len(res['items']), "errors": res['errors']})

    return make_api_response({"success": 0, "errors": []})


# The URL variable is required: the view function takes the hash as ``qhash``
@safelist_api.route("/<qhash>/", methods=["GET"])
@api_login(required_priv=["R"])
def check_hash_exists(qhash, **kwargs):
    """
    Check if a hash exists in the safelist.

    Variables:
    qhash       => Hash to check for existence (either md5, sha1 or sha256)

    Arguments:
    None

    Data Block:
    None

    API call example:
    GET /api/v4/safelist/123456...654321/

    Result example:
    {
     "classification": "TLP:W",    # Classification of the safe hash (Computed for the mix of sources) - Optional
     "enabled": true,              # Is the safe hash enabled or not
     "file": {                     # Information about the file - Only used in file mode
       "name": ["file.txt"],         # Possible names for the file
       "size": 12345,                # Size of the file
       "type": "document/text"       # Type of the file
     },
     "hashes": {                   # Information about the safe hash - At least one hash required
       "md5": "123...321",           # MD5 hash of the safe hash
       "sha1": "1234...4321",        # SHA1 hash of the safe hash
       "sha256": "12345....54321"    # SHA256 of the safe hash
     },
     "sources": [                  # List of sources for why the file is safelisted, deduped on name - Required
       {"classification": "TLP:W",   # Classification of the source (default: TLP:W) - Optional
        "name": "NSRL",              # Name of external source or user who safelisted it - Required
        "reason": [                  # List of reasons why the source is safelisted - Required
          "Found as test.txt on default windows 10 CD",
          "Found as install.txt on default windows XP CD"
        ],
        "type": "external"},         # Type of source (external or user) - Required
       {"classification": "TLP:W",
        "name": "admin",
        "reason": ["We've seen this file many times and it leads to False positives"],
        "type": "user"}
     ],
     "tag": {                      # Tag information - Only used in tag mode
       "type": "network.url",        # Type of tag
       "value": "google.ca"          # Value of the tag
     },
     "type": "tag"                 # Type of safelist hash (tag or file)
    }
    """
    if len(qhash) not in _VALID_HASH_LENGTHS:
        return make_api_response(None, "Invalid hash length", 400)

    safelist = STORAGE.safelist.get_if_exists(qhash, as_obj=False)
    # Entries the user is not allowed to see are reported as missing
    if safelist and CLASSIFICATION.is_accessible(kwargs['user']['classification'], safelist['classification']):
        return make_api_response(safelist)

    return make_api_response(None, "The hash was not found in the safelist.", 404)


@safelist_api.route("/enable/<qhash>/", methods=["PUT"])
@api_login(allow_readonly=False)
def set_hash_status(qhash, **kwargs):
    """
    Set the enabled status of a hash

    Only admins and signature managers are allowed to change the status.

    Variables:
    qhash       => Hash to change the status

    Arguments:
    None

    Data Block:
    "true"

    Result example:
    {"success": True}
    """
    user = kwargs['user']
    data = request.json

    if len(qhash) not in _VALID_HASH_LENGTHS:
        return make_api_response(None, "Invalid hash length", 400)

    if 'admin' not in user['type'] and 'signature_manager' not in user['type']:
        return make_api_response({}, "You are not allowed to change the status", 403)

    # Never write anything other than a boolean into the 'enabled' field
    if not isinstance(data, bool):
        return make_api_response({}, "Invalid status provided, a boolean is expected", 400)

    return make_api_response({'success': STORAGE.safelist.update(
        qhash, [(STORAGE.safelist.UPDATE_SET, 'enabled', data)])})


@safelist_api.route("/<qhash>/", methods=["DELETE"])
@api_login(allow_readonly=False)
def delete_hash(qhash, **kwargs):
    """
    Delete a hash from the safelist

    Admins and signature managers delete the whole entry. Other users only
    remove their own source from the entry; the entry itself is deleted once
    no source is left.

    Variables:
    qhash       => Hash to check

    Arguments:
    None

    Data Block:
    None

    API call example:
    DELETE /api/v4/safelist/123456...654321/

    Result example:
    {"success": True}
    """
    user = kwargs['user']

    if len(qhash) not in _VALID_HASH_LENGTHS:
        return make_api_response(None, "Invalid hash length", 400)

    if 'admin' in user['type'] or 'signature_manager' in user['type']:
        return make_api_response({'success': STORAGE.safelist.delete(qhash)})

    safe_hash = STORAGE.safelist.get_if_exists(qhash, as_obj=False)
    if safe_hash:
        # Only drop the 'user' source owned by the caller: an external source that
        # happens to carry the same name as the user must not be removable this way.
        safe_hash['sources'] = [x for x in safe_hash['sources']
                                if x['name'] != user['uname'] or x.get('type', None) != 'user']
        if not safe_hash['sources']:
            return make_api_response({'success': STORAGE.safelist.delete(qhash)})

        return make_api_response({'success': STORAGE.safelist.save(qhash, safe_hash)})

    return make_api_response({'success': False})
a/assemblyline_ui/api/v4/search.py +++ b/assemblyline_ui/api/v4/search.py @@ -55,7 +55,7 @@ def search(bucket, **kwargs): return make_api_response("", f"Not a valid bucket to search in: {bucket}", 400) user = kwargs['user'] - fields = ["offset", "rows", "sort", "fl", "timeout", "deep_paging_id"] + fields = ["offset", "rows", "sort", "fl", "timeout", "deep_paging_id", 'track_total_hits'] multi_fields = ['filters'] boolean_fields = ['use_archive'] diff --git a/assemblyline_ui/api/v4/submission.py b/assemblyline_ui/api/v4/submission.py index 38522cf5..3491ae43 100644 --- a/assemblyline_ui/api/v4/submission.py +++ b/assemblyline_ui/api/v4/submission.py @@ -149,7 +149,9 @@ def get_file_submission_results(sid, sha256, **kwargs): h_type = "info" if sec.get('heuristic', False): # Get the heuristics data - if sec['heuristic']['score'] < 100: + if sec['heuristic']['score'] < 0: + h_type = "safe" + elif sec['heuristic']['score'] < 100: h_type = "info" elif sec['heuristic']['score'] < 1000: h_type = "suspicious" @@ -181,17 +183,17 @@ def get_file_submission_results(sid, sha256, **kwargs): output["tags"].setdefault(t['type'], {}) current_htype = output["tags"][t['type']].get(t['value'], None) if not current_htype: - output["tags"][t['type']][t['value']] = h_type + output["tags"][t['type']][t['value']] = (h_type, t['safelisted']) else: if current_htype == 'malicious' or h_type == 'malicious': - output["tags"][t['type']][t['value']] = 'malicious' + output["tags"][t['type']][t['value']] = ('malicious', t['safelisted']) elif current_htype == 'suspicious' or h_type == 'suspicious': - output["tags"][t['type']][t['value']] = 'suspicious' + output["tags"][t['type']][t['value']] = ('suspicious', t['safelisted']) else: - output["tags"][t['type']][t['value']] = 'info' + output["tags"][t['type']][t['value']] = ('info', t['safelisted']) for t_type in output["tags"]: - output["tags"][t_type] = [(k, v) for k, v in output['tags'][t_type].items()] + output["tags"][t_type] = [(k, v[0], v[1]) for 
k, v in output['tags'][t_type].items()] output['signatures'] = list(output['signatures']) @@ -612,18 +614,19 @@ def get_summary(sid, **kwargs): output['tags'][summary_type].setdefault(t['type'], {}) current_htype = output['tags'][summary_type][t['type']].get(t['value'], None) if not current_htype: - output['tags'][summary_type][t['type']][t['value']] = t['h_type'] + output['tags'][summary_type][t['type']][t['value']] = (t['h_type'], t['safelisted']) else: if current_htype == 'malicious' or t['h_type'] == 'malicious': - output['tags'][summary_type][t['type']][t['value']] = 'malicious' + output['tags'][summary_type][t['type']][t['value']] = ('malicious', t['safelisted']) elif current_htype == 'suspicious' or t['h_type'] == 'suspicious': - output['tags'][summary_type][t['type']][t['value']] = 'suspicious' + output['tags'][summary_type][t['type']][t['value']] = ('suspicious', t['safelisted']) else: - output['tags'][summary_type][t['type']][t['value']] = 'info' + output['tags'][summary_type][t['type']][t['value']] = ('info', t['safelisted']) for summary_type in output['tags']: for t_type in output['tags'][summary_type]: - output['tags'][summary_type][t_type] = [(k, v) for k, v in output['tags'][summary_type][t_type].items()] + output['tags'][summary_type][t_type] = [(k, v[0], v[1]) + for k, v in output['tags'][summary_type][t_type].items()] return make_api_response(output) else: @@ -818,10 +821,10 @@ def recurse_get_names(data): return output name_map = recurse_get_names(tree['tree']) - summary = get_or_create_summary(submission_id, submission.pop('results', []), user['classification'], submission['state'] == "completed") - tags = summary['tags'] + tags = [t for t in summary['tags'] if not t['safelisted']] + attack_matrix = summary['attack_matrix'] heuristics = summary['heuristics'] submission['classification'] = Classification.max_classification(submission['classification'], diff --git a/assemblyline_ui/api/v4/system.py b/assemblyline_ui/api/v4/system.py new file mode 
from flask import request
import yaml

from assemblyline.common import forge
from assemblyline.common.str_utils import safe_str
from assemblyline.odm.models.tagging import Tagging

from assemblyline_ui.config import STORAGE
from assemblyline_ui.api.base import api_login, make_api_response, make_subapi_blueprint


Classification = forge.get_classification()
config = forge.get_config()

SUB_API = 'system'
system_api = make_subapi_blueprint(SUB_API, api_version=4)
system_api._doc = "Perform system actions"

ADMIN_FILE_TTL = 60 * 60 * 24 * 365 * 100  # Just keep the file for 100 years...

# Top-level sections a tag_safelist.yml file is made of
_TAG_SAFELIST_SECTIONS = ('match', 'regex')


def _validate_tag_safelist(yml_data):
    """Check the structure of a parsed tag_safelist.yml document.

    The document must be a mapping made of exactly the 'match' and 'regex'
    sections, each of them mapping valid tag types to lists of values.

    Raises:
        ValueError: with a human readable message when the structure is invalid.
    """
    if not isinstance(yml_data, dict):
        raise ValueError('The file should contain a mapping with the match and regex sections')

    for key in yml_data:
        if key not in _TAG_SAFELIST_SECTIONS:
            raise ValueError('Invalid key found.')

    fields = Tagging.flat_fields()
    for tag_type in _TAG_SAFELIST_SECTIONS:
        # Both sections are mandatory, as they always were: a missing section used
        # to be rejected through a KeyError. Only the error message is new.
        if tag_type not in yml_data:
            raise ValueError(f'Section {tag_type} is missing')

        section = yml_data[tag_type]
        if not isinstance(section, dict):
            raise ValueError(f'Section {tag_type} should be a mapping of tag types to lists of strings')

        for key, value in section.items():
            if key not in fields:
                raise ValueError(f'{key} is not a valid tag type')

            if not isinstance(value, list):
                raise ValueError(f'Value for {key} should be a list of strings')


@system_api.route("/tag_safelist/", methods=["GET"])
@api_login(require_type=['admin'], required_priv=['R'])
def get_tag_safelist(**_):
    """
    Get the current tag_safelist

    The version saved in the 'system' cache store is returned when there is
    one, otherwise the data returned by forge.get_tag_safelist_data() is
    dumped as YAML.

    Variables:
    None

    Arguments:
    None

    Data Block:
    None

    Result example:
    <content of the tag_safelist.yml file, as a string>
    """
    with forge.get_cachestore('system', config=config, datastore=STORAGE) as cache:
        tag_safelist_yml = cache.get('tag_safelist_yml')
        if not tag_safelist_yml:
            # Nothing was saved through the API yet, fall back to the data forge provides
            yml_data = forge.get_tag_safelist_data()
            if yml_data:
                return make_api_response(yaml.safe_dump(yml_data))

            return make_api_response(None, "Could not find the tag_safelist.yml file", 404)

        return make_api_response(safe_str(tag_safelist_yml))


@system_api.route("/tag_safelist/", methods=["PUT"])
@api_login(require_type=['admin'], allow_readonly=False, required_priv=['W'])
def put_tag_safelist(**_):
    """
    Save a new version of the tag_safelist file

    Variables:
    None

    Arguments:
    None

    Data Block:
    <content of the tag_safelist.yml file, as a JSON string>

    Result example:
    {"success": true}
    """
    tag_safelist_yml = request.json

    try:
        # The YAML document is expected as a JSON string; anything else cannot be
        # parsed nor encoded for storage.
        if not isinstance(tag_safelist_yml, str):
            raise ValueError('The file content should be provided as a string')

        _validate_tag_safelist(yaml.safe_load(tag_safelist_yml))
    except (ValueError, yaml.YAMLError) as e:
        return make_api_response(None, f"Invalid tag_safelist.yml file submitted: {str(e)}", 400)

    with forge.get_cachestore('system', config=config, datastore=STORAGE) as cache:
        cache.save('tag_safelist_yml', tag_safelist_yml.encode('utf-8'), ttl=ADMIN_FILE_TTL, force=True)

    return make_api_response({'success': True})
app.register_blueprint(webauthn_api) +app.register_blueprint(safelist_api) app.register_blueprint(workflow_api) diff --git a/assemblyline_ui/helper/result.py b/assemblyline_ui/helper/result.py index ee128ca9..689d32e2 100644 --- a/assemblyline_ui/helper/result.py +++ b/assemblyline_ui/helper/result.py @@ -1,4 +1,5 @@ import json +from assemblyline.common.dict_utils import flatten from assemblyline_ui.config import CLASSIFICATION, LOGGER from assemblyline.common.classification import InvalidClassification @@ -58,7 +59,8 @@ def filter_sections(sections, user_classification, min_classification): pass # Changing tags to a list - section['tags'] = tag_dict_to_list(section['tags']) + section['tags'] = tag_dict_to_list(flatten(section['tags']), False) + section['tags'] += tag_dict_to_list(section.pop('safelisted_tags', {}), True) final_sections.append(section) # Telling the user a section was hidden diff --git a/assemblyline_ui/helper/search.py b/assemblyline_ui/helper/search.py index 8e31b377..780218d6 100644 --- a/assemblyline_ui/helper/search.py +++ b/assemblyline_ui/helper/search.py @@ -7,6 +7,7 @@ 'result': STORAGE.result, 'signature': STORAGE.signature, 'submission': STORAGE.submission, + 'safelist': STORAGE.safelist, 'workflow': STORAGE.workflow } @@ -17,6 +18,7 @@ 'result': "created desc", 'signature': "type asc", 'submission': "times.submitted desc", + 'safelist': "added desc", 'workflow': "last_seen desc" } diff --git a/test/conftest.py b/test/conftest.py index 1975f98f..3e4f82ff 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -93,15 +93,17 @@ def host(redis_connection): for the failed address to become available. 
import hashlib
import json
import random

import pytest

from assemblyline.common.forge import get_classification
from assemblyline.common.isotime import iso_to_epoch
from assemblyline.odm.random_data import create_users, create_safelists, wipe_users, wipe_safelist
from assemblyline.odm.randomizer import get_random_hash
from conftest import APIError, get_api_data

# Each test works on its own hash so the tests do not depend on each other
add_hash_file = "10" + get_random_hash(62)
add_error_hash = "11" + get_random_hash(62)
update_hash = "12" + get_random_hash(62)
update_conflict_hash = "13" + get_random_hash(62)

NSRL_SOURCE = {
    "classification": 'TLP:W',
    "name": "NSRL",
    "reason": [
        "Found as test.txt on default windows 10 CD",
        "Found as install.txt on default windows XP CD"
    ],
    "type": "external"}

NSRL2_SOURCE = {
    "classification": 'TLP:W',
    "name": "NSRL2",
    "reason": [
        "File contains only AAAAs..."
    ],
    "type": "external"}

ADMIN_SOURCE = {
    "classification": 'TLP:W',
    "name": "admin",
    "reason": [
        "Generates a lot of FPs",
    ],
    "type": "user"}

USER_SOURCE = {
    "classification": 'TLP:W',
    "name": "user",
    "reason": [
        "I just feel like it!",
        "I just feel like it!",
    ],
    "type": "user"}


@pytest.fixture(scope="module")
def datastore(datastore_connection):
    """Populate the datastore with users and safelist entries, wipe them once the module is done."""
    try:
        create_users(datastore_connection)
        create_safelists(datastore_connection)
        yield datastore_connection
    finally:
        wipe_users(datastore_connection)
        wipe_safelist(datastore_connection)


# noinspection PyUnusedLocal
def test_safelist_add_file(datastore, login_session):
    """Adding a new file hash stores the submitted data along with the computed fields."""
    _, session, host = login_session

    # Generate a random safelist
    sl_data = {
        'hashes': {'md5': get_random_hash(32),
                   'sha1': get_random_hash(40),
                   'sha256': add_hash_file},
        'file': {'name': ['file.txt'],
                 'size': random.randint(128, 4096),
                 'type': 'document/text'},
        'sources': [NSRL_SOURCE, ADMIN_SOURCE],
        'type': 'file'
    }

    # Insert it and test return value
    resp = get_api_data(session, f"{host}/api/v4/safelist/", method="PUT", data=json.dumps(sl_data))
    assert resp['success']
    assert resp['op'] == 'add'

    # Load inserted data from DB
    ds_sl = datastore.safelist.get(add_hash_file, as_obj=False)

    # Test dates
    added = ds_sl.pop('added', None)
    updated = ds_sl.pop('updated', None)
    assert added == updated
    assert added is not None and updated is not None

    # Make sure tag is none
    tag = ds_sl.pop('tag', {})
    assert tag is None

    # Test classification
    classification = ds_sl.pop('classification', None)
    assert classification is not None

    # Test enabled
    enabled = ds_sl.pop('enabled', None)
    assert enabled

    # Test rest
    assert ds_sl == sl_data


def test_safelist_add_tag(datastore, login_session):
    """Adding a new tag stores it under the hashes of '<tag type>: <tag value>'."""
    _, session, host = login_session

    tag_type = 'network.static.ip'
    tag_value = '127.0.0.1'
    hashed_value = f"{tag_type}: {tag_value}".encode('utf8')

    # Generate a random safelist
    sl_data = {
        'hashes': {'md5': hashlib.md5(hashed_value).hexdigest(),
                   'sha1': hashlib.sha1(hashed_value).hexdigest(),
                   'sha256': hashlib.sha256(hashed_value).hexdigest()},
        'tag': {'type': tag_type,
                'value': tag_value},
        'sources': [NSRL_SOURCE, ADMIN_SOURCE],
        'type': 'tag'
    }

    # Insert it and test return value
    resp = get_api_data(session, f"{host}/api/v4/safelist/", method="PUT", data=json.dumps(sl_data))
    assert resp['success']
    assert resp['op'] == 'add'

    # Load inserted data from DB
    ds_sl = datastore.safelist.get(hashlib.sha256(hashed_value).hexdigest(), as_obj=False)

    # Test dates
    added = ds_sl.pop('added', None)
    updated = ds_sl.pop('updated', None)
    assert added == updated
    assert added is not None and updated is not None

    # Make sure file is none
    file = ds_sl.pop('file', {})
    assert file is None

    # Test classification
    classification = ds_sl.pop('classification', None)
    assert classification is not None

    # Test enabled
    enabled = ds_sl.pop('enabled', None)
    assert enabled

    # Test rest
    assert ds_sl == sl_data


def test_safelist_add_invalid(datastore, login_session):
    """A user cannot add a 'user' source in the name of somebody else."""
    _, session, host = login_session

    # Generate a random safelist
    sl_data = {
        'hashes': {'sha256': add_error_hash},
        'sources': [USER_SOURCE],
        'type': 'file'}

    # Insert it and test return value
    with pytest.raises(APIError) as conflict_exc:
        get_api_data(session, f"{host}/api/v4/safelist/", method="PUT", data=json.dumps(sl_data))

    assert 'for another user' in conflict_exc.value.args[0]


def test_safelist_update(datastore, login_session):
    """Re-submitting an existing hash merges the sources and raises the classification."""
    _, session, host = login_session
    cl_eng = get_classification()

    # Generate a random safelist
    sl_data = {
        'hashes': {'md5': get_random_hash(32),
                   'sha1': get_random_hash(40),
                   'sha256': update_hash},
        'file': {'name': [],
                 'size': random.randint(128, 4096),
                 'type': 'document/text'},
        'sources': [NSRL_SOURCE],
        'type': 'file'
    }

    # Insert it and test return value
    resp = get_api_data(session, f"{host}/api/v4/safelist/", method="PUT", data=json.dumps(sl_data))
    assert resp['success']
    assert resp['op'] == 'add'

    # Load inserted data from DB
    ds_sl = datastore.safelist.get(update_hash, as_obj=False)

    # Test rest
    assert {k: v for k, v in ds_sl.items()
            if k not in ['added', 'updated', 'classification', 'enabled', 'tag']} == sl_data

    u_data = {
        'classification': cl_eng.RESTRICTED,
        'hashes': {'sha256': update_hash},
        'sources': [NSRL2_SOURCE],
        'type': 'file'
    }

    # Insert it and test return value
    resp = get_api_data(session, f"{host}/api/v4/safelist/", method="PUT", data=json.dumps(u_data))
    assert resp['success']
    assert resp['op'] == 'update'

    # Load inserted data from DB
    ds_u = datastore.safelist.get(update_hash, as_obj=False)

    assert ds_u['added'] == ds_sl['added']
    assert iso_to_epoch(ds_u['updated']) > iso_to_epoch(ds_sl['updated'])
    assert ds_u['classification'] == cl_eng.RESTRICTED
    assert len(ds_u['sources']) == 2
    assert NSRL2_SOURCE in ds_u['sources']
    assert NSRL_SOURCE in ds_u['sources']


def test_safelist_update_conflict(datastore, login_session):
    """Re-submitting an existing source with a different type is refused."""
    _, session, host = login_session

    # Generate a random safelist. The source is copied because its type is changed
    # below: editing ADMIN_SOURCE itself would leak into every test that runs later.
    sl_data = {'hashes': {'sha256': update_conflict_hash}, 'file': {},
               'sources': [dict(ADMIN_SOURCE)], 'type': 'file'}

    # Insert it and test return value
    resp = get_api_data(session, f"{host}/api/v4/safelist/", method="PUT", data=json.dumps(sl_data))
    assert resp['success']
    assert resp['op'] == 'add'

    # Insert the same source with a different type
    sl_data['sources'][0]['type'] = 'external'
    with pytest.raises(APIError) as conflict_exc:
        get_api_data(session, f"{host}/api/v4/safelist/", method="PUT", data=json.dumps(sl_data))

    assert 'has a type conflict:' in conflict_exc.value.args[0]


def test_safelist_exist(datastore, login_session):
    """Getting an existing hash returns what is stored in the datastore."""
    _, session, host = login_session

    # 'qhash' rather than 'hash' so the builtin is not shadowed
    qhash = random.choice(datastore.safelist.search("id:*", fl='id', rows=100, as_obj=False)['items'])['id']

    resp = get_api_data(session, f"{host}/api/v4/safelist/{qhash}/")
    assert resp == datastore.safelist.get(qhash, as_obj=False)


# noinspection PyUnusedLocal
def test_safelist_invalid(datastore, login_session):
    """A hash that has neither the length of an md5, a sha1 nor a sha256 is refused."""
    _, session, host = login_session

    with pytest.raises(APIError) as invalid_exc:
        get_api_data(session, f"{host}/api/v4/safelist/{get_random_hash(12)}/")

    assert 'hash length' in invalid_exc.value.args[0]


# noinspection PyUnusedLocal
def test_safelist_missing(datastore, login_session):
    """A well formed hash that is not safelisted is reported as not found."""
    _, session, host = login_session

    missing_hash = "f" + get_random_hash(63)
    with pytest.raises(APIError) as missing_exc:
        get_api_data(session, f"{host}/api/v4/safelist/{missing_hash}/")

    assert 'not found' in missing_exc.value.args[0]
-collections = ['alert', 'file', 'heuristic', 'result', 'signature', 'submission', 'workflow'] +collections = ['alert', 'file', 'heuristic', 'result', 'signature', 'submission', 'safelist', 'workflow'] file_list = [] signatures = [] @@ -59,6 +60,12 @@ def datastore(datastore_connection): ds.heuristic.save(h.heur_id, h) ds.heuristic.commit() + for _ in range(TEST_SIZE): + w_id = "0"+get_random_hash(63) + w = random_model_obj(Safelist) + ds.safelist.save(w_id, w) + ds.safelist.commit() + for _ in range(TEST_SIZE): w_id = get_random_id() w = random_model_obj(Workflow) @@ -73,6 +80,7 @@ def datastore(datastore_connection): ds.signature.wipe() ds.submission.wipe() ds.heuristic.wipe() + ds.safelist.wipe() ds.workflow.wipe() wipe_users(ds) @@ -129,6 +137,7 @@ def test_histogram_search(datastore, login_session): 'heuristic': False, 'signature': 'last_modified', 'submission': 'times.submitted', + 'safelist': 'added', 'workflow': 'last_edit' } @@ -148,6 +157,7 @@ def test_histogram_search(datastore, login_session): 'signature': 'order', 'submission': 'file_count', 'heuristic': False, + 'safelist': False, 'workflow': 'hit_count' } @@ -191,6 +201,7 @@ def test_stats_search(datastore, login_session): 'signature': 'order', 'submission': 'file_count', 'heuristic': False, + 'safelist': False, 'workflow': 'hit_count' }