Skip to content

Commit

Permalink
Merge pull request #8 from CybercentreCanada/AL-3076-why-is-url-creator-always-scaled-very-high-on-pb
Browse files (browse the repository at this point in the history)

AL-3076: Why is URL creator always scaled very high [dev]
  • Loading branch information
cccs-kevin authored Jun 24, 2024
2 parents 9bcdf0a + d848641 commit f4ee62e
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"tags": [
{
"type": "network.email.address",
"short_type": "URI",
"short_type": "address",
"value": "[email protected]",
"score": 0
},
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"tags": []
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"extra": {
"drop_file": false,
"score": 0,
"sections": []
},
"files": {
"extracted": [],
"supplementary": []
},
"results": {
"heuristics": [],
"tags": {},
"temp_submission_data": {}
}
}
5 changes: 4 additions & 1 deletion urlcreator/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,16 @@
from multidecoder.decoders.network import DOMAIN_TYPE, EMAIL_TYPE, IP_TYPE, URL_TYPE, parse_url
from multidecoder.multidecoder import Multidecoder
from multidecoder.node import Node
from multidecoder.registry import build_registry
from multidecoder.string_helper import make_bytes, make_str

NETWORK_IOC_TYPES = ["domain", "ip", "uri"]


def url_analysis(url: str) -> Tuple[ResultTableSection, Dict[str, List[str]]]:
md = Multidecoder()
# There is no point in searching for keywords in a URL
md_registry = build_registry(exclude=["get_keywords"])
md = Multidecoder(decoders=md_registry)

analysis_table = ResultTableSection(url[:128] + "..." if len(url) > 128 else url)
network_iocs = {ioc_type: [] for ioc_type in NETWORK_IOC_TYPES}
Expand Down
11 changes: 7 additions & 4 deletions urlcreator/urlcreator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from collections import Counter, defaultdict
from urllib.parse import urlparse

import urlcreator.network
from assemblyline.odm.base import IP_ONLY_REGEX, IPV4_ONLY_REGEX
from assemblyline_v4_service.common.base import ServiceBase
from assemblyline_v4_service.common.request import ServiceRequest
Expand All @@ -16,8 +17,6 @@
)
from assemblyline_v4_service.common.task import MaxExtractedExceeded

import urlcreator.network

# Threshold to trigger heuristic regarding high port usage in URI
HIGH_PORT_MINIMUM = 1024

Expand Down Expand Up @@ -51,12 +50,16 @@ def __init__(self, config) -> None:

def execute(self, request: ServiceRequest) -> None:
request.result = Result()

minimum_maliciousness = max(int(request.get_param("minimum_maliciousness")), self.minimum_maliciousness_limit)
tags = request.task.tags

# Only concerned with static/dynamic URIs found by prior services
urls = tags.get("network.static.uri", []) + tags.get("network.dynamic.uri", [])

# No tags of interest? Exit fast!
if not urls:
return

minimum_maliciousness = max(int(request.get_param("minimum_maliciousness")), self.minimum_maliciousness_limit)
emails = [x[0].lower() for x in tags.get("network.email.address", [])]

scoring_uri = ResultTableSection(title_text="High scoring URI")
Expand Down

0 comments on commit f4ee62e

Please sign in to comment.