Skip to content

Commit

Permalink
remove content filtering
Browse files: browse the repository at this point in the history
  • Loading branch information
cccs-rs committed Aug 26, 2022
1 parent 3d10b7a commit 8e04f28
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 13 deletions.
4 changes: 0 additions & 4 deletions service_manifest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,6 @@ submission_params:
value: ""
type: str

config:
content_type_filter:
- text/plain

docker_config:
image: ${REGISTRY}cccs/assemblyline-service-urldownloader:$SERVICE_TAG
cpu_cores: 1
Expand Down
13 changes: 4 additions & 9 deletions urldownloader/urldownloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,11 @@
class URLDownloader(ServiceBase):
def __init__(self, config) -> None:
super().__init__(config)
self.content_type_filter: list = config.get('content_type_filter', [])
self.content_type_filter.append(None)

def fetch_uri(self, uri: str, apply_filter: bool = True, headers={}) -> Union[str, requests.Response]:
def fetch_uri(self, uri: str, headers={}) -> Union[str, requests.Response]:
resp = requests.head(uri, allow_redirects=True, timeout=10, headers=headers, proxies=SUBMISSION_PROXIES)
# Only concerned with gathering responses of interest
if resp.ok:
if apply_filter and any(content_type in resp.headers.get('Content-Type')
for content_type in self.content_type_filter):
return
resp_fh = NamedTemporaryFile(delete=False)
resp_fh.write(requests.get(uri, allow_redirects=True, headers=headers).content)
resp_fh.close()
Expand Down Expand Up @@ -62,7 +57,7 @@ def execute(self, request: ServiceRequest) -> None:
request.temp_submission_data['visited_urls'].append(tag_value)
# Write response and attach to submission
try:
fp = self.fetch_uri(tag_value, apply_filter=bool(tag_score < 500), headers=headers)
fp = self.fetch_uri(tag_value, headers=headers)
if isinstance(fp, str):
request.add_extracted(fp, tag_value, f"Response from {tag_value}",
safelist_interface=self.api_interface)
Expand All @@ -71,7 +66,7 @@ def execute(self, request: ServiceRequest) -> None:
except requests.exceptions.ConnectionError as e:
exception_table.add_row(TableRow({'URI': tag_value, 'REASON': str(e).split(':')[-1][:-2]}))

if exception_table.body:
result.add_section(exception_table)
if exception_table.body:
result.add_section(exception_table)

request.result = result

0 comments on commit 8e04f28

Please sign in to comment.