From 815a857730ae26ac0caf546e31930f686eba909f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janusz=20Kamie=C5=84ski?= Date: Thu, 23 May 2024 11:22:39 +0200 Subject: [PATCH 01/12] PAPP-34012 msooxml files should be recognized as such (not as zip) and handled properly in all environments (some OS don't support them properly in their magic library files definitions, so we patch them here) --- phantom_connector.py | 40 ++++++++++++++++++++++++---------------- phantom_consts.py | 2 ++ 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/phantom_connector.py b/phantom_connector.py index 9fc9e30..493b6fd 100644 --- a/phantom_connector.py +++ b/phantom_connector.py @@ -21,6 +21,7 @@ import gzip import json import os +import pathlib import random import socket import string @@ -28,6 +29,7 @@ import time import zipfile from pathlib import Path +from typing import Tuple import magic import requests @@ -51,12 +53,6 @@ from urllib import quote -# A list of files that MS OOXML archives will contain. -# OOXML documents are zip files with metadata, assets and data as various -# archive entries. We do not want to the deflate action to extract these. -OOXML_FILES = frozenset(['[Content_Types].xml', '_rels/.rels']) - - def determine_contains(value): valid_contains = list() for c, f in list(CONTAINS_VALIDATORS.items()): @@ -799,9 +795,9 @@ def _add_file_to_vault(self, action_result, data_stream, file_name, recursive, c file_name = vault_info['name'] - file_type = magic.from_file(file_path, mime=True) + file_type, is_supported = self.is_deflation_supported_file(file_path) - if file_type not in SUPPORTED_FILES: + if not is_supported: return (phantom.APP_SUCCESS) self._extract_file(action_result, file_path, file_name, recursive, container_id) @@ -809,10 +805,6 @@ def _add_file_to_vault(self, action_result, data_stream, file_name, recursive, c return (phantom.APP_SUCCESS) - @staticmethod - def _is_ooxml_zip(member_filenames): - return OOXML_FILES.issubset(member_filenames) - @staticmethod def _has_allowed_archive_extension(file_name, allowed_extensions): if allowed_extensions: @@ -829,9 +821,9 @@ def _extract_file(self, action_result, file_path, file_name, recursive, containe if container_id is None: container_id = self.get_container_id() - file_type = magic.from_file(file_path, mime=True) + file_type, is_supported = self.is_deflation_supported_file(file_path) - if file_type not in SUPPORTED_FILES: + if not is_supported: return action_result.set_status(phantom.APP_ERROR, "Deflation of file type: {0} not supported".format(file_type)) config = self.get_config() @@ -923,6 +915,22 @@ def _extract_file(self, action_result, file_path, file_name, recursive, containe return action_result.set_status(phantom.APP_SUCCESS) + @staticmethod + def is_deflation_supported_file(file_path) -> Tuple[str, bool]: + """ + Checks if the file is supported for deflation. + + This method patches invalid behavior of some Operating + Systems recognizing MS Office files (eg. xslx) as zip + files which lead to an enormous deflation process run + hanging the service. + """ + msooxml_magic_file_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "magic", "msooxml") + m = magic.Magic(mime=True, magic_file=msooxml_magic_file_path) + file_type = m.from_file(file_path) + + return file_type, file_type in SUPPORTED_FILES + def _deflate_item(self, param): action_result = self.add_action_result(ActionResult(dict(param))) @@ -953,13 +961,13 @@ def _deflate_item(self, param): "Failed to get vault item info: {}".format(self._get_error_message_from_exception(e))) try: - file_type = magic.from_file(file_path, mime=True) + file_type, is_supported = self.is_deflation_supported_file(file_path) except IOError: return action_result.set_status(phantom.APP_ERROR, PHANTOM_ERR_FILE_PATH_NOT_FOUND) except Exception: return action_result.set_status(phantom.APP_ERROR, PHANTOM_ERR_FILE_PATH_NOT_FOUND) - if file_type not in SUPPORTED_FILES: + if not is_supported: return action_result.set_status(phantom.APP_ERROR, "Deflation of file type: {0} not supported".format(file_type)) ret_val = self._extract_file(action_result, file_path, file_name, param.get('recursive', False), diff --git a/phantom_consts.py b/phantom_consts.py index 63faedf..847464e 100644 --- a/phantom_consts.py +++ b/phantom_consts.py @@ -14,6 +14,8 @@ # and limitations under the License. TIMEOUT = 120 INVALID_RESPONSE = 'Server did not return a valid JSON response.' + +# list of file types supported for deflation SUPPORTED_FILES = ['application/zip', 'application/x-gzip', 'application/x-tar', 'application/x-bzip2', 'application/gzip'] # Consts for error messages From 7e83af7e564b80bef8051c5bf6b2f2c899126ef9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janusz=20Kamie=C5=84ski?= Date: Thu, 23 May 2024 11:31:20 +0200 Subject: [PATCH 02/12] PAPP-34012 add magic file; bump version for minimum required SOAR version --- magic/msooxml | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++ phantom.json | 2 +- 2 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 magic/msooxml diff --git a/magic/msooxml b/magic/msooxml new file mode 100644 index 0000000..76205ef --- /dev/null +++ b/magic/msooxml @@ -0,0 +1,75 @@ + +#------------------------------------------------------------------------------ +# $File: msooxml,v 1.21 2024/05/12 18:57:02 christos Exp $ +# msooxml: file(1) magic for Microsoft Office XML +# From: Ralf Brown + +# .docx, .pptx, and .xlsx are XML plus other files inside a ZIP +# archive. The first member file is normally "[Content_Types].xml". +# but some libreoffice generated files put this later. Perhaps skip +# the "[Content_Types].xml" test? +# Since MSOOXML doesn't have anything like the uncompressed "mimetype" +# file of ePub or OpenDocument, we'll have to scan for a filename +# which can distinguish between the three types + +0 name msooxml +>0 string word/ Microsoft Word 2007+ +!:mime application/vnd.openxmlformats-officedocument.wordprocessingml.document +!:ext docx +>0 string ppt/ Microsoft PowerPoint 2007+ +!:mime application/vnd.openxmlformats-officedocument.presentationml.presentation +!:ext pptx +>0 string xl/ Microsoft Excel 2007+ +!:mime application/vnd.openxmlformats-officedocument.spreadsheetml.sheet +!:ext xlsx +>0 string visio/ Microsoft Visio 2013+ +!:mime application/vnd.ms-visio.drawing.main+xml +>0 string AppManifest.xaml Microsoft Silverlight Application +!:mime application/x-silverlight-app + +# start by checking for ZIP local file header signature +0 string PK\003\004 +!:strength +10 +# make sure the first file is correct +>0x1E use msooxml +>0x1E default x +>>0x1E regex \\[Content_Types\\]\\.xml|_rels/\\.rels|docProps|customXml +# skip to the second local file header +# since some documents include a 520-byte extra field following the file +# header, we need to scan for the next header +>>>(18.l+49) search/6000 PK\003\004 +>>>>&26 use msooxml +>>>>&26 default x +# now skip to the *third* local file header; again, we need to scan due to a +# 520-byte extra field following the file header +>>>>&26 search/6000 PK\003\004 +# and check the subdirectory name to determine which type of OOXML +# file we have. Correct the mimetype with the registered ones: +# https://technet.microsoft.com/en-us/library/cc179224.aspx +>>>>>&26 use msooxml +>>>>>&26 default x +# OpenOffice/Libreoffice orders ZIP entry differently, so check the 4th file +>>>>>>&26 search/6000 PK\003\004 +>>>>>>>&26 use msooxml +# Some OOXML generators add an extra customXml directory. Check another file. +>>>>>>>&26 default x +>>>>>>>>&26 search/6000 PK\003\004 +>>>>>>>>>&26 use msooxml +>>>>>>>>>&26 default x +>>>>>>>>>&26 search/6000 PK\003\004 +>>>>>>>>>>&26 use msooxml +>>>>>>>>>>&26 default x Microsoft OOXML +>>>>>>>>>&26 default x Microsoft OOXML +>>>>>>>>&26 default x Microsoft OOXML +>>>>>>>&26 default x Microsoft OOXML +>>>>>&26 default x Microsoft OOXML +>>0x1E regex \\[trash\\] +>>>&26 search/6000 PK\003\004 +>>>>&26 search/6000 PK\003\004 +>>>>>&26 use msooxml +>>>>>&26 default x +>>>>>>&26 search/6000 PK\003\004 +>>>>>>>&26 use msooxml +>>>>>>>&26 default x Microsoft OOXML +>>>>>>&26 default x Microsoft OOXML +>>>>>&26 default x Microsoft OOXML diff --git a/phantom.json b/phantom.json index de887f6..4f33430 100644 --- a/phantom.json +++ b/phantom.json @@ -17,7 +17,7 @@ "product_vendor": "Phantom", "product_name": "Phantom", "product_version_regex": ".*", - "min_phantom_version": "5.2.0", + "min_phantom_version": "6.1.1", "fips_compliant": true, "license": "Copyright (c) 2016-2024 Splunk Inc.", "logo": "logo_splunk.svg", From b00fb35e66bbe2f6795ea88e3349289dcb986434 Mon Sep 17 00:00:00 2001 From: splunk-soar-connectors-admin Date: Thu, 23 May 2024 09:32:06 +0000 Subject: [PATCH 03/12] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1941295..5b0733c 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Connector Version: 3.6.2 Product Vendor: Phantom Product Name: Phantom Product Version Supported (regex): ".\*" -Minimum Product Version: 5.2.0 +Minimum Product Version: 6.1.1 This App exposes various Phantom APIs as actions From 1644643f178e3c8244a299d7baa9cd5a89080226 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janusz=20Kamie=C5=84ski?= Date: Thu, 23 May 2024 15:14:47 +0200 Subject: [PATCH 04/12] PAPP-34012 provide fallback for magic mime recognition on the default OS definitions for files not detected as msooxml types --- phantom_connector.py | 4 ++++ phantom_consts.py | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/phantom_connector.py b/phantom_connector.py index 493b6fd..bb76def 100644 --- a/phantom_connector.py +++ b/phantom_connector.py @@ -929,6 +929,10 @@ def is_deflation_supported_file(file_path) -> Tuple[str, bool]: m = magic.Magic(mime=True, magic_file=msooxml_magic_file_path) file_type = m.from_file(file_path) + if file_type not in OPEN_XML_FORMATS: + # fallback to the default magic definitions + file_type = magic.from_file(file_path) + return file_type, file_type in SUPPORTED_FILES def _deflate_item(self, param): diff --git a/phantom_consts.py b/phantom_consts.py index 847464e..10c38bf 100644 --- a/phantom_consts.py +++ b/phantom_consts.py @@ -15,6 +15,14 @@ TIMEOUT = 120 INVALID_RESPONSE = 'Server did not return a valid JSON response.' +OPEN_XML_FORMATS = [ + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/vnd.ms-visio.drawing.main+xml", + "application/x-silverlight-app", +] + # list of file types supported for deflation SUPPORTED_FILES = ['application/zip', 'application/x-gzip', 'application/x-tar', 'application/x-bzip2', 'application/gzip'] From 69fb9c14e617cc8b0cfc405819deb72694f262cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janusz=20Kamie=C5=84ski?= Date: Thu, 23 May 2024 15:17:15 +0200 Subject: [PATCH 05/12] PAPP-34012 lint fix --- phantom_connector.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/phantom_connector.py b/phantom_connector.py index bb76def..bb6f274 100644 --- a/phantom_connector.py +++ b/phantom_connector.py @@ -32,18 +32,18 @@ from typing import Tuple import magic -import requests -from bs4 import BeautifulSoup -from requests.exceptions import SSLError, Timeout - import phantom.app as phantom import phantom.rules as ph_rules import phantom.utils as ph_utils +import requests +from bs4 import BeautifulSoup from phantom.action_result import ActionResult from phantom.base_connector import BaseConnector from phantom.cef import CEF_JSON, CEF_NAME_MAPPING from phantom.utils import CONTAINS_VALIDATORS from phantom.vault import Vault +from requests.exceptions import SSLError, Timeout + # Constants imports from phantom_consts import * From 81d138bce1c332f709d6a4d261edea0f95704c51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janusz=20Kamie=C5=84ski?= Date: Thu, 23 May 2024 15:23:55 +0200 Subject: [PATCH 06/12] PAPP-34012 isort fixes --- phantom_connector.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/phantom_connector.py b/phantom_connector.py index bb6f274..62d2ccb 100644 --- a/phantom_connector.py +++ b/phantom_connector.py @@ -31,19 +31,19 @@ from pathlib import Path from typing import Tuple +import requests +from bs4 import BeautifulSoup +from requests.exceptions import SSLError, Timeout + import magic import phantom.app as phantom import phantom.rules as ph_rules import phantom.utils as ph_utils -import requests -from bs4 import BeautifulSoup from phantom.action_result import ActionResult from phantom.base_connector import BaseConnector from phantom.cef import CEF_JSON, CEF_NAME_MAPPING from phantom.utils import CONTAINS_VALIDATORS from phantom.vault import Vault -from requests.exceptions import SSLError, Timeout - # Constants imports from phantom_consts import * From f958df28c52dc71a01c231f4b866b2e093bec037 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janusz=20Kamie=C5=84ski?= Date: Thu, 23 May 2024 15:39:50 +0200 Subject: [PATCH 07/12] PAPP-34012 check mime type --- phantom_connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phantom_connector.py b/phantom_connector.py index 62d2ccb..f2a70b2 100644 --- a/phantom_connector.py +++ b/phantom_connector.py @@ -931,7 +931,7 @@ def is_deflation_supported_file(file_path) -> Tuple[str, bool]: if file_type not in OPEN_XML_FORMATS: # fallback to the default magic definitions - file_type = magic.from_file(file_path) + file_type = magic.from_file(file_path, mime=True) return file_type, file_type in SUPPORTED_FILES From ae92b76205cbf6b952a00d83348e48ba54e9b354 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janusz=20Kamie=C5=84ski?= <200957+ivellios@users.noreply.github.com> Date: Wed, 10 Jul 2024 14:02:55 +0200 Subject: [PATCH 08/12] Update phantom_connector.py docstring Co-authored-by: bb-splunk <147497734+bb-splunk@users.noreply.github.com> --- phantom_connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phantom_connector.py b/phantom_connector.py index f2a70b2..ec2ca90 100644 --- a/phantom_connector.py +++ b/phantom_connector.py @@ -921,7 +921,7 @@ def is_deflation_supported_file(file_path) -> Tuple[str, bool]: Checks if the file is supported for deflation. This method patches invalid behavior of some Operating - Systems recognizing MS Office files (eg. xslx) as zip + Systems recognizing MS Office files (eg. xlsx) as zip files which lead to an enormous deflation process run hanging the service. """ From 230ed9511e8db7e1ebfc804c55a5e3b111e8150b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janusz=20Kamie=C5=84ski?= Date: Thu, 11 Jul 2024 09:14:45 +0200 Subject: [PATCH 09/12] remove naming confusion for the method name --- phantom_connector.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/phantom_connector.py b/phantom_connector.py index f2a70b2..cb784cd 100644 --- a/phantom_connector.py +++ b/phantom_connector.py @@ -795,7 +795,7 @@ def _add_file_to_vault(self, action_result, data_stream, file_name, recursive, c file_name = vault_info['name'] - file_type, is_supported = self.is_deflation_supported_file(file_path) + file_type, is_supported = self.check_deflation_supported_file(file_path) if not is_supported: return (phantom.APP_SUCCESS) @@ -821,7 +821,7 @@ def _extract_file(self, action_result, file_path, file_name, recursive, containe if container_id is None: container_id = self.get_container_id() - file_type, is_supported = self.is_deflation_supported_file(file_path) + file_type, is_supported = self.check_deflation_supported_file(file_path) if not is_supported: return action_result.set_status(phantom.APP_ERROR, "Deflation of file type: {0} not supported".format(file_type)) @@ -916,7 +916,7 @@ def _extract_file(self, action_result, file_path, file_name, recursive, containe return action_result.set_status(phantom.APP_SUCCESS) @staticmethod - def is_deflation_supported_file(file_path) -> Tuple[str, bool]: + def check_deflation_supported_file(file_path) -> Tuple[str, bool]: """ Checks if the file is supported for deflation. @@ -930,7 +930,7 @@ def is_deflation_supported_file(file_path) -> Tuple[str, bool]: file_type = m.from_file(file_path) if file_type not in OPEN_XML_FORMATS: - # fallback to the default magic definitions + # fallback to the default magic files definitions file_type = magic.from_file(file_path, mime=True) return file_type, file_type in SUPPORTED_FILES @@ -965,7 +965,7 @@ def _deflate_item(self, param): "Failed to get vault item info: {}".format(self._get_error_message_from_exception(e))) try: - file_type, is_supported = self.is_deflation_supported_file(file_path) + file_type, is_supported = self.check_deflation_supported_file(file_path) except IOError: return action_result.set_status(phantom.APP_ERROR, PHANTOM_ERR_FILE_PATH_NOT_FOUND) except Exception: From 516e126efd3ce4cffedc894d7bd8b4d3084a499b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janusz=20Kamie=C5=84ski?= Date: Thu, 11 Jul 2024 09:16:58 +0200 Subject: [PATCH 10/12] rename directory so it is not confused for being a package by isort --- {magic => magic_files}/msooxml | 0 phantom_connector.py | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename {magic => magic_files}/msooxml (100%) diff --git a/magic/msooxml b/magic_files/msooxml similarity index 100% rename from magic/msooxml rename to magic_files/msooxml diff --git a/phantom_connector.py b/phantom_connector.py index cb784cd..b3cd167 100644 --- a/phantom_connector.py +++ b/phantom_connector.py @@ -31,11 +31,11 @@ from pathlib import Path from typing import Tuple +import magic import requests from bs4 import BeautifulSoup from requests.exceptions import SSLError, Timeout -import magic import phantom.app as phantom import phantom.rules as ph_rules import phantom.utils as ph_utils @@ -925,7 +925,7 @@ def check_deflation_supported_file(file_path) -> Tuple[str, bool]: files which lead to an enormous deflation process run hanging the service. """ - msooxml_magic_file_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "magic", "msooxml") + msooxml_magic_file_path = os.path.join(pathlib.Path(__file__).parent.resolve(), "magic_files", "msooxml") m = magic.Magic(mime=True, magic_file=msooxml_magic_file_path) file_type = m.from_file(file_path) From 88883f291eca857603d4ac34226ee2574e09753b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janusz=20Kamie=C5=84ski?= Date: Thu, 11 Jul 2024 09:19:47 +0200 Subject: [PATCH 11/12] bumped minimum platform version --- phantom.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phantom.json b/phantom.json index 4f33430..9ea6f0d 100644 --- a/phantom.json +++ b/phantom.json @@ -17,7 +17,7 @@ "product_vendor": "Phantom", "product_name": "Phantom", "product_version_regex": ".*", - "min_phantom_version": "6.1.1", + "min_phantom_version": "6.2.1", "fips_compliant": true, "license": "Copyright (c) 2016-2024 Splunk Inc.", "logo": "logo_splunk.svg", From 7a1eb5720a2c59967b8e11596d0cd73a00fbd851 Mon Sep 17 00:00:00 2001 From: splunk-soar-connectors-admin Date: Thu, 11 Jul 2024 07:20:24 +0000 Subject: [PATCH 12/12] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5b0733c..6734906 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Connector Version: 3.6.2 Product Vendor: Phantom Product Name: Phantom Product Version Supported (regex): ".\*" -Minimum Product Version: 6.1.1 +Minimum Product Version: 6.2.1 This App exposes various Phantom APIs as actions