Skip to content

Commit

Permalink
Merge pull request #20 from splunk-soar-connectors/jkamienski/PAPP-34…
Browse files Browse the repository at this point in the history
…012-handle_msooxml_files_being_recognized_as_zip_type

PAPP-34012 recognize msooxml files in all environments
  • Loading branch information
ivellios authored Aug 12, 2024
2 parents 4c5a582 + 7a1eb57 commit 796ef8d
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 18 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Connector Version: 3.6.2
Product Vendor: Phantom
Product Name: Phantom
Product Version Supported (regex): ".\*"
Minimum Product Version: 5.2.0
Minimum Product Version: 6.2.1

This App exposes various Phantom APIs as actions

Expand Down
75 changes: 75 additions & 0 deletions magic_files/msooxml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@

#------------------------------------------------------------------------------
# $File: msooxml,v 1.21 2024/05/12 18:57:02 christos Exp $
# msooxml: file(1) magic for Microsoft Office XML
# From: Ralf Brown <[email protected]>

# .docx, .pptx, and .xlsx are XML plus other files inside a ZIP
# archive. The first member file is normally "[Content_Types].xml",
# but some LibreOffice-generated files put this later. Perhaps skip
# the "[Content_Types].xml" test?
# Since MSOOXML doesn't have anything like the uncompressed "mimetype"
# file of ePub or OpenDocument, we'll have to scan for a filename
# which can distinguish between the three types

# Named subroutine, invoked elsewhere in this file with "use msooxml" at the
# offset of a ZIP member file name. Each test below compares the start of that
# name against a top-level directory (or file) that identifies one document
# type, and reports the matching description and MIME type.
0 name msooxml
# Word documents keep their parts under "word/"
>0 string word/ Microsoft Word 2007+
!:mime application/vnd.openxmlformats-officedocument.wordprocessingml.document
!:ext docx
# PowerPoint presentations keep their parts under "ppt/"
>0 string ppt/ Microsoft PowerPoint 2007+
!:mime application/vnd.openxmlformats-officedocument.presentationml.presentation
!:ext pptx
# Excel workbooks keep their parts under "xl/"
>0 string xl/ Microsoft Excel 2007+
!:mime application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
!:ext xlsx
# Visio drawings keep their parts under "visio/"
>0 string visio/ Microsoft Visio 2013+
!:mime application/vnd.ms-visio.drawing.main+xml
# Silverlight packages are identified by their manifest file name
>0 string AppManifest.xaml Microsoft Silverlight Application
!:mime application/x-silverlight-app

# start by checking for ZIP local file header signature
0 string PK\003\004
!:strength +10
# make sure the first file is correct
# (0x1E = 30 is where the member file name starts in a ZIP local file header)
>0x1E use msooxml
# the "default" test is only taken when no earlier test at this level matched,
# i.e. when the msooxml subroutine did not recognise the first member name
>0x1E default x
>>0x1E regex \\[Content_Types\\]\\.xml|_rels/\\.rels|docProps|customXml
# skip to the second local file header
# since some documents include a 520-byte extra field following the file
# header, we need to scan for the next header
>>>(18.l+49) search/6000 PK\003\004
# &26 is relative to the end of the matched 4-byte signature, so it lands on
# the member file name of the header that was just found
>>>>&26 use msooxml
>>>>&26 default x
# now skip to the *third* local file header; again, we need to scan due to a
# 520-byte extra field following the file header
>>>>&26 search/6000 PK\003\004
# and check the subdirectory name to determine which type of OOXML
# file we have. Correct the mimetype with the registered ones:
# https://technet.microsoft.com/en-us/library/cc179224.aspx
>>>>>&26 use msooxml
>>>>>&26 default x
# OpenOffice/LibreOffice order ZIP entries differently, so check the 4th file
>>>>>>&26 search/6000 PK\003\004
>>>>>>>&26 use msooxml
# Some OOXML generators add an extra customXml directory. Check another file.
>>>>>>>&26 default x
>>>>>>>>&26 search/6000 PK\003\004
>>>>>>>>>&26 use msooxml
>>>>>>>>>&26 default x
>>>>>>>>>&26 search/6000 PK\003\004
>>>>>>>>>>&26 use msooxml
# the remaining "default" tests label the file with the generic
# "Microsoft OOXML" description when no specific document type was recognised
>>>>>>>>>>&26 default x Microsoft OOXML
>>>>>>>>>&26 default x Microsoft OOXML
>>>>>>>>&26 default x Microsoft OOXML
>>>>>>>&26 default x Microsoft OOXML
>>>>>&26 default x Microsoft OOXML
# archives whose first member name matches "[trash]": skip two further local
# file headers before probing a member name with the msooxml subroutine
>>0x1E regex \\[trash\\]
>>>&26 search/6000 PK\003\004
>>>>&26 search/6000 PK\003\004
>>>>>&26 use msooxml
>>>>>&26 default x
>>>>>>&26 search/6000 PK\003\004
>>>>>>>&26 use msooxml
>>>>>>>&26 default x Microsoft OOXML
>>>>>>&26 default x Microsoft OOXML
>>>>>&26 default x Microsoft OOXML
2 changes: 1 addition & 1 deletion phantom.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"product_vendor": "Phantom",
"product_name": "Phantom",
"product_version_regex": ".*",
"min_phantom_version": "5.2.0",
"min_phantom_version": "6.2.1",
"fips_compliant": true,
"license": "Copyright (c) 2016-2024 Splunk Inc.",
"logo": "logo_splunk.svg",
Expand Down
44 changes: 28 additions & 16 deletions phantom_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@
import gzip
import json
import os
import pathlib
import random
import socket
import string
import tarfile
import time
import zipfile
from pathlib import Path
from typing import Tuple

import magic
import requests
Expand All @@ -51,12 +53,6 @@
from urllib import quote


# A list of files that MS OOXML archives will contain.
# OOXML documents are zip files with metadata, assets and data as various
# archive entries. We do not want the deflate action to extract these.
OOXML_FILES = frozenset(['[Content_Types].xml', '_rels/.rels'])


def determine_contains(value):
valid_contains = list()
for c, f in list(CONTAINS_VALIDATORS.items()):
Expand Down Expand Up @@ -799,20 +795,16 @@ def _add_file_to_vault(self, action_result, data_stream, file_name, recursive, c

file_name = vault_info['name']

file_type = magic.from_file(file_path, mime=True)
file_type, is_supported = self.check_deflation_supported_file(file_path)

if file_type not in SUPPORTED_FILES:
if not is_supported:
return (phantom.APP_SUCCESS)

self._extract_file(action_result, file_path, file_name, recursive, container_id)
self._level -= 1

return (phantom.APP_SUCCESS)

@staticmethod
def _is_ooxml_zip(member_filenames):
    """Return True when every entry of OOXML_FILES is among *member_filenames*.

    :param member_filenames: iterable of archive member names
    """
    present = frozenset(member_filenames)
    return OOXML_FILES <= present

@staticmethod
def _has_allowed_archive_extension(file_name, allowed_extensions):
if allowed_extensions:
Expand All @@ -829,9 +821,9 @@ def _extract_file(self, action_result, file_path, file_name, recursive, containe
if container_id is None:
container_id = self.get_container_id()

file_type = magic.from_file(file_path, mime=True)
file_type, is_supported = self.check_deflation_supported_file(file_path)

if file_type not in SUPPORTED_FILES:
if not is_supported:
return action_result.set_status(phantom.APP_ERROR, "Deflation of file type: {0} not supported".format(file_type))

config = self.get_config()
Expand Down Expand Up @@ -923,6 +915,26 @@ def _extract_file(self, action_result, file_path, file_name, recursive, containe

return action_result.set_status(phantom.APP_SUCCESS)

@staticmethod
def check_deflation_supported_file(file_path) -> Tuple[str, bool]:
    """
    Detect the MIME type of a file and report whether it can be deflated.

    Some operating systems identify MS Office files (e.g. xlsx) as plain
    zip archives, which starts an enormous deflation run that hangs the
    service. To avoid that, the file is first checked against the
    "magic_files/msooxml" definitions shipped next to this module; the
    default magic definitions are consulted only when that check does not
    yield one of OPEN_XML_FORMATS.

    :param file_path: path of the file to inspect
    :return: tuple of (detected MIME type, True if that type is listed in
        SUPPORTED_FILES)
    """
    module_dir = pathlib.Path(__file__).parent.resolve()
    # python-magic is handed a plain string path, as before
    ooxml_rules = str(module_dir / "magic_files" / "msooxml")
    ooxml_detector = magic.Magic(mime=True, magic_file=ooxml_rules)

    detected = ooxml_detector.from_file(file_path)
    if detected in OPEN_XML_FORMATS:
        return detected, detected in SUPPORTED_FILES

    # not an Office Open XML document: use the default magic definitions
    detected = magic.from_file(file_path, mime=True)
    return detected, detected in SUPPORTED_FILES

def _deflate_item(self, param):

action_result = self.add_action_result(ActionResult(dict(param)))
Expand Down Expand Up @@ -953,13 +965,13 @@ def _deflate_item(self, param):
"Failed to get vault item info: {}".format(self._get_error_message_from_exception(e)))

try:
file_type = magic.from_file(file_path, mime=True)
file_type, is_supported = self.check_deflation_supported_file(file_path)
except IOError:
return action_result.set_status(phantom.APP_ERROR, PHANTOM_ERR_FILE_PATH_NOT_FOUND)
except Exception:
return action_result.set_status(phantom.APP_ERROR, PHANTOM_ERR_FILE_PATH_NOT_FOUND)

if file_type not in SUPPORTED_FILES:
if not is_supported:
return action_result.set_status(phantom.APP_ERROR, "Deflation of file type: {0} not supported".format(file_type))

ret_val = self._extract_file(action_result, file_path, file_name, param.get('recursive', False),
Expand Down
10 changes: 10 additions & 0 deletions phantom_consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@
# and limitations under the License.
# Numeric timeout setting (unit not stated here; presumably seconds - confirm at the call sites)
TIMEOUT = 120
# Message reported when a server reply is not valid JSON
INVALID_RESPONSE = 'Server did not return a valid JSON response.'

# MIME types reported by the bundled "msooxml" magic definitions
OPEN_XML_FORMATS = [
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "application/vnd.ms-visio.drawing.main+xml",
    "application/x-silverlight-app",
]

# MIME types of the file formats supported for deflation
SUPPORTED_FILES = [
    'application/zip',
    'application/x-gzip',
    'application/x-tar',
    'application/x-bzip2',
    'application/gzip',
]

# Consts for error messages
Expand Down

0 comments on commit 796ef8d

Please sign in to comment.