Skip to content

Commit 1c1ebec

Browse files
authored
Merge pull request #108 from demisto/fix-attachment-bug
Fix attachment bug
2 parents 5ea8591 + 6a9a4e4 commit 1c1ebec

File tree

4 files changed

+61
-12
lines changed

4 files changed

+61
-12
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
v0.1.34
44
* Fixed an issue that prevented the headers from being parsed correctly for eml files (unknown-8bit encoding).
5+
* Fixed an issue where emails with attachments were parsed incorrectly for eml files.
56

67
v0.1.33
78
* Fixed an issue where html parts were not parsed properly in EML files.

parse_emails/handle_eml.py

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -110,13 +110,9 @@ def handle_eml(file_path, b64=False, file_name=None, parse_only_headers=False, m
110110
while parts:
111111
part = parts.pop()
112112

113-
payload = part.get_payload()
114-
115113
logger.debug(f'Iterating over parts. Current part: {part.get_content_type()=}')
116114
if (part.is_multipart() or part.get_content_type().startswith('multipart')) \
117-
and "attachment" not in part.get("Content-Disposition", "") or \
118-
(payload and isinstance(payload, list) and len(payload) == 1 and
119-
payload[0].get_content_type() == 'text/html'):
115+
and "attachment" not in part.get("Content-Disposition", ""):
120116
parts += [part_ for part_ in part.get_payload() if isinstance(part_, email.message.Message)]
121117

122118
elif part.get_filename()\
@@ -200,13 +196,16 @@ def handle_eml(file_path, b64=False, file_name=None, parse_only_headers=False, m
200196
attachment_file_name = individual_message.get_filename()
201197
attachment_content_id = individual_message.get('Content-ID')
202198
attachment_content_disposition = individual_message.get('Content-Disposition')
203-
if attachment_file_name is None:
204-
attachment_file_name = f"unknown_file_name{i}"
205-
206-
attachment_content.append(msg_info)
207-
attachment_names.append(attachment_file_name)
208-
attachment_content_ids.append(attachment_content_id)
209-
attachment_content_dispositions.append(attachment_content_disposition)
199+
if not attachment_file_name and not attachment_content_ids and 'text/html' in individual_message.get_content_type():
200+
html = decode_content(individual_message)
201+
else:
202+
if attachment_file_name is None:
203+
attachment_file_name = f"unknown_file_name{i}"
204+
205+
attachment_content.append(msg_info)
206+
attachment_names.append(attachment_file_name)
207+
attachment_content_ids.append(attachment_content_id)
208+
attachment_content_dispositions.append(attachment_content_disposition)
210209
else:
211210
file_content = part.get_payload(decode=True)
212211
if attachment_file_name.endswith('.p7s') or not file_content:

parse_emails/tests/parse_emails_test.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -991,3 +991,24 @@ def test_handle_eml_unknown8bit():
991991
email_parser = EmailParser(file_path='parse_emails/tests/test_data/test-unknown-8bit.eml')
992992
results = email_parser.parse()
993993
assert results['From'] == '[email protected]'
994+
995+
996+
def test_multipart_eml_with_eml_attachment_containing_html_body():
997+
"""
998+
Given:
999+
- eml file with another eml file attached, containing text/html content.
1000+
When:
1001+
- parsing the file.
1002+
Then:
1003+
- make sure the msg was correctly parsed.
1004+
"""
1005+
test_path = 'parse_emails/tests/test_data/multipart_with_eml_attachment_containing_html.eml'
1006+
1007+
email_parser = EmailParser(file_path=test_path, max_depth=2)
1008+
results = email_parser.parse()
1009+
1010+
assert len(results) == 2
1011+
assert results[0]["HTML"] == ""
1012+
assert results[0]["Attachments"] == "original_message.eml"
1013+
assert len(results[0]["AttachmentsData"]) > 0
1014+
assert results[1]["ParentFileName"] == "multipart_with_eml_attachment_containing_html.eml"
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
2+
3+
Subject: Your Subject
4+
Date: 14 Jan 2025 12:00:00 +0000
5+
Content-Type: multipart/mixed; boundary="000000000000e915c3062bcd115c"
6+
7+
--000000000000e915c3062bcd115c
8+
Content-Type: text/plain; charset="UTF-8"
9+
Content-Transfer-Encoding: base64
10+
11+
Email with attached another email
12+
13+
--000000000000e915c3062bcd115c
14+
Content-Type: message/rfc822; name="original_message.eml"
15+
Content-Disposition: attachment; filename="original_message.eml"
16+
Content-Transfer-Encoding: 8bit
17+
X-Attachment-Id: f0af9d461a78b41c_0.1
18+
19+
20+
21+
Date: 16 Jan 2025 05:31:24 +0000
22+
Subject: =?utf-8?B?QXR0YWNoZWQgZW1haWwgc3ViamVjdA==?=
23+
Content-Type: text/html; charset="utf-8"
24+
Content-Transfer-Encoding: base64
25+
26+
PG1ldGEgaHR0cC1lcXVpdj0iQ29udGVudC1UeXBlIiBjb250ZW50PSJ0ZXh0L2h0b
27+
Ww7IGNoYXJzZXQ9dXRmLTgiPg0KPHA+QXR0YWNoZWQgZW1haWwgSFRNTDwvcD4=
28+
--000000000000e915c3062bcd115c--

0 commit comments

Comments
 (0)