From bf050f99161346aa8aafb36190dca2a106f20fe7 Mon Sep 17 00:00:00 2001 From: Damon Delcoro Date: Fri, 3 Apr 2020 17:02:46 -0400 Subject: [PATCH] handle multiline reply headers in Gmail Gmail will break the reply header into multiple lines when it is over 80 characters. As an example: On Fri, Apr 3, 2020 at 12:32 PM Someone <someone@example.com> wrote: Will become: On Fri, Apr 3, 2020 at 12:32 PM Someone < someone@example.com> wrote: This was causing the reply header to be included in the final returned body. This update matches the reply header across multiple lines so it is excluded. --- .gitignore | 4 +++- lib/email_reply_parser.rb | 2 +- test/email_reply_parser_test.rb | 7 ++++++- test/emails/email_multiple_replies.txt | 19 +++++++++++++++++++ 4 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 test/emails/email_multiple_replies.txt diff --git a/.gitignore b/.gitignore index 85f7977..f4720b3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ pkg -.ruby-version \ No newline at end of file +.ruby-version +.byebug_history +.idea \ No newline at end of file diff --git a/lib/email_reply_parser.rb b/lib/email_reply_parser.rb index 87d1365..8005ca5 100644 --- a/lib/email_reply_parser.rb +++ b/lib/email_reply_parser.rb @@ -83,7 +83,7 @@ def read(text) # Check for multi-line reply headers. Some clients break up # the "On DATE, NAME wrote:" line into multiple lines. - if text =~ /^(?!On.*On\s.+?wrote:)(On\s(.+?)wrote:)$/m + if text =~ /(^On\s((?!On).)*wrote:$)/m # Remove all new lines from the reply header. text.gsub! 
$1, $1.gsub("\n", " ") end diff --git a/test/email_reply_parser_test.rb b/test/email_reply_parser_test.rb index df509cb..791b0d9 100644 --- a/test/email_reply_parser_test.rb +++ b/test/email_reply_parser_test.rb @@ -202,7 +202,7 @@ def test_one_is_not_on assert_match(/^On Oct 1, 2012/, reply.fragments[1].to_s) end - def test_mulitple_on + def test_multiple_on reply = email("greedy_on") assert_match(/^On your remote host/, reply.fragments[0].to_s) assert_match(/^On 9 Jan 2014/, reply.fragments[1].to_s) @@ -211,6 +211,11 @@ def test_mulitple_on assert_equal [false, true, true], reply.fragments.map { |f| f.hidden? } end + def test_multiple_replies + reply = email("email_multiple_replies") + assert_equal("TAKE 3 - testing once again", reply.fragments[0].to_s) + end + def test_pathological_emails t0 = Time.now email("pathological") diff --git a/test/emails/email_multiple_replies.txt b/test/emails/email_multiple_replies.txt new file mode 100644 index 0000000..773bed9 --- /dev/null +++ b/test/emails/email_multiple_replies.txt @@ -0,0 +1,19 @@ +TAKE 3 - testing once again + +On Fri, Apr 3, 2020 at 12:32 PM Someone < +someone@example.com> wrote: + +> TAKE 2 - Lets see what is posted now to the portal +> +> On Fri, Apr 3, 2020 at 12:31 PM Someone < +> someone@example.com> wrote: +> +>> Lets see what is posted now to the portal +>> +>> On Fri, Apr 3, 2020 at 11:58 AM wrote: +>> +>>> Project~Stream - Comment Posted +>>> +>>> Someone1 added a comment on 04/03/2020 at 11:58 AM +>>> +>>> Full email test... Hopefully you don't see this in the reply \ No newline at end of file