You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ponymail.apache.org by hu...@apache.org on 2021/09/18 02:13:09 UTC

[incubator-ponymail-foal] branch master updated: Try to locate a better date for the From line.

This is an automated email from the ASF dual-hosted git repository.

humbedooh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git


The following commit(s) were added to refs/heads/master by this push:
     new df57669  Try to locate a better date for the From line.
df57669 is described below

commit df57669628d5287b854e20ed7245a46fd101af1a
Author: Daniel Gruno <hu...@apache.org>
AuthorDate: Fri Sep 17 21:13:03 2021 -0500

    Try to locate a better date for the From line.
    
    This could probably get molded into an external function, but for now it is only needed here.
---
 server/endpoints/mbox.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/server/endpoints/mbox.py b/server/endpoints/mbox.py
index 59cfc01..ec67eae 100644
--- a/server/endpoints/mbox.py
+++ b/server/endpoints/mbox.py
@@ -26,6 +26,9 @@ import typing
 import aiohttp.web
 import asyncio.exceptions
 import time
+import email.utils
+import datetime
+import dateutil.tz
 
 
 async def convert_source(session: plugins.session.SessionObject, email: dict):
@@ -34,7 +37,16 @@ async def convert_source(session: plugins.session.SessionObject, email: dict):
         source_as_text = source["_source"]["source"]
         # Ensure it starts with "From "...or fake it
         if not source_as_text.startswith("From "):
-            from_line = "From MAILER-DAEMON %s\n" % time.strftime("%a %b %d %H:%M:%S %Y")
+            from_line = "From MAILER-DAEMON %s\n" % time.strftime("%a %b %d %H:%M:%S %Y", time.gmtime(0))  # Fallback in case no date found
+            # If we have any Received: headers, we can extrapolate an approximate time from the last (top) one.
+            from_match = re.search(r"(?:[\r\n]|^)Received:\s+from[^;]+?; (.+?)[\r\n]", source_as_text)
+            if from_match:
+                recv_time = email.utils.parsedate_tz(from_match.group(1))
+                if recv_time:
+                    dt_tuple = datetime.datetime(*recv_time[:7])
+                    if recv_time[9]:  # If we have an offset, set timezone
+                        dt_tuple = dt_tuple.replace(tzinfo=dateutil.tz.tzoffset("Offset", recv_time[9]))
+                    from_line = "From MAILER-DAEMON %s\n" % dt_tuple.strftime("%a %b %d %H:%M:%S %Y %z")
             source_as_text = from_line + source_as_text
         # Convert to mboxrd format
         mboxrd_source = ""

Re: [incubator-ponymail-foal] branch master updated: Try to locate a better date for the From line.

Posted by sebb <se...@gmail.com>.
On Sat, 18 Sept 2021 at 15:34, Daniel Gruno <hu...@apache.org> wrote:
>
> On 18/09/2021 05.16, sebb wrote:
> > On Sat, 18 Sept 2021 at 03:13, <hu...@apache.org> wrote:
> >>
> >> This is an automated email from the ASF dual-hosted git repository.
> >>
> >> humbedooh pushed a commit to branch master
> >> in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git
> >>
> >>
> >> The following commit(s) were added to refs/heads/master by this push:
> >>       new df57669  Try to locate a better date for the From line.
> >> df57669 is described below
> >>
> >> commit df57669628d5287b854e20ed7245a46fd101af1a
> >> Author: Daniel Gruno <hu...@apache.org>
> >> AuthorDate: Fri Sep 17 21:13:03 2021 -0500
> >>
> >>      Try to locate a better date for the From line.
> >>
> >>      This could probably get molded into an external function, but for now it is only needed here.
> >> ---
> >>   server/endpoints/mbox.py | 14 +++++++++++++-
> >>   1 file changed, 13 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/server/endpoints/mbox.py b/server/endpoints/mbox.py
> >> index 59cfc01..ec67eae 100644
> >> --- a/server/endpoints/mbox.py
> >> +++ b/server/endpoints/mbox.py
> >> @@ -26,6 +26,9 @@ import typing
> >>   import aiohttp.web
> >>   import asyncio.exceptions
> >>   import time
> >> +import email.utils
> >> +import datetime
> >> +import dateutil.tz
> >>
> >>
> >>   async def convert_source(session: plugins.session.SessionObject, email: dict):
> >> @@ -34,7 +37,16 @@ async def convert_source(session: plugins.session.SessionObject, email: dict):
> >>           source_as_text = source["_source"]["source"]
> >>           # Ensure it starts with "From "...or fake it
> >>           if not source_as_text.startswith("From "):
> >> -            from_line = "From MAILER-DAEMON %s\n" % time.strftime("%a %b %d %H:%M:%S %Y")
> >> +            from_line = "From MAILER-DAEMON %s\n" % time.strftime("%a %b %d %H:%M:%S %Y", time.gmtime(0))  # Fallback in case no date found
> >> +            # If we have any Received: headers, we can extrapolate an approximate time from the last (top) one.
> >> +            from_match = re.search(r"(?:[\r\n]|^)Received:\s+from[^;]+?; (.+?)[\r\n]", source_as_text)
> >> +            if from_match:
> >> +                recv_time = email.utils.parsedate_tz(from_match.group(1))
> >> +                if recv_time:
> >> +                    dt_tuple = datetime.datetime(*recv_time[:7])
> >> +                    if recv_time[9]:  # If we have an offset, set timezone
> >> +                        dt_tuple = dt_tuple.replace(tzinfo=dateutil.tz.tzoffset("Offset", recv_time[9]))
> >> +                    from_line = "From MAILER-DAEMON %s\n" % dt_tuple.strftime("%a %b %d %H:%M:%S %Y %z")
> >
> > -1
> > Conversion to a From_ line timestamp c/should use a standard function,
> > which can then be tested separately.
> > Or at least use a shared constant.
> > Doing either of those might have avoided the bug in the above line.
>
> I don't quite follow here, what bug?

Can you replace the format strings with a single constant?

Read the code very carefully...

> I went and looked at the RFC for mbox formats, and it suggested that a
> datetime.ctime() call would be the best option here, so I've now changed
> it to do just that. For the fallback, I've just put in jan 1st 1970 as
> plain text, there is really no need to call ctime for that.
> >
> >>               source_as_text = from_line + source_as_text
> >>           # Convert to mboxrd format
> >>           mboxrd_source = ""
>

Re: [incubator-ponymail-foal] branch master updated: Try to locate a better date for the From line.

Posted by Daniel Gruno <hu...@apache.org>.
On 18/09/2021 05.16, sebb wrote:
> On Sat, 18 Sept 2021 at 03:13, <hu...@apache.org> wrote:
>>
>> This is an automated email from the ASF dual-hosted git repository.
>>
>> humbedooh pushed a commit to branch master
>> in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git
>>
>>
>> The following commit(s) were added to refs/heads/master by this push:
>>       new df57669  Try to locate a better date for the From line.
>> df57669 is described below
>>
>> commit df57669628d5287b854e20ed7245a46fd101af1a
>> Author: Daniel Gruno <hu...@apache.org>
>> AuthorDate: Fri Sep 17 21:13:03 2021 -0500
>>
>>      Try to locate a better date for the From line.
>>
>>      This could probably get molded into an external function, but for now it is only needed here.
>> ---
>>   server/endpoints/mbox.py | 14 +++++++++++++-
>>   1 file changed, 13 insertions(+), 1 deletion(-)
>>
>> diff --git a/server/endpoints/mbox.py b/server/endpoints/mbox.py
>> index 59cfc01..ec67eae 100644
>> --- a/server/endpoints/mbox.py
>> +++ b/server/endpoints/mbox.py
>> @@ -26,6 +26,9 @@ import typing
>>   import aiohttp.web
>>   import asyncio.exceptions
>>   import time
>> +import email.utils
>> +import datetime
>> +import dateutil.tz
>>
>>
>>   async def convert_source(session: plugins.session.SessionObject, email: dict):
>> @@ -34,7 +37,16 @@ async def convert_source(session: plugins.session.SessionObject, email: dict):
>>           source_as_text = source["_source"]["source"]
>>           # Ensure it starts with "From "...or fake it
>>           if not source_as_text.startswith("From "):
>> -            from_line = "From MAILER-DAEMON %s\n" % time.strftime("%a %b %d %H:%M:%S %Y")
>> +            from_line = "From MAILER-DAEMON %s\n" % time.strftime("%a %b %d %H:%M:%S %Y", time.gmtime(0))  # Fallback in case no date found
>> +            # If we have any Received: headers, we can extrapolate an approximate time from the last (top) one.
>> +            from_match = re.search(r"(?:[\r\n]|^)Received:\s+from[^;]+?; (.+?)[\r\n]", source_as_text)
>> +            if from_match:
>> +                recv_time = email.utils.parsedate_tz(from_match.group(1))
>> +                if recv_time:
>> +                    dt_tuple = datetime.datetime(*recv_time[:7])
>> +                    if recv_time[9]:  # If we have an offset, set timezone
>> +                        dt_tuple = dt_tuple.replace(tzinfo=dateutil.tz.tzoffset("Offset", recv_time[9]))
>> +                    from_line = "From MAILER-DAEMON %s\n" % dt_tuple.strftime("%a %b %d %H:%M:%S %Y %z")
> 
> -1
> Conversion to a From_ line timestamp c/should use a standard function,
> which can then be tested separately.
> Or at least use a shared constant.
> Doing either of those might have avoided the bug in the above line.

I don't quite follow here, what bug?
I went and looked at the RFC for mbox formats, and it suggested that a 
datetime.ctime() call would be the best option here, so I've now changed 
it to do just that. For the fallback, I've just put in Jan 1st 1970 as 
plain text; there is really no need to call ctime for that.

> 
>>               source_as_text = from_line + source_as_text
>>           # Convert to mboxrd format
>>           mboxrd_source = ""


Re: [incubator-ponymail-foal] branch master updated: Try to locate a better date for the From line.

Posted by sebb <se...@gmail.com>.
On Sat, 18 Sept 2021 at 03:13, <hu...@apache.org> wrote:
>
> This is an automated email from the ASF dual-hosted git repository.
>
> humbedooh pushed a commit to branch master
> in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git
>
>
> The following commit(s) were added to refs/heads/master by this push:
>      new df57669  Try to locate a better date for the From line.
> df57669 is described below
>
> commit df57669628d5287b854e20ed7245a46fd101af1a
> Author: Daniel Gruno <hu...@apache.org>
> AuthorDate: Fri Sep 17 21:13:03 2021 -0500
>
>     Try to locate a better date for the From line.
>
>     This could probably get molded into an external function, but for now it is only needed here.
> ---
>  server/endpoints/mbox.py | 14 +++++++++++++-
>  1 file changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/server/endpoints/mbox.py b/server/endpoints/mbox.py
> index 59cfc01..ec67eae 100644
> --- a/server/endpoints/mbox.py
> +++ b/server/endpoints/mbox.py
> @@ -26,6 +26,9 @@ import typing
>  import aiohttp.web
>  import asyncio.exceptions
>  import time
> +import email.utils
> +import datetime
> +import dateutil.tz
>
>
>  async def convert_source(session: plugins.session.SessionObject, email: dict):
> @@ -34,7 +37,16 @@ async def convert_source(session: plugins.session.SessionObject, email: dict):
>          source_as_text = source["_source"]["source"]
>          # Ensure it starts with "From "...or fake it
>          if not source_as_text.startswith("From "):
> -            from_line = "From MAILER-DAEMON %s\n" % time.strftime("%a %b %d %H:%M:%S %Y")
> +            from_line = "From MAILER-DAEMON %s\n" % time.strftime("%a %b %d %H:%M:%S %Y", time.gmtime(0))  # Fallback in case no date found
> +            # If we have any Received: headers, we can extrapolate an approximate time from the last (top) one.
> +            from_match = re.search(r"(?:[\r\n]|^)Received:\s+from[^;]+?; (.+?)[\r\n]", source_as_text)
> +            if from_match:
> +                recv_time = email.utils.parsedate_tz(from_match.group(1))
> +                if recv_time:
> +                    dt_tuple = datetime.datetime(*recv_time[:7])
> +                    if recv_time[9]:  # If we have an offset, set timezone
> +                        dt_tuple = dt_tuple.replace(tzinfo=dateutil.tz.tzoffset("Offset", recv_time[9]))
> +                    from_line = "From MAILER-DAEMON %s\n" % dt_tuple.strftime("%a %b %d %H:%M:%S %Y %z")

-1
Conversion to a From_ line timestamp could/should use a standard function,
which can then be tested separately.
Or at least use a shared constant.
Doing either of those might have avoided the bug in the above line.

>              source_as_text = from_line + source_as_text
>          # Convert to mboxrd format
>          mboxrd_source = ""