diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2021-03-26 17:44:42 -0400 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2021-03-26 17:44:42 -0400 |
commit | 3f3a8532c0e0101c8ba7121182a756ba68713923 (patch) | |
tree | da9b5d0a147b90492f7c45b495f1a046c7b1e460 | |
parent | 4bc9130253d01efaabbb26c3c8114f3574716cd1 (diff) | |
download | korg-helpers-3f3a8532c0e0101c8ba7121182a756ba68713923.tar.gz |
Fix mailman demangling
Make it more generic without a pre-check. May not even be slower.
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rwxr-xr-x | list-archive-collector.py | 17 |
1 files changed, 8 insertions, 9 deletions
diff --git a/list-archive-collector.py b/list-archive-collector.py
index 40ac910..b000b4b 100755
--- a/list-archive-collector.py
+++ b/list-archive-collector.py
@@ -329,16 +329,15 @@ def grab_pipermail_archive(pipermail_url, mbx, listid, toaddr, lookaside, checks
         regex = r'^From '
         subst = '>From '
         mboxdata = re.sub(regex, subst, mboxdata, 0, flags=re.M)
+        # Fix pipermail mangling where it changes some email addresses
+        # to be ' at ' instead of '@'. This is easiest to do with a
+        # handful of regexes than via actual message body manipulation
+        # as part of the python's email.message object
         regex = r'(<[^>]+) at ([^>]+>)'
-        if re.search(regex, mboxdata):
-            # Fix pipermail mangling where it changes some email addresses
-            # to be ' at ' instead of '@'. This is easiest to do with a
-            # handful of regexes than via actual message body manipulation
-            # as part of the python's email.message object
-            subst = r'\1@\2'
-            mboxdata = re.sub(regex, subst, mboxdata, 0, flags=re.M)
-            regex = r'^>?(From:? \S+) at (\S+\..*)'
-            mboxdata = re.sub(regex, subst, mboxdata, 0, flags=re.M)
+        subst = r'\1@\2'
+        mboxdata = re.sub(regex, subst, mboxdata, 0, flags=re.M)
+        regex = r'^>?(From:? \S+) at (\S+\..*)'
+        mboxdata = re.sub(regex, subst, mboxdata, 0, flags=re.M)
         # Fix any remaining false From escapes
         regex = r'^>(From\s+\S+\s+\w+\s+\w+\s+\d+\s+\d+:\d+:\d+\s+\d{4})'
         subst = r'\1'