about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-11-27 10:56:31 -0500
committer Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-11-27 10:56:31 -0500
commit d9d9d4eee641cd007487a7f5c4defd71f2d21ee8 (patch)
tree 375bd06c5612a16dcb4e8a7c074fd4c67017d4dc
parent fa9a573b428dd8e8ce67e4ab0a2c28111ec7988d (diff)
download korg-helpers-d9d9d4eee641cd007487a7f5c4defd71f2d21ee8.tar.gz
More handling of non-mangled mailman archives
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rwxr-xr-x list-archive-collector.py 28
1 files changed, 15 insertions, 13 deletions
diff --git a/list-archive-collector.py b/list-archive-collector.py
index f122e27..810fb4e 100755
--- a/list-archive-collector.py
+++ b/list-archive-collector.py
@@ -329,19 +329,21 @@ def grab_pipermail_archive(pipermail_url, mbx, listid, toaddr, lookaside, checks
regex = r'^From '
subst = '>From '
mboxdata = re.sub(regex, subst, mboxdata, 0, flags=re.M)
- # Fix pipermail mangling where it changes some email addresses
- # to be ' at ' instead of '@'. This is easiest to do with a
- # handful of regexes than via actual message body manipulation
- # as part of the python's email.message object
regex = r'(<[^>]+) at ([^>]+>)'
- subst = r'\1@\2'
- mboxdata = re.sub(regex, subst, mboxdata, 0, flags=re.M)
- regex = r'^>?(From:? \S+) at (\S+\..*)'
- mboxdata = re.sub(regex, subst, mboxdata, 0, flags=re.M)
- # Fix any remaining false From escapes
- regex = r'^>(From\s+\S+@\S+\s+\w+\s+\w+\s+\d+\s+\d+:\d+:\d+\s+\d{4})'
- subst = r'\1'
- mboxdata = re.sub(regex, subst, mboxdata, 0, flags=re.M)
+ if re.search(regex, mboxdata):
+ # Fix pipermail mangling where it changes some email addresses
+ # to be ' at ' instead of '@'. This is easiest to do with a
+ # handful of regexes than via actual message body manipulation
+ # as part of the python's email.message object
+ subst = r'\1@\2'
+ mboxdata = re.sub(regex, subst, mboxdata, 0, flags=re.M)
+ regex = r'^>?(From:? \S+) at (\S+\..*)'
+ mboxdata = re.sub(regex, subst, mboxdata, 0, flags=re.M)
+ else:
+ # Fix any remaining false From escapes
+ regex = r'^>(From\s+\S+@\S+\s+\w+\s+\w+\s+\d+\s+\d+:\d+:\d+\s+\d{4})'
+ subst = r'\1'
+ mboxdata = re.sub(regex, subst, mboxdata, 0, flags=re.M)
with open(tmpfile, 'wb') as out_fh:
out_fh.write(mboxdata.encode())
@@ -350,7 +352,7 @@ def grab_pipermail_archive(pipermail_url, mbx, listid, toaddr, lookaside, checks
tmpmbx = mailbox.mbox(tmpfile)
for msg in tmpmbx:
logger.info(' processing: %s', msg.get('Message-Id'))
- fromline = msg.get('From')
+ fromline = str(msg.get('From', ''))
if fromline and fromline.find('(') > 0:
# Fix bogus From: foo@bar.baz (Foo Barski) -> Foo Barski <foo@bar.baz>
matches = re.search(r'(\S+@\S+\.\S+) \((.*)\)$', fromline)