aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2021-05-21 09:05:20 -0400
committer Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2021-05-21 09:05:20 -0400
commit 9768c88abc66f12a9fa77302a66dc599a987c433 (patch)
tree 02fbf06137bbfb8c4dafe25b3722229350c0a39c
parent 21a3a883974f0bc9569aef0fcc6e14706b810f31 (diff)
download korg-helpers-9768c88abc66f12a9fa77302a66dc599a987c433.tar.gz
Use construct more tolerant to unicode failures
When we iterate directly through messages, unicode or other parsing errors cause us to fail with a backtrace (BT) on the entire mailbox. Iterating by key allows us to skip only the individual unparseable messages.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rwxr-xr-x list-archive-collector.py 9
1 files changed, 7 insertions, 2 deletions
diff --git a/list-archive-collector.py b/list-archive-collector.py
index b000b4b..416fe90 100755
--- a/list-archive-collector.py
+++ b/list-archive-collector.py
@@ -348,8 +348,13 @@ def grab_pipermail_archive(pipermail_url, mbx, listid, toaddr, lookaside, checks
# Open it now as a mailbox
tmpmbx = mailbox.mbox(tmpfile)
- for msg in tmpmbx:
- logger.info(' processing: %s', msg.get('Message-Id'))
+ for mkey in tmpmbx.keys():
+ try:
+ msg = tmpmbx.get_message(mkey)
+ except: # noqa
+ logger.info(' error parsing message %d, skipped', mkey)
+ continue
+
fromline = str(msg.get('From', ''))
if fromline and fromline.find('(') > 0:
# Fix bogus From: foo@bar.baz (Foo Barski) -> Foo Barski <foo@bar.baz>