diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2021-05-21 09:05:20 -0400 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2021-05-21 09:05:20 -0400 |
commit | 9768c88abc66f12a9fa77302a66dc599a987c433 (patch) | |
tree | 02fbf06137bbfb8c4dafe25b3722229350c0a39c | |
parent | 21a3a883974f0bc9569aef0fcc6e14706b810f31 (diff) | |
download | korg-helpers-9768c88abc66f12a9fa77302a66dc599a987c433.tar.gz |
Use a construct more tolerant of Unicode failures
When we iterate directly through messages, Unicode or other parsing
errors cause us to fail with a backtrace (BT) on the entire mailbox.
Iterating by key allows us to skip only the individual unparseable messages.
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rwxr-xr-x | list-archive-collector.py | 9 |
1 files changed, 7 insertions, 2 deletions
diff --git a/list-archive-collector.py b/list-archive-collector.py
index b000b4b..416fe90 100755
--- a/list-archive-collector.py
+++ b/list-archive-collector.py
@@ -348,8 +348,13 @@ def grab_pipermail_archive(pipermail_url, mbx, listid, toaddr, lookaside, checks
 
             # Open it now as a mailbox
             tmpmbx = mailbox.mbox(tmpfile)
-            for msg in tmpmbx:
-                logger.info(' processing: %s', msg.get('Message-Id'))
+            for mkey in tmpmbx.keys():
+                try:
+                    msg = tmpmbx.get_message(mkey)
+                except:  # noqa
+                    logger.info(' error parsing message %d, skipped', mkey)
+                    continue
+
                 fromline = str(msg.get('From', ''))
                 if fromline and fromline.find('(') > 0:
                     # Fix bogus From: foo@bar.baz (Foo Barski) -> Foo Barski <foo@bar.baz>