diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-02-17 13:23:12 -0500 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-02-17 13:23:12 -0500 |
commit | 4ab72b4995c5539cd750603138818d41b739c4fa (patch) | |
tree | dd6d4fbb6b426b4b0fe6bf9658aa14ce0bcaae7b | |
parent | e8fd5a0e03371fb11af50bdd81f933ceeda4bc3a (diff) | |
download | korg-helpers-4ab72b4995c5539cd750603138818d41b739c4fa.tar.gz |
Fix traceback (TB) on 8-bit headers in older Python
Getting a message as_bytes() before adding it to the am-ready mailbox
allows us to pass the policy that avoids triggering some of the odder
bugs in older versions of python.
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rwxr-xr-x | get-lore-mbox.py | 25 |
1 file changed, 7 insertions, 18 deletions
```diff
diff --git a/get-lore-mbox.py b/get-lore-mbox.py
index eb309b7..af9d847 100755
--- a/get-lore-mbox.py
+++ b/get-lore-mbox.py
@@ -12,6 +12,7 @@ import email
 import email.message
 import email.utils
 import email.header
+import email.policy
 import subprocess
 import logging
 import re
@@ -28,7 +29,7 @@ from email import charset
 charset.add_charset('utf-8', None)
 logger = logging.getLogger('get-lore-mbox')
 
-VERSION = '0.2.1'
+VERSION = '0.2.2'
 
 # You can use bash-style globbing here
 WANTHDRS = [
@@ -283,7 +284,8 @@ class LoreSeries:
                     lmsg.followup_trailers.append(('Link', linkmask % lmsg.msgid))
                 logger.info(' %s', lmsg.full_subject)
                 msg = lmsg.get_am_message(trailer_order=trailer_order)
-                mbx.add(msg)
+                # Pass a policy that avoids most legacy encoding horrors
+                mbx.add(msg.as_bytes(policy=msg.policy.clone(utf8=True, cte_type='8bit', max_line_length=None)))
             else:
                 logger.error(' ERROR: missing [%s/%s]!', at, self.expected)
             at += 1
@@ -413,22 +415,9 @@ class LoreMessage:
 
     @staticmethod
     def clean_header(hdrval):
-        new_hdrval = ''
-        dhdrs = email.header.decode_header(hdrval)
-        for dhdr in dhdrs:
-            if dhdr[1] is not None:
-                try:
-                    uval = dhdr[0].decode(dhdr[1], errors='replace')
-                except LookupError:
-                    # Not known charset/encoding. Try utf-8 and hope for the best.
-                    uval = dhdr[0].decode('utf-8', errors='replace')
-            elif isinstance(dhdr[0], (bytes, bytearray)):
-                uval = dhdr[0].decode('utf-8', errors='replace')
-            else:
-                uval = dhdr[0]
-            uval = uval.replace('\n', ' ')
-            new_hdrval += re.sub(r'\s+', ' ', uval).strip()
-        return new_hdrval
+        uval = hdrval.replace('\n', ' ')
+        new_hdrval = re.sub(r'\s+', ' ', uval)
+        return new_hdrval.strip()
 
     @staticmethod
     def get_clean_msgid(msg, header='Message-Id'):
```