about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-02-17 13:23:12 -0500
committer: Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-02-17 13:23:12 -0500
commit: 4ab72b4995c5539cd750603138818d41b739c4fa (patch)
tree: dd6d4fbb6b426b4b0fe6bf9658aa14ce0bcaae7b
parent: e8fd5a0e03371fb11af50bdd81f933ceeda4bc3a (diff)
download: korg-helpers-4ab72b4995c5539cd750603138818d41b739c4fa.tar.gz
Fix TB on 8bit headers in older python
Getting a message as_bytes() before adding it to the am-ready mailbox allows us to pass the policy that avoids triggering some of the odder bugs in older versions of python.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rwxr-xr-x  get-lore-mbox.py  25
1 files changed, 7 insertions, 18 deletions
diff --git a/get-lore-mbox.py b/get-lore-mbox.py
index eb309b7..af9d847 100755
--- a/get-lore-mbox.py
+++ b/get-lore-mbox.py
@@ -12,6 +12,7 @@ import email
import email.message
import email.utils
import email.header
+import email.policy
import subprocess
import logging
import re
@@ -28,7 +29,7 @@ from email import charset
charset.add_charset('utf-8', None)
logger = logging.getLogger('get-lore-mbox')
-VERSION = '0.2.1'
+VERSION = '0.2.2'
# You can use bash-style globbing here
WANTHDRS = [
@@ -283,7 +284,8 @@ class LoreSeries:
lmsg.followup_trailers.append(('Link', linkmask % lmsg.msgid))
logger.info(' %s', lmsg.full_subject)
msg = lmsg.get_am_message(trailer_order=trailer_order)
- mbx.add(msg)
+ # Pass a policy that avoids most legacy encoding horrors
+ mbx.add(msg.as_bytes(policy=msg.policy.clone(utf8=True, cte_type='8bit', max_line_length=None)))
else:
logger.error(' ERROR: missing [%s/%s]!', at, self.expected)
at += 1
@@ -413,22 +415,9 @@ class LoreMessage:
@staticmethod
def clean_header(hdrval):
- new_hdrval = ''
- dhdrs = email.header.decode_header(hdrval)
- for dhdr in dhdrs:
- if dhdr[1] is not None:
- try:
- uval = dhdr[0].decode(dhdr[1], errors='replace')
- except LookupError:
- # Not known charset/encoding. Try utf-8 and hope for the best.
- uval = dhdr[0].decode('utf-8', errors='replace')
- elif isinstance(dhdr[0], (bytes, bytearray)):
- uval = dhdr[0].decode('utf-8', errors='replace')
- else:
- uval = dhdr[0]
- uval = uval.replace('\n', ' ')
- new_hdrval += re.sub(r'\s+', ' ', uval).strip()
- return new_hdrval
+ uval = hdrval.replace('\n', ' ')
+ new_hdrval = re.sub(r'\s+', ' ', uval)
+ return new_hdrval.strip()
@staticmethod
def get_clean_msgid(msg, header='Message-Id'):