diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2023-02-27 13:20:56 -0500 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2023-02-27 13:24:37 -0500 |
commit | ebd05d6210e3421af2918dc178985789cc5fc390 (patch) | |
tree | dc45d4dc3e0d9ad94ee25c415009d2730285b7ea | |
parent | ada3021c64dd484e53219eb3af55da6c8f25d0ec (diff) | |
download | b4-ebd05d6210e3421af2918dc178985789cc5fc390.tar.gz |
Abstract out our own get_payload for better charset support

When we use msg.get_payload(decode=True), we can't blindly call
.decode() on that, because we need to pay attention to the charset of
the message. We're already doing various checks for this elsewhere, so
move that logic into a static method and use that whenever we need to
get the payload of a message that we didn't construct ourselves.

Reported-by: Rob Herring <robh@kernel.org>
Link: https://msgid.link/CAL_JsqJULTWSv8Ww3g=gdLTUqpcgJRD5HFXO_qsUK7L0JN7caw@mail.gmail.com
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- | b4/__init__.py | 69 |
-rw-r--r-- | b4/ez.py | 11 |
-rw-r--r-- | b4/mbox.py | 5 |
3 files changed, 46 insertions, 39 deletions
diff --git a/b4/__init__.py b/b4/__init__.py index 4d5a6c9..674bff2 100644 --- a/b4/__init__.py +++ b/b4/__init__.py @@ -1040,37 +1040,7 @@ class LoreMessage: self.date = self.date.replace(tzinfo=datetime.timezone.utc) # walk until we find the first text/plain part - mcharset = self.msg.get_content_charset() - if not mcharset: - mcharset = 'utf-8' - self.charset = mcharset - - for part in msg.walk(): - cte = part.get_content_type() - if cte.find('/plain') < 0 and cte.find('/x-patch') < 0: - continue - payload = part.get_payload(decode=True) - if payload is None: - continue - pcharset = part.get_content_charset() - if not pcharset: - pcharset = mcharset - try: - payload = payload.decode(pcharset, errors='replace') - self.charset = pcharset - except LookupError: - # what kind of encoding is that? - # Whatever, we'll use utf-8 and hope for the best - payload = payload.decode('utf-8', errors='replace') - part.set_param('charset', 'utf-8') - self.charset = 'utf-8' - if self.body is None: - self.body = payload - continue - # If we already found a body, but we now find something that contains a diff, - # then we prefer this part - if DIFF_RE.search(payload): - self.body = payload + self.body, self.charset = LoreMessage.get_payload(self.msg) if self.body is None: # Woah, we didn't find any usable parts @@ -1397,6 +1367,43 @@ class LoreMessage: return '\n'.join(out) @staticmethod + def get_payload(msg: email.message.Message) -> Tuple[str, str]: + # walk until we find the first text/plain part + mcharset = msg.get_content_charset() + if not mcharset: + mcharset = 'utf-8' + + mbody = None + for part in msg.walk(): + cte = part.get_content_type() + if cte.find('/plain') < 0 and cte.find('/x-patch') < 0: + continue + payload = part.get_payload(decode=True) + if payload is None: + continue + pcharset = part.get_content_charset() + if not pcharset: + pcharset = mcharset + try: + payload = payload.decode(pcharset, errors='replace') + mcharset = pcharset + except LookupError: + # 
what kind of encoding is that? + # Whatever, we'll use utf-8 and hope for the best + payload = payload.decode('utf-8', errors='replace') + part.set_param('charset', 'utf-8') + mcharset = 'utf-8' + if mbody is None: + mbody = payload + continue + # If we already found a body, but we now find something that contains a diff, + # then we prefer this part + if DIFF_RE.search(payload): + mbody = payload + + return mbody, mcharset + + @staticmethod def clean_header(hdrval): if hdrval is None: return '' @@ -826,7 +826,7 @@ def update_trailers(cmdargs: argparse.Namespace) -> None: if not msg: continue commit_map[commit] = msg - body = msg.get_payload(decode=True).decode() + body, charset = b4.LoreMessage.get_payload(msg) patchid = b4.LoreMessage.get_patch_id(body) ls = b4.LoreSubject(msg.get('subject')) by_subject[ls.subject] = commit @@ -876,7 +876,8 @@ def update_trailers(cmdargs: argparse.Namespace) -> None: logger.debug('No match for %s', lmsg.full_subject) continue - parts = b4.LoreMessage.get_body_parts(commit_map[commit].get_payload(decode=True).decode()) + mbody, mcharset = b4.LoreMessage.get_payload(commit_map[commit]) + parts = b4.LoreMessage.get_body_parts(mbody) for fltr in addtrailers: if fltr not in parts[2]: if commit not in updates: @@ -1045,7 +1046,7 @@ def add_cover(csubject: b4.LoreSubject, msgid_tpt: str, patches: List[Tuple[str, def mixin_cover(cbody: str, patches: List[Tuple[str, email.message.Message]]) -> None: msg = patches[0][1] - pbody = msg.get_payload(decode=True).decode() + pbody, pcharset = b4.LoreMessage.get_payload(msg) pheaders, pmessage, ptrailers, pbasement, psignature = b4.LoreMessage.get_body_parts(pbody) cheaders, cmessage, ctrailers, cbasement, csignature = b4.LoreMessage.get_body_parts(cbody) nbparts = list() @@ -1311,7 +1312,7 @@ def cmd_send(cmdargs: argparse.Namespace) -> None: for commit, msg in patches: if not msg: continue - body = msg.get_payload(decode=True).decode() + body, charset = b4.LoreMessage.get_payload(msg) btrs, 
junk = b4.LoreMessage.find_trailers(body) for btr in btrs: if btr.type != 'person': @@ -1909,7 +1910,7 @@ def cmd_prep(cmdargs: argparse.Namespace) -> None: if b4.LoreMessage.get_clean_msgid(msg) == msgid: # Prepare annotated tag body from the cover letter lsubject = b4.LoreSubject(msg.get('subject')) - cbody = msg.get_payload(decode=True).decode() + cbody, charset = b4.LoreMessage.get_payload(msg) prefixes = lsubject.get_extra_prefixes() if prefixes: subject = '[%s] %s' % (' '.join(prefixes), lsubject.subject) @@ -536,9 +536,8 @@ def get_extra_series(msgs: list, direction: int = 1, wantvers: Optional[int] = N continue if not lsub.reply: - payload = msg.get_payload(decode=True) - if isinstance(payload, bytes): - payload = payload.decode() + payload, charset = b4.LoreMessage.get_payload(msg) + if payload: matches = re.search(r'^change-id:\s+(\S+)', payload, flags=re.I | re.M) if matches: logger.debug('Found change-id %s', matches.groups()[0]) |