about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2023-02-27 13:20:56 -0500
committer: Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2023-02-27 13:24:37 -0500
commit: ebd05d6210e3421af2918dc178985789cc5fc390 (patch)
tree: dc45d4dc3e0d9ad94ee25c415009d2730285b7ea
parent: ada3021c64dd484e53219eb3af55da6c8f25d0ec (diff)
download: b4-ebd05d6210e3421af2918dc178985789cc5fc390.tar.gz
Abstract out our own get_payload for better charset support
When we use msg.get_payload(decode=True), we can't blindly call .decode() on that, because we need to pay attention to the charset of the message. We're already doing various checks for this elsewhere, so move that logic into a static method and use that whenever we need to get the payload of a message that we didn't construct ourselves.

Reported-by: Rob Herring <robh@kernel.org>
Link: https://msgid.link/CAL_JsqJULTWSv8Ww3g=gdLTUqpcgJRD5HFXO_qsUK7L0JN7caw@mail.gmail.com
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- b4/__init__.py 69
-rw-r--r-- b4/ez.py 11
-rw-r--r-- b4/mbox.py 5
3 files changed, 46 insertions, 39 deletions
diff --git a/b4/__init__.py b/b4/__init__.py
index 4d5a6c9..674bff2 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -1040,37 +1040,7 @@ class LoreMessage:
self.date = self.date.replace(tzinfo=datetime.timezone.utc)
# walk until we find the first text/plain part
- mcharset = self.msg.get_content_charset()
- if not mcharset:
- mcharset = 'utf-8'
- self.charset = mcharset
-
- for part in msg.walk():
- cte = part.get_content_type()
- if cte.find('/plain') < 0 and cte.find('/x-patch') < 0:
- continue
- payload = part.get_payload(decode=True)
- if payload is None:
- continue
- pcharset = part.get_content_charset()
- if not pcharset:
- pcharset = mcharset
- try:
- payload = payload.decode(pcharset, errors='replace')
- self.charset = pcharset
- except LookupError:
- # what kind of encoding is that?
- # Whatever, we'll use utf-8 and hope for the best
- payload = payload.decode('utf-8', errors='replace')
- part.set_param('charset', 'utf-8')
- self.charset = 'utf-8'
- if self.body is None:
- self.body = payload
- continue
- # If we already found a body, but we now find something that contains a diff,
- # then we prefer this part
- if DIFF_RE.search(payload):
- self.body = payload
+ self.body, self.charset = LoreMessage.get_payload(self.msg)
if self.body is None:
# Woah, we didn't find any usable parts
@@ -1397,6 +1367,43 @@ class LoreMessage:
return '\n'.join(out)
@staticmethod
+ def get_payload(msg: email.message.Message) -> Tuple[str, str]:
+ # walk until we find the first text/plain part
+ mcharset = msg.get_content_charset()
+ if not mcharset:
+ mcharset = 'utf-8'
+
+ mbody = None
+ for part in msg.walk():
+ cte = part.get_content_type()
+ if cte.find('/plain') < 0 and cte.find('/x-patch') < 0:
+ continue
+ payload = part.get_payload(decode=True)
+ if payload is None:
+ continue
+ pcharset = part.get_content_charset()
+ if not pcharset:
+ pcharset = mcharset
+ try:
+ payload = payload.decode(pcharset, errors='replace')
+ mcharset = pcharset
+ except LookupError:
+ # what kind of encoding is that?
+ # Whatever, we'll use utf-8 and hope for the best
+ payload = payload.decode('utf-8', errors='replace')
+ part.set_param('charset', 'utf-8')
+ mcharset = 'utf-8'
+ if mbody is None:
+ mbody = payload
+ continue
+ # If we already found a body, but we now find something that contains a diff,
+ # then we prefer this part
+ if DIFF_RE.search(payload):
+ mbody = payload
+
+ return mbody, mcharset
+
+ @staticmethod
def clean_header(hdrval):
if hdrval is None:
return ''
diff --git a/b4/ez.py b/b4/ez.py
index 74afddc..3971b6f 100644
--- a/b4/ez.py
+++ b/b4/ez.py
@@ -826,7 +826,7 @@ def update_trailers(cmdargs: argparse.Namespace) -> None:
if not msg:
continue
commit_map[commit] = msg
- body = msg.get_payload(decode=True).decode()
+ body, charset = b4.LoreMessage.get_payload(msg)
patchid = b4.LoreMessage.get_patch_id(body)
ls = b4.LoreSubject(msg.get('subject'))
by_subject[ls.subject] = commit
@@ -876,7 +876,8 @@ def update_trailers(cmdargs: argparse.Namespace) -> None:
logger.debug('No match for %s', lmsg.full_subject)
continue
- parts = b4.LoreMessage.get_body_parts(commit_map[commit].get_payload(decode=True).decode())
+ mbody, mcharset = b4.LoreMessage.get_payload(commit_map[commit])
+ parts = b4.LoreMessage.get_body_parts(mbody)
for fltr in addtrailers:
if fltr not in parts[2]:
if commit not in updates:
@@ -1045,7 +1046,7 @@ def add_cover(csubject: b4.LoreSubject, msgid_tpt: str, patches: List[Tuple[str,
def mixin_cover(cbody: str, patches: List[Tuple[str, email.message.Message]]) -> None:
msg = patches[0][1]
- pbody = msg.get_payload(decode=True).decode()
+ pbody, pcharset = b4.LoreMessage.get_payload(msg)
pheaders, pmessage, ptrailers, pbasement, psignature = b4.LoreMessage.get_body_parts(pbody)
cheaders, cmessage, ctrailers, cbasement, csignature = b4.LoreMessage.get_body_parts(cbody)
nbparts = list()
@@ -1311,7 +1312,7 @@ def cmd_send(cmdargs: argparse.Namespace) -> None:
for commit, msg in patches:
if not msg:
continue
- body = msg.get_payload(decode=True).decode()
+ body, charset = b4.LoreMessage.get_payload(msg)
btrs, junk = b4.LoreMessage.find_trailers(body)
for btr in btrs:
if btr.type != 'person':
@@ -1909,7 +1910,7 @@ def cmd_prep(cmdargs: argparse.Namespace) -> None:
if b4.LoreMessage.get_clean_msgid(msg) == msgid:
# Prepare annotated tag body from the cover letter
lsubject = b4.LoreSubject(msg.get('subject'))
- cbody = msg.get_payload(decode=True).decode()
+ cbody, charset = b4.LoreMessage.get_payload(msg)
prefixes = lsubject.get_extra_prefixes()
if prefixes:
subject = '[%s] %s' % (' '.join(prefixes), lsubject.subject)
diff --git a/b4/mbox.py b/b4/mbox.py
index 0b0fc40..fb9d092 100644
--- a/b4/mbox.py
+++ b/b4/mbox.py
@@ -536,9 +536,8 @@ def get_extra_series(msgs: list, direction: int = 1, wantvers: Optional[int] = N
continue
if not lsub.reply:
- payload = msg.get_payload(decode=True)
- if isinstance(payload, bytes):
- payload = payload.decode()
+ payload, charset = b4.LoreMessage.get_payload(msg)
+ if payload:
matches = re.search(r'^change-id:\s+(\S+)', payload, flags=re.I | re.M)
if matches:
logger.debug('Found change-id %s', matches.groups()[0])