about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Mario Limonciello <mario.limonciello@amd.com> 2023-11-16 10:42:10 -0600
committer Mario Limonciello <mario.limonciello@amd.com> 2023-11-16 10:42:10 -0600
commit 8228c2222fcf5791fe5643252e4d248839c199e9 (patch)
tree 936f598573e4b45f62cb73159e655ba77d26ded7
parent 58ec43257cffef033c3210d92d3dd57ac431f262 (diff)
download linux-firmware-8228c2222fcf5791fe5643252e4d248839c199e9.tar.gz
Try both utf-8 and windows-1252 for decoding email
Recent submissions from Cirrus were classified as spam by the lore analysis robot script. This is because Cirrus used windows-1252 for the encoding, which failed to decode as utf-8.

Try both encodings when decoding email.

Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
-rwxr-xr-x contrib/process_linux_firmware.py 31
1 files changed, 22 insertions, 9 deletions
diff --git a/contrib/process_linux_firmware.py b/contrib/process_linux_firmware.py
index 668e35c0..ea108391 100755
--- a/contrib/process_linux_firmware.py
+++ b/contrib/process_linux_firmware.py
@@ -34,6 +34,8 @@ content_types = {
def classify_content(content):
# load content into the email library
msg = email.message_from_string(content)
+ decoded = None
+ body = None
# check the subject
subject = msg["Subject"]
@@ -42,17 +44,28 @@ def classify_content(content):
if "PATCH" in subject:
return ContentType.PATCH
- for part in msg.walk():
- if part.get_content_type() == "text/plain":
+ if msg.is_multipart():
+ for part in msg.walk():
+ if part.get_content_type() == "text/plain":
+ body = part.get_payload(decode=True)
+ else:
+ body = msg.get_payload(decode=True)
+
+ if body:
+ for encoding in ["utf-8", "windows-1252"]:
try:
- body = part.get_payload(decode=True).decode("utf-8")
- for key in content_types.keys():
- if key in body:
- return content_types[key]
- break
- except UnicodeDecodeError as e:
- logging.warning("Failed to decode email: %s, treating as SPAM" % e)
+ decoded = body.decode(encoding)
break
+ except UnicodeDecodeError:
+ pass
+
+ if decoded:
+ for key in content_types.keys():
+ if key in decoded:
+ return content_types[key]
+ else:
+ logging.warning("Failed to decode email: %s, treating as SPAM", body)
+
return ContentType.SPAM