diff options
author | Mario Limonciello <mario.limonciello@amd.com> | 2023-11-16 10:42:10 -0600 |
---|---|---|
committer | Mario Limonciello <mario.limonciello@amd.com> | 2023-11-16 10:42:10 -0600 |
commit | 8228c2222fcf5791fe5643252e4d248839c199e9 (patch) | |
tree | 936f598573e4b45f62cb73159e655ba77d26ded7 | |
parent | 58ec43257cffef033c3210d92d3dd57ac431f262 (diff) | |
download | linux-firmware-8228c2222fcf5791fe5643252e4d248839c199e9.tar.gz |
Try both utf-8 and windows-1252 for decoding email
Recent submissions from Cirrus were classified as spam by the lore
analysis robot script. This is because Cirrus used the windows-1252
encoding, which failed to decode as utf-8.
Try both encodings when decoding email.
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
-rwxr-xr-x | contrib/process_linux_firmware.py | 31 |
1 file changed, 22 insertions, 9 deletions
diff --git a/contrib/process_linux_firmware.py b/contrib/process_linux_firmware.py
index 668e35c0..ea108391 100755
--- a/contrib/process_linux_firmware.py
+++ b/contrib/process_linux_firmware.py
@@ -34,6 +34,8 @@ content_types = {
 def classify_content(content):
     # load content into the email library
     msg = email.message_from_string(content)
+    decoded = None
+    body = None
 
     # check the subject
     subject = msg["Subject"]
@@ -42,17 +44,28 @@ def classify_content(content):
     if "PATCH" in subject:
         return ContentType.PATCH
 
-    for part in msg.walk():
-        if part.get_content_type() == "text/plain":
+    if msg.is_multipart():
+        for part in msg.walk():
+            if part.get_content_type() == "text/plain":
+                body = part.get_payload(decode=True)
+    else:
+        body = msg.get_payload(decode=True)
+
+    if body:
+        for encoding in ["utf-8", "windows-1252"]:
             try:
-                body = part.get_payload(decode=True).decode("utf-8")
-                for key in content_types.keys():
-                    if key in body:
-                        return content_types[key]
-                break
-            except UnicodeDecodeError as e:
-                logging.warning("Failed to decode email: %s, treating as SPAM" % e)
+                decoded = body.decode(encoding)
                 break
+            except UnicodeDecodeError:
+                pass
+
+    if decoded:
+        for key in content_types.keys():
+            if key in decoded:
+                return content_types[key]
+    else:
+        logging.warning("Failed to decode email: %s, treating as SPAM", body)
+
     return ContentType.SPAM