Catch unicode decode errors

Spam emails might contain non-ASCII characters that are not valid UTF-8; catch the resulting decode error so the script doesn't explode. Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
author: Mario Limonciello <mario.limonciello@amd.com> 2023-10-23 10:10:24 -0500
committer: Mario Limonciello <mario.limonciello@amd.com> 2023-10-23 10:10:47 -0500
commit: 7bfa5f4d10c4921a87a1b4588e63aec3b8c2c60c (patch)
tree: 2dfcb7f111f16a8d57a02071506ca7a8b2a004db
parent: d983107a2dfa60d70df4101b69bfe7054db9704c (diff)
download: linux-firmware-7bfa5f4d10c4921a87a1b4588e63aec3b8c2c60c.tar.gz
1 files changed, 9 insertions, 5 deletions
diff --git a/contrib/process_linux_firmware.py b/contrib/process_linux_firmware.py
index 2b7c7f8d..ab005ee7 100755
--- a/contrib/process_linux_firmware.py
+++ b/contrib/process_linux_firmware.py
@@ -44,11 +44,15 @@ def classify_content(content):
 
     for part in msg.walk():
         if part.get_content_type() == "text/plain":
-            body = part.get_payload(decode=True).decode("utf-8")
-            for key in content_types.keys():
-                if key in body:
-                    return content_types[key]
-            break
+            try:
+                body = part.get_payload(decode=True).decode("utf-8")
+                for key in content_types.keys():
+                    if key in body:
+                        return content_types[key]
+                break
+            except UnicodeDecodeError as e:
+                logging.warning("Failed to decode email: %s, treating as SPAM" % e)
+                break
     return ContentType.SPAM
author	Mario Limonciello <mario.limonciello@amd.com>	2023-10-23 10:10:24 -0500
committer	Mario Limonciello <mario.limonciello@amd.com>	2023-10-23 10:10:47 -0500
commit	7bfa5f4d10c4921a87a1b4588e63aec3b8c2c60c (patch)
tree	2dfcb7f111f16a8d57a02071506ca7a8b2a004db
parent	d983107a2dfa60d70df4101b69bfe7054db9704c (diff)
download	linux-firmware-7bfa5f4d10c4921a87a1b4588e63aec3b8c2c60c.tar.gz