aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-03-10 09:08:00 -0400
committer Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-03-10 09:08:00 -0400
commit 4c7cf63cbc2d3c4cc4b333df67a6b6355cdf25e6 (patch)
tree 477893556bbe7c107e04f367f125b209ba160b64
parent 41d9210c37833bedd6e357eaf23a81d120493bdd (diff)
download korg-helpers-4c7cf63cbc2d3c4cc4b333df67a6b6355cdf25e6.tar.gz
Add more fuzzy trailer matching logic
Will now also accept trailers if:

- there's a full match for local part and partial match for domain part
  of the email in from/trailer
- we find a comma in the name and match the trailer after rearranging it

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rwxr-xr-x get-lore-mbox.py 34
1 files changed, 32 insertions, 2 deletions
diff --git a/get-lore-mbox.py b/get-lore-mbox.py
index 5160fb5..58244bc 100755
--- a/get-lore-mbox.py
+++ b/get-lore-mbox.py
@@ -30,7 +30,7 @@ charset.add_charset('utf-8', None)
emlpolicy = email.policy.EmailPolicy(utf8=True, cte_type='8bit', max_line_length=None)
logger = logging.getLogger('get-lore-mbox')
-VERSION = '0.2.14'
+VERSION = '0.2.15'
# You can use bash-style globbing here
WANTHDRS = [
@@ -431,10 +431,40 @@ class LoreMessage:
# Basic sanity checking -- the trailer must match the name or the email
# in the From header, to avoid false-positive trailer parsing errors
for tname, tvalue in matches:
+ tmatch = False
namedata = email.utils.getaddresses([tvalue])[0]
tfrom = re.sub(r'\+[^@]+@', '@', namedata[1].lower())
hfrom = re.sub(r'\+[^@]+@', '@', self.fromemail.lower())
- if tfrom == hfrom or namedata[0].lower() == self.fromname.lower():
+ tlname = namedata[0].lower()
+ hlname = self.fromname.lower()
+ tchunks = tfrom.split('@')
+ hchunks = hfrom.split('@')
+ if tfrom == hfrom:
+ logger.debug(' trailer exact email match')
+ tmatch = True
+ # See if domain part of one of the addresses is a subset of the other one,
+ # which should match cases like @linux.intel.com and @intel.com
+ elif (len(tchunks) == 2 and len(hchunks) == 2
+ and tchunks[0] == hchunks[0]
+ and (tchunks[1].find(hchunks[1]) >= 0 or hchunks[1].find(tchunks[1]) >= 0)):
+ logger.debug(' trailer fuzzy email match')
+ tmatch = True
+ # Does the name match, at least?
+ elif tlname == hlname:
+ logger.debug(' trailer exact name match')
+ tmatch = True
+ # Finally, see if the header From has a comma in it and try to find all
+ # parts in the trailer name
+ elif hlname.find(',') > 0:
+ nmatch = True
+ for nchunk in hlname.split(','):
+ if hlname.find(nchunk.strip()) < 0:
+ nmatch = False
+ break
+ if nmatch:
+ logger.debug(' trailer fuzzy name match')
+ tmatch = True
+ if tmatch:
self.trailers.append((tname, tvalue))
else:
logger.debug(' ignoring "%s: %s" due to from mismatch (from: %s %s)', tname, tvalue,