diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-03-10 09:08:00 -0400 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-03-10 09:08:00 -0400 |
commit | 4c7cf63cbc2d3c4cc4b333df67a6b6355cdf25e6 (patch) | |
tree | 477893556bbe7c107e04f367f125b209ba160b64 | |
parent | 41d9210c37833bedd6e357eaf23a81d120493bdd (diff) | |
download | korg-helpers-4c7cf63cbc2d3c4cc4b333df67a6b6355cdf25e6.tar.gz |
Add more fuzzy trailer matching logic
Will now also accept trailers if:
- there's a full match for the local part and a partial match for the
domain part of the email in the From header/trailer
- we find a comma in the From name and match the trailer name after
rearranging it
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rwxr-xr-x | get-lore-mbox.py | 34 |
1 files changed, 32 insertions, 2 deletions
diff --git a/get-lore-mbox.py b/get-lore-mbox.py index 5160fb5..58244bc 100755 --- a/get-lore-mbox.py +++ b/get-lore-mbox.py @@ -30,7 +30,7 @@ charset.add_charset('utf-8', None) emlpolicy = email.policy.EmailPolicy(utf8=True, cte_type='8bit', max_line_length=None) logger = logging.getLogger('get-lore-mbox') -VERSION = '0.2.14' +VERSION = '0.2.15' # You can use bash-style globbing here WANTHDRS = [ @@ -431,10 +431,40 @@ class LoreMessage: # Basic sanity checking -- the trailer must match the name or the email # in the From header, to avoid false-positive trailer parsing errors for tname, tvalue in matches: + tmatch = False namedata = email.utils.getaddresses([tvalue])[0] tfrom = re.sub(r'\+[^@]+@', '@', namedata[1].lower()) hfrom = re.sub(r'\+[^@]+@', '@', self.fromemail.lower()) - if tfrom == hfrom or namedata[0].lower() == self.fromname.lower(): + tlname = namedata[0].lower() + hlname = self.fromname.lower() + tchunks = tfrom.split('@') + hchunks = hfrom.split('@') + if tfrom == hfrom: + logger.debug(' trailer exact email match') + tmatch = True + # See if domain part of one of the addresses is a subset of the other one, + # which should match cases like @linux.intel.com and @intel.com + elif (len(tchunks) == 2 and len(hchunks) == 2 + and tchunks[0] == hchunks[0] + and (tchunks[1].find(hchunks[1]) >= 0 or hchunks[1].find(tchunks[1]) >= 0)): + logger.debug(' trailer fuzzy email match') + tmatch = True + # Does the name match, at least? 
+ elif tlname == hlname: + logger.debug(' trailer exact name match') + tmatch = True + # Finally, see if the header From has a comma in it and try to find all + # parts in the trailer name + elif hlname.find(',') > 0: + nmatch = True + for nchunk in hlname.split(','): + if hlname.find(nchunk.strip()) < 0: + nmatch = False + break + if nmatch: + logger.debug(' trailer fuzzy name match') + tmatch = True + if tmatch: self.trailers.append((tname, tvalue)) else: logger.debug(' ignoring "%s: %s" due to from mismatch (from: %s %s)', tname, tvalue, |