diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-02-01 17:25:58 -0500 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-02-01 17:25:58 -0500 |
commit | 944e03692b2b3071df09153c53b9b9fb5ed4ab71 (patch) | |
tree | 7e625cf036c9d29092e56270f06a22b7960eb728 | |
parent | 080e36b34b5bac5b32d46b094725bf46d8d8623e (diff) | |
download | korg-helpers-944e03692b2b3071df09153c53b9b9fb5ed4ab71.tar.gz |
Improvements based on feedback from Linus
- Give thread summary after saving (without -a)
- Be less verbose by default when saving with -a
- Remove raw inbox after making an am-version
- Remove most of the header bloat in the am-ready mbox
- Remove any MIME data from the am-ready mbox and save as 8bit
(this makes manual editing much simpler)
- Try to figure out what the cover letter is, even if it's not using the
00/NN notation
- Try a few more ways of figuring out base-commit information
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rwxr-xr-x | get-lore-mbox.py | 167 |
1 file changed, 129 insertions, 38 deletions
diff --git a/get-lore-mbox.py b/get-lore-mbox.py index 78f9ad5..b09dc0b 100755 --- a/get-lore-mbox.py +++ b/get-lore-mbox.py @@ -13,6 +13,8 @@ import email.utils import subprocess import logging import re +import fnmatch +import time import requests import gzip @@ -24,6 +26,25 @@ _DEFAULT_CONFIG = { 'linkmask': 'https://lore.kernel.org/r/%s', } +WANTHDRS = {'sender', + 'from', + 'to', + 'cc', + 'subject', + 'date', + 'message-id', + 'resent-message-id', + 'reply-to', + 'in-reply-to', + 'references', + 'mime-*', + 'list-id', + 'content-*', + 'errors-to', + 'x-mailing-list', + 'resent-to', + } + def git_get_command_lines(gitdir, args): out = git_run_command(gitdir, args) @@ -90,7 +111,7 @@ def get_msgid_from_stdin(): sys.exit(1) -def get_pi_thread_by_msgid(msgid, config, outdir='.'): +def get_pi_thread_by_msgid(msgid, config, outdir='.', wantname=None): # Grab the head from lore, to see where we are redirected midmask = config['midmask'] % msgid logger.info('Looking up %s', midmask) @@ -101,15 +122,18 @@ def get_pi_thread_by_msgid(msgid, config, outdir='.'): canonical = resp.headers['Location'].rstrip('/') resp.close() t_mbx_url = '%s/t.mbox.gz' % canonical - logger.critical('Grabbing thread from %s', t_mbx_url) + logger.info('Grabbing thread from %s', t_mbx_url) resp = requests.get(t_mbx_url) t_mbox = gzip.decompress(resp.content) - # Save it into msgid.mbox - savefile = '%s.t.mbx' % msgid - savefile = os.path.join(outdir, savefile) + if wantname: + savefile = os.path.join(outdir, wantname) + else: + # Save it into msgid.mbox + savefile = '%s.t.mbx' % msgid + savefile = os.path.join(outdir, savefile) with open(savefile, 'wb') as fh: + logger.debug('Saving %s', savefile) fh.write(t_mbox) - logger.info('Saved thread into %s', savefile) return savefile @@ -127,15 +151,15 @@ def get_plain_part(msg): return body -def git_add_trailers(msg, trailers): +def git_add_trailers(body, trailers): cmdargs = ['interpret-trailers'] - payload = msg.get_payload(decode=True) for 
trailer in trailers: - logger.info(' Adding trailer: %s', trailer) - cmdargs += ['--trailer', trailer] - output = git_run_command(None, args=cmdargs, stdin=payload) - msg.set_payload(output.encode('utf-8')) - return msg + # Check if this trailer is already in the body + if body.find(trailer) < 0: + logger.info(' Adding trailer: %s', trailer) + cmdargs += ['--trailer', trailer] + output = git_run_command(None, args=cmdargs, stdin=body.encode('utf-8')) + return output def get_clean_msgid(msg, header='Message-ID'): @@ -145,13 +169,14 @@ def get_clean_msgid(msg, header='Message-ID'): return msgid -def mbox_to_am(mboxfile, config, outdir='.', wantver=None): +def mbox_to_am(mboxfile, config, outdir='.', wantver=None, wantname=None): mbx = mailbox.mbox(mboxfile) count = len(mbx) logger.info('Analyzing %s messages in the thread', count) am_kept = list() msgid_map = dict() slug = None + cover_key = None sorted_keys = [None, None] trailer_map = dict() expected_count = 1 @@ -164,6 +189,15 @@ def mbox_to_am(mboxfile, config, outdir='.', wantver=None): # Start by looking at prefixes in the subject matches = re.search(r'\[PATCH([^\]]+)\]', subject, re.IGNORECASE) if not matches: + # if the key is 0, it may be a cover letter. 
Look for + # presence of a diffstat + if key == 0: + body = get_plain_part(msg) + if re.search(r'^\s*\d+\sfile.*\d+ insertion.*\d+ deletion', body, re.MULTILINE | re.IGNORECASE): + # Looks like a cover letter, so keep it in mind, unless we find + # something else better suited to be a cover letter + cover_key = key + am_kept.append(key) # Ignoring this message continue cur_count = 1 @@ -182,7 +216,7 @@ def mbox_to_am(mboxfile, config, outdir='.', wantver=None): if vn is None or new_vn > vn: if new_vn != 1: - logger.info('Found new series version: v%s', new_vn) + logger.debug('Found new series version: v%s', new_vn) if wantver is None or wantver == new_vn: # Blow away anything we currently have in sorted_keys sorted_keys = [None] * (expected_count + 1) @@ -195,15 +229,21 @@ def mbox_to_am(mboxfile, config, outdir='.', wantver=None): logger.info(' Ignoring v%s: %s', vn, subject) continue - # We use a "slug" for mbox name, based on the subject + # We use a "slug" for mbox name, based on the date and author if not slug: - slug = re.sub(r'\s*\[.*?\]\s*', '', subject).lower() - slug = re.sub(r'\W+', '_', slug).strip('_') + msgdate = email.utils.parsedate_tz(str(msg['Date'])) + prefix = time.strftime('%Y%m%d', msgdate[:9]) + authorline = email.utils.getaddresses(msg.get_all('from', []))[0] + if authorline[0]: + author = re.sub(r'\W+', '_', authorline[0]).strip('_').lower() + else: + author = re.sub(r'\W+', '_', authorline[1]).strip('_').lower() + slug = '%s_%s' % (prefix, author) if vn != 1: slug = 'v%s_%s' % (vn, slug) body = get_plain_part(msg) - logger.info(' Processing: %s', subject) - if re.search(r'\D0+/\d+', subject) and sorted_keys[0] is None: + logger.debug(' Processing: %s', subject) + if cur_count == 0 and sorted_keys[0] is None: am_kept.append(key) sorted_keys[cur_count] = key continue @@ -228,19 +268,30 @@ def mbox_to_am(mboxfile, config, outdir='.', wantver=None): trailer_map[irt_key] = matches.groups() if not len(am_kept): - logger.critical('Did not find 
any patches to save') + logger.info('Did not find any patches to save') return None - am_filename = os.path.join(outdir, '%s.mbx' % slug) + if not wantname: + am_filename = os.path.join(outdir, '%s.mbx' % slug) + else: + am_filename = os.path.join(outdir, wantname) + if wantname.find('.') < 0: + slug = wantname + else: + slug = '.'.join(wantname.split('.')[:-1]) if os.path.exists(am_filename): os.unlink(am_filename) am_mbx = mailbox.mbox(am_filename) at = 0 - logger.critical('---') + logger.info('---') logger.critical('Writing %s', am_filename) have_missing = False for key in sorted_keys: + if at == 0 and key is None and cover_key is not None: + # Use the best candidate for the cover letter + key = cover_key + if key is None: if at != 0: logger.error(' ERROR: missing [%s/%s]!', at, expected_count) @@ -248,32 +299,57 @@ def mbox_to_am(mboxfile, config, outdir='.', wantver=None): else: msg = mbx[key] subject = re.sub(r'\s+', ' ', msg['Subject']) - logger.critical(' %s', subject) + logger.info(' %s', subject) + body = get_plain_part(msg) if key in trailer_map: - msg = git_add_trailers(msg, trailer_map[key]) - if msg['Content-Transfer-Encoding'] == 'base64': - msg.replace_header('Content-Transfer-Encoding', '8bit') + body = git_add_trailers(body, trailer_map[key]) + msg.set_payload(body.encode('utf-8')) + if msg['Content-Transfer-Encoding'] not in (None, '8bit'): + msg.replace_header('Content-Transfer-Encoding', '8bit') + + newhdrs = [] + for hdrname, hdrval in list(msg._headers): + lhdrname = hdrname.lower() + wanthdr = False + for hdrmatch in WANTHDRS: + if fnmatch.fnmatch(lhdrname, hdrmatch): + wanthdr = True + break + if wanthdr: + newhdrs.append((hdrname, hdrval)) + + msg._headers = newhdrs am_mbx.add(msg) at += 1 + logger.critical('Total patches: %s', len(am_mbx)) - logger.critical('---') + logger.info('---') if have_missing: logger.critical('WARNING: Thread incomplete, please check above!') top_msgid = get_clean_msgid(am_mbx[0]) linkurl = config['linkmask'] % 
top_msgid - logger.critical('Link: %s', linkurl) + logger.info('Link: %s', linkurl) # Look for base-commit line in the first message body = get_plain_part(am_mbx[0]) - matches = re.search(r'^\s*base-commit: (\S+)\s*$', body, re.MULTILINE) + matches = re.search(r'base-commit: .*?([0-9a-f]+)', body, re.MULTILINE) + base_commit = None + if matches: base_commit = matches.groups()[0] - logger.critical('Base-commit included, you can branch using:') - logger.critical(' git checkout -b %s %s', slug, base_commit) - logger.critical(' git am %s', am_filename) else: - logger.critical('No base-commit information included, sorry.') - logger.critical(' git checkout -b %s master', slug) - logger.critical(' git am %s', am_filename) + # Try a more relaxed search + matches = re.search(r'based on .*?([0-9a-f]+)', body, re.MULTILINE) + if matches: + base_commit = matches.groups()[0] + + if base_commit: + logger.info('Base-commit included, you can branch using:') + logger.info(' git checkout -b %s %s', slug, base_commit) + logger.info(' git am %s', am_filename) + else: + logger.info('No base-commit information included, sorry.') + logger.info(' git checkout -b %s master', slug) + logger.info(' git am %s', am_filename) am_mbx.close() return am_filename @@ -288,6 +364,8 @@ def main(cmdargs): if cmdargs.quiet: ch.setLevel(logging.CRITICAL) + elif cmdargs.debug: + ch.setLevel(logging.DEBUG) else: ch.setLevel(logging.INFO) @@ -301,9 +379,18 @@ def main(cmdargs): msgid = msgid.strip('<>') config = get_config_from_git() - mboxfile = get_pi_thread_by_msgid(msgid, config, outdir=cmdargs.outdir) + mboxfile = get_pi_thread_by_msgid(msgid, config, outdir=cmdargs.outdir, wantname=cmdargs.wantname) if mboxfile and cmdargs.amready: - mbox_to_am(mboxfile, config, outdir=cmdargs.outdir, wantver=cmdargs.version) + # Move it into -thread + threadmbox = '%s-thread' % mboxfile + os.rename(mboxfile, threadmbox) + mbox_to_am(threadmbox, config, outdir=cmdargs.outdir, wantver=cmdargs.version, + 
wantname=cmdargs.wantname) + os.unlink(threadmbox) + else: + mbx = mailbox.mbox(mboxfile) + logger.critical('Saved %s', mboxfile) + logger.critical('%s messages in the thread', len(mbx)) if __name__ == '__main__': @@ -318,6 +405,10 @@ if __name__ == '__main__': help='Make an mbox ready for git am') parser.add_argument('-v', '--version', type=int, default=None, help='Get a specific version of the patch/series (use with -a)') + parser.add_argument('-n', '--mbox-name', dest='wantname', default=None, + help='Filename to name the mbox file') + parser.add_argument('-d', '--debug', action='store_true', default=False, + help='Add more debugging info to the output') parser.add_argument('-q', '--quiet', action='store_true', default=False, help='Only output errors to the stdout') main(parser.parse_args()) |