diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-01-31 17:28:13 -0500 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-01-31 17:28:13 -0500 |
commit | 1522ae002ed1627872b3cec0b379b2b8b09a5c73 (patch) | |
tree | e1b684c136d98e0d3c46541f90e0b70e538fc2a5 | |
parent | 2c44372604e5fea1a090e59447dccc74e439b09a (diff) | |
download | korg-helpers-1522ae002ed1627872b3cec0b379b2b8b09a5c73.tar.gz |
Initial version of get-lore-mbox
This gets you full threads from lore.kernel.org using just a single
message-id of a message anywhere in that thread. It can also create mbox
files ready for use with "git am", with all trailers tallied up.
Needs more testing, but generally works fairly well.
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rwxr-xr-x | get-lore-mbox.py | 312 |
1 files changed, 312 insertions, 0 deletions
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
"""Fetch a full thread from lore.kernel.org using a single message-id.

Given the message-id of any message in a thread, download the whole thread
as an mbox file. Optionally build a second mbox that is ready for use with
"git am", with follow-up trailers added to the messages they reply to.
"""
__author__ = 'Konstantin Ryabitsev <konstantin@linuxfoundation.org>'

import os
import sys
import argparse
import mailbox
import email
import email.message
import email.utils
import subprocess
import logging
import re

try:
    import requests
except ImportError:
    # Only the download step needs requests; report its absence there with a
    # readable message instead of failing with a traceback at import time.
    requests = None
import gzip

logger = logging.getLogger('lorify')

_DEFAULT_CONFIG = {
    'midmask': 'https://lore.kernel.org/r/%s',
    'linkmask': 'https://lore.kernel.org/r/%s',
}


def git_get_command_lines(gitdir, args):
    """Run a git command and return its non-empty output lines as a list."""
    out = git_run_command(gitdir, args)
    if not out:
        return []
    return [line for line in out.split('\n') if line != '']


def git_run_command(gitdir, args, stdin=None, logstderr=False):
    """Run ``git --no-pager [--git-dir gitdir] args`` and return its stdout.

    :param gitdir: value for ``--git-dir``, or a falsy value to omit it
    :param args: list of arguments appended to the git command line
    :param stdin: bytes fed to the command's stdin (skipped when falsy)
    :param logstderr: when True, log non-empty stderr at debug level
    :return: stripped stdout decoded as utf-8 (undecodable bytes replaced)
    """
    cmdargs = ['git', '--no-pager']
    if gitdir:
        cmdargs += ['--git-dir', gitdir]
    cmdargs += args

    logger.debug('Running %s', ' '.join(cmdargs))

    # input=None leaves stdin inherited, exactly like not passing a pipe.
    result = subprocess.run(cmdargs, input=stdin if stdin else None,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    output = result.stdout.strip().decode('utf-8', errors='replace')

    if logstderr and result.stderr.strip():
        logger.debug('Stderr: %s', result.stderr.decode('utf-8', errors='replace'))

    return output


def get_config_from_git():
    """Return the default config overlaid with ``lorify.*`` git config values.

    The last dot-separated component of each git config key is used as the
    config key. A fresh dict is returned on every call so that the
    module-level defaults are never modified.
    """
    gitconfig = dict(_DEFAULT_CONFIG)
    args = ['config', '-z', '--get-regexp', r'lorify\..*']
    out = git_run_command(None, args)
    if not out:
        return gitconfig

    # With -z, each entry is "key\nvalue" and entries are NUL-separated
    for line in out.split('\x00'):
        if not line:
            continue
        try:
            key, value = line.split('\n', 1)
        except ValueError:
            # Entry without a value part
            logger.debug('Ignoring git config entry %s', line)
            continue
        gitconfig[key.split('.')[-1]] = value

    return gitconfig


def get_msgid_from_stdin():
    """Parse a message piped to stdin and return its Message-ID header.

    Returns None when the piped message has no Message-ID header. Logs an
    error and exits with status 1 when stdin is a terminal.
    """
    if sys.stdin.isatty():
        logger.error('Error: pipe a message or pass msgid as parameter')
        sys.exit(1)
    message = email.message_from_string(sys.stdin.read())
    return message.get('Message-ID', None)


def get_pi_thread_by_msgid(msgid, config, outdir='.'):
    """Download the thread containing msgid and save it as an mbox file.

    :param msgid: message-id without the surrounding angle brackets
    :param config: dict with a 'midmask' URL template taking the msgid
    :param outdir: directory to save the thread into
    :return: path of the saved ``<msgid>.t.mbx`` file, or None when the
             server did not provide a redirect or returned an error
    """
    if requests is None:
        logger.critical('The "requests" python module is required to download threads')
        sys.exit(1)

    # Grab the head from lore, to see where we are redirected
    midmask = config['midmask'] % msgid
    logger.info('Looking up %s', midmask)
    resp = requests.head(midmask)
    location = resp.headers.get('Location')
    status = resp.status_code
    resp.close()
    if not location:
        # No redirect means the server does not know this message-id
        logger.critical('Could not look up %s (server returned %s)', msgid, status)
        return None

    canonical = location.rstrip('/')
    t_mbx_url = '%s/t.mbox.gz' % canonical
    logger.critical('Grabbing thread from %s', t_mbx_url)
    resp = requests.get(t_mbx_url)
    if resp.status_code != 200:
        logger.critical('Server returned an error: %s', resp.status_code)
        resp.close()
        return None
    t_mbox = gzip.decompress(resp.content)
    resp.close()

    # Save it into msgid.mbox
    savefile = '%s.t.mbx' % msgid
    savefile = os.path.join(outdir, savefile)
    with open(savefile, 'wb') as fh:
        fh.write(t_mbox)
    logger.info('Saved thread into %s', savefile)
    return savefile


def get_plain_part(msg):
    """Return the decoded text of the first text/plain part of msg.

    The part's declared charset is used when it is known to Python, utf-8
    otherwise; undecodable bytes are replaced. Returns an empty string when
    the message has no decodable text/plain part.
    """
    # walk until we find the first text/plain part
    for part in msg.walk():
        if part.get_content_type() != 'text/plain':
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        charset = part.get_content_charset() or 'utf-8'
        try:
            return payload.decode(charset, errors='replace')
        except LookupError:
            # Charset name unknown to Python
            return payload.decode('utf-8', errors='replace')

    return ''


def git_add_trailers(mbx, key, trailers):
    """Add trailers to message ``key`` of mbx using git interpret-trailers.

    :param mbx: mailbox holding the message; the entry is replaced in place
    :param key: mailbox key of the message to update
    :param trailers: iterable of "Token: value" trailer strings
    """
    cmdargs = ['interpret-trailers']
    irt_bytes = mbx.get_bytes(key)
    for trailer in trailers:
        logger.info('    Adding trailer: %s', trailer)
        cmdargs += ['--trailer', trailer]
    output = git_run_command(None, args=cmdargs, stdin=irt_bytes)
    if not output:
        # git gave us nothing back; keep the message as it was instead of
        # replacing it with an empty one
        logger.debug('No output from interpret-trailers, leaving message unchanged')
        return
    updated_msg = email.message_from_string(output)
    mbx[key] = updated_msg


def get_clean_msgid(msg, header='Message-ID'):
    """Return the given header with whitespace and angle brackets stripped.

    Returns the header value unchanged (None or empty) when it is not set.
    """
    msgid = msg.get(header)
    if msgid:
        msgid = msgid.strip().strip('<>')
    return msgid


def _place_key(sorted_keys, position, key):
    """Store key at position in sorted_keys, growing the list when needed."""
    shortfall = position + 1 - len(sorted_keys)
    if shortfall > 0:
        sorted_keys.extend([None] * shortfall)
    sorted_keys[position] = key


def mbox_to_am(mboxfile, config, outdir='.', wantver=None):
    """Build a "git am"-ready mbox from a thread mbox.

    Messages whose subject carries a ``[...]`` prefix are examined. Cover
    letters and messages containing a diff are kept and ordered by their
    n/m counter. For other messages, lines that look like trailers are
    added, via git interpret-trailers, to the message they reply to.

    :param mboxfile: path to the thread mbox
    :param config: dict with a 'linkmask' URL template taking a msgid
    :param outdir: directory to write the resulting mbox into
    :param wantver: series version to extract; when None, the highest
                    version found in the thread is used
    :return: path of the written mbox, or None when no patches were found
    """
    mbx = mailbox.mbox(mboxfile)
    count = len(mbx)
    logger.info('Analyzing %s messages in the thread', count)
    am_kept = list()
    msgid_map = dict()
    slug = None
    sorted_keys = [None, None]
    vn = None
    for key, msg in mbx.items():
        msgid = get_clean_msgid(msg)
        msgid_map[msgid] = key
        # A message may lack a Subject header entirely
        subject = str(msg['Subject'] or '').replace('\n', '')

        # Start by looking at prefixes in the subject
        matches = re.search(r'\[([^\]]+)\]', subject)
        if not matches:
            # Ignoring this message
            continue
        cur_count = 1
        expected_count = 1
        new_vn = 1
        for prefix in matches.groups()[0].split():
            # Does it match \d/\d?
            counters = re.search(r'(\d+)/(\d+)', prefix)
            if counters:
                cur_count = int(counters.group(1))
                expected_count = int(counters.group(2))
            # Does it have a v\d?
            vmatch = re.search(r'v(\d+)', prefix)
            if vmatch:
                new_vn = int(vmatch.group(1))

        if new_vn != vn:
            if new_vn != 1:
                logger.info('Found new series version: v%s', new_vn)
            if wantver is None:
                # Without an explicit request, the highest version wins; a
                # late message from an older version must not displace it
                switch = vn is None or new_vn > vn
            else:
                switch = wantver == new_vn
            if switch:
                # Blow away anything we currently have for another version
                sorted_keys = [None] * (expected_count + 1)
                am_kept = list()
                slug = None
                vn = new_vn
            elif vn is None:
                vn = new_vn

        # Judge the message by its own version, not by the tracked one
        if wantver is None:
            wanted = new_vn == vn
        else:
            wanted = new_vn == wantver
        if not wanted:
            logger.info('  Ignoring v%s: %s', new_vn, subject)
            continue

        # We use a "slug" for mbox name, based on the subject
        if not slug:
            slug = re.sub(r'\s*\[.*?\]\s*', '', subject).lower()
            slug = re.sub(r'\W+', '_', slug).strip('_')
            if vn != 1:
                slug = 'v%s_%s' % (vn, slug)
        body = get_plain_part(msg)
        logger.info('  Processing: %s', subject)
        if re.search(r'\D0+/\d+', subject) and sorted_keys[0] is None:
            # Cover letter: keep it and skip to the next message
            am_kept.append(key)
            _place_key(sorted_keys, cur_count, key)
            continue
        # Do we have a '^---' followed by '^+++' in the body anywhere?
        if re.search(r'^---.*\n\+\+\+', body, re.MULTILINE):
            # Contains a diff, so keep it and move to next
            am_kept.append(key)
            _place_key(sorted_keys, cur_count, key)
            continue
        # Do we have something that looks like a new trailer?
        trailers = list()
        for found in re.findall(r'^\s*([\w-]+: .*<\S+>)\s*$', body, re.MULTILINE):
            if found not in trailers:
                trailers.append(found)
        if not trailers:
            continue
        # Where do we need to stick them? Falls back to the first message
        # in the mbox when the parent has not been seen.
        irt_key = 0
        irt_id = get_clean_msgid(msg, header='In-Reply-To')
        if irt_id and irt_id in msgid_map:
            irt_key = msgid_map[irt_id]
        git_add_trailers(mbx, irt_key, trailers)

    if not am_kept:
        logger.critical('Did not find any patches to save')
        return None

    am_filename = os.path.join(outdir, '%s.mbx' % slug)

    if os.path.exists(am_filename):
        os.unlink(am_filename)
    am_mbx = mailbox.mbox(am_filename)
    try:
        logger.critical('---')
        logger.critical('Writing %s', am_filename)
        have_missing = False
        total = len(sorted_keys) - 1
        for at, key in enumerate(sorted_keys):
            if key is None:
                # Slot 0 is the optional cover letter, so it may be absent
                if at != 0:
                    logger.error('  ERROR: missing [%s/%s]!', at, total)
                    have_missing = True
            else:
                am_mbx.add(mbx[key])
                subject = str(mbx[key]['Subject'] or '').replace('\n', '')
                logger.critical('  %s', subject)

        logger.critical('---')
        if have_missing:
            logger.critical('WARNING: Thread incomplete, please check above!')
        top_msgid = get_clean_msgid(am_mbx[0])
        linkurl = config['linkmask'] % top_msgid
        logger.critical('Link: %s', linkurl)
        # Look for base-commit line in the first message
        body = get_plain_part(am_mbx[0])
        matches = re.search(r'^\s*base-commit: (\S+)\s*$', body, re.MULTILINE)
        if matches:
            base_commit = matches.groups()[0]
            logger.critical('Base-commit included, you can branch using:')
            logger.critical('  git checkout -b %s %s', slug, base_commit)
            logger.critical('  git am %s', am_filename)
        else:
            logger.critical('No base-commit information included, sorry.')
            logger.critical('  git checkout -b %s master', slug)
            logger.critical('  git am %s', am_filename)
    finally:
        am_mbx.close()

    return am_filename


def lorify(cmdargs):
    """Entry point: set up logging, fetch the thread, optionally build the am mbox.

    :param cmdargs: parsed arguments with msgid, outdir, amready, version
                    and quiet attributes
    """
    logger.setLevel(logging.DEBUG)

    ch = logging.StreamHandler()
    formatter = logging.Formatter('%(message)s')
    ch.setFormatter(formatter)

    if cmdargs.quiet:
        ch.setLevel(logging.CRITICAL)
    else:
        ch.setLevel(logging.INFO)

    logger.addHandler(ch)

    if not cmdargs.msgid:
        logger.debug('Getting Message-ID from stdin')
        msgid = get_msgid_from_stdin()
    else:
        msgid = cmdargs.msgid

    if not msgid:
        # The piped message had no Message-ID header
        logger.critical('Unable to find a Message-ID to look up')
        sys.exit(1)

    msgid = msgid.strip().strip('<>')
    config = get_config_from_git()
    mboxfile = get_pi_thread_by_msgid(msgid, config, outdir=cmdargs.outdir)
    if mboxfile is None:
        sys.exit(1)
    if cmdargs.amready:
        mbox_to_am(mboxfile, config, outdir=cmdargs.outdir, wantver=cmdargs.version)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument('msgid', nargs='?',
                        help='Message ID to process, or pipe a raw message')
    parser.add_argument('-o', '--outdir', default='.',
                        help='Output into this directory')
    parser.add_argument('-a', '--am-ready', dest='amready', action='store_true', default=False,
                        help='Make an mbox ready for git am')
    parser.add_argument('-v', '--version', type=int, default=None,
                        help='Get a specific version of the patch/series (use with -a)')
    parser.add_argument('-q', '--quiet', action='store_true', default=False,
                        help='Only output errors to the stdout')
    lorify(parser.parse_args())