diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2022-10-27 17:59:31 -0400 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2022-10-27 17:59:31 -0400 |
commit | bd2bcf440031daccabc0f98fa763691e03f6bb55 (patch) | |
tree | e082482d71ce390ac6ab153eb311eecf0da30883 | |
parent | 71e9f92eefa47a3d9b184e876e0ebbe10dc2461d (diff) | |
download | b4-bd2bcf440031daccabc0f98fa763691e03f6bb55.tar.gz |
Drop --use-project and reimplement --check-newer
New public-inbox instances have much better querying capabilities, so
drop the use of --use-project and reimplement --check-newer to use the
actual query language.
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- | b4/__init__.py | 16 | ||||
-rw-r--r-- | b4/command.py | 4 | ||||
-rw-r--r-- | b4/diff.py | 6 | ||||
-rw-r--r-- | b4/ez.py | 1 | ||||
-rw-r--r-- | b4/mbox.py | 238 |
5 files changed, 95 insertions, 170 deletions
diff --git a/b4/__init__.py b/b4/__init__.py index bbaff61..1b644e2 100644 --- a/b4/__init__.py +++ b/b4/__init__.py @@ -2453,9 +2453,6 @@ def get_msgid(cmdargs: argparse.Namespace) -> Optional[str]: logger.debug('Overriding midmask with passed url parameters') config['midmask'] = f'{wantloc.scheme}://{wantloc.netloc}/{chunks[0]}/%s' msgid = urllib.parse.unquote(chunks[1]) - # Infer the project name from the URL, if possible - if chunks[0] != 'r': - cmdargs.useproject = chunks[0] # Handle special case when msgid is prepended by id: or rfc822msgid: if msgid.find('id:') >= 0: msgid = re.sub(r'^\w*id:', '', msgid) @@ -2563,7 +2560,7 @@ def mailsplit_bytes(bmbox: bytes, outdir: str, pipesep: Optional[str] = None) -> return msgs -def get_pi_search_results(query: str, nocache: bool = False): +def get_pi_search_results(query: str, nocache: bool = False) -> Optional[List[email.message.Message]]: config = get_main_config() searchmask = config.get('searchmask') if not searchmask: @@ -2654,7 +2651,7 @@ def get_pi_thread_by_url(t_mbx_url: str, nocache: bool = False): return split_and_dedupe_pi_results(t_mbox, cachedir=cachedir) -def get_pi_thread_by_msgid(msgid: str, useproject: Optional[str] = None, nocache: bool = False, +def get_pi_thread_by_msgid(msgid: str, nocache: bool = False, onlymsgids: Optional[set] = None) -> Optional[list]: qmsgid = urllib.parse.quote_plus(msgid) config = get_main_config() @@ -2663,10 +2660,8 @@ def get_pi_thread_by_msgid(msgid: str, useproject: Optional[str] = None, nocache # In fact, /all/ naming is arbitrary, but for now we are going to # hardcode it to lore.kernel.org settings and maybe make it configurable # in the future, if necessary. 
- if loc.path.startswith('/all/') and not useproject: - useproject = 'all' - if useproject: - projurl = '%s://%s/%s' % (loc.scheme, loc.netloc, useproject) + if loc.path.startswith('/all/'): + projurl = '%s://%s/all' % (loc.scheme, loc.netloc) else: # Grab the head from lore, to see where we are redirected midmask = config['midmask'] % qmsgid @@ -3410,8 +3405,7 @@ def retrieve_messages(cmdargs: argparse.Namespace) -> Tuple[Optional[str], Optio if 'cherrypick' in cmdargs and cmdargs.cherrypick == '_': # Just that msgid, please pickings = {msgid} - msgs = get_pi_thread_by_msgid(msgid, useproject=cmdargs.useproject, nocache=cmdargs.nocache, - onlymsgids=pickings) + msgs = get_pi_thread_by_msgid(msgid, nocache=cmdargs.nocache, onlymsgids=pickings) if not msgs: return None, msgs else: diff --git a/b4/command.py b/b4/command.py index fd262f7..2271642 100644 --- a/b4/command.py +++ b/b4/command.py @@ -16,8 +16,6 @@ logger = b4.logger def cmd_retrieval_common_opts(sp): sp.add_argument('msgid', nargs='?', help='Message ID to process, or pipe a raw message') - sp.add_argument('-p', '--use-project', dest='useproject', default=None, - help='Use a specific project instead of default (linux-mm, linux-hardening, etc)') sp.add_argument('-m', '--use-local-mbox', dest='localmbox', default=None, help='Instead of grabbing a thread from lore, process this mbox file (or - for stdin)') sp.add_argument('--stdin-pipe-sep', @@ -234,8 +232,6 @@ def setup_parser() -> argparse.ArgumentParser: help='Message ID to process, or pipe a raw message') sp_diff.add_argument('-g', '--gitdir', default=None, help='Operate on this git tree instead of current dir') - sp_diff.add_argument('-p', '--use-project', dest='useproject', default=None, - help='Use a specific project instead of default (linux-mm, linux-hardening, etc)') sp_diff.add_argument('-C', '--no-cache', dest='nocache', action='store_true', default=False, help='Do not use local cache') sp_diff.add_argument('-v', '--compare-versions', 
dest='wantvers', type=int, default=None, nargs='+', @@ -29,8 +29,6 @@ def diff_same_thread_series(cmdargs): identifier = msgid if wantvers: identifier += '-' + '-'.join([str(x) for x in wantvers]) - if cmdargs.useproject: - identifier += '-' + cmdargs.useproject cachedir = b4.get_cache_file(identifier, suffix='diff.msgs') if os.path.exists(cachedir) and not cmdargs.nocache: @@ -40,11 +38,11 @@ def diff_same_thread_series(cmdargs): with open(os.path.join(cachedir, msg), 'rb') as fh: msgs.append(email.message_from_binary_file(fh)) else: - msgs = b4.get_pi_thread_by_msgid(msgid, useproject=cmdargs.useproject, nocache=cmdargs.nocache) + msgs = b4.get_pi_thread_by_msgid(msgid, nocache=cmdargs.nocache) if not msgs: logger.critical('Unable to retrieve thread: %s', msgid) return - msgs = b4.mbox.get_extra_series(msgs, direction=-1, wantvers=wantvers, useproject=cmdargs.useproject) + msgs = b4.mbox.get_extra_series(msgs, direction=-1, wantvers=wantvers) if os.path.exists(cachedir): shutil.rmtree(cachedir) pathlib.Path(cachedir).mkdir(parents=True) @@ -1264,7 +1264,6 @@ def cmd_send(cmdargs: argparse.Namespace) -> None: sign = False cover_msg = None - # TODO: Need to send obsoleted-by follow-ups, just need to figure out where. 
send_msgs = list() for commit, msg in patches: if not msg: @@ -27,7 +27,7 @@ import xml.etree.ElementTree import b4 -from typing import Optional, Tuple +from typing import Optional, Tuple, List from string import Template logger = b4.logger @@ -114,22 +114,6 @@ def make_am(msgs, cmdargs, msgid): logger.critical('Total patches: %s', len(am_msgs)) else: logger.info('Total patches: %s (cherrypicked: %s)', len(am_msgs), cmdargs.cherrypick) - # Check if any of the followup-trailers is an Obsoleted-by - if not cmdargs.checknewer: - warned = False - for lmsg in lser.patches: - # Only check cover letter or first patch - if not lmsg or lmsg.counter > 1: - continue - for ltr in list(lmsg.followup_trailers): - if ltr.lname == 'obsoleted-by': - lmsg.followup_trailers.remove(ltr) - if warned: - continue - logger.critical('---') - logger.critical('WARNING: Found an Obsoleted-by follow-up trailer!') - logger.critical(' Rerun with -c to automatically retrieve the new series.') - warned = True if lser.has_cover and lser.patches[0].followup_trailers and not covertrailers: # Warn that some trailers were sent to the cover letter @@ -528,30 +512,31 @@ def save_as_quilt(am_msgs, q_dirname): sfh.write('%s\n' % patch_filename) -def get_extra_series(msgs: list, direction: int = 1, wantvers: Optional[int] = None, nocache: bool = False, - useproject: Optional[str] = None) -> list: +def get_extra_series(msgs: list, direction: int = 1, wantvers: Optional[int] = None, + nocache: bool = False) -> List[email.message.Message]: base_msg = None latest_revision = None seen_msgids = set() seen_covers = set() - obsoleted = list() + queries = set() for msg in msgs: msgid = b4.LoreMessage.get_clean_msgid(msg) seen_msgids.add(msgid) lsub = b4.LoreSubject(msg['Subject']) - if direction > 0 and lsub.reply: - # Does it have an Obsoleted-by: trailer? 
- rmsg = b4.LoreMessage(msg) - trailers, mismatches = rmsg.get_trailers() - for ltr in trailers: - if ltr.lname == 'obsoleted-by': - for chunk in ltr.value.split('/'): - if chunk.find('@') > 0 and chunk not in seen_msgids: - obsoleted.append(chunk) - break + # Ignore patches above 1 if lsub.counter > 1: continue + + if not lsub.reply: + payload = msg.get_payload() + if isinstance(payload, str): + matches = re.search(r'^change-id:\s+(\S+)', payload, flags=re.I | re.M) + if matches: + logger.debug('Found change-id %s', matches.groups()[0]) + q = 'nq:"change-id: %s"' % matches.groups()[0] + queries.add(q) + if base_msg is not None: logger.debug('Current base_msg: %s', base_msg['Subject']) logger.debug('Checking the subject on %s', lsub.full_subject) @@ -565,132 +550,85 @@ def get_extra_series(msgs: list, direction: int = 1, wantvers: Optional[int] = N # A patch/series without a cover letter base_msg = msg - if base_msg is None: - logger.debug('Could not find cover of 1st patch in mbox') + if not queries and base_msg is None: return msgs - config = b4.get_main_config() - loc = urllib.parse.urlparse(config['midmask']) - if not useproject: - useproject = 'all' - - listarc = f'{loc.scheme}://{loc.netloc}/{useproject}/' - # Make sure it exists - queryurl = f'{listarc}_/text/config/raw' - session = b4.get_requests_session() - resp = session.get(queryurl) - if not resp.status_code == 200: - logger.info('Unable to figure out list archive location') + # Get subject info from base_msg again + lsub = b4.LoreSubject(base_msg['Subject']) + if not len(lsub.prefixes): + logger.debug('Not checking for new revisions: no prefixes on the cover letter.') + return msgs + if direction < 0 and latest_revision <= 1: + logger.debug('This is the earliest version of the series') return msgs + if direction < 0 and wantvers is None: + wantvers = [latest_revision - 1] + + fromeml = email.utils.getaddresses(base_msg.get_all('from', []))[0][1] + msgdate = 
email.utils.parsedate_tz(str(base_msg['Date'])) + q = 's:"%s" AND f:"%s"' % (lsub.subject.replace('"', ''), fromeml) + queries.add(q) + startdate = time.strftime('%Y%m%d', msgdate[:9]) + if direction > 0: + logger.critical('Checking for newer revisions') + datelim = 'd:%s..' % startdate + else: + logger.critical('Checking for older revisions') + datelim = 'd:..%s' % startdate - nt_msgs = list() - if len(obsoleted): - for nt_msgid in obsoleted: - logger.info('Obsoleted-by: %s', nt_msgid) - # Grab this thread from remote - t_mbx_url = '%s/%s/t.mbox.gz' % (listarc.rstrip('/'), nt_msgid) - potentials = b4.get_pi_thread_by_url(t_mbx_url, nocache=nocache) - if potentials: - potentials = b4.get_strict_thread(potentials, nt_msgid) - nt_msgs += potentials - logger.info(' Added %s messages from that thread', len(potentials)) - else: - logger.info(' No messages added from that thread') + q = '(%s) AND %s' % (' OR '.join(queries), datelim) + q_msgs = b4.get_pi_search_results(q, nocache=nocache) + if not q_msgs: + return msgs - else: - # Get subject info from base_msg again - lsub = b4.LoreSubject(base_msg['Subject']) - if not len(lsub.prefixes): - logger.debug('Not checking for new revisions: no prefixes on the cover letter.') - return msgs - if direction < 0 and latest_revision <= 1: - logger.debug('This is the latest version of the series') - return msgs - if direction < 0 and wantvers is None: - wantvers = [latest_revision - 1] - - base_msgid = b4.LoreMessage.get_clean_msgid(base_msg) - fromeml = email.utils.getaddresses(base_msg.get_all('from', []))[0][1] - msgdate = email.utils.parsedate_tz(str(base_msg['Date'])) - startdate = time.strftime('%Y%m%d', msgdate[:9]) - if direction > 0: - q = 's:"%s" AND f:"%s" AND d:%s..' 
% (lsub.subject.replace('"', ''), fromeml, startdate) - queryurl = '%s?%s' % (listarc, urllib.parse.urlencode({'q': q, 'x': 'A', 'o': '-1'})) - logger.critical('Checking for newer revisions on %s', listarc) - else: - q = 's:"%s" AND f:"%s" AND d:..%s' % (lsub.subject.replace('"', ''), fromeml, startdate) - queryurl = '%s?%s' % (listarc, urllib.parse.urlencode({'q': q, 'x': 'A', 'o': '1'})) - logger.critical('Checking for older revisions on %s', listarc) - - logger.debug('Query URL: %s', queryurl) - session = b4.get_requests_session() - resp = session.get(queryurl) - # try to parse it - try: - tree = xml.etree.ElementTree.fromstring(resp.content) - except xml.etree.ElementTree.ParseError as ex: - logger.debug('Unable to parse results, ignoring: %s', ex) - resp.close() - return msgs - resp.close() - ns = {'atom': 'http://www.w3.org/2005/Atom'} - entries = tree.findall('atom:entry', ns) - seen_urls = set() - - for entry in entries: - title = entry.find('atom:title', ns).text - lsub = b4.LoreSubject(title) - if lsub.reply or lsub.counter > 1: - logger.debug('Ignoring result (not interesting): %s', title) - continue - link = entry.find('atom:link', ns).get('href') - if direction > 0 and lsub.revision <= latest_revision: - logger.debug('Ignoring result (not new revision): %s', title) - continue - elif direction < 0 and lsub.revision >= latest_revision: - logger.debug('Ignoring result (not old revision): %s', title) - continue - elif direction < 0 and lsub.revision not in wantvers: - logger.debug('Ignoring result (not revision we want): %s', title) - continue - if link.find('/%s/' % base_msgid) > 0: - logger.debug('Ignoring result (same thread as ours):%s', title) - continue - if lsub.revision == 1 and lsub.revision == latest_revision: - # Someone sent a separate message with an identical title but no new vX in the subject line - if direction > 0: - # It's *probably* a new revision. 
- logger.debug('Likely a new revision: %s', title) - else: - # It's *probably* an older revision. - logger.debug('Likely an older revision: %s', title) - elif direction > 0 and lsub.revision > latest_revision: - logger.debug('Definitely a new revision [v%s]: %s', lsub.revision, title) - elif direction < 0 and lsub.revision < latest_revision: - logger.debug('Definitely an older revision [v%s]: %s', lsub.revision, title) + seen_revisions = dict() + for q_msg in q_msgs: + q_msgid = b4.LoreMessage.get_clean_msgid(q_msg) + lsub = b4.LoreSubject(q_msg.get('subject')) + if q_msgid in seen_msgids: + logger.debug('Skipping %s: already have it', lsub.full_subject) + continue + if lsub.reply: + # These will get sorted out later + logger.debug('Adding reply: %s', lsub.full_subject) + msgs.append(q_msg) + seen_msgids.add(q_msgid) + continue + + if direction > 0 and lsub.revision <= latest_revision: + logger.debug('Ignoring result (not new revision): %s', lsub.full_subject) + continue + elif direction < 0 and lsub.revision >= latest_revision: + logger.debug('Ignoring result (not old revision): %s', lsub.full_subject) + continue + elif direction < 0 and lsub.revision not in wantvers: + logger.debug('Ignoring result (not revision we want): %s', lsub.full_subject) + continue + + if lsub.revision == 1 and lsub.revision == latest_revision: + # Someone sent a separate message with an identical title but no new vX in the subject line + if direction > 0: + # It's *probably* a new revision. 
+ logger.debug('Likely a new revision: %s', lsub.full_subject) else: - logger.debug('No idea what this is: %s', title) - continue - t_mbx_url = '%st.mbox.gz' % link - if t_mbx_url in seen_urls: - continue - seen_urls.add(t_mbx_url) - logger.info('New revision: %s', title) - potentials = b4.get_pi_thread_by_url(t_mbx_url, nocache=nocache) - if potentials: - nt_msgs += potentials - logger.info(' Added %s messages from that thread', len(potentials)) - - # Append all of these to the existing mailbox - for nt_msg in nt_msgs: - nt_msgid = b4.LoreMessage.get_clean_msgid(nt_msg) - if nt_msgid in seen_msgids: - logger.debug('Duplicate message, skipping') + # It's *probably* an older revision. + logger.debug('Likely an older revision: %s', lsub.full_subject) + elif direction > 0 and lsub.revision > latest_revision: + logger.debug('Definitely a new revision [v%s]: %s', lsub.revision, lsub.full_subject) + elif direction < 0 and lsub.revision < latest_revision: + logger.debug('Definitely an older revision [v%s]: %s', lsub.revision, lsub.full_subject) + else: + logger.debug('No idea what this is: %s', lsub.subject) continue - nt_subject = re.sub(r'\s+', ' ', nt_msg['Subject']) - logger.debug('Adding: %s', nt_subject) - msgs.append(nt_msg) - seen_msgids.add(nt_msgid) + if lsub.revision not in seen_revisions: + seen_revisions[lsub.revision] = 0 + seen_revisions[lsub.revision] += 1 + logger.debug('Adding: %s', lsub.full_subject) + msgs.append(q_msg) + seen_msgids.add(q_msgid) + + for rev, count in seen_revisions.items(): + logger.info(' Added from v%s: %s patches', rev, count) return msgs @@ -724,7 +662,7 @@ def main(cmdargs): sys.exit(1) if len(msgs) and cmdargs.checknewer and b4.can_network: - msgs = get_extra_series(msgs, direction=1, useproject=cmdargs.useproject) + msgs = get_extra_series(msgs, direction=1, nocache=cmdargs.nocache) if cmdargs.subcmd in ('am', 'shazam'): make_am(msgs, cmdargs, msgid) |