diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2022-01-25 09:09:42 -0500 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2022-01-25 09:09:42 -0500 |
commit | 3da44d217d546e8b7fe5b00f905ca38913255db2 (patch) | |
tree | 1c7a0615a8adc224c55b041ae3b0c6f871c089e9 | |
parent | 41cfb037967b23f873979dac3de7d018397ccc7c (diff) | |
download | korg-helpers-3da44d217d546e8b7fe5b00f905ca38913255db2.tar.gz |
patchwork-bot: add ability to match by msgid
Patchwork API 1.2 allows us to match by msgid, so update patchwork-bot
to use Link: trailers for additional matching when we are unable to
match by patchwork hash.
Additionally, this reworks caching that wasn't actually saving us any
time, and drops the use of git patch-id, which we previously relied on
in the absence of msgid matching.
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rwxr-xr-x | git-patchwork-bot.py | 157 |
1 file changed, 70 insertions, 87 deletions
diff --git a/git-patchwork-bot.py b/git-patchwork-bot.py index ebf4bd3..5dcf2e9 100755 --- a/git-patchwork-bot.py +++ b/git-patchwork-bot.py @@ -48,6 +48,7 @@ charset.add_charset('utf-8', charset.SHORTEST) __VERSION__ = '2.0' DB_VERSION = 1 REST_API_VERSION = '1.2' +LORE_RE = re.compile(r'^\s*Link:\s+\S+://(?:lore|lkml)\.kernel\.org\S*/([^@]+@[^@\s/]+)$', flags=re.M | re.I) HUNK_RE = re.compile(r'^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@') FILENAME_RE = re.compile(r'^(---|\+\+\+) (\S+)') REST_PER_PAGE = 100 @@ -62,6 +63,7 @@ CACHEDIR = os.path.expanduser('~/.cache/git-patchwork-bot') _project_cache = dict() _server_cache = dict() +_rev_cache = dict() logger = logging.getLogger('gitpwcron') @@ -226,7 +228,7 @@ class Restmaker: def get_patchwork_patches_by_project_hash(rm: Restmaker, project: int, pwhash: str) -> List[int]: - logger.debug('Looking up %s', pwhash) + logger.debug('Looking up hash=%s', pwhash) params = [ ('project', project), ('archived', 'false'), @@ -240,6 +242,21 @@ def get_patchwork_patches_by_project_hash(rm: Restmaker, project: int, pwhash: s return [patch['id'] for patch in patches] +def get_patchwork_patches_by_project_msgid(rm: Restmaker, project: int, msgid: str) -> List[int]: + logger.debug('Looking up msgid=%s', msgid) + params = [ + ('project', project), + ('archived', 'false'), + ('msgid', msgid), + ] + patches = rm.get_patches_list(params) + if not patches: + logger.debug('No match for msgid=%s', msgid) + return list() + + return [patch['id'] for patch in patches] + + def get_patchwork_pull_requests_by_project(rm: Restmaker, project: int, fromstate: List[str]) -> Set[Tuple]: params = [ ('project', project), @@ -338,19 +355,6 @@ def db_init_common_sqlite_db(c: sqlite3.Cursor) -> None: db_save_meta(c) -def db_init_cache_sqlite_db(c: sqlite3.Cursor) -> None: - logger.info('Initializing new sqlite3 db with metadata version %s', DB_VERSION) - db_init_common_sqlite_db(c) - c.execute(''' - CREATE TABLE revs ( - rev TEXT NOT NULL, - patchwork_id 
TEXT NOT NULL, - git_id TEXT NOT NULL, - created DATE - )''') - c.execute('''CREATE UNIQUE INDEX idx_rev ON revs(rev)''') - - def db_init_pw_sqlite_db(c: sqlite3.Cursor) -> None: logger.info('Initializing new sqlite3 db with metadata version %s', DB_VERSION) db_init_common_sqlite_db(c) @@ -462,8 +466,8 @@ def git_get_new_revs(gitdir: str, db_heads: List[Tuple[str, str]], git_heads: Li return newrevs -def git_get_rev_diff(gitdir: str, rev: str) -> str: - args = ['diff', '%s~..%s' % (rev, rev)] +def git_get_rev_info(gitdir: str, rev: str) -> str: + args = ['show', rev] return git_run_command(gitdir, args) @@ -528,8 +532,8 @@ def listify(obj: Union[str, list, None]) -> list: return [obj] -def send_summary(serieslist: List[dict], committers: Dict[int, str], to_state: str, refname: str, pname: str, - rs: Dict[str, str], hs: Dict[str, str]) -> str: +def send_summary(serieslist: List[dict], committers: Dict[int, str], to_state: str, refname: str, revs: Dict[int, str], + pname: str, rs: Dict[str, str], hs: Dict[str, str]) -> str: logger.info('Preparing summary') # we send summaries by project, so the project name is going to be all the same @@ -567,8 +571,10 @@ def send_summary(serieslist: List[dict], committers: Dict[int, str], to_state: s if len(patches) > 1: summary.append(' Patches: %s' % patches[0].get('name')) for patch in patches[1:]: - count += 1 - summary.append(' %s' % patch.get('name')) + pid = patch.get('id') + if pid in revs: + count += 1 + summary.append(' %s' % patch.get('name')) summary.append('') bodytpt = Template(CONFIG['templates']['summary']) @@ -733,9 +739,12 @@ def notify_submitters(serieslist: List[dict], committers: Dict[int, str], refnam for patch in patches: summary.append(' - %s' % patch.get('name')) pid = patch.get('id') - committer = committers.get(pid, 'unknown committer') - if 'commitlink' in rs: - summary.append(' %s' % (rs['commitlink'] % revs[pid])) + if pid in revs: + committer = committers.get(pid, 'unknown committer') + if 'commitlink' 
in rs: + summary.append(' %s' % (rs['commitlink'] % revs[pid])) + else: + summary.append(' (no matching commit)') bodytpt = Template(CONFIG['templates']['submitter']) params = { @@ -1049,6 +1058,8 @@ def housekeeping(pname: str) -> None: def pwrun(repo: str, rsettings: Dict[str, Union[str, list, dict]]) -> None: + global _rev_cache + git_heads = git_get_repo_heads(repo, branch=rsettings.get('branch', '--heads')) if not git_heads: logger.info('Could not get the latest ref in %s', repo) @@ -1097,20 +1108,11 @@ def pwrun(repo: str, rsettings: Dict[str, Union[str, list, dict]]) -> None: logger.debug('No new revs in %s', repo) return - rcdbpath = os.path.join(CACHEDIR, 'revcache.db') - rcdb_exists = os.path.isfile(rcdbpath) - rcdbconn = sqlite3.connect(rcdbpath, sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES) - rc = rcdbconn.cursor() - - if not rcdb_exists: - db_init_cache_sqlite_db(rc) - else: - rc.execute('''DELETE FROM revs WHERE created > datetime('now', ?)''', ('-30 days',)) + logger.info('Processing: %s', repo) count = 0 for pname, psettings in rsettings['projects'].items(): rpwhashes = dict() - rgithashes = dict() wantstates = list() have_prs = False for refname, revlines in newrevs.items(): @@ -1126,28 +1128,28 @@ def pwrun(repo: str, rsettings: Dict[str, Union[str, list, dict]]) -> None: continue rpwhashes[refname] = set() - logger.debug('Looking at %s', refname) for rev, logline, committer in revlines: if logline.find('Merge') == 0 and logline.find('://') > 0: have_prs = True - rpwhashes[refname].add((rev, logline, committer, None)) + rpwhashes[refname].add((rev, logline, committer, None, None)) continue - hits = rc.execute('SELECT patchwork_id, git_id FROM revs WHERE rev=?', (rev,)).fetchall() - if not hits: - diff = git_get_rev_diff(repo, rev) - pwhash = get_patchwork_hash(diff) - git_patch_id = git_get_patch_id(diff) - if pwhash and git_patch_id: - rc.execute('''INSERT INTO revs - VALUES (?, ?, ?, datetime('now'))''', (rev, pwhash, git_patch_id)) - else: 
- pwhash = hits[0][0] - git_patch_id = hits[0][1] - - rgithashes[git_patch_id] = rev - if pwhash: - rpwhashes[refname].add((rev, logline, committer, pwhash)) + if rev not in _rev_cache: + info = git_get_rev_info(repo, rev) + pwhash = get_patchwork_hash(info) + if not pwhash: + # Theoretically, should never happen? + logger.debug('Skipping %s (no pwhash)', rev) + continue + msgid = None + if info.find('Link:') > 0: + lore_match = LORE_RE.search(info) + if lore_match: + msgid = lore_match.group(1) + logger.debug('Msgid for %s: %s', rev, msgid) + _rev_cache[rev] = (rev, logline, committer, pwhash, msgid) + + rpwhashes[refname].add(_rev_cache[rev]) if not wantstates: wantstates = ['new', 'under-review'] @@ -1159,14 +1161,14 @@ def pwrun(repo: str, rsettings: Dict[str, Union[str, list, dict]]) -> None: project_id = project['id'] if have_prs: - logger.info('PR merge commit found, loading up pull requests') + logger.info(' PR merge commit found, loading up pull requests') # Find all from states we're interested in prs = get_patchwork_pull_requests_by_project(rm, project_id, wantstates) else: prs = set() for refname, hashpairs in rpwhashes.items(): - logger.info('Analyzing %d revisions in %s', len(hashpairs), refname) + logger.info(' Analyzing %d revisions in %s', len(hashpairs), refname) # Get our settings hsettings = None for wanthead, hsettings in psettings.items(): @@ -1183,7 +1185,7 @@ def pwrun(repo: str, rsettings: Dict[str, Union[str, list, dict]]) -> None: # We create patch_id->rev mapping first revs = dict() committers = dict() - for rev, logline, committer, pwhash in hashpairs: + for rev, logline, committer, pwhash, msgid in hashpairs: if have_prs and pwhash is None: if logline.find(' of ') > 0: matches = re.search(r'Merge\s\S+\s[\'\"](\S+)[\'\"]\sof\s(\w+://\S+)', logline) @@ -1204,27 +1206,31 @@ def pwrun(repo: str, rsettings: Dict[str, Union[str, list, dict]]) -> None: for pull_host, pull_refname, patch_id in prs: if pull_host.find(m_host) > -1 and 
pull_refname.find(m_refname) > -1: - logger.info('Found matching pull request in %s (id: %s)', logline, patch_id) + logger.info(' Found matching pull request in %s (id: %s)', logline, patch_id) revs[patch_id] = rev committers[patch_id] = committer break continue # Do we have a matching hash on the server? - logger.debug('Matching: %s', logline) - # Theoretically, should only return one, but we play it safe and - # handle for multiple matches. + logger.info(' Matching by hash: %s (%s)', pwhash, logline) patch_ids = get_patchwork_patches_by_project_hash(rm, project_id, pwhash) + if not patch_ids and msgid: + # Match by message-id, if we have it + logger.info(' Matching by msgid: %s (%s)', msgid, logline) + patch_ids = get_patchwork_patches_by_project_msgid(rm, project_id, msgid) if not patch_ids: + logger.info(' No match for: %s', logline) continue for patch_id in patch_ids: + logger.info(' Matched: %s', patch_id) pdata = rm.get_patch(patch_id) if not pdata: - logger.debug('Ignoring patch_id=%d due to REST error', patch_id) + logger.info(' Ignoring due to REST error') continue if pdata.get('state') not in fromstate: - logger.debug('Ignoring patch_id=%d due to state=%s', patch_id, pdata.get('state')) + logger.info(' Ignoring due to state=%s', pdata.get('state')) continue revs[patch_id] = rev committers[patch_id] = committer @@ -1233,49 +1239,29 @@ def pwrun(repo: str, rsettings: Dict[str, Union[str, list, dict]]) -> None: updated_series = list() done_patches = set() for patch_id in list(revs.keys()): + logger.info(' Processing: %s', patch_id) if patch_id in done_patches: # we've already updated this series - logger.debug('Already applied %d as part of previous series', patch_id) + logger.info(' Already applied as part of previous series') continue pdata = rm.get_patch(patch_id) serieslist = pdata.get('series', None) if not serieslist: # This is probably from the time before patchwork-2 migration. # We'll just ignore those. 
- logger.debug('A patch without an associated series? Woah.') + logger.info(' A patch without an associated series? Woah.') continue for series in serieslist: series_id = series.get('id') sdata = rm.get_series(series_id) - if not sdata.get('received_all'): - logger.debug('Series %d is incomplete, skipping', series_id) - continue update_queue = list() for spatch in sdata.get('patches'): spatch_id = spatch.get('id') - spdata = rm.get_patch(spatch_id) - rev = None if spatch_id in revs: rev = revs[spatch_id] - else: - # try to use the more fuzzy git-patch-id matching - spatch_hash = git_get_patch_id(spdata.get('diff')) - if spatch_hash is not None and spatch_hash in rgithashes: - logger.debug('Matched via git-patch-id') - rev = rgithashes[spatch_hash] - revs[spatch_id] = rev - # same committer - committers[spatch_id] = committers[patch_id] - - if rev is None: - logger.debug('Could not produce precise match for %s', spatch_id) - logger.debug('Will not update series: %s', sdata.get('name')) - update_queue = list() - break - - update_queue.append((spatch.get('name'), spatch_id, to_state, rev)) + update_queue.append((spatch.get('name'), spatch_id, to_state, rev)) if update_queue: logger.info('Marking series "%s": %s', to_state, sdata.get('name')) @@ -1290,7 +1276,7 @@ def pwrun(repo: str, rsettings: Dict[str, Union[str, list, dict]]) -> None: logger.info(' Updating (DRYRUN): %s', sname) if len(updated_series) and hsettings.get('send_summary', False): - send_summary(updated_series, committers, to_state, refname, pname, rsettings, hsettings) + send_summary(updated_series, committers, to_state, refname, revs, pname, rsettings, hsettings) if len(updated_series) and hsettings.get('notify_submitter', False): notify_submitters(updated_series, committers, refname, revs, pname, rsettings, hsettings) @@ -1299,8 +1285,6 @@ def pwrun(repo: str, rsettings: Dict[str, Union[str, list, dict]]) -> None: else: logger.info('No patches updated on %s', rm.server) - rcdbconn.commit() - 
rcdbconn.close() if not DRYRUN: db_save_repo_heads(c, git_heads) dbconn.commit() @@ -1324,7 +1308,6 @@ def check_repos() -> None: if not os.path.isdir(fullpath) and not settings.get('branch'): logger.info('Worktree must specify "branch" setting: %s', repo) continue - logger.info('Processing: %s', repo) pwrun(fullpath, settings) |