diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-07-24 16:16:50 -0400 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-07-24 16:16:50 -0400 |
commit | b1925c535ae3e3507d38f96d9bda2c569fffbaed (patch) | |
tree | 9a53cf4c90642a21718f1d607e4beab20d86e6ca | |
parent | b482a84bb03a04b3faba5ea76a2eb247bb38b3a9 (diff) | |
download | grokmirror-b1925c535ae3e3507d38f96d9bda2c569fffbaed.tar.gz |
Implement baselines and islandcores
This implements core.alternateRefsPrefixes and pack.islandCore features
when using objstore repos. See the example config file for documentation
on when and how to use them.
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- | grokmirror.conf | 29 |
-rw-r--r-- | grokmirror/__init__.py | 2 |
-rwxr-xr-x | grokmirror/fsck.py | 48 |
3 files changed, 65 insertions, 14 deletions
diff --git a/grokmirror.conf b/grokmirror.conf index 4ae3c8e..365c1d8 100644 --- a/grokmirror.conf +++ b/grokmirror.conf @@ -36,13 +36,13 @@ loglevel = info # Grokmirror will set up an object storage repository and fetch all refs from # both repositories: # _alternates/[random-guid-name].git -# refs/virtual/[sha1-of-foo/bar/linux.git]/heads/master -# refs/virtual/[sha1-of-foo/bar/linux.git]/heads/devbranch -# refs/virtual/[sha1-of-foo/bar/linux.git]/tags/v5.0-rc3 +# refs/virtual/[sha1-of-foo/bar/linux.git:12]/heads/master +# refs/virtual/[sha1-of-foo/bar/linux.git:12]/heads/devbranch +# refs/virtual/[sha1-of-foo/bar/linux.git:12]/tags/v5.0-rc3 # ... -# refs/virtual/[sha1-of-baz/quux/linux.git]/heads/master -# refs/virtual/[sha1-of-baz/quux/linux.git]/heads/devbranch -# refs/virtual/[sha1-of-baz/quux/linux.git]/tags/v5.0-rc3 +# refs/virtual/[sha1-of-baz/quux/linux.git:12]/heads/master +# refs/virtual/[sha1-of-baz/quux/linux.git:12]/heads/devbranch +# refs/virtual/[sha1-of-baz/quux/linux.git:12]/tags/v5.0-rc3 # ... # # This will dramatically improve storage on disk, as original repositories will be @@ -62,7 +62,20 @@ objstore = ${toplevel}/_alternates # from these repos into refs/virtual/*. # # Leave blank if you don't have any private repos (or don't offer a web UI). -private = */private/* +#private = */private/* +# +# If you have a lot of forks using the same objstore repo, you may end up with +# thousands of refs being negotiated during each remote update. This tends to +# result in higher load and bigger negotiation transfers. Setting the "baselines" option +# allows you to designate a repo that is likely to have most of the relevant objects +# and ignore the rest of the objstore refs. This is done using the core.alternateRefsPrefixes +# feature (see git-config). 
+baselines = */kernel/git/stable/* +# +# Objstore repos are repacked with delta island support (see git-config), but if you +# have one repo that is a lot more likely to be cloned than all the other ones, you can +# designate it as "islandCore", which will give it priority when creating packs. +islandcores = */kernel/git/torvalds/* # Used by grok-pull for now, but may be shared with others in the future [remote] @@ -87,7 +100,7 @@ projectslist = ${core:toplevel}/projects.list # When generating projects.list, start at this subpath instead # of at the toplevel. Useful when mirroring kernel or when generating # multiple gitweb/cgit configurations for the same tree. -projectslist_trimtop = /pub/scm/ +projectslist_trimtop = # # When generating projects.list, also create entries for symlinks. # Otherwise we assume they are just legacy and keep them out of diff --git a/grokmirror/__init__.py b/grokmirror/__init__.py index 1b845bf..cb66b2a 100644 --- a/grokmirror/__init__.py +++ b/grokmirror/__init__.py @@ -948,5 +948,3 @@ def get_repack_level(obj_info, max_loose_objects=1200, max_packs=20, pc_loose_ob needs_repack = 1 return needs_repack - - diff --git a/grokmirror/fsck.py b/grokmirror/fsck.py index c079e96..5714a3e 100755 --- a/grokmirror/fsck.py +++ b/grokmirror/fsck.py @@ -25,6 +25,7 @@ import random import datetime import shutil import gc +import fnmatch from fcntl import lockf, LOCK_EX, LOCK_UN, LOCK_NB @@ -526,7 +527,6 @@ def fsck_mirror(config, verbose=False, force=False, repack_only=False, fetched_obstrepos = set() obst_changes = False analyzed = 0 - sibling_strategy = config['core'].get('objstore_sibling_strategy', 'loose') logger.info('Analyzing %s (%s repos)', toplevel, len(status)) for fullpath in list(status): analyzed += 1 @@ -757,6 +757,8 @@ def fsck_mirror(config, verbose=False, force=False, repack_only=False, analyzed = 0 logger.info('Analyzing %s (%s repos)', obstdir, len(obstrepos)) + baselines = [x.strip() for x in config['core'].get('baselines', 
'').split('\n')] + islandcores = [x.strip() for x in config['core'].get('islandcores', '').split('\n')] for obstrepo in obstrepos: analyzed += 1 logger.debug('Processing objstore repo: %s', os.path.basename(obstrepo)) @@ -818,8 +820,8 @@ def fsck_mirror(config, verbose=False, force=False, repack_only=False, # Not an else, because the previous step may have migrated things if obstrepo not in amap or not len(amap[obstrepo]): obst_changes = True - # XXX: Theoretically, nothing should have cloned a new repo while we were migrating, because - # they should have found a better candidate as well. + # XXX: Is there a possible race condition here if grok-pull cloned a new repo + # while we were migrating this one? logger.info('%s: deleting (no longer used by anything)', os.path.basename(obstrepo)) if obstrepo in amap: amap.pop(obstrepo) @@ -835,6 +837,8 @@ def fsck_mirror(config, verbose=False, force=False, repack_only=False, my_remotes = grokmirror.list_repo_remotes(obstrepo, withurl=True) # Use the first child repo as our "reference" entry in manifest refrepo = None + set_baseline = False + set_islandcore = False for virtref, childpath in my_remotes: # Is it still relevant? if childpath not in amap[obstrepo]: @@ -865,6 +869,42 @@ def fsck_mirror(config, verbose=False, force=False, repack_only=False, if gitdir not in manifest: continue + # Do we need to set any alternateRefsPrefixes? + if not set_baseline: + is_baseline = False + for baseline in baselines: + # Does this repo match a baseline + if fnmatch.fnmatch(gitdir, baseline): + is_baseline = True + break + if is_baseline: + set_baseline = True + refpref = 'refs/virtual/%s/heads/' % virtref + # Go through all remotes and set their alternateRefsPrefixes + for s_virtref, s_childpath in my_remotes: + # is it already set to that? 
+ entries = grokmirror.get_config_from_git(s_childpath, r'core\.alternate*') + if entries.get('alternaterefsprefixes') != refpref: + s_gitdir = '/' + os.path.relpath(s_childpath, toplevel) + logger.info(' reconfig: %s (baseline to %s)', s_gitdir, virtref) + grokmirror.set_git_config(s_childpath, 'core.alternateRefsPrefixes', refpref) + + # Do we need to set islandCore? + if not set_islandcore: + is_islandcore = False + for islandcore in islandcores: + # Does this repo match a baseline + if fnmatch.fnmatch(gitdir, islandcore): + is_islandcore = True + break + if is_islandcore: + set_islandcore = True + # is it already set to that? + entries = grokmirror.get_config_from_git(obstrepo, r'pack\.island*') + if entries.get('islandcore') != virtref: + logger.info(' reconfig: %s (islandCore to %s)', os.path.basename(obstrepo), virtref) + grokmirror.set_git_config(obstrepo, 'pack.islandCore', virtref) + if refrepo is None: # Legacy "reference=" setting in manifest refrepo = gitdir @@ -874,7 +914,7 @@ def fsck_mirror(config, verbose=False, force=False, repack_only=False, manifest[gitdir]['forkgroup'] = os.path.basename(obstrepo[:-4]) - if obstrepo not in status: + if obstrepo not in status or set_islandcore: # We don't use obstrepo fingerprints, so we set it to None status[obstrepo] = { 'lastcheck': 'never', |