aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-07-24 16:16:50 -0400
committer Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-07-24 16:16:50 -0400
commit b1925c535ae3e3507d38f96d9bda2c569fffbaed (patch)
tree 9a53cf4c90642a21718f1d607e4beab20d86e6ca
parent b482a84bb03a04b3faba5ea76a2eb247bb38b3a9 (diff)
download grokmirror-b1925c535ae3e3507d38f96d9bda2c569fffbaed.tar.gz
Implement baselines and islandcores
This implements core.alternateRefsPrefixes and pack.islandCore features when using objstore repos. See the example config file for documentation on when and how to use them. Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- grokmirror.conf 29
-rw-r--r-- grokmirror/__init__.py 2
-rwxr-xr-x grokmirror/fsck.py 48
3 files changed, 65 insertions, 14 deletions
diff --git a/grokmirror.conf b/grokmirror.conf
index 4ae3c8e..365c1d8 100644
--- a/grokmirror.conf
+++ b/grokmirror.conf
@@ -36,13 +36,13 @@ loglevel = info
# Grokmirror will set up an object storage repository and fetch all refs from
# both repositories:
# _alternates/[random-guid-name].git
-# refs/virtual/[sha1-of-foo/bar/linux.git]/heads/master
-# refs/virtual/[sha1-of-foo/bar/linux.git]/heads/devbranch
-# refs/virtual/[sha1-of-foo/bar/linux.git]/tags/v5.0-rc3
+# refs/virtual/[sha1-of-foo/bar/linux.git:12]/heads/master
+# refs/virtual/[sha1-of-foo/bar/linux.git:12]/heads/devbranch
+# refs/virtual/[sha1-of-foo/bar/linux.git:12]/tags/v5.0-rc3
# ...
-# refs/virtual/[sha1-of-baz/quux/linux.git]/heads/master
-# refs/virtual/[sha1-of-baz/quux/linux.git]/heads/devbranch
-# refs/virtual/[sha1-of-baz/quux/linux.git]/tags/v5.0-rc3
+# refs/virtual/[sha1-of-baz/quux/linux.git:12]/heads/master
+# refs/virtual/[sha1-of-baz/quux/linux.git:12]/heads/devbranch
+# refs/virtual/[sha1-of-baz/quux/linux.git:12]/tags/v5.0-rc3
# ...
#
# This will dramatically improve storage on disk, as original repositories will be
@@ -62,7 +62,20 @@ objstore = ${toplevel}/_alternates
# from these repos into refs/virtual/*.
#
# Leave blank if you don't have any private repos (or don't offer a web UI).
-private = */private/*
+#private = */private/*
+#
+# If you have a lot of forks using the same objstore repo, you may end up with
+# thousands of refs being negotiated during each remote update. This tends to
+# result in higher load and bigger negotiation transfers. Setting the "baselines" option
+# allows you to designate a repo that is likely to have most of the relevant objects
+# and ignore the rest of the objstore refs. This is done using the core.alternateRefsPrefixes
+# feature (see git-config).
+baselines = */kernel/git/stable/*
+#
+# Objstore repos are repacked with delta island support (see git-config), but if you
+# have one repo that is a lot more likely to be cloned than all the other ones, you can
+# designate it as "islandCore", which will give it priority when creating packs.
+islandcores = */kernel/git/torvalds/*
# Used by grok-pull for now, but may be shared with others in the future
[remote]
@@ -87,7 +100,7 @@ projectslist = ${core:toplevel}/projects.list
# When generating projects.list, start at this subpath instead
# of at the toplevel. Useful when mirroring kernel or when generating
# multiple gitweb/cgit configurations for the same tree.
-projectslist_trimtop = /pub/scm/
+projectslist_trimtop =
#
# When generating projects.list, also create entries for symlinks.
# Otherwise we assume they are just legacy and keep them out of
diff --git a/grokmirror/__init__.py b/grokmirror/__init__.py
index 1b845bf..cb66b2a 100644
--- a/grokmirror/__init__.py
+++ b/grokmirror/__init__.py
@@ -948,5 +948,3 @@ def get_repack_level(obj_info, max_loose_objects=1200, max_packs=20, pc_loose_ob
needs_repack = 1
return needs_repack
-
-
diff --git a/grokmirror/fsck.py b/grokmirror/fsck.py
index c079e96..5714a3e 100755
--- a/grokmirror/fsck.py
+++ b/grokmirror/fsck.py
@@ -25,6 +25,7 @@ import random
import datetime
import shutil
import gc
+import fnmatch
from fcntl import lockf, LOCK_EX, LOCK_UN, LOCK_NB
@@ -526,7 +527,6 @@ def fsck_mirror(config, verbose=False, force=False, repack_only=False,
fetched_obstrepos = set()
obst_changes = False
analyzed = 0
- sibling_strategy = config['core'].get('objstore_sibling_strategy', 'loose')
logger.info('Analyzing %s (%s repos)', toplevel, len(status))
for fullpath in list(status):
analyzed += 1
@@ -757,6 +757,8 @@ def fsck_mirror(config, verbose=False, force=False, repack_only=False,
analyzed = 0
logger.info('Analyzing %s (%s repos)', obstdir, len(obstrepos))
+ baselines = [x.strip() for x in config['core'].get('baselines', '').split('\n')]
+ islandcores = [x.strip() for x in config['core'].get('islandcores', '').split('\n')]
for obstrepo in obstrepos:
analyzed += 1
logger.debug('Processing objstore repo: %s', os.path.basename(obstrepo))
@@ -818,8 +820,8 @@ def fsck_mirror(config, verbose=False, force=False, repack_only=False,
# Not an else, because the previous step may have migrated things
if obstrepo not in amap or not len(amap[obstrepo]):
obst_changes = True
- # XXX: Theoretically, nothing should have cloned a new repo while we were migrating, because
- # they should have found a better candidate as well.
+ # XXX: Is there a possible race condition here if grok-pull cloned a new repo
+ # while we were migrating this one?
logger.info('%s: deleting (no longer used by anything)', os.path.basename(obstrepo))
if obstrepo in amap:
amap.pop(obstrepo)
@@ -835,6 +837,8 @@ def fsck_mirror(config, verbose=False, force=False, repack_only=False,
my_remotes = grokmirror.list_repo_remotes(obstrepo, withurl=True)
# Use the first child repo as our "reference" entry in manifest
refrepo = None
+ set_baseline = False
+ set_islandcore = False
for virtref, childpath in my_remotes:
# Is it still relevant?
if childpath not in amap[obstrepo]:
@@ -865,6 +869,42 @@ def fsck_mirror(config, verbose=False, force=False, repack_only=False,
if gitdir not in manifest:
continue
+ # Do we need to set any alternateRefsPrefixes?
+ if not set_baseline:
+ is_baseline = False
+ for baseline in baselines:
+ # Does this repo match a baseline
+ if fnmatch.fnmatch(gitdir, baseline):
+ is_baseline = True
+ break
+ if is_baseline:
+ set_baseline = True
+ refpref = 'refs/virtual/%s/heads/' % virtref
+ # Go through all remotes and set their alternateRefsPrefixes
+ for s_virtref, s_childpath in my_remotes:
+ # is it already set to that?
+ entries = grokmirror.get_config_from_git(s_childpath, r'core\.alternate*')
+ if entries.get('alternaterefsprefixes') != refpref:
+ s_gitdir = '/' + os.path.relpath(s_childpath, toplevel)
+ logger.info(' reconfig: %s (baseline to %s)', s_gitdir, virtref)
+ grokmirror.set_git_config(s_childpath, 'core.alternateRefsPrefixes', refpref)
+
+ # Do we need to set islandCore?
+ if not set_islandcore:
+ is_islandcore = False
+ for islandcore in islandcores:
+ # Does this repo match an islandcore pattern?
+ if fnmatch.fnmatch(gitdir, islandcore):
+ is_islandcore = True
+ break
+ if is_islandcore:
+ set_islandcore = True
+ # is it already set to that?
+ entries = grokmirror.get_config_from_git(obstrepo, r'pack\.island*')
+ if entries.get('islandcore') != virtref:
+ logger.info(' reconfig: %s (islandCore to %s)', os.path.basename(obstrepo), virtref)
+ grokmirror.set_git_config(obstrepo, 'pack.islandCore', virtref)
+
if refrepo is None:
# Legacy "reference=" setting in manifest
refrepo = gitdir
@@ -874,7 +914,7 @@ def fsck_mirror(config, verbose=False, force=False, repack_only=False,
manifest[gitdir]['forkgroup'] = os.path.basename(obstrepo[:-4])
- if obstrepo not in status:
+ if obstrepo not in status or set_islandcore:
# We don't use obstrepo fingerprints, so we set it to None
status[obstrepo] = {
'lastcheck': 'never',