diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2019-01-07 15:32:30 -0500 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2019-01-07 15:32:30 -0500 |
commit | 4f84116b066b57b48940974a78e4092c9b8cc020 (patch) | |
tree | 7b18bf4ce552ec42bf0c95e09b24f5f50493ecf1 | |
parent | 24b3ad044c2fd0127833cdf59425d0de78b2c134 (diff) | |
download | grokmirror-4f84116b066b57b48940974a78e4092c9b8cc020.tar.gz |
Make preciousObjects configurable
The trade-offs for enabling preciousObjects are pretty hefty, so turn
this option off by default and make it configurable by the end-user. It's
probably a feature that makes sense on the master, but is overkill on
the mirrors.
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- | fsck.conf | 8 | ||||
-rwxr-xr-x | grokmirror/fsck.py | 107 | ||||
-rwxr-xr-x | grokmirror/pull.py | 4 |
3 files changed, 77 insertions, 42 deletions
@@ -68,3 +68,11 @@ extra_repack_flags_full = --window=250 --depth=50 # are relying on this repo via alternates, it will not be pruned to avoid # potential corruption. prune = yes +# +# Setting precious=yes will add extensions.preciousObjects=true git configuration +# to all repositories that are parents to others (via git alternates). Turning this +# on will help eliminate the possibility of repository corruption, but at a price of +# keeping all redundant objects on disk forever. Repositories with preciousObjects +# will still be repacked periodically, but redundant packs and loose objects will +# never be cleaned up and will be kept around forever. +precious = no diff --git a/grokmirror/fsck.py b/grokmirror/fsck.py index b777544..b021015 100755 --- a/grokmirror/fsck.py +++ b/grokmirror/fsck.py @@ -37,9 +37,10 @@ def check_reclone_error(fullpath, config, errors): for line in errors: for estring in config['reclone_on_errors']: if line.find(estring) != -1: - # is preciousObjects set for this repo? - if check_precious_objects(fullpath): - logger.critical('\tpreciousObjects set, not requesting auto-reclone') + # is this repo used for alternates? + gitdir = '/' + os.path.relpath(fullpath, config['toplevel']).lstrip('/') + if grokmirror.is_alt_repo(config['toplevel'], gitdir): + logger.critical('\tused for alternates, not requesting auto-reclone') return else: reclone = line @@ -108,6 +109,8 @@ def run_git_repack(fullpath, config, level=1): # Returns false if we hit any errors on the way repack_ok = True + is_precious = check_precious_objects(fullpath) + # Figure out what our repack flags should be. 
repack_flags = list() if 'extra_repack_flags' in config and len(config['extra_repack_flags']): @@ -121,7 +124,13 @@ def run_git_repack(fullpath, config, level=1): gitdir = '/' + os.path.relpath(fullpath, config['toplevel']).lstrip('/') if grokmirror.is_alt_repo(config['toplevel'], gitdir): # we are a "mother repo" - set_precious_objects(fullpath) + if not is_precious and ('precious' in config and config['precious'] == 'yes'): + is_precious = True + set_precious_objects(fullpath) + + if not is_precious: + repack_flags.append('-k') + # are we using alternates ourselves? Multiple levels of alternates are # a bad idea in general due high possibility of corruption. if os.path.exists(os.path.join(fullpath, 'objects', 'info', 'alternates')): @@ -138,19 +147,21 @@ def run_git_repack(fullpath, config, level=1): elif os.path.exists(os.path.join(fullpath, 'objects', 'info', 'alternates')): # we are a "child repo" repack_flags.append('-l') - repack_flags.append('-d') if level > 1: repack_flags.append('-A') else: # we have no relationships with other repos - repack_flags.append('-d') if level > 1: logger.info(' repack : performing a full repack for optimal deltas') repack_flags.append('-a') - repack_flags.append('-k') + if not is_precious: + repack_flags.append('-k') repack_flags += full_repack_flags + if not is_precious: + repack_flags.append('-d') + args = ['repack'] + repack_flags logger.info(' repack : repacking with "%s"', ' '.join(repack_flags)) @@ -286,6 +297,7 @@ def get_repo_obj_info(fullpath): def set_precious_objects(fullpath): # It's better to just set it blindly without checking first, # as this results in one fewer shell-out. 
+ logger.debug('Setting preciousObjects for %s', fullpath) args = ['config', 'extensions.preciousObjects', 'true'] grokmirror.run_git_command(fullpath, args) @@ -389,6 +401,12 @@ def fsck_mirror(name, config, verbose=False, force=False, repack_only=False, today = datetime.datetime.today() todayiso = today.strftime('%F') + if force: + # Use randomization for next check, again + checkdelay = random.randint(1, frequency) + else: + checkdelay = frequency + # Go through the manifest and compare with status # noinspection PyTypeChecker e_find = em.counter(total=len(manifest), desc='Discovering:', unit='repos', leave=False) @@ -447,7 +465,6 @@ def fsck_mirror(name, config, verbose=False, force=False, repack_only=False, needs_repack = needs_prune = needs_fsck = 0 obj_info = get_repo_obj_info(fullpath) - has_precious_objects = check_precious_objects(fullpath) try: packs = int(obj_info['packs']) count_loose = int(obj_info['count']) @@ -455,16 +472,23 @@ def fsck_mirror(name, config, verbose=False, force=False, repack_only=False, logger.warning('Unable to count objects in %s, skipping' % fullpath) continue + schedcheck = datetime.datetime.strptime(status[fullpath]['nextcheck'], '%Y-%m-%d') + nextcheck = today + datetime.timedelta(days=checkdelay) + if 'repack' not in config.keys() or config['repack'] != 'yes': # don't look at me if you turned off repack + logger.debug('Not repacking because repack=no in config') needs_repack = 0 elif repack_all_full and (count_loose > 0 or packs > 1): + logger.debug('needs_repack=2 due to repack_all_full') needs_repack = 2 elif repack_all_quick and count_loose > 0: + logger.debug('needs_repack=1 due to repack_all_quick') needs_repack = 1 elif conn_only: # don't do any repacks if we're running forced connectivity checks, unless # you specifically passed --repack-all-foo + logger.debug('needs_repack=0 due to --conn-only') needs_repack = 0 else: # for now, hardcode the maximum loose objects and packs @@ -480,34 +504,45 @@ def fsck_mirror(name, 
config, verbose=False, force=False, repack_only=False, if packs >= max_packs: logger.debug('Triggering full repack of %s because packs > 20', fullpath) needs_repack = 2 - # If we have precious objects, we don't consider loose objects for anything - elif not has_precious_objects: - if count_loose >= max_loose_objects: - logger.debug('Triggering quick repack of %s because loose objects > 1200', fullpath) + elif count_loose >= max_loose_objects: + logger.debug('Triggering quick repack of %s because loose objects > 1200', fullpath) + needs_repack = 1 + else: + # is the number of loose objects or their size more than 10% of + # the overall total? + in_pack = int(obj_info['in-pack']) + size_loose = int(obj_info['size']) + size_pack = int(obj_info['size-pack']) + total_obj = count_loose + in_pack + total_size = size_loose + size_pack + # set some arbitrary "worth bothering" limits so we don't + # continuously repack tiny repos. + if total_obj > 500 and count_loose/total_obj*100 >= pc_loose_objects: + logger.debug('Triggering repack of %s because loose objects > %s%% of total', + fullpath, pc_loose_objects) + needs_repack = 1 + elif total_size > 1024 and size_loose/total_size*100 >= pc_loose_size: + logger.debug('Triggering repack of %s because loose size > %s%% of total', + fullpath, pc_loose_size) needs_repack = 1 - else: - # is the number of loose objects or their size more than 10% of - # the overall total? - in_pack = int(obj_info['in-pack']) - size_loose = int(obj_info['size']) - size_pack = int(obj_info['size-pack']) - total_obj = count_loose + in_pack - total_size = size_loose + size_pack - # set some arbitrary "worth bothering" limits so we don't - # continuously repack tiny repos. 
- if total_obj > 500 and count_loose/total_obj*100 >= pc_loose_objects: - logger.debug('Triggering repack of %s because loose objects > %s%% of total', - fullpath, pc_loose_objects) - needs_repack = 1 - elif total_size > 1024 and size_loose/total_size*100 >= pc_loose_size: - logger.debug('Triggering repack of %s because loose size > %s%% of total', - fullpath, pc_loose_size) - needs_repack = 1 + + if needs_repack > 0 and check_precious_objects(fullpath): + # if we have preciousObjects, then we only repack based on the same + # schedule as fsck. + logger.debug('preciousObjects is set') + # for repos with preciousObjects, we use the fsck schedule for repacking + if schedcheck <= today: + logger.debug('Time for a full periodic repack of a preciousObjects repo') + status[fullpath]['nextcheck'] = nextcheck.strftime('%F') + needs_repack = 2 + else: + logger.debug('Not repacking preciousObjects repo outside of schedule') + needs_repack = 0 # Do we need to fsck it? if not (repack_all_quick or repack_all_full or repack_only): - nextcheck = datetime.datetime.strptime(status[fullpath]['nextcheck'], '%Y-%m-%d') - if nextcheck <= today or force: + if schedcheck <= today or force: + status[fullpath]['nextcheck'] = nextcheck.strftime('%F') needs_fsck = 1 if needs_repack or needs_fsck or needs_prune: @@ -565,14 +600,6 @@ def fsck_mirror(name, config, verbose=False, force=False, repack_only=False, status[fullpath]['lastcheck'] = todayiso status[fullpath]['s_elapsed'] = int(endt-startt) - if force: - # Use randomization for next check, again - delay = random.randint(1, frequency) - else: - delay = frequency - - nextdate = today + datetime.timedelta(days=delay) - status[fullpath]['nextcheck'] = nextdate.strftime('%F') logger.info(' done : %ss, next check on %s', status[fullpath]['s_elapsed'], status[fullpath]['nextcheck']) diff --git a/grokmirror/pull.py b/grokmirror/pull.py index bccaa75..53b2fa0 100755 --- a/grokmirror/pull.py +++ b/grokmirror/pull.py @@ -512,7 +512,7 @@ def 
pull_mirror(name, config, verbose=False, force=False, nomtime=False, manifest = grokmirror.read_manifest(manifile) # Don't accept empty manifests -- that indicates something is wrong if not len(manifest.keys()): - logger.critical('Remote manifest empty or unparseable! Quitting.') + logger.warning('Remote manifest empty or unparseable! Quitting.') return 1 else: @@ -943,7 +943,7 @@ def pull_mirror(name, config, verbose=False, force=False, nomtime=False, culled[gitdir]['fingerprint'] = my_fingerprint run_post_update_hook(hookscript, toplevel, gitdir) else: - logger.critical('Was not able to clone %s', gitdir) + logger.warning('Was not able to clone %s', gitdir) # Remove it from our manifest so we can try re-cloning # next time grok-pull runs del culled[gitdir] |