diff options
author | Theodore Ts'o <tytso@mit.edu> | 2023-07-05 18:43:46 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2023-07-05 22:52:23 -0400 |
commit | 9c911e52edb4cb96adab0c78482e6f54fe804647 (patch) | |
tree | 960851a23c0e323c3ff15f6ab6297cfbfa1e7815 | |
parent | 9de08c65dec9b0311856f06a95b2e60659ce4739 (diff) | |
download | xfstests-bld-9c911e52edb4cb96adab0c78482e6f54fe804647.tar.gz |
gce-xfstests: add retry logic when starting VM's
Preemptible or spot VM's are more likely to fail due to lack of
resources in the GCE zone. So add retry logic where we will retry
three times, and if enabled, fall back to using standard VM's if it's
not possible to start spot VM's.
Also transition to using spot VM's instead of preemptible VM's. A
Spot VM is a newer version of preemptible VM's, and the primary
difference is that a Spot VM will not automatically terminate after 24
hours. Also, --spot is a lot easier to type than --preemptible on the
gce-xfstests command-line. :-)
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
-rwxr-xr-x | run-fstests/gce-xfstests | 66 | ||||
-rw-r--r-- | run-fstests/util/parse_cli | 20 | ||||
-rwxr-xr-x | test-appliance/files/root/runtests.sh | 2 |
3 files changed, 61 insertions, 27 deletions
diff --git a/run-fstests/gce-xfstests b/run-fstests/gce-xfstests index 7f1b0742..b85d39fb 100755 --- a/run-fstests/gce-xfstests +++ b/run-fstests/gce-xfstests @@ -799,13 +799,6 @@ fi SCOPES="https://www.googleapis.com/auth/cloud-platform" -if test -n "$PREEMPTIBLE" -then - PREEMPTIBLE="--preemptible" -else - PREEMPTIBLE="--maintenance-policy MIGRATE" -fi - if [ -z ${TESTRUNID:+x} ]; then TESTRUNID=$(date +%Y%m%d%H%M%S) # if GCE_USER is non-empty or unset @@ -1218,10 +1211,16 @@ fi function launch_vm () { echo "Launching $INSTANCE using $GCE_MACHTYPE..." +if test -n "$GCE_SPOT" +then + SPOT="--provisioning-model=SPOT" +else + SPOT= +fi run_gcloud compute \ instances create "$INSTANCE" --zone "$GCE_ZONE" \ --machine-type "$GCE_MACHTYPE" --network "$GCE_NETWORK" \ - $SSD_ARG $PREEMPTIBLE \ + $SSD_ARG $SPOT \ $SERVICE_ACCOUNT_OPT_VM \ --scopes "$SCOPES" \ --metadata "^ ^$ARG" \ @@ -1231,24 +1230,47 @@ function launch_vm () { err=$? } ERRFILE=/tmp/gce-xfstests-err-$$ -launch_vm 2> $ERRFILE - -if test "$err" -gt 0 ; then - cat $ERRFILE - if grep -q images/family/xfstests-amd64 $ERRFILE ; then - if grep -q "The project .* was not found" $ERRFILE; then - exit $err - fi - echo Retrying with the image family xfstests - ROOT_FS=xfstests - launch_vm - if test "$err" -gt 0 ; then +RETRY_COUNTER=0 +while true +do + launch_vm 2> $ERRFILE + + if test "$err" -gt 0 ; then + cat $ERRFILE + if grep -q images/family/xfstests-amd64 $ERRFILE ; then + if grep -q "The project .* was not found" $ERRFILE; then + exit $err + fi + echo Retrying with the image family xfstests + ROOT_FS=xfstests + launch_vm + if test "$err" -gt 0 ; then + exit $err + fi + elif grep -iq resource $ERRFILE && grep -iq available $ERRFILE ; then + let RETRY_COUNTER++ + if test $RETRY_COUNTER -lt 3 + then + echo "Resources unavailable, retrying ($RETRY_COUNTER)..." + sleep 1 + else + if test -n "$GCE_SPOT" -a -n "$GCE_SPOT_FALLBACK" + then + echo "Unable to start spot VM, retrying with standard..." 
+ GCE_SPOT= + RETRY_COUNTER=0 + else + echo "Unable to start VM" + exit $err + fi + fi + else exit $err fi else - exit $err + break fi -fi +done rm -f $ERRFILE diff --git a/run-fstests/util/parse_cli b/run-fstests/util/parse_cli index 18b459b6..770c2046 100644 --- a/run-fstests/util/parse_cli +++ b/run-fstests/util/parse_cli @@ -288,6 +288,8 @@ no-email no-insert no-log no-preemptible +no-spot-fallback +no-spot no-punch no-region-shard no-virtio-rng @@ -297,6 +299,8 @@ numa: pmem-device pts-size: preemptible +spot +spot-fallback primary_fstype: repo: skip-kernel-arch-probe @@ -592,9 +596,13 @@ while (( $# >= 1 )); do supported_flavors gce GCE_NO_VM_TIMEOUT=yes ;; - --preemptible|--preempt) + --preemptible|--spot) supported_flavors gce - PREEMPTIBLE=yes + GCE_SPOT=yes + ;; + --spot-fallback) + supported_flavors gce + GCE_SPOT_FALLBACK=yes ;; --primary_fstype) shift PRIMARY_FSTYPE="$1" @@ -603,9 +611,13 @@ while (( $# >= 1 )); do supported_flavors gce PTS_SIZE="$1" ;; - --no-preemptible|--no-preempt) + --no-preemptible|--no-spot) + supported_flavors gce + GCE_SPOT= + ;; + --no-spot-fallback) supported_flavors gce - PREMPTIBLE= + GCE_SPOT_FALLBACK= ;; --gce-disk-spec) shift supported_flavors gce diff --git a/test-appliance/files/root/runtests.sh b/test-appliance/files/root/runtests.sh index 9d31e0f6..80a166fe 100755 --- a/test-appliance/files/root/runtests.sh +++ b/test-appliance/files/root/runtests.sh @@ -641,7 +641,7 @@ do else touch "$RESULT_BASE/completed" fi - if test -n "$last_time" + if test -n "$last_test" then if test -f "$RESULT_BASE/results.xml"; then add_error_xunit "$RESULT_BASE/results.xml" "$last_test" "xfstests.global" |