aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTheodore Ts'o <tytso@mit.edu>2023-07-05 18:43:46 -0400
committerTheodore Ts'o <tytso@mit.edu>2023-07-05 22:52:23 -0400
commit9c911e52edb4cb96adab0c78482e6f54fe804647 (patch)
tree960851a23c0e323c3ff15f6ab6297cfbfa1e7815
parent9de08c65dec9b0311856f06a95b2e60659ce4739 (diff)
downloadxfstests-bld-9c911e52edb4cb96adab0c78482e6f54fe804647.tar.gz
gce-xfstests: add retry logic when starting VM's
Preemptible or spot VM's are more likely to fail due to lack of resources in the GCE zone. So add retry logic where we will retry three times, and if enabled, fall back to using standard VM's if it's not possible to start spot VM's. Also transition to using spot VM's instead of preemptible VM's. Spot VM's are a newer version of preemptible VM's, and the primary difference is that a Spot VM will not automatically terminate after 24 hours. Also, --spot is a lot easier to type than --preemptible on the gce-xfstests command-line. :-) Signed-off-by: Theodore Ts'o <tytso@mit.edu>
-rwxr-xr-xrun-fstests/gce-xfstests66
-rw-r--r--run-fstests/util/parse_cli20
-rwxr-xr-xtest-appliance/files/root/runtests.sh2
3 files changed, 61 insertions, 27 deletions
diff --git a/run-fstests/gce-xfstests b/run-fstests/gce-xfstests
index 7f1b0742..b85d39fb 100755
--- a/run-fstests/gce-xfstests
+++ b/run-fstests/gce-xfstests
@@ -799,13 +799,6 @@ fi
SCOPES="https://www.googleapis.com/auth/cloud-platform"
-if test -n "$PREEMPTIBLE"
-then
- PREEMPTIBLE="--preemptible"
-else
- PREEMPTIBLE="--maintenance-policy MIGRATE"
-fi
-
if [ -z ${TESTRUNID:+x} ]; then
TESTRUNID=$(date +%Y%m%d%H%M%S)
# if GCE_USER is non-empty or unset
@@ -1218,10 +1211,16 @@ fi
function launch_vm () {
echo "Launching $INSTANCE using $GCE_MACHTYPE..."
+if test -n "$GCE_SPOT"
+then
+ SPOT="--provisioning-model=SPOT"
+else
+ SPOT=
+fi
run_gcloud compute \
instances create "$INSTANCE" --zone "$GCE_ZONE" \
--machine-type "$GCE_MACHTYPE" --network "$GCE_NETWORK" \
- $SSD_ARG $PREEMPTIBLE \
+ $SSD_ARG $SPOT \
$SERVICE_ACCOUNT_OPT_VM \
--scopes "$SCOPES" \
--metadata "^ ^$ARG" \
@@ -1231,24 +1230,47 @@ function launch_vm () {
err=$?
}
ERRFILE=/tmp/gce-xfstests-err-$$
-launch_vm 2> $ERRFILE
-
-if test "$err" -gt 0 ; then
- cat $ERRFILE
- if grep -q images/family/xfstests-amd64 $ERRFILE ; then
- if grep -q "The project .* was not found" $ERRFILE; then
- exit $err
- fi
- echo Retrying with the image family xfstests
- ROOT_FS=xfstests
- launch_vm
- if test "$err" -gt 0 ; then
+RETRY_COUNTER=0
+while true
+do
+ launch_vm 2> $ERRFILE
+
+ if test "$err" -gt 0 ; then
+ cat $ERRFILE
+ if grep -q images/family/xfstests-amd64 $ERRFILE ; then
+ if grep -q "The project .* was not found" $ERRFILE; then
+ exit $err
+ fi
+ echo Retrying with the image family xfstests
+ ROOT_FS=xfstests
+ launch_vm
+ if test "$err" -gt 0 ; then
+ exit $err
+ fi
+ elif grep -iq resource $ERRFILE && grep -iq available $ERRFILE ; then
+ let RETRY_COUNTER++
+ if test $RETRY_COUNTER -lt 3
+ then
+ echo "Resources unavailable, retrying ($RETRY_COUNTER)..."
+ sleep 1
+ else
+ if test -n "$GCE_SPOT" -a -n "$GCE_SPOT_FALLBACK"
+ then
+ echo "Unable to start spot VM, retrying with standard..."
+ GCE_SPOT=
+ RETRY_COUNTER=0
+ else
+ echo "Unable to start VM"
+ exit $err
+ fi
+ fi
+ else
exit $err
fi
else
- exit $err
+ break
fi
-fi
+done
rm -f $ERRFILE
diff --git a/run-fstests/util/parse_cli b/run-fstests/util/parse_cli
index 18b459b6..770c2046 100644
--- a/run-fstests/util/parse_cli
+++ b/run-fstests/util/parse_cli
@@ -288,6 +288,8 @@ no-email
no-insert
no-log
no-preemptible
+no-spot-fallback
+no-spot
no-punch
no-region-shard
no-virtio-rng
@@ -297,6 +299,8 @@ numa:
pmem-device
pts-size:
preemptible
+spot
+spot-fallback
primary_fstype:
repo:
skip-kernel-arch-probe
@@ -592,9 +596,13 @@ while (( $# >= 1 )); do
supported_flavors gce
GCE_NO_VM_TIMEOUT=yes
;;
- --preemptible|--preempt)
+ --preemptible|--spot)
supported_flavors gce
- PREEMPTIBLE=yes
+ GCE_SPOT=yes
+ ;;
+ --spot-fallback)
+ supported_flavors gce
+ GCE_SPOT_FALLBACK=yes
;;
--primary_fstype) shift
PRIMARY_FSTYPE="$1"
@@ -603,9 +611,13 @@ while (( $# >= 1 )); do
supported_flavors gce
PTS_SIZE="$1"
;;
- --no-preemptible|--no-preempt)
+ --no-preemptible|--no-spot)
+ supported_flavors gce
+ GCE_SPOT=
+ ;;
+ --no-spot-fallback)
supported_flavors gce
- PREMPTIBLE=
+ GCE_SPOT_FALLBACK=
;;
--gce-disk-spec) shift
supported_flavors gce
diff --git a/test-appliance/files/root/runtests.sh b/test-appliance/files/root/runtests.sh
index 9d31e0f6..80a166fe 100755
--- a/test-appliance/files/root/runtests.sh
+++ b/test-appliance/files/root/runtests.sh
@@ -641,7 +641,7 @@ do
else
touch "$RESULT_BASE/completed"
fi
- if test -n "$last_time"
+ if test -n "$last_test"
then
if test -f "$RESULT_BASE/results.xml"; then
add_error_xunit "$RESULT_BASE/results.xml" "$last_test" "xfstests.global"