aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJin Wen <wenx.jin@intel.com>2018-09-17 12:47:46 -0700
committerAndi Kleen <ak@linux.intel.com>2018-09-17 16:02:56 -0700
commit0163b104b147239027c46661eada86bb87760fd9 (patch)
tree923dbb5c03489139a8c2a2b55e0cce99cdabd70f
parent74bffd8b0aa27051aeaa1983a7b23975ca8d8726 (diff)
downloadmce-test-0163b104b147239027c46661eada86bb87760fd9.tar.gz
Add 'EDAC' regression test case
This test verifies the EDAC driver by checking that its output stays correct across kernel releases, comparing it against a reference result recorded on an earlier run or on an earlier kernel version. Signed-off-by: Jin Wen <wenx.jin@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
-rwxr-xr-xcases/function/edac/edac.sh268
-rwxr-xr-xcases/function/edac/runtest.sh41
-rw-r--r--groups/function1
3 files changed, 310 insertions, 0 deletions
diff --git a/cases/function/edac/edac.sh b/cases/function/edac/edac.sh
new file mode 100755
index 0000000..5c96cf1
--- /dev/null
+++ b/cases/function/edac/edac.sh
@@ -0,0 +1,268 @@
+#!/bin/bash
+
+# This test verifies the EDAC driver by checking that its output stays
+# correct across kernel releases. The output is compared against a reference
+# result recorded on an earlier run or on an earlier kernel version and saved
+# in a file named 'edac_ref_file'.
+# Only memory error injection is checked for the EDAC driver here.
+# CE memory errors are injected and consumed at the specific addresses saved
+# in the reference file. If the EDAC-related dmesg output matches the
+# corresponding content of the reference file, the test is a PASS; otherwise
+# it is a FAIL. If the reference file doesn't exist, this script generates it
+# and exits; you need to re-run the script to complete the test.
+# If the memory configuration of the SUT platform has changed, you need to
+# delete the original reference file and re-generate it.
+
+# Resolve the test-suite root as three directory levels above the current
+# working directory, then load the shared helper libraries from it.
+# NOTE(review): setup_path is not defined in this file; presumably it comes
+# from lib/functions.sh -- confirm.
+export ROOT=`(cd ../../../; pwd)`
+. $ROOT/lib/functions.sh
+setup_path
+. $ROOT/lib/mce.sh
+
+EDAC_DIR=$ROOT/cases/function/edac
+LOG_DIR=$EDAC_DIR/log
+# reference EDAC messages that later runs are compared against
+EDAC_REF_FILE=$EDAC_DIR/edac_ref_file
+# 'dmidecode -t 17' output saved when the reference file was generated
+MEM_CONF_FILE=$EDAC_DIR/mem_conf_file
+# path of the einj directory under debugfs, filled in by check_support
+EINJ_IF=""
+# per-run log file, named after the current time and the running kernel
+LOG_FILE=$LOG_DIR/$(date +%Y-%m-%d.%H.%M.%S)-`uname -r`.log
+# memory CE error
+ERR_TYPE=0x8
+# last value produced by get_random
+URANDOM=0
+PAGESIZE=4096
+# Number of addresses injected per iomem range. E.g. if 3 iomem ranges
+# are used, the total number of injections is up to $NUM_TESTADDR * 3.
+NUM_TESTADDR=100
+# number of EDAC message lines copied into the reference file
+NUM_TOSAVE=20
+# number of reference lines whose message was not found in dmesg
+COUNT_FAIL=0
+# minimal size, in MB, of a "System RAM" range that gets injected
+RANGE_SIZE_THR=500
+# line numbers already picked by save_edac_info
+declare -a LINE_REC
+
+# Verify that the platform can run the test: debugfs is usable, the einj
+# module (when it exists as a module) loads, an EDAC module is loaded, and the
+# einj debugfs directory exists. Sets EINJ_IF to that directory.
+# NOTE(review): check_debugfs and die are not defined in this file;
+# presumably they come from the sourced libraries -- confirm.
+check_support()
+{
+    check_debugfs
+    # only try to load einj when modinfo knows about it
+    if modinfo einj &> /dev/null; then
+        modprobe einj param_extension=1 ||
+            die "module einj is not supported?"
+    fi
+    if ! lsmod | grep -q edac; then
+        die "EDAC related modules aren't found."
+    fi
+    # first debugfs mount point listed in /proc/mounts
+    EINJ_IF=$(grep debugfs /proc/mounts | cut -d ' ' -f2 | head -1)/apei/einj
+    [ -d $EINJ_IF ] ||
+        die "einj isn't supported, please check your bios setting"
+}
+
+# Write the current 'dmidecode -t 17' output to $MEM_CONF_FILE, overwriting
+# any previous content. check_mem_conf later diffs against this file.
+save_memconf()
+{
+ # save memory configuration on the platform for comparison
+ dmidecode -t 17 > $MEM_CONF_FILE
+}
+
+# Store a random unsigned integer in the global URANDOM.
+get_random()
+{
+ # Read 4 bytes from /dev/urandom and print them as one unsigned decimal
+ # number, giving a much wider range than bash's $RANDOM (0..32767).
+ # The value is not guaranteed to be greater than 32767.
+ URANDOM=`od -An -N4 -t uL /dev/urandom | tr -d " "`
+}
+
+# Avoid selecting the same line twice.
+# Arguments: $1 - number of entries of LINE_REC to inspect (indexes 0..$1-1)
+#            $2 - candidate line number
+# Returns:   1 if the candidate is already recorded in LINE_REC, else 0
+check_same_value()
+{
+    local count=$1
+    local candidate=$2
+    # keep the loop index local so callers' variables are not clobbered
+    local idx
+
+    for ((idx = 0; idx < count; idx++))
+    do
+        if [ "$candidate" -eq "${LINE_REC[$idx]}" ]; then
+            return 1
+        fi
+    done
+    return 0
+}
+
+# Append a header and up to $NUM_TOSAVE randomly chosen, distinct EDAC CE
+# message lines from the file 'edac_mesg' to $EDAC_REF_FILE.
+# Globals: NUM_TOSAVE, EDAC_REF_FILE, URANDOM (read); LINE_REC (written)
+# Exits with status 1 when 'edac_mesg' holds no matching EDAC line.
+save_edac_info()
+{
+    local lines
+    local rand_line
+    local tmpfile
+    local saved=0
+    local to_save=$NUM_TOSAVE
+
+    lines=$(grep -c "EDAC.*CE.*page:" edac_mesg 2> /dev/null)
+    lines=${lines:-0}
+    if [ "$lines" -eq 0 ]; then
+        echo "Fail: can't found EDAC related information"
+        exit 1
+    fi
+    # Never ask for more distinct lines than exist, otherwise the selection
+    # loop below could not terminate.
+    if [ "$lines" -lt "$to_save" ]; then
+        to_save=$lines
+    fi
+    # create the temp file only after the early exit so it cannot leak
+    tmpfile=$(mktemp)
+    echo "Kernel Version: `uname -r`" >> $EDAC_REF_FILE
+    echo -e "Created Date: `date`\n" >> $EDAC_REF_FILE
+    grep "EDAC.*CE.*page:" edac_mesg > "$tmpfile"
+    while [ $saved -lt $to_save ]
+    do
+        get_random
+        # map onto 1..$lines so every line, including the last, is eligible
+        rand_line=$(($URANDOM % $lines + 1))
+        # with saved == 0 nothing is recorded yet and the check passes
+        check_same_value $saved $rand_line || continue
+        LINE_REC[$saved]=$rand_line
+        let "saved += 1"
+        sed -n "${rand_line}p" "$tmpfile" >> $EDAC_REF_FILE
+    done
+    rm -f "$tmpfile"
+}
+
+# Generate the reference data: inject CE memory errors on up to
+# $NUM_TESTADDR consecutive pages in every large enough "System RAM" range,
+# collect the resulting kernel messages in the file 'edac_mesg', then call
+# save_edac_info and save_memconf.
+inject_lot_ce()
+{
+ local start_addr
+ local end_addr
+ local rand_addr
+ local test_pfn
+ local test_addr
+
+ # clear the kernel ring buffer and start with an empty message file
+ dmesg -c &> /dev/null
+ : > edac_mesg
+ echo $ERR_TYPE > $EINJ_IF/error_type
+ # NOTE(review): per the kernel EINJ documentation param2 is the address
+ # mask for memory errors and notrigger=0 keeps the trigger step -- confirm.
+ echo 0xfffffffffffff000 > $EINJ_IF/param2
+ echo 0x0 > $EINJ_IF/notrigger
+ # URANDOM is drawn once here and reused as the offset for every range
+ get_random
+ # keep only the "start-end" address part of each System RAM line
+ cat /proc/iomem | grep "System RAM" | cut -d ':' -f1 > iomem_tmp
+ echo "Inject a lot of CE memory errors into some of the following addresses:"
+ while read line
+ do
+ start_addr=`echo $line | awk -F '-' '{print "0x"$1}'`
+ end_addr=`echo $line | awk -F '-' '{print "0x"$2}'`
+ # pick address greater than or equal to 0x100000
+ [[ $start_addr -lt 0x100000 ]] && continue
+ # skip injecting error into small memory areas(<500MB)
+ [[ $(($end_addr - $start_addr)) -lt $(($RANGE_SIZE_THR * 0x100000)) ]] && continue
+ printf "0x%016lx - 0x%016lx\n" $start_addr $end_addr
+ rand_addr=$(($start_addr + $URANDOM % ($end_addr - $start_addr)))
+ # fall back to the range start when the tested pages would run past the end
+ if [[ $(($rand_addr + $NUM_TESTADDR * $PAGESIZE)) -gt $end_addr ]]; then
+ rand_addr=$(printf "0x%lx" $start_addr)
+ fi
+ for i in `seq 1 $NUM_TESTADDR`
+ do
+ let "test_pfn = $rand_addr / $PAGESIZE + $i"
+ # appending "000" to the hex page frame number yields pfn * 0x1000
+ test_addr=$(printf "0x%lx" $test_pfn)"000"
+ [[ $test_addr -gt $end_addr ]] && break
+ echo $test_addr > $EINJ_IF/param1
+ echo 1 > $EINJ_IF/error_inject
+ # move the messages produced so far into edac_mesg
+ dmesg -c >> edac_mesg
+ done
+ done < iomem_tmp
+ # avoid some messages coming later
+ sleep 1
+ dmesg -c >> edac_mesg
+ save_edac_info
+ save_memconf
+}
+
+# Replay the reference file: for every line of $EDAC_REF_FILE that mentions
+# EDAC, inject a CE memory error at the page recorded in that line and let
+# check_result compare the kernel output with the line.
+inject_spec_addr()
+{
+    local addr
+
+    # start from an empty kernel ring buffer
+    dmesg -c &> /dev/null
+    echo $ERR_TYPE > $EINJ_IF/error_type
+    echo 0xfffffffffffff000 > $EINJ_IF/param2
+    echo 0x0 > $EINJ_IF/notrigger
+    while read line
+    do
+        # header and blank lines carry no EDAC message; skip them
+        case "$line" in
+        *EDAC*) ;;
+        *) continue ;;
+        esac
+        # "page:0x<pfn>" -> "0x<pfn>000", the address of that page
+        addr=$(echo "$line" | grep -o "page:0x[a-f0-9]*" | cut -d':' -f2)"000"
+        echo $addr > $EINJ_IF/param1
+        echo 1 > $EINJ_IF/error_inject
+        check_result "$line"
+    done < $EDAC_REF_FILE
+}
+
+# Check that the kernel log contains the EDAC message of one reference line.
+# Arguments: the reference line, as stored in $EDAC_REF_FILE
+# Globals:   LOG_FILE (appended to), COUNT_FAIL (incremented on failure)
+# Outputs:   "<address> PASS|FAIL" on stdout and in $LOG_FILE, followed in
+#            the log by the kernel messages, which are cleared afterwards
+check_result()
+{
+    local addr
+    local tmpstr
+    local edac_str
+    local verdict="PASS"
+
+    addr=$(echo "$@" | grep -o "page:0x[a-f0-9]*" | cut -d':' -f2)"000"
+    tmpstr="$*"
+    # remove timestamp in head of each line
+    edac_str=${tmpstr#\[*.*\] }
+    # The reference message is literal text, not a regular expression, so
+    # match it with -F; "--" protects a message starting with '-'.
+    if ! dmesg | grep -qF -- "$edac_str"; then
+        # re-check it to avoid later coming message
+        sleep 1
+        if ! dmesg | grep -qF -- "$edac_str"; then
+            verdict="FAIL"
+            let "COUNT_FAIL += 1"
+        fi
+    fi
+    printf "0x%016lx %s\n" $addr "$verdict" | tee -a $LOG_FILE
+    echo -e "\nEDAC dmesg output as below:\n" >> $LOG_FILE
+    dmesg -c >> $LOG_FILE
+    echo >> $LOG_FILE
+}
+
+# Compare the current 'dmidecode -t 17' output with $MEM_CONF_FILE.
+# Returns: 0 when diff reports them identical, 1 otherwise (including when
+#          diff itself fails)
+check_mem_conf()
+{
+    local current=$(mktemp)
+    local rc=1
+
+    dmidecode -t 17 > $current
+    if diff -q $current $MEM_CONF_FILE &> /dev/null; then
+        rc=0
+    fi
+    # single cleanup path for both outcomes
+    rm -f $current
+    return $rc
+}
+
+# Remove the scratch files created in the current directory.
+cleanup()
+{
+    rm -f iomem_tmp edac_mesg
+}
+
+# Run cleanup on every exit path. SIGKILL (9) cannot be trapped, so it is
+# not listed. INT and TERM must terminate the script; a handler that only
+# cleaned up would let execution continue after the signal. Exiting from
+# the handler also fires the EXIT trap.
+trap cleanup EXIT
+trap 'exit 130' INT
+trap 'exit 143' TERM
+# Entry point. Without a reference file, generate it and exit with status 1
+# so the caller knows the test itself has not run yet. Otherwise replay the
+# reference file and report the result.
+# Returns: 0 when every reference line was matched, 1 when any line failed
+main()
+{
+    local rc=0
+
+    if [ `id -u` -ne 0 ]; then
+        echo "Must be run as root"
+        # error injection cannot work without root, so stop here
+        exit 1
+    fi
+    check_support
+    if [ ! -e $EDAC_REF_FILE ]; then
+        echo "---------------------------------------------------"
+        echo "Reference result doesn't exist, wait to generate..."
+        echo "---------------------------------------------------"
+        inject_lot_ce
+        echo "----------------------------------------------------------------------------"
+        echo "Reference result is already generated, please go test via re-running the script!"
+        echo "----------------------------------------------------------------------------"
+        exit 1
+    fi
+    check_mem_conf
+    [ $? -eq 0 ] || \
+        die "memory configuration changed, please delete '$EDAC_REF_FILE', then re-run the test."
+    mkdir -p $LOG_DIR
+    echo -e "\nKernel Version: `uname -r`\n" | tee -a $LOG_FILE
+    echo -e "Test all addresses in EDAC reference file...\n" | tee -a $LOG_FILE
+    inject_spec_addr
+    if [ $COUNT_FAIL -gt 0 ]; then
+        echo -e "\nTest FAIL\n" | tee -a $LOG_FILE
+        # make the failure visible to the caller through the exit status
+        rc=1
+    else
+        echo -e "\nTest PASS\n" | tee -a $LOG_FILE
+    fi
+    echo "More detail please check log in $LOG_FILE"
+    return $rc
+}
+main
diff --git a/cases/function/edac/runtest.sh b/cases/function/edac/runtest.sh
new file mode 100755
index 0000000..0c872e6
--- /dev/null
+++ b/cases/function/edac/runtest.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# This test validates the EDAC function for RAS. It only checks the
+# EDAC-related information in the dmesg output when a Memory Correctable
+# Error is injected with the EINJ tool.
+
+# print a notice describing the scope of the test
+cat <<-EOF
+
+***************************************************************************
+Pay attention:
+
+EDAC subsystem is a hardware specific driver to report hardware related error,
+here only Memory Correctable Error is checked.
+This test is used for verifying EDAC driver by checking if its output can
+keep correct under different kernel release via comparing against a reference
+result run earlier or on earlier kernel version.
+***************************************************************************
+
+EOF
+
+# Use the caller's TMP_DIR when it names an existing directory, otherwise
+# fall back to ../../../work.
+# NOTE(review): the fallback is a relative path, so it is resolved against
+# whatever the current directory is at each use -- including after the pushd
+# below. Confirm the script is always started from its own directory.
+TMP="../../../work"
+TMP_DIR=${TMP_DIR:-$TMP}
+if [ ! -d $TMP_DIR ]; then
+ TMP_DIR=$TMP
+fi
+export TMP_DIR
+
+# per-process status file: 0 means success so far
+echo 0 > $TMP_DIR/error.$$
+
+# run edac.sh from the directory this script lives in; record a failure
+pushd `dirname $0` > /dev/null
+./edac.sh
+[ $? -eq 0 ] || echo 1 > $TMP_DIR/error.$$
+popd > /dev/null
+
+# exit with 1 when the status file contains a "1", otherwise with 0
+grep -q "1" $TMP_DIR/error.$$
+if [ $? -eq 0 ]
+then
+ exit 1
+else
+ exit 0
+fi
+
diff --git a/groups/function b/groups/function
index 6e28f35..63a7e8a 100644
--- a/groups/function
+++ b/groups/function
@@ -5,6 +5,7 @@ ERST-INJ cases/function/erst-inject/runtest.sh on
eMCA cases/function/emca-inj/runtest.sh off
#PFA test depends on correct BIOS/mcelog setting
PFA cases/function/pfa/runtest.sh on
+EDAC cases/function/edac/runtest.sh off
#SRAR test highly depends on BIOS implementation
SRAR-DCU cases/function/core_recovery/runtest_dcu.sh off
SRAR-IFU cases/function/core_recovery/runtest_ifu.sh off