aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Vincent Fu <vincentfu@gmail.com> 2024-02-27 10:26:00 -0500
committer: Vincent Fu <vincent.fu@samsung.com> 2024-02-27 12:36:45 -0500
commit: 5ae4f4220a48dddddc84c8b839ef9d8a1ed4edb1 (patch)
tree: 5c6464a43faf028c167ad2232aaa3acae2accf3f
parent: 97ef7f3a2755902dc547a8933724f63639473ac0 (diff)
download: fio-5ae4f4220a48dddddc84c8b839ef9d8a1ed4edb1.tar.gz
gettime: fix cpuclock-test on AMD platforms
Starting with gcc 11, __sync_synchronize() compiles to `lock or QWORD PTR [rsp], 0` on x86_64 platforms. Previously it compiled to an mfence instruction. See line 47 of https://godbolt.org/z/xfE18K7b4 for an example. On Intel platforms this change does not affect the result of fio's CPU clock test. But on AMD platforms, this change causes fio's CPU clock test to fail and fio to fall back to clock_gettime() instead of using the CPU clock for timing. This patch has fio explicitly use an mfence instruction instead of __sync_synchronize() in the CPU clock test code on x86_64 platforms in order to allow the CPU clock test to pass on AMD platforms. Reviewed-by: Jens Axboe <axboe@kernel.dk> Link: https://lore.kernel.org/r/20240227155856.5012-1-vincent.fu@samsung.com Signed-off-by: Vincent Fu <vincent.fu@samsung.com>
-rw-r--r-- arch/arch-x86_64.h 5
-rw-r--r-- arch/arch.h 7
-rw-r--r-- gettime.c 2
3 files changed, 13 insertions, 1 deletion
diff --git a/arch/arch-x86_64.h b/arch/arch-x86_64.h
index 86ce1b7ed..b402dc6df 100644
--- a/arch/arch-x86_64.h
+++ b/arch/arch-x86_64.h
@@ -26,6 +26,11 @@ static inline unsigned long arch_ffz(unsigned long bitmask)
return bitmask;
}
+static inline void tsc_barrier(void)
+{
+ __asm__ __volatile__("mfence":::"memory");
+}
+
static inline unsigned long long get_cpu_clock(void)
{
unsigned int lo, hi;
diff --git a/arch/arch.h b/arch/arch.h
index 3ee9b0538..7e294ddfb 100644
--- a/arch/arch.h
+++ b/arch/arch.h
@@ -108,6 +108,13 @@ extern unsigned long arch_flags;
#include "arch-generic.h"
#endif
+#if !defined(__x86_64__) && defined(CONFIG_SYNC_SYNC)
+static inline void tsc_barrier(void)
+{
+ __sync_synchronize();
+}
+#endif
+
#include "../lib/ffz.h"
/* IWYU pragma: end_exports */
diff --git a/gettime.c b/gettime.c
index bc66a3ac9..5ca312063 100644
--- a/gettime.c
+++ b/gettime.c
@@ -623,7 +623,7 @@ static void *clock_thread_fn(void *data)
seq = *t->seq;
if (seq == UINT_MAX)
break;
- __sync_synchronize();
+ tsc_barrier();
tsc = get_cpu_clock();
} while (seq != atomic32_compare_and_swap(t->seq, seq, seq + 1));