summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Oliver Upton <oliver.upton@linux.dev> 2023-11-29 10:31:37 +0000
committer Oliver Upton <oliver.upton@linux.dev> 2023-11-29 10:31:49 +0000
commit 60ad5c2833b090583eae724a6e1917f4efe0c702 (patch)
tree 305ec91d928777f6f9f3e55785fa34377ca01052
parent e1b00f7a4adca5928ab3032e17743180a83bb4d0 (diff)
download aarch64-memcpy-60ad5c2833b090583eae724a6e1917f4efe0c702.tar.gz
prefetch
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
-rw-r--r-- Makefile 1
-rw-r--r-- copy_template.h 1
-rw-r--r-- main.c 5
-rw-r--r-- memcpy.h 1
-rw-r--r-- memcpy_ldp_stp.S 3
-rw-r--r-- memcpy_ldp_str.S 3
-rw-r--r-- memcpy_ldp_sttr.S 3
-rw-r--r-- memcpy_ldr_stp.S 3
-rw-r--r-- memcpy_ldtr_stp.S 3
-rw-r--r-- memcpy_prfm_2k_ldp_str.S 19
10 files changed, 42 insertions, 0 deletions
diff --git a/Makefile b/Makefile
index a424394..c4b94f2 100644
--- a/Makefile
+++ b/Makefile
@@ -7,6 +7,7 @@ OBJS += memcpy_ldp_sttr.o
OBJS += memcpy_ldp_stp.o
OBJS += memcpy_ldr_stp.o
OBJS += memcpy_ldtr_stp.o
+OBJS += memcpy_prfm_2k_ldp_str.o
memcpy: $(OBJS)
$(CC) $(CFLAGS) $(OBJS) -o $@
diff --git a/copy_template.h b/copy_template.h
index 3ed0124..3c15f25 100644
--- a/copy_template.h
+++ b/copy_template.h
@@ -18,6 +18,7 @@ D_h .req x10
sub count, count, #64
1:
+ prfm1 src
stp1 A_l, A_h, dst, #16
ldp1 A_l, A_h, src, #16
stp1 B_l, B_h, dst, #16
diff --git a/main.c b/main.c
index 3542501..6f28f68 100644
--- a/main.c
+++ b/main.c
@@ -20,6 +20,7 @@ enum mode {
LDR_STP,
LDTR_STP,
LDP_STP,
+ PRFM_2K_LDP_STR,
NR_MODES
};
@@ -40,6 +41,7 @@ static void pr_modes(void)
PR_MODE(LDR_STP);
PR_MODE(LDTR_STP);
PR_MODE(LDP_STP);
+ PR_MODE(PRFM_2K_LDP_STR);
}
static void pr_help(const char *progname)
@@ -92,6 +94,9 @@ static void do_memcpy(void *dst, const void *src, size_t chunk_size)
case LDP_STP:
memcpy_ldp_stp(dst, src, chunk_size);
break;
+ case PRFM_2K_LDP_STR:
+ memcpy_prfm_2k_ldp_str(dst, src, chunk_size);
+ break;
default:
assert(0);
}
diff --git a/memcpy.h b/memcpy.h
index e0fce8c..947177c 100644
--- a/memcpy.h
+++ b/memcpy.h
@@ -8,5 +8,6 @@ void memcpy_ldp_sttr(void *dst, const void *src, size_t count);
void memcpy_ldr_stp(void *dst, const void *src, size_t count);
void memcpy_ldtr_stp(void *dst, const void *src, size_t count);
void memcpy_ldp_stp(void *dst, const void *src, size_t count);
+void memcpy_prfm_2k_ldp_str(void *dst, const void *src, size_t count);
#endif /* __MEMCPY_H__ */
diff --git a/memcpy_ldp_stp.S b/memcpy_ldp_stp.S
index a9851f5..87b676f 100644
--- a/memcpy_ldp_stp.S
+++ b/memcpy_ldp_stp.S
@@ -1,3 +1,6 @@
+.macro prfm1, Xn /* deliberately empty: the ldp/stp variant emits no prefetch; presumably defined only so the prfm1 call added to copy_template.h assembles (confirm) */
+.endm
+
.macro ldp1, Xt1, Xt2, Xn, imm
ldp \Xt1, \Xt2, [\Xn], \imm
.endm
diff --git a/memcpy_ldp_str.S b/memcpy_ldp_str.S
index 0b51165..a61eddb 100644
--- a/memcpy_ldp_str.S
+++ b/memcpy_ldp_str.S
@@ -1,3 +1,6 @@
+.macro prfm1, Xn /* deliberately empty: the ldp/str variant emits no prefetch; presumably defined only so the prfm1 call added to copy_template.h assembles (confirm) */
+.endm
+
.macro ldp1, Xt1, Xt2, Xn, imm
ldp \Xt1, \Xt2, [\Xn], \imm
.endm
diff --git a/memcpy_ldp_sttr.S b/memcpy_ldp_sttr.S
index b517d7a..e346104 100644
--- a/memcpy_ldp_sttr.S
+++ b/memcpy_ldp_sttr.S
@@ -1,3 +1,6 @@
+.macro prfm1, Xn /* deliberately empty: the ldp/sttr variant emits no prefetch; presumably defined only so the prfm1 call added to copy_template.h assembles (confirm) */
+.endm
+
.macro ldp1, Xt1, Xt2, Xn, imm
ldp \Xt1, \Xt2, [\Xn], \imm
.endm
diff --git a/memcpy_ldr_stp.S b/memcpy_ldr_stp.S
index 78aed07..2c0081b 100644
--- a/memcpy_ldr_stp.S
+++ b/memcpy_ldr_stp.S
@@ -1,3 +1,6 @@
+.macro prfm1, Xn /* deliberately empty: the ldr/stp variant emits no prefetch; presumably defined only so the prfm1 call added to copy_template.h assembles (confirm) */
+.endm
+
.macro ldp1, Xt1, Xt2, Xn, imm
ldr \Xt1, [\Xn]
ldr \Xt2, [\Xn, #8]
diff --git a/memcpy_ldtr_stp.S b/memcpy_ldtr_stp.S
index e7ea408..df8eb05 100644
--- a/memcpy_ldtr_stp.S
+++ b/memcpy_ldtr_stp.S
@@ -1,3 +1,6 @@
+.macro prfm1, Xn /* deliberately empty: the ldtr/stp variant emits no prefetch; presumably defined only so the prfm1 call added to copy_template.h assembles (confirm) */
+.endm
+
.macro ldp1, Xt1, Xt2, Xn, imm
ldtr \Xt1, [\Xn]
ldtr \Xt2, [\Xn, #8]
diff --git a/memcpy_prfm_2k_ldp_str.S b/memcpy_prfm_2k_ldp_str.S
new file mode 100644
index 0000000..6d4a52c
--- /dev/null
+++ b/memcpy_prfm_2k_ldp_str.S
@@ -0,0 +1,19 @@
+.macro prfm1, Xn /* prefetch hook: issue two read-prefetch hints ahead of the address in Xn; Xn itself is not modified */
+ prfm pldl2keep, [\Xn, #0x800] /* PLDL2KEEP = prefetch for load, target L2, keep (temporal); address is Xn + 0x800, i.e. 2048 bytes ahead */
+ prfm pldl2keep, [\Xn, #0x840] /* same hint at Xn + 0x840, i.e. 64 bytes beyond the first one */
+.endm
+
+.macro ldp1, Xt1, Xt2, Xn, imm /* load hook: read the pair Xt1/Xt2 from [Xn], then advance Xn by imm */
+ ldp \Xt1, \Xt2, [\Xn], \imm /* post-indexed LDP: the load uses the old Xn, after which Xn += imm */
+.endm
+
+.macro stp1, Xt1, Xt2, Xn, imm /* store hook: write Xt1/Xt2 at [Xn] using two single-register STRs rather than one STP, then advance Xn by imm */
+ str \Xt1, [\Xn] /* first register goes to offset 0 */
+ str \Xt2, [\Xn, #8] /* second register goes to offset 8 */
+ add \Xn, \Xn, \imm /* explicit pointer bump, since the plain STR forms above leave Xn unchanged */
+.endm
+
+.globl memcpy_prfm_2k_ldp_str /* exported symbol; memcpy.h declares it as void memcpy_prfm_2k_ldp_str(void *dst, const void *src, size_t count) */
+memcpy_prfm_2k_ldp_str:
+ #include "copy_template.h" /* shared copy body; its loop invokes the prfm1, ldp1 and stp1 macros, so this file's definitions select the instructions used */
+ ret