diff options
author | Oliver Upton <oliver.upton@linux.dev> | 2023-11-29 10:31:37 +0000 |
---|---|---|
committer | Oliver Upton <oliver.upton@linux.dev> | 2023-11-29 10:31:49 +0000 |
commit | 60ad5c2833b090583eae724a6e1917f4efe0c702 (patch) | |
tree | 305ec91d928777f6f9f3e55785fa34377ca01052 | |
parent | e1b00f7a4adca5928ab3032e17743180a83bb4d0 (diff) | |
download | aarch64-memcpy-60ad5c2833b090583eae724a6e1917f4efe0c702.tar.gz |
prefetch
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | copy_template.h | 1 | ||||
-rw-r--r-- | main.c | 5 | ||||
-rw-r--r-- | memcpy.h | 1 | ||||
-rw-r--r-- | memcpy_ldp_stp.S | 3 | ||||
-rw-r--r-- | memcpy_ldp_str.S | 3 | ||||
-rw-r--r-- | memcpy_ldp_sttr.S | 3 | ||||
-rw-r--r-- | memcpy_ldr_stp.S | 3 | ||||
-rw-r--r-- | memcpy_ldtr_stp.S | 3 | ||||
-rw-r--r-- | memcpy_prfm_2k_ldp_str.S | 19 |
10 files changed, 42 insertions, 0 deletions
@@ -7,6 +7,7 @@ OBJS += memcpy_ldp_sttr.o OBJS += memcpy_ldp_stp.o OBJS += memcpy_ldr_stp.o OBJS += memcpy_ldtr_stp.o +OBJS += memcpy_prfm_2k_ldp_str.o memcpy: $(OBJS) $(CC) $(CFLAGS) $(OBJS) -o $@ diff --git a/copy_template.h b/copy_template.h index 3ed0124..3c15f25 100644 --- a/copy_template.h +++ b/copy_template.h @@ -18,6 +18,7 @@ D_h .req x10 sub count, count, #64 1: + prfm1 src stp1 A_l, A_h, dst, #16 ldp1 A_l, A_h, src, #16 stp1 B_l, B_h, dst, #16 @@ -20,6 +20,7 @@ enum mode { LDR_STP, LDTR_STP, LDP_STP, + PRFM_2K_LDP_STR, NR_MODES }; @@ -40,6 +41,7 @@ static void pr_modes(void) PR_MODE(LDR_STP); PR_MODE(LDTR_STP); PR_MODE(LDP_STP); + PR_MODE(PRFM_2K_LDP_STR); } static void pr_help(const char *progname) @@ -92,6 +94,9 @@ static void do_memcpy(void *dst, const void *src, size_t chunk_size) case LDP_STP: memcpy_ldp_stp(dst, src, chunk_size); break; + case PRFM_2K_LDP_STR: + memcpy_prfm_2k_ldp_str(dst, src, chunk_size); + break; default: assert(0); } @@ -8,5 +8,6 @@ void memcpy_ldp_sttr(void *dst, const void *src, size_t count); void memcpy_ldr_stp(void *dst, const void *src, size_t count); void memcpy_ldtr_stp(void *dst, const void *src, size_t count); void memcpy_ldp_stp(void *dst, const void *src, size_t count); +void memcpy_prfm_2k_ldp_str(void *dst, const void *src, size_t count); #endif /* __MEMCPY_H__ */ diff --git a/memcpy_ldp_stp.S b/memcpy_ldp_stp.S index a9851f5..87b676f 100644 --- a/memcpy_ldp_stp.S +++ b/memcpy_ldp_stp.S @@ -1,3 +1,6 @@ +.macro prfm1, Xn +.endm + .macro ldp1, Xt1, Xt2, Xn, imm ldp \Xt1, \Xt2, [\Xn], \imm .endm diff --git a/memcpy_ldp_str.S b/memcpy_ldp_str.S index 0b51165..a61eddb 100644 --- a/memcpy_ldp_str.S +++ b/memcpy_ldp_str.S @@ -1,3 +1,6 @@ +.macro prfm1, Xn +.endm + .macro ldp1, Xt1, Xt2, Xn, imm ldp \Xt1, \Xt2, [\Xn], \imm .endm diff --git a/memcpy_ldp_sttr.S b/memcpy_ldp_sttr.S index b517d7a..e346104 100644 --- a/memcpy_ldp_sttr.S +++ b/memcpy_ldp_sttr.S @@ -1,3 +1,6 @@ +.macro prfm1, Xn +.endm + .macro ldp1, Xt1, Xt2, Xn, imm ldp \Xt1, \Xt2, [\Xn], \imm .endm diff --git a/memcpy_ldr_stp.S b/memcpy_ldr_stp.S index 78aed07..2c0081b 100644 --- a/memcpy_ldr_stp.S +++ b/memcpy_ldr_stp.S @@ -1,3 +1,6 @@ +.macro prfm1, Xn +.endm + .macro ldp1, Xt1, Xt2, Xn, imm ldr \Xt1, [\Xn] ldr \Xt2, [\Xn, #8] diff --git a/memcpy_ldtr_stp.S b/memcpy_ldtr_stp.S index e7ea408..df8eb05 100644 --- a/memcpy_ldtr_stp.S +++ b/memcpy_ldtr_stp.S @@ -1,3 +1,6 @@ +.macro prfm1, Xn +.endm + .macro ldp1, Xt1, Xt2, Xn, imm ldtr \Xt1, [\Xn] ldtr \Xt2, [\Xn, #8] diff --git a/memcpy_prfm_2k_ldp_str.S b/memcpy_prfm_2k_ldp_str.S new file mode 100644 index 0000000..6d4a52c --- /dev/null +++ b/memcpy_prfm_2k_ldp_str.S @@ -0,0 +1,19 @@ +.macro prfm1, Xn + prfm pldl2keep, [\Xn, #0x800] + prfm pldl2keep, [\Xn, #0x840] +.endm + +.macro ldp1, Xt1, Xt2, Xn, imm + ldp \Xt1, \Xt2, [\Xn], \imm +.endm + +.macro stp1, Xt1, Xt2, Xn, imm + str \Xt1, [\Xn] + str \Xt2, [\Xn, #8] + add \Xn, \Xn, \imm +.endm + +.globl memcpy_prfm_2k_ldp_str +memcpy_prfm_2k_ldp_str: + #include "copy_template.h" + ret |