aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark Rutland <mark.rutland@arm.com>2022-08-23 10:47:00 +0100
committerMark Rutland <mark.rutland@arm.com>2024-01-22 13:36:32 +0000
commitb0b5e23fe88a1e89631cc59ff11ae553d720489a (patch)
tree8eb74abf95b2348692ae61c06afd6bd7edde7edb
parent6613476e225e090cc9aad49be7fa504e290dd33d (diff)
downloadlinux-arm64/alternatives/info.tar.gz
HACK: arm64: alternatives: dump summary of alternativesarm64/alternatives/info
NOTE: THIS PATCH IS NOT INTENDED FOR UPSTREAM. To figure out whether it's worth making further changes to alternatives (e.g. whether it's worth replacing regular entries with callbacks), it would be useful to know the makeup of alternatives in a given kernel Image or module. This patch makes the alternatives code dump a summary of kernel alternatives at boot time, and module alternatives at module load time. For example, a defconfig v6.0-rc3+ kernel build with GCC 12.1.0 reports: | alternatives: Alternatives summary: | entries: 28000 (336000 bytes) | standard: 17280 | callback: 10720 | instructions: 32052 (128208 bytes) | replacements: 20962 ( 83848 bytes) | alternatives: cpucap 1 => entries: 925, orig: 1295, repl: 0, cb: 925 | alternatives: cpucap 2 => entries: 10, orig: 10, repl: 10, cb: 0 | alternatives: cpucap 4 => entries: 2, orig: 2, repl: 2, cb: 0 | alternatives: cpucap 5 => entries: 49, orig: 142, repl: 142, cb: 0 | alternatives: cpucap 10 => entries: 36, orig: 36, repl: 36, cb: 0 | alternatives: cpucap 11 => entries: 9, orig: 12, repl: 12, cb: 0 | alternatives: cpucap 12 => entries: 3, orig: 6, repl: 6, cb: 0 | alternatives: cpucap 13 => entries: 17, orig: 17, repl: 17, cb: 0 | alternatives: cpucap 14 => entries: 3, orig: 3, repl: 3, cb: 0 | alternatives: cpucap 16 => entries: 1, orig: 1, repl: 1, cb: 0 | alternatives: cpucap 18 => entries: 7, orig: 13, repl: 13, cb: 0 | alternatives: cpucap 19 => entries: 2, orig: 2, repl: 2, cb: 0 | alternatives: cpucap 20 => entries: 17, orig: 17, repl: 17, cb: 0 | alternatives: cpucap 24 => entries: 1128, orig: 1128, repl: 1128, cb: 0 | alternatives: cpucap 26 => entries: 10780, orig: 13953, repl: 4158, cb: 9795 | alternatives: cpucap 27 => entries: 39, orig: 39, repl: 39, cb: 0 | alternatives: cpucap 28 => entries: 4, orig: 8, repl: 8, cb: 0 | alternatives: cpucap 29 => entries: 15, orig: 15, repl: 15, cb: 0 | alternatives: cpucap 30 => entries: 15, orig: 27, repl: 27, cb: 0 | alternatives: cpucap 31 => entries: 
3, orig: 3, repl: 3, cb: 0 | alternatives: cpucap 32 => entries: 59, orig: 118, repl: 118, cb: 0 | alternatives: cpucap 33 => entries: 6, orig: 6, repl: 6, cb: 0 | alternatives: cpucap 36 => entries: 20, orig: 20, repl: 20, cb: 0 | alternatives: cpucap 37 => entries: 2727, orig: 2727, repl: 2727, cb: 0 | alternatives: cpucap 38 => entries: 3, orig: 3, repl: 3, cb: 0 | alternatives: cpucap 40 => entries: 25, orig: 29, repl: 29, cb: 0 | alternatives: cpucap 41 => entries: 11, orig: 21, repl: 21, cb: 0 | alternatives: cpucap 42 => entries: 142, orig: 152, repl: 152, cb: 0 | alternatives: cpucap 44 => entries: 63, orig: 63, repl: 63, cb: 0 | alternatives: cpucap 45 => entries: 4, orig: 4, repl: 4, cb: 0 | alternatives: cpucap 46 => entries: 5, orig: 5, repl: 5, cb: 0 | alternatives: cpucap 47 => entries: 2, orig: 2, repl: 2, cb: 0 | alternatives: cpucap 50 => entries: 3, orig: 3, repl: 3, cb: 0 | alternatives: cpucap 51 => entries: 105, orig: 105, repl: 105, cb: 0 | alternatives: cpucap 52 => entries: 57, orig: 59, repl: 59, cb: 0 | alternatives: cpucap 53 => entries: 3, orig: 3, repl: 3, cb: 0 | alternatives: cpucap 54 => entries: 5, orig: 5, repl: 5, cb: 0 | alternatives: cpucap 55 => entries: 1, orig: 1, repl: 1, cb: 0 | alternatives: cpucap 59 => entries: 28, orig: 28, repl: 28, cb: 0 | alternatives: cpucap 60 => entries: 2, orig: 2, repl: 2, cb: 0 | alternatives: cpucap 61 => entries: 1, orig: 1, repl: 1, cb: 0 | alternatives: cpucap 65 => entries: 2, orig: 2, repl: 2, cb: 0 | alternatives: cpucap 68 => entries: 1, orig: 1, repl: 1, cb: 0 | alternatives: cpucap 70 => entries: 1, orig: 1, repl: 1, cb: 0 | alternatives: cpucap 71 => entries: 1, orig: 3, repl: 3, cb: 0 | alternatives: cpucap 72 => entries: 1, orig: 1, repl: 1, cb: 0 | alternatives: cpucap 73 => entries: 32, orig: 32, repl: 32, cb: 0 | alternatives: cpucap 74 => entries: 4, orig: 4, repl: 4, cb: 0 | alternatives: cpucap 75 => entries: 5, orig: 5, repl: 5, cb: 0 | alternatives: cpucap 76 => entries: 
11391, orig: 11391, repl: 11391, cb: 0 | alternatives: cpucap 77 => entries: 1, orig: 1, repl: 1, cb: 0 | alternatives: cpucap 78 => entries: 64, orig: 224, repl: 224, cb: 0 | alternatives: cpucap 79 => entries: 141, orig: 282, repl: 282, cb: 0 | alternatives: cpucap 80 => entries: 19, orig: 19, repl: 19, cb: 0 From this, it's worth noting: * cpucap 1 is ARM64_ALWAYS_SYSTEM. * cpucap 24 is ARM64_HAS_IRQ_PRIO_MASKING. Due to the existing structure of the alternatives, alternative entries are created for the irqflags.h code even when CONFIG_ARM64_PSEUDO_NMI=n, creating ~14KiB of alt_instr entries, and ~4KiB of replacement instructions. This could be avoided by reworking the irqflags.h code to use the new alternative_has_feature_*() helpers. * cpucap 26 is ARM64_HAS_LSE_ATOMICS, and most entries are using the shared NOP patcher. The other entries are for inline cmpxchg sequences. * cpucap 37 is ARM64_HAS_VIRT_HOST_EXTN. * cpucap 76 is ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE, which could be rewritten to use a callback to patch LDR to LDAR (or vice-versa), were the insn framework extended, to save ~44KiB of replacement instructions. NOTE: THIS PATCH IS NOT INTENDED FOR UPSTREAM. Signed-off-by: Mark Rutland <mark.rutland@arm.com>
-rw-r--r--arch/arm64/kernel/alternative.c67
1 files changed, 65 insertions, 2 deletions
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 8ff6610af49664..432bc0a89a527f 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -139,15 +139,74 @@ static noinstr void clean_dcache_range_nopatch(u64 start, u64 end)
} while (cur += d_size, cur < end);
}
+#define for_each_region_alt(region, alt) \
+ for (struct alt_instr *alt = (region)->begin; \
+ (alt) < (region)->end; \
+ (alt)++)
+
+/*
+ * Print a breakdown of the alternative entries in @region via pr_info().
+ *
+ * Every alt_instr in @region is visited once, accumulating both overall and
+ * per-cpucap counts of: entries, original-sequence bytes, replacement bytes,
+ * and entries that use a callback. The overall summary is printed first,
+ * followed by one line for each cpucap that has at least one entry. Byte
+ * lengths are reported as instruction counts by dividing by
+ * AARCH64_INSN_SIZE.
+ *
+ * Entries whose cpucap number does not fit the per-cap table are left out of
+ * all counts. The function only reads @region.
+ */
+static void summarize_alternatives(const struct alt_region *region)
+{
+	struct alt_stats {
+		unsigned int entries;
+		unsigned int orig_len;
+		unsigned int repl_len;
+		unsigned int callbacks;
+	};
+	struct alt_stats percap[ARM64_NCAPS] = { 0 };
+	struct alt_stats total = { 0 };
+
+	for_each_region_alt(region, alt) {
+		int cap = ALT_CAP(alt);
+
+		/*
+		 * The per-cap table only has ARM64_NCAPS slots. Skip any
+		 * entry whose cpucap would index outside of it rather than
+		 * writing past the end of an on-stack array.
+		 */
+		if (cap < 0 || cap >= ARM64_NCAPS)
+			continue;
+
+		percap[cap].entries++;
+		total.entries++;
+
+		percap[cap].orig_len += alt->orig_len;
+		total.orig_len += alt->orig_len;
+
+		percap[cap].repl_len += alt->alt_len;
+		total.repl_len += alt->alt_len;
+
+		if (ALT_HAS_CB(alt)) {
+			percap[cap].callbacks++;
+			total.callbacks++;
+		}
+	}
+
+	/* All counters are unsigned, so print them with %u rather than %d. */
+	pr_info("Alternatives summary:\n"
+		" entries: %6u (%6zu bytes)\n"
+		" standard: %6u\n"
+		" callback: %6u\n"
+		" instructions: %6u (%6u bytes)\n"
+		" replacements: %6u (%6u bytes)\n",
+		total.entries, total.entries * sizeof(struct alt_instr),
+		total.entries - total.callbacks,
+		total.callbacks,
+		total.orig_len / AARCH64_INSN_SIZE, total.orig_len,
+		total.repl_len / AARCH64_INSN_SIZE, total.repl_len);
+
+	for (int i = 0; i < ARM64_NCAPS; i++) {
+		/* Only report cpucaps that are actually used in this region. */
+		if (!percap[i].entries)
+			continue;
+
+		pr_info("cpucap %2d => entries: %5u, orig: %5u, repl: %5u, cb: %5u\n",
+			i,
+			percap[i].entries,
+			percap[i].orig_len / AARCH64_INSN_SIZE,
+			percap[i].repl_len / AARCH64_INSN_SIZE,
+			percap[i].callbacks);
+	}
+}
+
static void __apply_alternatives(const struct alt_region *region,
bool is_module,
unsigned long *cpucap_mask)
{
- struct alt_instr *alt;
__le32 *origptr, *updptr;
alternative_cb_t alt_cb;
- for (alt = region->begin; alt < region->end; alt++) {
+ for_each_region_alt(region, alt) {
int nr_inst;
int cap = ALT_CAP(alt);
@@ -270,6 +329,8 @@ void __init apply_boot_alternatives(void)
/* If called on non-boot cpu things could go wrong */
WARN_ON(smp_processor_id() != 0);
+ summarize_alternatives(&kernel_alternatives);
+
pr_info("applying boot alternatives\n");
__apply_alternatives(&kernel_alternatives, false,
@@ -287,6 +348,8 @@ void apply_alternatives_module(void *start, size_t length)
bitmap_fill(all_capabilities, ARM64_NCAPS);
+ summarize_alternatives(&region);
+
__apply_alternatives(&region, true, &all_capabilities[0]);
}
#endif