aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJean-Philippe Brucker <jean-philippe.brucker@arm.com>2015-12-10 19:24:59 +0000
committerMark Rutland <mark.rutland@arm.com>2016-06-15 10:27:35 +0100
commit50eb940341fe22e2a3e36d9d8d06ad40b8b961f0 (patch)
tree6e30e03142b34331126aa307076309ce54558efe
parent5714dbb8b62ac25f336e5256dcc2fd8fe7c37b1b (diff)
downloadboot-wrapper-aarch64-50eb940341fe22e2a3e36d9d8d06ad40b8b961f0.tar.gz
Replace exclusive accesses with a bakery lock
The commit prepares the removal of the MMU identity map, which was only used for exclusive accesses (ldxr/stxr) in psci_cpu_on. Instead of relying on exclusives, we assume that when stage-1 translation is disabled, all EL3 memory accesses have Device type, with non-gathering and non-reordering attributes. This guarantees single-copy atomicity of aligned halfword accesses, as per the ARM ARM. We can thus switch to a less constrained (albeit bulkier) locking mechanism. This patch implements Lamport's bakery lock, which doesn't rely on atomic compare-and-swap primitives. For bisectability, we remove the call to switch_to_idmap here. Otherwise, the assertions made by the bakery lock code, regarding order of accesses, are invalid. The rest of the code will be removed in a subsequent patch. Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com> Signed-off-by: Mark Rutland <mark.rutland@arm.com>
-rw-r--r--Makefile.am2
-rw-r--r--arch/aarch64/include/asm/psci.h40
-rw-r--r--arch/aarch64/psci.S1
-rw-r--r--bakery_lock.c137
-rw-r--r--include/bakery_lock.h48
-rw-r--r--include/compiler.h1
-rw-r--r--include/cpu.h1
-rw-r--r--include/psci.h2
-rw-r--r--psci.c20
9 files changed, 204 insertions, 48 deletions
diff --git a/Makefile.am b/Makefile.am
index 70dedc2..fb42c45 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -92,7 +92,7 @@ CFLAGS += -Wall -fomit-frame-pointer
CFLAGS += -ffunction-sections -fdata-sections
LDFLAGS += --gc-sections
-OFILES += boot_common.o ns.o $(GIC) cache.o lib.o
+OFILES += boot_common.o bakery_lock.o ns.o $(GIC) cache.o lib.o
OFILES += $(addprefix $(ARCH_SRC),boot.o stack.o mmu.o $(BOOTMETHOD) utils.o)
all: $(IMAGE)
diff --git a/arch/aarch64/include/asm/psci.h b/arch/aarch64/include/asm/psci.h
deleted file mode 100644
index ddab76c..0000000
--- a/arch/aarch64/include/asm/psci.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * arch/aarch64/include/asm/psci.h
- *
- * Copyright (C) 2015 ARM Limited. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE.txt file.
- */
-#ifndef __ASM_AARCH64_PSCI_H
-#define __ASM_AARCH64_PSCI_H
-
-#ifndef __ASSEMBLY__
-
-static inline int psci_store_address(unsigned long address,
- unsigned long *branch_entry)
-{
- unsigned long ret;
-
- asm volatile (
- "1:\n"
- "ldxr %0, [%2]\n"
- "subs %0, %0, %3\n"
- "b.ne 2f\n"
- "stxr %w0, %1, [%2]\n"
- "cbnz %w0, 1b\n"
- "2:\n"
- : "=&r" (ret)
- : "r" (address), "r" (branch_entry), "J" (PSCI_ADDR_INVALID)
- : "cc");
-
- if (ret != 0)
- /* ret value comes from subs */
- return PSCI_RET_ALREADY_ON;
-
- return PSCI_RET_SUCCESS;
-}
-
-#endif /* !__ASSEMBLY__ */
-
-#endif
diff --git a/arch/aarch64/psci.S b/arch/aarch64/psci.S
index b8d07e5..5f7e2d7 100644
--- a/arch/aarch64/psci.S
+++ b/arch/aarch64/psci.S
@@ -102,7 +102,6 @@ smc_exit:
start_el3:
ldr x0, =vector
bl setup_vector
- bl switch_to_idmap
/* only boot the primary cpu (entry 0 in the table) */
cpuid x0, x1
diff --git a/bakery_lock.c b/bakery_lock.c
new file mode 100644
index 0000000..877118c
--- /dev/null
+++ b/bakery_lock.c
@@ -0,0 +1,137 @@
+/*
+ * bakery_lock.c - Lamport's bakery algorithm
+ *
+ * Copyright (C) 2015 ARM Limited. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE.txt file.
+ *
+ *
+ * Simplest implementation of Lamport's bakery lock [1]. Applies only to device
+ * memory with attributes non-gathering and non-reordering.
+ *
+ * This algorithm's strength resides in the fact that it doesn't rely on
+ * hardware synchronisation mechanisms and as such, doesn't require normal
+ * cacheable memory on ARMv8. CPUs write only to their own memory locations,
+ * and read from all other CPUs' ones, in order to decide whose turn it is to
+ * have the lock.
+ *
+ * The algorithm correctness is based on the following assumptions:
+ *
+ * 1) Accesses to choosing[k] (here tickets[k].choosing) are done atomically.
+ * In other words, simultaneous read and write to choosing[k] do not occur.
+ * In this implementation, it is guaranteed by single-copy atomicity, for
+ * accesses of type Device with non-gathering attributes. The algorithm
+ * doesn't require accesses to number[k] to be atomic, even though this
+ * implementation guarantees that as well.
+ *
+ * 2) Storage of number[k] allows it to become large enough for practical use of
+ * the lock. Indeed, if the lock is contended all of the time, the value of
+ * max(number[1..N]) will keep increasing, and this algorithm doesn't handle
+ * wrapping of the ticket number. In this implementation, we assume that we
+ * will never reach 32766 (0x7fff) overlapping calls to bakery_lock.
+ *
+ * [1] Lamport, L. "A New Solution of Dijkstra's Concurrent Programming Problem"
+ */
+
+#include <bakery_lock.h>
+#include <cpu.h>
+
+/*
+ * Return the result of (number_a, cpu_a) < (number_b, cpu_b)
+ */
+static unsigned int less_than(unsigned long cpu_a, unsigned long number_a,
+ unsigned long cpu_b, unsigned long number_b)
+{
+ if (number_a == number_b)
+ return cpu_a < cpu_b;
+
+ return number_a < number_b;
+}
+
+static unsigned int choose_number(bakery_ticket_t *tickets, unsigned self)
+{
+ int cpu;
+ unsigned int max_number = 0;
+ bakery_ticket_t ticket;
+
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ if (cpu == self)
+ continue;
+
+ ticket = read_ticket_once(tickets[cpu]);
+
+ if (max_number < ticket.number)
+ max_number = ticket.number;
+ }
+
+ return 1 + max_number;
+}
+
+/**
+ * Wait for our turn to enter a critical section
+ *
+ * @tickets: array of size NR_CPUS, indexed by logical IDs.
+ * @self: logical ID of the current CPU
+ *
+ * Note: since this implementation assumes that all loads and stores to tickets
+ * are of Device type with non-gathering and non-reordering attributes, we
+ * expect all of them to be performed, in program order. As a result, the
+ * following function is pretty relaxed in terms of barriers: we only
+ * synchronize before sev(), and introduce system-wide memory barriers around
+ * the critical section.
+ */
+void bakery_lock(bakery_ticket_t *tickets, unsigned self)
+{
+ int cpu, number_self;
+ bakery_ticket_t ticket;
+
+ /* Doorway */
+ write_ticket_once(tickets[self], 1, 0);
+ number_self = choose_number(tickets, self);
+ write_ticket_once(tickets[self], 0, number_self);
+
+ dsb(st);
+ sev();
+
+ /* Bakery */
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ uint16_t number_cpu;
+
+ if (cpu == self)
+ continue;
+
+ ticket = read_ticket_once(tickets[cpu]);
+ while (ticket.choosing) {
+ wfe();
+ ticket = read_ticket_once(tickets[cpu]);
+ }
+
+ number_cpu = ticket.number;
+
+ /*
+ * Wait until that CPU updates its ticket. We only need to do
+ * the comparison once, since any update to tickets[cpu].number
+ * will be to a value greater than ours, or zero.
+ */
+ if (number_cpu != 0 && less_than(cpu, number_cpu,
+ self, number_self)) {
+ do {
+ wfe();
+ ticket = read_ticket_once(tickets[cpu]);
+ } while (number_cpu == ticket.number);
+ }
+ }
+
+ dmb(sy);
+}
+
+void bakery_unlock(bakery_ticket_t *tickets, unsigned self)
+{
+ dmb(sy);
+
+ write_ticket_once(tickets[self], 0, 0);
+
+ dsb(st);
+ sev();
+}
diff --git a/include/bakery_lock.h b/include/bakery_lock.h
new file mode 100644
index 0000000..ebeb893
--- /dev/null
+++ b/include/bakery_lock.h
@@ -0,0 +1,48 @@
+/*
+ * include/bakery_lock.h
+ *
+ * Copyright (C) 2015 ARM Limited. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE.txt file.
+ */
+
+#ifndef __BAKERY_LOCK_H
+#define __BAKERY_LOCK_H
+
+#include <stdint.h>
+
+#include <compiler.h>
+
+/*
+ * We *must* access this structure with 16 or 8 bit accesses, aligned on 16-bit.
+ * Helpers read/write_ticket_once should be used for this.
+ */
+typedef union {
+ struct __packed {
+ uint16_t number : 15;
+ uint16_t choosing : 1;
+ };
+ uint16_t __val;
+} bakery_ticket_t;
+
+#define write_ticket_once(ticket, choosing_, number_) \
+({ \
+ bakery_ticket_t __t = { \
+ .number = (number_), \
+ .choosing = (choosing_), \
+ }; \
+ *(volatile uint16_t *)&(ticket).__val = __t.__val; \
+})
+
+#define read_ticket_once(ticket) \
+({ \
+ bakery_ticket_t __t; \
+ __t.__val = *(volatile uint16_t *)&(ticket).__val; \
+ __t; \
+})
+
+void bakery_lock(bakery_ticket_t *tickets, unsigned self);
+void bakery_unlock(bakery_ticket_t *tickets, unsigned self);
+
+#endif
diff --git a/include/compiler.h b/include/compiler.h
index f9ee2f3..9d7a92a 100644
--- a/include/compiler.h
+++ b/include/compiler.h
@@ -14,5 +14,6 @@
#define unreachable() __builtin_unreachable()
#define __noreturn __attribute__((noreturn))
+#define __packed __attribute__((packed))
#endif
diff --git a/include/cpu.h b/include/cpu.h
index b4dae94..704d6d8 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -16,6 +16,7 @@
#ifndef __ASSEMBLY__
#define isb() asm volatile ("isb\n" : : : "memory")
+#define dmb(arg) asm volatile ("dmb " #arg "\n" : : : "memory")
#define dsb(arg) asm volatile ("dsb " #arg "\n" : : : "memory")
#define sev() asm volatile ("sev\n" : : : "memory")
#define wfe() asm volatile ("wfe\n" : : : "memory")
diff --git a/include/psci.h b/include/psci.h
index a2caa59..21ca5ee 100644
--- a/include/psci.h
+++ b/include/psci.h
@@ -24,6 +24,4 @@
#define PSCI_ADDR_INVALID (-1)
-#include <asm/psci.h>
-
#endif
diff --git a/psci.c b/psci.c
index 6c47577..fad6f5d 100644
--- a/psci.c
+++ b/psci.c
@@ -9,6 +9,7 @@
#include <stdint.h>
+#include <bakery_lock.h>
#include <cpu.h>
#include <psci.h>
#include <spin.h>
@@ -19,18 +20,29 @@
static unsigned long branch_table[NR_CPUS];
+bakery_ticket_t branch_table_lock[NR_CPUS];
+
+static int psci_store_address(unsigned int cpu, unsigned long address)
+{
+ if (branch_table[cpu] != PSCI_ADDR_INVALID)
+ return PSCI_RET_ALREADY_ON;
+
+ branch_table[cpu] = address;
+ return PSCI_RET_SUCCESS;
+}
+
int psci_cpu_on(unsigned long target_mpidr, unsigned long address)
{
int ret;
unsigned int cpu = find_logical_id(target_mpidr);
+ unsigned int this_cpu = find_logical_id(read_mpidr());
if (cpu == MPIDR_INVALID)
return PSCI_RET_INVALID_PARAMETERS;
- ret = psci_store_address(address, branch_table + cpu);
-
- dsb(ishst);
- sev();
+ bakery_lock(branch_table_lock, this_cpu);
+ ret = psci_store_address(cpu, address);
+ bakery_unlock(branch_table_lock, this_cpu);
return ret;
}