author    Will Deacon <will.deacon@arm.com>    2017-10-16 10:13:48 +0100
committer Will Deacon <will.deacon@arm.com>    2017-10-16 10:13:48 +0100
commit    658230d9740e71b66a065bc963a39c8633e12b4c (patch)
tree      d91cee58069cf70069dadbba8fb5e05a8174bccd
First steps towards userspace qrwlock
Signed-off-by: Will Deacon <will.deacon@arm.com>
 Makefile  |  12 +
 kernel.h  | 282 ++++++++++++++++++++++++++++++++++++++++++++++
 main.c    |  12 +
 qrwlock.c |  84 ++++++++
 qrwlock.h | 134 +++++++++++++
 5 files changed, 524 insertions(+), 0 deletions(-)
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..2c2a6d1
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,12 @@
+CC=aarch64-linux-gnu-gcc
+CFLAGS=-Wall -O2 -fno-strict-aliasing
+TARGET=qrwlock
+OBJS= main.o qrwlock.o
+
+all: $(TARGET)
+$(TARGET): $(OBJS)
+
+clean:
+	rm -f $(TARGET) $(OBJS)
+
+.PHONY: all clean
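Since CC points at the aarch64-linux-gnu cross-compiler, the qrwlock binary targets AArch64 and will not run natively on an x86 build host. A possible way to build and smoke-test it, assuming qemu user-mode emulation is installed and the cross libc lives at the Debian-style path (both assumptions, not part of this commit):

    $ make
    $ qemu-aarch64 -L /usr/aarch64-linux-gnu ./qrwlock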
diff --git a/kernel.h b/kernel.h
new file mode 100644
index 0000000..8c061e3
--- /dev/null
+++ b/kernel.h
@@ -0,0 +1,282 @@
+/*
+ * Kernel internals needed by qrwlock for standalone implementation.
+ */
+
+#ifndef __KERNEL_H
+#define __KERNEL_H
+
+#include <stdlib.h> /* abort() */
+
+/* Compiler bits */
+#define __branch_check__(x, expect, is_constant) ({ \
+ int ______r; \
+ ______r = __builtin_expect(!!(x), expect); \
+ ______r; \
+ })
+
+#define likely(x) (__branch_check__(x, 1, __builtin_constant_p(x)))
+#define unlikely(x) (__branch_check__(x, 0, __builtin_constant_p(x)))
+#define __force
+#define __aligned(x) __attribute__((aligned(x)))
+#define __always_inline inline __attribute__((always_inline))
+
+/* Type definitions */
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+typedef struct {
+ int counter;
+} atomic_t;
+
+#define ATOMIC_INIT(i) { (i) }
+
+typedef struct {
+ u16 owner;
+ u16 next;
+} __aligned(4) arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED { 0, 0 }
+
+typedef struct qrwlock {
+ union {
+ atomic_t cnts;
+ struct {
+ u8 wlocked; /* Locked for write? */
+ u8 __lstate[3];
+ };
+ };
+ arch_spinlock_t wait_lock;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED { \
+ { .cnts = ATOMIC_INIT(0), }, \
+ .wait_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
+}
+
+/*
+ * TODO: We should do something more interesting here, since this affects
+ * the reader slowpath.
+ */
+#define in_interrupt() 0
+
+/* READ_ONCE */
+#define __READ_ONCE_SIZE \
+({ \
+ switch (size) { \
+ case 1: *(u8 *)res = *(volatile u8 *)p; break; \
+ case 2: *(u16 *)res = *(volatile u16 *)p; break; \
+ case 4: *(u32 *)res = *(volatile u32 *)p; break; \
+ case 8: *(u64 *)res = *(volatile u64 *)p; break; \
+ default: \
+ abort(); /* hack for userspace */ \
+ } \
+})
+
+static __always_inline
+void __read_once_size(const volatile void *p, void *res, int size)
+{
+ __READ_ONCE_SIZE;
+}
+
+#define __READ_ONCE(x) \
+({ \
+ union { typeof(x) __val; char __c[1]; } __u; \
+ __read_once_size(&(x), __u.__c, sizeof(x)); \
+ __u.__val; \
+})
+
+#define READ_ONCE(x) __READ_ONCE(x)
+
+/* cmpxchg */
+#define __CMPXCHG_CASE(w, sz, name, mb, acq, rel, cl) \
+static inline unsigned long \
+ __cmpxchg_case_##name(volatile void *ptr, \
+ unsigned long old, \
+ unsigned long new) \
+{ \
+ unsigned long tmp, oldval; \
+ \
+ asm volatile( \
+ " prfm pstl1strm, %[v]\n" \
+ "1: ld" #acq "xr" #sz "\t%" #w "[oldval], %[v]\n" \
+ " eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \
+ " cbnz %" #w "[tmp], 2f\n" \
+ " st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n" \
+ " cbnz %w[tmp], 1b\n" \
+ " " #mb "\n" \
+ "2:" \
+ : [tmp] "=&r" (tmp), [oldval] "=&r" (oldval), \
+ [v] "+Q" (*(unsigned long *)ptr) \
+ : [old] "Lr" (old), [new] "r" (new) \
+ : cl); \
+ \
+ return oldval; \
+} \
+
+__CMPXCHG_CASE(w, b, 1, , , , )
+__CMPXCHG_CASE(w, h, 2, , , , )
+__CMPXCHG_CASE(w, , 4, , , , )
+__CMPXCHG_CASE( , , 8, , , , )
+__CMPXCHG_CASE(w, b, acq_1, , a, , "memory")
+__CMPXCHG_CASE(w, h, acq_2, , a, , "memory")
+__CMPXCHG_CASE(w, , acq_4, , a, , "memory")
+__CMPXCHG_CASE( , , acq_8, , a, , "memory")
+
+#define __CMPXCHG_GEN(sfx) \
+static inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
+ unsigned long old, \
+ unsigned long new, \
+ int size) \
+{ \
+ switch (size) { \
+ case 1: \
+ return __cmpxchg_case##sfx##_1(ptr, (u8)old, new); \
+ case 2: \
+ return __cmpxchg_case##sfx##_2(ptr, (u16)old, new); \
+ case 4: \
+ return __cmpxchg_case##sfx##_4(ptr, old, new); \
+ case 8: \
+ return __cmpxchg_case##sfx##_8(ptr, old, new); \
+ default: \
+ abort(); /* hack for userspace */ \
+ } \
+}
+
+__CMPXCHG_GEN()
+__CMPXCHG_GEN(_acq)
+
+#define __cmpxchg_wrapper(sfx, ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __ret = (__typeof__(*(ptr))) \
+ __cmpxchg##sfx((ptr), (unsigned long)(o), \
+ (unsigned long)(n), sizeof(*(ptr))); \
+ __ret; \
+})
+
+#define cmpxchg_relaxed(...) __cmpxchg_wrapper( , __VA_ARGS__)
+#define cmpxchg_acquire(...) __cmpxchg_wrapper(_acq, __VA_ARGS__)
+
+/* load-acquire/store-release */
+#define __smp_store_release(p, v) \
+do { \
+ union { typeof(*p) __val; char __c[1]; } __u = \
+ { .__val = (__force typeof(*p)) (v) }; \
+ switch (sizeof(*p)) { \
+ case 1: \
+ asm volatile ("stlrb %w1, %0" \
+ : "=Q" (*p) \
+ : "r" (*(u8 *)__u.__c) \
+ : "memory"); \
+ break; \
+ case 2: \
+ asm volatile ("stlrh %w1, %0" \
+ : "=Q" (*p) \
+ : "r" (*(u16 *)__u.__c) \
+ : "memory"); \
+ break; \
+ case 4: \
+ asm volatile ("stlr %w1, %0" \
+ : "=Q" (*p) \
+ : "r" (*(u32 *)__u.__c) \
+ : "memory"); \
+ break; \
+ case 8: \
+ asm volatile ("stlr %1, %0" \
+ : "=Q" (*p) \
+ : "r" (*(u64 *)__u.__c) \
+ : "memory"); \
+ break; \
+ } \
+} while (0)
+#define smp_store_release(p, v) __smp_store_release(p, v)
+
+#define __smp_load_acquire(p) \
+({ \
+ union { typeof(*p) __val; char __c[1]; } __u; \
+ switch (sizeof(*p)) { \
+ case 1: \
+ asm volatile ("ldarb %w0, %1" \
+ : "=r" (*(u8 *)__u.__c) \
+ : "Q" (*p) : "memory"); \
+ break; \
+ case 2: \
+ asm volatile ("ldarh %w0, %1" \
+ : "=r" (*(u16 *)__u.__c) \
+ : "Q" (*p) : "memory"); \
+ break; \
+ case 4: \
+ asm volatile ("ldar %w0, %1" \
+ : "=r" (*(u32 *)__u.__c) \
+ : "Q" (*p) : "memory"); \
+ break; \
+ case 8: \
+ asm volatile ("ldar %0, %1" \
+ : "=r" (*(u64 *)__u.__c) \
+ : "Q" (*p) : "memory"); \
+ break; \
+ } \
+ __u.__val; \
+})
+#define smp_load_acquire(p) __smp_load_acquire(p)
+
+/* smp_cond_load_acquire */
+#define smp_cond_load_acquire(ptr, cond_expr) \
+({ \
+ typeof(ptr) __PTR = (ptr); \
+ typeof(*ptr) VAL; \
+ for (;;) { \
+ VAL = smp_load_acquire(__PTR); \
+ if (cond_expr) \
+ break; \
+		/* TODO: __cmpwait_relaxed(__PTR, VAL), i.e. WFE on arm64 */ \
+ } \
+ VAL; \
+})
+
+/* Atomics */
+#define atomic_read(v) READ_ONCE((v)->counter)
+
+static inline void atomic_add(int i, atomic_t *v)
+{
+ /* TODO */
+}
+
+static inline void atomic_sub(int i, atomic_t *v)
+{
+ /* TODO */
+}
+
+static inline int atomic_add_return_acquire(int i, atomic_t *v)
+{
+ /* TODO */
+ return i;
+}
+
+static inline int atomic_sub_return_release(int i, atomic_t *v)
+{
+ /* TODO */
+ return i;
+}
+
+#define atomic_cmpxchg_relaxed(v, old, new) \
+ cmpxchg_relaxed(&((v)->counter), (old), (new))
+#define atomic_cmpxchg_acquire(v, old, new) \
+ cmpxchg_acquire(&((v)->counter), (old), (new))
+
+#define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c))
+
+/* Ticket spinlock */
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+ /* TODO */
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+ /* TODO */
+}
+
+#endif /* __KERNEL_H */
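kernel.h deliberately leaves the atomics and the ticket spinlock as TODO stubs, so nothing above actually synchronizes yet. For reference, here is a minimal sketch of how those stubs might be filled in for userspace, using GCC's __atomic builtins rather than the hand-rolled LL/SC assembly the kernel uses; illustrative only, not part of the commit:

    /* Illustrative only -- same signatures as the TODO stubs in kernel.h,
     * implemented with GCC __atomic builtins. */
    static inline void atomic_add(int i, atomic_t *v)
    {
            (void)__atomic_fetch_add(&v->counter, i, __ATOMIC_RELAXED);
    }

    static inline void atomic_sub(int i, atomic_t *v)
    {
            (void)__atomic_fetch_sub(&v->counter, i, __ATOMIC_RELAXED);
    }

    static inline int atomic_add_return_acquire(int i, atomic_t *v)
    {
            return __atomic_add_fetch(&v->counter, i, __ATOMIC_ACQUIRE);
    }

    static inline int atomic_sub_return_release(int i, atomic_t *v)
    {
            return __atomic_sub_fetch(&v->counter, i, __ATOMIC_RELEASE);
    }

    /* Ticket lock: take a ticket, then spin until the owner field reaches
     * our number. A real arm64 version would WFE instead of busy-waiting. */
    static inline void arch_spin_lock(arch_spinlock_t *lock)
    {
            u16 ticket = __atomic_fetch_add(&lock->next, 1, __ATOMIC_RELAXED);

            while (__atomic_load_n(&lock->owner, __ATOMIC_ACQUIRE) != ticket)
                    ;
    }

    static inline void arch_spin_unlock(arch_spinlock_t *lock)
    {
            u16 next = __atomic_load_n(&lock->owner, __ATOMIC_RELAXED) + 1;

            __atomic_store_n(&lock->owner, next, __ATOMIC_RELEASE);
    }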
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..e71c166
--- /dev/null
+++ b/main.c
@@ -0,0 +1,12 @@
+#include "qrwlock.h"
+
+struct qrwlock lock;
+
+int main(void)
+{
+ arch_read_lock(&lock);
+ arch_read_unlock(&lock);
+ arch_write_lock(&lock);
+ arch_write_unlock(&lock);
+ return 0;
+}
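main() only exercises the uncontended fast paths on a single thread. Once the TODO atomics in kernel.h are filled in (for instance as sketched above), a small pthread-based smoke test along these lines could check that readers and the writer actually exclude one another; a sketch, not part of the commit, assuming linking with -pthread:

    #include <pthread.h>
    #include <stdio.h>
    #include "qrwlock.h"

    static struct qrwlock test_lock;
    static int shared;

    static void *reader(void *arg)
    {
            for (int i = 0; i < 100000; i++) {
                    arch_read_lock(&test_lock);
                    (void)shared;           /* read-side critical section */
                    arch_read_unlock(&test_lock);
            }
            return NULL;
    }

    static void *writer(void *arg)
    {
            for (int i = 0; i < 100000; i++) {
                    arch_write_lock(&test_lock);
                    shared++;               /* write-side critical section */
                    arch_write_unlock(&test_lock);
            }
            return NULL;
    }

    int main(void)
    {
            pthread_t r[4], w;

            pthread_create(&w, NULL, writer, NULL);
            for (int i = 0; i < 4; i++)
                    pthread_create(&r[i], NULL, reader, NULL);

            pthread_join(w, NULL);
            for (int i = 0; i < 4; i++)
                    pthread_join(r[i], NULL);

            printf("shared = %d (expect 100000)\n", shared);
            return 0;
    }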
diff --git a/qrwlock.c b/qrwlock.c
new file mode 100644
index 0000000..098c1f1
--- /dev/null
+++ b/qrwlock.c
@@ -0,0 +1,84 @@
+/*
+ * Queued read/write locks
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors: Waiman Long <waiman.long@hp.com>
+ */
+#include "qrwlock.h"
+
+/**
+ * queued_read_lock_slowpath - acquire read lock of a queue rwlock
+ * @lock: Pointer to queue rwlock structure
+ */
+void queued_read_lock_slowpath(struct qrwlock *lock)
+{
+ /*
+ * Readers come here when they cannot get the lock without waiting
+ */
+ if (unlikely(in_interrupt())) {
+ /*
+ * Readers in interrupt context will get the lock immediately
+ * if the writer is just waiting (not holding the lock yet),
+ * so spin with ACQUIRE semantics until the lock is available
+ * without waiting in the queue.
+ */
+ atomic_cond_read_acquire(&lock->cnts, !(VAL & _QW_LOCKED));
+ return;
+ }
+ atomic_sub(_QR_BIAS, &lock->cnts);
+
+ /*
+ * Put the reader into the wait queue
+ */
+ arch_spin_lock(&lock->wait_lock);
+ atomic_add(_QR_BIAS, &lock->cnts);
+
+ /*
+ * The ACQUIRE semantics of the following spinning code ensure
+ * that accesses can't leak upwards out of our subsequent critical
+ * section in the case that the lock is currently held for write.
+ */
+ atomic_cond_read_acquire(&lock->cnts, !(VAL & _QW_LOCKED));
+
+ /*
+ * Signal the next one in queue to become queue head
+ */
+ arch_spin_unlock(&lock->wait_lock);
+}
+
+/**
+ * queued_write_lock_slowpath - acquire write lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ */
+void queued_write_lock_slowpath(struct qrwlock *lock)
+{
+ /* Put the writer into the wait queue */
+ arch_spin_lock(&lock->wait_lock);
+
+ /* Try to acquire the lock directly if no reader is present */
+ if (!atomic_read(&lock->cnts) &&
+ (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0))
+ goto unlock;
+
+ /* Set the waiting flag to notify readers that a writer is pending */
+ atomic_add(_QW_WAITING, &lock->cnts);
+
+ /* When no more readers or writers, set the locked flag */
+ do {
+ atomic_cond_read_acquire(&lock->cnts, VAL == _QW_WAITING);
+ } while (atomic_cmpxchg_relaxed(&lock->cnts, _QW_WAITING,
+ _QW_LOCKED) != _QW_WAITING);
+unlock:
+ arch_spin_unlock(&lock->wait_lock);
+}
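To make the writer handshake concrete, here is a worked trace of the cnts word through queued_write_lock_slowpath(), using the bit layout from qrwlock.h (_QR_SHIFT = 9, _QW_WAITING = 0x100, _QW_LOCKED = 0x0ff):

    /*
     * cnts = 2 << _QR_SHIFT (0x400)    two readers hold the lock
     * writer takes wait_lock; cnts != 0, so the direct cmpxchg fails
     * atomic_add(_QW_WAITING):         cnts = 0x400 | 0x100 = 0x500;
     *                                  new readers now see _QW_WMASK bits
     *                                  set and queue on wait_lock instead
     * both readers unlock:             cnts = _QW_WAITING (0x100)
     * cmpxchg(_QW_WAITING, _QW_LOCKED) succeeds: cnts = 0x0ff
     * writer drops wait_lock and owns the lock for write
     */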
diff --git a/qrwlock.h b/qrwlock.h
new file mode 100644
index 0000000..cda43be
--- /dev/null
+++ b/qrwlock.h
@@ -0,0 +1,134 @@
+/*
+ * Queue read/write lock
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors: Waiman Long <waiman.long@hp.com>
+ */
+#ifndef __ASM_GENERIC_QRWLOCK_H
+#define __ASM_GENERIC_QRWLOCK_H
+
+#include "kernel.h"
+
+/*
+ * Writer states & reader shift and bias.
+ */
+#define _QW_WAITING 0x100 /* A writer is waiting */
+#define _QW_LOCKED 0x0ff /* A writer holds the lock */
+#define _QW_WMASK 0x1ff /* Writer mask */
+#define _QR_SHIFT 9 /* Reader count shift */
+#define _QR_BIAS (1U << _QR_SHIFT)
+
+/*
+ * External function declarations
+ */
+extern void queued_read_lock_slowpath(struct qrwlock *lock);
+extern void queued_write_lock_slowpath(struct qrwlock *lock);
+
+/**
+ * queued_read_trylock - try to acquire read lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ * Return: 1 if lock acquired, 0 if failed
+ */
+static inline int queued_read_trylock(struct qrwlock *lock)
+{
+ u32 cnts;
+
+ cnts = atomic_read(&lock->cnts);
+ if (likely(!(cnts & _QW_WMASK))) {
+ cnts = (u32)atomic_add_return_acquire(_QR_BIAS, &lock->cnts);
+ if (likely(!(cnts & _QW_WMASK)))
+ return 1;
+ atomic_sub(_QR_BIAS, &lock->cnts);
+ }
+ return 0;
+}
+
+/**
+ * queued_write_trylock - try to acquire write lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ * Return: 1 if lock acquired, 0 if failed
+ */
+static inline int queued_write_trylock(struct qrwlock *lock)
+{
+ u32 cnts;
+
+ cnts = atomic_read(&lock->cnts);
+ if (unlikely(cnts))
+ return 0;
+
+ return likely(atomic_cmpxchg_acquire(&lock->cnts,
+ cnts, cnts | _QW_LOCKED) == cnts);
+}
+/**
+ * queued_read_lock - acquire read lock of a queue rwlock
+ * @lock: Pointer to queue rwlock structure
+ */
+static inline void queued_read_lock(struct qrwlock *lock)
+{
+ u32 cnts;
+
+ cnts = atomic_add_return_acquire(_QR_BIAS, &lock->cnts);
+ if (likely(!(cnts & _QW_WMASK)))
+ return;
+
+ /* The slowpath will decrement the reader count, if necessary. */
+ queued_read_lock_slowpath(lock);
+}
+
+/**
+ * queued_write_lock - acquire write lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ */
+static inline void queued_write_lock(struct qrwlock *lock)
+{
+ /* Optimize for the unfair lock case where the fair flag is 0. */
+ if (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0)
+ return;
+
+ queued_write_lock_slowpath(lock);
+}
+
+/**
+ * queued_read_unlock - release read lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ */
+static inline void queued_read_unlock(struct qrwlock *lock)
+{
+ /*
+ * Atomically decrement the reader count
+ */
+ (void)atomic_sub_return_release(_QR_BIAS, &lock->cnts);
+}
+
+/**
+ * queued_write_unlock - release write lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ */
+static inline void queued_write_unlock(struct qrwlock *lock)
+{
+ smp_store_release(&lock->wlocked, 0);
+}
+
+/*
+ * Remapping rwlock architecture specific functions to the corresponding
+ * queue rwlock functions.
+ */
+#define arch_read_lock(l) queued_read_lock(l)
+#define arch_write_lock(l) queued_write_lock(l)
+#define arch_read_trylock(l) queued_read_trylock(l)
+#define arch_write_trylock(l) queued_write_trylock(l)
+#define arch_read_unlock(l) queued_read_unlock(l)
+#define arch_write_unlock(l) queued_write_unlock(l)
+
+#endif /* __ASM_GENERIC_QRWLOCK_H */
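The scheme relies on the writer byte (_QW_LOCKED), the waiting bit (_QW_WAITING) and the reader count (multiples of _QR_BIAS) occupying disjoint bits of the 32-bit cnts word, with wlocked aliasing the low byte on a little-endian machine. Two hypothetical compile-time checks (not in this commit) state those invariants explicitly:

    #include "qrwlock.h"

    _Static_assert((_QW_LOCKED | _QW_WAITING) == _QW_WMASK,
                   "writer states must exactly fill the writer mask");
    _Static_assert((_QR_BIAS & _QW_WMASK) == 0,
                   "reader count must live above the writer mask");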