aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-01-08 18:19:44 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2024-01-08 18:19:44 -0800
commit6cbf5b3105f31217053570e7ca722b739a9242a4 (patch)
tree6e2cdd781c14ae73e6afe5034a2ae1d131781a69
parentf0a78b3e2a0c842cc7b4c2686f4a35681f02ca72 (diff)
parent2b9d9e0a9ba0e24cb9c78336481f0ed8b2bc1ff2 (diff)
downloadmisc-6cbf5b3105f31217053570e7ca722b739a9242a4.tar.gz
Merge tag 'locking-core-2024-01-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molar: "Lock guards: - Use lock guards in the ptrace code - Introduce conditional guards to extend to conditional lock primitives like mutex_trylock()/mutex_lock_interruptible()/etc. lockdep: - Optimize 'struct lock_class' to be smaller - Update file patterns in MAINTAINERS mutexes: - Document mutex lifetime rules a bit more" * tag 'locking-core-2024-01-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: locking/mutex: Clarify that mutex_unlock(), and most other sleeping locks, can still use the lock object after it's unlocked locking/mutex: Document that mutex_unlock() is non-atomic ptrace: Convert ptrace_attach() to use lock guards locking/lockdep: Slightly reorder 'struct lock_class' to save some memory MAINTAINERS: Add include/linux/lockdep*.h cleanup: Add conditional guard support
-rw-r--r--Documentation/locking/mutex-design.rst18
-rw-r--r--MAINTAINERS2
-rw-r--r--include/linux/cleanup.h52
-rw-r--r--include/linux/lockdep_types.h2
-rw-r--r--include/linux/mutex.h3
-rw-r--r--include/linux/rwsem.h8
-rw-r--r--include/linux/sched/task.h2
-rw-r--r--include/linux/spinlock.h41
-rw-r--r--kernel/locking/mutex.c5
-rw-r--r--kernel/ptrace.c128
10 files changed, 184 insertions, 77 deletions
diff --git a/Documentation/locking/mutex-design.rst b/Documentation/locking/mutex-design.rst
index 78540cd7f54b2..7c30b4aa5e28f 100644
--- a/Documentation/locking/mutex-design.rst
+++ b/Documentation/locking/mutex-design.rst
@@ -101,6 +101,24 @@ features that make lock debugging easier and faster:
- Detects multi-task circular deadlocks and prints out all affected
locks and tasks (and only those tasks).
+Mutexes - and most other sleeping locks like rwsems - do not provide an
+implicit reference for the memory they occupy, which reference is released
+with mutex_unlock().
+
+[ This is in contrast with spin_unlock() [or completion_done()], which
+ APIs can be used to guarantee that the memory is not touched by the
+ lock implementation after spin_unlock()/completion_done() releases
+ the lock. ]
+
+mutex_unlock() may access the mutex structure even after it has internally
+released the lock already - so it's not safe for another context to
+acquire the mutex and assume that the mutex_unlock() context is not using
+the structure anymore.
+
+The mutex user must ensure that the mutex is not destroyed while a
+release operation is still in progress - in other words, callers of
+mutex_unlock() must ensure that the mutex stays alive until mutex_unlock()
+has returned.
Interfaces
----------
diff --git a/MAINTAINERS b/MAINTAINERS
index 5ef3bad7def99..f71c525405e02 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12424,7 +12424,7 @@ S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
F: Documentation/locking/
F: arch/*/include/asm/spinlock*.h
-F: include/linux/lockdep.h
+F: include/linux/lockdep*.h
F: include/linux/mutex*.h
F: include/linux/rwlock*.h
F: include/linux/rwsem*.h
diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index 9f1a9c455b684..c2d09bc4f9768 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -125,25 +125,55 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \
* trivial wrapper around DEFINE_CLASS() above specifically
* for locks.
*
+ * DEFINE_GUARD_COND(name, ext, condlock)
+ * wrapper around EXTEND_CLASS above to add conditional lock
+ * variants to a base class, eg. mutex_trylock() or
+ * mutex_lock_interruptible().
+ *
* guard(name):
- * an anonymous instance of the (guard) class
+ * an anonymous instance of the (guard) class, not recommended for
+ * conditional locks.
*
* scoped_guard (name, args...) { }:
* similar to CLASS(name, scope)(args), except the variable (with the
* explicit name 'scope') is declard in a for-loop such that its scope is
* bound to the next (compound) statement.
*
+ * for conditional locks the loop body is skipped when the lock is not
+ * acquired.
+ *
+ * scoped_cond_guard (name, fail, args...) { }:
+ * similar to scoped_guard(), except it does fail when the lock
+ * acquire fails.
+ *
*/
#define DEFINE_GUARD(_name, _type, _lock, _unlock) \
- DEFINE_CLASS(_name, _type, _unlock, ({ _lock; _T; }), _type _T)
+ DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \
+ static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \
+ { return *_T; }
+
+#define DEFINE_GUARD_COND(_name, _ext, _condlock) \
+ EXTEND_CLASS(_name, _ext, \
+ ({ void *_t = _T; if (_T && !(_condlock)) _t = NULL; _t; }), \
+ class_##_name##_t _T) \
+ static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \
+ { return class_##_name##_lock_ptr(_T); }
#define guard(_name) \
CLASS(_name, __UNIQUE_ID(guard))
+#define __guard_ptr(_name) class_##_name##_lock_ptr
+
#define scoped_guard(_name, args...) \
for (CLASS(_name, scope)(args), \
- *done = NULL; !done; done = (void *)1)
+ *done = NULL; __guard_ptr(_name)(&scope) && !done; done = (void *)1)
+
+#define scoped_cond_guard(_name, _fail, args...) \
+ for (CLASS(_name, scope)(args), \
+ *done = NULL; !done; done = (void *)1) \
+ if (!__guard_ptr(_name)(&scope)) _fail; \
+ else
/*
* Additional helper macros for generating lock guards with types, either for
@@ -152,6 +182,7 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \
*
* DEFINE_LOCK_GUARD_0(name, lock, unlock, ...)
* DEFINE_LOCK_GUARD_1(name, type, lock, unlock, ...)
+ * DEFINE_LOCK_GUARD_1_COND(name, ext, condlock)
*
* will result in the following type:
*
@@ -173,6 +204,11 @@ typedef struct { \
static inline void class_##_name##_destructor(class_##_name##_t *_T) \
{ \
if (_T->lock) { _unlock; } \
+} \
+ \
+static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \
+{ \
+ return _T->lock; \
}
@@ -201,4 +237,14 @@ __DEFINE_LOCK_GUARD_1(_name, _type, _lock)
__DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \
__DEFINE_LOCK_GUARD_0(_name, _lock)
+#define DEFINE_LOCK_GUARD_1_COND(_name, _ext, _condlock) \
+ EXTEND_CLASS(_name, _ext, \
+ ({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\
+ if (_T->lock && !(_condlock)) _T->lock = NULL; \
+ _t; }), \
+ typeof_member(class_##_name##_t, lock) l) \
+ static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \
+ { return class_##_name##_lock_ptr(_T); }
+
+
#endif /* __LINUX_GUARDS_H */
diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
index 2ebc323d345ae..857d785e89e6a 100644
--- a/include/linux/lockdep_types.h
+++ b/include/linux/lockdep_types.h
@@ -127,12 +127,12 @@ struct lock_class {
unsigned long usage_mask;
const struct lock_trace *usage_traces[LOCK_TRACE_STATES];
+ const char *name;
/*
* Generation counter, when doing certain classes of graph walking,
* to ensure that we check one node only once:
*/
int name_version;
- const char *name;
u8 wait_type_inner;
u8 wait_type_outer;
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index a33aa9eb9fc3b..95d11308f995d 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -221,6 +221,7 @@ extern void mutex_unlock(struct mutex *lock);
extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T))
-DEFINE_FREE(mutex, struct mutex *, if (_T) mutex_unlock(_T))
+DEFINE_GUARD_COND(mutex, _try, mutex_trylock(_T))
+DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T) == 0)
#endif /* __LINUX_MUTEX_H */
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 1dd530ce8b45b..9c29689ff505e 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -203,11 +203,11 @@ extern void up_read(struct rw_semaphore *sem);
extern void up_write(struct rw_semaphore *sem);
DEFINE_GUARD(rwsem_read, struct rw_semaphore *, down_read(_T), up_read(_T))
-DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T))
-
-DEFINE_FREE(up_read, struct rw_semaphore *, if (_T) up_read(_T))
-DEFINE_FREE(up_write, struct rw_semaphore *, if (_T) up_write(_T))
+DEFINE_GUARD_COND(rwsem_read, _try, down_read_trylock(_T))
+DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T) == 0)
+DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T))
+DEFINE_GUARD_COND(rwsem_write, _try, down_write_trylock(_T))
/*
* downgrade write lock to read lock
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index a23af225c8983..4f3dca3535568 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -226,4 +226,6 @@ static inline void task_unlock(struct task_struct *p)
spin_unlock(&p->alloc_lock);
}
+DEFINE_GUARD(task_lock, struct task_struct *, task_lock(_T), task_unlock(_T))
+
#endif /* _LINUX_SCHED_TASK_H */
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 31d3d747a9db7..90bc853cafb6a 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -507,6 +507,8 @@ DEFINE_LOCK_GUARD_1(raw_spinlock, raw_spinlock_t,
raw_spin_lock(_T->lock),
raw_spin_unlock(_T->lock))
+DEFINE_LOCK_GUARD_1_COND(raw_spinlock, _try, raw_spin_trylock(_T->lock))
+
DEFINE_LOCK_GUARD_1(raw_spinlock_nested, raw_spinlock_t,
raw_spin_lock_nested(_T->lock, SINGLE_DEPTH_NESTING),
raw_spin_unlock(_T->lock))
@@ -515,23 +517,62 @@ DEFINE_LOCK_GUARD_1(raw_spinlock_irq, raw_spinlock_t,
raw_spin_lock_irq(_T->lock),
raw_spin_unlock_irq(_T->lock))
+DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irq, _try, raw_spin_trylock_irq(_T->lock))
+
DEFINE_LOCK_GUARD_1(raw_spinlock_irqsave, raw_spinlock_t,
raw_spin_lock_irqsave(_T->lock, _T->flags),
raw_spin_unlock_irqrestore(_T->lock, _T->flags),
unsigned long flags)
+DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irqsave, _try,
+ raw_spin_trylock_irqsave(_T->lock, _T->flags))
+
DEFINE_LOCK_GUARD_1(spinlock, spinlock_t,
spin_lock(_T->lock),
spin_unlock(_T->lock))
+DEFINE_LOCK_GUARD_1_COND(spinlock, _try, spin_trylock(_T->lock))
+
DEFINE_LOCK_GUARD_1(spinlock_irq, spinlock_t,
spin_lock_irq(_T->lock),
spin_unlock_irq(_T->lock))
+DEFINE_LOCK_GUARD_1_COND(spinlock_irq, _try,
+ spin_trylock_irq(_T->lock))
+
DEFINE_LOCK_GUARD_1(spinlock_irqsave, spinlock_t,
spin_lock_irqsave(_T->lock, _T->flags),
spin_unlock_irqrestore(_T->lock, _T->flags),
unsigned long flags)
+DEFINE_LOCK_GUARD_1_COND(spinlock_irqsave, _try,
+ spin_trylock_irqsave(_T->lock, _T->flags))
+
+DEFINE_LOCK_GUARD_1(read_lock, rwlock_t,
+ read_lock(_T->lock),
+ read_unlock(_T->lock))
+
+DEFINE_LOCK_GUARD_1(read_lock_irq, rwlock_t,
+ read_lock_irq(_T->lock),
+ read_unlock_irq(_T->lock))
+
+DEFINE_LOCK_GUARD_1(read_lock_irqsave, rwlock_t,
+ read_lock_irqsave(_T->lock, _T->flags),
+ read_unlock_irqrestore(_T->lock, _T->flags),
+ unsigned long flags)
+
+DEFINE_LOCK_GUARD_1(write_lock, rwlock_t,
+ write_lock(_T->lock),
+ write_unlock(_T->lock))
+
+DEFINE_LOCK_GUARD_1(write_lock_irq, rwlock_t,
+ write_lock_irq(_T->lock),
+ write_unlock_irq(_T->lock))
+
+DEFINE_LOCK_GUARD_1(write_lock_irqsave, rwlock_t,
+ write_lock_irqsave(_T->lock, _T->flags),
+ write_unlock_irqrestore(_T->lock, _T->flags),
+ unsigned long flags)
+
#undef __LINUX_INSIDE_SPINLOCK_H
#endif /* __LINUX_SPINLOCK_H */
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 2deeeca3e71bd..cbae8c0b89ab2 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -532,6 +532,11 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
* This function must not be used in interrupt context. Unlocking
* of a not locked mutex is not allowed.
*
+ * The caller must ensure that the mutex stays alive until this function has
+ * returned - mutex_unlock() can NOT directly be used to release an object such
+ * that another concurrent task can free it.
+ * Mutexes are different from spinlocks & refcounts in this aspect.
+ *
* This function is similar to (but not equivalent to) up().
*/
void __sched mutex_unlock(struct mutex *lock)
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index d8b5e13a2229f..5c579fb9a5e3d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -386,6 +386,34 @@ static int check_ptrace_options(unsigned long data)
return 0;
}
+static inline void ptrace_set_stopped(struct task_struct *task)
+{
+ guard(spinlock)(&task->sighand->siglock);
+
+ /*
+ * If the task is already STOPPED, set JOBCTL_TRAP_STOP and
+ * TRAPPING, and kick it so that it transits to TRACED. TRAPPING
+ * will be cleared if the child completes the transition or any
+ * event which clears the group stop states happens. We'll wait
+ * for the transition to complete before returning from this
+ * function.
+ *
+ * This hides STOPPED -> RUNNING -> TRACED transition from the
+ * attaching thread but a different thread in the same group can
+ * still observe the transient RUNNING state. IOW, if another
+ * thread's WNOHANG wait(2) on the stopped tracee races against
+ * ATTACH, the wait(2) may fail due to the transient RUNNING.
+ *
+ * The following task_is_stopped() test is safe as both transitions
+ * in and out of STOPPED are protected by siglock.
+ */
+ if (task_is_stopped(task) &&
+ task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) {
+ task->jobctl &= ~JOBCTL_STOPPED;
+ signal_wake_up_state(task, __TASK_STOPPED);
+ }
+}
+
static int ptrace_attach(struct task_struct *task, long request,
unsigned long addr,
unsigned long flags)
@@ -393,17 +421,17 @@ static int ptrace_attach(struct task_struct *task, long request,
bool seize = (request == PTRACE_SEIZE);
int retval;
- retval = -EIO;
if (seize) {
if (addr != 0)
- goto out;
+ return -EIO;
/*
* This duplicates the check in check_ptrace_options() because
* ptrace_attach() and ptrace_setoptions() have historically
* used different error codes for unknown ptrace options.
*/
if (flags & ~(unsigned long)PTRACE_O_MASK)
- goto out;
+ return -EIO;
+
retval = check_ptrace_options(flags);
if (retval)
return retval;
@@ -414,88 +442,54 @@ static int ptrace_attach(struct task_struct *task, long request,
audit_ptrace(task);
- retval = -EPERM;
if (unlikely(task->flags & PF_KTHREAD))
- goto out;
+ return -EPERM;
if (same_thread_group(task, current))
- goto out;
+ return -EPERM;
/*
* Protect exec's credential calculations against our interference;
* SUID, SGID and LSM creds get determined differently
* under ptrace.
*/
- retval = -ERESTARTNOINTR;
- if (mutex_lock_interruptible(&task->signal->cred_guard_mutex))
- goto out;
+ scoped_cond_guard (mutex_intr, return -ERESTARTNOINTR,
+ &task->signal->cred_guard_mutex) {
- task_lock(task);
- retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS);
- task_unlock(task);
- if (retval)
- goto unlock_creds;
+ scoped_guard (task_lock, task) {
+ retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS);
+ if (retval)
+ return retval;
+ }
- write_lock_irq(&tasklist_lock);
- retval = -EPERM;
- if (unlikely(task->exit_state))
- goto unlock_tasklist;
- if (task->ptrace)
- goto unlock_tasklist;
+ scoped_guard (write_lock_irq, &tasklist_lock) {
+ if (unlikely(task->exit_state))
+ return -EPERM;
+ if (task->ptrace)
+ return -EPERM;
- task->ptrace = flags;
+ task->ptrace = flags;
- ptrace_link(task, current);
+ ptrace_link(task, current);
- /* SEIZE doesn't trap tracee on attach */
- if (!seize)
- send_sig_info(SIGSTOP, SEND_SIG_PRIV, task);
+ /* SEIZE doesn't trap tracee on attach */
+ if (!seize)
+ send_sig_info(SIGSTOP, SEND_SIG_PRIV, task);
- spin_lock(&task->sighand->siglock);
+ ptrace_set_stopped(task);
+ }
+ }
/*
- * If the task is already STOPPED, set JOBCTL_TRAP_STOP and
- * TRAPPING, and kick it so that it transits to TRACED. TRAPPING
- * will be cleared if the child completes the transition or any
- * event which clears the group stop states happens. We'll wait
- * for the transition to complete before returning from this
- * function.
- *
- * This hides STOPPED -> RUNNING -> TRACED transition from the
- * attaching thread but a different thread in the same group can
- * still observe the transient RUNNING state. IOW, if another
- * thread's WNOHANG wait(2) on the stopped tracee races against
- * ATTACH, the wait(2) may fail due to the transient RUNNING.
- *
- * The following task_is_stopped() test is safe as both transitions
- * in and out of STOPPED are protected by siglock.
+ * We do not bother to change retval or clear JOBCTL_TRAPPING
+ * if wait_on_bit() was interrupted by SIGKILL. The tracer will
+ * not return to user-mode, it will exit and clear this bit in
+ * __ptrace_unlink() if it wasn't already cleared by the tracee;
+ * and until then nobody can ptrace this task.
*/
- if (task_is_stopped(task) &&
- task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) {
- task->jobctl &= ~JOBCTL_STOPPED;
- signal_wake_up_state(task, __TASK_STOPPED);
- }
-
- spin_unlock(&task->sighand->siglock);
-
- retval = 0;
-unlock_tasklist:
- write_unlock_irq(&tasklist_lock);
-unlock_creds:
- mutex_unlock(&task->signal->cred_guard_mutex);
-out:
- if (!retval) {
- /*
- * We do not bother to change retval or clear JOBCTL_TRAPPING
- * if wait_on_bit() was interrupted by SIGKILL. The tracer will
- * not return to user-mode, it will exit and clear this bit in
- * __ptrace_unlink() if it wasn't already cleared by the tracee;
- * and until then nobody can ptrace this task.
- */
- wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, TASK_KILLABLE);
- proc_ptrace_connector(task, PTRACE_ATTACH);
- }
+ wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, TASK_KILLABLE);
+ proc_ptrace_connector(task, PTRACE_ATTACH);
- return retval;
+ return 0;
}
/**