From 6d83365a4baf2b72119fbc7cb8769ec35591bb0c Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Fri, 3 Jul 2009 08:29:19 -0500
Subject: [PATCH] revert preempt BKL revert

revert commit 37ffffaf131b6620d27af5a1477f6db507718018 in tip.

[ basically, this is the -R of 8e3e076c5a78519a9f64cd384e8f18bc21882ce0 ]

While we understand that a preemptible BKL is not a brilliant idea in the
first place, we do not have the developer capacity to fix all the BKL
leftovers right away. For PREEMPT-RT we rely on the preemptible BKL for
now. We are still looking into removing the BKL completely; it is high on
our todo list.

Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
Signed-off-by: Paul Gortmaker
---
 include/linux/hardirq.h |   18 +++----
 kernel/sched.c          |   27 ++++++++++--
 lib/kernel_lock.c       |  109 +++++++++++++---------------------------------
 3 files changed, 62 insertions(+), 92 deletions(-)

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index d5b3876..5d79504 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -92,14 +92,6 @@
  */
 #define in_nmi()	(preempt_count() & NMI_MASK)
 
-#if defined(CONFIG_PREEMPT)
-# define PREEMPT_INATOMIC_BASE kernel_locked()
-# define PREEMPT_CHECK_OFFSET 1
-#else
-# define PREEMPT_INATOMIC_BASE 0
-# define PREEMPT_CHECK_OFFSET 0
-#endif
-
 /*
  * Are we running in atomic context? WARNING: this macro cannot
  * always detect atomic context; in particular, it cannot know about
@@ -107,11 +99,17 @@
  * used in the general case to determine whether sleeping is possible.
  * Do not use in_atomic() in driver code.
  */
-#define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_INATOMIC_BASE)
+#define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != 0)
+
+#ifdef CONFIG_PREEMPT
+# define PREEMPT_CHECK_OFFSET 1
+#else
+# define PREEMPT_CHECK_OFFSET 0
+#endif
 
 /*
  * Check whether we were atomic before we did preempt_disable():
- * (used by the scheduler, *after* releasing the kernel lock)
+ * (used by the scheduler)
  */
 #define in_atomic_preempt_off() \
 		((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
diff --git a/kernel/sched.c b/kernel/sched.c
index 22c233e..1c38248 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3856,6 +3856,8 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
 asmlinkage void __sched preempt_schedule(void)
 {
 	struct thread_info *ti = current_thread_info();
+	struct task_struct *task = current;
+	int saved_lock_depth;
 
 	/*
 	 * If there is a non-zero preempt_count or interrupts are disabled,
@@ -3866,7 +3868,16 @@ asmlinkage void __sched preempt_schedule(void)
 
 	do {
 		add_preempt_count(PREEMPT_ACTIVE);
+
+		/*
+		 * We keep the big kernel semaphore locked, but we
+		 * clear ->lock_depth so that schedule() doesnt
+		 * auto-release the semaphore:
+		 */
+		saved_lock_depth = task->lock_depth;
+		task->lock_depth = -1;
 		schedule();
+		task->lock_depth = saved_lock_depth;
 		sub_preempt_count(PREEMPT_ACTIVE);
 
 		/*
@@ -3887,15 +3898,26 @@ EXPORT_SYMBOL(preempt_schedule);
 asmlinkage void __sched preempt_schedule_irq(void)
 {
 	struct thread_info *ti = current_thread_info();
+	struct task_struct *task = current;
+	int saved_lock_depth;
 
 	/* Catch callers which need to be fixed */
 	BUG_ON(ti->preempt_count || !irqs_disabled());
 
 	do {
 		add_preempt_count(PREEMPT_ACTIVE);
+
+		/*
+		 * We keep the big kernel semaphore locked, but we
+		 * clear ->lock_depth so that schedule() doesnt
+		 * auto-release the semaphore:
+		 */
+		saved_lock_depth = task->lock_depth;
+		task->lock_depth = -1;
 		local_irq_enable();
 		schedule();
 		local_irq_disable();
+		task->lock_depth = saved_lock_depth;
 		sub_preempt_count(PREEMPT_ACTIVE);
 
 		/*
@@ -5281,11 +5303,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 
 	/* Set the preempt count _outside_ the spinlocks! */
-#if defined(CONFIG_PREEMPT)
-	task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0);
-#else
 	task_thread_info(idle)->preempt_count = 0;
-#endif
+
 	/*
 	 * The idle tasks have their own, simple scheduling class:
 	 */
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index 5354922..2c9b548 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -14,107 +14,56 @@
 #include
 
 /*
- * The 'big kernel lock'
+ * The 'big kernel semaphore'
  *
- * This spinlock is taken and released recursively by lock_kernel()
+ * This mutex is taken and released recursively by lock_kernel()
  * and unlock_kernel(). It is transparently dropped and reacquired
  * over schedule(). It is used to protect legacy code that hasn't
  * been migrated to a proper locking design yet.
  *
+ * Note: code locked by this semaphore will only be serialized against
+ * other code using the same locking facility. The code guarantees that
+ * the task remains on the same CPU.
+ *
  * Don't use in new code.
  */
-static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(kernel_flag);
-
+static struct semaphore kernel_sem;
 
 /*
- * Acquire/release the underlying lock from the scheduler.
+ * Re-acquire the kernel semaphore.
  *
- * This is called with preemption disabled, and should
- * return an error value if it cannot get the lock and
- * TIF_NEED_RESCHED gets set.
+ * This function is called with preemption off.
  *
- * If it successfully gets the lock, it should increment
- * the preemption count like any spinlock does.
- *
- * (This works on UP too - do_raw_spin_trylock will never
- * return false in that case)
+ * We are executing in schedule() so the code must be extremely careful
+ * about recursion, both due to the down() and due to the enabling of
+ * preemption. schedule() will re-check the preemption flag after
+ * reacquiring the semaphore.
  */
 int __lockfunc __reacquire_kernel_lock(void)
 {
-	while (!do_raw_spin_trylock(&kernel_flag)) {
-		if (need_resched())
-			return -EAGAIN;
-		cpu_relax();
-	}
-	preempt_disable();
-	return 0;
-}
+	int saved_lock_depth = current->lock_depth;
 
-void __lockfunc __release_kernel_lock(void)
-{
-	do_raw_spin_unlock(&kernel_flag);
-	__preempt_enable_no_resched();
-}
+	BUG_ON(saved_lock_depth < 0);
 
-/*
- * These are the BKL spinlocks - we try to be polite about preemption.
- * If SMP is not on (ie UP preemption), this all goes away because the
- * do_raw_spin_trylock() will always succeed.
- */
-#ifdef CONFIG_PREEMPT
-static inline void __lock_kernel(void)
-{
-	preempt_disable();
-	if (unlikely(!do_raw_spin_trylock(&kernel_flag))) {
-		/*
-		 * If preemption was disabled even before this
-		 * was called, there's nothing we can be polite
-		 * about - just spin.
-		 */
-		if (preempt_count() > 1) {
-			do_raw_spin_lock(&kernel_flag);
-			return;
-		}
+	current->lock_depth = -1;
+	local_irq_enable();
 
-		/*
-		 * Otherwise, let's wait for the kernel lock
-		 * with preemption enabled..
-		 */
-		do {
-			preempt_enable();
-			while (raw_spin_is_locked(&kernel_flag))
-				cpu_relax();
-			preempt_disable();
-		} while (!do_raw_spin_trylock(&kernel_flag));
-	}
-}
+	down(&kernel_sem);
 
-#else
+	preempt_disable();
+	local_irq_disable();
+	current->lock_depth = saved_lock_depth;
 
-/*
- * Non-preemption case - just get the spinlock
- */
-static inline void __lock_kernel(void)
-{
-	do_raw_spin_lock(&kernel_flag);
+	return 0;
 }
-#endif
 
-static inline void __unlock_kernel(void)
+void __lockfunc __release_kernel_lock(void)
 {
-	/*
-	 * the BKL is not covered by lockdep, so we open-code the
-	 * unlocking sequence (and thus avoid the dep-chain ops):
-	 */
-	do_raw_spin_unlock(&kernel_flag);
-	preempt_enable();
+	up(&kernel_sem);
 }
 
 /*
- * Getting the big kernel lock.
- *
- * This cannot happen asynchronously, so we only need to
- * worry about other CPU's.
+ * Getting the big kernel semaphore.
  */
 void __lockfunc _lock_kernel(const char *func, const char *file, int line)
 {
@@ -124,7 +73,10 @@ void __lockfunc _lock_kernel(const char *func, const char *file, int line)
 
 	if (likely(!depth)) {
 		might_sleep();
-		__lock_kernel();
+		/*
+		 * No recursion worries - we set up lock_depth _after_
+		 */
+		down(&kernel_sem);
 	}
 	current->lock_depth = depth;
 }
@@ -132,8 +84,9 @@ void __lockfunc _lock_kernel(const char *func, const char *file, int line)
 void __lockfunc _unlock_kernel(const char *func, const char *file, int line)
 {
 	BUG_ON(current->lock_depth < 0);
+
 	if (likely(--current->lock_depth < 0))
-		__unlock_kernel();
+		up(&kernel_sem);
 
 	trace_unlock_kernel(func, file, line);
 }
-- 
1.7.0.4
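
[ Editor's note, not part of the patch above: the locking convention this
  revert restores is that only the outermost lock_kernel() actually takes
  kernel_sem, while nested calls merely bump current->lock_depth, and
  unlock_kernel() releases the semaphore only once the depth drops below
  zero again. The sketch below models that convention in plain userspace C,
  with a pthread mutex standing in for kernel_sem and a thread-local
  counter standing in for current->lock_depth; the names bkl_sem and
  lock_depth and the whole harness are illustrative assumptions, not
  kernel code. ]

/*
 * Userspace model of the recursive lock_kernel()/unlock_kernel()
 * convention: bkl_sem plays the role of kernel_sem, lock_depth plays
 * the role of current->lock_depth (-1 means "not held").
 */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t bkl_sem = PTHREAD_MUTEX_INITIALIZER;
static __thread int lock_depth = -1;

static void lock_kernel(void)
{
	int depth = lock_depth + 1;

	if (depth == 0)			/* outermost call takes the lock */
		pthread_mutex_lock(&bkl_sem);
	lock_depth = depth;		/* nested calls only record the depth */
}

static void unlock_kernel(void)
{
	assert(lock_depth >= 0);
	if (--lock_depth < 0)		/* outermost unlock releases it */
		pthread_mutex_unlock(&bkl_sem);
}

int main(void)
{
	lock_kernel();
	lock_kernel();			/* nested: depth goes to 1 */
	printf("depth while nested: %d\n", lock_depth);
	unlock_kernel();
	unlock_kernel();		/* depth drops to -1: mutex released */
	printf("depth after release: %d\n", lock_depth);
	return 0;
}

[ Built with "cc -pthread", the two printf calls report 1 and -1,
  mirroring how _unlock_kernel() in the patch only calls up(&kernel_sem)
  once the depth has dropped below zero. ]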