From: Oleg Nesterov

This patch tries to document the new locking rules in kernel/timer.c.

Signed-off-by: Oleg Nesterov
Signed-off-by: Andrew Morton
---

 kernel/timer.c |   35 ++++++++++++++++++++++++++---------
 1 files changed, 26 insertions(+), 9 deletions(-)

diff -puN kernel/timer.c~timers-comments-update kernel/timer.c
--- 25/kernel/timer.c~timers-comments-update	2005-05-02 18:45:40.000000000 -0700
+++ 25-akpm/kernel/timer.c	2005-05-02 18:46:38.000000000 -0700
@@ -158,6 +158,10 @@ static void internal_add_timer(tvec_base
 }
 
 typedef struct timer_base_s timer_base_t;
+/*
+ * Used by TIMER_INITIALIZER: we can't use per_cpu(tvec_bases)
+ * at compile time, and we need timer->base to lock the timer.
+ */
 timer_base_t __init_timer_base
 	____cacheline_aligned_in_smp = { .lock = SPIN_LOCK_UNLOCKED };
 EXPORT_SYMBOL(__init_timer_base);
@@ -188,6 +192,18 @@ static inline void detach_timer(struct t
 	entry->prev = LIST_POISON2;
 }
 
+/*
+ * We are using hashed locking: holding per_cpu(tvec_bases).t_base.lock
+ * means that all timers which are tied to this base via timer->base are
+ * locked, and the base itself is locked too.
+ *
+ * So __run_timers/migrate_timers can safely modify all timers which could
+ * be found on ->tvX lists.
+ *
+ * When the timer's base is locked and the timer is removed from the list,
+ * it is possible to set timer->base = NULL and drop the lock: the timer
+ * remains locked.
+ */
 static timer_base_t *lock_timer_base(struct timer_list *timer,
 					unsigned long *flags)
 {
@@ -195,11 +211,11 @@ static timer_base_t *lock_timer_base(str
 
 	for (;;) {
 		base = timer->base;
-		/* Can be NULL while __mod_timer switches bases */
 		if (likely(base != NULL)) {
 			spin_lock_irqsave(&base->lock, *flags);
 			if (likely(base == timer->base))
 				return base;
+			/* The timer has migrated to another CPU */
 			spin_unlock_irqrestore(&base->lock, *flags);
 		}
 		cpu_relax();
@@ -226,18 +242,19 @@ int __mod_timer(struct timer_list *timer
 	new_base = &__get_cpu_var(tvec_bases);
 
 	if (base != &new_base->t_base) {
+		/*
+		 * We are trying to schedule the timer on the local CPU.
+		 * However we can't change timer's base while it is running,
+		 * otherwise del_timer_sync() can't detect that the timer's
+		 * handler has not finished yet. This also guarantees that
+		 * the timer is serialized wrt itself.
+		 */
		if (unlikely(base->running_timer == timer)) {
-			/*
-			 * Don't change timer's base while it is running.
-			 * Needed for serialization of timer wrt itself.
-			 */
+			/* The timer remains on a former base */
 			new_base = container_of(base, tvec_base_t, t_base);
 		} else {
+			/* See the comment in lock_timer_base() */
 			timer->base = NULL;
-			/*
-			 * Safe: the timer can't be seen via ->entry and
-			 * lock_timer_base() checks ->base != 0.
-			 */
 			spin_unlock(&base->lock);
 			spin_lock(&new_base->t_base.lock);
 			timer->base = &new_base->t_base;
_
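
For readers who want to see the scheme the new comments describe in isolation, here is a small standalone sketch in plain pthreads C. It is not the kernel code: the names (fake_base, fake_timer, fake_lock_timer_base, fake_migrate_timer) are made up for illustration, pthread mutexes stand in for the per-CPU base spinlocks, and the GCC atomic builtins only mimic the "timer->base == NULL means the timer is in flight" convention that lock_timer_base() and __mod_timer() rely on.

#include <pthread.h>
#include <sched.h>
#include <stddef.h>

struct fake_base {
	pthread_mutex_t lock;
};

struct fake_timer {
	struct fake_base *base;		/* NULL while the timer migrates between bases */
};

/*
 * Mirror of lock_timer_base(): spin until timer->base is non-NULL and is
 * still the same base after its lock has been taken.
 */
static struct fake_base *fake_lock_timer_base(struct fake_timer *timer)
{
	struct fake_base *base;

	for (;;) {
		base = __atomic_load_n(&timer->base, __ATOMIC_ACQUIRE);
		if (base != NULL) {
			pthread_mutex_lock(&base->lock);
			if (base == timer->base)
				return base;	/* the timer still belongs to this base */
			/* The timer migrated while we were taking the lock */
			pthread_mutex_unlock(&base->lock);
		}
		sched_yield();	/* stand-in for cpu_relax() */
	}
}

/*
 * Mirror of the __mod_timer() hunk above: move a timer to new_base.
 * Clearing timer->base first keeps the timer effectively locked for
 * everyone going through fake_lock_timer_base(), even while neither
 * base lock is held during the handoff.
 */
static void fake_migrate_timer(struct fake_timer *timer, struct fake_base *new_base)
{
	struct fake_base *base = fake_lock_timer_base(timer);

	if (base != new_base) {
		__atomic_store_n(&timer->base, NULL, __ATOMIC_RELEASE);
		pthread_mutex_unlock(&base->lock);
		pthread_mutex_lock(&new_base->lock);
		__atomic_store_n(&timer->base, new_base, __ATOMIC_RELEASE);
		base = new_base;
	}
	pthread_mutex_unlock(&base->lock);
}

int main(void)
{
	struct fake_base base_a = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct fake_base base_b = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct fake_timer t = { .base = &base_a };

	fake_migrate_timer(&t, &base_b);	/* t now belongs to base_b */
	return 0;
}

The point the sketch tries to make is the one stated in the comment above lock_timer_base(): once timer->base has been set to NULL under the old base's lock, any other path that goes through fake_lock_timer_base() keeps spinning, so the timer stays logically locked across the unlock/lock handoff between bases.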