Merge tag 'regmap-v4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie...
[linux-2.6-block.git] / kernel / time / timer.c
index e212df24ad3f6299c39413f68cb76ab44d4af67b..520499dd85af42e96b2bbd8c729df36d238ad27a 100644 (file)
@@ -70,11 +70,11 @@ EXPORT_SYMBOL(jiffies_64);
 #define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1))
 
 struct tvec {
-       struct list_head vec[TVN_SIZE];
+       struct hlist_head vec[TVN_SIZE];
 };
 
 struct tvec_root {
-       struct list_head vec[TVR_SIZE];
+       struct hlist_head vec[TVR_SIZE];
 };
 
 struct tvec_base {
@@ -85,6 +85,8 @@ struct tvec_base {
        unsigned long active_timers;
        unsigned long all_timers;
        int cpu;
+       bool migration_enabled;
+       bool nohz_active;
        struct tvec_root tv1;
        struct tvec tv2;
        struct tvec tv3;
@@ -92,43 +94,60 @@ struct tvec_base {
        struct tvec tv5;
 } ____cacheline_aligned;
 
-/*
- * __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've
- * made NULL special, hint: lock_timer_base()) and we cannot get a compile time
- * pointer to per-cpu entries because we don't know where we'll map the section,
- * even for the boot cpu.
- *
- * And so we use boot_tvec_bases for boot CPU and per-cpu __tvec_bases for the
- * rest of them.
- */
-struct tvec_base boot_tvec_bases;
-EXPORT_SYMBOL(boot_tvec_bases);
 
-static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
+static DEFINE_PER_CPU(struct tvec_base, tvec_bases);
 
-/* Functions below help us manage 'deferrable' flag */
-static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+unsigned int sysctl_timer_migration = 1;
+
+void timers_update_migration(bool update_nohz)
 {
-       return ((unsigned int)(unsigned long)base & TIMER_DEFERRABLE);
+       bool on = sysctl_timer_migration && tick_nohz_active;
+       unsigned int cpu;
+
+       /* Avoid the loop, if nothing to update */
+       if (this_cpu_read(tvec_bases.migration_enabled) == on)
+               return;
+
+       for_each_possible_cpu(cpu) {
+               per_cpu(tvec_bases.migration_enabled, cpu) = on;
+               per_cpu(hrtimer_bases.migration_enabled, cpu) = on;
+               if (!update_nohz)
+                       continue;
+               per_cpu(tvec_bases.nohz_active, cpu) = true;
+               per_cpu(hrtimer_bases.nohz_active, cpu) = true;
+       }
 }
 
-static inline unsigned int tbase_get_irqsafe(struct tvec_base *base)
+int timer_migration_handler(struct ctl_table *table, int write,
+                           void __user *buffer, size_t *lenp,
+                           loff_t *ppos)
 {
-       return ((unsigned int)(unsigned long)base & TIMER_IRQSAFE);
+       static DEFINE_MUTEX(mutex);
+       int ret;
+
+       mutex_lock(&mutex);
+       ret = proc_dointvec(table, write, buffer, lenp, ppos);
+       if (!ret && write)
+               timers_update_migration(false);
+       mutex_unlock(&mutex);
+       return ret;
 }
 
-static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
+static inline struct tvec_base *get_target_base(struct tvec_base *base,
+                                               int pinned)
 {
-       return ((struct tvec_base *)((unsigned long)base & ~TIMER_FLAG_MASK));
+       if (pinned || !base->migration_enabled)
+               return this_cpu_ptr(&tvec_bases);
+       return per_cpu_ptr(&tvec_bases, get_nohz_timer_target());
 }
-
-static inline void
-timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
+#else
+static inline struct tvec_base *get_target_base(struct tvec_base *base,
+                                               int pinned)
 {
-       unsigned long flags = (unsigned long)timer->base & TIMER_FLAG_MASK;
-
-       timer->base = (struct tvec_base *)((unsigned long)(new_base) | flags);
+       return this_cpu_ptr(&tvec_bases);
 }
+#endif
 
 static unsigned long round_jiffies_common(unsigned long j, int cpu,
                bool force_up)
@@ -356,7 +375,7 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 {
        unsigned long expires = timer->expires;
        unsigned long idx = expires - base->timer_jiffies;
-       struct list_head *vec;
+       struct hlist_head *vec;
 
        if (idx < TVR_SIZE) {
                int i = expires & TVR_MASK;
@@ -390,7 +409,7 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer)
                vec = base->tv5.vec + i;
        }
 
-       list_add(&timer->entry, vec);
+       hlist_add_head(&timer->entry, vec);
 }
 
 static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
@@ -403,7 +422,7 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
        /*
         * Update base->active_timers and base->next_timer
         */
-       if (!tbase_get_deferrable(timer->base)) {
+       if (!(timer->flags & TIMER_DEFERRABLE)) {
                if (!base->active_timers++ ||
                    time_before(timer->expires, base->next_timer))
                        base->next_timer = timer->expires;
@@ -422,8 +441,11 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
         * require special care against races with idle_cpu(), lets deal
         * with that later.
         */
-       if (!tbase_get_deferrable(timer->base) || tick_nohz_full_cpu(base->cpu))
-               wake_up_nohz_cpu(base->cpu);
+       if (base->nohz_active) {
+               if (!(timer->flags & TIMER_DEFERRABLE) ||
+                   tick_nohz_full_cpu(base->cpu))
+                       wake_up_nohz_cpu(base->cpu);
+       }
 }
 
 #ifdef CONFIG_TIMER_STATS
@@ -439,15 +461,12 @@ void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
 
 static void timer_stats_account_timer(struct timer_list *timer)
 {
-       unsigned int flag = 0;
-
        if (likely(!timer->start_site))
                return;
-       if (unlikely(tbase_get_deferrable(timer->base)))
-               flag |= TIMER_STATS_FLAG_DEFERRABLE;
 
        timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
-                                timer->function, timer->start_comm, flag);
+                                timer->function, timer->start_comm,
+                                timer->flags);
 }
 
 #else
@@ -504,8 +523,8 @@ static int timer_fixup_activate(void *addr, enum debug_obj_state state)
                 * statically initialized. We just make sure that it
                 * is tracked in the object tracker.
                 */
-               if (timer->entry.next == NULL &&
-                   timer->entry.prev == TIMER_ENTRY_STATIC) {
+               if (timer->entry.pprev == NULL &&
+                   timer->entry.next == TIMER_ENTRY_STATIC) {
                        debug_object_init(timer, &timer_debug_descr);
                        debug_object_activate(timer, &timer_debug_descr);
                        return 0;
@@ -551,7 +570,7 @@ static int timer_fixup_assert_init(void *addr, enum debug_obj_state state)
 
        switch (state) {
        case ODEBUG_STATE_NOTAVAILABLE:
-               if (timer->entry.prev == TIMER_ENTRY_STATIC) {
+               if (timer->entry.next == TIMER_ENTRY_STATIC) {
                        /*
                         * This is not really a fixup. The timer was
                         * statically initialized. We just make sure that it
@@ -636,7 +655,7 @@ static inline void
 debug_activate(struct timer_list *timer, unsigned long expires)
 {
        debug_timer_activate(timer);
-       trace_timer_start(timer, expires, tbase_get_deferrable(timer->base));
+       trace_timer_start(timer, expires, timer->flags);
 }
 
 static inline void debug_deactivate(struct timer_list *timer)
@@ -653,10 +672,8 @@ static inline void debug_assert_init(struct timer_list *timer)
 static void do_init_timer(struct timer_list *timer, unsigned int flags,
                          const char *name, struct lock_class_key *key)
 {
-       struct tvec_base *base = raw_cpu_read(tvec_bases);
-
-       timer->entry.next = NULL;
-       timer->base = (void *)((unsigned long)base | flags);
+       timer->entry.pprev = NULL;
+       timer->flags = flags | raw_smp_processor_id();
        timer->slack = -1;
 #ifdef CONFIG_TIMER_STATS
        timer->start_site = NULL;
@@ -687,21 +704,21 @@ EXPORT_SYMBOL(init_timer_key);
 
 static inline void detach_timer(struct timer_list *timer, bool clear_pending)
 {
-       struct list_head *entry = &timer->entry;
+       struct hlist_node *entry = &timer->entry;
 
        debug_deactivate(timer);
 
-       __list_del(entry->prev, entry->next);
+       __hlist_del(entry);
        if (clear_pending)
-               entry->next = NULL;
-       entry->prev = LIST_POISON2;
+               entry->pprev = NULL;
+       entry->next = LIST_POISON2;
 }
 
 static inline void
 detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
 {
        detach_timer(timer, true);
-       if (!tbase_get_deferrable(timer->base))
+       if (!(timer->flags & TIMER_DEFERRABLE))
                base->active_timers--;
        base->all_timers--;
 }
@@ -713,7 +730,7 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
                return 0;
 
        detach_timer(timer, clear_pending);
-       if (!tbase_get_deferrable(timer->base)) {
+       if (!(timer->flags & TIMER_DEFERRABLE)) {
                base->active_timers--;
                if (timer->expires == base->next_timer)
                        base->next_timer = base->timer_jiffies;
@@ -732,24 +749,22 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
  * So __run_timers/migrate_timers can safely modify all timers which could
  * be found on ->tvX lists.
  *
- * When the timer's base is locked, and the timer removed from list, it is
- * possible to set timer->base = NULL and drop the lock: the timer remains
- * locked.
+ * When the timer's base is locked and removed from the list, the
+ * TIMER_MIGRATING flag is set, FIXME
  */
 static struct tvec_base *lock_timer_base(struct timer_list *timer,
                                        unsigned long *flags)
        __acquires(timer->base->lock)
 {
-       struct tvec_base *base;
-
        for (;;) {
-               struct tvec_base *prelock_base = timer->base;
-               base = tbase_get_base(prelock_base);
-               if (likely(base != NULL)) {
+               u32 tf = timer->flags;
+               struct tvec_base *base;
+
+               if (!(tf & TIMER_MIGRATING)) {
+                       base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK);
                        spin_lock_irqsave(&base->lock, *flags);
-                       if (likely(prelock_base == timer->base))
+                       if (timer->flags == tf)
                                return base;
-                       /* The timer has migrated to another CPU */
                        spin_unlock_irqrestore(&base->lock, *flags);
                }
                cpu_relax();
@@ -758,11 +773,11 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
 
 static inline int
 __mod_timer(struct timer_list *timer, unsigned long expires,
-                                               bool pending_only, int pinned)
+           bool pending_only, int pinned)
 {
        struct tvec_base *base, *new_base;
        unsigned long flags;
-       int ret = 0 , cpu;
+       int ret = 0;
 
        timer_stats_timer_set_start_info(timer);
        BUG_ON(!timer->function);
@@ -775,8 +790,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 
        debug_activate(timer, expires);
 
-       cpu = get_nohz_timer_target(pinned);
-       new_base = per_cpu(tvec_bases, cpu);
+       new_base = get_target_base(base, pinned);
 
        if (base != new_base) {
                /*
@@ -788,11 +802,13 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
                 */
                if (likely(base->running_timer != timer)) {
                        /* See the comment in lock_timer_base() */
-                       timer_set_base(timer, NULL);
+                       timer->flags |= TIMER_MIGRATING;
+
                        spin_unlock(&base->lock);
                        base = new_base;
                        spin_lock(&base->lock);
-                       timer_set_base(timer, base);
+                       timer->flags &= ~TIMER_BASEMASK;
+                       timer->flags |= base->cpu;
                }
        }
 
@@ -954,13 +970,13 @@ EXPORT_SYMBOL(add_timer);
  */
 void add_timer_on(struct timer_list *timer, int cpu)
 {
-       struct tvec_base *base = per_cpu(tvec_bases, cpu);
+       struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu);
        unsigned long flags;
 
        timer_stats_timer_set_start_info(timer);
        BUG_ON(timer_pending(timer) || !timer->function);
        spin_lock_irqsave(&base->lock, flags);
-       timer_set_base(timer, base);
+       timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
        debug_activate(timer, timer->expires);
        internal_add_timer(base, timer);
        spin_unlock_irqrestore(&base->lock, flags);
@@ -1025,8 +1041,6 @@ int try_to_del_timer_sync(struct timer_list *timer)
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
 #ifdef CONFIG_SMP
-static DEFINE_PER_CPU(struct tvec_base, __tvec_bases);
-
 /**
  * del_timer_sync - deactivate a timer and wait for the handler to finish.
  * @timer: the timer to be deactivated
@@ -1081,7 +1095,7 @@ int del_timer_sync(struct timer_list *timer)
         * don't use it in hardirq context, because it
         * could lead to deadlock.
         */
-       WARN_ON(in_irq() && !tbase_get_irqsafe(timer->base));
+       WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));
        for (;;) {
                int ret = try_to_del_timer_sync(timer);
                if (ret >= 0)
@@ -1095,17 +1109,17 @@ EXPORT_SYMBOL(del_timer_sync);
 static int cascade(struct tvec_base *base, struct tvec *tv, int index)
 {
        /* cascade all the timers from tv up one level */
-       struct timer_list *timer, *tmp;
-       struct list_head tv_list;
+       struct timer_list *timer;
+       struct hlist_node *tmp;
+       struct hlist_head tv_list;
 
-       list_replace_init(tv->vec + index, &tv_list);
+       hlist_move_list(tv->vec + index, &tv_list);
 
        /*
         * We are removing _all_ timers from the list, so we
         * don't have to detach them individually.
         */
-       list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
-               BUG_ON(tbase_get_base(timer->base) != base);
+       hlist_for_each_entry_safe(timer, tmp, &tv_list, entry) {
                /* No accounting, while moving them */
                __internal_add_timer(base, timer);
        }
@@ -1172,8 +1186,8 @@ static inline void __run_timers(struct tvec_base *base)
        spin_lock_irq(&base->lock);
 
        while (time_after_eq(jiffies, base->timer_jiffies)) {
-               struct list_head work_list;
-               struct list_head *head = &work_list;
+               struct hlist_head work_list;
+               struct hlist_head *head = &work_list;
                int index;
 
                if (!base->all_timers) {
@@ -1192,16 +1206,16 @@ static inline void __run_timers(struct tvec_base *base)
                                        !cascade(base, &base->tv4, INDEX(2)))
                        cascade(base, &base->tv5, INDEX(3));
                ++base->timer_jiffies;
-               list_replace_init(base->tv1.vec + index, head);
-               while (!list_empty(head)) {
+               hlist_move_list(base->tv1.vec + index, head);
+               while (!hlist_empty(head)) {
                        void (*fn)(unsigned long);
                        unsigned long data;
                        bool irqsafe;
 
-                       timer = list_first_entry(head, struct timer_list,entry);
+                       timer = hlist_entry(head->first, struct timer_list, entry);
                        fn = timer->function;
                        data = timer->data;
-                       irqsafe = tbase_get_irqsafe(timer->base);
+                       irqsafe = timer->flags & TIMER_IRQSAFE;
 
                        timer_stats_account_timer(timer);
 
@@ -1240,8 +1254,8 @@ static unsigned long __next_timer_interrupt(struct tvec_base *base)
        /* Look for timer events in tv1. */
        index = slot = timer_jiffies & TVR_MASK;
        do {
-               list_for_each_entry(nte, base->tv1.vec + slot, entry) {
-                       if (tbase_get_deferrable(nte->base))
+               hlist_for_each_entry(nte, base->tv1.vec + slot, entry) {
+                       if (nte->flags & TIMER_DEFERRABLE)
                                continue;
 
                        found = 1;
@@ -1271,8 +1285,8 @@ cascade:
 
                index = slot = timer_jiffies & TVN_MASK;
                do {
-                       list_for_each_entry(nte, varp->vec + slot, entry) {
-                               if (tbase_get_deferrable(nte->base))
+                       hlist_for_each_entry(nte, varp->vec + slot, entry) {
+                               if (nte->flags & TIMER_DEFERRABLE)
                                        continue;
 
                                found = 1;
@@ -1342,7 +1356,7 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
  */
 u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
 {
-       struct tvec_base *base = __this_cpu_read(tvec_bases);
+       struct tvec_base *base = this_cpu_ptr(&tvec_bases);
        u64 expires = KTIME_MAX;
        unsigned long nextevt;
 
@@ -1394,7 +1408,7 @@ void update_process_times(int user_tick)
  */
 static void run_timer_softirq(struct softirq_action *h)
 {
-       struct tvec_base *base = __this_cpu_read(tvec_bases);
+       struct tvec_base *base = this_cpu_ptr(&tvec_bases);
 
        if (time_after_eq(jiffies, base->timer_jiffies))
                __run_timers(base);
@@ -1530,15 +1544,16 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
 EXPORT_SYMBOL(schedule_timeout_uninterruptible);
 
 #ifdef CONFIG_HOTPLUG_CPU
-static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
+static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head)
 {
        struct timer_list *timer;
+       int cpu = new_base->cpu;
 
-       while (!list_empty(head)) {
-               timer = list_first_entry(head, struct timer_list, entry);
+       while (!hlist_empty(head)) {
+               timer = hlist_entry(head->first, struct timer_list, entry);
                /* We ignore the accounting on the dying cpu */
                detach_timer(timer, false);
-               timer_set_base(timer, new_base);
+               timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
                internal_add_timer(new_base, timer);
        }
 }
@@ -1550,8 +1565,8 @@ static void migrate_timers(int cpu)
        int i;
 
        BUG_ON(cpu_online(cpu));
-       old_base = per_cpu(tvec_bases, cpu);
-       new_base = get_cpu_var(tvec_bases);
+       old_base = per_cpu_ptr(&tvec_bases, cpu);
+       new_base = this_cpu_ptr(&tvec_bases);
        /*
         * The caller is globally serialized and nobody else
         * takes two locks at once, deadlock is not possible.
@@ -1575,7 +1590,6 @@ static void migrate_timers(int cpu)
 
        spin_unlock(&old_base->lock);
        spin_unlock_irq(&new_base->lock);
-       put_cpu_var(tvec_bases);
 }
 
 static int timer_cpu_notify(struct notifier_block *self,
@@ -1601,52 +1615,27 @@ static inline void timer_register_cpu_notifier(void)
 static inline void timer_register_cpu_notifier(void) { }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static void __init init_timer_cpu(struct tvec_base *base, int cpu)
+static void __init init_timer_cpu(int cpu)
 {
-       int j;
-
-       BUG_ON(base != tbase_get_base(base));
+       struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu);
 
        base->cpu = cpu;
-       per_cpu(tvec_bases, cpu) = base;
        spin_lock_init(&base->lock);
 
-       for (j = 0; j < TVN_SIZE; j++) {
-               INIT_LIST_HEAD(base->tv5.vec + j);
-               INIT_LIST_HEAD(base->tv4.vec + j);
-               INIT_LIST_HEAD(base->tv3.vec + j);
-               INIT_LIST_HEAD(base->tv2.vec + j);
-       }
-       for (j = 0; j < TVR_SIZE; j++)
-               INIT_LIST_HEAD(base->tv1.vec + j);
-
        base->timer_jiffies = jiffies;
        base->next_timer = base->timer_jiffies;
 }
 
 static void __init init_timer_cpus(void)
 {
-       struct tvec_base *base;
-       int local_cpu = smp_processor_id();
        int cpu;
 
-       for_each_possible_cpu(cpu) {
-               if (cpu == local_cpu)
-                       base = &boot_tvec_bases;
-#ifdef CONFIG_SMP
-               else
-                       base = per_cpu_ptr(&__tvec_bases, cpu);
-#endif
-
-               init_timer_cpu(base, cpu);
-       }
+       for_each_possible_cpu(cpu)
+               init_timer_cpu(cpu);
 }
 
 void __init init_timers(void)
 {
-       /* ensure there are enough low bits for flags in timer->base pointer */
-       BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK);
-
        init_timer_cpus();
        init_timer_stats();
        timer_register_cpu_notifier();