Merge branch 'timers-for-linus-urgent' of git://git.kernel.org/pub/scm/linux/kernel...

author Linus Torvalds <torvalds@linux-foundation.org>

Wed, 9 Dec 2009 03:28:09 +0000 (19:28 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 9 Dec 2009 03:28:09 +0000 (19:28 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 9 Dec 2009 03:28:09 +0000 (19:28 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 9 Dec 2009 03:28:09 +0000 (19:28 -0800)
diff --combined arch/powerpc/kernel/time.c

index d18a7f04edec5af925c97dbc46059ea4ce1f9776,39713312fbc734a897d0b91fa9bc34ec2ca7bd22..674800b242d6167262178006c7f50afbe65c26ca
--- 1/arch/powerpc/kernel/time.c
--- 2/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@@ -54,7 -54,6 +54,7 @@@
   #include <linux/irq.h>
   #include <linux/delay.h>
   #include <linux/perf_event.h>
+ +#include <asm/trace.h>
   
   #include <asm/io.h>
   #include <asm/processor.h>
@@@ -572,8 -571,6 +572,8 @@@ void timer_interrupt(struct pt_regs * r
         struct clock_event_device *evt = &decrementer->event;
         u64 now;
   
+ +      trace_timer_interrupt_entry(regs);
+ +
         /* Ensure a positive value is written to the decrementer, or else
          * some CPUs will continue to take decrementer exceptions */
         set_dec(DECREMENTER_MAX);
@@@ -593,7 -590,6 +593,7 @@@
                 now = decrementer->next_tb - now;
                 if (now <= DECREMENTER_MAX)
                         set_dec((int)now);
+ +              trace_timer_interrupt_exit(regs);
                 return;
         }
         old_regs = set_irq_regs(regs);
@@@ -624,8 -620,6 +624,8 @@@
   
         irq_exit();
         set_irq_regs(old_regs);
+ +
+ +      trace_timer_interrupt_exit(regs);
   }
   
   void wakeup_decrementer(void)
@@@ -834,7 -828,8 +834,8 @@@ static cycle_t timebase_read(struct clo
         return (cycle_t)get_tb();
   }
   
- void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
+ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
+                    u32 mult)
   {
         u64 t2x, stamp_xsec;
   
@@@ -847,7 -842,7 +848,7 @@@
   
         /* XXX this assumes clock->shift == 22 */
         /* 4611686018 ~= 2^(20+64-22) / 1e9 */
-       t2x = (u64) clock->mult * 4611686018ULL;
+       t2x = (u64) mult * 4611686018ULL;
         stamp_xsec = (u64) xtime.tv_nsec * XSEC_PER_SEC;
         do_div(stamp_xsec, 1000000000);
         stamp_xsec += (u64) xtime.tv_sec * XSEC_PER_SEC;
@@@ -924,7 -919,7 +925,7 @@@ static void register_decrementer_clocke
         *dec = decrementer_clockevent;
         dec->cpumask = cpumask_of(cpu);
   
- -      printk(KERN_DEBUG "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n",
+ +      printk(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n",
                dec->name, dec->mult, dec->shift, cpu);
   
         clockevents_register_device(dec);
diff --combined arch/x86/kernel/vsyscall_64.c

index e02d92d12bcd326f907f6751d3cb4024243913f6,62f39d79b7754557f0725d5b80b958e5d7c245d8..9055e5872ff0a5afa668d1d7fa9517eeb6f618c8
--- 1/arch/x86/kernel/vsyscall_64.c
--- 2/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@@ -73,7 -73,8 +73,8 @@@ void update_vsyscall_tz(void
         write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
   }
   
- void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
+ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
+                    u32 mult)
   {
         unsigned long flags;
   
@@@ -82,7 -83,7 +83,7 @@@
         vsyscall_gtod_data.clock.vread = clock->vread;
         vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
         vsyscall_gtod_data.clock.mask = clock->mask;
-       vsyscall_gtod_data.clock.mult = clock->mult;
+       vsyscall_gtod_data.clock.mult = mult;
         vsyscall_gtod_data.clock.shift = clock->shift;
         vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
         vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
@@@ -237,7 -238,7 +238,7 @@@ static ctl_table kernel_table2[] = 
   };
   
   static ctl_table kernel_root_table2[] = {
- -      { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555,
+ +      { .procname = "kernel", .mode = 0555,
           .child = kernel_table2 },
         {}
   };
diff --combined include/linux/clocksource.h

index 279c5478e8a6aaf7c4a6944166dcc22eb708d5ed,95e4995d99879f921b3f287ffae429671f9bba4d..8a4a130cc19698ab8a03c6c6344e3b24f42932f8
--- 1/include/linux/clocksource.h
--- 2/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@@ -151,7 -151,6 +151,7 @@@ extern u64 timecounter_cyc2time(struct 
    *                    subtraction of non 64 bit counters
    * @mult:             cycle to nanosecond multiplier
    * @shift:            cycle to nanosecond divisor (power of two)
+ + * @max_idle_ns:      max idle time permitted by the clocksource (nsecs)
    * @flags:            flags describing special properties
    * @vread:            vsyscall based read
    * @resume:           resume function for the clocksource, if necessary
@@@ -169,7 -168,6 +169,7 @@@ struct clocksource 
         cycle_t mask;
         u32 mult;
         u32 shift;
+ +      u64 max_idle_ns;
         unsigned long flags;
         cycle_t (*vread)(void);
         void (*resume)(void);
@@@ -281,21 -279,13 +281,23 @@@ extern void clocksource_resume(void)
   extern struct clocksource * __init __weak clocksource_default_clock(void);
   extern void clocksource_mark_unstable(struct clocksource *cs);
   
+ +extern void
+ +clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
+ +
+ +static inline void
+ +clocksource_calc_mult_shift(struct clocksource *cs, u32 freq, u32 minsec)
+ +{
+ +      return clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
+ +                                    NSEC_PER_SEC, minsec);
+ +}
+ +
   #ifdef CONFIG_GENERIC_TIME_VSYSCALL
- extern void update_vsyscall(struct timespec *ts, struct clocksource *c);
+ extern void
+ update_vsyscall(struct timespec *ts, struct clocksource *c, u32 mult);
   extern void update_vsyscall_tz(void);
   #else
- static inline void update_vsyscall(struct timespec *ts, struct clocksource *c)
+ static inline void
+ update_vsyscall(struct timespec *ts, struct clocksource *c, u32 mult)
   {
   }
   
diff --combined kernel/time/timekeeping.c

index d1aebd73b19146795a4589f6c38ffcf6268c0335,2a6d3e3e2c3eceedcbcf42c98439acb656e0dede..af4135f058254b0607fc2594f2c52ae6355e76b1
--- 1/kernel/time/timekeeping.c
--- 2/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@@ -165,12 -165,19 +165,12 @@@ struct timespec raw_time
   /* flag for if timekeeping is suspended */
   int __read_mostly timekeeping_suspended;
   
- -static struct timespec xtime_cache __attribute__ ((aligned (16)));
- -void update_xtime_cache(u64 nsec)
- -{
- -      xtime_cache = xtime;
- -      timespec_add_ns(&xtime_cache, nsec);
- -}
- -
   /* must hold xtime_lock */
   void timekeeping_leap_insert(int leapsecond)
   {
         xtime.tv_sec += leapsecond;
         wall_to_monotonic.tv_sec -= leapsecond;
-       update_vsyscall(&xtime, timekeeper.clock);
+       update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
   }
   
   #ifdef CONFIG_GENERIC_TIME
@@@ -325,10 -332,12 +325,10 @@@ int do_settimeofday(struct timespec *tv
   
         xtime = *tv;
   
- -      update_xtime_cache(0);
- -
         timekeeper.ntp_error = 0;
         ntp_clear();
   
-       update_vsyscall(&xtime, timekeeper.clock);
+       update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
   
         write_sequnlock_irqrestore(&xtime_lock, flags);
   
@@@ -478,17 -487,6 +478,17 @@@ int timekeeping_valid_for_hres(void
         return ret;
   }
   
+ +/**
+ + * timekeeping_max_deferment - Returns max time the clocksource can be deferred
+ + *
+ + * Caller must observe xtime_lock via read_seqbegin/read_seqretry to
+ + * ensure that the clocksource does not change!
+ + */
+ +u64 timekeeping_max_deferment(void)
+ +{
+ +      return timekeeper.clock->max_idle_ns;
+ +}
+ +
   /**
    * read_persistent_clock -  Return time from the persistent clock.
    *
@@@ -550,6 -548,7 +550,6 @@@ void __init timekeeping_init(void
         }
         set_normalized_timespec(&wall_to_monotonic,
                                 -boot.tv_sec, -boot.tv_nsec);
- -      update_xtime_cache(0);
         total_sleep_time.tv_sec = 0;
         total_sleep_time.tv_nsec = 0;
         write_sequnlock_irqrestore(&xtime_lock, flags);
@@@ -583,6 -582,7 +583,6 @@@ static int timekeeping_resume(struct sy
                 wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
                 total_sleep_time = timespec_add_safe(total_sleep_time, ts);
         }
- -      update_xtime_cache(0);
         /* re-base the last cycle value */
         timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
         timekeeper.ntp_error = 0;
@@@ -722,49 -722,6 +722,49 @@@ static void timekeeping_adjust(s64 offs
                                 timekeeper.ntp_error_shift;
   }
   
+ +/**
+ + * logarithmic_accumulation - shifted accumulation of cycles
+ + *
+ + * This function accumulates a shifted interval of cycles into
+ + * a shifted interval of nanoseconds. Allows for an O(log) accumulation
+ + * loop.
+ + *
+ + * Returns the unconsumed cycles.
+ + */
+ +static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
+ +{
+ +      u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
+ +
+ +      /* If the offset is smaller than a shifted interval, do nothing */
+ +      if (offset < timekeeper.cycle_interval<<shift)
+ +              return offset;
+ +
+ +      /* Accumulate one shifted interval */
+ +      offset -= timekeeper.cycle_interval << shift;
+ +      timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift;
+ +
+ +      timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
+ +      while (timekeeper.xtime_nsec >= nsecps) {
+ +              timekeeper.xtime_nsec -= nsecps;
+ +              xtime.tv_sec++;
+ +              second_overflow();
+ +      }
+ +
+ +      /* Accumulate into raw time */
+ +      raw_time.tv_nsec += timekeeper.raw_interval << shift;;
+ +      while (raw_time.tv_nsec >= NSEC_PER_SEC) {
+ +              raw_time.tv_nsec -= NSEC_PER_SEC;
+ +              raw_time.tv_sec++;
+ +      }
+ +
+ +      /* Accumulate error between NTP and clock interval */
+ +      timekeeper.ntp_error += tick_length << shift;
+ +      timekeeper.ntp_error -= timekeeper.xtime_interval <<
+ +                              (timekeeper.ntp_error_shift + shift);
+ +
+ +      return offset;
+ +}
+ +
   /**
    * update_wall_time - Uses the current clocksource to increment the wall time
    *
@@@ -774,7 -731,7 +774,7 @@@ void update_wall_time(void
   {
         struct clocksource *clock;
         cycle_t offset;
- -      u64 nsecs;
+ +      int shift = 0, maxshift;
   
         /* Make sure we're fully resumed: */
         if (unlikely(timekeeping_suspended))
@@@ -788,22 -745,33 +788,22 @@@
   #endif
         timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift;
   
- -      /* normally this loop will run just once, however in the
- -       * case of lost or late ticks, it will accumulate correctly.
+ +      /*
+ +       * With NO_HZ we may have to accumulate many cycle_intervals
+ +       * (think "ticks") worth of time at once. To do this efficiently,
+ +       * we calculate the largest doubling multiple of cycle_intervals
+ +       * that is smaller than the offset. We then accumulate that
+ +       * chunk in one go, and then try to consume the next smaller
+ +       * doubled multiple.
          */
+ +      shift = ilog2(offset) - ilog2(timekeeper.cycle_interval);
+ +      shift = max(0, shift);
+ +      /* Bound shift to one less than what overflows tick_length */
+ +      maxshift = (8*sizeof(tick_length) - (ilog2(tick_length)+1)) - 1;
+ +      shift = min(shift, maxshift);
         while (offset >= timekeeper.cycle_interval) {
- -              u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
- -
- -              /* accumulate one interval */
- -              offset -= timekeeper.cycle_interval;
- -              clock->cycle_last += timekeeper.cycle_interval;
- -
- -              timekeeper.xtime_nsec += timekeeper.xtime_interval;
- -              if (timekeeper.xtime_nsec >= nsecps) {
- -                      timekeeper.xtime_nsec -= nsecps;
- -                      xtime.tv_sec++;
- -                      second_overflow();
- -              }
- -
- -              raw_time.tv_nsec += timekeeper.raw_interval;
- -              if (raw_time.tv_nsec >= NSEC_PER_SEC) {
- -                      raw_time.tv_nsec -= NSEC_PER_SEC;
- -                      raw_time.tv_sec++;
- -              }
- -
- -              /* accumulate error between NTP and clock interval */
- -              timekeeper.ntp_error += tick_length;
- -              timekeeper.ntp_error -= timekeeper.xtime_interval <<
- -                                      timekeeper.ntp_error_shift;
+ +              offset = logarithmic_accumulation(offset, shift);
+ +              shift--;
         }
   
         /* correct the clock when NTP error is too big */
@@@ -839,8 -807,11 +839,8 @@@
         timekeeper.ntp_error += timekeeper.xtime_nsec <<
                                 timekeeper.ntp_error_shift;
   
- -      nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift);
- -      update_xtime_cache(nsecs);
- -
         /* check to see if there is a new clocksource to use */
-       update_vsyscall(&xtime, timekeeper.clock);
+       update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
   }
   
   /**
@@@ -875,13 -846,13 +875,13 @@@ void monotonic_to_bootbased(struct time
   
   unsigned long get_seconds(void)
   {
- -      return xtime_cache.tv_sec;
+ +      return xtime.tv_sec;
   }
   EXPORT_SYMBOL(get_seconds);
   
   struct timespec __current_kernel_time(void)
   {
- -      return xtime_cache;
+ +      return xtime;
   }
   
   struct timespec current_kernel_time(void)
@@@ -891,7 -862,8 +891,7 @@@
   
         do {
                 seq = read_seqbegin(&xtime_lock);
- -
- -              now = xtime_cache;
+ +              now = xtime;
         } while (read_seqretry(&xtime_lock, seq));
   
         return now;
@@@ -905,7 -877,8 +905,7 @@@ struct timespec get_monotonic_coarse(vo
   
         do {
                 seq = read_seqbegin(&xtime_lock);
- -
- -              now = xtime_cache;
+ +              now = xtime;
                 mono = wall_to_monotonic;
         } while (read_seqretry(&xtime_lock, seq));
author	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 9 Dec 2009 03:28:09 +0000 (19:28 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 9 Dec 2009 03:28:09 +0000 (19:28 -0800)
		1	2
arch/powerpc/kernel/time.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/vsyscall_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/clocksource.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/time/timekeeping.c	patch \|	diff1 \|	diff2 \|	blob \| history