2 * linux/arch/x86-64/kernel/time.c
4 * "High Precision Event Timer" based timekeeping.
6 * Copyright (c) 1991,1992,1995 Linus Torvalds
7 * Copyright (c) 1994 Alan Modra
8 * Copyright (c) 1995 Markus Kuhn
9 * Copyright (c) 1996 Ingo Molnar
10 * Copyright (c) 1998 Andrea Arcangeli
11 * Copyright (c) 2002,2006 Vojtech Pavlik
12 * Copyright (c) 2003 Andi Kleen
13 * RTC support code taken from arch/i386/kernel/timers/time_hpet.c
16 #include <linux/kernel.h>
17 #include <linux/sched.h>
18 #include <linux/interrupt.h>
19 #include <linux/init.h>
20 #include <linux/mc146818rtc.h>
21 #include <linux/time.h>
22 #include <linux/ioport.h>
23 #include <linux/module.h>
24 #include <linux/device.h>
25 #include <linux/sysdev.h>
26 #include <linux/bcd.h>
27 #include <linux/notifier.h>
28 #include <linux/cpu.h>
29 #include <linux/kallsyms.h>
30 #include <linux/acpi.h>
32 #include <acpi/achware.h> /* for PM timer frequency */
33 #include <acpi/acpi_bus.h>
35 #include <asm/8253pit.h>
36 #include <asm/pgtable.h>
37 #include <asm/vsyscall.h>
38 #include <asm/timex.h>
39 #include <asm/proto.h>
41 #include <asm/sections.h>
42 #include <linux/cpufreq.h>
43 #include <linux/hpet.h>
47 #ifdef CONFIG_CPU_FREQ
48 extern void cpufreq_delayed_get(void);
50 extern void i8254_timer_resume(void);
51 extern int using_apic_timer;
53 static char *timename = NULL;
55 DEFINE_SPINLOCK(rtc_lock);
56 EXPORT_SYMBOL(rtc_lock);
57 DEFINE_SPINLOCK(i8253_lock);
59 unsigned long vxtime_hz = PIT_TICK_RATE;
60 int report_lost_ticks; /* command line option */
61 unsigned long long monotonic_base;
63 struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
65 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
66 struct timespec __xtime __section_xtime;
67 struct timezone __sys_tz __section_sys_tz;
69 unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
/*
 * This version of gettimeofday() has microsecond resolution and better than
 * microsecond precision, as we're using at least a 10 MHz (usually 14.31818
 * MHz) HPET timer.
 */
77 void do_gettimeofday(struct timeval *tv)
80 unsigned int sec, usec;
83 seq = read_seqbegin(&xtime_lock);
86 usec = xtime.tv_nsec / NSEC_PER_USEC;
88 /* i386 does some correction here to keep the clock
89 monotonous even when ntpd is fixing drift.
90 But they didn't work for me, there is a non monotonic
91 clock anyways with ntp.
92 I dropped all corrections now until a real solution can
93 be found. Note when you fix it here you need to do the same
94 in arch/x86_64/kernel/vsyscall.c and export all needed
95 variables in vmlinux.lds. -AK */
96 usec += do_gettimeoffset();
98 } while (read_seqretry(&xtime_lock, seq));
100 tv->tv_sec = sec + usec / USEC_PER_SEC;
101 tv->tv_usec = usec % USEC_PER_SEC;
104 EXPORT_SYMBOL(do_gettimeofday);
/*
 * settimeofday() first undoes the correction that gettimeofday would do
 * on the time, and then saves it. This is ugly, but has been like this for
 * ages already.
 */
112 int do_settimeofday(struct timespec *tv)
114 time_t wtm_sec, sec = tv->tv_sec;
115 long wtm_nsec, nsec = tv->tv_nsec;
117 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
120 write_seqlock_irq(&xtime_lock);
122 nsec -= do_gettimeoffset() * NSEC_PER_USEC;
124 wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
125 wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
127 set_normalized_timespec(&xtime, sec, nsec);
128 set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
132 write_sequnlock_irq(&xtime_lock);
137 EXPORT_SYMBOL(do_settimeofday);
139 unsigned long profile_pc(struct pt_regs *regs)
141 unsigned long pc = instruction_pointer(regs);
143 /* Assume the lock function has either no stack frame or a copy
145 Eflags always has bits 22 and up cleared unlike kernel addresses. */
146 if (!user_mode(regs) && in_lock_functions(pc)) {
147 unsigned long *sp = (unsigned long *)regs->rsp;
155 EXPORT_SYMBOL(profile_pc);
/*
 * In order to set the CMOS clock precisely, set_rtc_mmss has to be called 500
 * ms after the second nowtime has started, because when nowtime is written
 * into the registers of the CMOS clock, it will jump to the next second
 * precisely 500 ms later. Check the Motorola MC146818A or Dallas DS12887 data
 * sheet for details.
 */
165 static void set_rtc_mmss(unsigned long nowtime)
167 int real_seconds, real_minutes, cmos_minutes;
168 unsigned char control, freq_select;
171 * IRQs are disabled when we're called from the timer interrupt,
172 * no need for spin_lock_irqsave()
175 spin_lock(&rtc_lock);
178 * Tell the clock it's being set and stop it.
181 control = CMOS_READ(RTC_CONTROL);
182 CMOS_WRITE(control | RTC_SET, RTC_CONTROL);
184 freq_select = CMOS_READ(RTC_FREQ_SELECT);
185 CMOS_WRITE(freq_select | RTC_DIV_RESET2, RTC_FREQ_SELECT);
187 cmos_minutes = CMOS_READ(RTC_MINUTES);
188 BCD_TO_BIN(cmos_minutes);
191 * since we're only adjusting minutes and seconds, don't interfere with hour
192 * overflow. This avoids messing with unknown time zones but requires your RTC
193 * not to be off by more than 15 minutes. Since we're calling it only when
194 * our clock is externally synchronized using NTP, this shouldn't be a problem.
197 real_seconds = nowtime % 60;
198 real_minutes = nowtime / 60;
199 if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1)
200 real_minutes += 30; /* correct for half hour time zone */
203 if (abs(real_minutes - cmos_minutes) >= 30) {
204 printk(KERN_WARNING "time.c: can't update CMOS clock "
205 "from %d to %d\n", cmos_minutes, real_minutes);
207 BIN_TO_BCD(real_seconds);
208 BIN_TO_BCD(real_minutes);
209 CMOS_WRITE(real_seconds, RTC_SECONDS);
210 CMOS_WRITE(real_minutes, RTC_MINUTES);
214 * The following flags have to be released exactly in this order, otherwise the
215 * DS12887 (popular MC146818A clone with integrated battery and quartz) will
216 * not reset the oscillator and will not update precisely 500 ms later. You
217 * won't find this mentioned in the Dallas Semiconductor data sheets, but who
218 * believes data sheets anyway ... -- Markus Kuhn
221 CMOS_WRITE(control, RTC_CONTROL);
222 CMOS_WRITE(freq_select, RTC_FREQ_SELECT);
224 spin_unlock(&rtc_lock);
228 /* monotonic_clock(): returns # of nanoseconds passed since time_init()
229 * Note: This function is required to return accurate
230 * time even in the absence of multiple timer ticks.
232 extern unsigned long long cycles_2_ns(unsigned long long cyc);
233 unsigned long long monotonic_clock(void)
236 u32 last_offset, this_offset, offset;
237 unsigned long long base;
239 if (vxtime.mode == VXTIME_HPET) {
241 seq = read_seqbegin(&xtime_lock);
243 last_offset = vxtime.last;
244 base = monotonic_base;
245 this_offset = hpet_readl(HPET_COUNTER);
246 } while (read_seqretry(&xtime_lock, seq));
247 offset = (this_offset - last_offset);
248 offset *= NSEC_PER_TICK / hpet_tick;
251 seq = read_seqbegin(&xtime_lock);
253 last_offset = vxtime.last_tsc;
254 base = monotonic_base;
255 } while (read_seqretry(&xtime_lock, seq));
256 this_offset = get_cycles_sync();
257 offset = cycles_2_ns(this_offset - last_offset);
259 return base + offset;
261 EXPORT_SYMBOL(monotonic_clock);
263 static noinline void handle_lost_ticks(int lost)
265 static long lost_count;
267 if (report_lost_ticks) {
268 printk(KERN_WARNING "time.c: Lost %d timer tick(s)! ", lost);
269 print_symbol("rip %s)\n", get_irq_regs()->rip);
272 if (lost_count == 1000 && !warned) {
273 printk(KERN_WARNING "warning: many lost ticks.\n"
274 KERN_WARNING "Your time source seems to be instable or "
275 "some driver is hogging interupts\n");
276 print_symbol("rip %s\n", get_irq_regs()->rip);
277 if (vxtime.mode == VXTIME_TSC && hpet_address) {
278 printk(KERN_WARNING "Falling back to HPET\n");
280 vxtime.last = hpet_readl(HPET_T0_CMP) -
283 vxtime.last = hpet_readl(HPET_COUNTER);
284 vxtime.mode = VXTIME_HPET;
285 vxtime.hpet_address = hpet_address;
286 do_gettimeoffset = do_gettimeoffset_hpet;
288 /* else should fall back to PIT, but code missing. */
293 #ifdef CONFIG_CPU_FREQ
294 /* In some cases the CPU can change frequency without us noticing
295 Give cpufreq a change to catch up. */
296 if ((lost_count+1) % 25 == 0)
297 cpufreq_delayed_get();
301 void main_timer_handler(void)
303 static unsigned long rtc_update = 0;
305 int delay = 0, offset = 0, lost = 0;
308 * Here we are in the timer irq handler. We have irqs locally disabled (so we
309 * don't need spin_lock_irqsave()) but we don't know if the timer_bh is running
310 * on the other CPU, so we need a lock. We also need to lock the vsyscall
311 * variables, because both do_timer() and us change them -arca+vojtech
314 write_seqlock(&xtime_lock);
317 offset = hpet_readl(HPET_COUNTER);
319 if (hpet_use_timer) {
320 /* if we're using the hpet timer functionality,
321 * we can more accurately know the counter value
322 * when the timer interrupt occured.
324 offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
325 delay = hpet_readl(HPET_COUNTER) - offset;
326 } else if (!pmtmr_ioport) {
327 spin_lock(&i8253_lock);
330 delay |= inb(0x40) << 8;
331 spin_unlock(&i8253_lock);
332 delay = LATCH - 1 - delay;
335 tsc = get_cycles_sync();
337 if (vxtime.mode == VXTIME_HPET) {
338 if (offset - vxtime.last > hpet_tick) {
339 lost = (offset - vxtime.last) / hpet_tick - 1;
343 (offset - vxtime.last) * NSEC_PER_TICK / hpet_tick;
345 vxtime.last = offset;
346 #ifdef CONFIG_X86_PM_TIMER
347 } else if (vxtime.mode == VXTIME_PMTMR) {
348 lost = pmtimer_mark_offset();
351 offset = (((tsc - vxtime.last_tsc) *
352 vxtime.tsc_quot) >> US_SCALE) - USEC_PER_TICK;
357 if (offset > USEC_PER_TICK) {
358 lost = offset / USEC_PER_TICK;
359 offset %= USEC_PER_TICK;
362 monotonic_base += cycles_2_ns(tsc - vxtime.last_tsc);
364 vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
366 if ((((tsc - vxtime.last_tsc) *
367 vxtime.tsc_quot) >> US_SCALE) < offset)
368 vxtime.last_tsc = tsc -
369 (((long) offset << US_SCALE) / vxtime.tsc_quot) - 1;
373 handle_lost_ticks(lost);
378 * Do the timer stuff.
383 update_process_times(user_mode(get_irq_regs()));
387 * In the SMP case we use the local APIC timer interrupt to do the profiling,
388 * except when we simulate SMP mode on a uniprocessor system, in that case we
389 * have to call the local interrupt handler.
392 if (!using_apic_timer)
393 smp_local_timer_interrupt();
396 * If we have an externally synchronized Linux clock, then update CMOS clock
397 * accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy
398 * closest to exactly 500 ms before the next second. If the update fails, we
399 * don't care, as it'll be updated on the next turn, and the problem (time way
400 * off) isn't likely to go away much sooner anyway.
403 if (ntp_synced() && xtime.tv_sec > rtc_update &&
404 abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
405 set_rtc_mmss(xtime.tv_sec);
406 rtc_update = xtime.tv_sec + 660;
409 write_sequnlock(&xtime_lock);
412 static irqreturn_t timer_interrupt(int irq, void *dev_id)
414 if (apic_runs_main_timer > 1)
416 main_timer_handler();
417 if (using_apic_timer)
418 smp_send_timer_broadcast_ipi();
422 static unsigned long get_cmos_time(void)
424 unsigned int year, mon, day, hour, min, sec;
426 unsigned century = 0;
428 spin_lock_irqsave(&rtc_lock, flags);
431 sec = CMOS_READ(RTC_SECONDS);
432 min = CMOS_READ(RTC_MINUTES);
433 hour = CMOS_READ(RTC_HOURS);
434 day = CMOS_READ(RTC_DAY_OF_MONTH);
435 mon = CMOS_READ(RTC_MONTH);
436 year = CMOS_READ(RTC_YEAR);
438 if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
439 acpi_gbl_FADT.century)
440 century = CMOS_READ(acpi_gbl_FADT.century);
442 } while (sec != CMOS_READ(RTC_SECONDS));
444 spin_unlock_irqrestore(&rtc_lock, flags);
447 * We know that x86-64 always uses BCD format, no need to check the
460 year += century * 100;
461 printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
464 * x86-64 systems only exists since 2002.
465 * This will work up to Dec 31, 2100
470 return mktime(year, mon, day, hour, min, sec);
475 * pit_calibrate_tsc() uses the speaker output (channel 2) of
476 * the PIT. This is better than using the timer interrupt output,
477 * because we can read the value of the speaker with just one inb(),
478 * where we need three i/o operations for the interrupt channel.
479 * We count how many ticks the TSC does in 50 ms.
482 static unsigned int __init pit_calibrate_tsc(void)
484 unsigned long start, end;
487 spin_lock_irqsave(&i8253_lock, flags);
489 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
492 outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
493 outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42);
494 start = get_cycles_sync();
495 while ((inb(0x61) & 0x20) == 0);
496 end = get_cycles_sync();
498 spin_unlock_irqrestore(&i8253_lock, flags);
500 return (end - start) / 50;
503 #define PIT_MODE 0x43
506 static void __init __pit_init(int val, u8 mode)
510 spin_lock_irqsave(&i8253_lock, flags);
511 outb_p(mode, PIT_MODE);
512 outb_p(val & 0xff, PIT_CH0); /* LSB */
513 outb_p(val >> 8, PIT_CH0); /* MSB */
514 spin_unlock_irqrestore(&i8253_lock, flags);
517 void __init pit_init(void)
519 __pit_init(LATCH, 0x34); /* binary, mode 2, LSB/MSB, ch 0 */
522 void __init pit_stop_interrupt(void)
524 __pit_init(0, 0x30); /* mode 0 */
527 void __init stop_timer_interrupt(void)
532 hpet_timer_stop_set_go(0);
535 pit_stop_interrupt();
537 printk(KERN_INFO "timer: %s interrupt stopped.\n", name);
540 int __init time_setup(char *str)
542 report_lost_ticks = 1;
546 static struct irqaction irq0 = {
547 timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL
550 void __init time_init(void)
554 xtime.tv_sec = get_cmos_time();
557 set_normalized_timespec(&wall_to_monotonic,
558 -xtime.tv_sec, -xtime.tv_nsec);
560 if (!hpet_arch_init())
561 vxtime_hz = (FSEC_PER_SEC + hpet_period / 2) / hpet_period;
565 if (hpet_use_timer) {
566 /* set tick_nsec to use the proper rate for HPET */
567 tick_nsec = TICK_NSEC_HPET;
568 cpu_khz = hpet_calibrate_tsc();
570 #ifdef CONFIG_X86_PM_TIMER
571 } else if (pmtmr_ioport && !hpet_address) {
572 vxtime_hz = PM_TIMER_FREQUENCY;
575 cpu_khz = pit_calibrate_tsc();
579 cpu_khz = pit_calibrate_tsc();
583 vxtime.mode = VXTIME_TSC;
584 vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
585 vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
586 vxtime.last_tsc = get_cycles_sync();
587 set_cyc2ns_scale(cpu_khz);
596 * Decide what mode gettimeofday should use.
598 void time_init_gtod(void)
602 if (unsynchronized_tsc())
605 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
606 vgetcpu_mode = VGETCPU_RDTSCP;
608 vgetcpu_mode = VGETCPU_LSL;
610 if (hpet_address && notsc) {
611 timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
613 vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
615 vxtime.last = hpet_readl(HPET_COUNTER);
616 vxtime.mode = VXTIME_HPET;
617 vxtime.hpet_address = hpet_address;
618 do_gettimeoffset = do_gettimeoffset_hpet;
619 #ifdef CONFIG_X86_PM_TIMER
620 /* Using PM for gettimeofday is quite slow, but we have no other
621 choice because the TSC is too unreliable on some systems. */
622 } else if (pmtmr_ioport && !hpet_address && notsc) {
624 do_gettimeoffset = do_gettimeoffset_pm;
625 vxtime.mode = VXTIME_PMTMR;
627 printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n");
630 timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC";
631 vxtime.mode = VXTIME_TSC;
634 printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n",
635 vxtime_hz / 1000000, vxtime_hz % 1000000, timename, timetype);
636 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
637 cpu_khz / 1000, cpu_khz % 1000);
638 vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
639 vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
640 vxtime.last_tsc = get_cycles_sync();
642 set_cyc2ns_scale(cpu_khz);
645 __setup("report_lost_ticks", time_setup);
647 static long clock_cmos_diff;
648 static unsigned long sleep_start;
651 * sysfs support for the timer.
654 static int timer_suspend(struct sys_device *dev, pm_message_t state)
657 * Estimate time zone so that set_time can update the clock
659 long cmos_time = get_cmos_time();
661 clock_cmos_diff = -cmos_time;
662 clock_cmos_diff += get_seconds();
663 sleep_start = cmos_time;
667 static int timer_resume(struct sys_device *dev)
671 unsigned long ctime = get_cmos_time();
672 long sleep_length = (ctime - sleep_start) * HZ;
674 if (sleep_length < 0) {
675 printk(KERN_WARNING "Time skew detected in timer resume!\n");
676 /* The time after the resume must not be earlier than the time
677 * before the suspend or some nasty things will happen
685 i8254_timer_resume();
687 sec = ctime + clock_cmos_diff;
688 write_seqlock_irqsave(&xtime_lock,flags);
691 if (vxtime.mode == VXTIME_HPET) {
693 vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
695 vxtime.last = hpet_readl(HPET_COUNTER);
696 #ifdef CONFIG_X86_PM_TIMER
697 } else if (vxtime.mode == VXTIME_PMTMR) {
701 vxtime.last_tsc = get_cycles_sync();
702 write_sequnlock_irqrestore(&xtime_lock,flags);
703 jiffies += sleep_length;
704 monotonic_base += sleep_length * (NSEC_PER_SEC/HZ);
705 touch_softlockup_watchdog();
709 static struct sysdev_class timer_sysclass = {
710 .resume = timer_resume,
711 .suspend = timer_suspend,
712 set_kset_name("timer"),
715 /* XXX this driverfs stuff should probably go elsewhere later -john */
716 static struct sys_device device_timer = {
718 .cls = &timer_sysclass,
721 static int time_init_device(void)
723 int error = sysdev_class_register(&timer_sysclass);
725 error = sysdev_register(&device_timer);
729 device_initcall(time_init_device);