Commit | Line | Data |
---|---|---|
2aae950b AK |
1 | /* |
2 | * Copyright 2006 Andi Kleen, SUSE Labs. | |
3 | * Subject to the GNU Public License, v.2 | |
4 | * | |
f144a6b4 | 5 | * Fast user context implementation of clock_gettime, gettimeofday, and time. |
2aae950b | 6 | * |
7a59ed41 SS |
7 | * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> |
8 | * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany | |
9 | * | |
2aae950b AK |
10 | * The code should have no internal unresolved relocations. |
11 | * Check with readelf after changing. | |
2aae950b AK |
12 | */ |
13 | ||
7a59ed41 | 14 | #include <uapi/linux/time.h> |
2aae950b | 15 | #include <asm/vgtod.h> |
2aae950b | 16 | #include <asm/hpet.h> |
7c03156f | 17 | #include <asm/vvar.h> |
2aae950b | 18 | #include <asm/unistd.h> |
7c03156f SS |
19 | #include <asm/msr.h> |
20 | #include <linux/math64.h> | |
21 | #include <linux/time.h> | |
2aae950b | 22 | |
8c49d9a7 | 23 | #define gtod (&VVAR(vsyscall_gtod_data)) |
2aae950b | 24 | |
7a59ed41 SS |
25 | extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); |
26 | extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); | |
27 | extern time_t __vdso_time(time_t *t); | |
28 | ||
7c03156f | 29 | #ifdef CONFIG_HPET_TIMER |
f40c3300 AL |
30 | extern u8 hpet_page |
31 | __attribute__((visibility("hidden"))); | |
32 | ||
33 | static notrace cycle_t vread_hpet(void) | |
7c03156f | 34 | { |
f40c3300 | 35 | return *(const volatile u32 *)(&hpet_page + HPET_COUNTER); |
7c03156f SS |
36 | } |
37 | #endif | |
38 | ||
7a59ed41 SS |
39 | #ifndef BUILD_VDSO32 |
40 | ||
7c03156f SS |
41 | #include <linux/kernel.h> |
42 | #include <asm/vsyscall.h> | |
43 | #include <asm/fixmap.h> | |
44 | #include <asm/pvclock.h> | |
45 | ||
411f790c SS |
46 | notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) |
47 | { | |
48 | long ret; | |
49 | asm("syscall" : "=a" (ret) : | |
50 | "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory"); | |
51 | return ret; | |
98d0ac38 AL |
52 | } |
53 | ||
411f790c | 54 | notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) |
98d0ac38 | 55 | { |
411f790c SS |
56 | long ret; |
57 | ||
58 | asm("syscall" : "=a" (ret) : | |
59 | "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); | |
60 | return ret; | |
98d0ac38 AL |
61 | } |
62 | ||
51c19b4f MT |
63 | #ifdef CONFIG_PARAVIRT_CLOCK |
64 | ||
65 | static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu) | |
66 | { | |
67 | const struct pvclock_vsyscall_time_info *pvti_base; | |
68 | int idx = cpu / (PAGE_SIZE/PVTI_SIZE); | |
69 | int offset = cpu % (PAGE_SIZE/PVTI_SIZE); | |
70 | ||
71 | BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END); | |
72 | ||
73 | pvti_base = (struct pvclock_vsyscall_time_info *) | |
74 | __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx); | |
75 | ||
76 | return &pvti_base[offset]; | |
77 | } | |
78 | ||
79 | static notrace cycle_t vread_pvclock(int *mode) | |
80 | { | |
81 | const struct pvclock_vsyscall_time_info *pvti; | |
82 | cycle_t ret; | |
83 | u64 last; | |
84 | u32 version; | |
51c19b4f MT |
85 | u8 flags; |
86 | unsigned cpu, cpu1; | |
87 | ||
88 | ||
89 | /* | |
e04c5d76 MT |
90 | * Note: hypervisor must guarantee that: |
91 | * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. | |
92 | * 2. that per-CPU pvclock time info is updated if the | |
93 | * underlying CPU changes. | |
94 | * 3. that version is increased whenever underlying CPU | |
95 | * changes. | |
96 | * | |
51c19b4f MT |
97 | */ |
98 | do { | |
99 | cpu = __getcpu() & VGETCPU_CPU_MASK; | |
100 | /* TODO: We can put vcpu id into higher bits of pvti.version. | |
101 | * This will save a couple of cycles by getting rid of | |
102 | * __getcpu() calls (Gleb). | |
103 | */ | |
104 | ||
105 | pvti = get_pvti(cpu); | |
106 | ||
51c19b4f MT |
107 | version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); |
108 | ||
109 | /* | |
110 | * Test we're still on the cpu as well as the version. | |
111 | * We could have been migrated just after the first | |
112 | * vgetcpu but before fetching the version, so we | |
113 | * wouldn't notice a version change. | |
114 | */ | |
115 | cpu1 = __getcpu() & VGETCPU_CPU_MASK; | |
116 | } while (unlikely(cpu != cpu1 || | |
117 | (pvti->pvti.version & 1) || | |
e04c5d76 | 118 | pvti->pvti.version != version)); |
51c19b4f MT |
119 | |
120 | if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) | |
121 | *mode = VCLOCK_NONE; | |
122 | ||
123 | /* refer to tsc.c read_tsc() comment for rationale */ | |
7c03156f | 124 | last = gtod->cycle_last; |
51c19b4f MT |
125 | |
126 | if (likely(ret >= last)) | |
127 | return ret; | |
128 | ||
129 | return last; | |
130 | } | |
131 | #endif | |
132 | ||
7a59ed41 SS |
133 | #else |
134 | ||
7a59ed41 SS |
135 | notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) |
136 | { | |
137 | long ret; | |
138 | ||
139 | asm( | |
140 | "mov %%ebx, %%edx \n" | |
141 | "mov %2, %%ebx \n" | |
6f121e54 | 142 | "call __kernel_vsyscall \n" |
7a59ed41 SS |
143 | "mov %%edx, %%ebx \n" |
144 | : "=a" (ret) | |
145 | : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) | |
146 | : "memory", "edx"); | |
147 | return ret; | |
148 | } | |
149 | ||
150 | notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) | |
151 | { | |
152 | long ret; | |
153 | ||
154 | asm( | |
155 | "mov %%ebx, %%edx \n" | |
156 | "mov %2, %%ebx \n" | |
6f121e54 | 157 | "call __kernel_vsyscall \n" |
7a59ed41 SS |
158 | "mov %%edx, %%ebx \n" |
159 | : "=a" (ret) | |
160 | : "0" (__NR_gettimeofday), "g" (tv), "c" (tz) | |
161 | : "memory", "edx"); | |
162 | return ret; | |
163 | } | |
164 | ||
165 | #ifdef CONFIG_PARAVIRT_CLOCK | |
166 | ||
167 | static notrace cycle_t vread_pvclock(int *mode) | |
168 | { | |
169 | *mode = VCLOCK_NONE; | |
170 | return 0; | |
171 | } | |
172 | #endif | |
173 | ||
174 | #endif | |
175 | ||
411f790c | 176 | notrace static cycle_t vread_tsc(void) |
2aae950b | 177 | { |
411f790c SS |
178 | cycle_t ret; |
179 | u64 last; | |
2aae950b | 180 | |
411f790c SS |
181 | /* |
182 | * Empirically, a fence (of type that depends on the CPU) | |
183 | * before rdtsc is enough to ensure that rdtsc is ordered | |
184 | * with respect to loads. The various CPU manuals are unclear | |
185 | * as to whether rdtsc can be reordered with later loads, | |
186 | * but no one has ever seen it happen. | |
187 | */ | |
188 | rdtsc_barrier(); | |
7a59ed41 | 189 | ret = (cycle_t)__native_read_tsc(); |
a939e817 | 190 | |
7c03156f | 191 | last = gtod->cycle_last; |
a939e817 | 192 | |
411f790c SS |
193 | if (likely(ret >= last)) |
194 | return ret; | |
195 | ||
196 | /* | |
197 | * GCC likes to generate cmov here, but this branch is extremely | |
198 | * predictable (it's just a funciton of time and the likely is | |
199 | * very likely) and there's a data dependence, so force GCC | |
200 | * to generate a branch instead. I don't barrier() because | |
201 | * we don't actually need a barrier, and if this function | |
202 | * ever gets inlined it will generate worse code. | |
203 | */ | |
204 | asm volatile (""); | |
205 | return last; | |
206 | } | |
a939e817 | 207 | |
51c19b4f | 208 | notrace static inline u64 vgetsns(int *mode) |
2aae950b | 209 | { |
7a59ed41 | 210 | u64 v; |
98d0ac38 | 211 | cycles_t cycles; |
7c03156f SS |
212 | |
213 | if (gtod->vclock_mode == VCLOCK_TSC) | |
98d0ac38 | 214 | cycles = vread_tsc(); |
7a59ed41 | 215 | #ifdef CONFIG_HPET_TIMER |
7c03156f | 216 | else if (gtod->vclock_mode == VCLOCK_HPET) |
98d0ac38 | 217 | cycles = vread_hpet(); |
7a59ed41 | 218 | #endif |
51c19b4f | 219 | #ifdef CONFIG_PARAVIRT_CLOCK |
7c03156f | 220 | else if (gtod->vclock_mode == VCLOCK_PVCLOCK) |
51c19b4f MT |
221 | cycles = vread_pvclock(mode); |
222 | #endif | |
a939e817 JS |
223 | else |
224 | return 0; | |
7c03156f SS |
225 | v = (cycles - gtod->cycle_last) & gtod->mask; |
226 | return v * gtod->mult; | |
2aae950b AK |
227 | } |
228 | ||
5f293474 AL |
229 | /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */ |
230 | notrace static int __always_inline do_realtime(struct timespec *ts) | |
2aae950b | 231 | { |
650ea024 JS |
232 | unsigned long seq; |
233 | u64 ns; | |
a939e817 JS |
234 | int mode; |
235 | ||
2aae950b | 236 | do { |
7c03156f SS |
237 | seq = gtod_read_begin(gtod); |
238 | mode = gtod->vclock_mode; | |
2aae950b | 239 | ts->tv_sec = gtod->wall_time_sec; |
650ea024 | 240 | ns = gtod->wall_time_snsec; |
51c19b4f | 241 | ns += vgetsns(&mode); |
7c03156f SS |
242 | ns >>= gtod->shift; |
243 | } while (unlikely(gtod_read_retry(gtod, seq))); | |
244 | ||
245 | ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); | |
246 | ts->tv_nsec = ns; | |
a939e817 | 247 | |
a939e817 | 248 | return mode; |
2aae950b AK |
249 | } |
250 | ||
7a59ed41 | 251 | notrace static int __always_inline do_monotonic(struct timespec *ts) |
2aae950b | 252 | { |
650ea024 JS |
253 | unsigned long seq; |
254 | u64 ns; | |
a939e817 JS |
255 | int mode; |
256 | ||
2aae950b | 257 | do { |
7c03156f SS |
258 | seq = gtod_read_begin(gtod); |
259 | mode = gtod->vclock_mode; | |
91ec87d5 | 260 | ts->tv_sec = gtod->monotonic_time_sec; |
650ea024 | 261 | ns = gtod->monotonic_time_snsec; |
51c19b4f | 262 | ns += vgetsns(&mode); |
7c03156f SS |
263 | ns >>= gtod->shift; |
264 | } while (unlikely(gtod_read_retry(gtod, seq))); | |
265 | ||
266 | ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); | |
267 | ts->tv_nsec = ns; | |
0f51f285 | 268 | |
a939e817 | 269 | return mode; |
2aae950b AK |
270 | } |
271 | ||
ce39c640 | 272 | notrace static void do_realtime_coarse(struct timespec *ts) |
da15cfda | 273 | { |
274 | unsigned long seq; | |
275 | do { | |
7c03156f SS |
276 | seq = gtod_read_begin(gtod); |
277 | ts->tv_sec = gtod->wall_time_coarse_sec; | |
278 | ts->tv_nsec = gtod->wall_time_coarse_nsec; | |
279 | } while (unlikely(gtod_read_retry(gtod, seq))); | |
da15cfda | 280 | } |
281 | ||
ce39c640 | 282 | notrace static void do_monotonic_coarse(struct timespec *ts) |
da15cfda | 283 | { |
91ec87d5 | 284 | unsigned long seq; |
da15cfda | 285 | do { |
7c03156f SS |
286 | seq = gtod_read_begin(gtod); |
287 | ts->tv_sec = gtod->monotonic_time_coarse_sec; | |
288 | ts->tv_nsec = gtod->monotonic_time_coarse_nsec; | |
289 | } while (unlikely(gtod_read_retry(gtod, seq))); | |
da15cfda | 290 | } |
291 | ||
23adec55 | 292 | notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) |
2aae950b | 293 | { |
0d7b8547 AL |
294 | switch (clock) { |
295 | case CLOCK_REALTIME: | |
ce39c640 SS |
296 | if (do_realtime(ts) == VCLOCK_NONE) |
297 | goto fallback; | |
0d7b8547 AL |
298 | break; |
299 | case CLOCK_MONOTONIC: | |
ce39c640 SS |
300 | if (do_monotonic(ts) == VCLOCK_NONE) |
301 | goto fallback; | |
0d7b8547 AL |
302 | break; |
303 | case CLOCK_REALTIME_COARSE: | |
ce39c640 SS |
304 | do_realtime_coarse(ts); |
305 | break; | |
0d7b8547 | 306 | case CLOCK_MONOTONIC_COARSE: |
ce39c640 SS |
307 | do_monotonic_coarse(ts); |
308 | break; | |
309 | default: | |
310 | goto fallback; | |
0d7b8547 AL |
311 | } |
312 | ||
a939e817 | 313 | return 0; |
ce39c640 SS |
314 | fallback: |
315 | return vdso_fallback_gettime(clock, ts); | |
2aae950b AK |
316 | } |
317 | int clock_gettime(clockid_t, struct timespec *) | |
318 | __attribute__((weak, alias("__vdso_clock_gettime"))); | |
319 | ||
23adec55 | 320 | notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) |
2aae950b | 321 | { |
a939e817 | 322 | if (likely(tv != NULL)) { |
0df1ea2b SS |
323 | if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE)) |
324 | return vdso_fallback_gtod(tv, tz); | |
a939e817 | 325 | tv->tv_usec /= 1000; |
2aae950b | 326 | } |
a939e817 | 327 | if (unlikely(tz != NULL)) { |
7c03156f SS |
328 | tz->tz_minuteswest = gtod->tz_minuteswest; |
329 | tz->tz_dsttime = gtod->tz_dsttime; | |
a939e817 JS |
330 | } |
331 | ||
a939e817 | 332 | return 0; |
2aae950b AK |
333 | } |
334 | int gettimeofday(struct timeval *, struct timezone *) | |
335 | __attribute__((weak, alias("__vdso_gettimeofday"))); | |
f144a6b4 | 336 | |
0d7b8547 AL |
337 | /* |
338 | * This will break when the xtime seconds get inaccurate, but that is | |
339 | * unlikely | |
340 | */ | |
f144a6b4 AL |
341 | notrace time_t __vdso_time(time_t *t) |
342 | { | |
7a59ed41 | 343 | /* This is atomic on x86 so we don't need any locks. */ |
af8c93d8 | 344 | time_t result = ACCESS_ONCE(gtod->wall_time_sec); |
f144a6b4 AL |
345 | |
346 | if (t) | |
347 | *t = result; | |
348 | return result; | |
349 | } | |
350 | int time(time_t *t) | |
351 | __attribute__((weak, alias("__vdso_time"))); |