Commit | Line | Data |
---|---|---|
7e300dab | 1 | // SPDX-License-Identifier: GPL-2.0-only |
2aae950b AK |
2 | /* |
3 | * Copyright 2006 Andi Kleen, SUSE Labs. | |
2aae950b | 4 | * |
f144a6b4 | 5 | * Fast user context implementation of clock_gettime, gettimeofday, and time. |
2aae950b | 6 | * |
7a59ed41 SS |
7 | * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> |
8 | * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany | |
9 | * | |
2aae950b AK |
10 | * The code should have no internal unresolved relocations. |
11 | * Check with readelf after changing. | |
2aae950b AK |
12 | */ |
13 | ||
7a59ed41 | 14 | #include <uapi/linux/time.h> |
2aae950b | 15 | #include <asm/vgtod.h> |
7c03156f | 16 | #include <asm/vvar.h> |
2aae950b | 17 | #include <asm/unistd.h> |
7c03156f | 18 | #include <asm/msr.h> |
76480a6a | 19 | #include <asm/pvclock.h> |
90b20432 | 20 | #include <asm/mshyperv.h> |
7c03156f SS |
21 | #include <linux/math64.h> |
22 | #include <linux/time.h> | |
76480a6a | 23 | #include <linux/kernel.h> |
2aae950b | 24 | |
8c49d9a7 | 25 | #define gtod (&VVAR(vsyscall_gtod_data)) |
2aae950b | 26 | |
7a59ed41 SS |
27 | extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); |
28 | extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); | |
29 | extern time_t __vdso_time(time_t *t); | |
30 | ||
dac16fba | 31 | #ifdef CONFIG_PARAVIRT_CLOCK |
459e3a21 | 32 | extern u8 pvclock_page[PAGE_SIZE] |
dac16fba AL |
33 | __attribute__((visibility("hidden"))); |
34 | #endif | |
35 | ||
90b20432 | 36 | #ifdef CONFIG_HYPERV_TSCPAGE |
459e3a21 | 37 | extern u8 hvclock_page[PAGE_SIZE] |
90b20432 VK |
38 | __attribute__((visibility("hidden"))); |
39 | #endif | |
40 | ||
7a59ed41 SS |
41 | #ifndef BUILD_VDSO32 |
42 | ||
411f790c SS |
43 | notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) |
44 | { | |
45 | long ret; | |
715bd9d1 AL |
46 | asm ("syscall" : "=a" (ret), "=m" (*ts) : |
47 | "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : | |
89fe0a1f | 48 | "rcx", "r11"); |
411f790c | 49 | return ret; |
98d0ac38 AL |
50 | } |
51 | ||
76480a6a AL |
52 | #else |
53 | ||
54 | notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) | |
55 | { | |
56 | long ret; | |
57 | ||
715bd9d1 | 58 | asm ( |
76480a6a | 59 | "mov %%ebx, %%edx \n" |
02e42566 | 60 | "mov %[clock], %%ebx \n" |
76480a6a AL |
61 | "call __kernel_vsyscall \n" |
62 | "mov %%edx, %%ebx \n" | |
715bd9d1 | 63 | : "=a" (ret), "=m" (*ts) |
02e42566 | 64 | : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts) |
89fe0a1f | 65 | : "edx"); |
76480a6a AL |
66 | return ret; |
67 | } | |
68 | ||
76480a6a AL |
69 | #endif |
70 | ||
#ifdef CONFIG_PARAVIRT_CLOCK
/* Reinterpret the start of pvclock_page as a pvclock_vsyscall_time_info. */
static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
{
	return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
}

/*
 * Read the paravirtual clock from the first pvti entry of pvclock_page.
 *
 * Returns the value computed by __pvclock_read_cycles() from a consistent
 * snapshot, or U64_MAX if PVCLOCK_TSC_STABLE_BIT is not set in the flags.
 */
static notrace u64 vread_pvclock(void)
{
	const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
	u64 cycles;
	u32 seq;

	/*
	 * Note: The kernel and hypervisor must guarantee that cpu ID
	 * number maps 1:1 to per-CPU pvclock time info.
	 *
	 * Because the hypervisor is entirely unaware of guest userspace
	 * preemption, it cannot guarantee that per-CPU pvclock time
	 * info is updated if the underlying CPU changes or that the
	 * version is increased whenever the underlying CPU changes.
	 *
	 * On KVM, we are guaranteed that pvti updates for any vCPU are
	 * atomic as seen by *all* vCPUs.  This is an even stronger
	 * guarantee than we get with a normal seqlock.
	 *
	 * On Xen, we don't appear to have that guarantee, but Xen still
	 * supplies a valid seqlock using the version field.
	 *
	 * We only do pvclock vdso timing at all if
	 * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
	 * mean that all vCPUs have matching pvti and that the TSC is
	 * synced, so we can just look at vCPU 0's pvti.
	 */

	do {
		seq = pvclock_read_begin(pvti);

		/* Checked inside the loop, i.e. after each read_begin. */
		if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)))
			return U64_MAX;

		cycles = __pvclock_read_cycles(pvti, rdtsc_ordered());
	} while (pvclock_read_retry(pvti, seq));

	return cycles;
}
#endif
#ifdef CONFIG_HYPERV_TSCPAGE
/*
 * Read the clock via hv_read_tsc_page(), treating hvclock_page as a
 * struct ms_hyperv_tsc_page.  The helper's result is returned unchanged.
 */
static notrace u64 vread_hvclock(void)
{
	return hv_read_tsc_page((const struct ms_hyperv_tsc_page *)&hvclock_page);
}
#endif
51c19b4f | 126 | |
4f72adc5 | 127 | notrace static inline u64 vgetcyc(int mode) |
2aae950b | 128 | { |
4f72adc5 | 129 | if (mode == VCLOCK_TSC) |
3e89bf35 | 130 | return (u64)rdtsc_ordered(); |
ff17bbe0 AL |
131 | |
132 | /* | |
133 | * For any memory-mapped vclock type, we need to make sure that gcc | |
134 | * doesn't cleverly hoist a load before the mode check. Otherwise we | |
135 | * might end up touching the memory-mapped page even if the vclock in | |
136 | * question isn't enabled, which will segfault. Hence the barriers. | |
137 | */ | |
51c19b4f | 138 | #ifdef CONFIG_PARAVIRT_CLOCK |
ff17bbe0 AL |
139 | if (mode == VCLOCK_PVCLOCK) { |
140 | barrier(); | |
4f72adc5 | 141 | return vread_pvclock(); |
ff17bbe0 | 142 | } |
90b20432 VK |
143 | #endif |
144 | #ifdef CONFIG_HYPERV_TSCPAGE | |
ff17bbe0 AL |
145 | if (mode == VCLOCK_HVCLOCK) { |
146 | barrier(); | |
4f72adc5 | 147 | return vread_hvclock(); |
ff17bbe0 | 148 | } |
51c19b4f | 149 | #endif |
4f72adc5 | 150 | return U64_MAX; |
2aae950b AK |
151 | } |
152 | ||
e9a62f76 | 153 | notrace static int do_hres(clockid_t clk, struct timespec *ts) |
2aae950b | 154 | { |
e9a62f76 | 155 | struct vgtod_ts *base = >od->basetime[clk]; |
99c19e6a | 156 | u64 cycles, last, sec, ns; |
77e9c678 | 157 | unsigned int seq; |
a939e817 | 158 | |
2aae950b | 159 | do { |
7c03156f | 160 | seq = gtod_read_begin(gtod); |
99c19e6a | 161 | cycles = vgetcyc(gtod->vclock_mode); |
49116f20 | 162 | ns = base->nsec; |
3e89bf35 | 163 | last = gtod->cycle_last; |
4f72adc5 TG |
164 | if (unlikely((s64)cycles < 0)) |
165 | return vdso_fallback_gettime(clk, ts); | |
3e89bf35 TG |
166 | if (cycles > last) |
167 | ns += (cycles - last) * gtod->mult; | |
7c03156f | 168 | ns >>= gtod->shift; |
99c19e6a | 169 | sec = base->sec; |
7c03156f SS |
170 | } while (unlikely(gtod_read_retry(gtod, seq))); |
171 | ||
99c19e6a AL |
172 | /* |
173 | * Do this outside the loop: a race inside the loop could result | |
174 | * in __iter_div_u64_rem() being extremely slow. | |
175 | */ | |
176 | ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); | |
7c03156f | 177 | ts->tv_nsec = ns; |
0f51f285 | 178 | |
4f72adc5 | 179 | return 0; |
2aae950b AK |
180 | } |
181 | ||
6deec5bd | 182 | notrace static void do_coarse(clockid_t clk, struct timespec *ts) |
da15cfda | 183 | { |
6deec5bd | 184 | struct vgtod_ts *base = >od->basetime[clk]; |
77e9c678 | 185 | unsigned int seq; |
49116f20 | 186 | |
da15cfda | 187 | do { |
7c03156f | 188 | seq = gtod_read_begin(gtod); |
49116f20 TG |
189 | ts->tv_sec = base->sec; |
190 | ts->tv_nsec = base->nsec; | |
7c03156f | 191 | } while (unlikely(gtod_read_retry(gtod, seq))); |
da15cfda | 192 | } |
193 | ||
23adec55 | 194 | notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) |
2aae950b | 195 | { |
f3e83938 | 196 | unsigned int msk; |
0d7b8547 | 197 | |
f3e83938 TG |
198 | /* Sort out negative (CPU/FD) and invalid clocks */ |
199 | if (unlikely((unsigned int) clock >= MAX_CLOCKS)) | |
200 | return vdso_fallback_gettime(clock, ts); | |
201 | ||
202 | /* | |
203 | * Convert the clockid to a bitmask and use it to check which | |
204 | * clocks are handled in the VDSO directly. | |
205 | */ | |
206 | msk = 1U << clock; | |
207 | if (likely(msk & VGTOD_HRES)) { | |
4f72adc5 | 208 | return do_hres(clock, ts); |
f3e83938 TG |
209 | } else if (msk & VGTOD_COARSE) { |
210 | do_coarse(clock, ts); | |
211 | return 0; | |
212 | } | |
ce39c640 | 213 | return vdso_fallback_gettime(clock, ts); |
2aae950b | 214 | } |
f3e83938 | 215 | |
2aae950b AK |
216 | int clock_gettime(clockid_t, struct timespec *) |
217 | __attribute__((weak, alias("__vdso_clock_gettime"))); | |
218 | ||
23adec55 | 219 | notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) |
2aae950b | 220 | { |
a939e817 | 221 | if (likely(tv != NULL)) { |
e9a62f76 TG |
222 | struct timespec *ts = (struct timespec *) tv; |
223 | ||
4f72adc5 | 224 | do_hres(CLOCK_REALTIME, ts); |
a939e817 | 225 | tv->tv_usec /= 1000; |
2aae950b | 226 | } |
a939e817 | 227 | if (unlikely(tz != NULL)) { |
7c03156f SS |
228 | tz->tz_minuteswest = gtod->tz_minuteswest; |
229 | tz->tz_dsttime = gtod->tz_dsttime; | |
a939e817 JS |
230 | } |
231 | ||
a939e817 | 232 | return 0; |
2aae950b AK |
233 | } |
234 | int gettimeofday(struct timeval *, struct timezone *) | |
235 | __attribute__((weak, alias("__vdso_gettimeofday"))); | |
f144a6b4 | 236 | |
0d7b8547 AL |
237 | /* |
238 | * This will break when the xtime seconds get inaccurate, but that is | |
239 | * unlikely | |
240 | */ | |
f144a6b4 AL |
241 | notrace time_t __vdso_time(time_t *t) |
242 | { | |
7a59ed41 | 243 | /* This is atomic on x86 so we don't need any locks. */ |
49116f20 | 244 | time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec); |
f144a6b4 AL |
245 | |
246 | if (t) | |
247 | *t = result; | |
248 | return result; | |
249 | } | |
88edb57d | 250 | time_t time(time_t *t) |
f144a6b4 | 251 | __attribute__((weak, alias("__vdso_time"))); |