// SPDX-License-Identifier: GPL-2.0
/*
 * Generic sched_clock() support, to extend low level hardware time
 * counters to full 64-bit ns values.
 */
#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/ktime.h>
#include <linux/kernel.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/syscore_ops.h>
#include <linux/hrtimer.h>
#include <linux/sched_clock.h>
#include <linux/seqlock.h>
#include <linux/bitops.h>

/**
 * struct clock_read_data - data required to read from sched_clock()
 *
 * @epoch_ns:		sched_clock() value at last update.
 * @epoch_cyc:		Clock cycle value at last update.
 * @sched_clock_mask:	Bitmask for two's complement subtraction of non-64-bit
 *			clocks.
 * @read_sched_clock:	Current clock source (or dummy source when suspended).
 * @mult:		Multiplier for scaled math conversion.
 * @shift:		Shift value for scaled math conversion.
 *
 * Care must be taken when updating this structure; it is read by
 * some very hot code paths. It occupies <=40 bytes and, when combined
 * with the seqcount used to synchronize access, comfortably fits into
 * a 64 byte cache line.
 */
struct clock_read_data {
	u64 epoch_ns;
	u64 epoch_cyc;
	u64 sched_clock_mask;
	u64 (*read_sched_clock)(void);
	u32 mult;
	u32 shift;
};

/**
 * struct clock_data - all data needed for sched_clock() (including
 *                     registration of a new clock source)
 *
 * @seq:		Sequence counter for protecting updates. The lowest
 *			bit is the index for @read_data.
 * @read_data:		Data required to read from sched_clock.
 * @wrap_kt:		Duration for which clock can run before wrapping.
 * @rate:		Tick rate of the registered clock.
 * @actual_read_sched_clock: Registered hardware level clock read function.
 *
 * The ordering of this structure has been chosen to optimize cache
 * performance. In particular 'seq' and 'read_data[0]' (combined) should fit
 * into a single 64-byte cache line.
 */
struct clock_data {
	seqcount_t		seq;
	struct clock_read_data	read_data[2];
	ktime_t			wrap_kt;
	unsigned long		rate;

	u64 (*actual_read_sched_clock)(void);
};

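/*
 * A rough size check of the claims above (assuming a 64-bit build without
 * lockdep debugging): struct clock_read_data is 3 x u64 + one function
 * pointer + 2 x u32 = 24 + 8 + 8 = 40 bytes, and seqcount_t is a single
 * unsigned int, so 'seq' plus 'read_data[0]' occupy roughly 48 bytes after
 * alignment padding and fit comfortably inside one 64-byte cache line.
 */
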
static struct hrtimer sched_clock_timer;
static int irqtime = -1;

core_param(irqtime, irqtime, int, 0400);

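/*
 * The 'irqtime' core parameter controls IRQ time accounting (see the check
 * in sched_clock_register() below): a positive value always enables it,
 * 0 leaves it disabled, and the default of -1 enables it only when the
 * registered clock runs at 1 MHz or faster.
 */
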
static u64 notrace jiffy_sched_clock_read(void)
{
	/*
	 * We don't need to use get_jiffies_64 on 32-bit arches here
	 * because we register with BITS_PER_LONG
	 */
	return (u64)(jiffies - INITIAL_JIFFIES);
}

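/*
 * Note: jiffies starts at INITIAL_JIFFIES, a value chosen so that the
 * counter wraps shortly after boot to flush out wrap bugs; subtracting it
 * in jiffy_sched_clock_read() above makes the fallback clock start near
 * zero instead.
 */
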
static struct clock_data cd ____cacheline_aligned = {
	.read_data[0] = { .mult = NSEC_PER_SEC / HZ,
			  .read_sched_clock = jiffy_sched_clock_read, },
	.actual_read_sched_clock = jiffy_sched_clock_read,
};

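/*
 * With the defaults above, 'shift' is implicitly zero, so each counted
 * jiffy converts to NSEC_PER_SEC / HZ nanoseconds. For example
 * (illustrative config), HZ=100 gives a 10,000,000 ns (10 ms) resolution
 * until a real clock source is registered.
 */
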
static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
{
	return (cyc * mult) >> shift;
}

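/*
 * Worked example of the scaled math (illustrative values only, not
 * necessarily what clocks_calc_mult_shift() would pick): a 1 MHz counter
 * advances 1000 ns per cycle, which can be encoded as mult = 1024000 and
 * shift = 10. Then cyc_to_ns(5, 1024000, 10) = 5120000 >> 10 = 5000 ns,
 * i.e. 5 us for 5 cycles.
 */
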
unsigned long long notrace sched_clock(void)
{
	u64 cyc, res;
	unsigned int seq;
	struct clock_read_data *rd;

	do {
		seq = raw_read_seqcount(&cd.seq);
		rd = cd.read_data + (seq & 1);

		cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
		      rd->sched_clock_mask;
		res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift);
	} while (read_seqcount_retry(&cd.seq, seq));

	return res;
}

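/*
 * The masked subtraction above keeps working across counter wrap. For
 * example (illustrative), with a 32-bit mask, epoch_cyc = 0xFFFFFFF0 and a
 * fresh read of 0x0000000F: (0x0F - 0xFFFFFFF0) & 0xFFFFFFFF = 0x1F, i.e.
 * 31 elapsed cycles, even though the raw counter value went "backwards".
 */
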
/*
 * Updating the data required to read the clock.
 *
 * sched_clock() will never observe mis-matched data even if called from
 * an NMI. We do this by maintaining an odd/even copy of the data and
 * steering sched_clock() to one or the other using a sequence counter.
 * In order to preserve the data cache profile of sched_clock() as much
 * as possible the system reverts back to the even copy when the update
 * completes; the odd copy is used *only* during an update.
 */
static void update_clock_read_data(struct clock_read_data *rd)
{
	/* update the backup (odd) copy with the new data */
	cd.read_data[1] = *rd;

	/* steer readers towards the odd copy */
	raw_write_seqcount_latch(&cd.seq);

	/* now it's safe for us to update the normal (even) copy */
	cd.read_data[0] = *rd;

	/* switch readers back to the even copy */
	raw_write_seqcount_latch(&cd.seq);
}

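/*
 * Brief sketch of why this is NMI-safe: a reader that samples an odd
 * sequence value is steered to read_data[1], which was completely written
 * before the first latch increment above; if the writer increments the
 * sequence again before the reader finishes, read_seqcount_retry() in
 * sched_clock() forces another pass.
 */
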
/*
 * Atomically update the sched_clock() epoch.
 */
static void update_sched_clock(void)
{
	u64 cyc;
	u64 ns;
	struct clock_read_data rd;

	rd = cd.read_data[0];

	cyc = cd.actual_read_sched_clock();
	ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);

	rd.epoch_ns = ns;
	rd.epoch_cyc = cyc;

	update_clock_read_data(&rd);
}

static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
{
	update_sched_clock();
	hrtimer_forward_now(hrt, cd.wrap_kt);

	return HRTIMER_RESTART;
}

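/*
 * The poll timer above re-arms itself every 'wrap_kt', so the epoch in the
 * read data is refreshed before the masked counter delta can overflow the
 * registered counter width; sched_clock() therefore always computes a
 * meaningful delta against a recent epoch.
 */
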
void __init
sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
{
	u64 res, wrap, new_mask, new_epoch, cyc, ns;
	u32 new_mult, new_shift;
	unsigned long r;
	char r_unit;
	struct clock_read_data rd;

	if (cd.rate > rate)
		return;

	WARN_ON(!irqs_disabled());

	/* Calculate the mult/shift to convert counter ticks to ns. */
	clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600);

	new_mask = CLOCKSOURCE_MASK(bits);
	cd.rate = rate;

	/* Calculate how many nanosecs until we risk wrapping */
	wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL);
	cd.wrap_kt = ns_to_ktime(wrap);

	rd = cd.read_data[0];

	/* Update epoch for new counter and update 'epoch_ns' from old counter */
	new_epoch = read();
	cyc = cd.actual_read_sched_clock();
	ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
	cd.actual_read_sched_clock = read;

	rd.read_sched_clock	= read;
	rd.sched_clock_mask	= new_mask;
	rd.mult			= new_mult;
	rd.shift		= new_shift;
	rd.epoch_cyc		= new_epoch;
	rd.epoch_ns		= ns;

	update_clock_read_data(&rd);

	if (sched_clock_timer.function != NULL) {
		/* update timeout for clock wrap */
		hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
	}

	r = rate;
	if (r >= 4000000) {
		r /= 1000000;
		r_unit = 'M';
	} else {
		if (r >= 1000) {
			r /= 1000;
			r_unit = 'k';
		} else {
			r_unit = ' ';
		}
	}

	/* Calculate the ns resolution of this counter */
	res = cyc_to_ns(1ULL, new_mult, new_shift);

	pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n",
		bits, r, r_unit, res, wrap);

	/* Enable IRQ time accounting if we have a fast enough sched_clock() */
	if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
		enable_sched_clock_irqtime();

	pr_debug("Registered %pS as sched_clock source\n", read);
}

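/*
 * Illustrative usage only (the names below are hypothetical, not part of
 * this file): a timer driver with a free-running 32-bit counter at 24 MHz
 * would typically register from its init code as
 *
 *	static u64 notrace my_timer_sched_clock_read(void)
 *	{
 *		return readl_relaxed(my_timer_base + MY_TIMER_COUNT);
 *	}
 *
 *	sched_clock_register(my_timer_sched_clock_read, 32, 24000000);
 *
 * With those parameters the counter wraps roughly every three minutes, so
 * the poll timer keeps the epoch fresh well within that window.
 */
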
void __init generic_sched_clock_init(void)
{
	/*
	 * If no sched_clock() function has been provided at that point,
	 * make it the final one.
	 */
	if (cd.actual_read_sched_clock == jiffy_sched_clock_read)
		sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ);

	update_sched_clock();

	/*
	 * Start the timer to keep sched_clock() properly updated and
	 * set the initial epoch.
	 */
	hrtimer_init(&sched_clock_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	sched_clock_timer.function = sched_clock_poll;
	hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
}

/*
 * Clock read function for use when the clock is suspended.
 *
 * This function makes it appear to sched_clock() as if the clock
 * stopped counting at its last update.
 *
 * This function must only be called from the critical
 * section in sched_clock(). It relies on the read_seqcount_retry()
 * at the end of the critical section to be sure we observe the
 * correct copy of 'epoch_cyc'.
 */
static u64 notrace suspended_sched_clock_read(void)
{
	unsigned int seq = raw_read_seqcount(&cd.seq);

	return cd.read_data[seq & 1].epoch_cyc;
}

int sched_clock_suspend(void)
{
	struct clock_read_data *rd = &cd.read_data[0];

	update_sched_clock();
	hrtimer_cancel(&sched_clock_timer);
	rd->read_sched_clock = suspended_sched_clock_read;

	return 0;
}

void sched_clock_resume(void)
{
	struct clock_read_data *rd = &cd.read_data[0];

	rd->epoch_cyc = cd.actual_read_sched_clock();
	hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
	rd->read_sched_clock = cd.actual_read_sched_clock;
}

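/*
 * Note on the resume ordering above: 'epoch_cyc' is refreshed from the
 * hardware counter before the real read function is restored, so the first
 * sched_clock() call after resume sees only a small delta rather than
 * charging the time spent suspended.
 */
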
static struct syscore_ops sched_clock_ops = {
	.suspend = sched_clock_suspend,
	.resume = sched_clock_resume,
};

static int __init sched_clock_syscore_init(void)
{
	register_syscore_ops(&sched_clock_ops);

	return 0;
}
device_initcall(sched_clock_syscore_init);