Commit | Line | Data |
---|---|---|
52a65ff5 | 1 | // SPDX-License-Identifier: GPL-2.0 |
f3f59fbc TG |
2 | // Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> |
3 | ||
e1c92149 | 4 | #include <linux/kernel.h> |
b2d3d61a | 5 | #include <linux/percpu.h> |
e1c92149 | 6 | #include <linux/slab.h> |
b2d3d61a DL |
7 | #include <linux/static_key.h> |
8 | #include <linux/interrupt.h> | |
e1c92149 | 9 | #include <linux/idr.h> |
b2d3d61a | 10 | #include <linux/irq.h> |
e1c92149 DL |
11 | #include <linux/math64.h> |
12 | ||
13 | #include <trace/events/irq.h> | |
b2d3d61a DL |
14 | |
15 | #include "internals.h" | |
16 | ||
17 | DEFINE_STATIC_KEY_FALSE(irq_timing_enabled); | |
18 | ||
19 | DEFINE_PER_CPU(struct irq_timings, irq_timings); | |
20 | ||
e1c92149 DL |
21 | struct irqt_stat { |
22 | u64 next_evt; | |
23 | u64 last_ts; | |
24 | u64 variance; | |
25 | u32 avg; | |
26 | u32 nr_samples; | |
27 | int anomalies; | |
28 | int valid; | |
29 | }; | |
30 | ||
31 | static DEFINE_IDR(irqt_stats); | |
32 | ||
b2d3d61a DL |
33 | void irq_timings_enable(void) |
34 | { | |
35 | static_branch_enable(&irq_timing_enabled); | |
36 | } | |
37 | ||
38 | void irq_timings_disable(void) | |
39 | { | |
40 | static_branch_disable(&irq_timing_enabled); | |
41 | } | |
e1c92149 DL |
42 | |
43 | /** | |
44 | * irqs_update - update the irq timing statistics with a new timestamp | |
45 | * | |
46 | * @irqs: an irqt_stat struct pointer | |
47 | * @ts: the new timestamp | |
48 | * | |
49 | * The statistics are computed online, in other words, the code is | |
50 | * designed to compute the statistics on a stream of values rather | |
51 | * than doing multiple passes on the values to compute the average, | |
52 | * then the variance. The integer division introduces a loss of | |
53 | * precision but with an acceptable error margin regarding the results | |
54 | * we would have with the double floating precision: we are dealing | |
55 | * with nanosec, so big numbers, consequently the mantisse is | |
56 | * negligeable, especially when converting the time in usec | |
57 | * afterwards. | |
58 | * | |
59 | * The computation happens at idle time. When the CPU is not idle, the | |
60 | * interrupts' timestamps are stored in the circular buffer, when the | |
61 | * CPU goes idle and this routine is called, all the buffer's values | |
62 | * are injected in the statistical model continuying to extend the | |
63 | * statistics from the previous busy-idle cycle. | |
64 | * | |
65 | * The observations showed a device will trigger a burst of periodic | |
66 | * interrupts followed by one or two peaks of longer time, for | |
67 | * instance when a SD card device flushes its cache, then the periodic | |
68 | * intervals occur again. A one second inactivity period resets the | |
69 | * stats, that gives us the certitude the statistical values won't | |
70 | * exceed 1x10^9, thus the computation won't overflow. | |
71 | * | |
72 | * Basically, the purpose of the algorithm is to watch the periodic | |
73 | * interrupts and eliminate the peaks. | |
74 | * | |
75 | * An interrupt is considered periodically stable if the interval of | |
76 | * its occurences follow the normal distribution, thus the values | |
77 | * comply with: | |
78 | * | |
79 | * avg - 3 x stddev < value < avg + 3 x stddev | |
80 | * | |
81 | * Which can be simplified to: | |
82 | * | |
83 | * -3 x stddev < value - avg < 3 x stddev | |
84 | * | |
85 | * abs(value - avg) < 3 x stddev | |
86 | * | |
87 | * In order to save a costly square root computation, we use the | |
88 | * variance. For the record, stddev = sqrt(variance). The equation | |
89 | * above becomes: | |
90 | * | |
91 | * abs(value - avg) < 3 x sqrt(variance) | |
92 | * | |
93 | * And finally we square it: | |
94 | * | |
95 | * (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2 | |
96 | * | |
97 | * (value - avg) x (value - avg) < 9 x variance | |
98 | * | |
99 | * Statistically speaking, any values out of this interval is | |
100 | * considered as an anomaly and is discarded. However, a normal | |
101 | * distribution appears when the number of samples is 30 (it is the | |
102 | * rule of thumb in statistics, cf. "30 samples" on Internet). When | |
103 | * there are three consecutive anomalies, the statistics are resetted. | |
104 | * | |
105 | */ | |
106 | static void irqs_update(struct irqt_stat *irqs, u64 ts) | |
107 | { | |
108 | u64 old_ts = irqs->last_ts; | |
109 | u64 variance = 0; | |
110 | u64 interval; | |
111 | s64 diff; | |
112 | ||
113 | /* | |
114 | * The timestamps are absolute time values, we need to compute | |
115 | * the timing interval between two interrupts. | |
116 | */ | |
117 | irqs->last_ts = ts; | |
118 | ||
119 | /* | |
120 | * The interval type is u64 in order to deal with the same | |
121 | * type in our computation, that prevent mindfuck issues with | |
122 | * overflow, sign and division. | |
123 | */ | |
124 | interval = ts - old_ts; | |
125 | ||
126 | /* | |
127 | * The interrupt triggered more than one second apart, that | |
128 | * ends the sequence as predictible for our purpose. In this | |
129 | * case, assume we have the beginning of a sequence and the | |
130 | * timestamp is the first value. As it is impossible to | |
131 | * predict anything at this point, return. | |
132 | * | |
133 | * Note the first timestamp of the sequence will always fall | |
134 | * in this test because the old_ts is zero. That is what we | |
135 | * want as we need another timestamp to compute an interval. | |
136 | */ | |
137 | if (interval >= NSEC_PER_SEC) { | |
138 | memset(irqs, 0, sizeof(*irqs)); | |
139 | irqs->last_ts = ts; | |
140 | return; | |
141 | } | |
142 | ||
143 | /* | |
144 | * Pre-compute the delta with the average as the result is | |
145 | * used several times in this function. | |
146 | */ | |
147 | diff = interval - irqs->avg; | |
148 | ||
149 | /* | |
150 | * Increment the number of samples. | |
151 | */ | |
152 | irqs->nr_samples++; | |
153 | ||
154 | /* | |
155 | * Online variance divided by the number of elements if there | |
156 | * is more than one sample. Normally the formula is division | |
157 | * by nr_samples - 1 but we assume the number of element will be | |
158 | * more than 32 and dividing by 32 instead of 31 is enough | |
159 | * precise. | |
160 | */ | |
161 | if (likely(irqs->nr_samples > 1)) | |
162 | variance = irqs->variance >> IRQ_TIMINGS_SHIFT; | |
163 | ||
164 | /* | |
165 | * The rule of thumb in statistics for the normal distribution | |
166 | * is having at least 30 samples in order to have the model to | |
167 | * apply. Values outside the interval are considered as an | |
168 | * anomaly. | |
169 | */ | |
170 | if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) { | |
171 | /* | |
172 | * After three consecutive anomalies, we reset the | |
173 | * stats as it is no longer stable enough. | |
174 | */ | |
175 | if (irqs->anomalies++ >= 3) { | |
176 | memset(irqs, 0, sizeof(*irqs)); | |
177 | irqs->last_ts = ts; | |
178 | return; | |
179 | } | |
180 | } else { | |
181 | /* | |
182 | * The anomalies must be consecutives, so at this | |
183 | * point, we reset the anomalies counter. | |
184 | */ | |
185 | irqs->anomalies = 0; | |
186 | } | |
187 | ||
188 | /* | |
189 | * The interrupt is considered stable enough to try to predict | |
190 | * the next event on it. | |
191 | */ | |
192 | irqs->valid = 1; | |
193 | ||
194 | /* | |
195 | * Online average algorithm: | |
196 | * | |
197 | * new_average = average + ((value - average) / count) | |
198 | * | |
199 | * The variance computation depends on the new average | |
200 | * to be computed here first. | |
201 | * | |
202 | */ | |
203 | irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT); | |
204 | ||
205 | /* | |
206 | * Online variance algorithm: | |
207 | * | |
208 | * new_variance = variance + (value - average) x (value - new_average) | |
209 | * | |
210 | * Warning: irqs->avg is updated with the line above, hence | |
211 | * 'interval - irqs->avg' is no longer equal to 'diff' | |
212 | */ | |
213 | irqs->variance = irqs->variance + (diff * (interval - irqs->avg)); | |
214 | ||
215 | /* | |
216 | * Update the next event | |
217 | */ | |
218 | irqs->next_evt = ts + irqs->avg; | |
219 | } | |
220 | ||
221 | /** | |
222 | * irq_timings_next_event - Return when the next event is supposed to arrive | |
223 | * | |
224 | * During the last busy cycle, the number of interrupts is incremented | |
225 | * and stored in the irq_timings structure. This information is | |
226 | * necessary to: | |
227 | * | |
228 | * - know if the index in the table wrapped up: | |
229 | * | |
230 | * If more than the array size interrupts happened during the | |
231 | * last busy/idle cycle, the index wrapped up and we have to | |
232 | * begin with the next element in the array which is the last one | |
233 | * in the sequence, otherwise it is a the index 0. | |
234 | * | |
235 | * - have an indication of the interrupts activity on this CPU | |
236 | * (eg. irq/sec) | |
237 | * | |
238 | * The values are 'consumed' after inserting in the statistical model, | |
239 | * thus the count is reinitialized. | |
240 | * | |
241 | * The array of values **must** be browsed in the time direction, the | |
242 | * timestamp must increase between an element and the next one. | |
243 | * | |
244 | * Returns a nanosec time based estimation of the earliest interrupt, | |
245 | * U64_MAX otherwise. | |
246 | */ | |
247 | u64 irq_timings_next_event(u64 now) | |
248 | { | |
249 | struct irq_timings *irqts = this_cpu_ptr(&irq_timings); | |
250 | struct irqt_stat *irqs; | |
251 | struct irqt_stat __percpu *s; | |
252 | u64 ts, next_evt = U64_MAX; | |
253 | int i, irq = 0; | |
254 | ||
255 | /* | |
256 | * This function must be called with the local irq disabled in | |
257 | * order to prevent the timings circular buffer to be updated | |
258 | * while we are reading it. | |
259 | */ | |
a934d4d1 | 260 | lockdep_assert_irqs_disabled(); |
e1c92149 DL |
261 | |
262 | /* | |
263 | * Number of elements in the circular buffer: If it happens it | |
264 | * was flushed before, then the number of elements could be | |
265 | * smaller than IRQ_TIMINGS_SIZE, so the count is used, | |
266 | * otherwise the array size is used as we wrapped. The index | |
267 | * begins from zero when we did not wrap. That could be done | |
268 | * in a nicer way with the proper circular array structure | |
269 | * type but with the cost of extra computation in the | |
270 | * interrupt handler hot path. We choose efficiency. | |
271 | * | |
272 | * Inject measured irq/timestamp to the statistical model | |
273 | * while decrementing the counter because we consume the data | |
274 | * from our circular buffer. | |
275 | */ | |
276 | for (i = irqts->count & IRQ_TIMINGS_MASK, | |
277 | irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count); | |
278 | irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) { | |
279 | ||
280 | irq = irq_timing_decode(irqts->values[i], &ts); | |
281 | ||
282 | s = idr_find(&irqt_stats, irq); | |
283 | if (s) { | |
284 | irqs = this_cpu_ptr(s); | |
285 | irqs_update(irqs, ts); | |
286 | } | |
287 | } | |
288 | ||
289 | /* | |
290 | * Look in the list of interrupts' statistics, the earliest | |
291 | * next event. | |
292 | */ | |
293 | idr_for_each_entry(&irqt_stats, s, i) { | |
294 | ||
295 | irqs = this_cpu_ptr(s); | |
296 | ||
297 | if (!irqs->valid) | |
298 | continue; | |
299 | ||
300 | if (irqs->next_evt <= now) { | |
301 | irq = i; | |
302 | next_evt = now; | |
303 | ||
304 | /* | |
305 | * This interrupt mustn't use in the future | |
306 | * until new events occur and update the | |
307 | * statistics. | |
308 | */ | |
309 | irqs->valid = 0; | |
310 | break; | |
311 | } | |
312 | ||
313 | if (irqs->next_evt < next_evt) { | |
314 | irq = i; | |
315 | next_evt = irqs->next_evt; | |
316 | } | |
317 | } | |
318 | ||
319 | return next_evt; | |
320 | } | |
321 | ||
322 | void irq_timings_free(int irq) | |
323 | { | |
324 | struct irqt_stat __percpu *s; | |
325 | ||
326 | s = idr_find(&irqt_stats, irq); | |
327 | if (s) { | |
328 | free_percpu(s); | |
329 | idr_remove(&irqt_stats, irq); | |
330 | } | |
331 | } | |
332 | ||
333 | int irq_timings_alloc(int irq) | |
334 | { | |
335 | struct irqt_stat __percpu *s; | |
336 | int id; | |
337 | ||
338 | /* | |
339 | * Some platforms can have the same private interrupt per cpu, | |
340 | * so this function may be be called several times with the | |
341 | * same interrupt number. Just bail out in case the per cpu | |
342 | * stat structure is already allocated. | |
343 | */ | |
344 | s = idr_find(&irqt_stats, irq); | |
345 | if (s) | |
346 | return 0; | |
347 | ||
348 | s = alloc_percpu(*s); | |
349 | if (!s) | |
350 | return -ENOMEM; | |
351 | ||
352 | idr_preload(GFP_KERNEL); | |
353 | id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT); | |
354 | idr_preload_end(); | |
355 | ||
356 | if (id < 0) { | |
357 | free_percpu(s); | |
358 | return id; | |
359 | } | |
360 | ||
361 | return 0; | |
362 | } |