Commit | Line | Data |
---|---|---|
b2d3d61a DL |
1 | /* |
2 | * linux/kernel/irq/timings.c | |
3 | * | |
4 | * Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 as | |
8 | * published by the Free Software Foundation. | |
9 | * | |
10 | */ | |
e1c92149 | 11 | #include <linux/kernel.h> |
b2d3d61a | 12 | #include <linux/percpu.h> |
e1c92149 | 13 | #include <linux/slab.h> |
b2d3d61a DL |
14 | #include <linux/static_key.h> |
15 | #include <linux/interrupt.h> | |
e1c92149 | 16 | #include <linux/idr.h> |
b2d3d61a | 17 | #include <linux/irq.h> |
e1c92149 DL |
18 | #include <linux/math64.h> |
19 | ||
20 | #include <trace/events/irq.h> | |
b2d3d61a DL |
21 | |
22 | #include "internals.h" | |
23 | ||
/* Global switch tested by the irq hot path to enable/disable timings tracking. */
DEFINE_STATIC_KEY_FALSE(irq_timing_enabled);

/* Per-CPU circular buffer of encoded (irq, timestamp) values. */
DEFINE_PER_CPU(struct irq_timings, irq_timings);

/*
 * Per-interrupt, per-CPU online statistics of the interval between
 * two interrupt occurrences.
 */
struct irqt_stat {
	u64 next_evt;	/* predicted timestamp of next interrupt (last ts + avg) */
	u64 last_ts;	/* last timestamp injected into the model */
	u64 variance;	/* online variance accumulator, read shifted by IRQ_TIMINGS_SHIFT */
	u32 avg;	/* average interval in ns (< NSEC_PER_SEC, so fits in u32) */
	u32 nr_samples;	/* number of samples since the last reset */
	int anomalies;	/* consecutive intervals falling outside 3 stddev */
	int valid;	/* non-zero when next_evt is usable for prediction */
};

/* Maps an interrupt number to its per-CPU irqt_stat statistics. */
static DEFINE_IDR(irqt_stats);
39 | ||
b2d3d61a DL |
/*
 * irq_timings_enable - turn on the irq timings tracking.
 *
 * Enables the irq_timing_enabled static branch, presumably tested in
 * the interrupt hot path elsewhere (not visible in this file).
 */
void irq_timings_enable(void)
{
	static_branch_enable(&irq_timing_enabled);
}
44 | ||
/*
 * irq_timings_disable - turn off the irq timings tracking.
 *
 * Disables the irq_timing_enabled static branch; counterpart of
 * irq_timings_enable().
 */
void irq_timings_disable(void)
{
	static_branch_disable(&irq_timing_enabled);
}
e1c92149 DL |
49 | |
50 | /** | |
51 | * irqs_update - update the irq timing statistics with a new timestamp | |
52 | * | |
53 | * @irqs: an irqt_stat struct pointer | |
54 | * @ts: the new timestamp | |
55 | * | |
56 | * The statistics are computed online, in other words, the code is | |
57 | * designed to compute the statistics on a stream of values rather | |
58 | * than doing multiple passes on the values to compute the average, | |
59 | * then the variance. The integer division introduces a loss of | |
60 | * precision but with an acceptable error margin regarding the results | |
61 | * we would have with the double floating precision: we are dealing | |
62 | * with nanosec, so big numbers, consequently the mantisse is | |
63 | * negligeable, especially when converting the time in usec | |
64 | * afterwards. | |
65 | * | |
66 | * The computation happens at idle time. When the CPU is not idle, the | |
67 | * interrupts' timestamps are stored in the circular buffer, when the | |
68 | * CPU goes idle and this routine is called, all the buffer's values | |
69 | * are injected in the statistical model continuying to extend the | |
70 | * statistics from the previous busy-idle cycle. | |
71 | * | |
72 | * The observations showed a device will trigger a burst of periodic | |
73 | * interrupts followed by one or two peaks of longer time, for | |
74 | * instance when a SD card device flushes its cache, then the periodic | |
75 | * intervals occur again. A one second inactivity period resets the | |
76 | * stats, that gives us the certitude the statistical values won't | |
77 | * exceed 1x10^9, thus the computation won't overflow. | |
78 | * | |
79 | * Basically, the purpose of the algorithm is to watch the periodic | |
80 | * interrupts and eliminate the peaks. | |
81 | * | |
82 | * An interrupt is considered periodically stable if the interval of | |
83 | * its occurences follow the normal distribution, thus the values | |
84 | * comply with: | |
85 | * | |
86 | * avg - 3 x stddev < value < avg + 3 x stddev | |
87 | * | |
88 | * Which can be simplified to: | |
89 | * | |
90 | * -3 x stddev < value - avg < 3 x stddev | |
91 | * | |
92 | * abs(value - avg) < 3 x stddev | |
93 | * | |
94 | * In order to save a costly square root computation, we use the | |
95 | * variance. For the record, stddev = sqrt(variance). The equation | |
96 | * above becomes: | |
97 | * | |
98 | * abs(value - avg) < 3 x sqrt(variance) | |
99 | * | |
100 | * And finally we square it: | |
101 | * | |
102 | * (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2 | |
103 | * | |
104 | * (value - avg) x (value - avg) < 9 x variance | |
105 | * | |
106 | * Statistically speaking, any values out of this interval is | |
107 | * considered as an anomaly and is discarded. However, a normal | |
108 | * distribution appears when the number of samples is 30 (it is the | |
109 | * rule of thumb in statistics, cf. "30 samples" on Internet). When | |
110 | * there are three consecutive anomalies, the statistics are resetted. | |
111 | * | |
112 | */ | |
113 | static void irqs_update(struct irqt_stat *irqs, u64 ts) | |
114 | { | |
115 | u64 old_ts = irqs->last_ts; | |
116 | u64 variance = 0; | |
117 | u64 interval; | |
118 | s64 diff; | |
119 | ||
120 | /* | |
121 | * The timestamps are absolute time values, we need to compute | |
122 | * the timing interval between two interrupts. | |
123 | */ | |
124 | irqs->last_ts = ts; | |
125 | ||
126 | /* | |
127 | * The interval type is u64 in order to deal with the same | |
128 | * type in our computation, that prevent mindfuck issues with | |
129 | * overflow, sign and division. | |
130 | */ | |
131 | interval = ts - old_ts; | |
132 | ||
133 | /* | |
134 | * The interrupt triggered more than one second apart, that | |
135 | * ends the sequence as predictible for our purpose. In this | |
136 | * case, assume we have the beginning of a sequence and the | |
137 | * timestamp is the first value. As it is impossible to | |
138 | * predict anything at this point, return. | |
139 | * | |
140 | * Note the first timestamp of the sequence will always fall | |
141 | * in this test because the old_ts is zero. That is what we | |
142 | * want as we need another timestamp to compute an interval. | |
143 | */ | |
144 | if (interval >= NSEC_PER_SEC) { | |
145 | memset(irqs, 0, sizeof(*irqs)); | |
146 | irqs->last_ts = ts; | |
147 | return; | |
148 | } | |
149 | ||
150 | /* | |
151 | * Pre-compute the delta with the average as the result is | |
152 | * used several times in this function. | |
153 | */ | |
154 | diff = interval - irqs->avg; | |
155 | ||
156 | /* | |
157 | * Increment the number of samples. | |
158 | */ | |
159 | irqs->nr_samples++; | |
160 | ||
161 | /* | |
162 | * Online variance divided by the number of elements if there | |
163 | * is more than one sample. Normally the formula is division | |
164 | * by nr_samples - 1 but we assume the number of element will be | |
165 | * more than 32 and dividing by 32 instead of 31 is enough | |
166 | * precise. | |
167 | */ | |
168 | if (likely(irqs->nr_samples > 1)) | |
169 | variance = irqs->variance >> IRQ_TIMINGS_SHIFT; | |
170 | ||
171 | /* | |
172 | * The rule of thumb in statistics for the normal distribution | |
173 | * is having at least 30 samples in order to have the model to | |
174 | * apply. Values outside the interval are considered as an | |
175 | * anomaly. | |
176 | */ | |
177 | if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) { | |
178 | /* | |
179 | * After three consecutive anomalies, we reset the | |
180 | * stats as it is no longer stable enough. | |
181 | */ | |
182 | if (irqs->anomalies++ >= 3) { | |
183 | memset(irqs, 0, sizeof(*irqs)); | |
184 | irqs->last_ts = ts; | |
185 | return; | |
186 | } | |
187 | } else { | |
188 | /* | |
189 | * The anomalies must be consecutives, so at this | |
190 | * point, we reset the anomalies counter. | |
191 | */ | |
192 | irqs->anomalies = 0; | |
193 | } | |
194 | ||
195 | /* | |
196 | * The interrupt is considered stable enough to try to predict | |
197 | * the next event on it. | |
198 | */ | |
199 | irqs->valid = 1; | |
200 | ||
201 | /* | |
202 | * Online average algorithm: | |
203 | * | |
204 | * new_average = average + ((value - average) / count) | |
205 | * | |
206 | * The variance computation depends on the new average | |
207 | * to be computed here first. | |
208 | * | |
209 | */ | |
210 | irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT); | |
211 | ||
212 | /* | |
213 | * Online variance algorithm: | |
214 | * | |
215 | * new_variance = variance + (value - average) x (value - new_average) | |
216 | * | |
217 | * Warning: irqs->avg is updated with the line above, hence | |
218 | * 'interval - irqs->avg' is no longer equal to 'diff' | |
219 | */ | |
220 | irqs->variance = irqs->variance + (diff * (interval - irqs->avg)); | |
221 | ||
222 | /* | |
223 | * Update the next event | |
224 | */ | |
225 | irqs->next_evt = ts + irqs->avg; | |
226 | } | |
227 | ||
/**
 * irq_timings_next_event - Return when the next event is supposed to arrive
 * @now: the current timestamp in nanoseconds
 *
 * During the last busy cycle, the number of interrupts is incremented
 * and stored in the irq_timings structure. This information is
 * necessary to:
 *
 * - know if the index in the table wrapped up:
 *
 *      If more than the array size interrupts happened during the
 *      last busy/idle cycle, the index wrapped up and we have to
 *      begin with the next element in the array which is the last one
 *      in the sequence, otherwise it is at the index 0.
 *
 * - have an indication of the interrupts activity on this CPU
 *   (eg. irq/sec)
 *
 * The values are 'consumed' after inserting in the statistical model,
 * thus the count is reinitialized.
 *
 * The array of values **must** be browsed in the time direction, the
 * timestamp must increase between an element and the next one.
 *
 * Returns a nanosec time based estimation of the earliest interrupt,
 * U64_MAX otherwise.
 */
u64 irq_timings_next_event(u64 now)
{
	struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
	struct irqt_stat *irqs;
	struct irqt_stat __percpu *s;
	u64 ts, next_evt = U64_MAX;
	int i, irq = 0;

	/*
	 * This function must be called with the local irq disabled in
	 * order to prevent the timings circular buffer to be updated
	 * while we are reading it.
	 */
	WARN_ON_ONCE(!irqs_disabled());

	/*
	 * Number of elements in the circular buffer: If it happens it
	 * was flushed before, then the number of elements could be
	 * smaller than IRQ_TIMINGS_SIZE, so the count is used,
	 * otherwise the array size is used as we wrapped. The index
	 * begins from zero when we did not wrap. That could be done
	 * in a nicer way with the proper circular array structure
	 * type but with the cost of extra computation in the
	 * interrupt handler hot path. We choose efficiency.
	 *
	 * Inject measured irq/timestamp to the statistical model
	 * while decrementing the counter because we consume the data
	 * from our circular buffer.
	 */
	for (i = irqts->count & IRQ_TIMINGS_MASK,
		     irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
	     irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {

		irq = irq_timing_decode(irqts->values[i], &ts);

		/* Only interrupts with allocated statistics are modelled. */
		s = idr_find(&irqt_stats, irq);
		if (s) {
			irqs = this_cpu_ptr(s);
			irqs_update(irqs, ts);
		}
	}

	/*
	 * Look in the list of interrupts' statistics, the earliest
	 * next event.
	 */
	idr_for_each_entry(&irqt_stats, s, i) {

		irqs = this_cpu_ptr(s);

		if (!irqs->valid)
			continue;

		if (irqs->next_evt <= now) {
			irq = i;
			next_evt = now;

			/*
			 * The prediction is already in the past, so no
			 * earlier event can exist: stop searching.
			 * Invalidate this interrupt's stats so they are
			 * not reused until new timestamps update them.
			 */
			irqs->valid = 0;
			break;
		}

		/* Keep the earliest predicted future event. */
		if (irqs->next_evt < next_evt) {
			irq = i;
			next_evt = irqs->next_evt;
		}
	}

	return next_evt;
}
328 | ||
329 | void irq_timings_free(int irq) | |
330 | { | |
331 | struct irqt_stat __percpu *s; | |
332 | ||
333 | s = idr_find(&irqt_stats, irq); | |
334 | if (s) { | |
335 | free_percpu(s); | |
336 | idr_remove(&irqt_stats, irq); | |
337 | } | |
338 | } | |
339 | ||
340 | int irq_timings_alloc(int irq) | |
341 | { | |
342 | struct irqt_stat __percpu *s; | |
343 | int id; | |
344 | ||
345 | /* | |
346 | * Some platforms can have the same private interrupt per cpu, | |
347 | * so this function may be be called several times with the | |
348 | * same interrupt number. Just bail out in case the per cpu | |
349 | * stat structure is already allocated. | |
350 | */ | |
351 | s = idr_find(&irqt_stats, irq); | |
352 | if (s) | |
353 | return 0; | |
354 | ||
355 | s = alloc_percpu(*s); | |
356 | if (!s) | |
357 | return -ENOMEM; | |
358 | ||
359 | idr_preload(GFP_KERNEL); | |
360 | id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT); | |
361 | idr_preload_end(); | |
362 | ||
363 | if (id < 0) { | |
364 | free_percpu(s); | |
365 | return id; | |
366 | } | |
367 | ||
368 | return 0; | |
369 | } |