Commit | Line | Data |
---|---|---|
bce29ac9 DBO |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * OS Noise Tracer: computes the OS Noise suffered by a running thread. | |
a955d7ea | 4 | * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread. |
bce29ac9 DBO |
5 | * |
6 | * Based on "hwlat_detector" tracer by: | |
7 | * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> | |
8 | * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com> | |
9 | * With feedback from Clark Williams <williams@redhat.com> | |
10 | * | |
11 | * And also based on the rtsl tracer presented on: | |
12 | * DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux | |
13 | * scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems | |
14 | * (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020. | |
15 | * | |
16 | * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com> | |
17 | */ | |
18 | ||
19 | #include <linux/kthread.h> | |
20 | #include <linux/tracefs.h> | |
21 | #include <linux/uaccess.h> | |
22 | #include <linux/cpumask.h> | |
23 | #include <linux/delay.h> | |
24 | #include <linux/sched/clock.h> | |
a955d7ea | 25 | #include <uapi/linux/sched/types.h> |
bce29ac9 DBO |
26 | #include <linux/sched.h> |
27 | #include "trace.h" | |
28 | ||
29 | #ifdef CONFIG_X86_LOCAL_APIC | |
30 | #include <asm/trace/irq_vectors.h> | |
31 | #undef TRACE_INCLUDE_PATH | |
32 | #undef TRACE_INCLUDE_FILE | |
33 | #endif /* CONFIG_X86_LOCAL_APIC */ | |
34 | ||
35 | #include <trace/events/irq.h> | |
36 | #include <trace/events/sched.h> | |
37 | ||
38 | #define CREATE_TRACE_POINTS | |
39 | #include <trace/events/osnoise.h> | |
40 | ||
bce29ac9 DBO |
/*
 * Default values.
 *
 * The trailing comment of each period/runtime value gives the duration it
 * stands for; the numbers match those durations when read as microseconds.
 */
#define BANNER			"osnoise: "
#define DEFAULT_SAMPLE_PERIOD	1000000			/* 1s */
#define DEFAULT_SAMPLE_RUNTIME	1000000			/* 1s */

#define DEFAULT_TIMERLAT_PERIOD	1000			/* 1ms */
#define DEFAULT_TIMERLAT_PRIO	95			/* FIFO 95 */
50 | ||
b179d48b DBO |
/*
 * osnoise/options entries.
 *
 * Each enumerator is the index of its name in osnoise_options_str[];
 * OSN_MAX is the number of entries.
 */
enum osnoise_options_index {
	OSN_DEFAULTS = 0,
	OSN_WORKLOAD,
	OSN_PANIC_ON_STOP,
	OSN_PREEMPT_DISABLE,
	OSN_IRQ_DISABLE,
	OSN_MAX
};

/* Option names, indexed by enum osnoise_options_index. */
static const char * const osnoise_options_str[OSN_MAX] = {
	[OSN_DEFAULTS]		= "DEFAULTS",
	[OSN_WORKLOAD]		= "OSNOISE_WORKLOAD",
	[OSN_PANIC_ON_STOP]	= "PANIC_ON_STOP",
	[OSN_PREEMPT_DISABLE]	= "OSNOISE_PREEMPT_DISABLE",
	[OSN_IRQ_DISABLE]	= "OSNOISE_IRQ_DISABLE",
};

/*
 * Initial value of osnoise_options. 0x2 has only bit 1 set, and 1 is the
 * numeric value of OSN_WORKLOAD.
 * NOTE(review): presumably osnoise_options is a bitmask indexed by
 * enum osnoise_options_index - confirm against its users.
 */
#define OSN_DEFAULT_OPTIONS		0x2
static unsigned long osnoise_options	= OSN_DEFAULT_OPTIONS;
b179d48b | 72 | |
dae18134 DBO |
/*
 * trace_array of the enabled osnoise/timerlat instances.
 *
 * One osnoise_instance is allocated per registered trace_array and linked
 * into osnoise_instances. Readers walk the list under rcu_read_lock();
 * additions and removals use the _rcu list helpers.
 */
struct osnoise_instance {
	struct list_head	list;	/* link in osnoise_instances */
	struct trace_array	*tr;	/* the registered trace instance */
};

/*
 * Head of the list of registered instances.
 * NOTE(review): no static initializer here - presumably initialized during
 * tracer init, which is not visible in this part of the file.
 */
static struct list_head osnoise_instances;
dae18134 DBO |
82 | |
83 | static bool osnoise_has_registered_instances(void) | |
84 | { | |
85 | return !!list_first_or_null_rcu(&osnoise_instances, | |
86 | struct osnoise_instance, | |
87 | list); | |
88 | } | |
89 | ||
2fac8d64 DBO |
90 | /* |
91 | * osnoise_instance_registered - check if a tr is already registered | |
92 | */ | |
93 | static int osnoise_instance_registered(struct trace_array *tr) | |
94 | { | |
95 | struct osnoise_instance *inst; | |
96 | int found = 0; | |
97 | ||
98 | rcu_read_lock(); | |
99 | list_for_each_entry_rcu(inst, &osnoise_instances, list) { | |
100 | if (inst->tr == tr) | |
101 | found = 1; | |
102 | } | |
103 | rcu_read_unlock(); | |
104 | ||
105 | return found; | |
106 | } | |
107 | ||
dae18134 DBO |
108 | /* |
109 | * osnoise_register_instance - register a new trace instance | |
110 | * | |
111 | * Register a trace_array *tr in the list of instances running | |
112 | * osnoise/timerlat tracers. | |
113 | */ | |
114 | static int osnoise_register_instance(struct trace_array *tr) | |
115 | { | |
116 | struct osnoise_instance *inst; | |
117 | ||
118 | /* | |
119 | * register/unregister serialization is provided by trace's | |
120 | * trace_types_lock. | |
121 | */ | |
122 | lockdep_assert_held(&trace_types_lock); | |
123 | ||
124 | inst = kmalloc(sizeof(*inst), GFP_KERNEL); | |
125 | if (!inst) | |
126 | return -ENOMEM; | |
127 | ||
128 | INIT_LIST_HEAD_RCU(&inst->list); | |
129 | inst->tr = tr; | |
130 | list_add_tail_rcu(&inst->list, &osnoise_instances); | |
131 | ||
132 | return 0; | |
133 | } | |
134 | ||
135 | /* | |
136 | * osnoise_unregister_instance - unregister a registered trace instance | |
137 | * | |
138 | * Remove the trace_array *tr from the list of instances running | |
139 | * osnoise/timerlat tracers. | |
140 | */ | |
141 | static void osnoise_unregister_instance(struct trace_array *tr) | |
142 | { | |
143 | struct osnoise_instance *inst; | |
144 | int found = 0; | |
145 | ||
146 | /* | |
147 | * register/unregister serialization is provided by trace's | |
148 | * trace_types_lock. | |
149 | */ | |
685b64e4 CW |
150 | list_for_each_entry_rcu(inst, &osnoise_instances, list, |
151 | lockdep_is_held(&trace_types_lock)) { | |
dae18134 DBO |
152 | if (inst->tr == tr) { |
153 | list_del_rcu(&inst->list); | |
154 | found = 1; | |
155 | break; | |
156 | } | |
157 | } | |
158 | ||
159 | if (!found) | |
160 | return; | |
161 | ||
cae16f2c | 162 | kvfree_rcu_mightsleep(inst); |
dae18134 DBO |
163 | } |
164 | ||
bce29ac9 DBO |
/*
 * NMI runtime info.
 */
struct osn_nmi {
	u64	count;		/* incremented on every NMI entry */
	u64	delta_start;	/* time_get() taken at NMI entry */
};
172 | ||
/*
 * IRQ runtime info.
 */
struct osn_irq {
	u64	count;		/* incremented on every IRQ entry */
	u64	arrival_time;	/* time_get() at IRQ entry, kept for the report */
	u64	delta_start;	/* IRQ start; pushed forward by NMI durations */
};
181 | ||
a955d7ea DBO |
/*
 * Context identifiers.
 * NOTE(review): presumably the values stored in timerlat_sample.context -
 * confirm against the users, which are not visible in this part of the file.
 */
#define IRQ_CONTEXT	0
#define THREAD_CONTEXT	1
#define THREAD_URET	2
bce29ac9 DBO |
/*
 * softirq runtime info.
 */
struct osn_softirq {
	u64	count;
	u64	arrival_time;
	u64	delta_start;	/* see cond_move_softirq_delta_start() */
};
193 | ||
/*
 * thread runtime info.
 */
struct osn_thread {
	u64	count;
	u64	arrival_time;
	u64	delta_start;	/* see cond_move_thread_delta_start() */
};
202 | ||
/*
 * Runtime information: this structure saves the runtime information used by
 * one sampling thread.
 */
struct osnoise_variables {
	struct task_struct	*kthread;
	bool			sampling;	/* NMI/IRQ hooks do nothing while false */
	pid_t			pid;
	struct osn_nmi		nmi;
	struct osn_irq		irq;
	struct osn_softirq	softirq;
	struct osn_thread	thread;
	local_t			int_counter;	/* bumped on NMI and IRQ entry; see get_int_safe_duration() */
};
217 | ||
218 | /* | |
219 | * Per-cpu runtime information. | |
220 | */ | |
7a025e06 | 221 | static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var); |
bce29ac9 DBO |
222 | |
223 | /* | |
224 | * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU | |
225 | */ | |
226 | static inline struct osnoise_variables *this_cpu_osn_var(void) | |
227 | { | |
228 | return this_cpu_ptr(&per_cpu_osnoise_var); | |
229 | } | |
230 | ||
af178143 SR |
/*
 * Protect the interface.
 *
 * NOTE(review): declared without a static initializer, so it presumably gets
 * a mutex_init() before first use - the init code is not visible here.
 */
static struct mutex interface_lock;
235 | ||
a955d7ea | 236 | #ifdef CONFIG_TIMERLAT_TRACER |
/*
 * Runtime information for the timer mode.
 */
struct timerlat_variables {
	struct task_struct	*kthread;	/* when set, tlat_var_reset() cancels the timer */
	struct hrtimer		timer;
	u64			rel_period;
	u64			abs_period;
	bool			tracing_thread;	/* checked by timerlat_softirq_exit()/timerlat_thread_exit() */
	u64			count;
	bool			uthread_migrate;
};
249 | ||
7a025e06 | 250 | static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var); |
a955d7ea DBO |
251 | |
252 | /* | |
253 | * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU | |
254 | */ | |
255 | static inline struct timerlat_variables *this_cpu_tmr_var(void) | |
256 | { | |
257 | return this_cpu_ptr(&per_cpu_timerlat_var); | |
258 | } | |
259 | ||
260 | /* | |
261 | * tlat_var_reset - Reset the values of the given timerlat_variables | |
bce29ac9 | 262 | */ |
a955d7ea | 263 | static inline void tlat_var_reset(void) |
bce29ac9 | 264 | { |
a955d7ea DBO |
265 | struct timerlat_variables *tlat_var; |
266 | int cpu; | |
e6a53481 SR |
267 | |
268 | /* Synchronize with the timerlat interfaces */ | |
269 | mutex_lock(&interface_lock); | |
bce29ac9 DBO |
270 | /* |
271 | * So far, all the values are initialized as 0, so | |
272 | * zeroing the structure is perfect. | |
273 | */ | |
a955d7ea DBO |
274 | for_each_cpu(cpu, cpu_online_mask) { |
275 | tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); | |
e6a53481 SR |
276 | if (tlat_var->kthread) |
277 | hrtimer_cancel(&tlat_var->timer); | |
a955d7ea DBO |
278 | memset(tlat_var, 0, sizeof(*tlat_var)); |
279 | } | |
e6a53481 | 280 | mutex_unlock(&interface_lock); |
bce29ac9 | 281 | } |
a955d7ea DBO |
282 | #else /* CONFIG_TIMERLAT_TRACER */ |
/* No timerlat tracer built in: there is no timerlat state to reset. */
#define tlat_var_reset()	do {} while (0)
284 | #endif /* CONFIG_TIMERLAT_TRACER */ | |
bce29ac9 DBO |
285 | |
286 | /* | |
a955d7ea | 287 | * osn_var_reset - Reset the values of the given osnoise_variables |
bce29ac9 | 288 | */ |
a955d7ea | 289 | static inline void osn_var_reset(void) |
bce29ac9 DBO |
290 | { |
291 | struct osnoise_variables *osn_var; | |
292 | int cpu; | |
293 | ||
a955d7ea DBO |
294 | /* |
295 | * So far, all the values are initialized as 0, so | |
296 | * zeroing the structure is perfect. | |
297 | */ | |
bce29ac9 DBO |
298 | for_each_cpu(cpu, cpu_online_mask) { |
299 | osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); | |
a955d7ea | 300 | memset(osn_var, 0, sizeof(*osn_var)); |
bce29ac9 DBO |
301 | } |
302 | } | |
303 | ||
a955d7ea DBO |
/*
 * osn_var_reset_all - Reset all per-cpu runtime variables
 *
 * Resets the osnoise variables first, then the timerlat ones.
 */
static inline void osn_var_reset_all(void)
{
	osn_var_reset();

	tlat_var_reset();
}
312 | ||
bce29ac9 DBO |
/*
 * Tells NMIs to call back to the osnoise tracer to record timestamps.
 * Not static: the flag is visible outside this file.
 */
bool trace_osnoise_callback_enabled;
317 | ||
/*
 * osnoise sample structure definition. Used to store the statistics of a
 * sample run. Every field is copied into an osnoise_entry by
 * __trace_osnoise_sample().
 */
struct osnoise_sample {
	u64			runtime;	/* runtime */
	u64			noise;		/* noise */
	u64			max_sample;	/* max single noise sample */
	int			hw_count;	/* # HW (incl. hypervisor) interference */
	int			nmi_count;	/* # NMIs during this sample */
	int			irq_count;	/* # IRQs during this sample */
	int			softirq_count;	/* # softirqs during this sample */
	int			thread_count;	/* # threads during this sample */
};
332 | ||
a955d7ea DBO |
333 | #ifdef CONFIG_TIMERLAT_TRACER |
/*
 * timerlat sample structure definition. Used to store the statistics of
 * a sample run. Every field is copied into a timerlat_entry by
 * __trace_timerlat_sample().
 */
struct timerlat_sample {
	u64			timer_latency;	/* timer_latency */
	unsigned int		seqnum;		/* unique sequence */
	int			context;	/* timer context */
};
343 | #endif | |
344 | ||
bce29ac9 DBO |
/*
 * Tracer data.
 *
 * Single file-scope instance holding the tracer configuration. It starts
 * with the default sample period/runtime (and timerlat period, when built
 * in); everything else starts as 0/false.
 */
static struct osnoise_data {
	u64	sample_period;		/* total sampling period */
	u64	sample_runtime;		/* active sampling portion of period */
	u64	stop_tracing;		/* stop trace in the internal operation (loop/irq) */
	u64	stop_tracing_total;	/* stop trace in the final operation (report/thread) */
#ifdef CONFIG_TIMERLAT_TRACER
	u64	timerlat_period;	/* timerlat period */
	u64	print_stack;		/* print IRQ stack if total > */
	int	timerlat_tracer;	/* timerlat tracer */
#endif
	bool	tainted;		/* inform users and developers about a problem */
} osnoise_data = {
	.sample_period			= DEFAULT_SAMPLE_PERIOD,
	.sample_runtime			= DEFAULT_SAMPLE_RUNTIME,
	.stop_tracing			= 0,
	.stop_tracing_total		= 0,
#ifdef CONFIG_TIMERLAT_TRACER
	.print_stack			= 0,
	.timerlat_period		= DEFAULT_TIMERLAT_PERIOD,
	.timerlat_tracer		= 0,
#endif
};
370 | ||
ccb67544 DBO |
371 | #ifdef CONFIG_TIMERLAT_TRACER |
372 | static inline bool timerlat_enabled(void) | |
373 | { | |
374 | return osnoise_data.timerlat_tracer; | |
375 | } | |
376 | ||
377 | static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) | |
378 | { | |
379 | struct timerlat_variables *tlat_var = this_cpu_tmr_var(); | |
380 | /* | |
381 | * If the timerlat is enabled, but the irq handler did | |
382 | * not run yet enabling timerlat_tracer, do not trace. | |
383 | */ | |
384 | if (!tlat_var->tracing_thread) { | |
385 | osn_var->softirq.arrival_time = 0; | |
386 | osn_var->softirq.delta_start = 0; | |
387 | return 0; | |
388 | } | |
389 | return 1; | |
390 | } | |
391 | ||
392 | static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) | |
393 | { | |
394 | struct timerlat_variables *tlat_var = this_cpu_tmr_var(); | |
395 | /* | |
396 | * If the timerlat is enabled, but the irq handler did | |
397 | * not run yet enabling timerlat_tracer, do not trace. | |
398 | */ | |
399 | if (!tlat_var->tracing_thread) { | |
400 | osn_var->thread.delta_start = 0; | |
401 | osn_var->thread.arrival_time = 0; | |
402 | return 0; | |
403 | } | |
404 | return 1; | |
405 | } | |
406 | #else /* CONFIG_TIMERLAT_TRACER */ | |
/* Timerlat tracer not built in: it is never enabled. */
static inline bool timerlat_enabled(void)
{
	return false;
}

/* Timerlat tracer not built in: always returns 1 and leaves osn_var untouched. */
static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
{
	return 1;
}
/* Timerlat tracer not built in: always returns 1 and leaves osn_var untouched. */
static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
{
	return 1;
}
420 | #endif | |
421 | ||
d03721a6 | 422 | #ifdef CONFIG_PREEMPT_RT |
bce29ac9 DBO |
/*
 * Print the osnoise header info (CONFIG_PREEMPT_RT layout).
 *
 * Emits a "tainted" warning first when osnoise_data.tainted is set, then the
 * column legend. Each legend row is written with two seq_puts() calls: the
 * left part and the right part of the same output line.
 *
 * NOTE(review): "%%" is handed to seq_puts(), which is given a plain string
 * here rather than a format - presumably both characters reach the output.
 * Confirm whether a single '%' was intended.
 */
static void print_osnoise_headers(struct seq_file *s)
{
	if (osnoise_data.tainted)
		seq_puts(s, "# osnoise is tainted!\n");

	seq_puts(s, "# _-------=> irqs-off\n");
	seq_puts(s, "# / _------=> need-resched\n");
	seq_puts(s, "# | / _-----=> need-resched-lazy\n");
	seq_puts(s, "# || / _----=> hardirq/softirq\n");
	seq_puts(s, "# ||| / _---=> preempt-depth\n");
	seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n");
	seq_puts(s, "# ||||| / _-=> migrate-disable\n");

	seq_puts(s, "# |||||| / ");
	seq_puts(s, " MAX\n");

	seq_puts(s, "# ||||| / ");
	seq_puts(s, " SINGLE Interference counters:\n");

	seq_puts(s, "# ||||||| RUNTIME ");
	seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n");

	seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP IN US ");
	seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n");

	seq_puts(s, "# | | | ||||||| | | ");
	seq_puts(s, " | | | | | | | |\n");
}
454 | #else /* CONFIG_PREEMPT_RT */ | |
/*
 * Print the osnoise header info (layout used without CONFIG_PREEMPT_RT).
 *
 * Emits a "tainted" warning first when osnoise_data.tainted is set, then the
 * column legend. Rows written with two seq_puts() calls are the left and
 * right parts of the same output line.
 *
 * NOTE(review): "%%" is handed to seq_puts(), which is given a plain string
 * here rather than a format - presumably both characters reach the output.
 * Confirm whether a single '%' was intended.
 */
static void print_osnoise_headers(struct seq_file *s)
{
	if (osnoise_data.tainted)
		seq_puts(s, "# osnoise is tainted!\n");

	seq_puts(s, "# _-----=> irqs-off\n");
	seq_puts(s, "# / _----=> need-resched\n");
	seq_puts(s, "# | / _---=> hardirq/softirq\n");
	seq_puts(s, "# || / _--=> preempt-depth\n");
	seq_puts(s, "# ||| / _-=> migrate-disable ");
	seq_puts(s, " MAX\n");
	seq_puts(s, "# |||| / delay ");
	seq_puts(s, " SINGLE Interference counters:\n");

	seq_puts(s, "# ||||| RUNTIME ");
	seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n");

	seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP IN US ");
	seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n");

	seq_puts(s, "# | | | ||||| | | ");
	seq_puts(s, " | | | | | | | |\n");
}
d03721a6 | 478 | #endif /* CONFIG_PREEMPT_RT */ |
bce29ac9 DBO |
479 | |
/*
 * osnoise_taint - report an osnoise error.
 *
 * Prints msg into the trace buffer of every registered instance, walking
 * the instance list under rcu_read_lock(), and then marks osnoise_data
 * as tainted.
 */
#define osnoise_taint(msg) ({							\
	struct osnoise_instance *taint_inst;					\
	struct trace_buffer *taint_buf;						\
										\
	rcu_read_lock();							\
	list_for_each_entry_rcu(taint_inst, &osnoise_instances, list) {		\
		taint_buf = taint_inst->tr->array_buffer.buffer;		\
		trace_array_printk_buf(taint_buf, _THIS_IP_, msg);		\
	}									\
	rcu_read_unlock();							\
	osnoise_data.tainted = true;						\
})
495 | ||
496 | /* | |
497 | * Record an osnoise_sample into the tracer buffer. | |
498 | */ | |
dae18134 DBO |
499 | static void |
500 | __trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer) | |
bce29ac9 | 501 | { |
bce29ac9 DBO |
502 | struct trace_event_call *call = &event_osnoise; |
503 | struct ring_buffer_event *event; | |
504 | struct osnoise_entry *entry; | |
505 | ||
506 | event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry), | |
507 | tracing_gen_ctx()); | |
508 | if (!event) | |
509 | return; | |
510 | entry = ring_buffer_event_data(event); | |
511 | entry->runtime = sample->runtime; | |
512 | entry->noise = sample->noise; | |
513 | entry->max_sample = sample->max_sample; | |
514 | entry->hw_count = sample->hw_count; | |
515 | entry->nmi_count = sample->nmi_count; | |
516 | entry->irq_count = sample->irq_count; | |
517 | entry->softirq_count = sample->softirq_count; | |
518 | entry->thread_count = sample->thread_count; | |
519 | ||
520 | if (!call_filter_check_discard(call, entry, buffer, event)) | |
521 | trace_buffer_unlock_commit_nostack(buffer, event); | |
522 | } | |
523 | ||
dae18134 DBO |
524 | /* |
525 | * Record an osnoise_sample on all osnoise instances. | |
526 | */ | |
527 | static void trace_osnoise_sample(struct osnoise_sample *sample) | |
528 | { | |
529 | struct osnoise_instance *inst; | |
530 | struct trace_buffer *buffer; | |
531 | ||
532 | rcu_read_lock(); | |
533 | list_for_each_entry_rcu(inst, &osnoise_instances, list) { | |
534 | buffer = inst->tr->array_buffer.buffer; | |
535 | __trace_osnoise_sample(sample, buffer); | |
536 | } | |
537 | rcu_read_unlock(); | |
538 | } | |
539 | ||
a955d7ea DBO |
540 | #ifdef CONFIG_TIMERLAT_TRACER |
541 | /* | |
542 | * Print the timerlat header info. | |
543 | */ | |
e1c4ad4a DBO |
544 | #ifdef CONFIG_PREEMPT_RT |
/*
 * Print the timerlat column legend (CONFIG_PREEMPT_RT layout).
 *
 * Rows written with two seq_puts() calls are the left and right parts of
 * the same output line.
 */
static void print_timerlat_headers(struct seq_file *s)
{
	seq_puts(s, "# _-------=> irqs-off\n");
	seq_puts(s, "# / _------=> need-resched\n");
	seq_puts(s, "# | / _-----=> need-resched-lazy\n");
	seq_puts(s, "# || / _----=> hardirq/softirq\n");
	seq_puts(s, "# ||| / _---=> preempt-depth\n");
	seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n");
	seq_puts(s, "# ||||| / _-=> migrate-disable\n");
	seq_puts(s, "# |||||| /\n");
	seq_puts(s, "# ||||||| ACTIVATION\n");
	seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP ID ");
	seq_puts(s, " CONTEXT LATENCY\n");
	seq_puts(s, "# | | | ||||||| | | ");
	seq_puts(s, " | |\n");
}
561 | #else /* CONFIG_PREEMPT_RT */ | |
a955d7ea DBO |
/*
 * Print the timerlat column legend (layout used without CONFIG_PREEMPT_RT).
 *
 * Rows written with two seq_puts() calls are the left and right parts of
 * the same output line.
 */
static void print_timerlat_headers(struct seq_file *s)
{
	seq_puts(s, "# _-----=> irqs-off\n");
	seq_puts(s, "# / _----=> need-resched\n");
	seq_puts(s, "# | / _---=> hardirq/softirq\n");
	seq_puts(s, "# || / _--=> preempt-depth\n");
	seq_puts(s, "# ||| / _-=> migrate-disable\n");
	seq_puts(s, "# |||| / delay\n");
	seq_puts(s, "# ||||| ACTIVATION\n");
	seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP ID ");
	seq_puts(s, " CONTEXT LATENCY\n");
	seq_puts(s, "# | | | ||||| | | ");
	seq_puts(s, " | |\n");
}
e1c4ad4a | 576 | #endif /* CONFIG_PREEMPT_RT */ |
a955d7ea | 577 | |
dae18134 DBO |
578 | static void |
579 | __trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer) | |
a955d7ea | 580 | { |
a955d7ea | 581 | struct trace_event_call *call = &event_osnoise; |
a955d7ea DBO |
582 | struct ring_buffer_event *event; |
583 | struct timerlat_entry *entry; | |
584 | ||
585 | event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry), | |
586 | tracing_gen_ctx()); | |
587 | if (!event) | |
588 | return; | |
589 | entry = ring_buffer_event_data(event); | |
590 | entry->seqnum = sample->seqnum; | |
591 | entry->context = sample->context; | |
592 | entry->timer_latency = sample->timer_latency; | |
593 | ||
594 | if (!call_filter_check_discard(call, entry, buffer, event)) | |
595 | trace_buffer_unlock_commit_nostack(buffer, event); | |
596 | } | |
597 | ||
dae18134 DBO |
598 | /* |
599 | * Record an timerlat_sample into the tracer buffer. | |
600 | */ | |
601 | static void trace_timerlat_sample(struct timerlat_sample *sample) | |
602 | { | |
603 | struct osnoise_instance *inst; | |
604 | struct trace_buffer *buffer; | |
605 | ||
606 | rcu_read_lock(); | |
607 | list_for_each_entry_rcu(inst, &osnoise_instances, list) { | |
608 | buffer = inst->tr->array_buffer.buffer; | |
609 | __trace_timerlat_sample(sample, buffer); | |
610 | } | |
611 | rcu_read_unlock(); | |
612 | } | |
613 | ||
a955d7ea DBO |
614 | #ifdef CONFIG_STACKTRACE |
615 | ||
/* Capacity, in entries, of the per-cpu saved stack trace. */
#define	MAX_CALLS	256

/*
 * Stack trace will take place only at IRQ level, so, no need
 * to control nesting here.
 */
struct trace_stack {
	int		stack_size;		/* bytes used in calls[] */
	int		nr_entries;		/* entries used in calls[] */
	unsigned long	calls[MAX_CALLS];	/* filled by stack_trace_save() */
};

/* One saved stack per CPU, written by timerlat_save_stack(). */
static DEFINE_PER_CPU(struct trace_stack, trace_stack);
629 | ||
630 | /* | |
631 | * timerlat_save_stack - save a stack trace without printing | |
632 | * | |
633 | * Save the current stack trace without printing. The | |
634 | * stack will be printed later, after the end of the measurement. | |
635 | */ | |
636 | static void timerlat_save_stack(int skip) | |
637 | { | |
638 | unsigned int size, nr_entries; | |
639 | struct trace_stack *fstack; | |
640 | ||
641 | fstack = this_cpu_ptr(&trace_stack); | |
642 | ||
643 | size = ARRAY_SIZE(fstack->calls); | |
644 | ||
645 | nr_entries = stack_trace_save(fstack->calls, size, skip); | |
646 | ||
647 | fstack->stack_size = nr_entries * sizeof(unsigned long); | |
648 | fstack->nr_entries = nr_entries; | |
649 | ||
650 | return; | |
651 | ||
652 | } | |
dae18134 DBO |
653 | |
654 | static void | |
655 | __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size) | |
a955d7ea DBO |
656 | { |
657 | struct trace_event_call *call = &event_osnoise; | |
a955d7ea | 658 | struct ring_buffer_event *event; |
a955d7ea | 659 | struct stack_entry *entry; |
a955d7ea DBO |
660 | |
661 | event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size, | |
662 | tracing_gen_ctx()); | |
663 | if (!event) | |
dae18134 | 664 | return; |
a955d7ea DBO |
665 | |
666 | entry = ring_buffer_event_data(event); | |
667 | ||
668 | memcpy(&entry->caller, fstack->calls, size); | |
669 | entry->size = fstack->nr_entries; | |
670 | ||
671 | if (!call_filter_check_discard(call, entry, buffer, event)) | |
672 | trace_buffer_unlock_commit_nostack(buffer, event); | |
dae18134 | 673 | } |
a955d7ea | 674 | |
dae18134 DBO |
675 | /* |
676 | * timerlat_dump_stack - dump a stack trace previously saved | |
677 | */ | |
b14f4568 | 678 | static void timerlat_dump_stack(u64 latency) |
dae18134 DBO |
679 | { |
680 | struct osnoise_instance *inst; | |
681 | struct trace_buffer *buffer; | |
682 | struct trace_stack *fstack; | |
683 | unsigned int size; | |
684 | ||
b14f4568 DBO |
685 | /* |
686 | * trace only if latency > print_stack config, if enabled. | |
687 | */ | |
688 | if (!osnoise_data.print_stack || osnoise_data.print_stack > latency) | |
689 | return; | |
690 | ||
dae18134 DBO |
691 | preempt_disable_notrace(); |
692 | fstack = this_cpu_ptr(&trace_stack); | |
693 | size = fstack->stack_size; | |
694 | ||
695 | rcu_read_lock(); | |
696 | list_for_each_entry_rcu(inst, &osnoise_instances, list) { | |
697 | buffer = inst->tr->array_buffer.buffer; | |
698 | __timerlat_dump_stack(buffer, fstack, size); | |
699 | ||
700 | } | |
701 | rcu_read_unlock(); | |
a955d7ea DBO |
702 | preempt_enable_notrace(); |
703 | } | |
b14f4568 DBO |
704 | #else /* CONFIG_STACKTRACE */ |
/*
 * No stack trace support: saving and dumping the stack are no-ops.
 * A macro parameter must be a single identifier, so the stub takes
 * "latency" without a type.
 */
#define timerlat_dump_stack(latency) do {} while (0)
#define timerlat_save_stack(a) do {} while (0)
707 | #endif /* CONFIG_STACKTRACE */ | |
708 | #endif /* CONFIG_TIMERLAT_TRACER */ | |
709 | ||
bce29ac9 DBO |
/*
 * Macros to encapsulate the time capturing infrastructure.
 *
 * time_get()     - current timestamp, taken from trace_clock_local()
 * time_to_us(x)  - x divided by 1000
 * time_sub(a, b) - a minus b
 */
#define time_get()	trace_clock_local()
#define time_to_us(x)	div_u64(x, 1000)
#define time_sub(a, b)	((a) - (b))
716 | ||
717 | /* | |
718 | * cond_move_irq_delta_start - Forward the delta_start of a running IRQ | |
719 | * | |
720 | * If an IRQ is preempted by an NMI, its delta_start is pushed forward | |
721 | * to discount the NMI interference. | |
722 | * | |
723 | * See get_int_safe_duration(). | |
724 | */ | |
725 | static inline void | |
726 | cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration) | |
727 | { | |
728 | if (osn_var->irq.delta_start) | |
729 | osn_var->irq.delta_start += duration; | |
730 | } | |
731 | ||
732 | #ifndef CONFIG_PREEMPT_RT | |
733 | /* | |
734 | * cond_move_softirq_delta_start - Forward the delta_start of a running softirq. | |
735 | * | |
736 | * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed | |
737 | * forward to discount the interference. | |
738 | * | |
739 | * See get_int_safe_duration(). | |
740 | */ | |
741 | static inline void | |
742 | cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration) | |
743 | { | |
744 | if (osn_var->softirq.delta_start) | |
745 | osn_var->softirq.delta_start += duration; | |
746 | } | |
747 | #else /* CONFIG_PREEMPT_RT */ | |
748 | #define cond_move_softirq_delta_start(osn_var, duration) do {} while (0) | |
749 | #endif | |
750 | ||
751 | /* | |
752 | * cond_move_thread_delta_start - Forward the delta_start of a running thread | |
753 | * | |
754 | * If a noisy thread is preempted by an softirq, IRQ or NMI, its delta_start | |
755 | * is pushed forward to discount the interference. | |
756 | * | |
757 | * See get_int_safe_duration(). | |
758 | */ | |
759 | static inline void | |
760 | cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration) | |
761 | { | |
762 | if (osn_var->thread.delta_start) | |
763 | osn_var->thread.delta_start += duration; | |
764 | } | |
765 | ||
/*
 * get_int_safe_duration - Get the duration of a window
 *
 * The irq, softirq and thread variables need to have their duration
 * computed without the interference from higher priority interrupts.
 * Instead of keeping a variable to discount the interrupt interference
 * from these variables, the starting time of these variables is pushed
 * forward with the interrupt's duration. In this way, a single variable
 * is used to:
 *
 *   - Know if a given window is being measured.
 *   - Account its duration.
 *   - Discount the interference.
 *
 * To avoid getting inconsistent values, e.g.,:
 *
 *	now = time_get()
 *		--->	interrupt!
 *			delta_start += int duration;
 *		<---
 *	duration = now - delta_start;
 *
 *	result: negative duration if the variable duration before the
 *	interrupt was smaller than the interrupt execution.
 *
 * A counter of interrupts is used. If the counter increased, try
 * to capture an interference safe duration.
 *
 * Returns now - *delta_start and sets *delta_start to 0. A negative
 * result is reported with osnoise_taint() and still returned.
 */
static inline s64
get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start)
{
	u64 int_counter, now;
	s64 duration;

	/* Retry until no interrupt bumped int_counter during the reads. */
	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		now = time_get();
		duration = (now - *delta_start);

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	/*
	 * This is an evidence of race conditions that cause
	 * a value to be "discounted" too much.
	 */
	if (duration < 0)
		osnoise_taint("Negative duration!\n");

	/* The window is over: 0 marks "not being measured". */
	*delta_start = 0;

	return duration;
}
822 | ||
/*
 * set_int_safe_time - Save the current time on *time, aware of interference
 *
 * Get the time, taking into consideration a possible interference from
 * higher priority interrupts: the read is repeated until int_counter is
 * the same before and after it.
 *
 * Returns the int_counter value observed for the stored timestamp.
 *
 * See get_int_safe_duration() for an explanation.
 */
static u64
set_int_safe_time(struct osnoise_variables *osn_var, u64 *time)
{
	u64 int_counter;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		*time = time_get();

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	return int_counter;
}
850 | ||
a955d7ea DBO |
851 | #ifdef CONFIG_TIMERLAT_TRACER |
/*
 * copy_int_safe_time - Copy *src into *dst aware of interference
 *
 * The copy is repeated until int_counter is the same before and after it,
 * i.e., until no interrupt bumped the counter while copying.
 *
 * Returns the int_counter value observed for the copy.
 */
static u64
copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src)
{
	u64 int_counter;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		*dst = *src;

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	return int_counter;
}
873 | #endif /* CONFIG_TIMERLAT_TRACER */ | |
874 | ||
bce29ac9 DBO |
/*
 * trace_osnoise_callback - NMI entry/exit callback
 *
 * This function is called at the entry and exit NMI code. The bool enter
 * distinguishes between either case. This function is used to note a NMI
 * occurrence, compute the noise caused by the NMI, and to remove the noise
 * it is potentially causing on other interference variables.
 *
 * Nothing is done while this CPU is not sampling.
 */
void trace_osnoise_callback(bool enter)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	u64 duration;

	if (!osn_var->sampling)
		return;

	/*
	 * Currently trace_clock_local() calls sched_clock() and the
	 * generic version is not NMI safe.
	 */
	if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
		if (enter) {
			/* Entry: record the start and signal the interference. */
			osn_var->nmi.delta_start = time_get();
			local_inc(&osn_var->int_counter);
		} else {
			duration = time_get() - osn_var->nmi.delta_start;

			trace_nmi_noise(osn_var->nmi.delta_start, duration);

			/* Discount the NMI from any window currently open. */
			cond_move_irq_delta_start(osn_var, duration);
			cond_move_softirq_delta_start(osn_var, duration);
			cond_move_thread_delta_start(osn_var, duration);
		}
	}

	/* The NMI is counted even when its duration cannot be measured. */
	if (enter)
		osn_var->nmi.count++;
}
913 | ||
914 | /* | |
915 | * osnoise_trace_irq_entry - Note the starting of an IRQ | |
916 | * | |
917 | * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs, | |
918 | * it is safe to use a single variable (ons_var->irq) to save the statistics. | |
919 | * The arrival_time is used to report... the arrival time. The delta_start | |
920 | * is used to compute the duration at the IRQ exit handler. See | |
921 | * cond_move_irq_delta_start(). | |
922 | */ | |
923 | void osnoise_trace_irq_entry(int id) | |
924 | { | |
925 | struct osnoise_variables *osn_var = this_cpu_osn_var(); | |
926 | ||
927 | if (!osn_var->sampling) | |
928 | return; | |
929 | /* | |
930 | * This value will be used in the report, but not to compute | |
931 | * the execution time, so it is safe to get it unsafe. | |
932 | */ | |
933 | osn_var->irq.arrival_time = time_get(); | |
934 | set_int_safe_time(osn_var, &osn_var->irq.delta_start); | |
935 | osn_var->irq.count++; | |
936 | ||
937 | local_inc(&osn_var->int_counter); | |
938 | } | |
939 | ||
940 | /* | |
941 | * osnoise_irq_exit - Note the end of an IRQ, sava data and trace | |
942 | * | |
943 | * Computes the duration of the IRQ noise, and trace it. Also discounts the | |
944 | * interference from other sources of noise could be currently being accounted. | |
945 | */ | |
946 | void osnoise_trace_irq_exit(int id, const char *desc) | |
947 | { | |
948 | struct osnoise_variables *osn_var = this_cpu_osn_var(); | |
022632f6 | 949 | s64 duration; |
bce29ac9 DBO |
950 | |
951 | if (!osn_var->sampling) | |
952 | return; | |
953 | ||
954 | duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start); | |
955 | trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration); | |
956 | osn_var->irq.arrival_time = 0; | |
957 | cond_move_softirq_delta_start(osn_var, duration); | |
958 | cond_move_thread_delta_start(osn_var, duration); | |
959 | } | |
960 | ||
/*
 * trace_irqentry_callback - Callback to the irq:irq_entry trace event
 *
 * Notes the start of an IRQ occurrence by forwarding the IRQ number to
 * osnoise_trace_irq_entry(). @data and @action are not used.
 */
static void trace_irqentry_callback(void *data, int irq,
				    struct irqaction *action)
{
	osnoise_trace_irq_entry(irq);
}
971 | ||
972 | /* | |
973 | * trace_irqexit_callback - Callback to the irq:irq_exit traceevent | |
974 | * | |
975 | * Used to note the end of an IRQ occurece. | |
976 | */ | |
977 | static void trace_irqexit_callback(void *data, int irq, | |
978 | struct irqaction *action, int ret) | |
979 | { | |
980 | osnoise_trace_irq_exit(irq, action->name); | |
981 | } | |
982 | ||
983 | /* | |
984 | * arch specific register function. | |
985 | */ | |
986 | int __weak osnoise_arch_register(void) | |
987 | { | |
988 | return 0; | |
989 | } | |
990 | ||
991 | /* | |
992 | * arch specific unregister function. | |
993 | */ | |
994 | void __weak osnoise_arch_unregister(void) | |
995 | { | |
996 | return; | |
997 | } | |
998 | ||
999 | /* | |
1000 | * hook_irq_events - Hook IRQ handling events | |
1001 | * | |
1002 | * This function hooks the IRQ related callbacks to the respective trace | |
1003 | * events. | |
1004 | */ | |
f7d9f637 | 1005 | static int hook_irq_events(void) |
bce29ac9 DBO |
1006 | { |
1007 | int ret; | |
1008 | ||
1009 | ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL); | |
1010 | if (ret) | |
1011 | goto out_err; | |
1012 | ||
1013 | ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL); | |
1014 | if (ret) | |
1015 | goto out_unregister_entry; | |
1016 | ||
1017 | ret = osnoise_arch_register(); | |
1018 | if (ret) | |
1019 | goto out_irq_exit; | |
1020 | ||
1021 | return 0; | |
1022 | ||
1023 | out_irq_exit: | |
1024 | unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); | |
1025 | out_unregister_entry: | |
1026 | unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); | |
1027 | out_err: | |
1028 | return -EINVAL; | |
1029 | } | |
1030 | ||
1031 | /* | |
1032 | * unhook_irq_events - Unhook IRQ handling events | |
1033 | * | |
1034 | * This function unhooks the IRQ related callbacks to the respective trace | |
1035 | * events. | |
1036 | */ | |
f7d9f637 | 1037 | static void unhook_irq_events(void) |
bce29ac9 DBO |
1038 | { |
1039 | osnoise_arch_unregister(); | |
1040 | unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL); | |
1041 | unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL); | |
1042 | } | |
1043 | ||
1044 | #ifndef CONFIG_PREEMPT_RT | |
1045 | /* | |
1046 | * trace_softirq_entry_callback - Note the starting of a softirq | |
1047 | * | |
1048 | * Save the starting time of a softirq. As softirqs are non-preemptive to | |
1049 | * other softirqs, it is safe to use a single variable (ons_var->softirq) | |
1050 | * to save the statistics. The arrival_time is used to report... the | |
1051 | * arrival time. The delta_start is used to compute the duration at the | |
1052 | * softirq exit handler. See cond_move_softirq_delta_start(). | |
1053 | */ | |
f7d9f637 | 1054 | static void trace_softirq_entry_callback(void *data, unsigned int vec_nr) |
bce29ac9 DBO |
1055 | { |
1056 | struct osnoise_variables *osn_var = this_cpu_osn_var(); | |
1057 | ||
1058 | if (!osn_var->sampling) | |
1059 | return; | |
1060 | /* | |
1061 | * This value will be used in the report, but not to compute | |
1062 | * the execution time, so it is safe to get it unsafe. | |
1063 | */ | |
1064 | osn_var->softirq.arrival_time = time_get(); | |
1065 | set_int_safe_time(osn_var, &osn_var->softirq.delta_start); | |
1066 | osn_var->softirq.count++; | |
1067 | ||
1068 | local_inc(&osn_var->int_counter); | |
1069 | } | |
1070 | ||
1071 | /* | |
1072 | * trace_softirq_exit_callback - Note the end of an softirq | |
1073 | * | |
1074 | * Computes the duration of the softirq noise, and trace it. Also discounts the | |
1075 | * interference from other sources of noise could be currently being accounted. | |
1076 | */ | |
f7d9f637 | 1077 | static void trace_softirq_exit_callback(void *data, unsigned int vec_nr) |
bce29ac9 DBO |
1078 | { |
1079 | struct osnoise_variables *osn_var = this_cpu_osn_var(); | |
022632f6 | 1080 | s64 duration; |
bce29ac9 DBO |
1081 | |
1082 | if (!osn_var->sampling) | |
1083 | return; | |
1084 | ||
ccb67544 DBO |
1085 | if (unlikely(timerlat_enabled())) |
1086 | if (!timerlat_softirq_exit(osn_var)) | |
a955d7ea | 1087 | return; |
a955d7ea | 1088 | |
bce29ac9 DBO |
1089 | duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start); |
1090 | trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration); | |
1091 | cond_move_thread_delta_start(osn_var, duration); | |
1092 | osn_var->softirq.arrival_time = 0; | |
1093 | } | |
1094 | ||
1095 | /* | |
1096 | * hook_softirq_events - Hook softirq handling events | |
1097 | * | |
1098 | * This function hooks the softirq related callbacks to the respective trace | |
1099 | * events. | |
1100 | */ | |
1101 | static int hook_softirq_events(void) | |
1102 | { | |
1103 | int ret; | |
1104 | ||
1105 | ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL); | |
1106 | if (ret) | |
1107 | goto out_err; | |
1108 | ||
1109 | ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL); | |
1110 | if (ret) | |
1111 | goto out_unreg_entry; | |
1112 | ||
1113 | return 0; | |
1114 | ||
1115 | out_unreg_entry: | |
1116 | unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); | |
1117 | out_err: | |
1118 | return -EINVAL; | |
1119 | } | |
1120 | ||
1121 | /* | |
1122 | * unhook_softirq_events - Unhook softirq handling events | |
1123 | * | |
1124 | * This function hooks the softirq related callbacks to the respective trace | |
1125 | * events. | |
1126 | */ | |
1127 | static void unhook_softirq_events(void) | |
1128 | { | |
1129 | unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL); | |
1130 | unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL); | |
1131 | } | |
1132 | #else /* CONFIG_PREEMPT_RT */ | |
/*
 * softirqs are threads on the PREEMPT_RT mode, so there is no softirq event
 * to hook: always report success.
 */
static int hook_softirq_events(void)
{
	return 0;
}
/* Nothing was hooked by this variant of hook_softirq_events(): nothing to undo. */
static void unhook_softirq_events(void)
{
}
1143 | #endif | |
1144 | ||
1145 | /* | |
1146 | * thread_entry - Record the starting of a thread noise window | |
1147 | * | |
1148 | * It saves the context switch time for a noisy thread, and increments | |
1149 | * the interference counters. | |
1150 | */ | |
1151 | static void | |
1152 | thread_entry(struct osnoise_variables *osn_var, struct task_struct *t) | |
1153 | { | |
1154 | if (!osn_var->sampling) | |
1155 | return; | |
1156 | /* | |
1157 | * The arrival time will be used in the report, but not to compute | |
1158 | * the execution time, so it is safe to get it unsafe. | |
1159 | */ | |
1160 | osn_var->thread.arrival_time = time_get(); | |
1161 | ||
1162 | set_int_safe_time(osn_var, &osn_var->thread.delta_start); | |
1163 | ||
1164 | osn_var->thread.count++; | |
1165 | local_inc(&osn_var->int_counter); | |
1166 | } | |
1167 | ||
1168 | /* | |
1169 | * thread_exit - Report the end of a thread noise window | |
1170 | * | |
1171 | * It computes the total noise from a thread, tracing if needed. | |
1172 | */ | |
1173 | static void | |
1174 | thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) | |
1175 | { | |
022632f6 | 1176 | s64 duration; |
bce29ac9 DBO |
1177 | |
1178 | if (!osn_var->sampling) | |
1179 | return; | |
1180 | ||
ccb67544 DBO |
1181 | if (unlikely(timerlat_enabled())) |
1182 | if (!timerlat_thread_exit(osn_var)) | |
a955d7ea | 1183 | return; |
a955d7ea | 1184 | |
bce29ac9 DBO |
1185 | duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start); |
1186 | ||
1187 | trace_thread_noise(t, osn_var->thread.arrival_time, duration); | |
1188 | ||
1189 | osn_var->thread.arrival_time = 0; | |
1190 | } | |
1191 | ||
e88ed227 DBO |
1192 | #ifdef CONFIG_TIMERLAT_TRACER |
1193 | /* | |
1194 | * osnoise_stop_exception - Stop tracing and the tracer. | |
1195 | */ | |
1196 | static __always_inline void osnoise_stop_exception(char *msg, int cpu) | |
1197 | { | |
1198 | struct osnoise_instance *inst; | |
1199 | struct trace_array *tr; | |
1200 | ||
1201 | rcu_read_lock(); | |
1202 | list_for_each_entry_rcu(inst, &osnoise_instances, list) { | |
1203 | tr = inst->tr; | |
1204 | trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, | |
1205 | "stop tracing hit on cpu %d due to exception: %s\n", | |
1206 | smp_processor_id(), | |
1207 | msg); | |
1208 | ||
1209 | if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options)) | |
1210 | panic("tracer hit on cpu %d due to exception: %s\n", | |
1211 | smp_processor_id(), | |
1212 | msg); | |
1213 | ||
1214 | tracer_tracing_off(tr); | |
1215 | } | |
1216 | rcu_read_unlock(); | |
1217 | } | |
1218 | ||
1219 | /* | |
1220 | * trace_sched_migrate_callback - sched:sched_migrate_task trace event handler | |
1221 | * | |
1222 | * his function is hooked to the sched:sched_migrate_task trace event, and monitors | |
1223 | * timerlat user-space thread migration. | |
1224 | */ | |
1225 | static void trace_sched_migrate_callback(void *data, struct task_struct *p, int dest_cpu) | |
1226 | { | |
1227 | struct osnoise_variables *osn_var; | |
1228 | long cpu = task_cpu(p); | |
1229 | ||
1230 | osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); | |
1231 | if (osn_var->pid == p->pid && dest_cpu != cpu) { | |
1232 | per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1; | |
1233 | osnoise_taint("timerlat user-thread migrated\n"); | |
1234 | osnoise_stop_exception("timerlat user-thread migrated", cpu); | |
1235 | } | |
1236 | } | |
1237 | ||
1238 | static int register_migration_monitor(void) | |
1239 | { | |
1240 | int ret = 0; | |
1241 | ||
1242 | /* | |
1243 | * Timerlat thread migration check is only required when running timerlat in user-space. | |
1244 | * Thus, enable callback only if timerlat is set with no workload. | |
1245 | */ | |
1246 | if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options)) | |
1247 | ret = register_trace_sched_migrate_task(trace_sched_migrate_callback, NULL); | |
1248 | ||
1249 | return ret; | |
1250 | } | |
1251 | ||
1252 | static void unregister_migration_monitor(void) | |
1253 | { | |
1254 | if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options)) | |
1255 | unregister_trace_sched_migrate_task(trace_sched_migrate_callback, NULL); | |
1256 | } | |
1257 | #else | |
/* Without CONFIG_TIMERLAT_TRACER there is no migration to monitor: report success. */
static int register_migration_monitor(void)
{
	return 0;
}
/* Without CONFIG_TIMERLAT_TRACER nothing was registered: nothing to undo. */
static void unregister_migration_monitor(void)
{
}
1263 | #endif | |
bce29ac9 DBO |
1264 | /* |
1265 | * trace_sched_switch - sched:sched_switch trace event handler | |
1266 | * | |
1267 | * This function is hooked to the sched:sched_switch trace event, and it is | |
1268 | * used to record the beginning and to report the end of a thread noise window. | |
1269 | */ | |
f7d9f637 | 1270 | static void |
fa2c3254 | 1271 | trace_sched_switch_callback(void *data, bool preempt, |
fa2c3254 | 1272 | struct task_struct *p, |
9c2136be DK |
1273 | struct task_struct *n, |
1274 | unsigned int prev_state) | |
bce29ac9 DBO |
1275 | { |
1276 | struct osnoise_variables *osn_var = this_cpu_osn_var(); | |
30838fcd | 1277 | int workload = test_bit(OSN_WORKLOAD, &osnoise_options); |
bce29ac9 | 1278 | |
30838fcd | 1279 | if ((p->pid != osn_var->pid) || !workload) |
bce29ac9 DBO |
1280 | thread_exit(osn_var, p); |
1281 | ||
30838fcd | 1282 | if ((n->pid != osn_var->pid) || !workload) |
bce29ac9 DBO |
1283 | thread_entry(osn_var, n); |
1284 | } | |
1285 | ||
1286 | /* | |
e88ed227 | 1287 | * hook_thread_events - Hook the instrumentation for thread noise |
bce29ac9 DBO |
1288 | * |
1289 | * Hook the osnoise tracer callbacks to handle the noise from other | |
1290 | * threads on the necessary kernel events. | |
1291 | */ | |
f7d9f637 | 1292 | static int hook_thread_events(void) |
bce29ac9 DBO |
1293 | { |
1294 | int ret; | |
1295 | ||
1296 | ret = register_trace_sched_switch(trace_sched_switch_callback, NULL); | |
1297 | if (ret) | |
1298 | return -EINVAL; | |
1299 | ||
e88ed227 DBO |
1300 | ret = register_migration_monitor(); |
1301 | if (ret) | |
1302 | goto out_unreg; | |
1303 | ||
bce29ac9 | 1304 | return 0; |
e88ed227 DBO |
1305 | |
1306 | out_unreg: | |
1307 | unregister_trace_sched_switch(trace_sched_switch_callback, NULL); | |
1308 | return -EINVAL; | |
bce29ac9 DBO |
1309 | } |
1310 | ||
1311 | /* | |
e88ed227 | 1312 | * unhook_thread_events - unhook the instrumentation for thread noise |
bce29ac9 DBO |
1313 | * |
1314 | * Unook the osnoise tracer callbacks to handle the noise from other | |
1315 | * threads on the necessary kernel events. | |
1316 | */ | |
f7d9f637 | 1317 | static void unhook_thread_events(void) |
bce29ac9 DBO |
1318 | { |
1319 | unregister_trace_sched_switch(trace_sched_switch_callback, NULL); | |
e88ed227 | 1320 | unregister_migration_monitor(); |
bce29ac9 DBO |
1321 | } |
1322 | ||
1323 | /* | |
1324 | * save_osn_sample_stats - Save the osnoise_sample statistics | |
1325 | * | |
1326 | * Save the osnoise_sample statistics before the sampling phase. These | |
1327 | * values will be used later to compute the diff betwneen the statistics | |
1328 | * before and after the osnoise sampling. | |
1329 | */ | |
f7d9f637 DBO |
1330 | static void |
1331 | save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) | |
bce29ac9 DBO |
1332 | { |
1333 | s->nmi_count = osn_var->nmi.count; | |
1334 | s->irq_count = osn_var->irq.count; | |
1335 | s->softirq_count = osn_var->softirq.count; | |
1336 | s->thread_count = osn_var->thread.count; | |
1337 | } | |
1338 | ||
1339 | /* | |
1340 | * diff_osn_sample_stats - Compute the osnoise_sample statistics | |
1341 | * | |
1342 | * After a sample period, compute the difference on the osnoise_sample | |
1343 | * statistics. The struct osnoise_sample *s contains the statistics saved via | |
1344 | * save_osn_sample_stats() before the osnoise sampling. | |
1345 | */ | |
f7d9f637 DBO |
1346 | static void |
1347 | diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) | |
bce29ac9 DBO |
1348 | { |
1349 | s->nmi_count = osn_var->nmi.count - s->nmi_count; | |
1350 | s->irq_count = osn_var->irq.count - s->irq_count; | |
1351 | s->softirq_count = osn_var->softirq.count - s->softirq_count; | |
1352 | s->thread_count = osn_var->thread.count - s->thread_count; | |
1353 | } | |
1354 | ||
1355 | /* | |
1356 | * osnoise_stop_tracing - Stop tracing and the tracer. | |
1357 | */ | |
0e05ba49 | 1358 | static __always_inline void osnoise_stop_tracing(void) |
bce29ac9 | 1359 | { |
dae18134 DBO |
1360 | struct osnoise_instance *inst; |
1361 | struct trace_array *tr; | |
1362 | ||
1363 | rcu_read_lock(); | |
1364 | list_for_each_entry_rcu(inst, &osnoise_instances, list) { | |
1365 | tr = inst->tr; | |
1366 | trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, | |
1367 | "stop tracing hit on cpu %d\n", smp_processor_id()); | |
1368 | ||
1603dda4 DBO |
1369 | if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options)) |
1370 | panic("tracer hit stop condition on CPU %d\n", smp_processor_id()); | |
1371 | ||
dae18134 DBO |
1372 | tracer_tracing_off(tr); |
1373 | } | |
1374 | rcu_read_unlock(); | |
1375 | } | |
0e05ba49 | 1376 | |
cb7ca871 DBO |
1377 | /* |
1378 | * osnoise_has_tracing_on - Check if there is at least one instance on | |
1379 | */ | |
1380 | static __always_inline int osnoise_has_tracing_on(void) | |
1381 | { | |
1382 | struct osnoise_instance *inst; | |
1383 | int trace_is_on = 0; | |
1384 | ||
1385 | rcu_read_lock(); | |
1386 | list_for_each_entry_rcu(inst, &osnoise_instances, list) | |
1387 | trace_is_on += tracer_tracing_is_on(inst->tr); | |
1388 | rcu_read_unlock(); | |
1389 | ||
1390 | return trace_is_on; | |
1391 | } | |
1392 | ||
dae18134 DBO |
1393 | /* |
1394 | * notify_new_max_latency - Notify a new max latency via fsnotify interface. | |
1395 | */ | |
1396 | static void notify_new_max_latency(u64 latency) | |
1397 | { | |
1398 | struct osnoise_instance *inst; | |
1399 | struct trace_array *tr; | |
0e05ba49 | 1400 | |
dae18134 DBO |
1401 | rcu_read_lock(); |
1402 | list_for_each_entry_rcu(inst, &osnoise_instances, list) { | |
1403 | tr = inst->tr; | |
d3cba7f0 | 1404 | if (tracer_tracing_is_on(tr) && tr->max_latency < latency) { |
dae18134 DBO |
1405 | tr->max_latency = latency; |
1406 | latency_fsnotify(tr); | |
1407 | } | |
1408 | } | |
1409 | rcu_read_unlock(); | |
bce29ac9 DBO |
1410 | } |
1411 | ||
1412 | /* | |
1413 | * run_osnoise - Sample the time and look for osnoise | |
1414 | * | |
1415 | * Used to capture the time, looking for potential osnoise latency repeatedly. | |
1416 | * Different from hwlat_detector, it is called with preemption and interrupts | |
1417 | * enabled. This allows irqs, softirqs and threads to run, interfering on the | |
1418 | * osnoise sampling thread, as they would do with a regular thread. | |
1419 | */ | |
1420 | static int run_osnoise(void) | |
1421 | { | |
b5dce200 | 1422 | bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options); |
bce29ac9 | 1423 | struct osnoise_variables *osn_var = this_cpu_osn_var(); |
bce29ac9 DBO |
1424 | u64 start, sample, last_sample; |
1425 | u64 last_int_count, int_count; | |
19c3eaa7 | 1426 | s64 noise = 0, max_noise = 0; |
bce29ac9 DBO |
1427 | s64 total, last_total = 0; |
1428 | struct osnoise_sample s; | |
b5dce200 | 1429 | bool disable_preemption; |
bce29ac9 | 1430 | unsigned int threshold; |
bce29ac9 | 1431 | u64 runtime, stop_in; |
19c3eaa7 DBO |
1432 | u64 sum_noise = 0; |
1433 | int hw_count = 0; | |
bce29ac9 DBO |
1434 | int ret = -1; |
1435 | ||
b5dce200 DBO |
1436 | /* |
1437 | * Disabling preemption is only required if IRQs are enabled, | |
1438 | * and the options is set on. | |
1439 | */ | |
1440 | disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options); | |
1441 | ||
bce29ac9 DBO |
1442 | /* |
1443 | * Considers the current thread as the workload. | |
1444 | */ | |
1445 | osn_var->pid = current->pid; | |
1446 | ||
1447 | /* | |
1448 | * Save the current stats for the diff | |
1449 | */ | |
1450 | save_osn_sample_stats(osn_var, &s); | |
1451 | ||
1452 | /* | |
c40583e1 | 1453 | * if threshold is 0, use the default value of 1 us. |
bce29ac9 | 1454 | */ |
c40583e1 | 1455 | threshold = tracing_thresh ? : 1000; |
bce29ac9 | 1456 | |
b5dce200 DBO |
1457 | /* |
1458 | * Apply PREEMPT and IRQ disabled options. | |
1459 | */ | |
1460 | if (disable_irq) | |
1461 | local_irq_disable(); | |
1462 | ||
1463 | if (disable_preemption) | |
1464 | preempt_disable(); | |
1465 | ||
bce29ac9 DBO |
1466 | /* |
1467 | * Make sure NMIs see sampling first | |
1468 | */ | |
1469 | osn_var->sampling = true; | |
1470 | barrier(); | |
1471 | ||
1472 | /* | |
1473 | * Transform the *_us config to nanoseconds to avoid the | |
1474 | * division on the main loop. | |
1475 | */ | |
1476 | runtime = osnoise_data.sample_runtime * NSEC_PER_USEC; | |
1477 | stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC; | |
1478 | ||
1479 | /* | |
1480 | * Start timestemp | |
1481 | */ | |
1482 | start = time_get(); | |
1483 | ||
1484 | /* | |
1485 | * "previous" loop. | |
1486 | */ | |
1487 | last_int_count = set_int_safe_time(osn_var, &last_sample); | |
1488 | ||
1489 | do { | |
1490 | /* | |
1491 | * Get sample! | |
1492 | */ | |
1493 | int_count = set_int_safe_time(osn_var, &sample); | |
1494 | ||
1495 | noise = time_sub(sample, last_sample); | |
1496 | ||
1497 | /* | |
1498 | * This shouldn't happen. | |
1499 | */ | |
1500 | if (noise < 0) { | |
1501 | osnoise_taint("negative noise!"); | |
1502 | goto out; | |
1503 | } | |
1504 | ||
1505 | /* | |
1506 | * Sample runtime. | |
1507 | */ | |
1508 | total = time_sub(sample, start); | |
1509 | ||
1510 | /* | |
1511 | * Check for possible overflows. | |
1512 | */ | |
1513 | if (total < last_total) { | |
1514 | osnoise_taint("total overflow!"); | |
1515 | break; | |
1516 | } | |
1517 | ||
1518 | last_total = total; | |
1519 | ||
1520 | if (noise >= threshold) { | |
1521 | int interference = int_count - last_int_count; | |
1522 | ||
1523 | if (noise > max_noise) | |
1524 | max_noise = noise; | |
1525 | ||
1526 | if (!interference) | |
1527 | hw_count++; | |
1528 | ||
1529 | sum_noise += noise; | |
1530 | ||
1531 | trace_sample_threshold(last_sample, noise, interference); | |
1532 | ||
1533 | if (osnoise_data.stop_tracing) | |
1534 | if (noise > stop_in) | |
1535 | osnoise_stop_tracing(); | |
1536 | } | |
1537 | ||
caf4c86b NSJ |
1538 | /* |
1539 | * In some cases, notably when running on a nohz_full CPU with | |
1540 | * a stopped tick PREEMPT_RCU has no way to account for QSs. | |
1541 | * This will eventually cause unwarranted noise as PREEMPT_RCU | |
1542 | * will force preemption as the means of ending the current | |
1543 | * grace period. We avoid this problem by calling | |
32a9f26e | 1544 | * rcu_momentary_eqs(), which performs a zero duration |
caf4c86b NSJ |
1545 | * EQS allowing PREEMPT_RCU to end the current grace period. |
1546 | * This call shouldn't be wrapped inside an RCU critical | |
1547 | * section. | |
1548 | * | |
1549 | * Note that in non PREEMPT_RCU kernels QSs are handled through | |
1550 | * cond_resched() | |
1551 | */ | |
1552 | if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { | |
b5dce200 DBO |
1553 | if (!disable_irq) |
1554 | local_irq_disable(); | |
1555 | ||
32a9f26e | 1556 | rcu_momentary_eqs(); |
b5dce200 DBO |
1557 | |
1558 | if (!disable_irq) | |
1559 | local_irq_enable(); | |
caf4c86b NSJ |
1560 | } |
1561 | ||
bce29ac9 DBO |
1562 | /* |
1563 | * For the non-preemptive kernel config: let threads runs, if | |
b5dce200 | 1564 | * they so wish, unless set not do to so. |
bce29ac9 | 1565 | */ |
b5dce200 DBO |
1566 | if (!disable_irq && !disable_preemption) |
1567 | cond_resched(); | |
bce29ac9 DBO |
1568 | |
1569 | last_sample = sample; | |
1570 | last_int_count = int_count; | |
1571 | ||
1572 | } while (total < runtime && !kthread_should_stop()); | |
1573 | ||
1574 | /* | |
1575 | * Finish the above in the view for interrupts. | |
1576 | */ | |
1577 | barrier(); | |
1578 | ||
1579 | osn_var->sampling = false; | |
1580 | ||
1581 | /* | |
1582 | * Make sure sampling data is no longer updated. | |
1583 | */ | |
1584 | barrier(); | |
1585 | ||
b5dce200 DBO |
1586 | /* |
1587 | * Return to the preemptive state. | |
1588 | */ | |
1589 | if (disable_preemption) | |
1590 | preempt_enable(); | |
1591 | ||
1592 | if (disable_irq) | |
1593 | local_irq_enable(); | |
1594 | ||
bce29ac9 DBO |
1595 | /* |
1596 | * Save noise info. | |
1597 | */ | |
1598 | s.noise = time_to_us(sum_noise); | |
1599 | s.runtime = time_to_us(total); | |
1600 | s.max_sample = time_to_us(max_noise); | |
1601 | s.hw_count = hw_count; | |
1602 | ||
1603 | /* Save interference stats info */ | |
1604 | diff_osn_sample_stats(osn_var, &s); | |
1605 | ||
1606 | trace_osnoise_sample(&s); | |
1607 | ||
dae18134 | 1608 | notify_new_max_latency(max_noise); |
bce29ac9 DBO |
1609 | |
1610 | if (osnoise_data.stop_tracing_total) | |
1611 | if (s.noise > osnoise_data.stop_tracing_total) | |
1612 | osnoise_stop_tracing(); | |
1613 | ||
1614 | return 0; | |
1615 | out: | |
1616 | return ret; | |
1617 | } | |
1618 | ||
/*
 * CPU masks used by the osnoise/timerlat threads management.
 * NOTE(review): presumably osnoise_cpumask holds the CPUs selected to run the
 * tracer and save_cpumask is scratch space for updating it; confirm against
 * the users of these masks.
 */
static struct cpumask osnoise_cpumask;
static struct cpumask save_cpumask;
/* The bit of a CPU is cleared when its thread leaves via osnoise_migration_pending(). */
static struct cpumask kthread_cpumask;
bce29ac9 | 1622 | |
dd990352 DBO |
1623 | /* |
1624 | * osnoise_sleep - sleep until the next period | |
1625 | */ | |
cb7ca871 | 1626 | static void osnoise_sleep(bool skip_period) |
dd990352 DBO |
1627 | { |
1628 | u64 interval; | |
1629 | ktime_t wake_time; | |
1630 | ||
1631 | mutex_lock(&interface_lock); | |
cb7ca871 DBO |
1632 | if (skip_period) |
1633 | interval = osnoise_data.sample_period; | |
1634 | else | |
1635 | interval = osnoise_data.sample_period - osnoise_data.sample_runtime; | |
dd990352 DBO |
1636 | mutex_unlock(&interface_lock); |
1637 | ||
1638 | /* | |
1639 | * differently from hwlat_detector, the osnoise tracer can run | |
1640 | * without a pause because preemption is on. | |
1641 | */ | |
1642 | if (!interval) { | |
1643 | /* Let synchronize_rcu_tasks() make progress */ | |
1644 | cond_resched_tasks_rcu_qs(); | |
1645 | return; | |
1646 | } | |
1647 | ||
1648 | wake_time = ktime_add_us(ktime_get(), interval); | |
1649 | __set_current_state(TASK_INTERRUPTIBLE); | |
1650 | ||
b18c58af | 1651 | while (schedule_hrtimeout(&wake_time, HRTIMER_MODE_ABS)) { |
dd990352 DBO |
1652 | if (kthread_should_stop()) |
1653 | break; | |
1654 | } | |
1655 | } | |
1656 | ||
4998e7fd DBO |
1657 | /* |
1658 | * osnoise_migration_pending - checks if the task needs to migrate | |
1659 | * | |
1660 | * osnoise/timerlat threads are per-cpu. If there is a pending request to | |
1661 | * migrate the thread away from the current CPU, something bad has happened. | |
1662 | * Play the good citizen and leave. | |
1663 | * | |
1664 | * Returns 0 if it is safe to continue, 1 otherwise. | |
1665 | */ | |
1666 | static inline int osnoise_migration_pending(void) | |
1667 | { | |
1668 | if (!current->migration_pending) | |
1669 | return 0; | |
1670 | ||
1671 | /* | |
1672 | * If migration is pending, there is a task waiting for the | |
1673 | * tracer to enable migration. The tracer does not allow migration, | |
1674 | * thus: taint and leave to unblock the blocked thread. | |
1675 | */ | |
1676 | osnoise_taint("migration requested to osnoise threads, leaving."); | |
1677 | ||
1678 | /* | |
1679 | * Unset this thread from the threads managed by the interface. | |
1680 | * The tracers are responsible for cleaning their env before | |
1681 | * exiting. | |
1682 | */ | |
1683 | mutex_lock(&interface_lock); | |
1684 | this_cpu_osn_var()->kthread = NULL; | |
177e1cc2 | 1685 | cpumask_clear_cpu(smp_processor_id(), &kthread_cpumask); |
4998e7fd DBO |
1686 | mutex_unlock(&interface_lock); |
1687 | ||
1688 | return 1; | |
1689 | } | |
1690 | ||
bce29ac9 DBO |
1691 | /* |
1692 | * osnoise_main - The osnoise detection kernel thread | |
1693 | * | |
1694 | * Calls run_osnoise() function to measure the osnoise for the configured runtime, | |
1695 | * every period. | |
1696 | */ | |
1697 | static int osnoise_main(void *data) | |
1698 | { | |
4998e7fd DBO |
1699 | unsigned long flags; |
1700 | ||
1701 | /* | |
1702 | * This thread was created pinned to the CPU using PF_NO_SETAFFINITY. | |
1703 | * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread. | |
1704 | * | |
1705 | * To work around this limitation, disable migration and remove the | |
1706 | * flag. | |
1707 | */ | |
1708 | migrate_disable(); | |
1709 | raw_spin_lock_irqsave(¤t->pi_lock, flags); | |
1710 | current->flags &= ~(PF_NO_SETAFFINITY); | |
1711 | raw_spin_unlock_irqrestore(¤t->pi_lock, flags); | |
bce29ac9 DBO |
1712 | |
1713 | while (!kthread_should_stop()) { | |
4998e7fd DBO |
1714 | if (osnoise_migration_pending()) |
1715 | break; | |
1716 | ||
cb7ca871 DBO |
1717 | /* skip a period if tracing is off on all instances */ |
1718 | if (!osnoise_has_tracing_on()) { | |
1719 | osnoise_sleep(true); | |
1720 | continue; | |
1721 | } | |
1722 | ||
bce29ac9 | 1723 | run_osnoise(); |
cb7ca871 | 1724 | osnoise_sleep(false); |
bce29ac9 DBO |
1725 | } |
1726 | ||
4998e7fd | 1727 | migrate_enable(); |
bce29ac9 DBO |
1728 | return 0; |
1729 | } | |
1730 | ||
a955d7ea DBO |
1731 | #ifdef CONFIG_TIMERLAT_TRACER |
1732 | /* | |
1733 | * timerlat_irq - hrtimer handler for timerlat. | |
1734 | */ | |
1735 | static enum hrtimer_restart timerlat_irq(struct hrtimer *timer) | |
1736 | { | |
1737 | struct osnoise_variables *osn_var = this_cpu_osn_var(); | |
a955d7ea DBO |
1738 | struct timerlat_variables *tlat; |
1739 | struct timerlat_sample s; | |
1740 | u64 now; | |
1741 | u64 diff; | |
1742 | ||
1743 | /* | |
1744 | * I am not sure if the timer was armed for this CPU. So, get | |
1745 | * the timerlat struct from the timer itself, not from this | |
1746 | * CPU. | |
1747 | */ | |
1748 | tlat = container_of(timer, struct timerlat_variables, timer); | |
1749 | ||
1750 | now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); | |
1751 | ||
1752 | /* | |
1753 | * Enable the osnoise: events for thread an softirq. | |
1754 | */ | |
1755 | tlat->tracing_thread = true; | |
1756 | ||
1757 | osn_var->thread.arrival_time = time_get(); | |
1758 | ||
1759 | /* | |
1760 | * A hardirq is running: the timer IRQ. It is for sure preempting | |
1761 | * a thread, and potentially preempting a softirq. | |
1762 | * | |
1763 | * At this point, it is not interesting to know the duration of the | |
1764 | * preempted thread (and maybe softirq), but how much time they will | |
1765 | * delay the beginning of the execution of the timer thread. | |
1766 | * | |
1767 | * To get the correct (net) delay added by the softirq, its delta_start | |
1768 | * is set as the IRQ one. In this way, at the return of the IRQ, the delta | |
1769 | * start of the sofitrq will be zeroed, accounting then only the time | |
1770 | * after that. | |
1771 | * | |
1772 | * The thread follows the same principle. However, if a softirq is | |
1773 | * running, the thread needs to receive the softirq delta_start. The | |
1774 | * reason being is that the softirq will be the last to be unfolded, | |
1775 | * resseting the thread delay to zero. | |
01e181c7 DBO |
1776 | * |
1777 | * The PREEMPT_RT is a special case, though. As softirqs run as threads | |
1778 | * on RT, moving the thread is enough. | |
a955d7ea | 1779 | */ |
01e181c7 | 1780 | if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) { |
a955d7ea DBO |
1781 | copy_int_safe_time(osn_var, &osn_var->thread.delta_start, |
1782 | &osn_var->softirq.delta_start); | |
1783 | ||
1784 | copy_int_safe_time(osn_var, &osn_var->softirq.delta_start, | |
1785 | &osn_var->irq.delta_start); | |
1786 | } else { | |
1787 | copy_int_safe_time(osn_var, &osn_var->thread.delta_start, | |
1788 | &osn_var->irq.delta_start); | |
1789 | } | |
a955d7ea DBO |
1790 | |
1791 | /* | |
1792 | * Compute the current time with the expected time. | |
1793 | */ | |
1794 | diff = now - tlat->abs_period; | |
1795 | ||
1796 | tlat->count++; | |
1797 | s.seqnum = tlat->count; | |
1798 | s.timer_latency = diff; | |
1799 | s.context = IRQ_CONTEXT; | |
1800 | ||
1801 | trace_timerlat_sample(&s); | |
1802 | ||
aa748949 DBO |
1803 | if (osnoise_data.stop_tracing) { |
1804 | if (time_to_us(diff) >= osnoise_data.stop_tracing) { | |
4dd2aea2 DBO |
1805 | |
1806 | /* | |
1807 | * At this point, if stop_tracing is set and <= print_stack, | |
1808 | * print_stack is set and would be printed in the thread handler. | |
1809 | * | |
1810 | * Thus, print the stack trace as it is helpful to define the | |
1811 | * root cause of an IRQ latency. | |
1812 | */ | |
1813 | if (osnoise_data.stop_tracing <= osnoise_data.print_stack) { | |
1814 | timerlat_save_stack(0); | |
1815 | timerlat_dump_stack(time_to_us(diff)); | |
1816 | } | |
a955d7ea | 1817 | |
a955d7ea | 1818 | osnoise_stop_tracing(); |
aa748949 | 1819 | notify_new_max_latency(diff); |
9c556e5a | 1820 | |
632478a0 DBO |
1821 | wake_up_process(tlat->kthread); |
1822 | ||
9c556e5a | 1823 | return HRTIMER_NORESTART; |
aa748949 DBO |
1824 | } |
1825 | } | |
a955d7ea DBO |
1826 | |
1827 | wake_up_process(tlat->kthread); | |
1828 | ||
1829 | if (osnoise_data.print_stack) | |
1830 | timerlat_save_stack(0); | |
1831 | ||
1832 | return HRTIMER_NORESTART; | |
1833 | } | |
1834 | ||
1835 | /* | |
1836 | * wait_next_period - Wait for the next period for timerlat | |
1837 | */ | |
1838 | static int wait_next_period(struct timerlat_variables *tlat) | |
1839 | { | |
1840 | ktime_t next_abs_period, now; | |
1841 | u64 rel_period = osnoise_data.timerlat_period * 1000; | |
1842 | ||
1843 | now = hrtimer_cb_get_time(&tlat->timer); | |
1844 | next_abs_period = ns_to_ktime(tlat->abs_period + rel_period); | |
1845 | ||
1846 | /* | |
1847 | * Save the next abs_period. | |
1848 | */ | |
1849 | tlat->abs_period = (u64) ktime_to_ns(next_abs_period); | |
1850 | ||
1851 | /* | |
1852 | * If the new abs_period is in the past, skip the activation. | |
1853 | */ | |
1854 | while (ktime_compare(now, next_abs_period) > 0) { | |
1855 | next_abs_period = ns_to_ktime(tlat->abs_period + rel_period); | |
1856 | tlat->abs_period = (u64) ktime_to_ns(next_abs_period); | |
1857 | } | |
1858 | ||
1859 | set_current_state(TASK_INTERRUPTIBLE); | |
1860 | ||
1861 | hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD); | |
1862 | schedule(); | |
1863 | return 1; | |
1864 | } | |
1865 | ||
1866 | /* | |
1867 | * timerlat_main- Timerlat main | |
1868 | */ | |
1869 | static int timerlat_main(void *data) | |
1870 | { | |
1871 | struct osnoise_variables *osn_var = this_cpu_osn_var(); | |
1872 | struct timerlat_variables *tlat = this_cpu_tmr_var(); | |
1873 | struct timerlat_sample s; | |
1874 | struct sched_param sp; | |
4998e7fd | 1875 | unsigned long flags; |
a955d7ea DBO |
1876 | u64 now, diff; |
1877 | ||
1878 | /* | |
1879 | * Make the thread RT, that is how cyclictest is usually used. | |
1880 | */ | |
1881 | sp.sched_priority = DEFAULT_TIMERLAT_PRIO; | |
1882 | sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | |
1883 | ||
4998e7fd DBO |
1884 | /* |
1885 | * This thread was created pinned to the CPU using PF_NO_SETAFFINITY. | |
1886 | * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread. | |
1887 | * | |
1888 | * To work around this limitation, disable migration and remove the | |
1889 | * flag. | |
1890 | */ | |
1891 | migrate_disable(); | |
1892 | raw_spin_lock_irqsave(¤t->pi_lock, flags); | |
1893 | current->flags &= ~(PF_NO_SETAFFINITY); | |
1894 | raw_spin_unlock_irqrestore(¤t->pi_lock, flags); | |
1895 | ||
a955d7ea DBO |
1896 | tlat->count = 0; |
1897 | tlat->tracing_thread = false; | |
1898 | ||
1899 | hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); | |
1900 | tlat->timer.function = timerlat_irq; | |
1901 | tlat->kthread = current; | |
1902 | osn_var->pid = current->pid; | |
1903 | /* | |
1904 | * Anotate the arrival time. | |
1905 | */ | |
1906 | tlat->abs_period = hrtimer_cb_get_time(&tlat->timer); | |
1907 | ||
1908 | wait_next_period(tlat); | |
1909 | ||
1910 | osn_var->sampling = 1; | |
1911 | ||
1912 | while (!kthread_should_stop()) { | |
4998e7fd | 1913 | |
a955d7ea DBO |
1914 | now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); |
1915 | diff = now - tlat->abs_period; | |
1916 | ||
1917 | s.seqnum = tlat->count; | |
1918 | s.timer_latency = diff; | |
1919 | s.context = THREAD_CONTEXT; | |
1920 | ||
1921 | trace_timerlat_sample(&s); | |
1922 | ||
b9f451a9 DBO |
1923 | notify_new_max_latency(diff); |
1924 | ||
b14f4568 | 1925 | timerlat_dump_stack(time_to_us(diff)); |
a955d7ea DBO |
1926 | |
1927 | tlat->tracing_thread = false; | |
1928 | if (osnoise_data.stop_tracing_total) | |
1929 | if (time_to_us(diff) >= osnoise_data.stop_tracing_total) | |
1930 | osnoise_stop_tracing(); | |
1931 | ||
4998e7fd DBO |
1932 | if (osnoise_migration_pending()) |
1933 | break; | |
1934 | ||
a955d7ea DBO |
1935 | wait_next_period(tlat); |
1936 | } | |
1937 | ||
1938 | hrtimer_cancel(&tlat->timer); | |
4998e7fd | 1939 | migrate_enable(); |
a955d7ea DBO |
1940 | return 0; |
1941 | } | |
ccb67544 DBO |
1942 | #else /* CONFIG_TIMERLAT_TRACER */ |
/*
 * timerlat_main - stub used when CONFIG_TIMERLAT_TRACER is not set
 *
 * Does nothing and returns 0.
 */
static int timerlat_main(void *data)
{
	return 0;
}
a955d7ea DBO |
1947 | #endif /* CONFIG_TIMERLAT_TRACER */ |
1948 | ||
bce29ac9 | 1949 | /* |
c8895e27 DBO |
1950 | * stop_kthread - stop a workload thread |
1951 | */ | |
1952 | static void stop_kthread(unsigned int cpu) | |
1953 | { | |
1954 | struct task_struct *kthread; | |
1955 | ||
b484a02c | 1956 | kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL); |
30838fcd | 1957 | if (kthread) { |
177e1cc2 SR |
1958 | if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) && |
1959 | !WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) { | |
e88ed227 | 1960 | kthread_stop(kthread); |
177e1cc2 | 1961 | } else if (!WARN_ON(test_bit(OSN_WORKLOAD, &osnoise_options))) { |
e88ed227 DBO |
1962 | /* |
1963 | * This is a user thread waiting on the timerlat_fd. We need | |
1964 | * to close all users, and the best way to guarantee this is | |
1965 | * by killing the thread. NOTE: this is a purpose specific file. | |
1966 | */ | |
1967 | kill_pid(kthread->thread_pid, SIGKILL, 1); | |
1968 | put_task_struct(kthread); | |
1969 | } | |
30838fcd | 1970 | } else { |
e88ed227 | 1971 | /* if no workload, just return */ |
30838fcd | 1972 | if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { |
e88ed227 DBO |
1973 | /* |
1974 | * This is set in the osnoise tracer case. | |
1975 | */ | |
30838fcd DBO |
1976 | per_cpu(per_cpu_osnoise_var, cpu).sampling = false; |
1977 | barrier(); | |
30838fcd DBO |
1978 | } |
1979 | } | |
c8895e27 DBO |
1980 | } |
1981 | ||
1982 | /* | |
1983 | * stop_per_cpu_kthread - Stop per-cpu threads | |
bce29ac9 DBO |
1984 | * |
1985 | * Stop the osnoise sampling htread. Use this on unload and at system | |
1986 | * shutdown. | |
1987 | */ | |
1988 | static void stop_per_cpu_kthreads(void) | |
1989 | { | |
bce29ac9 DBO |
1990 | int cpu; |
1991 | ||
b484a02c WL |
1992 | cpus_read_lock(); |
1993 | ||
1994 | for_each_online_cpu(cpu) | |
c8895e27 | 1995 | stop_kthread(cpu); |
b484a02c WL |
1996 | |
1997 | cpus_read_unlock(); | |
c8895e27 DBO |
1998 | } |
1999 | ||
2000 | /* | |
2001 | * start_kthread - Start a workload tread | |
2002 | */ | |
2003 | static int start_kthread(unsigned int cpu) | |
2004 | { | |
2005 | struct task_struct *kthread; | |
2006 | void *main = osnoise_main; | |
2007 | char comm[24]; | |
2008 | ||
0bb0a5c1 WL |
2009 | /* Do not start a new thread if it is already running */ |
2010 | if (per_cpu(per_cpu_osnoise_var, cpu).kthread) | |
2011 | return 0; | |
2012 | ||
ccb67544 | 2013 | if (timerlat_enabled()) { |
c8895e27 DBO |
2014 | snprintf(comm, 24, "timerlat/%d", cpu); |
2015 | main = timerlat_main; | |
2016 | } else { | |
30838fcd DBO |
2017 | /* if no workload, just return */ |
2018 | if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { | |
2019 | per_cpu(per_cpu_osnoise_var, cpu).sampling = true; | |
2020 | barrier(); | |
2021 | return 0; | |
2022 | } | |
c8895e27 | 2023 | snprintf(comm, 24, "osnoise/%d", cpu); |
bce29ac9 | 2024 | } |
ccb67544 | 2025 | |
11e4e352 | 2026 | kthread = kthread_run_on_cpu(main, NULL, cpu, comm); |
c8895e27 DBO |
2027 | |
2028 | if (IS_ERR(kthread)) { | |
2029 | pr_err(BANNER "could not start sampling thread\n"); | |
2030 | stop_per_cpu_kthreads(); | |
2031 | return -ENOMEM; | |
2032 | } | |
2033 | ||
2034 | per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread; | |
177e1cc2 | 2035 | cpumask_set_cpu(cpu, &kthread_cpumask); |
c8895e27 DBO |
2036 | |
2037 | return 0; | |
bce29ac9 DBO |
2038 | } |
2039 | ||
2040 | /* | |
2041 | * start_per_cpu_kthread - Kick off per-cpu osnoise sampling kthreads | |
2042 | * | |
2043 | * This starts the kernel thread that will look for osnoise on many | |
2044 | * cpus. | |
2045 | */ | |
15ca4bdb | 2046 | static int start_per_cpu_kthreads(void) |
bce29ac9 DBO |
2047 | { |
2048 | struct cpumask *current_mask = &save_cpumask; | |
4b6b08f2 | 2049 | int retval = 0; |
bce29ac9 DBO |
2050 | int cpu; |
2051 | ||
e88ed227 DBO |
2052 | if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { |
2053 | if (timerlat_enabled()) | |
2054 | return 0; | |
2055 | } | |
2056 | ||
99c37d1a | 2057 | cpus_read_lock(); |
bce29ac9 | 2058 | /* |
66df27f1 | 2059 | * Run only on online CPUs in which osnoise is allowed to run. |
bce29ac9 | 2060 | */ |
66df27f1 | 2061 | cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask); |
bce29ac9 | 2062 | |
177e1cc2 SR |
2063 | for_each_possible_cpu(cpu) { |
2064 | if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) { | |
2065 | struct task_struct *kthread; | |
2066 | ||
0bb0a5c1 | 2067 | kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL); |
177e1cc2 SR |
2068 | if (!WARN_ON(!kthread)) |
2069 | kthread_stop(kthread); | |
2070 | } | |
177e1cc2 | 2071 | } |
bce29ac9 DBO |
2072 | |
2073 | for_each_cpu(cpu, current_mask) { | |
c8895e27 DBO |
2074 | retval = start_kthread(cpu); |
2075 | if (retval) { | |
99ee9317 | 2076 | cpus_read_unlock(); |
bce29ac9 | 2077 | stop_per_cpu_kthreads(); |
99ee9317 | 2078 | return retval; |
bce29ac9 | 2079 | } |
bce29ac9 DBO |
2080 | } |
2081 | ||
99c37d1a | 2082 | cpus_read_unlock(); |
c8895e27 | 2083 | |
4b6b08f2 | 2084 | return retval; |
bce29ac9 DBO |
2085 | } |
2086 | ||
c8895e27 DBO |
2087 | #ifdef CONFIG_HOTPLUG_CPU |
2088 | static void osnoise_hotplug_workfn(struct work_struct *dummy) | |
2089 | { | |
c8895e27 DBO |
2090 | unsigned int cpu = smp_processor_id(); |
2091 | ||
c8895e27 DBO |
2092 | mutex_lock(&trace_types_lock); |
2093 | ||
dae18134 | 2094 | if (!osnoise_has_registered_instances()) |
c8895e27 DBO |
2095 | goto out_unlock_trace; |
2096 | ||
2097 | mutex_lock(&interface_lock); | |
99c37d1a | 2098 | cpus_read_lock(); |
c8895e27 | 2099 | |
829e0c9f WL |
2100 | if (!cpu_online(cpu)) |
2101 | goto out_unlock; | |
c8895e27 DBO |
2102 | if (!cpumask_test_cpu(cpu, &osnoise_cpumask)) |
2103 | goto out_unlock; | |
2104 | ||
c8895e27 DBO |
2105 | start_kthread(cpu); |
2106 | ||
2107 | out_unlock: | |
99c37d1a | 2108 | cpus_read_unlock(); |
c8895e27 DBO |
2109 | mutex_unlock(&interface_lock); |
2110 | out_unlock_trace: | |
2111 | mutex_unlock(&trace_types_lock); | |
2112 | } | |
2113 | ||
2114 | static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn); | |
2115 | ||
2116 | /* | |
2117 | * osnoise_cpu_init - CPU hotplug online callback function | |
2118 | */ | |
2119 | static int osnoise_cpu_init(unsigned int cpu) | |
2120 | { | |
2121 | schedule_work_on(cpu, &osnoise_hotplug_work); | |
2122 | return 0; | |
2123 | } | |
2124 | ||
/*
 * osnoise_cpu_die - CPU hotplug offline callback function
 * @cpu: CPU whose workload is stopped
 *
 * Stops the workload of @cpu. Always returns 0.
 */
static int osnoise_cpu_die(unsigned int cpu)
{
	stop_kthread(cpu);

	return 0;
}
2133 | ||
2134 | static void osnoise_init_hotplug_support(void) | |
2135 | { | |
2136 | int ret; | |
2137 | ||
2138 | ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online", | |
2139 | osnoise_cpu_init, osnoise_cpu_die); | |
2140 | if (ret < 0) | |
2141 | pr_warn(BANNER "Error to init cpu hotplug support\n"); | |
2142 | ||
2143 | return; | |
2144 | } | |
2145 | #else /* CONFIG_HOTPLUG_CPU */ | |
/*
 * osnoise_init_hotplug_support - stub used when CONFIG_HOTPLUG_CPU is not set
 */
static void osnoise_init_hotplug_support(void)
{
}
2150 | #endif /* CONFIG_HOTPLUG_CPU */ | |
2151 | ||
b179d48b DBO |
2152 | /* |
2153 | * seq file functions for the osnoise/options file. | |
2154 | */ | |
2155 | static void *s_options_start(struct seq_file *s, loff_t *pos) | |
2156 | { | |
2157 | int option = *pos; | |
2158 | ||
2159 | mutex_lock(&interface_lock); | |
2160 | ||
2161 | if (option >= OSN_MAX) | |
2162 | return NULL; | |
2163 | ||
2164 | return pos; | |
2165 | } | |
2166 | ||
2167 | static void *s_options_next(struct seq_file *s, void *v, loff_t *pos) | |
2168 | { | |
2169 | int option = ++(*pos); | |
2170 | ||
2171 | if (option >= OSN_MAX) | |
2172 | return NULL; | |
2173 | ||
2174 | return pos; | |
2175 | } | |
2176 | ||
2177 | static int s_options_show(struct seq_file *s, void *v) | |
2178 | { | |
2179 | loff_t *pos = v; | |
2180 | int option = *pos; | |
2181 | ||
2182 | if (option == OSN_DEFAULTS) { | |
2183 | if (osnoise_options == OSN_DEFAULT_OPTIONS) | |
2184 | seq_printf(s, "%s", osnoise_options_str[option]); | |
2185 | else | |
2186 | seq_printf(s, "NO_%s", osnoise_options_str[option]); | |
2187 | goto out; | |
2188 | } | |
2189 | ||
2190 | if (test_bit(option, &osnoise_options)) | |
2191 | seq_printf(s, "%s", osnoise_options_str[option]); | |
2192 | else | |
2193 | seq_printf(s, "NO_%s", osnoise_options_str[option]); | |
2194 | ||
2195 | out: | |
2196 | if (option != OSN_MAX) | |
2197 | seq_puts(s, " "); | |
2198 | ||
2199 | return 0; | |
2200 | } | |
2201 | ||
2202 | static void s_options_stop(struct seq_file *s, void *v) | |
2203 | { | |
2204 | seq_puts(s, "\n"); | |
2205 | mutex_unlock(&interface_lock); | |
2206 | } | |
2207 | ||
2208 | static const struct seq_operations osnoise_options_seq_ops = { | |
2209 | .start = s_options_start, | |
2210 | .next = s_options_next, | |
2211 | .show = s_options_show, | |
2212 | .stop = s_options_stop | |
2213 | }; | |
2214 | ||
2215 | static int osnoise_options_open(struct inode *inode, struct file *file) | |
2216 | { | |
2217 | return seq_open(file, &osnoise_options_seq_ops); | |
2218 | }; | |
2219 | ||
2220 | /** | |
2221 | * osnoise_options_write - Write function for "options" entry | |
2222 | * @filp: The active open file structure | |
2223 | * @ubuf: The user buffer that contains the value to write | |
2224 | * @cnt: The maximum number of bytes to write to "file" | |
2225 | * @ppos: The current position in @file | |
2226 | * | |
2227 | * Writing the option name sets the option, writing the "NO_" | |
2228 | * prefix in front of the option name disables it. | |
2229 | * | |
2230 | * Writing "DEFAULTS" resets the option values to the default ones. | |
2231 | */ | |
2232 | static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf, | |
2233 | size_t cnt, loff_t *ppos) | |
2234 | { | |
2235 | int running, option, enable, retval; | |
2236 | char buf[256], *option_str; | |
2237 | ||
2238 | if (cnt >= 256) | |
2239 | return -EINVAL; | |
2240 | ||
2241 | if (copy_from_user(buf, ubuf, cnt)) | |
2242 | return -EFAULT; | |
2243 | ||
2244 | buf[cnt] = 0; | |
2245 | ||
2246 | if (strncmp(buf, "NO_", 3)) { | |
2247 | option_str = strstrip(buf); | |
2248 | enable = true; | |
2249 | } else { | |
2250 | option_str = strstrip(&buf[3]); | |
2251 | enable = false; | |
2252 | } | |
2253 | ||
2254 | option = match_string(osnoise_options_str, OSN_MAX, option_str); | |
2255 | if (option < 0) | |
2256 | return -EINVAL; | |
2257 | ||
2258 | /* | |
2259 | * trace_types_lock is taken to avoid concurrency on start/stop. | |
2260 | */ | |
2261 | mutex_lock(&trace_types_lock); | |
2262 | running = osnoise_has_registered_instances(); | |
2263 | if (running) | |
2264 | stop_per_cpu_kthreads(); | |
2265 | ||
2266 | mutex_lock(&interface_lock); | |
2267 | /* | |
2268 | * avoid CPU hotplug operations that might read options. | |
2269 | */ | |
2270 | cpus_read_lock(); | |
2271 | ||
2272 | retval = cnt; | |
2273 | ||
2274 | if (enable) { | |
2275 | if (option == OSN_DEFAULTS) | |
2276 | osnoise_options = OSN_DEFAULT_OPTIONS; | |
2277 | else | |
2278 | set_bit(option, &osnoise_options); | |
2279 | } else { | |
2280 | if (option == OSN_DEFAULTS) | |
2281 | retval = -EINVAL; | |
2282 | else | |
2283 | clear_bit(option, &osnoise_options); | |
2284 | } | |
2285 | ||
2286 | cpus_read_unlock(); | |
2287 | mutex_unlock(&interface_lock); | |
2288 | ||
2289 | if (running) | |
2290 | start_per_cpu_kthreads(); | |
2291 | mutex_unlock(&trace_types_lock); | |
2292 | ||
2293 | return retval; | |
2294 | } | |
2295 | ||
bce29ac9 DBO |
2296 | /* |
2297 | * osnoise_cpus_read - Read function for reading the "cpus" file | |
2298 | * @filp: The active open file structure | |
2299 | * @ubuf: The userspace provided buffer to read value into | |
2300 | * @cnt: The maximum number of bytes to read | |
2301 | * @ppos: The current "file" position | |
2302 | * | |
2303 | * Prints the "cpus" output into the user-provided buffer. | |
2304 | */ | |
2305 | static ssize_t | |
2306 | osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count, | |
2307 | loff_t *ppos) | |
2308 | { | |
2309 | char *mask_str; | |
2310 | int len; | |
2311 | ||
2312 | mutex_lock(&interface_lock); | |
2313 | ||
2314 | len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1; | |
2315 | mask_str = kmalloc(len, GFP_KERNEL); | |
2316 | if (!mask_str) { | |
2317 | count = -ENOMEM; | |
2318 | goto out_unlock; | |
2319 | } | |
2320 | ||
2321 | len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)); | |
2322 | if (len >= count) { | |
2323 | count = -EINVAL; | |
2324 | goto out_free; | |
2325 | } | |
2326 | ||
2327 | count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len); | |
2328 | ||
2329 | out_free: | |
2330 | kfree(mask_str); | |
2331 | out_unlock: | |
2332 | mutex_unlock(&interface_lock); | |
2333 | ||
2334 | return count; | |
2335 | } | |
2336 | ||
bce29ac9 DBO |
2337 | /* |
2338 | * osnoise_cpus_write - Write function for "cpus" entry | |
2339 | * @filp: The active open file structure | |
2340 | * @ubuf: The user buffer that contains the value to write | |
2341 | * @cnt: The maximum number of bytes to write to "file" | |
2342 | * @ppos: The current position in @file | |
2343 | * | |
2344 | * This function provides a write implementation for the "cpus" | |
2345 | * interface to the osnoise trace. By default, it lists all CPUs, | |
2346 | * in this way, allowing osnoise threads to run on any online CPU | |
2347 | * of the system. It serves to restrict the execution of osnoise to the | |
66df27f1 DBO |
2348 | * set of CPUs writing via this interface. Why not use "tracing_cpumask"? |
2349 | * Because the user might be interested in tracing what is running on | |
2350 | * other CPUs. For instance, one might run osnoise in one HT CPU | |
2351 | * while observing what is running on the sibling HT CPU. | |
bce29ac9 DBO |
2352 | */ |
2353 | static ssize_t | |
2354 | osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, | |
2355 | loff_t *ppos) | |
2356 | { | |
bce29ac9 DBO |
2357 | cpumask_var_t osnoise_cpumask_new; |
2358 | int running, err; | |
2359 | char buf[256]; | |
2360 | ||
2361 | if (count >= 256) | |
2362 | return -EINVAL; | |
2363 | ||
2364 | if (copy_from_user(buf, ubuf, count)) | |
2365 | return -EFAULT; | |
2366 | ||
2367 | if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL)) | |
2368 | return -ENOMEM; | |
2369 | ||
2370 | err = cpulist_parse(buf, osnoise_cpumask_new); | |
2371 | if (err) | |
2372 | goto err_free; | |
2373 | ||
2374 | /* | |
dae18134 | 2375 | * trace_types_lock is taken to avoid concurrency on start/stop. |
bce29ac9 DBO |
2376 | */ |
2377 | mutex_lock(&trace_types_lock); | |
dae18134 | 2378 | running = osnoise_has_registered_instances(); |
bce29ac9 | 2379 | if (running) |
2bd1bdf0 | 2380 | stop_per_cpu_kthreads(); |
bce29ac9 DBO |
2381 | |
2382 | mutex_lock(&interface_lock); | |
c8895e27 DBO |
2383 | /* |
2384 | * osnoise_cpumask is read by CPU hotplug operations. | |
2385 | */ | |
99c37d1a | 2386 | cpus_read_lock(); |
c8895e27 | 2387 | |
bce29ac9 | 2388 | cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new); |
c8895e27 | 2389 | |
99c37d1a | 2390 | cpus_read_unlock(); |
bce29ac9 DBO |
2391 | mutex_unlock(&interface_lock); |
2392 | ||
2393 | if (running) | |
2bd1bdf0 | 2394 | start_per_cpu_kthreads(); |
bce29ac9 DBO |
2395 | mutex_unlock(&trace_types_lock); |
2396 | ||
2397 | free_cpumask_var(osnoise_cpumask_new); | |
2398 | return count; | |
2399 | ||
2400 | err_free: | |
2401 | free_cpumask_var(osnoise_cpumask_new); | |
2402 | ||
2403 | return err; | |
2404 | } | |
2405 | ||
e88ed227 DBO |
2406 | #ifdef CONFIG_TIMERLAT_TRACER |
2407 | static int timerlat_fd_open(struct inode *inode, struct file *file) | |
2408 | { | |
2409 | struct osnoise_variables *osn_var; | |
2410 | struct timerlat_variables *tlat; | |
2411 | long cpu = (long) inode->i_cdev; | |
2412 | ||
2413 | mutex_lock(&interface_lock); | |
2414 | ||
2415 | /* | |
2416 | * This file is accessible only if timerlat is enabled, and | |
2417 | * NO_OSNOISE_WORKLOAD is set. | |
2418 | */ | |
2419 | if (!timerlat_enabled() || test_bit(OSN_WORKLOAD, &osnoise_options)) { | |
2420 | mutex_unlock(&interface_lock); | |
2421 | return -EINVAL; | |
2422 | } | |
2423 | ||
2424 | migrate_disable(); | |
2425 | ||
2426 | osn_var = this_cpu_osn_var(); | |
2427 | ||
2428 | /* | |
2429 | * The osn_var->pid holds the single access to this file. | |
2430 | */ | |
2431 | if (osn_var->pid) { | |
2432 | mutex_unlock(&interface_lock); | |
2433 | migrate_enable(); | |
2434 | return -EBUSY; | |
2435 | } | |
2436 | ||
2437 | /* | |
2438 | * timerlat tracer is a per-cpu tracer. Check if the user-space too | |
2439 | * is pinned to a single CPU. The tracer laters monitor if the task | |
2440 | * migrates and then disables tracer if it does. However, it is | |
2441 | * worth doing this basic acceptance test to avoid obviusly wrong | |
2442 | * setup. | |
2443 | */ | |
2444 | if (current->nr_cpus_allowed > 1 || cpu != smp_processor_id()) { | |
2445 | mutex_unlock(&interface_lock); | |
2446 | migrate_enable(); | |
2447 | return -EPERM; | |
2448 | } | |
2449 | ||
2450 | /* | |
2451 | * From now on, it is good to go. | |
2452 | */ | |
2453 | file->private_data = inode->i_cdev; | |
2454 | ||
2455 | get_task_struct(current); | |
2456 | ||
2457 | osn_var->kthread = current; | |
2458 | osn_var->pid = current->pid; | |
2459 | ||
2460 | /* | |
2461 | * Setup is done. | |
2462 | */ | |
2463 | mutex_unlock(&interface_lock); | |
2464 | ||
2465 | tlat = this_cpu_tmr_var(); | |
2466 | tlat->count = 0; | |
2467 | ||
1389358b DBO |
2468 | hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); |
2469 | tlat->timer.function = timerlat_irq; | |
2470 | ||
e88ed227 DBO |
2471 | migrate_enable(); |
2472 | return 0; | |
2473 | }; | |
2474 | ||
2475 | /* | |
2476 | * timerlat_fd_read - Read function for "timerlat_fd" file | |
2477 | * @file: The active open file structure | |
2478 | * @ubuf: The userspace provided buffer to read value into | |
2479 | * @cnt: The maximum number of bytes to read | |
2480 | * @ppos: The current "file" position | |
2481 | * | |
2482 | * Prints 1 on timerlat, the number of interferences on osnoise, -1 on error. | |
2483 | */ | |
2484 | static ssize_t | |
2485 | timerlat_fd_read(struct file *file, char __user *ubuf, size_t count, | |
2486 | loff_t *ppos) | |
2487 | { | |
2488 | long cpu = (long) file->private_data; | |
2489 | struct osnoise_variables *osn_var; | |
2490 | struct timerlat_variables *tlat; | |
2491 | struct timerlat_sample s; | |
2492 | s64 diff; | |
2493 | u64 now; | |
2494 | ||
2495 | migrate_disable(); | |
2496 | ||
2497 | tlat = this_cpu_tmr_var(); | |
2498 | ||
2499 | /* | |
2500 | * While in user-space, the thread is migratable. There is nothing | |
2501 | * we can do about it. | |
2502 | * So, if the thread is running on another CPU, stop the machinery. | |
2503 | */ | |
2504 | if (cpu == smp_processor_id()) { | |
2505 | if (tlat->uthread_migrate) { | |
2506 | migrate_enable(); | |
2507 | return -EINVAL; | |
2508 | } | |
2509 | } else { | |
2510 | per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1; | |
2511 | osnoise_taint("timerlat user thread migrate\n"); | |
2512 | osnoise_stop_tracing(); | |
2513 | migrate_enable(); | |
2514 | return -EINVAL; | |
2515 | } | |
2516 | ||
2517 | osn_var = this_cpu_osn_var(); | |
2518 | ||
2519 | /* | |
2520 | * The timerlat in user-space runs in a different order: | |
2521 | * the read() starts from the execution of the previous occurrence, | |
2522 | * sleeping for the next occurrence. | |
2523 | * | |
2524 | * So, skip if we are entering on read() before the first wakeup | |
2525 | * from timerlat IRQ: | |
2526 | */ | |
2527 | if (likely(osn_var->sampling)) { | |
2528 | now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); | |
2529 | diff = now - tlat->abs_period; | |
2530 | ||
2531 | /* | |
2532 | * it was not a timer firing, but some other signal? | |
2533 | */ | |
2534 | if (diff < 0) | |
2535 | goto out; | |
2536 | ||
2537 | s.seqnum = tlat->count; | |
2538 | s.timer_latency = diff; | |
2539 | s.context = THREAD_URET; | |
2540 | ||
2541 | trace_timerlat_sample(&s); | |
2542 | ||
2543 | notify_new_max_latency(diff); | |
2544 | ||
2545 | tlat->tracing_thread = false; | |
2546 | if (osnoise_data.stop_tracing_total) | |
2547 | if (time_to_us(diff) >= osnoise_data.stop_tracing_total) | |
2548 | osnoise_stop_tracing(); | |
2549 | } else { | |
2550 | tlat->tracing_thread = false; | |
2551 | tlat->kthread = current; | |
2552 | ||
e88ed227 DBO |
2553 | /* Annotate now to drift new period */ |
2554 | tlat->abs_period = hrtimer_cb_get_time(&tlat->timer); | |
2555 | ||
2556 | osn_var->sampling = 1; | |
2557 | } | |
2558 | ||
2559 | /* wait for the next period */ | |
2560 | wait_next_period(tlat); | |
2561 | ||
2562 | /* This is the wakeup from this cycle */ | |
2563 | now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer)); | |
2564 | diff = now - tlat->abs_period; | |
2565 | ||
2566 | /* | |
2567 | * it was not a timer firing, but some other signal? | |
2568 | */ | |
2569 | if (diff < 0) | |
2570 | goto out; | |
2571 | ||
2572 | s.seqnum = tlat->count; | |
2573 | s.timer_latency = diff; | |
2574 | s.context = THREAD_CONTEXT; | |
2575 | ||
2576 | trace_timerlat_sample(&s); | |
2577 | ||
2578 | if (osnoise_data.stop_tracing_total) { | |
2579 | if (time_to_us(diff) >= osnoise_data.stop_tracing_total) { | |
2580 | timerlat_dump_stack(time_to_us(diff)); | |
2581 | notify_new_max_latency(diff); | |
2582 | osnoise_stop_tracing(); | |
2583 | } | |
2584 | } | |
2585 | ||
2586 | out: | |
2587 | migrate_enable(); | |
2588 | return 0; | |
2589 | } | |
2590 | ||
2591 | static int timerlat_fd_release(struct inode *inode, struct file *file) | |
2592 | { | |
2593 | struct osnoise_variables *osn_var; | |
2594 | struct timerlat_variables *tlat_var; | |
2595 | long cpu = (long) file->private_data; | |
2596 | ||
2597 | migrate_disable(); | |
2598 | mutex_lock(&interface_lock); | |
2599 | ||
2600 | osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); | |
2601 | tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); | |
2602 | ||
e6a53481 SR |
2603 | if (tlat_var->kthread) |
2604 | hrtimer_cancel(&tlat_var->timer); | |
e88ed227 DBO |
2605 | memset(tlat_var, 0, sizeof(*tlat_var)); |
2606 | ||
2607 | osn_var->sampling = 0; | |
2608 | osn_var->pid = 0; | |
2609 | ||
2610 | /* | |
2611 | * We are leaving, not being stopped... see stop_kthread(); | |
2612 | */ | |
2613 | if (osn_var->kthread) { | |
2614 | put_task_struct(osn_var->kthread); | |
2615 | osn_var->kthread = NULL; | |
2616 | } | |
2617 | ||
2618 | mutex_unlock(&interface_lock); | |
2619 | migrate_enable(); | |
2620 | return 0; | |
2621 | } | |
2622 | #endif | |
2623 | ||
bce29ac9 DBO |
2624 | /* |
2625 | * osnoise/runtime_us: cannot be greater than the period. | |
2626 | */ | |
2627 | static struct trace_min_max_param osnoise_runtime = { | |
2628 | .lock = &interface_lock, | |
2629 | .val = &osnoise_data.sample_runtime, | |
2630 | .max = &osnoise_data.sample_period, | |
2631 | .min = NULL, | |
2632 | }; | |
2633 | ||
2634 | /* | |
2635 | * osnoise/period_us: cannot be smaller than the runtime. | |
2636 | */ | |
2637 | static struct trace_min_max_param osnoise_period = { | |
2638 | .lock = &interface_lock, | |
2639 | .val = &osnoise_data.sample_period, | |
2640 | .max = NULL, | |
2641 | .min = &osnoise_data.sample_runtime, | |
2642 | }; | |
2643 | ||
2644 | /* | |
2645 | * osnoise/stop_tracing_us: no limit. | |
2646 | */ | |
2647 | static struct trace_min_max_param osnoise_stop_tracing_in = { | |
2648 | .lock = &interface_lock, | |
2649 | .val = &osnoise_data.stop_tracing, | |
2650 | .max = NULL, | |
2651 | .min = NULL, | |
2652 | }; | |
2653 | ||
2654 | /* | |
2655 | * osnoise/stop_tracing_total_us: no limit. | |
2656 | */ | |
2657 | static struct trace_min_max_param osnoise_stop_tracing_total = { | |
2658 | .lock = &interface_lock, | |
2659 | .val = &osnoise_data.stop_tracing_total, | |
2660 | .max = NULL, | |
2661 | .min = NULL, | |
2662 | }; | |
2663 | ||
a955d7ea DBO |
2664 | #ifdef CONFIG_TIMERLAT_TRACER |
2665 | /* | |
2666 | * osnoise/print_stack: print the stacktrace of the IRQ handler if the total | |
2667 | * latency is higher than val. | |
2668 | */ | |
2669 | static struct trace_min_max_param osnoise_print_stack = { | |
2670 | .lock = &interface_lock, | |
2671 | .val = &osnoise_data.print_stack, | |
2672 | .max = NULL, | |
2673 | .min = NULL, | |
2674 | }; | |
2675 | ||
2676 | /* | |
2677 | * osnoise/timerlat_period: min 100 us, max 1 s | |
2678 | */ | |
7a025e06 TR |
2679 | static u64 timerlat_min_period = 100; |
2680 | static u64 timerlat_max_period = 1000000; | |
a955d7ea DBO |
2681 | static struct trace_min_max_param timerlat_period = { |
2682 | .lock = &interface_lock, | |
2683 | .val = &osnoise_data.timerlat_period, | |
2684 | .max = &timerlat_max_period, | |
2685 | .min = &timerlat_min_period, | |
2686 | }; | |
e88ed227 DBO |
2687 | |
2688 | static const struct file_operations timerlat_fd_fops = { | |
2689 | .open = timerlat_fd_open, | |
2690 | .read = timerlat_fd_read, | |
2691 | .release = timerlat_fd_release, | |
2692 | .llseek = generic_file_llseek, | |
2693 | }; | |
a955d7ea DBO |
2694 | #endif |
2695 | ||
bce29ac9 DBO |
2696 | static const struct file_operations cpus_fops = { |
2697 | .open = tracing_open_generic, | |
2698 | .read = osnoise_cpus_read, | |
2699 | .write = osnoise_cpus_write, | |
2700 | .llseek = generic_file_llseek, | |
2701 | }; | |
2702 | ||
b179d48b DBO |
2703 | static const struct file_operations osnoise_options_fops = { |
2704 | .open = osnoise_options_open, | |
2705 | .read = seq_read, | |
2706 | .llseek = seq_lseek, | |
2707 | .release = seq_release, | |
2708 | .write = osnoise_options_write | |
2709 | }; | |
2710 | ||
ccb67544 | 2711 | #ifdef CONFIG_TIMERLAT_TRACER |
b14f4568 DBO |
2712 | #ifdef CONFIG_STACKTRACE |
2713 | static int init_timerlat_stack_tracefs(struct dentry *top_dir) | |
ccb67544 DBO |
2714 | { |
2715 | struct dentry *tmp; | |
2716 | ||
ccb67544 DBO |
2717 | tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir, |
2718 | &osnoise_print_stack, &trace_min_max_fops); | |
2719 | if (!tmp) | |
2720 | return -ENOMEM; | |
b14f4568 DBO |
2721 | |
2722 | return 0; | |
2723 | } | |
2724 | #else /* CONFIG_STACKTRACE */ | |
/* Without CONFIG_STACKTRACE no "print_stack" file is created; always succeeds. */
static int init_timerlat_stack_tracefs(struct dentry *top_dir)
{
	return 0;
}
2729 | #endif /* CONFIG_STACKTRACE */ | |
2730 | ||
e88ed227 DBO |
2731 | static int osnoise_create_cpu_timerlat_fd(struct dentry *top_dir) |
2732 | { | |
2733 | struct dentry *timerlat_fd; | |
2734 | struct dentry *per_cpu; | |
2735 | struct dentry *cpu_dir; | |
2736 | char cpu_str[30]; /* see trace.c: tracing_init_tracefs_percpu() */ | |
2737 | long cpu; | |
2738 | ||
2739 | /* | |
2740 | * Why not using tracing instance per_cpu/ dir? | |
2741 | * | |
2742 | * Because osnoise/timerlat have a single workload, having | |
2743 | * multiple files like these are wast of memory. | |
2744 | */ | |
2745 | per_cpu = tracefs_create_dir("per_cpu", top_dir); | |
2746 | if (!per_cpu) | |
2747 | return -ENOMEM; | |
2748 | ||
2749 | for_each_possible_cpu(cpu) { | |
2750 | snprintf(cpu_str, 30, "cpu%ld", cpu); | |
2751 | cpu_dir = tracefs_create_dir(cpu_str, per_cpu); | |
2752 | if (!cpu_dir) | |
2753 | goto out_clean; | |
2754 | ||
2755 | timerlat_fd = trace_create_file("timerlat_fd", TRACE_MODE_READ, | |
2756 | cpu_dir, NULL, &timerlat_fd_fops); | |
2757 | if (!timerlat_fd) | |
2758 | goto out_clean; | |
2759 | ||
2760 | /* Record the CPU */ | |
2761 | d_inode(timerlat_fd)->i_cdev = (void *)(cpu); | |
2762 | } | |
2763 | ||
2764 | return 0; | |
2765 | ||
2766 | out_clean: | |
2767 | tracefs_remove(per_cpu); | |
2768 | return -ENOMEM; | |
2769 | } | |
2770 | ||
b14f4568 DBO |
2771 | /* |
2772 | * init_timerlat_tracefs - A function to initialize the timerlat interface files | |
2773 | */ | |
2774 | static int init_timerlat_tracefs(struct dentry *top_dir) | |
2775 | { | |
2776 | struct dentry *tmp; | |
e88ed227 | 2777 | int retval; |
ccb67544 DBO |
2778 | |
2779 | tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir, | |
2780 | &timerlat_period, &trace_min_max_fops); | |
2781 | if (!tmp) | |
2782 | return -ENOMEM; | |
2783 | ||
e88ed227 DBO |
2784 | retval = osnoise_create_cpu_timerlat_fd(top_dir); |
2785 | if (retval) | |
2786 | return retval; | |
2787 | ||
b14f4568 | 2788 | return init_timerlat_stack_tracefs(top_dir); |
ccb67544 DBO |
2789 | } |
2790 | #else /* CONFIG_TIMERLAT_TRACER */ | |
/* Without CONFIG_TIMERLAT_TRACER there are no timerlat files; always succeeds. */
static int init_timerlat_tracefs(struct dentry *top_dir)
{
	return 0;
}
2795 | #endif /* CONFIG_TIMERLAT_TRACER */ | |
2796 | ||
bce29ac9 DBO |
2797 | /* |
2798 | * init_tracefs - A function to initialize the tracefs interface files | |
2799 | * | |
a955d7ea DBO |
2800 | * This function creates entries in tracefs for "osnoise" and "timerlat". |
2801 | * It creates these directories in the tracing directory, and within that | |
2802 | * directory the use can change and view the configs. | |
bce29ac9 DBO |
2803 | */ |
2804 | static int init_tracefs(void) | |
2805 | { | |
2806 | struct dentry *top_dir; | |
2807 | struct dentry *tmp; | |
2808 | int ret; | |
2809 | ||
2810 | ret = tracing_init_dentry(); | |
2811 | if (ret) | |
2812 | return -ENOMEM; | |
2813 | ||
2814 | top_dir = tracefs_create_dir("osnoise", NULL); | |
2815 | if (!top_dir) | |
a955d7ea | 2816 | return 0; |
bce29ac9 | 2817 | |
21ccc9cd | 2818 | tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir, |
bce29ac9 DBO |
2819 | &osnoise_period, &trace_min_max_fops); |
2820 | if (!tmp) | |
2821 | goto err; | |
2822 | ||
21ccc9cd | 2823 | tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir, |
bce29ac9 DBO |
2824 | &osnoise_runtime, &trace_min_max_fops); |
2825 | if (!tmp) | |
2826 | goto err; | |
2827 | ||
21ccc9cd | 2828 | tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir, |
bce29ac9 DBO |
2829 | &osnoise_stop_tracing_in, &trace_min_max_fops); |
2830 | if (!tmp) | |
2831 | goto err; | |
2832 | ||
21ccc9cd | 2833 | tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir, |
bce29ac9 DBO |
2834 | &osnoise_stop_tracing_total, &trace_min_max_fops); |
2835 | if (!tmp) | |
2836 | goto err; | |
2837 | ||
21ccc9cd | 2838 | tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops); |
bce29ac9 DBO |
2839 | if (!tmp) |
2840 | goto err; | |
a955d7ea | 2841 | |
b179d48b DBO |
2842 | tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL, |
2843 | &osnoise_options_fops); | |
2844 | if (!tmp) | |
2845 | goto err; | |
2846 | ||
ccb67544 DBO |
2847 | ret = init_timerlat_tracefs(top_dir); |
2848 | if (ret) | |
a955d7ea | 2849 | goto err; |
bce29ac9 DBO |
2850 | |
2851 | return 0; | |
2852 | ||
2853 | err: | |
2854 | tracefs_remove(top_dir); | |
2855 | return -ENOMEM; | |
2856 | } | |
2857 | ||
2858 | static int osnoise_hook_events(void) | |
2859 | { | |
2860 | int retval; | |
2861 | ||
2862 | /* | |
2863 | * Trace is already hooked, we are re-enabling from | |
2864 | * a stop_tracing_*. | |
2865 | */ | |
2866 | if (trace_osnoise_callback_enabled) | |
2867 | return 0; | |
2868 | ||
2869 | retval = hook_irq_events(); | |
2870 | if (retval) | |
2871 | return -EINVAL; | |
2872 | ||
2873 | retval = hook_softirq_events(); | |
2874 | if (retval) | |
2875 | goto out_unhook_irq; | |
2876 | ||
2877 | retval = hook_thread_events(); | |
2878 | /* | |
2879 | * All fine! | |
2880 | */ | |
2881 | if (!retval) | |
2882 | return 0; | |
2883 | ||
2884 | unhook_softirq_events(); | |
2885 | out_unhook_irq: | |
2886 | unhook_irq_events(); | |
2887 | return -EINVAL; | |
2888 | } | |
2889 | ||
0878355b NY |
/*
 * osnoise_unhook_events - remove the thread, softirq and IRQ hooks
 *
 * The hooks are removed in the reverse order in which
 * osnoise_hook_events() installs them.
 */
static void osnoise_unhook_events(void)
{
	unhook_thread_events();
	unhook_softirq_events();
	unhook_irq_events();
}
2896 | ||
15ca4bdb DBO |
2897 | /* |
2898 | * osnoise_workload_start - start the workload and hook to events | |
2899 | */ | |
2900 | static int osnoise_workload_start(void) | |
bce29ac9 DBO |
2901 | { |
2902 | int retval; | |
2903 | ||
2fac8d64 DBO |
2904 | /* |
2905 | * Instances need to be registered after calling workload | |
2906 | * start. Hence, if there is already an instance, the | |
2907 | * workload was already registered. Otherwise, this | |
2908 | * code is on the way to register the first instance, | |
2909 | * and the workload will start. | |
2910 | */ | |
2911 | if (osnoise_has_registered_instances()) | |
2912 | return 0; | |
2913 | ||
bce29ac9 DBO |
2914 | osn_var_reset_all(); |
2915 | ||
2916 | retval = osnoise_hook_events(); | |
2917 | if (retval) | |
a955d7ea | 2918 | return retval; |
c3b6343c | 2919 | |
bce29ac9 | 2920 | /* |
c3b6343c DBO |
2921 | * Make sure that ftrace_nmi_enter/exit() see reset values |
2922 | * before enabling trace_osnoise_callback_enabled. | |
bce29ac9 DBO |
2923 | */ |
2924 | barrier(); | |
2925 | trace_osnoise_callback_enabled = true; | |
2926 | ||
15ca4bdb | 2927 | retval = start_per_cpu_kthreads(); |
a955d7ea | 2928 | if (retval) { |
0878355b NY |
2929 | trace_osnoise_callback_enabled = false; |
2930 | /* | |
2931 | * Make sure that ftrace_nmi_enter/exit() see | |
2932 | * trace_osnoise_callback_enabled as false before continuing. | |
2933 | */ | |
2934 | barrier(); | |
2935 | ||
2936 | osnoise_unhook_events(); | |
a955d7ea DBO |
2937 | return retval; |
2938 | } | |
2939 | ||
a955d7ea DBO |
2940 | return 0; |
2941 | } | |
2942 | ||
15ca4bdb DBO |
2943 | /* |
2944 | * osnoise_workload_stop - stop the workload and unhook the events | |
2945 | */ | |
2946 | static void osnoise_workload_stop(void) | |
bce29ac9 | 2947 | { |
2fac8d64 DBO |
2948 | /* |
2949 | * Instances need to be unregistered before calling | |
2950 | * stop. Hence, if there is a registered instance, more | |
2951 | * than one instance is running, and the workload will not | |
2952 | * yet stop. Otherwise, this code is on the way to disable | |
2953 | * the last instance, and the workload can stop. | |
2954 | */ | |
dae18134 | 2955 | if (osnoise_has_registered_instances()) |
bce29ac9 DBO |
2956 | return; |
2957 | ||
f0cfe17b DBO |
2958 | /* |
2959 | * If callbacks were already disabled in a previous stop | |
2960 | * call, there is no need to disable then again. | |
2961 | * | |
2962 | * For instance, this happens when tracing is stopped via: | |
2963 | * echo 0 > tracing_on | |
2964 | * echo nop > current_tracer. | |
2965 | */ | |
2966 | if (!trace_osnoise_callback_enabled) | |
2967 | return; | |
2968 | ||
bce29ac9 | 2969 | trace_osnoise_callback_enabled = false; |
c3b6343c DBO |
2970 | /* |
2971 | * Make sure that ftrace_nmi_enter/exit() see | |
2972 | * trace_osnoise_callback_enabled as false before continuing. | |
2973 | */ | |
bce29ac9 DBO |
2974 | barrier(); |
2975 | ||
2976 | stop_per_cpu_kthreads(); | |
2977 | ||
0878355b | 2978 | osnoise_unhook_events(); |
bce29ac9 DBO |
2979 | } |
2980 | ||
15ca4bdb DBO |
2981 | static void osnoise_tracer_start(struct trace_array *tr) |
2982 | { | |
2983 | int retval; | |
2984 | ||
2fac8d64 DBO |
2985 | /* |
2986 | * If the instance is already registered, there is no need to | |
2987 | * register it again. | |
2988 | */ | |
2989 | if (osnoise_instance_registered(tr)) | |
15ca4bdb DBO |
2990 | return; |
2991 | ||
2992 | retval = osnoise_workload_start(); | |
2993 | if (retval) | |
2994 | pr_err(BANNER "Error starting osnoise tracer\n"); | |
2995 | ||
dae18134 | 2996 | osnoise_register_instance(tr); |
15ca4bdb DBO |
2997 | } |
2998 | ||
/*
 * osnoise_tracer_stop - unregister @tr, then try to stop the workload
 *
 * The instance is unregistered first because osnoise_workload_stop() does
 * nothing while instances are still registered.
 */
static void osnoise_tracer_stop(struct trace_array *tr)
{
	osnoise_unregister_instance(tr);
	osnoise_workload_stop();
}
3004 | ||
bce29ac9 DBO |
3005 | static int osnoise_tracer_init(struct trace_array *tr) |
3006 | { | |
2fac8d64 DBO |
3007 | /* |
3008 | * Only allow osnoise tracer if timerlat tracer is not running | |
3009 | * already. | |
3010 | */ | |
3011 | if (timerlat_enabled()) | |
bce29ac9 DBO |
3012 | return -EBUSY; |
3013 | ||
bce29ac9 DBO |
3014 | tr->max_latency = 0; |
3015 | ||
3016 | osnoise_tracer_start(tr); | |
bce29ac9 DBO |
3017 | return 0; |
3018 | } | |
3019 | ||
/* osnoise_tracer_reset - tracer reset callback; simply stops the tracer on @tr. */
static void osnoise_tracer_reset(struct trace_array *tr)
{
	osnoise_tracer_stop(tr);
}
3024 | ||
3025 | static struct tracer osnoise_tracer __read_mostly = { | |
3026 | .name = "osnoise", | |
3027 | .init = osnoise_tracer_init, | |
3028 | .reset = osnoise_tracer_reset, | |
3029 | .start = osnoise_tracer_start, | |
3030 | .stop = osnoise_tracer_stop, | |
3031 | .print_header = print_osnoise_headers, | |
3032 | .allow_instances = true, | |
3033 | }; | |
3034 | ||
a955d7ea DBO |
3035 | #ifdef CONFIG_TIMERLAT_TRACER |
3036 | static void timerlat_tracer_start(struct trace_array *tr) | |
3037 | { | |
3038 | int retval; | |
3039 | ||
2fac8d64 DBO |
3040 | /* |
3041 | * If the instance is already registered, there is no need to | |
3042 | * register it again. | |
3043 | */ | |
3044 | if (osnoise_instance_registered(tr)) | |
a955d7ea DBO |
3045 | return; |
3046 | ||
15ca4bdb | 3047 | retval = osnoise_workload_start(); |
a955d7ea | 3048 | if (retval) |
2fac8d64 | 3049 | pr_err(BANNER "Error starting timerlat tracer\n"); |
a955d7ea | 3050 | |
dae18134 DBO |
3051 | osnoise_register_instance(tr); |
3052 | ||
a955d7ea | 3053 | return; |
a955d7ea DBO |
3054 | } |
3055 | ||
3056 | static void timerlat_tracer_stop(struct trace_array *tr) | |
3057 | { | |
3058 | int cpu; | |
3059 | ||
2fac8d64 | 3060 | osnoise_unregister_instance(tr); |
a955d7ea | 3061 | |
2fac8d64 DBO |
3062 | /* |
3063 | * Instruct the threads to stop only if this is the last instance. | |
3064 | */ | |
3065 | if (!osnoise_has_registered_instances()) { | |
3066 | for_each_online_cpu(cpu) | |
3067 | per_cpu(per_cpu_osnoise_var, cpu).sampling = 0; | |
3068 | } | |
a955d7ea | 3069 | |
2fac8d64 | 3070 | osnoise_workload_stop(); |
a955d7ea DBO |
3071 | } |
3072 | ||
3073 | static int timerlat_tracer_init(struct trace_array *tr) | |
3074 | { | |
2fac8d64 DBO |
3075 | /* |
3076 | * Only allow timerlat tracer if osnoise tracer is not running already. | |
3077 | */ | |
3078 | if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer) | |
a955d7ea DBO |
3079 | return -EBUSY; |
3080 | ||
2fac8d64 DBO |
3081 | /* |
3082 | * If this is the first instance, set timerlat_tracer to block | |
3083 | * osnoise tracer start. | |
3084 | */ | |
3085 | if (!osnoise_has_registered_instances()) | |
3086 | osnoise_data.timerlat_tracer = 1; | |
a955d7ea | 3087 | |
2fac8d64 | 3088 | tr->max_latency = 0; |
a955d7ea DBO |
3089 | timerlat_tracer_start(tr); |
3090 | ||
3091 | return 0; | |
3092 | } | |
3093 | ||
3094 | static void timerlat_tracer_reset(struct trace_array *tr) | |
3095 | { | |
3096 | timerlat_tracer_stop(tr); | |
2fac8d64 DBO |
3097 | |
3098 | /* | |
3099 | * If this is the last instance, reset timerlat_tracer allowing | |
3100 | * osnoise to be started. | |
3101 | */ | |
3102 | if (!osnoise_has_registered_instances()) | |
3103 | osnoise_data.timerlat_tracer = 0; | |
a955d7ea DBO |
3104 | } |
3105 | ||
3106 | static struct tracer timerlat_tracer __read_mostly = { | |
3107 | .name = "timerlat", | |
3108 | .init = timerlat_tracer_init, | |
3109 | .reset = timerlat_tracer_reset, | |
3110 | .start = timerlat_tracer_start, | |
3111 | .stop = timerlat_tracer_stop, | |
3112 | .print_header = print_timerlat_headers, | |
3113 | .allow_instances = true, | |
3114 | }; | |
ccb67544 DBO |
3115 | |
3116 | __init static int init_timerlat_tracer(void) | |
3117 | { | |
3118 | return register_tracer(&timerlat_tracer); | |
3119 | } | |
3120 | #else /* CONFIG_TIMERLAT_TRACER */ | |
3121 | __init static int init_timerlat_tracer(void) | |
3122 | { | |
3123 | return 0; | |
3124 | } | |
a955d7ea DBO |
3125 | #endif /* CONFIG_TIMERLAT_TRACER */ |
3126 | ||
bce29ac9 DBO |
3127 | __init static int init_osnoise_tracer(void) |
3128 | { | |
3129 | int ret; | |
3130 | ||
3131 | mutex_init(&interface_lock); | |
3132 | ||
3133 | cpumask_copy(&osnoise_cpumask, cpu_all_mask); | |
3134 | ||
3135 | ret = register_tracer(&osnoise_tracer); | |
a955d7ea DBO |
3136 | if (ret) { |
3137 | pr_err(BANNER "Error registering osnoise!\n"); | |
bce29ac9 | 3138 | return ret; |
a955d7ea DBO |
3139 | } |
3140 | ||
ccb67544 | 3141 | ret = init_timerlat_tracer(); |
a955d7ea | 3142 | if (ret) { |
ccb67544 | 3143 | pr_err(BANNER "Error registering timerlat!\n"); |
a955d7ea DBO |
3144 | return ret; |
3145 | } | |
ccb67544 | 3146 | |
c8895e27 | 3147 | osnoise_init_hotplug_support(); |
bce29ac9 | 3148 | |
dae18134 DBO |
3149 | INIT_LIST_HEAD_RCU(&osnoise_instances); |
3150 | ||
bce29ac9 DBO |
3151 | init_tracefs(); |
3152 | ||
3153 | return 0; | |
3154 | } | |
3155 | late_initcall(init_osnoise_tracer); |