Commit | Line | Data |
---|---|---|
6954e415 SRV |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Copyright (C) 2021 VMware Inc, Steven Rostedt <rostedt@goodmis.org> | |
4 | */ | |
8d6e9098 SRV |
5 | #include <linux/spinlock.h> |
6 | #include <linux/irq_work.h> | |
6954e415 SRV |
7 | #include <linux/slab.h> |
8 | #include "trace.h" | |
9 | ||
8d6e9098 SRV |
10 | /* See pid_list.h for details */ |
11 | ||
12 | static inline union lower_chunk *get_lower_chunk(struct trace_pid_list *pid_list) | |
13 | { | |
14 | union lower_chunk *chunk; | |
15 | ||
16 | lockdep_assert_held(&pid_list->lock); | |
17 | ||
18 | if (!pid_list->lower_list) | |
19 | return NULL; | |
20 | ||
21 | chunk = pid_list->lower_list; | |
22 | pid_list->lower_list = chunk->next; | |
23 | pid_list->free_lower_chunks--; | |
24 | WARN_ON_ONCE(pid_list->free_lower_chunks < 0); | |
25 | chunk->next = NULL; | |
26 | /* | |
27 | * If a refill needs to happen, it can not happen here | |
28 | * as the scheduler run queue locks are held. | |
29 | */ | |
30 | if (pid_list->free_lower_chunks <= CHUNK_REALLOC) | |
31 | irq_work_queue(&pid_list->refill_irqwork); | |
32 | ||
33 | return chunk; | |
34 | } | |
35 | ||
36 | static inline union upper_chunk *get_upper_chunk(struct trace_pid_list *pid_list) | |
37 | { | |
38 | union upper_chunk *chunk; | |
39 | ||
40 | lockdep_assert_held(&pid_list->lock); | |
41 | ||
42 | if (!pid_list->upper_list) | |
43 | return NULL; | |
44 | ||
45 | chunk = pid_list->upper_list; | |
46 | pid_list->upper_list = chunk->next; | |
47 | pid_list->free_upper_chunks--; | |
48 | WARN_ON_ONCE(pid_list->free_upper_chunks < 0); | |
49 | chunk->next = NULL; | |
50 | /* | |
51 | * If a refill needs to happen, it can not happen here | |
52 | * as the scheduler run queue locks are held. | |
53 | */ | |
54 | if (pid_list->free_upper_chunks <= CHUNK_REALLOC) | |
55 | irq_work_queue(&pid_list->refill_irqwork); | |
56 | ||
57 | return chunk; | |
58 | } | |
59 | ||
/*
 * Return @chunk to the head of the lower free list and bump the
 * free count. Caller must hold pid_list->lock.
 */
static inline void put_lower_chunk(struct trace_pid_list *pid_list,
				   union lower_chunk *chunk)
{
	lockdep_assert_held(&pid_list->lock);

	chunk->next = pid_list->lower_list;
	pid_list->lower_list = chunk;
	pid_list->free_lower_chunks++;
}
69 | ||
/*
 * Return @chunk to the head of the upper free list and bump the
 * free count. Caller must hold pid_list->lock.
 */
static inline void put_upper_chunk(struct trace_pid_list *pid_list,
				   union upper_chunk *chunk)
{
	lockdep_assert_held(&pid_list->lock);

	chunk->next = pid_list->upper_list;
	pid_list->upper_list = chunk;
	pid_list->free_upper_chunks++;
}
79 | ||
80 | static inline bool upper_empty(union upper_chunk *chunk) | |
81 | { | |
82 | /* | |
83 | * If chunk->data has no lower chunks, it will be the same | |
84 | * as a zeroed bitmask. Use find_first_bit() to test it | |
85 | * and if it doesn't find any bits set, then the array | |
86 | * is empty. | |
87 | */ | |
88 | int bit = find_first_bit((unsigned long *)chunk->data, | |
89 | sizeof(chunk->data) * 8); | |
90 | return bit >= sizeof(chunk->data) * 8; | |
91 | } | |
92 | ||
93 | static inline int pid_split(unsigned int pid, unsigned int *upper1, | |
94 | unsigned int *upper2, unsigned int *lower) | |
95 | { | |
96 | /* MAX_PID should cover all pids */ | |
97 | BUILD_BUG_ON(MAX_PID < PID_MAX_LIMIT); | |
98 | ||
99 | /* In case a bad pid is passed in, then fail */ | |
100 | if (unlikely(pid >= MAX_PID)) | |
101 | return -1; | |
102 | ||
103 | *upper1 = (pid >> UPPER1_SHIFT) & UPPER_MASK; | |
104 | *upper2 = (pid >> UPPER2_SHIFT) & UPPER_MASK; | |
105 | *lower = pid & LOWER_MASK; | |
106 | ||
107 | return 0; | |
108 | } | |
109 | ||
110 | static inline unsigned int pid_join(unsigned int upper1, | |
111 | unsigned int upper2, unsigned int lower) | |
112 | { | |
113 | return ((upper1 & UPPER_MASK) << UPPER1_SHIFT) | | |
114 | ((upper2 & UPPER_MASK) << UPPER2_SHIFT) | | |
115 | (lower & LOWER_MASK); | |
116 | } | |
117 | ||
6954e415 SRV |
118 | /** |
119 | * trace_pid_list_is_set - test if the pid is set in the list | |
120 | * @pid_list: The pid list to test | |
121 | * @pid: The pid to to see if set in the list. | |
122 | * | |
123 | * Tests if @pid is is set in the @pid_list. This is usually called | |
124 | * from the scheduler when a task is scheduled. Its pid is checked | |
125 | * if it should be traced or not. | |
126 | * | |
127 | * Return true if the pid is in the list, false otherwise. | |
128 | */ | |
129 | bool trace_pid_list_is_set(struct trace_pid_list *pid_list, unsigned int pid) | |
130 | { | |
8d6e9098 SRV |
131 | union upper_chunk *upper_chunk; |
132 | union lower_chunk *lower_chunk; | |
133 | unsigned long flags; | |
134 | unsigned int upper1; | |
135 | unsigned int upper2; | |
136 | unsigned int lower; | |
137 | bool ret = false; | |
138 | ||
139 | if (!pid_list) | |
6954e415 SRV |
140 | return false; |
141 | ||
8d6e9098 SRV |
142 | if (pid_split(pid, &upper1, &upper2, &lower) < 0) |
143 | return false; | |
144 | ||
145 | raw_spin_lock_irqsave(&pid_list->lock, flags); | |
146 | upper_chunk = pid_list->upper[upper1]; | |
147 | if (upper_chunk) { | |
148 | lower_chunk = upper_chunk->data[upper2]; | |
149 | if (lower_chunk) | |
150 | ret = test_bit(lower, lower_chunk->data); | |
151 | } | |
152 | raw_spin_unlock_irqrestore(&pid_list->lock, flags); | |
153 | ||
154 | return ret; | |
6954e415 SRV |
155 | } |
156 | ||
157 | /** | |
158 | * trace_pid_list_set - add a pid to the list | |
159 | * @pid_list: The pid list to add the @pid to. | |
160 | * @pid: The pid to add. | |
161 | * | |
162 | * Adds @pid to @pid_list. This is usually done explicitly by a user | |
163 | * adding a task to be traced, or indirectly by the fork function | |
164 | * when children should be traced and a task's pid is in the list. | |
165 | * | |
166 | * Return 0 on success, negative otherwise. | |
167 | */ | |
168 | int trace_pid_list_set(struct trace_pid_list *pid_list, unsigned int pid) | |
169 | { | |
8d6e9098 SRV |
170 | union upper_chunk *upper_chunk; |
171 | union lower_chunk *lower_chunk; | |
172 | unsigned long flags; | |
173 | unsigned int upper1; | |
174 | unsigned int upper2; | |
175 | unsigned int lower; | |
176 | int ret; | |
6954e415 | 177 | |
8d6e9098 SRV |
178 | if (!pid_list) |
179 | return -ENODEV; | |
6954e415 | 180 | |
8d6e9098 SRV |
181 | if (pid_split(pid, &upper1, &upper2, &lower) < 0) |
182 | return -EINVAL; | |
183 | ||
184 | raw_spin_lock_irqsave(&pid_list->lock, flags); | |
185 | upper_chunk = pid_list->upper[upper1]; | |
186 | if (!upper_chunk) { | |
187 | upper_chunk = get_upper_chunk(pid_list); | |
188 | if (!upper_chunk) { | |
189 | ret = -ENOMEM; | |
190 | goto out; | |
191 | } | |
192 | pid_list->upper[upper1] = upper_chunk; | |
193 | } | |
194 | lower_chunk = upper_chunk->data[upper2]; | |
195 | if (!lower_chunk) { | |
196 | lower_chunk = get_lower_chunk(pid_list); | |
197 | if (!lower_chunk) { | |
198 | ret = -ENOMEM; | |
199 | goto out; | |
200 | } | |
201 | upper_chunk->data[upper2] = lower_chunk; | |
202 | } | |
203 | set_bit(lower, lower_chunk->data); | |
204 | ret = 0; | |
205 | out: | |
206 | raw_spin_unlock_irqrestore(&pid_list->lock, flags); | |
207 | return ret; | |
6954e415 SRV |
208 | } |
209 | ||
210 | /** | |
211 | * trace_pid_list_clear - remove a pid from the list | |
212 | * @pid_list: The pid list to remove the @pid from. | |
213 | * @pid: The pid to remove. | |
214 | * | |
215 | * Removes @pid from @pid_list. This is usually done explicitly by a user | |
216 | * removing tasks from tracing, or indirectly by the exit function | |
217 | * when a task that is set to be traced exits. | |
218 | * | |
219 | * Return 0 on success, negative otherwise. | |
220 | */ | |
221 | int trace_pid_list_clear(struct trace_pid_list *pid_list, unsigned int pid) | |
222 | { | |
8d6e9098 SRV |
223 | union upper_chunk *upper_chunk; |
224 | union lower_chunk *lower_chunk; | |
225 | unsigned long flags; | |
226 | unsigned int upper1; | |
227 | unsigned int upper2; | |
228 | unsigned int lower; | |
229 | ||
230 | if (!pid_list) | |
231 | return -ENODEV; | |
232 | ||
233 | if (pid_split(pid, &upper1, &upper2, &lower) < 0) | |
6954e415 SRV |
234 | return -EINVAL; |
235 | ||
8d6e9098 SRV |
236 | raw_spin_lock_irqsave(&pid_list->lock, flags); |
237 | upper_chunk = pid_list->upper[upper1]; | |
238 | if (!upper_chunk) | |
239 | goto out; | |
6954e415 | 240 | |
8d6e9098 SRV |
241 | lower_chunk = upper_chunk->data[upper2]; |
242 | if (!lower_chunk) | |
243 | goto out; | |
244 | ||
245 | clear_bit(lower, lower_chunk->data); | |
246 | ||
247 | /* if there's no more bits set, add it to the free list */ | |
248 | if (find_first_bit(lower_chunk->data, LOWER_MAX) >= LOWER_MAX) { | |
249 | put_lower_chunk(pid_list, lower_chunk); | |
250 | upper_chunk->data[upper2] = NULL; | |
251 | if (upper_empty(upper_chunk)) { | |
252 | put_upper_chunk(pid_list, upper_chunk); | |
253 | pid_list->upper[upper1] = NULL; | |
254 | } | |
255 | } | |
256 | out: | |
257 | raw_spin_unlock_irqrestore(&pid_list->lock, flags); | |
6954e415 SRV |
258 | return 0; |
259 | } | |
260 | ||
/**
 * trace_pid_list_next - return the next pid in the list
 * @pid_list: The pid list to examine.
 * @pid: The pid to start from
 * @next: The pointer to place the pid that is set starting from @pid.
 *
 * Looks for the next consecutive pid that is in @pid_list starting
 * at the pid specified by @pid. If one is set (including @pid), then
 * that pid is placed into @next.
 *
 * Return 0 when a pid is found, -1 if there are no more pids included.
 */
int trace_pid_list_next(struct trace_pid_list *pid_list, unsigned int pid,
			unsigned int *next)
{
	union upper_chunk *upper_chunk;
	union lower_chunk *lower_chunk;
	unsigned long flags;
	unsigned int upper1;
	unsigned int upper2;
	unsigned int lower;

	if (!pid_list)
		return -ENODEV;

	/* Seed the scan position from @pid's tree indexes */
	if (pid_split(pid, &upper1, &upper2, &lower) < 0)
		return -EINVAL;

	raw_spin_lock_irqsave(&pid_list->lock, flags);
	/*
	 * The starting upper2/lower values only apply to the first chunk
	 * visited; whenever an outer index advances, the inner index is
	 * reset to zero so the whole next chunk is scanned.
	 */
	for (; upper1 <= UPPER_MASK; upper1++, upper2 = 0) {
		upper_chunk = pid_list->upper[upper1];

		if (!upper_chunk)
			continue;

		for (; upper2 <= UPPER_MASK; upper2++, lower = 0) {
			lower_chunk = upper_chunk->data[upper2];
			if (!lower_chunk)
				continue;

			lower = find_next_bit(lower_chunk->data, LOWER_MAX,
					      lower);
			if (lower < LOWER_MAX)
				goto found;
		}
	}

	/* Falls through to found: with upper1 > UPPER_MASK when nothing is set */
 found:
	raw_spin_unlock_irqrestore(&pid_list->lock, flags);
	if (upper1 > UPPER_MASK)
		return -1;

	*next = pid_join(upper1, upper2, lower);
	return 0;
}
316 | ||
/**
 * trace_pid_list_first - return the first pid in the list
 * @pid_list: The pid list to examine.
 * @pid: The pointer to place the first pid found that is set.
 *
 * Looks for the first pid that is set in @pid_list, and places it
 * into @pid if found.
 *
 * Return 0 when a pid is found, -1 if there are no pids set.
 */
int trace_pid_list_first(struct trace_pid_list *pid_list, unsigned int *pid)
{
	/* Searching from pid 0 finds the lowest set pid */
	return trace_pid_list_next(pid_list, 0, pid);
}
331 | ||
332 | static void pid_list_refill_irq(struct irq_work *iwork) | |
333 | { | |
334 | struct trace_pid_list *pid_list = container_of(iwork, struct trace_pid_list, | |
335 | refill_irqwork); | |
336 | union upper_chunk *upper; | |
337 | union lower_chunk *lower; | |
338 | union upper_chunk **upper_next = &upper; | |
339 | union lower_chunk **lower_next = &lower; | |
340 | int upper_count; | |
341 | int lower_count; | |
342 | int ucnt = 0; | |
343 | int lcnt = 0; | |
6954e415 | 344 | |
8d6e9098 SRV |
345 | again: |
346 | raw_spin_lock(&pid_list->lock); | |
347 | upper_count = CHUNK_ALLOC - pid_list->free_upper_chunks; | |
348 | lower_count = CHUNK_ALLOC - pid_list->free_lower_chunks; | |
349 | raw_spin_unlock(&pid_list->lock); | |
350 | ||
351 | if (upper_count <= 0 && lower_count <= 0) | |
352 | return; | |
6954e415 | 353 | |
8d6e9098 SRV |
354 | while (upper_count-- > 0) { |
355 | union upper_chunk *chunk; | |
356 | ||
357 | chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); | |
358 | if (!chunk) | |
359 | break; | |
360 | *upper_next = chunk; | |
361 | upper_next = &chunk->next; | |
362 | ucnt++; | |
6954e415 | 363 | } |
8d6e9098 SRV |
364 | |
365 | while (lower_count-- > 0) { | |
366 | union lower_chunk *chunk; | |
367 | ||
368 | chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); | |
369 | if (!chunk) | |
370 | break; | |
371 | *lower_next = chunk; | |
372 | lower_next = &chunk->next; | |
373 | lcnt++; | |
374 | } | |
375 | ||
376 | raw_spin_lock(&pid_list->lock); | |
377 | if (upper) { | |
378 | *upper_next = pid_list->upper_list; | |
379 | pid_list->upper_list = upper; | |
380 | pid_list->free_upper_chunks += ucnt; | |
381 | } | |
382 | if (lower) { | |
383 | *lower_next = pid_list->lower_list; | |
384 | pid_list->lower_list = lower; | |
385 | pid_list->free_lower_chunks += lcnt; | |
386 | } | |
387 | raw_spin_unlock(&pid_list->lock); | |
388 | ||
389 | /* | |
390 | * On success of allocating all the chunks, both counters | |
391 | * will be less than zero. If they are not, then an allocation | |
392 | * failed, and we should not try again. | |
393 | */ | |
394 | if (upper_count >= 0 || lower_count >= 0) | |
395 | return; | |
396 | /* | |
397 | * When the locks were released, free chunks could have | |
398 | * been used and allocation needs to be done again. Might as | |
399 | * well allocate it now. | |
400 | */ | |
401 | goto again; | |
6954e415 SRV |
402 | } |
403 | ||
404 | /** | |
405 | * trace_pid_list_alloc - create a new pid_list | |
406 | * | |
407 | * Allocates a new pid_list to store pids into. | |
408 | * | |
409 | * Returns the pid_list on success, NULL otherwise. | |
410 | */ | |
411 | struct trace_pid_list *trace_pid_list_alloc(void) | |
412 | { | |
413 | struct trace_pid_list *pid_list; | |
8d6e9098 SRV |
414 | int i; |
415 | ||
416 | /* According to linux/thread.h, pids can be no bigger that 30 bits */ | |
417 | WARN_ON_ONCE(pid_max > (1 << 30)); | |
6954e415 | 418 | |
8d6e9098 | 419 | pid_list = kzalloc(sizeof(*pid_list), GFP_KERNEL); |
6954e415 SRV |
420 | if (!pid_list) |
421 | return NULL; | |
422 | ||
8d6e9098 | 423 | init_irq_work(&pid_list->refill_irqwork, pid_list_refill_irq); |
6954e415 | 424 | |
8d6e9098 SRV |
425 | raw_spin_lock_init(&pid_list->lock); |
426 | ||
427 | for (i = 0; i < CHUNK_ALLOC; i++) { | |
428 | union upper_chunk *chunk; | |
429 | ||
430 | chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); | |
431 | if (!chunk) | |
432 | break; | |
433 | chunk->next = pid_list->upper_list; | |
434 | pid_list->upper_list = chunk; | |
435 | pid_list->free_upper_chunks++; | |
6954e415 | 436 | } |
8d6e9098 SRV |
437 | |
438 | for (i = 0; i < CHUNK_ALLOC; i++) { | |
439 | union lower_chunk *chunk; | |
440 | ||
441 | chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); | |
442 | if (!chunk) | |
443 | break; | |
444 | chunk->next = pid_list->lower_list; | |
445 | pid_list->lower_list = chunk; | |
446 | pid_list->free_lower_chunks++; | |
447 | } | |
448 | ||
6954e415 SRV |
449 | return pid_list; |
450 | } | |
451 | ||
452 | /** | |
453 | * trace_pid_list_free - Frees an allocated pid_list. | |
454 | * | |
455 | * Frees the memory for a pid_list that was allocated. | |
456 | */ | |
457 | void trace_pid_list_free(struct trace_pid_list *pid_list) | |
458 | { | |
8d6e9098 SRV |
459 | union upper_chunk *upper; |
460 | union lower_chunk *lower; | |
461 | int i, j; | |
462 | ||
6954e415 SRV |
463 | if (!pid_list) |
464 | return; | |
465 | ||
8d6e9098 SRV |
466 | irq_work_sync(&pid_list->refill_irqwork); |
467 | ||
468 | while (pid_list->lower_list) { | |
469 | union lower_chunk *chunk; | |
470 | ||
471 | chunk = pid_list->lower_list; | |
472 | pid_list->lower_list = pid_list->lower_list->next; | |
473 | kfree(chunk); | |
474 | } | |
475 | ||
476 | while (pid_list->upper_list) { | |
477 | union upper_chunk *chunk; | |
478 | ||
479 | chunk = pid_list->upper_list; | |
480 | pid_list->upper_list = pid_list->upper_list->next; | |
481 | kfree(chunk); | |
482 | } | |
483 | ||
484 | for (i = 0; i < UPPER1_SIZE; i++) { | |
485 | upper = pid_list->upper[i]; | |
486 | if (upper) { | |
487 | for (j = 0; j < UPPER2_SIZE; j++) { | |
488 | lower = upper->data[j]; | |
489 | kfree(lower); | |
490 | } | |
491 | kfree(upper); | |
492 | } | |
493 | } | |
6954e415 SRV |
494 | kfree(pid_list); |
495 | } |