// SPDX-License-Identifier: GPL-2.0-only
/*
 * Generic stack depot for storing stack traces.
 *
 * Some debugging tools need to save stack traces of certain events which can
 * be later presented to the user. For example, KASAN needs to save alloc and
 * free stacks for each object, but storing two stack traces per object
 * requires too much memory (e.g. SLUB_DEBUG needs 256 bytes per object for
 * that).
 *
 * Instead, stack depot maintains a hashtable of unique stacktraces. Since alloc
 * and free stacks repeat a lot, we save about 100x space.
 * Stacks are never removed from the depot, so we store them one after another
 * in a contiguous memory allocation.
 *
 * Author: Alexander Potapenko <glider@google.com>
 * Copyright (C) 2016 Google, Inc.
 *
 * Based on code by Dmitry Chernenkov.
 */
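
/*
 * A minimal usage sketch (illustrative, not part of this file): a client
 * captures a raw trace, deduplicates it through the depot, and keeps only
 * the 32-bit handle. stack_trace_save() and stack_depot_save() are the
 * real APIs; the wrapper function and the 64-entry buffer are assumptions.
 *
 *	static depot_stack_handle_t save_current_stack(gfp_t flags)
 *	{
 *		unsigned long entries[64];
 *		unsigned int nr_entries;
 *
 *		nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
 *		return stack_depot_save(entries, nr_entries, flags);
 *	}
 */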

#include <linux/gfp.h>
#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/stacktrace.h>
#include <linux/stackdepot.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/memblock.h>

#define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8)

#define STACK_ALLOC_NULL_PROTECTION_BITS 1
#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */
#define STACK_ALLOC_SIZE (1LL << (PAGE_SHIFT + STACK_ALLOC_ORDER))
#define STACK_ALLOC_ALIGN 4
#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \
					STACK_ALLOC_ALIGN)
#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \
		STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS)
#define STACK_ALLOC_SLABS_CAP 8192
#define STACK_ALLOC_MAX_SLABS \
	(((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \
	 (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP)

/* The compact structure to store the reference to stacks. */
union handle_parts {
	depot_stack_handle_t handle;
	struct {
		u32 slabindex : STACK_ALLOC_INDEX_BITS;
		u32 offset : STACK_ALLOC_OFFSET_BITS;
		u32 valid : STACK_ALLOC_NULL_PROTECTION_BITS;
	};
};
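
/*
 * Worked example, assuming 4 KB pages (PAGE_SHIFT == 12) and a 32-bit
 * depot_stack_handle_t: each slab is 1 << (12 + 2) = 16 KB and records
 * are aligned to 1 << 4 = 16 bytes, so an in-slab offset takes
 * 2 + 12 - 4 = 10 bits. With 1 valid bit, 32 - 1 - 10 = 21 bits remain
 * for the slab index, which STACK_ALLOC_SLABS_CAP then caps at 8192.
 */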

struct stack_record {
	struct stack_record *next;	/* Link in the hashtable */
	u32 hash;			/* Hash in the hashtable */
	u32 size;			/* Number of frames in the stack */
	union handle_parts handle;
	unsigned long entries[];	/* Variable-sized array of entries. */
};

static void *stack_slabs[STACK_ALLOC_MAX_SLABS];

static int depot_index;
static int next_slab_inited;
static size_t depot_offset;
static DEFINE_RAW_SPINLOCK(depot_lock);

static bool init_stack_slab(void **prealloc)
{
	if (!*prealloc)
		return false;
	/*
	 * This smp_load_acquire() pairs with smp_store_release() to
	 * |next_slab_inited| below and in depot_alloc_stack().
	 */
	if (smp_load_acquire(&next_slab_inited))
		return true;
	if (stack_slabs[depot_index] == NULL) {
		stack_slabs[depot_index] = *prealloc;
		*prealloc = NULL;
	} else {
		/* If this is the last depot slab, do not touch the next one. */
		if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) {
			stack_slabs[depot_index + 1] = *prealloc;
			*prealloc = NULL;
		}
		/*
		 * This smp_store_release pairs with smp_load_acquire() from
		 * |next_slab_inited| above and in stack_depot_save().
		 */
		smp_store_release(&next_slab_inited, 1);
	}
	return true;
}

/* Allocation of a new stack in raw storage */
static struct stack_record *
depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
{
	struct stack_record *stack;
	size_t required_size = struct_size(stack, entries, size);

	required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN);

	if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) {
		if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) {
			WARN_ONCE(1, "Stack depot reached limit capacity");
			return NULL;
		}
		depot_index++;
		depot_offset = 0;
		/*
		 * smp_store_release() here pairs with smp_load_acquire() from
		 * |next_slab_inited| in stack_depot_save() and
		 * init_stack_slab().
		 */
		if (depot_index + 1 < STACK_ALLOC_MAX_SLABS)
			smp_store_release(&next_slab_inited, 0);
	}
	init_stack_slab(prealloc);
	if (stack_slabs[depot_index] == NULL)
		return NULL;

	stack = stack_slabs[depot_index] + depot_offset;

	stack->hash = hash;
	stack->size = size;
	stack->handle.slabindex = depot_index;
	stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN;
	stack->handle.valid = 1;
	memcpy(stack->entries, entries, flex_array_size(stack, entries, size));
	depot_offset += required_size;

	return stack;
}

#define STACK_HASH_SIZE (1L << CONFIG_STACK_HASH_ORDER)
#define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
#define STACK_HASH_SEED 0x9747b28c

static bool stack_depot_disable;
static struct stack_record **stack_table;

static int __init is_stack_depot_disabled(char *str)
{
	int ret;

	ret = kstrtobool(str, &stack_depot_disable);
	if (!ret && stack_depot_disable) {
		pr_info("Stack Depot is disabled\n");
		stack_table = NULL;
	}
	return 0;
}
early_param("stack_depot_disable", is_stack_depot_disabled);
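
/*
 * For example (illustrative), booting with "stack_depot_disable=on" on the
 * kernel command line skips the hash table allocation entirely, and
 * __stack_depot_save() then bails out early and returns 0 for every trace.
 */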

/*
 * __ref because of memblock_alloc(), which will not actually be called
 * after the __init code is gone, because at that point slab_is_available()
 * is true.
 */
__ref int stack_depot_init(void)
{
	static DEFINE_MUTEX(stack_depot_init_mutex);

	mutex_lock(&stack_depot_init_mutex);
	if (!stack_depot_disable && !stack_table) {
		size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
		int i;

		if (slab_is_available()) {
			pr_info("Stack Depot allocating hash table with kvmalloc\n");
			stack_table = kvmalloc(size, GFP_KERNEL);
		} else {
			pr_info("Stack Depot allocating hash table with memblock_alloc\n");
			stack_table = memblock_alloc(size, SMP_CACHE_BYTES);
		}
		if (stack_table) {
			for (i = 0; i < STACK_HASH_SIZE; i++)
				stack_table[i] = NULL;
		} else {
			pr_err("Stack Depot hash table allocation failed, disabling\n");
			stack_depot_disable = true;
			mutex_unlock(&stack_depot_init_mutex);
			return -ENOMEM;
		}
	}
	mutex_unlock(&stack_depot_init_mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(stack_depot_init);
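
/*
 * Typical call site (a sketch; the function name is an assumption): a
 * subsystem that wants to use the depot calls stack_depot_init() once from
 * its own init path. Repeat calls are cheap, as the mutex-protected body
 * allocates the hash table only once.
 *
 *	static int __init my_debug_tool_init(void)
 *	{
 *		return stack_depot_init();
 *	}
 */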

/* Calculate hash for a stack */
static inline u32 hash_stack(unsigned long *entries, unsigned int size)
{
	return jhash2((u32 *)entries,
		      array_size(size, sizeof(*entries)) / sizeof(u32),
		      STACK_HASH_SEED);
}

/*
 * Use our own, non-instrumented version of memcmp().
 *
 * We actually don't care about the order, just the equality.
 */
static inline
int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2,
			unsigned int n)
{
	for ( ; n-- ; u1++, u2++) {
		if (*u1 != *u2)
			return 1;
	}
	return 0;
}

/* Find a stack that is equal to the one stored in entries in the hash */
static inline struct stack_record *find_stack(struct stack_record *bucket,
					      unsigned long *entries, int size,
					      u32 hash)
{
	struct stack_record *found;

	for (found = bucket; found; found = found->next) {
		if (found->hash == hash &&
		    found->size == size &&
		    !stackdepot_memcmp(entries, found->entries, size))
			return found;
	}
	return NULL;
}

/**
 * stack_depot_snprint - print stack entries from a depot into a buffer
 *
 * @handle:	Stack depot handle which was returned from
 *		stack_depot_save().
 * @buf:	Pointer to the print buffer
 * @size:	Size of the print buffer
 * @spaces:	Number of leading spaces to print
 *
 * Return:	Number of bytes printed.
 */
int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
			int spaces)
{
	unsigned long *entries;
	unsigned int nr_entries;

	nr_entries = stack_depot_fetch(handle, &entries);
	return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries,
						spaces) : 0;
}
EXPORT_SYMBOL_GPL(stack_depot_snprint);
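
/*
 * Usage sketch (illustrative; the buffer size is an assumption): render a
 * previously saved trace into a report buffer with a two-space indent.
 *
 *	char buf[512];
 *
 *	if (stack_depot_snprint(handle, buf, sizeof(buf), 2))
 *		pr_info("%s", buf);
 */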

/**
 * stack_depot_print - print stack entries from a depot
 *
 * @stack:	Stack depot handle which was returned from
 *		stack_depot_save().
 */
void stack_depot_print(depot_stack_handle_t stack)
{
	unsigned long *entries;
	unsigned int nr_entries;

	nr_entries = stack_depot_fetch(stack, &entries);
	if (nr_entries > 0)
		stack_trace_print(entries, nr_entries, 0);
}
EXPORT_SYMBOL_GPL(stack_depot_print);

/**
 * stack_depot_fetch - Fetch stack entries from a depot
 *
 * @handle:	Stack depot handle which was returned from
 *		stack_depot_save().
 * @entries:	Pointer to store the entries address
 *
 * Return:	The number of trace entries for this depot.
 */
unsigned int stack_depot_fetch(depot_stack_handle_t handle,
			       unsigned long **entries)
{
	union handle_parts parts = { .handle = handle };
	void *slab;
	size_t offset = parts.offset << STACK_ALLOC_ALIGN;
	struct stack_record *stack;

	*entries = NULL;
	if (!handle)
		return 0;

	if (parts.slabindex > depot_index) {
		WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n",
			parts.slabindex, depot_index, handle);
		return 0;
	}
	slab = stack_slabs[parts.slabindex];
	if (!slab)
		return 0;
	stack = slab + offset;

	*entries = stack->entries;
	return stack->size;
}
EXPORT_SYMBOL_GPL(stack_depot_fetch);
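
/*
 * Usage sketch (illustrative): turn a handle back into the saved frames.
 * The returned pointer aliases depot storage, so the caller must not
 * modify or free it.
 *
 *	unsigned long *entries;
 *	unsigned int nr_entries;
 *
 *	nr_entries = stack_depot_fetch(handle, &entries);
 *	if (nr_entries)
 *		stack_trace_print(entries, nr_entries, 0);
 */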

/**
 * __stack_depot_save - Save a stack trace from an array
 *
 * @entries:		Pointer to storage array
 * @nr_entries:		Size of the storage array
 * @alloc_flags:	Allocation gfp flags
 * @can_alloc:		Allocate stack slabs (increased chance of failure if false)
 *
 * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is
 * %true, the function is allowed to replenish the stack slab pool in case no
 * space is left (allocates using GFP flags of @alloc_flags). If @can_alloc is
 * %false, it avoids any allocations and will fail if no space is left to store
 * the stack trace.
 *
 * If the stack trace in @entries is from an interrupt, only the portion up to
 * interrupt entry is saved.
 *
 * Context: Any context, but setting @can_alloc to %false is required if
 *          alloc_pages() cannot be used from the current context. Currently
 *          this is the case from contexts where neither %GFP_ATOMIC nor
 *          %GFP_NOWAIT can be used (NMI, raw_spin_lock).
 *
 * Return: The handle of the stack struct stored in depot, 0 on failure.
 */
depot_stack_handle_t __stack_depot_save(unsigned long *entries,
					unsigned int nr_entries,
					gfp_t alloc_flags, bool can_alloc)
{
	struct stack_record *found = NULL, **bucket;
	depot_stack_handle_t retval = 0;
	struct page *page = NULL;
	void *prealloc = NULL;
	unsigned long flags;
	u32 hash;

	/*
	 * If this stack trace is from an interrupt, including anything before
	 * interrupt entry usually leads to unbounded stackdepot growth.
	 *
	 * Because use of filter_irq_stacks() is a requirement to ensure
	 * stackdepot can efficiently deduplicate interrupt stacks, always
	 * filter_irq_stacks() to simplify all callers' use of stackdepot.
	 */
	nr_entries = filter_irq_stacks(entries, nr_entries);

	if (unlikely(nr_entries == 0) || stack_depot_disable)
		goto fast_exit;

	hash = hash_stack(entries, nr_entries);
	bucket = &stack_table[hash & STACK_HASH_MASK];

	/*
	 * Fast path: look the stack trace up without locking.
	 * The smp_load_acquire() here pairs with smp_store_release() to
	 * |bucket| below.
	 */
	found = find_stack(smp_load_acquire(bucket), entries,
			   nr_entries, hash);
	if (found)
		goto exit;

	/*
	 * Check if the current or the next stack slab needs to be initialized.
	 * If so, allocate the memory - we won't be able to do that under the
	 * lock.
	 *
	 * The smp_load_acquire() here pairs with smp_store_release() to
	 * |next_slab_inited| in depot_alloc_stack() and init_stack_slab().
	 */
	if (unlikely(can_alloc && !smp_load_acquire(&next_slab_inited))) {
		/*
		 * Zero out zone modifiers, as we don't have specific zone
		 * requirements. Keep the flags related to allocation in atomic
		 * contexts and I/O.
		 */
		alloc_flags &= ~GFP_ZONEMASK;
		alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
		alloc_flags |= __GFP_NOWARN;
		page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER);
		if (page)
			prealloc = page_address(page);
	}

	raw_spin_lock_irqsave(&depot_lock, flags);

	found = find_stack(*bucket, entries, nr_entries, hash);
	if (!found) {
		struct stack_record *new = depot_alloc_stack(entries, nr_entries, hash, &prealloc);

		if (new) {
			new->next = *bucket;
			/*
			 * This smp_store_release() pairs with
			 * smp_load_acquire() from |bucket| above.
			 */
			smp_store_release(bucket, new);
			found = new;
		}
	} else if (prealloc) {
		/*
		 * We didn't need to store this stack trace, but let's keep
		 * the preallocated memory for the future.
		 */
		WARN_ON(!init_stack_slab(&prealloc));
	}

	raw_spin_unlock_irqrestore(&depot_lock, flags);
exit:
	if (prealloc) {
		/* Nobody used this memory, ok to free it. */
		free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER);
	}
	if (found)
		retval = found->handle.handle;
fast_exit:
	return retval;
}
EXPORT_SYMBOL_GPL(__stack_depot_save);
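
/*
 * Usage sketch (illustrative): from a context where alloc_pages() must not
 * be called (e.g. under a raw spinlock), pass @can_alloc == false and
 * accept that saving may fail with 0 when the slab pool is exhausted.
 *
 *	handle = __stack_depot_save(entries, nr_entries, GFP_NOWAIT, false);
 *	if (!handle)
 *		pr_warn("depot full, trace dropped\n");
 */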

/**
 * stack_depot_save - Save a stack trace from an array
 *
 * @entries:		Pointer to storage array
 * @nr_entries:		Size of the storage array
 * @alloc_flags:	Allocation gfp flags
 *
 * Context: Contexts where allocations via alloc_pages() are allowed.
 *          See __stack_depot_save() for more details.
 *
 * Return: The handle of the stack struct stored in depot, 0 on failure.
 */
depot_stack_handle_t stack_depot_save(unsigned long *entries,
				      unsigned int nr_entries,
				      gfp_t alloc_flags)
{
	return __stack_depot_save(entries, nr_entries, alloc_flags, true);
}
EXPORT_SYMBOL_GPL(stack_depot_save);
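
/*
 * Usage sketch (illustrative; the struct is an assumption): a tracker
 * stores one handle per object instead of a full trace, which is where
 * the memory savings described at the top of this file come from.
 *
 *	struct alloc_meta {
 *		depot_stack_handle_t alloc_stack;
 *	};
 *
 *	meta->alloc_stack = stack_depot_save(entries, nr_entries, GFP_KERNEL);
 */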