Commit | Line | Data |
---|---|---|
1802d0be | 1 | // SPDX-License-Identifier: GPL-2.0-only |
cd11016e | 2 | /* |
b232b999 | 3 | * Stack depot - a stack trace storage that avoids duplication. |
cd11016e | 4 | * |
b232b999 AK |
5 | * Internally, stack depot maintains a hash table of unique stacktraces. The |
6 | * stack traces themselves are stored contiguously one after another in a set | |
7 | * of separate page allocations. | |
8 | * | |
cd11016e AP |
9 | * Author: Alexander Potapenko <glider@google.com> |
10 | * Copyright (C) 2016 Google, Inc. | |
11 | * | |
b232b999 | 12 | * Based on the code by Dmitry Chernenkov. |
cd11016e AP |
13 | */ |
14 | ||
4a6b5314 AK |
15 | #define pr_fmt(fmt) "stackdepot: " fmt |
16 | ||
cd11016e AP |
17 | #include <linux/gfp.h> |
18 | #include <linux/jhash.h> | |
19 | #include <linux/kernel.h> | |
8e00b2df | 20 | #include <linux/kmsan.h> |
4805180b | 21 | #include <linux/list.h> |
cd11016e | 22 | #include <linux/mm.h> |
2dba5eb1 | 23 | #include <linux/mutex.h> |
cd11016e AP |
24 | #include <linux/percpu.h> |
25 | #include <linux/printk.h> | |
410b764f | 26 | #include <linux/refcount.h> |
cd11016e | 27 | #include <linux/slab.h> |
a6cd9570 | 28 | #include <linux/spinlock.h> |
cd11016e AP |
29 | #include <linux/stacktrace.h> |
30 | #include <linux/stackdepot.h> | |
31 | #include <linux/string.h> | |
32 | #include <linux/types.h> | |
e1fdc403 | 33 | #include <linux/memblock.h> |
f9987921 | 34 | #include <linux/kasan-enabled.h> |
cd11016e | 35 | |
424cafee AK |
36 | #define DEPOT_HANDLE_BITS (sizeof(depot_stack_handle_t) * 8) |
37 | ||
424cafee AK |
38 | #define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */ |
39 | #define DEPOT_POOL_SIZE (1LL << (PAGE_SHIFT + DEPOT_POOL_ORDER)) | |
40 | #define DEPOT_STACK_ALIGN 4 | |
41 | #define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN) | |
5f9ce55e AK |
42 | #define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \ |
43 | STACK_DEPOT_EXTRA_BITS) | |
bd9d9624 AK |
44 | #if IS_ENABLED(CONFIG_KMSAN) && CONFIG_STACKDEPOT_MAX_FRAMES >= 32 |
45 | /* | |
46 | * KMSAN is frequently used in fuzzing scenarios and thus saves a lot of stack | |
47 | * traces. As KMSAN does not support evicting stack traces from the stack | |
48 | * depot, the stack depot capacity might be reached quickly with large stack | |
49 | * records. Adjust the maximum number of stack depot pools for this case. | |
50 | */ | |
51 | #define DEPOT_POOLS_CAP (8192 * (CONFIG_STACKDEPOT_MAX_FRAMES / 16)) | |
52 | #else | |
424cafee | 53 | #define DEPOT_POOLS_CAP 8192 |
bd9d9624 | 54 | #endif |
424cafee AK |
55 | #define DEPOT_MAX_POOLS \ |
56 | (((1LL << (DEPOT_POOL_INDEX_BITS)) < DEPOT_POOLS_CAP) ? \ | |
57 | (1LL << (DEPOT_POOL_INDEX_BITS)) : DEPOT_POOLS_CAP) | |
cd11016e | 58 | |
b232b999 | 59 | /* Compact structure that stores a reference to a stack. */ |
cd11016e AP |
60 | union handle_parts { |
61 | depot_stack_handle_t handle; | |
62 | struct { | |
424cafee AK |
63 | u32 pool_index : DEPOT_POOL_INDEX_BITS; |
64 | u32 offset : DEPOT_OFFSET_BITS; | |
424cafee | 65 | u32 extra : STACK_DEPOT_EXTRA_BITS; |
cd11016e AP |
66 | }; |
67 | }; | |
68 | ||
69 | struct stack_record { | |
4805180b | 70 | struct list_head list; /* Links in hash table or freelist */ |
b29d3188 | 71 | u32 hash; /* Hash in hash table */ |
b232b999 | 72 | u32 size; /* Number of stored frames */ |
cd11016e | 73 | union handle_parts handle; |
410b764f | 74 | refcount_t count; |
fc60e0ca | 75 | unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */ |
cd11016e AP |
76 | }; |
77 | ||
fc60e0ca AK |
78 | #define DEPOT_STACK_RECORD_SIZE \ |
79 | ALIGN(sizeof(struct stack_record), 1 << DEPOT_STACK_ALIGN) | |
80 | ||
735df3c3 | 81 | static bool stack_depot_disabled; |
1c0310ad | 82 | static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); |
a5f1783b VB |
83 | static bool __stack_depot_early_init_passed __initdata; |
84 | ||
0d249ac0 | 85 | /* Use one hash table bucket per 16 KB of memory. */ |
4c2e9a67 | 86 | #define STACK_HASH_TABLE_SCALE 14 |
0d249ac0 | 87 | /* Limit the number of buckets between 4K and 1M. */ |
4c2e9a67 AK |
88 | #define STACK_BUCKET_NUMBER_ORDER_MIN 12 |
89 | #define STACK_BUCKET_NUMBER_ORDER_MAX 20 | |
0d249ac0 | 90 | /* Initial seed for jhash2. */ |
cd11016e AP |
91 | #define STACK_HASH_SEED 0x9747b28c |
92 | ||
4805180b AK |
93 | /* Hash table of stored stack records. */ |
94 | static struct list_head *stack_table; | |
0d249ac0 | 95 | /* Fixed order of the number of table buckets. Used when KASAN is enabled. */ |
4c2e9a67 | 96 | static unsigned int stack_bucket_number_order; |
0d249ac0 | 97 | /* Hash mask for indexing the table. */ |
f9987921 VB |
98 | static unsigned int stack_hash_mask; |
99 | ||
4805180b | 100 | /* Array of memory regions that store stack records. */ |
424cafee | 101 | static void *stack_pools[DEPOT_MAX_POOLS]; |
a5d21f71 AK |
102 | /* Newly allocated pool that is not yet added to stack_pools. */ |
103 | static void *new_pool; | |
b29d3188 AK |
104 | /* Number of pools in stack_pools. */ |
105 | static int pools_num; | |
4805180b AK |
106 | /* Freelist of stack records within stack_pools. */ |
107 | static LIST_HEAD(free_stacks); | |
d11a5621 AK |
108 | /* |
109 | * Stack depot tries to keep an extra pool allocated even before it runs out | |
b6a353d3 AK |
110 | * of space in the currently used pool. This flag marks whether this extra pool |
111 | * needs to be allocated. It has the value 0 when either an extra pool is not | |
112 | * yet allocated or if the limit on the number of pools is reached. | |
d11a5621 | 113 | */ |
a6cd9570 AK |
114 | static bool new_pool_required = true; |
115 | /* Lock that protects the variables above. */ | |
116 | static DEFINE_RWLOCK(pool_rwlock); | |
e1fdc403 | 117 | |
735df3c3 | 118 | static int __init disable_stack_depot(char *str) |
e1fdc403 | 119 | { |
4d07a037 | 120 | return kstrtobool(str, &stack_depot_disabled); |
e1fdc403 | 121 | } |
735df3c3 | 122 | early_param("stack_depot_disable", disable_stack_depot); |
e1fdc403 | 123 | |
1c0310ad | 124 | void __init stack_depot_request_early_init(void) |
a5f1783b | 125 | { |
1c0310ad | 126 | /* Too late to request early init now. */ |
a5f1783b VB |
127 | WARN_ON(__stack_depot_early_init_passed); |
128 | ||
1c0310ad | 129 | __stack_depot_early_init_requested = true; |
a5f1783b VB |
130 | } |
131 | ||
4805180b AK |
132 | /* Initialize list_head's within the hash table. */ |
133 | static void init_stack_table(unsigned long entries) | |
134 | { | |
135 | unsigned long i; | |
136 | ||
137 | for (i = 0; i < entries; i++) | |
138 | INIT_LIST_HEAD(&stack_table[i]); | |
139 | } | |
140 | ||
df225c87 | 141 | /* Allocates a hash table via memblock. Can only be used during early boot. */ |
a5f1783b VB |
142 | int __init stack_depot_early_init(void) |
143 | { | |
f9987921 | 144 | unsigned long entries = 0; |
a5f1783b | 145 | |
df225c87 | 146 | /* This function must be called only once, from mm_init(). */ |
a5f1783b VB |
147 | if (WARN_ON(__stack_depot_early_init_passed)) |
148 | return 0; | |
a5f1783b VB |
149 | __stack_depot_early_init_passed = true; |
150 | ||
4d07a037 AK |
151 | /* |
152 | * Print disabled message even if early init has not been requested: | |
153 | * stack_depot_init() will not print one. | |
154 | */ | |
155 | if (stack_depot_disabled) { | |
156 | pr_info("disabled\n"); | |
157 | return 0; | |
158 | } | |
159 | ||
df225c87 AK |
160 | /* |
161 | * If KASAN is enabled, use the maximum order: KASAN is frequently used | |
162 | * in fuzzing scenarios, which leads to a large number of different | |
163 | * stack traces being stored in stack depot. | |
164 | */ | |
4c2e9a67 AK |
165 | if (kasan_enabled() && !stack_bucket_number_order) |
166 | stack_bucket_number_order = STACK_BUCKET_NUMBER_ORDER_MAX; | |
f9987921 | 167 | |
4d07a037 AK |
168 | /* |
169 | * Check if early init has been requested after setting | |
170 | * stack_bucket_number_order: stack_depot_init() uses its value. | |
171 | */ | |
172 | if (!__stack_depot_early_init_requested) | |
a5f1783b VB |
173 | return 0; |
174 | ||
df225c87 | 175 | /* |
4c2e9a67 | 176 | * If stack_bucket_number_order is not set, leave entries as 0 to rely |
4805180b | 177 | * on the automatic calculations performed by alloc_large_system_hash(). |
df225c87 | 178 | */ |
4c2e9a67 AK |
179 | if (stack_bucket_number_order) |
180 | entries = 1UL << stack_bucket_number_order; | |
df225c87 | 181 | pr_info("allocating hash table via alloc_large_system_hash\n"); |
f9987921 | 182 | stack_table = alloc_large_system_hash("stackdepot", |
4805180b | 183 | sizeof(struct list_head), |
f9987921 | 184 | entries, |
4c2e9a67 | 185 | STACK_HASH_TABLE_SCALE, |
4805180b | 186 | HASH_EARLY, |
f9987921 VB |
187 | NULL, |
188 | &stack_hash_mask, | |
4c2e9a67 AK |
189 | 1UL << STACK_BUCKET_NUMBER_ORDER_MIN, |
190 | 1UL << STACK_BUCKET_NUMBER_ORDER_MAX); | |
a5f1783b | 191 | if (!stack_table) { |
4a6b5314 | 192 | pr_err("hash table allocation failed, disabling\n"); |
735df3c3 | 193 | stack_depot_disabled = true; |
a5f1783b VB |
194 | return -ENOMEM; |
195 | } | |
4805180b AK |
196 | if (!entries) { |
197 | /* | |
198 | * Obtain the number of entries that was calculated by | |
199 | * alloc_large_system_hash(). | |
200 | */ | |
201 | entries = stack_hash_mask + 1; | |
202 | } | |
203 | init_stack_table(entries); | |
a5f1783b VB |
204 | |
205 | return 0; | |
206 | } | |
207 | ||
df225c87 | 208 | /* Allocates a hash table via kvcalloc. Can be used after boot. */ |
a5f1783b | 209 | int stack_depot_init(void) |
e1fdc403 | 210 | { |
2dba5eb1 | 211 | static DEFINE_MUTEX(stack_depot_init_mutex); |
c60324fb | 212 | unsigned long entries; |
a5f1783b | 213 | int ret = 0; |
2dba5eb1 VB |
214 | |
215 | mutex_lock(&stack_depot_init_mutex); | |
f9987921 | 216 | |
c60324fb AK |
217 | if (stack_depot_disabled || stack_table) |
218 | goto out_unlock; | |
f9987921 | 219 | |
c60324fb | 220 | /* |
4c2e9a67 | 221 | * Similarly to stack_depot_early_init, use stack_bucket_number_order |
c60324fb AK |
222 | * if assigned, and rely on automatic scaling otherwise. |
223 | */ | |
4c2e9a67 AK |
224 | if (stack_bucket_number_order) { |
225 | entries = 1UL << stack_bucket_number_order; | |
c60324fb | 226 | } else { |
4c2e9a67 | 227 | int scale = STACK_HASH_TABLE_SCALE; |
c60324fb AK |
228 | |
229 | entries = nr_free_buffer_pages(); | |
230 | entries = roundup_pow_of_two(entries); | |
231 | ||
232 | if (scale > PAGE_SHIFT) | |
233 | entries >>= (scale - PAGE_SHIFT); | |
234 | else | |
235 | entries <<= (PAGE_SHIFT - scale); | |
e1fdc403 | 236 | } |
c60324fb | 237 | |
4c2e9a67 AK |
238 | if (entries < 1UL << STACK_BUCKET_NUMBER_ORDER_MIN) |
239 | entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MIN; | |
240 | if (entries > 1UL << STACK_BUCKET_NUMBER_ORDER_MAX) | |
241 | entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MAX; | |
c60324fb AK |
242 | |
243 | pr_info("allocating hash table of %lu entries via kvcalloc\n", entries); | |
4805180b | 244 | stack_table = kvcalloc(entries, sizeof(struct list_head), GFP_KERNEL); |
c60324fb AK |
245 | if (!stack_table) { |
246 | pr_err("hash table allocation failed, disabling\n"); | |
247 | stack_depot_disabled = true; | |
248 | ret = -ENOMEM; | |
249 | goto out_unlock; | |
250 | } | |
251 | stack_hash_mask = entries - 1; | |
4805180b | 252 | init_stack_table(entries); |
c60324fb AK |
253 | |
254 | out_unlock: | |
2dba5eb1 | 255 | mutex_unlock(&stack_depot_init_mutex); |
c60324fb | 256 | |
a5f1783b | 257 | return ret; |
e1fdc403 | 258 | } |
2dba5eb1 | 259 | EXPORT_SYMBOL_GPL(stack_depot_init); |
cd11016e | 260 | |
b29d3188 AK |
261 | /* Initializes a stack depol pool. */ |
262 | static void depot_init_pool(void *pool) | |
263 | { | |
4805180b | 264 | int offset; |
b29d3188 | 265 | |
a6cd9570 AK |
266 | lockdep_assert_held_write(&pool_rwlock); |
267 | ||
4805180b AK |
268 | WARN_ON(!list_empty(&free_stacks)); |
269 | ||
270 | /* Initialize handles and link stack records into the freelist. */ | |
271 | for (offset = 0; offset <= DEPOT_POOL_SIZE - DEPOT_STACK_RECORD_SIZE; | |
272 | offset += DEPOT_STACK_RECORD_SIZE) { | |
b29d3188 AK |
273 | struct stack_record *stack = pool + offset; |
274 | ||
275 | stack->handle.pool_index = pools_num; | |
276 | stack->handle.offset = offset >> DEPOT_STACK_ALIGN; | |
277 | stack->handle.extra = 0; | |
278 | ||
4805180b | 279 | list_add(&stack->list, &free_stacks); |
b29d3188 AK |
280 | } |
281 | ||
b29d3188 AK |
282 | /* Save reference to the pool to be used by depot_fetch_stack(). */ |
283 | stack_pools[pools_num] = pool; | |
a6cd9570 | 284 | pools_num++; |
b29d3188 AK |
285 | } |
286 | ||
b6a353d3 AK |
287 | /* Keeps the preallocated memory to be used for a new stack depot pool. */ |
288 | static void depot_keep_new_pool(void **prealloc) | |
15ef6a98 | 289 | { |
a6cd9570 AK |
290 | lockdep_assert_held_write(&pool_rwlock); |
291 | ||
15ef6a98 | 292 | /* |
b6a353d3 | 293 | * If a new pool is already saved or the maximum number of |
d11a5621 | 294 | * pools is reached, do not use the preallocated memory. |
15ef6a98 | 295 | */ |
b6a353d3 | 296 | if (!new_pool_required) |
514d5c55 | 297 | return; |
cd0fc64e | 298 | |
94b7d328 | 299 | /* |
b6a353d3 | 300 | * Use the preallocated memory for the new pool |
94b7d328 AK |
301 | * as long as we do not exceed the maximum number of pools. |
302 | */ | |
b29d3188 | 303 | if (pools_num < DEPOT_MAX_POOLS) { |
a5d21f71 | 304 | new_pool = *prealloc; |
15ef6a98 | 305 | *prealloc = NULL; |
15ef6a98 | 306 | } |
94b7d328 AK |
307 | |
308 | /* | |
b6a353d3 | 309 | * At this point, either a new pool is kept or the maximum |
94b7d328 AK |
310 | * number of pools is reached. In either case, take note that |
311 | * keeping another pool is not required. | |
94b7d328 | 312 | */ |
a6cd9570 | 313 | new_pool_required = false; |
15ef6a98 AK |
314 | } |
315 | ||
94b7d328 | 316 | /* Updates references to the current and the next stack depot pools. */ |
b29d3188 | 317 | static bool depot_update_pools(void **prealloc) |
15ef6a98 | 318 | { |
a6cd9570 AK |
319 | lockdep_assert_held_write(&pool_rwlock); |
320 | ||
b29d3188 | 321 | /* Check if we still have objects in the freelist. */ |
4805180b | 322 | if (!list_empty(&free_stacks)) |
b29d3188 AK |
323 | goto out_keep_prealloc; |
324 | ||
325 | /* Check if we have a new pool saved and use it. */ | |
326 | if (new_pool) { | |
327 | depot_init_pool(new_pool); | |
a5d21f71 | 328 | new_pool = NULL; |
b29d3188 AK |
329 | |
330 | /* Take note that we might need a new new_pool. */ | |
331 | if (pools_num < DEPOT_MAX_POOLS) | |
a6cd9570 | 332 | new_pool_required = true; |
b29d3188 AK |
333 | |
334 | /* Try keeping the preallocated memory for new_pool. */ | |
335 | goto out_keep_prealloc; | |
336 | } | |
337 | ||
338 | /* Bail out if we reached the pool limit. */ | |
339 | if (unlikely(pools_num >= DEPOT_MAX_POOLS)) { | |
340 | WARN_ONCE(1, "Stack depot reached limit capacity"); | |
341 | return false; | |
15ef6a98 | 342 | } |
cd0fc64e | 343 | |
b29d3188 AK |
344 | /* Check if we have preallocated memory and use it. */ |
345 | if (*prealloc) { | |
346 | depot_init_pool(*prealloc); | |
94b7d328 AK |
347 | *prealloc = NULL; |
348 | return true; | |
349 | } | |
350 | ||
b29d3188 AK |
351 | return false; |
352 | ||
353 | out_keep_prealloc: | |
354 | /* Keep the preallocated memory for a new pool if required. */ | |
514d5c55 | 355 | if (*prealloc) |
b6a353d3 | 356 | depot_keep_new_pool(prealloc); |
94b7d328 AK |
357 | return true; |
358 | } | |
359 | ||
360 | /* Allocates a new stack in a stack depot pool. */ | |
361 | static struct stack_record * | |
362 | depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) | |
363 | { | |
364 | struct stack_record *stack; | |
94b7d328 | 365 | |
a6cd9570 AK |
366 | lockdep_assert_held_write(&pool_rwlock); |
367 | ||
b6a353d3 | 368 | /* Update current and new pools if required and possible. */ |
b29d3188 | 369 | if (!depot_update_pools(prealloc)) |
94b7d328 | 370 | return NULL; |
cd0fc64e | 371 | |
b29d3188 | 372 | /* Check if we have a stack record to save the stack trace. */ |
4805180b | 373 | if (list_empty(&free_stacks)) |
15ef6a98 AK |
374 | return NULL; |
375 | ||
4805180b AK |
376 | /* Get and unlink the first entry from the freelist. */ |
377 | stack = list_first_entry(&free_stacks, struct stack_record, list); | |
378 | list_del(&stack->list); | |
b29d3188 | 379 | |
fc60e0ca AK |
380 | /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */ |
381 | if (size > CONFIG_STACKDEPOT_MAX_FRAMES) | |
382 | size = CONFIG_STACKDEPOT_MAX_FRAMES; | |
383 | ||
cd0fc64e | 384 | /* Save the stack trace. */ |
15ef6a98 AK |
385 | stack->hash = hash; |
386 | stack->size = size; | |
b29d3188 | 387 | /* stack->handle is already filled in by depot_init_pool(). */ |
410b764f | 388 | refcount_set(&stack->count, 1); |
15ef6a98 | 389 | memcpy(stack->entries, entries, flex_array_size(stack, entries, size)); |
83130ab2 | 390 | |
8e00b2df AP |
391 | /* |
392 | * Let KMSAN know the stored stack record is initialized. This shall | |
393 | * prevent false positive reports if instrumented code accesses it. | |
394 | */ | |
b29d3188 | 395 | kmsan_unpoison_memory(stack, DEPOT_STACK_RECORD_SIZE); |
15ef6a98 AK |
396 | |
397 | return stack; | |
398 | } | |
399 | ||
83130ab2 AK |
400 | static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) |
401 | { | |
402 | union handle_parts parts = { .handle = handle }; | |
83130ab2 AK |
403 | void *pool; |
404 | size_t offset = parts.offset << DEPOT_STACK_ALIGN; | |
405 | struct stack_record *stack; | |
406 | ||
108be8de | 407 | lockdep_assert_held(&pool_rwlock); |
a6cd9570 AK |
408 | |
409 | if (parts.pool_index > pools_num) { | |
83130ab2 | 410 | WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n", |
a6cd9570 | 411 | parts.pool_index, pools_num, handle); |
83130ab2 AK |
412 | return NULL; |
413 | } | |
414 | ||
415 | pool = stack_pools[parts.pool_index]; | |
416 | if (!pool) | |
417 | return NULL; | |
418 | ||
419 | stack = pool + offset; | |
420 | return stack; | |
421 | } | |
422 | ||
108be8de AK |
423 | /* Links stack into the freelist. */ |
424 | static void depot_free_stack(struct stack_record *stack) | |
425 | { | |
426 | lockdep_assert_held_write(&pool_rwlock); | |
427 | ||
428 | list_add(&stack->list, &free_stacks); | |
429 | } | |
430 | ||
b232b999 | 431 | /* Calculates the hash for a stack. */ |
cd11016e AP |
432 | static inline u32 hash_stack(unsigned long *entries, unsigned int size) |
433 | { | |
434 | return jhash2((u32 *)entries, | |
180644f8 GS |
435 | array_size(size, sizeof(*entries)) / sizeof(u32), |
436 | STACK_HASH_SEED); | |
cd11016e AP |
437 | } |
438 | ||
b232b999 AK |
439 | /* |
440 | * Non-instrumented version of memcmp(). | |
441 | * Does not check the lexicographical order, only the equality. | |
a571b272 AP |
442 | */ |
443 | static inline | |
444 | int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2, | |
445 | unsigned int n) | |
446 | { | |
447 | for ( ; n-- ; u1++, u2++) { | |
448 | if (*u1 != *u2) | |
449 | return 1; | |
450 | } | |
451 | return 0; | |
452 | } | |
453 | ||
b232b999 | 454 | /* Finds a stack in a bucket of the hash table. */ |
4805180b | 455 | static inline struct stack_record *find_stack(struct list_head *bucket, |
cd11016e AP |
456 | unsigned long *entries, int size, |
457 | u32 hash) | |
458 | { | |
4805180b | 459 | struct list_head *pos; |
cd11016e AP |
460 | struct stack_record *found; |
461 | ||
a6cd9570 AK |
462 | lockdep_assert_held(&pool_rwlock); |
463 | ||
4805180b AK |
464 | list_for_each(pos, bucket) { |
465 | found = list_entry(pos, struct stack_record, list); | |
cd11016e AP |
466 | if (found->hash == hash && |
467 | found->size == size && | |
a571b272 | 468 | !stackdepot_memcmp(entries, found->entries, size)) |
cd11016e | 469 | return found; |
cd11016e AP |
470 | } |
471 | return NULL; | |
472 | } | |
473 | ||
022012dc AK |
474 | depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, |
475 | unsigned int nr_entries, | |
476 | gfp_t alloc_flags, | |
477 | depot_flags_t depot_flags) | |
cd11016e | 478 | { |
4805180b AK |
479 | struct list_head *bucket; |
480 | struct stack_record *found = NULL; | |
603c000c | 481 | depot_stack_handle_t handle = 0; |
cd11016e AP |
482 | struct page *page = NULL; |
483 | void *prealloc = NULL; | |
022012dc | 484 | bool can_alloc = depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC; |
a6cd9570 | 485 | bool need_alloc = false; |
c0cfc337 TG |
486 | unsigned long flags; |
487 | u32 hash; | |
cd11016e | 488 | |
022012dc AK |
489 | if (WARN_ON(depot_flags & ~STACK_DEPOT_FLAGS_MASK)) |
490 | return 0; | |
491 | ||
e9400660 ME |
492 | /* |
493 | * If this stack trace is from an interrupt, including anything before | |
b232b999 | 494 | * interrupt entry usually leads to unbounded stack depot growth. |
e9400660 | 495 | * |
b232b999 AK |
496 | * Since use of filter_irq_stacks() is a requirement to ensure stack |
497 | * depot can efficiently deduplicate interrupt stacks, always | |
498 | * filter_irq_stacks() to simplify all callers' use of stack depot. | |
e9400660 ME |
499 | */ |
500 | nr_entries = filter_irq_stacks(entries, nr_entries); | |
501 | ||
735df3c3 | 502 | if (unlikely(nr_entries == 0) || stack_depot_disabled) |
603c000c | 503 | return 0; |
cd11016e | 504 | |
c0cfc337 | 505 | hash = hash_stack(entries, nr_entries); |
f9987921 | 506 | bucket = &stack_table[hash & stack_hash_mask]; |
cd11016e | 507 | |
a6cd9570 | 508 | read_lock_irqsave(&pool_rwlock, flags); |
a914d8d6 | 509 | printk_deferred_enter(); |
a6cd9570 AK |
510 | |
511 | /* Fast path: look the stack trace up without full locking. */ | |
4805180b | 512 | found = find_stack(bucket, entries, nr_entries, hash); |
a6cd9570 | 513 | if (found) { |
410b764f AK |
514 | if (depot_flags & STACK_DEPOT_FLAG_GET) |
515 | refcount_inc(&found->count); | |
a914d8d6 | 516 | printk_deferred_exit(); |
a6cd9570 | 517 | read_unlock_irqrestore(&pool_rwlock, flags); |
cd11016e | 518 | goto exit; |
a6cd9570 AK |
519 | } |
520 | ||
521 | /* Take note if another stack pool needs to be allocated. */ | |
522 | if (new_pool_required) | |
523 | need_alloc = true; | |
524 | ||
a914d8d6 | 525 | printk_deferred_exit(); |
a6cd9570 | 526 | read_unlock_irqrestore(&pool_rwlock, flags); |
cd11016e AP |
527 | |
528 | /* | |
a6cd9570 AK |
529 | * Allocate memory for a new pool if required now: |
530 | * we won't be able to do that under the lock. | |
cd11016e | 531 | */ |
a6cd9570 | 532 | if (unlikely(can_alloc && need_alloc)) { |
cd11016e AP |
533 | /* |
534 | * Zero out zone modifiers, as we don't have specific zone | |
535 | * requirements. Keep the flags related to allocation in atomic | |
536 | * contexts and I/O. | |
537 | */ | |
538 | alloc_flags &= ~GFP_ZONEMASK; | |
539 | alloc_flags &= (GFP_ATOMIC | GFP_KERNEL); | |
87cc271d | 540 | alloc_flags |= __GFP_NOWARN; |
424cafee | 541 | page = alloc_pages(alloc_flags, DEPOT_POOL_ORDER); |
cd11016e AP |
542 | if (page) |
543 | prealloc = page_address(page); | |
544 | } | |
545 | ||
a6cd9570 | 546 | write_lock_irqsave(&pool_rwlock, flags); |
a914d8d6 | 547 | printk_deferred_enter(); |
cd11016e | 548 | |
4805180b | 549 | found = find_stack(bucket, entries, nr_entries, hash); |
cd11016e | 550 | if (!found) { |
b232b999 AK |
551 | struct stack_record *new = |
552 | depot_alloc_stack(entries, nr_entries, hash, &prealloc); | |
7f2b8818 | 553 | |
cd11016e | 554 | if (new) { |
4805180b | 555 | list_add(&new->list, bucket); |
cd11016e AP |
556 | found = new; |
557 | } | |
410b764f AK |
558 | } else { |
559 | if (depot_flags & STACK_DEPOT_FLAG_GET) | |
560 | refcount_inc(&found->count); | |
cd11016e | 561 | /* |
b232b999 | 562 | * Stack depot already contains this stack trace, but let's |
b6a353d3 | 563 | * keep the preallocated memory for future. |
cd11016e | 564 | */ |
410b764f AK |
565 | if (prealloc) |
566 | depot_keep_new_pool(&prealloc); | |
cd11016e AP |
567 | } |
568 | ||
a914d8d6 | 569 | printk_deferred_exit(); |
a6cd9570 | 570 | write_unlock_irqrestore(&pool_rwlock, flags); |
cd11016e AP |
571 | exit: |
572 | if (prealloc) { | |
b232b999 | 573 | /* Stack depot didn't use this memory, free it. */ |
424cafee | 574 | free_pages((unsigned long)prealloc, DEPOT_POOL_ORDER); |
cd11016e AP |
575 | } |
576 | if (found) | |
603c000c AK |
577 | handle = found->handle.handle; |
578 | return handle; | |
cd11016e | 579 | } |
022012dc | 580 | EXPORT_SYMBOL_GPL(stack_depot_save_flags); |
11ac25c6 | 581 | |
11ac25c6 ME |
582 | depot_stack_handle_t stack_depot_save(unsigned long *entries, |
583 | unsigned int nr_entries, | |
584 | gfp_t alloc_flags) | |
585 | { | |
022012dc AK |
586 | return stack_depot_save_flags(entries, nr_entries, alloc_flags, |
587 | STACK_DEPOT_FLAG_CAN_ALLOC); | |
11ac25c6 | 588 | } |
c0cfc337 | 589 | EXPORT_SYMBOL_GPL(stack_depot_save); |
15ef6a98 | 590 | |
15ef6a98 AK |
591 | unsigned int stack_depot_fetch(depot_stack_handle_t handle, |
592 | unsigned long **entries) | |
593 | { | |
15ef6a98 | 594 | struct stack_record *stack; |
a6cd9570 | 595 | unsigned long flags; |
15ef6a98 AK |
596 | |
597 | *entries = NULL; | |
8e00b2df AP |
598 | /* |
599 | * Let KMSAN know *entries is initialized. This shall prevent false | |
600 | * positive reports if instrumented code accesses it. | |
601 | */ | |
602 | kmsan_unpoison_memory(entries, sizeof(*entries)); | |
603 | ||
0c5d44a8 | 604 | if (!handle || stack_depot_disabled) |
15ef6a98 AK |
605 | return 0; |
606 | ||
a6cd9570 | 607 | read_lock_irqsave(&pool_rwlock, flags); |
a914d8d6 | 608 | printk_deferred_enter(); |
a6cd9570 | 609 | |
83130ab2 | 610 | stack = depot_fetch_stack(handle); |
15ef6a98 | 611 | |
a914d8d6 | 612 | printk_deferred_exit(); |
a6cd9570 AK |
613 | read_unlock_irqrestore(&pool_rwlock, flags); |
614 | ||
15ef6a98 AK |
615 | *entries = stack->entries; |
616 | return stack->size; | |
617 | } | |
618 | EXPORT_SYMBOL_GPL(stack_depot_fetch); | |
619 | ||
108be8de AK |
620 | void stack_depot_put(depot_stack_handle_t handle) |
621 | { | |
622 | struct stack_record *stack; | |
623 | unsigned long flags; | |
624 | ||
625 | if (!handle || stack_depot_disabled) | |
626 | return; | |
627 | ||
628 | write_lock_irqsave(&pool_rwlock, flags); | |
a914d8d6 | 629 | printk_deferred_enter(); |
108be8de AK |
630 | |
631 | stack = depot_fetch_stack(handle); | |
632 | if (WARN_ON(!stack)) | |
633 | goto out; | |
634 | ||
635 | if (refcount_dec_and_test(&stack->count)) { | |
636 | /* Unlink stack from the hash table. */ | |
637 | list_del(&stack->list); | |
638 | ||
639 | /* Free stack. */ | |
640 | depot_free_stack(stack); | |
641 | } | |
642 | ||
643 | out: | |
a914d8d6 | 644 | printk_deferred_exit(); |
108be8de AK |
645 | write_unlock_irqrestore(&pool_rwlock, flags); |
646 | } | |
647 | EXPORT_SYMBOL_GPL(stack_depot_put); | |
648 | ||
15ef6a98 AK |
649 | void stack_depot_print(depot_stack_handle_t stack) |
650 | { | |
651 | unsigned long *entries; | |
652 | unsigned int nr_entries; | |
653 | ||
654 | nr_entries = stack_depot_fetch(stack, &entries); | |
655 | if (nr_entries > 0) | |
656 | stack_trace_print(entries, nr_entries, 0); | |
657 | } | |
658 | EXPORT_SYMBOL_GPL(stack_depot_print); | |
659 | ||
15ef6a98 AK |
660 | int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, |
661 | int spaces) | |
662 | { | |
663 | unsigned long *entries; | |
664 | unsigned int nr_entries; | |
665 | ||
666 | nr_entries = stack_depot_fetch(handle, &entries); | |
667 | return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries, | |
668 | spaces) : 0; | |
669 | } | |
670 | EXPORT_SYMBOL_GPL(stack_depot_snprint); | |
671 | ||
36aa1e67 AK |
672 | depot_stack_handle_t __must_check stack_depot_set_extra_bits( |
673 | depot_stack_handle_t handle, unsigned int extra_bits) | |
674 | { | |
675 | union handle_parts parts = { .handle = handle }; | |
676 | ||
677 | /* Don't set extra bits on empty handles. */ | |
678 | if (!handle) | |
679 | return 0; | |
680 | ||
681 | parts.extra = extra_bits; | |
682 | return parts.handle; | |
683 | } | |
684 | EXPORT_SYMBOL(stack_depot_set_extra_bits); | |
685 | ||
15ef6a98 AK |
686 | unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle) |
687 | { | |
688 | union handle_parts parts = { .handle = handle }; | |
689 | ||
690 | return parts.extra; | |
691 | } | |
692 | EXPORT_SYMBOL(stack_depot_get_extra_bits); |