// SPDX-License-Identifier: GPL-2.0-only
/*
 * Generic stack depot for storing stack traces.
 *
 * Some debugging tools need to save stack traces of certain events which can
 * be later presented to the user. For example, KASAN needs to save alloc and
 * free stacks for each object, but storing two stack traces per object
 * requires too much memory (e.g. SLUB_DEBUG needs 256 bytes per object for
 * that).
 *
 * Instead, stack depot maintains a hashtable of unique stacktraces. Since
 * alloc and free stacks repeat a lot, we save about 100x space.
 * Stacks are never removed from the depot, so we store them one after
 * another in contiguous memory allocations.
 *
 * Author: Alexander Potapenko <glider@google.com>
 * Copyright (C) 2016 Google, Inc.
 *
 * Based on code by Dmitry Chernenkov.
 */
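
/*
 * Typical usage, as a sketch (assumes stack_depot_init() has already run
 * and elides error handling; stack_trace_save() is the usual way callers
 * collect the entries array, and 64 is just an example capacity):
 *
 *	unsigned long entries[64];
 *	unsigned int nr_entries;
 *	depot_stack_handle_t handle;
 *
 *	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
 *	handle = stack_depot_save(entries, nr_entries, GFP_KERNEL);
 *	if (handle)
 *		stack_depot_print(handle);
 */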

#include <linux/gfp.h>
#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/stacktrace.h>
#include <linux/stackdepot.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/memblock.h>

#define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8)

#define STACK_ALLOC_NULL_PROTECTION_BITS 1
#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */
#define STACK_ALLOC_SIZE (1LL << (PAGE_SHIFT + STACK_ALLOC_ORDER))
#define STACK_ALLOC_ALIGN 4
#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \
					STACK_ALLOC_ALIGN)
#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \
		STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS)
#define STACK_ALLOC_SLABS_CAP 8192
#define STACK_ALLOC_MAX_SLABS \
	(((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \
	 (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP)
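
/*
 * Worked example for the common 4K-page case (PAGE_SHIFT == 12, 32-bit
 * handles): each slab spans 1 << (12 + 2) == 16K bytes and records are
 * aligned to 1 << 4 == 16 bytes, so STACK_ALLOC_OFFSET_BITS == 12 + 2 - 4
 * == 10 and STACK_ALLOC_INDEX_BITS == 32 - 1 - 10 == 21. Since 2^21 exceeds
 * the 8192-slab cap, STACK_ALLOC_MAX_SLABS == 8192, bounding the depot at
 * 8192 * 16K == 128M bytes.
 */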

/* The compact structure to store the reference to stacks. */
union handle_parts {
	depot_stack_handle_t handle;
	struct {
		u32 slabindex : STACK_ALLOC_INDEX_BITS;
		u32 offset : STACK_ALLOC_OFFSET_BITS;
		u32 valid : STACK_ALLOC_NULL_PROTECTION_BITS;
	};
};
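
/*
 * With the 4K-page numbers above, and on a typical little-endian build
 * where bitfields are allocated from the low bits, a handle packs as:
 *
 *	bits  0..20: slabindex (index into stack_slabs[])
 *	bits 21..30: offset (in 16-byte units within the slab)
 *	bit      31: valid (distinguishes a real handle from 0)
 *
 * The exact split depends on PAGE_SHIFT; only the 32-bit total is fixed.
 */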

struct stack_record {
	struct stack_record *next;	/* Link in the hashtable */
	u32 hash;			/* Hash in the hashtable */
	u32 size;			/* Number of frames in the stack */
	union handle_parts handle;
	unsigned long entries[];	/* Variable-sized array of entries. */
};

static void *stack_slabs[STACK_ALLOC_MAX_SLABS];

static int depot_index;
static int next_slab_inited;
static size_t depot_offset;
static DEFINE_RAW_SPINLOCK(depot_lock);

static bool init_stack_slab(void **prealloc)
{
	if (!*prealloc)
		return false;
	/*
	 * This smp_load_acquire() pairs with smp_store_release() to
	 * |next_slab_inited| below and in depot_alloc_stack().
	 */
	if (smp_load_acquire(&next_slab_inited))
		return true;
	if (stack_slabs[depot_index] == NULL) {
		stack_slabs[depot_index] = *prealloc;
		*prealloc = NULL;
	} else {
		/* If this is the last depot slab, do not touch the next one. */
		if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) {
			stack_slabs[depot_index + 1] = *prealloc;
			*prealloc = NULL;
		}
		/*
		 * This smp_store_release() pairs with smp_load_acquire() from
		 * |next_slab_inited| above and in stack_depot_save().
		 */
		smp_store_release(&next_slab_inited, 1);
	}
	return true;
}

/* Allocate a new stack record in the depot's raw storage. */
static struct stack_record *
depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
{
	struct stack_record *stack;
	size_t required_size = struct_size(stack, entries, size);

	required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN);

	if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) {
		if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) {
			WARN_ONCE(1, "Stack depot reached limit capacity");
			return NULL;
		}
		depot_index++;
		depot_offset = 0;
		/*
		 * smp_store_release() here pairs with smp_load_acquire() from
		 * |next_slab_inited| in stack_depot_save() and
		 * init_stack_slab().
		 */
		if (depot_index + 1 < STACK_ALLOC_MAX_SLABS)
			smp_store_release(&next_slab_inited, 0);
	}
	init_stack_slab(prealloc);
	if (stack_slabs[depot_index] == NULL)
		return NULL;

	stack = stack_slabs[depot_index] + depot_offset;

	stack->hash = hash;
	stack->size = size;
	stack->handle.slabindex = depot_index;
	stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN;
	stack->handle.valid = 1;
	memcpy(stack->entries, entries, flex_array_size(stack, entries, size));
	depot_offset += required_size;

	return stack;
}
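
/*
 * Record-size arithmetic, as an example (64-bit kernel, where the header
 * above typically occupies 24 bytes): a 16-frame stack needs struct_size()
 * == 24 + 16 * 8 == 152 bytes, which ALIGN() rounds up to 160 bytes, so one
 * 16K slab from the 4K-page example holds about a hundred such records.
 */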

#define STACK_HASH_SIZE (1L << CONFIG_STACK_HASH_ORDER)
#define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
#define STACK_HASH_SEED 0x9747b28c

static bool stack_depot_disable;
static struct stack_record **stack_table;

static int __init is_stack_depot_disabled(char *str)
{
	int ret;

	ret = kstrtobool(str, &stack_depot_disable);
	if (!ret && stack_depot_disable) {
		pr_info("Stack Depot is disabled\n");
		stack_table = NULL;
	}
	return 0;
}
early_param("stack_depot_disable", is_stack_depot_disabled);
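
/*
 * For example, booting with "stack_depot_disable=on" (kstrtobool() also
 * accepts "1" and "y") makes stack_depot_init() skip the hash table
 * allocation entirely.
 */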

/*
 * __ref because of memblock_alloc(), which will never actually be called
 * after the __init code is gone, because at that point slab_is_available()
 * is true.
 */
__ref int stack_depot_init(void)
{
	static DEFINE_MUTEX(stack_depot_init_mutex);

	mutex_lock(&stack_depot_init_mutex);
	if (!stack_depot_disable && !stack_table) {
		size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
		int i;

		if (slab_is_available()) {
			pr_info("Stack Depot allocating hash table with kvmalloc\n");
			stack_table = kvmalloc(size, GFP_KERNEL);
		} else {
			pr_info("Stack Depot allocating hash table with memblock_alloc\n");
			stack_table = memblock_alloc(size, SMP_CACHE_BYTES);
		}
		if (stack_table) {
			for (i = 0; i < STACK_HASH_SIZE; i++)
				stack_table[i] = NULL;
		} else {
			pr_err("Stack Depot hash table allocation failed, disabling\n");
			stack_depot_disable = true;
			mutex_unlock(&stack_depot_init_mutex);
			return -ENOMEM;
		}
	}
	mutex_unlock(&stack_depot_init_mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(stack_depot_init);

/* Calculate the hash for a stack. */
static inline u32 hash_stack(unsigned long *entries, unsigned int size)
{
	return jhash2((u32 *)entries,
		      array_size(size, sizeof(*entries)) / sizeof(u32),
		      STACK_HASH_SEED);
}
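
/*
 * Note that jhash2() consumes 32-bit words, so the length passed above is
 * the entry array measured in u32 units: e.g. 16 entries on a 64-bit kernel
 * hash as 16 * 8 / 4 == 32 words.
 */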

/*
 * Use our own, non-instrumented version of memcmp().
 *
 * We actually don't care about the order, just the equality.
 */
static inline
int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2,
			unsigned int n)
{
	for ( ; n-- ; u1++, u2++) {
		if (*u1 != *u2)
			return 1;
	}
	return 0;
}

/* Find a stack that is equal to the one stored in entries in the hash. */
static inline struct stack_record *find_stack(struct stack_record *bucket,
					     unsigned long *entries, int size,
					     u32 hash)
{
	struct stack_record *found;

	for (found = bucket; found; found = found->next) {
		if (found->hash == hash &&
		    found->size == size &&
		    !stackdepot_memcmp(entries, found->entries, size))
			return found;
	}
	return NULL;
}

/**
 * stack_depot_snprint - print stack entries from a depot into a buffer
 * @handle:	Stack depot handle which was returned from stack_depot_save().
 * @buf:	Pointer to the print buffer
 * @size:	Size of the print buffer
 * @spaces:	Number of leading spaces to print
 *
 * Return: Number of bytes printed.
 */
int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
		       int spaces)
{
	unsigned long *entries;
	unsigned int nr_entries;

	nr_entries = stack_depot_fetch(handle, &entries);
	return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries,
						spaces) : 0;
}
EXPORT_SYMBOL_GPL(stack_depot_snprint);
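
/*
 * A minimal caller sketch (the 512-byte buffer is just an example size):
 *
 *	char buf[512];
 *
 *	if (stack_depot_snprint(handle, buf, sizeof(buf), 2))
 *		pr_info("%s", buf);
 */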

/**
 * stack_depot_print - print stack entries from a depot
 * @stack:	Stack depot handle which was returned from stack_depot_save().
 */
void stack_depot_print(depot_stack_handle_t stack)
{
	unsigned long *entries;
	unsigned int nr_entries;

	nr_entries = stack_depot_fetch(stack, &entries);
	if (nr_entries > 0)
		stack_trace_print(entries, nr_entries, 0);
}
EXPORT_SYMBOL_GPL(stack_depot_print);

/**
 * stack_depot_fetch - Fetch stack entries from a depot
 * @handle:	Stack depot handle which was returned from stack_depot_save().
 * @entries:	Pointer to store the entries address
 *
 * Return: The number of trace entries for this stack.
 */
unsigned int stack_depot_fetch(depot_stack_handle_t handle,
			       unsigned long **entries)
{
	union handle_parts parts = { .handle = handle };
	void *slab;
	size_t offset = parts.offset << STACK_ALLOC_ALIGN;
	struct stack_record *stack;

	*entries = NULL;
	if (!handle)
		return 0;

	if (parts.slabindex > depot_index) {
		WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n",
			parts.slabindex, depot_index, handle);
		return 0;
	}
	slab = stack_slabs[parts.slabindex];
	if (!slab)
		return 0;
	stack = slab + offset;

	*entries = stack->entries;
	return stack->size;
}
EXPORT_SYMBOL_GPL(stack_depot_fetch);
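
/*
 * The fetched entries point into depot storage, which is never freed, so a
 * caller can keep using them; for example:
 *
 *	unsigned long *entries;
 *	unsigned int i, nr_entries;
 *
 *	nr_entries = stack_depot_fetch(handle, &entries);
 *	for (i = 0; i < nr_entries; i++)
 *		pr_info(" %pS\n", (void *)entries[i]);
 */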

/**
 * __stack_depot_save - Save a stack trace from an array
 * @entries:		Pointer to storage array
 * @nr_entries:		Size of the storage array
 * @alloc_flags:	Allocation gfp flags
 * @can_alloc:		Allocate stack slabs (increased chance of failure if false)
 *
 * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is
 * %true, is allowed to replenish the stack slab pool in case no space is left
 * (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids
 * any allocations and will fail if no space is left to store the stack trace.
 *
 * Context: Any context, but setting @can_alloc to %false is required if
 *          alloc_pages() cannot be used from the current context. Currently
 *          this is the case from contexts where neither %GFP_ATOMIC nor
 *          %GFP_NOWAIT can be used (NMI, raw_spin_lock).
 *
 * Return: The handle of the stack struct stored in depot, 0 on failure.
 */
depot_stack_handle_t __stack_depot_save(unsigned long *entries,
					unsigned int nr_entries,
					gfp_t alloc_flags, bool can_alloc)
{
	struct stack_record *found = NULL, **bucket;
	depot_stack_handle_t retval = 0;
	struct page *page = NULL;
	void *prealloc = NULL;
	unsigned long flags;
	u32 hash;

	if (unlikely(nr_entries == 0) || stack_depot_disable)
		goto fast_exit;

	hash = hash_stack(entries, nr_entries);
	bucket = &stack_table[hash & STACK_HASH_MASK];

	/*
	 * Fast path: look the stack trace up without locking.
	 * The smp_load_acquire() here pairs with smp_store_release() to
	 * |bucket| below.
	 */
	found = find_stack(smp_load_acquire(bucket), entries,
			   nr_entries, hash);
	if (found)
		goto exit;

	/*
	 * Check if the current or the next stack slab needs to be
	 * initialized. If so, allocate the memory - we won't be able to do
	 * that under the lock.
	 *
	 * The smp_load_acquire() here pairs with smp_store_release() to
	 * |next_slab_inited| in depot_alloc_stack() and init_stack_slab().
	 */
	if (unlikely(can_alloc && !smp_load_acquire(&next_slab_inited))) {
		/*
		 * Zero out zone modifiers, as we don't have specific zone
		 * requirements. Keep the flags related to allocation in atomic
		 * contexts and I/O.
		 */
		alloc_flags &= ~GFP_ZONEMASK;
		alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
		alloc_flags |= __GFP_NOWARN;
		page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER);
		if (page)
			prealloc = page_address(page);
	}

	raw_spin_lock_irqsave(&depot_lock, flags);

	found = find_stack(*bucket, entries, nr_entries, hash);
	if (!found) {
		struct stack_record *new =
			depot_alloc_stack(entries, nr_entries, hash, &prealloc);

		if (new) {
			new->next = *bucket;
			/*
			 * This smp_store_release() pairs with
			 * smp_load_acquire() from |bucket| above.
			 */
			smp_store_release(bucket, new);
			found = new;
		}
	} else if (prealloc) {
		/*
		 * We didn't need to store this stack trace, but let's keep
		 * the preallocated memory for the future.
		 */
		WARN_ON(!init_stack_slab(&prealloc));
	}

	raw_spin_unlock_irqrestore(&depot_lock, flags);
exit:
	if (prealloc) {
		/* Nobody used this memory, ok to free it. */
		free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER);
	}
	if (found)
		retval = found->handle.handle;
fast_exit:
	return retval;
}
EXPORT_SYMBOL_GPL(__stack_depot_save);
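
/*
 * Example for a context where page allocation is unsafe, e.g. in an NMI
 * handler or under a raw spinlock (a sketch; saving may then fail if no
 * slab space is left):
 *
 *	handle = __stack_depot_save(entries, nr_entries, 0, false);
 *	if (!handle)
 *		... the trace was not stored; callers must tolerate 0 ...
 */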

/**
 * stack_depot_save - Save a stack trace from an array
 * @entries:		Pointer to storage array
 * @nr_entries:		Size of the storage array
 * @alloc_flags:	Allocation gfp flags
 *
 * Context: Contexts where allocations via alloc_pages() are allowed.
 *          See __stack_depot_save() for more details.
 *
 * Return: The handle of the stack struct stored in depot, 0 on failure.
 */
depot_stack_handle_t stack_depot_save(unsigned long *entries,
				      unsigned int nr_entries,
				      gfp_t alloc_flags)
{
	return __stack_depot_save(entries, nr_entries, alloc_flags, true);
}
EXPORT_SYMBOL_GPL(stack_depot_save);