// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains KASAN runtime code that manages shadow memory for
 * generic and software tag-based KASAN modes.
 *
 * Copyright (c) 2014 Samsung Electronics Co., Ltd.
 * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
 *
 * Some code borrowed from https://github.com/xairy/kasan-prototype by
 * Andrey Konovalov <andreyknvl@gmail.com>
 */

#include <linux/init.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
#include <linux/kfence.h>
#include <linux/kmemleak.h>
#include <linux/memory.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/vmalloc.h>

#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

#include "kasan.h"

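/*
 * Out-of-line access checks: both helpers simply forward to
 * kasan_check_range() with the caller's return address, so that reports
 * point at the code that performed the access.
 */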
bool __kasan_check_read(const volatile void *p, unsigned int size)
{
        return kasan_check_range((unsigned long)p, size, false, _RET_IP_);
}
EXPORT_SYMBOL(__kasan_check_read);

bool __kasan_check_write(const volatile void *p, unsigned int size)
{
        return kasan_check_range((unsigned long)p, size, true, _RET_IP_);
}
EXPORT_SYMBOL(__kasan_check_write);

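/*
 * Interceptors for the string functions: memset(), memmove() and memcpy()
 * are redefined below so that the source and destination ranges are
 * validated against the shadow first. If a check fails, the operation is
 * skipped and NULL is returned; otherwise the underlying __mem*()
 * implementation does the actual work.
 */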
#undef memset
void *memset(void *addr, int c, size_t len)
{
        if (!kasan_check_range((unsigned long)addr, len, true, _RET_IP_))
                return NULL;

        return __memset(addr, c, len);
}

#ifdef __HAVE_ARCH_MEMMOVE
#undef memmove
void *memmove(void *dest, const void *src, size_t len)
{
        if (!kasan_check_range((unsigned long)src, len, false, _RET_IP_) ||
            !kasan_check_range((unsigned long)dest, len, true, _RET_IP_))
                return NULL;

        return __memmove(dest, src, len);
}
#endif

#undef memcpy
void *memcpy(void *dest, const void *src, size_t len)
{
        if (!kasan_check_range((unsigned long)src, len, false, _RET_IP_) ||
            !kasan_check_range((unsigned long)dest, len, true, _RET_IP_))
                return NULL;

        return __memcpy(dest, src, len);
}

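/*
 * Poison an aligned range of memory: every shadow byte covering
 * [addr, addr + size) is set to @value. Both the address and the size
 * must be KASAN_GRANULE_SIZE-aligned, which the WARN_ONs below enforce.
 *
 * Illustrative sketch only (the real callers are the slab hooks in
 * mm/kasan/common.c): poisoning an object's redzone and later unpoisoning
 * the object itself might look like
 *
 *      kasan_poison(object, round_up(size, KASAN_GRANULE_SIZE),
 *                   KASAN_KMALLOC_REDZONE);
 *      ...
 *      kasan_unpoison(object, size);
 */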
void kasan_poison(const void *addr, size_t size, u8 value)
{
        void *shadow_start, *shadow_end;

        /*
         * Perform shadow offset calculation based on untagged address, as
         * some of the callers (e.g. kasan_poison_object_data) pass tagged
         * addresses to this function.
         */
        addr = kasan_reset_tag(addr);

        /* Skip KFENCE memory if called explicitly outside of sl*b. */
        if (is_kfence_address(addr))
                return;

        if (WARN_ON((unsigned long)addr & KASAN_GRANULE_MASK))
                return;
        if (WARN_ON(size & KASAN_GRANULE_MASK))
                return;

        shadow_start = kasan_mem_to_shadow(addr);
        shadow_end = kasan_mem_to_shadow(addr + size);

        __memset(shadow_start, value, shadow_end - shadow_start);
}
EXPORT_SYMBOL(kasan_poison);

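/*
 * In the generic mode a shadow byte can also describe a partially
 * accessible granule: kasan_poison_last_granule() stores the number of
 * accessible bytes (size & KASAN_GRANULE_MASK) in the shadow byte of the
 * object's last granule.
 */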
#ifdef CONFIG_KASAN_GENERIC
void kasan_poison_last_granule(const void *addr, size_t size)
{
        if (size & KASAN_GRANULE_MASK) {
                u8 *shadow = (u8 *)kasan_mem_to_shadow(addr + size);
                *shadow = size & KASAN_GRANULE_MASK;
        }
}
#endif

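/*
 * Mark all granules covering [addr, addr + size) as accessible by writing
 * the pointer's tag to their shadow, then (for the generic mode) record
 * how many bytes of the last granule are actually valid.
 */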
void kasan_unpoison(const void *addr, size_t size)
{
        u8 tag = get_tag(addr);

        /*
         * Perform shadow offset calculation based on untagged address, as
         * some of the callers (e.g. kasan_unpoison_object_data) pass tagged
         * addresses to this function.
         */
        addr = kasan_reset_tag(addr);

        /*
         * Skip KFENCE memory if called explicitly outside of sl*b. Also note
         * that calls to ksize(), where size is not a multiple of machine-word
         * size, would otherwise poison the invalid portion of the word.
         */
        if (is_kfence_address(addr))
                return;

        if (WARN_ON((unsigned long)addr & KASAN_GRANULE_MASK))
                return;

        /* Unpoison all granules that cover the object. */
        kasan_poison(addr, round_up(size, KASAN_GRANULE_SIZE), tag);

        /* Partially poison the last granule for the generic mode. */
        if (IS_ENABLED(CONFIG_KASAN_GENERIC))
                kasan_poison_last_granule(addr, size);
}

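/*
 * Memory hotplug support: shadow for memory onlined after boot is
 * allocated from vmalloc space by the notifier below and, when it was
 * allocated that way, freed again on offline.
 */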
#ifdef CONFIG_MEMORY_HOTPLUG
static bool shadow_mapped(unsigned long addr)
{
        pgd_t *pgd = pgd_offset_k(addr);
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        if (pgd_none(*pgd))
                return false;
        p4d = p4d_offset(pgd, addr);
        if (p4d_none(*p4d))
                return false;
        pud = pud_offset(p4d, addr);
        if (pud_none(*pud))
                return false;

        /*
         * We can't use pud_large() or pud_huge(): the first one is
         * arch-specific, the last one depends on HUGETLB_PAGE. So let's
         * abuse pud_bad(): if the pud is bad, it's bad because it's huge.
         */
        if (pud_bad(*pud))
                return true;
        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
                return false;

        if (pmd_bad(*pmd))
                return true;
        pte = pte_offset_kernel(pmd, addr);
        return !pte_none(*pte);
}

static int __meminit kasan_mem_notifier(struct notifier_block *nb,
                        unsigned long action, void *data)
{
        struct memory_notify *mem_data = data;
        unsigned long nr_shadow_pages, start_kaddr, shadow_start;
        unsigned long shadow_end, shadow_size;

        nr_shadow_pages = mem_data->nr_pages >> KASAN_SHADOW_SCALE_SHIFT;
        start_kaddr = (unsigned long)pfn_to_kaddr(mem_data->start_pfn);
        shadow_start = (unsigned long)kasan_mem_to_shadow((void *)start_kaddr);
        shadow_size = nr_shadow_pages << PAGE_SHIFT;
        shadow_end = shadow_start + shadow_size;

        if (WARN_ON(mem_data->nr_pages % KASAN_GRANULE_SIZE) ||
            WARN_ON(start_kaddr % KASAN_MEMORY_PER_SHADOW_PAGE))
                return NOTIFY_BAD;

        switch (action) {
        case MEM_GOING_ONLINE: {
                void *ret;

                /*
                 * If the shadow is already mapped, it must have been mapped
                 * during boot. This can happen when onlining previously
                 * offlined memory.
                 */
                if (shadow_mapped(shadow_start))
                        return NOTIFY_OK;

                ret = __vmalloc_node_range(shadow_size, PAGE_SIZE, shadow_start,
                                        shadow_end, GFP_KERNEL,
                                        PAGE_KERNEL, VM_NO_GUARD,
                                        pfn_to_nid(mem_data->start_pfn),
                                        __builtin_return_address(0));
                if (!ret)
                        return NOTIFY_BAD;

                kmemleak_ignore(ret);
                return NOTIFY_OK;
        }
        case MEM_CANCEL_ONLINE:
        case MEM_OFFLINE: {
                struct vm_struct *vm;

                /*
                 * shadow_start was either mapped during boot by kasan_init()
                 * or during memory online by __vmalloc_node_range().
                 * In the latter case we can use vfree() to free the shadow.
                 * A non-NULL result from find_vm_area() tells us that this
                 * is the second case.
                 *
                 * Currently it's not possible to free shadow mapped during
                 * boot by kasan_init(), because the code to do that hasn't
                 * been written yet. So we'll just leak the memory.
                 */
                vm = find_vm_area((void *)shadow_start);
                if (vm)
                        vfree((void *)shadow_start);
        }
        }

        return NOTIFY_OK;
}

static int __init kasan_memhotplug_init(void)
{
        hotplug_memory_notifier(kasan_mem_notifier, 0);

        return 0;
}

core_initcall(kasan_memhotplug_init);
#endif

#ifdef CONFIG_KASAN_VMALLOC

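/*
 * With CONFIG_KASAN_VMALLOC, shadow for vmalloc space is populated lazily,
 * page by page. The PTE callback below backs one shadow page with a freshly
 * allocated page pre-filled with KASAN_VMALLOC_INVALID; the re-check under
 * init_mm.page_table_lock handles the case where another CPU installed the
 * shadow page first.
 */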
static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr,
                                      void *unused)
{
        unsigned long page;
        pte_t pte;

        if (likely(!pte_none(*ptep)))
                return 0;

        page = __get_free_page(GFP_KERNEL);
        if (!page)
                return -ENOMEM;

        memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE);
        pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL);

        spin_lock(&init_mm.page_table_lock);
        if (likely(pte_none(*ptep))) {
                set_pte_at(&init_mm, addr, ptep, pte);
                page = 0;
        }
        spin_unlock(&init_mm.page_table_lock);
        if (page)
                free_page(page);
        return 0;
}

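/*
 * Populate the shadow for a new vmalloc region: the region's shadow is
 * rounded out to whole pages and apply_to_page_range() walks it, creating
 * page tables as needed and calling kasan_populate_vmalloc_pte() for each
 * shadow PTE.
 */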
int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
{
        unsigned long shadow_start, shadow_end;
        int ret;

        if (!is_vmalloc_or_module_addr((void *)addr))
                return 0;

        shadow_start = (unsigned long)kasan_mem_to_shadow((void *)addr);
        shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE);
        shadow_end = (unsigned long)kasan_mem_to_shadow((void *)addr + size);
        shadow_end = ALIGN(shadow_end, PAGE_SIZE);

        ret = apply_to_page_range(&init_mm, shadow_start,
                                  shadow_end - shadow_start,
                                  kasan_populate_vmalloc_pte, NULL);
        if (ret)
                return ret;

        flush_cache_vmap(shadow_start, shadow_end);

        /*
         * We need to be careful about inter-cpu effects here. Consider:
         *
         *   CPU#0                               CPU#1
         * WRITE_ONCE(p, vmalloc(100));          while (x = READ_ONCE(p)) ;
         *                                       p[99] = 1;
         *
         * With compiler instrumentation, that ends up looking like this:
         *
         *   CPU#0                               CPU#1
         * // vmalloc() allocates memory
         * // let a = area->addr
         * // we reach kasan_populate_vmalloc
         * // and call kasan_unpoison:
         * STORE shadow(a), unpoison_val
         * ...
         * STORE shadow(a+99), unpoison_val      x = LOAD p
         * // rest of vmalloc process            <data dependency>
         * STORE p, a                            LOAD shadow(x+99)
         *
         * If there is no barrier between the end of unpoisoning the shadow
         * and the store of the result to p, the stores could be committed
         * in a different order by CPU#0, and CPU#1 could erroneously observe
         * poison in the shadow.
         *
         * We need some sort of barrier between the stores.
         *
         * In the vmalloc() case, this is provided by a smp_wmb() in
         * clear_vm_uninitialized_flag(). In the per-cpu allocator and in
         * get_vm_area() and friends, the caller gets shadow allocated but
         * doesn't have any pages mapped into the virtual address space that
         * has been reserved. Mapping those pages in will involve taking and
         * releasing a page-table lock, which will provide the barrier.
         */

        return 0;
}

/*
 * Poison the shadow for a vmalloc region. Called as part of the
 * freeing process at the time the region is freed.
 */
void kasan_poison_vmalloc(const void *start, unsigned long size)
{
        if (!is_vmalloc_or_module_addr(start))
                return;

        size = round_up(size, KASAN_GRANULE_SIZE);
        kasan_poison(start, size, KASAN_VMALLOC_INVALID);
}

void kasan_unpoison_vmalloc(const void *start, unsigned long size)
{
        if (!is_vmalloc_or_module_addr(start))
                return;

        kasan_unpoison(start, size);
}

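/*
 * Counterpart of kasan_populate_vmalloc_pte(): clear one shadow PTE and
 * free the backing page, re-checking the PTE under init_mm.page_table_lock.
 */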
static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr,
                                        void *unused)
{
        unsigned long page;

        page = (unsigned long)__va(pte_pfn(*ptep) << PAGE_SHIFT);

        spin_lock(&init_mm.page_table_lock);

        if (likely(!pte_none(*ptep))) {
                pte_clear(&init_mm, addr, ptep);
                free_page(page);
        }
        spin_unlock(&init_mm.page_table_lock);

        return 0;
}

/*
 * Release the backing for the vmalloc region [start, end), which
 * lies within the free region [free_region_start, free_region_end).
 *
 * This can be run lazily, long after the region was freed. It runs
 * under vmap_area_lock, so it's not safe to interact with the vmalloc/vmap
 * infrastructure.
 *
 * How does this work?
 * -------------------
 *
 * We have a region that is page aligned, labelled as A.
 * That might not map onto the shadow in a way that is page-aligned:
 *
 *                    start                     end
 *                    v                         v
 * |????????|????????|AAAAAAAA|AA....AA|AAAAAAAA|????????| < vmalloc
 *  -------- -------- --------          -------- --------
 *      |        |        |                |        |
 *      |        |        |      /---------/        |
 *      \--------\|/------/      |/-----------------/
 *               |||             ||
 *              |??AAAAAA|AAAAAAAA|AA??????|               < shadow
 *                  (1)      (2)      (3)
 *
 * First we align the start upwards and the end downwards, so that the
 * shadow of the region aligns with shadow page boundaries. In the
 * example, this gives us the shadow page (2). This is the shadow entirely
 * covered by this allocation.
 *
 * Then we have the tricky bits. We want to know if we can free the
 * partially covered shadow pages - (1) and (3) in the example. For this,
 * we are given the start and end of the free region that contains this
 * allocation. Extending our previous example, we could have:
 *
 *  free_region_start                                  free_region_end
 *     |              start                     end         |
 *     v              v                         v           v
 * |FFFFFFFF|FFFFFFFF|AAAAAAAA|AA....AA|AAAAAAAA|FFFFFFFF| < vmalloc
 *  -------- -------- --------          -------- --------
 *      |        |        |                |        |
 *      |        |        |      /---------/        |
 *      \--------\|/------/      |/-----------------/
 *               |||             ||
 *              |FFAAAAAA|AAAAAAAA|AAF?????|               < shadow
 *                  (1)      (2)      (3)
 *
 * Once again, we align the start of the free region up, and the end of
 * the free region down so that the shadow is page aligned. So we can free
 * page (1) - we know no allocation currently uses anything in that page,
 * because all of it is in the vmalloc free region. But we cannot free
 * page (3), because we can't be sure that the rest of it is unused.
 *
 * We only consider pages that contain part of the original region for
 * freeing: we don't try to free other pages from the free region or we'd
 * end up trying to free huge chunks of virtual address space.
 *
 * Concurrency
 * -----------
 *
 * How do we know that we're not freeing a page that is simultaneously
 * being used for a fresh allocation in kasan_populate_vmalloc(_pte)?
 *
 * We _can_ have kasan_release_vmalloc and kasan_populate_vmalloc running
 * at the same time. While we run under free_vmap_area_lock, the population
 * code does not.
 *
 * free_vmap_area_lock instead operates to ensure that the larger range
 * [free_region_start, free_region_end) is safe: because __alloc_vmap_area and
 * the per-cpu region-finding algorithm both run under free_vmap_area_lock,
 * no space identified as free will become used while we are running. This
 * means that so long as we are careful with alignment and only free shadow
 * pages entirely covered by the free region, we will not run into any
 * trouble - any simultaneous allocations will be for disjoint regions.
 */
void kasan_release_vmalloc(unsigned long start, unsigned long end,
                           unsigned long free_region_start,
                           unsigned long free_region_end)
{
        void *shadow_start, *shadow_end;
        unsigned long region_start, region_end;
        unsigned long size;

        region_start = ALIGN(start, KASAN_MEMORY_PER_SHADOW_PAGE);
        region_end = ALIGN_DOWN(end, KASAN_MEMORY_PER_SHADOW_PAGE);

        free_region_start = ALIGN(free_region_start, KASAN_MEMORY_PER_SHADOW_PAGE);

        if (start != region_start &&
            free_region_start < region_start)
                region_start -= KASAN_MEMORY_PER_SHADOW_PAGE;

        free_region_end = ALIGN_DOWN(free_region_end, KASAN_MEMORY_PER_SHADOW_PAGE);

        if (end != region_end &&
            free_region_end > region_end)
                region_end += KASAN_MEMORY_PER_SHADOW_PAGE;

        shadow_start = kasan_mem_to_shadow((void *)region_start);
        shadow_end = kasan_mem_to_shadow((void *)region_end);

        if (shadow_end > shadow_start) {
                size = shadow_end - shadow_start;
                apply_to_existing_page_range(&init_mm,
                                             (unsigned long)shadow_start,
                                             size, kasan_depopulate_vmalloc_pte,
                                             NULL);
                flush_tlb_kernel_range((unsigned long)shadow_start,
                                       (unsigned long)shadow_end);
        }
}

#else /* CONFIG_KASAN_VMALLOC */

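/*
 * Without CONFIG_KASAN_VMALLOC, module space still needs shadow: it is
 * allocated here when a module is loaded, the module's vm area is marked
 * with VM_KASAN, and the shadow is released by kasan_free_shadow() when
 * that area goes away.
 */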
int kasan_module_alloc(void *addr, size_t size)
{
        void *ret;
        size_t scaled_size;
        size_t shadow_size;
        unsigned long shadow_start;

        shadow_start = (unsigned long)kasan_mem_to_shadow(addr);
        scaled_size = (size + KASAN_GRANULE_SIZE - 1) >>
                        KASAN_SHADOW_SCALE_SHIFT;
        shadow_size = round_up(scaled_size, PAGE_SIZE);

        if (WARN_ON(!PAGE_ALIGNED(shadow_start)))
                return -EINVAL;

        ret = __vmalloc_node_range(shadow_size, 1, shadow_start,
                        shadow_start + shadow_size,
                        GFP_KERNEL,
                        PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE,
                        __builtin_return_address(0));

        if (ret) {
                __memset(ret, KASAN_SHADOW_INIT, shadow_size);
                find_vm_area(addr)->flags |= VM_KASAN;
                kmemleak_ignore(ret);
                return 0;
        }

        return -ENOMEM;
}

void kasan_free_shadow(const struct vm_struct *vm)
{
        if (vm->flags & VM_KASAN)
                vfree(kasan_mem_to_shadow(vm->addr));
}

#endif