/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Written by Mark Hemment, 1996 (markhe@nextd.demon.co.uk).
 *
 * (C) SGI 2006, Christoph Lameter
 *      Cleaned up and restructured to ease the addition of alternative
 *      implementations of SLAB allocators.
 * (C) Linux Foundation 2008-2013
 *      Unified interface for all slab allocators
 */

#ifndef _LINUX_SLAB_H
#define _LINUX_SLAB_H

#include <linux/cache.h>
#include <linux/gfp.h>
#include <linux/overflow.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/percpu-refcount.h>
#include <linux/cleanup.h>
#include <linux/hash.h>

enum _slab_flag_bits {
        _SLAB_CONSISTENCY_CHECKS,
        _SLAB_RED_ZONE,
        _SLAB_POISON,
        _SLAB_KMALLOC,
        _SLAB_HWCACHE_ALIGN,
        _SLAB_CACHE_DMA,
        _SLAB_CACHE_DMA32,
        _SLAB_STORE_USER,
        _SLAB_PANIC,
        _SLAB_TYPESAFE_BY_RCU,
        _SLAB_TRACE,
#ifdef CONFIG_DEBUG_OBJECTS
        _SLAB_DEBUG_OBJECTS,
#endif
        _SLAB_NOLEAKTRACE,
        _SLAB_NO_MERGE,
#ifdef CONFIG_FAILSLAB
        _SLAB_FAILSLAB,
#endif
#ifdef CONFIG_MEMCG_KMEM
        _SLAB_ACCOUNT,
#endif
#ifdef CONFIG_KASAN_GENERIC
        _SLAB_KASAN,
#endif
        _SLAB_NO_USER_FLAGS,
#ifdef CONFIG_KFENCE
        _SLAB_SKIP_KFENCE,
#endif
#ifndef CONFIG_SLUB_TINY
        _SLAB_RECLAIM_ACCOUNT,
#endif
        _SLAB_OBJECT_POISON,
        _SLAB_CMPXCHG_DOUBLE,
        _SLAB_FLAGS_LAST_BIT
};

#define __SLAB_FLAG_BIT(nr)     ((slab_flags_t __force)(1U << (nr)))
#define __SLAB_FLAG_UNUSED      ((slab_flags_t __force)(0U))

/*
 * Flags to pass to kmem_cache_create().
 * The ones marked DEBUG need CONFIG_SLUB_DEBUG enabled, otherwise are no-op
 */
/* DEBUG: Perform (expensive) checks on alloc/free */
#define SLAB_CONSISTENCY_CHECKS __SLAB_FLAG_BIT(_SLAB_CONSISTENCY_CHECKS)
/* DEBUG: Red zone objs in a cache */
#define SLAB_RED_ZONE           __SLAB_FLAG_BIT(_SLAB_RED_ZONE)
/* DEBUG: Poison objects */
#define SLAB_POISON             __SLAB_FLAG_BIT(_SLAB_POISON)
/* Indicate a kmalloc slab */
#define SLAB_KMALLOC            __SLAB_FLAG_BIT(_SLAB_KMALLOC)
/* Align objs on cache lines */
#define SLAB_HWCACHE_ALIGN      __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN)
/* Use GFP_DMA memory */
#define SLAB_CACHE_DMA          __SLAB_FLAG_BIT(_SLAB_CACHE_DMA)
/* Use GFP_DMA32 memory */
#define SLAB_CACHE_DMA32        __SLAB_FLAG_BIT(_SLAB_CACHE_DMA32)
/* DEBUG: Store the last owner for bug hunting */
#define SLAB_STORE_USER         __SLAB_FLAG_BIT(_SLAB_STORE_USER)
/* Panic if kmem_cache_create() fails */
#define SLAB_PANIC              __SLAB_FLAG_BIT(_SLAB_PANIC)
/*
 * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS!
 *
 * This delays freeing the SLAB page by a grace period, it does _NOT_
 * delay object freeing. This means that if you do kmem_cache_free()
 * that memory location is free to be reused at any time. Thus it may
 * be possible to see another object there in the same RCU grace period.
 *
 * This feature only ensures the memory location backing the object
 * stays valid, the trick to using this is relying on an independent
 * object validation pass. Something like:
 *
 * begin:
 *  rcu_read_lock();
 *  obj = lockless_lookup(key);
 *  if (obj) {
 *    if (!try_get_ref(obj)) { // might fail for free objects
 *      rcu_read_unlock();
 *      goto begin;
 *    }
 *
 *    if (obj->key != key) { // not the object we expected
 *      put_ref(obj);
 *      rcu_read_unlock();
 *      goto begin;
 *    }
 *  }
 *  rcu_read_unlock();
 *
 * This is useful if we need to approach a kernel structure obliquely,
 * from its address obtained without the usual locking. We can lock
 * the structure to stabilize it and check it's still at the given address,
 * only if we can be sure that the memory has not been meanwhile reused
 * for some other kind of object (which our subsystem's lock might corrupt).
 *
 * rcu_read_lock before reading the address, then rcu_read_unlock after
 * taking the spinlock within the structure expected at that address.
 *
 * Note that it is not possible to acquire a lock within a structure
 * allocated with SLAB_TYPESAFE_BY_RCU without first acquiring a reference
 * as described above. The reason is that SLAB_TYPESAFE_BY_RCU pages
 * are not zeroed before being given to the slab, which means that any
 * locks must be initialized after each and every kmem_cache_alloc().
 * Alternatively, make the ctor passed to kmem_cache_create() initialize
 * the locks at page-allocation time, as is done in __i915_request_ctor(),
 * sighand_ctor(), and anon_vma_ctor(). Such a ctor permits readers
 * to safely acquire those ctor-initialized locks under rcu_read_lock()
 * protection.
 *
 * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU.
 */
/* Defer freeing slabs to RCU */
#define SLAB_TYPESAFE_BY_RCU    __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU)
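/*
 * Illustrative sketch (not part of this header): one way to combine
 * SLAB_TYPESAFE_BY_RCU with a constructor so that the embedded lock is
 * initialized only when a slab page is created and can then be taken
 * under rcu_read_lock(), as described above. The struct, ctor and cache
 * names are made up for the example.
 *
 *  struct foo {
 *    spinlock_t lock;
 *    unsigned long key;
 *  };
 *
 *  static void foo_ctor(void *addr)
 *  {
 *    struct foo *f = addr;
 *
 *    spin_lock_init(&f->lock);
 *  }
 *
 *  foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0,
 *                                SLAB_TYPESAFE_BY_RCU, foo_ctor);
 */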
/* Trace allocations and frees */
#define SLAB_TRACE              __SLAB_FLAG_BIT(_SLAB_TRACE)

/* Flag to prevent checks on free */
#ifdef CONFIG_DEBUG_OBJECTS
# define SLAB_DEBUG_OBJECTS     __SLAB_FLAG_BIT(_SLAB_DEBUG_OBJECTS)
#else
# define SLAB_DEBUG_OBJECTS     __SLAB_FLAG_UNUSED
#endif

/* Avoid kmemleak tracing */
#define SLAB_NOLEAKTRACE        __SLAB_FLAG_BIT(_SLAB_NOLEAKTRACE)

/*
 * Prevent merging with compatible kmem caches. This flag should be used
 * cautiously. Valid use cases:
 *
 * - caches created for self-tests (e.g. kunit)
 * - general caches created and used by a subsystem, only when a
 *   (subsystem-specific) debug option is enabled
 * - performance critical caches, should be very rare and consulted with slab
 *   maintainers, and not used together with CONFIG_SLUB_TINY
 */
#define SLAB_NO_MERGE           __SLAB_FLAG_BIT(_SLAB_NO_MERGE)

/* Fault injection mark */
#ifdef CONFIG_FAILSLAB
# define SLAB_FAILSLAB          __SLAB_FLAG_BIT(_SLAB_FAILSLAB)
#else
# define SLAB_FAILSLAB          __SLAB_FLAG_UNUSED
#endif
/* Account to memcg */
#ifdef CONFIG_MEMCG_KMEM
# define SLAB_ACCOUNT           __SLAB_FLAG_BIT(_SLAB_ACCOUNT)
#else
# define SLAB_ACCOUNT           __SLAB_FLAG_UNUSED
#endif

#ifdef CONFIG_KASAN_GENERIC
#define SLAB_KASAN              __SLAB_FLAG_BIT(_SLAB_KASAN)
#else
#define SLAB_KASAN              __SLAB_FLAG_UNUSED
#endif

/*
 * Ignore user specified debugging flags.
 * Intended for caches created for self-tests so they have only flags
 * specified in the code and other flags are ignored.
 */
#define SLAB_NO_USER_FLAGS      __SLAB_FLAG_BIT(_SLAB_NO_USER_FLAGS)

#ifdef CONFIG_KFENCE
#define SLAB_SKIP_KFENCE        __SLAB_FLAG_BIT(_SLAB_SKIP_KFENCE)
#else
#define SLAB_SKIP_KFENCE        __SLAB_FLAG_UNUSED
#endif

/* The following flags affect the page allocator grouping pages by mobility */
/* Objects are reclaimable */
#ifndef CONFIG_SLUB_TINY
#define SLAB_RECLAIM_ACCOUNT    __SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT)
#else
#define SLAB_RECLAIM_ACCOUNT    __SLAB_FLAG_UNUSED
#endif
#define SLAB_TEMPORARY          SLAB_RECLAIM_ACCOUNT    /* Objects are short-lived */

/*
 * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests.
 *
 * Dereferencing ZERO_SIZE_PTR will lead to a distinct access fault.
 *
 * ZERO_SIZE_PTR can be passed to kfree though in the same way that NULL can.
 * Both make kfree a no-op.
 */
#define ZERO_SIZE_PTR ((void *)16)

#define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \
                                (unsigned long)ZERO_SIZE_PTR)

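/*
 * For illustration (hypothetical caller, not part of this header):
 *
 *  void *p = kmalloc(0, GFP_KERNEL);   // returns ZERO_SIZE_PTR, not NULL
 *  ZERO_OR_NULL_PTR(p);                // true
 *  kfree(p);                           // no-op, just like kfree(NULL)
 */
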
#include <linux/kasan.h>

struct list_lru;
struct mem_cgroup;
/*
 * struct kmem_cache related prototypes
 */
bool slab_is_available(void);

struct kmem_cache *kmem_cache_create(const char *name, unsigned int size,
                        unsigned int align, slab_flags_t flags,
                        void (*ctor)(void *));
struct kmem_cache *kmem_cache_create_usercopy(const char *name,
                        unsigned int size, unsigned int align,
                        slab_flags_t flags,
                        unsigned int useroffset, unsigned int usersize,
                        void (*ctor)(void *));
void kmem_cache_destroy(struct kmem_cache *s);
int kmem_cache_shrink(struct kmem_cache *s);

/*
 * Please use this macro to create slab caches. Simply specify the
 * name of the structure and maybe some flags that are listed above.
 *
 * The alignment of the struct determines object alignment. If you
 * f.e. add ____cacheline_aligned_in_smp to the struct declaration
 * then the objects will be properly aligned in SMP configurations.
 */
#define KMEM_CACHE(__struct, __flags)                                   \
                kmem_cache_create(#__struct, sizeof(struct __struct),  \
                        __alignof__(struct __struct), (__flags), NULL)

/*
 * To whitelist a single field for copying to/from usercopy, use this
 * macro instead of KMEM_CACHE() above.
 */
#define KMEM_CACHE_USERCOPY(__struct, __flags, __field)                 \
        kmem_cache_create_usercopy(#__struct,                           \
                        sizeof(struct __struct),                        \
                        __alignof__(struct __struct), (__flags),        \
                        offsetof(struct __struct, __field),             \
                        sizeof_field(struct __struct, __field), NULL)
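
/*
 * Minimal usage sketch (hypothetical struct and cache, not part of this
 * header): create a cache for struct foo, allocate from it, and tear it
 * down again.
 *
 *  struct foo {
 *    int bar;
 *  };
 *
 *  static struct kmem_cache *foo_cachep;
 *
 *  foo_cachep = KMEM_CACHE(foo, SLAB_PANIC | SLAB_ACCOUNT);
 *
 *  struct foo *f = kmem_cache_zalloc(foo_cachep, GFP_KERNEL);
 *  if (f) {
 *    ...
 *    kmem_cache_free(foo_cachep, f);
 *  }
 *  kmem_cache_destroy(foo_cachep);
 */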

/*
 * Common kmalloc functions provided by all allocators
 */
void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) __realloc_size(2);
void kfree(const void *objp);
void kfree_sensitive(const void *objp);
size_t __ksize(const void *objp);

DEFINE_FREE(kfree, void *, if (_T) kfree(_T))

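/*
 * Sketch of the scope-based cleanup this enables (hypothetical caller,
 * relying on __free() from <linux/cleanup.h>): the buffer is kfree()d
 * automatically when it goes out of scope, unless ownership is handed
 * off with no_free_ptr().
 *
 *  void *buf __free(kfree) = kmalloc(len, GFP_KERNEL);
 *  if (!buf)
 *    return -ENOMEM;
 *  ...
 *  return 0;   // buf is freed here automatically
 */
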
/**
 * ksize - Report actual allocation size of associated object
 *
 * @objp: Pointer returned from a prior kmalloc()-family allocation.
 *
 * This should not be used for writing beyond the originally requested
 * allocation size. Either use krealloc() or round up the allocation size
 * with kmalloc_size_roundup() prior to allocation. If this is used to
 * access beyond the originally requested allocation size, UBSAN_BOUNDS
 * and/or FORTIFY_SOURCE may trip, since they only know about the
 * originally allocated size via the __alloc_size attribute.
 */
size_t ksize(const void *objp);

#ifdef CONFIG_PRINTK
bool kmem_dump_obj(void *object);
#else
static inline bool kmem_dump_obj(void *object) { return false; }
#endif

/*
 * Some archs want to perform DMA into kmalloc caches and need a guaranteed
 * alignment larger than the alignment of a 64-bit integer.
 * Setting ARCH_DMA_MINALIGN in arch headers allows that.
 */
#ifdef ARCH_HAS_DMA_MINALIGN
#if ARCH_DMA_MINALIGN > 8 && !defined(ARCH_KMALLOC_MINALIGN)
#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
#endif
#endif

#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#elif ARCH_KMALLOC_MINALIGN > 8
#define KMALLOC_MIN_SIZE ARCH_KMALLOC_MINALIGN
#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE)
#endif

/*
 * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
 * Intended for arches that get misalignment faults even for 64 bit integer
 * aligned buffers.
 */
#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif

/*
 * Arches can define this function if they want to decide the minimum slab
 * alignment at runtime. The value returned by the function must be a power
 * of two and >= ARCH_SLAB_MINALIGN.
 */
#ifndef arch_slab_minalign
static inline unsigned int arch_slab_minalign(void)
{
        return ARCH_SLAB_MINALIGN;
}
#endif

/*
 * kmem_cache_alloc and friends return pointers aligned to ARCH_SLAB_MINALIGN.
 * kmalloc and friends return pointers aligned to both ARCH_KMALLOC_MINALIGN
 * and ARCH_SLAB_MINALIGN, but here we only assume the former alignment.
 */
#define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN)
#define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN)
#define __assume_page_alignment __assume_aligned(PAGE_SIZE)

/*
 * Kmalloc array related definitions
 */

/*
 * SLUB directly allocates requests fitting in to an order-1 page
 * (PAGE_SIZE*2). Larger requests are passed to the page allocator.
 */
#define KMALLOC_SHIFT_HIGH      (PAGE_SHIFT + 1)
#define KMALLOC_SHIFT_MAX       (MAX_PAGE_ORDER + PAGE_SHIFT)
#ifndef KMALLOC_SHIFT_LOW
#define KMALLOC_SHIFT_LOW       3
#endif

/* Maximum allocatable size */
#define KMALLOC_MAX_SIZE        (1UL << KMALLOC_SHIFT_MAX)
/* Maximum size for which we actually use a slab cache */
#define KMALLOC_MAX_CACHE_SIZE  (1UL << KMALLOC_SHIFT_HIGH)
/* Maximum order allocatable via the slab allocator */
#define KMALLOC_MAX_ORDER       (KMALLOC_SHIFT_MAX - PAGE_SHIFT)

/*
 * Kmalloc subsystem.
 */
#ifndef KMALLOC_MIN_SIZE
#define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW)
#endif

/*
 * This restriction comes from byte sized index implementation.
 * Page size is normally 2^12 bytes and, in this case, if we want to use
 * byte sized index which can represent 2^8 entries, the size of the object
 * should be equal to or greater than 2^12 / 2^8 = 2^4 = 16.
 * If minimum size of kmalloc is less than 16, we use it as minimum object
 * size and give up use of the byte sized index.
 */
#define SLAB_OBJ_MIN_SIZE       (KMALLOC_MIN_SIZE < 16 ? \
                                (KMALLOC_MIN_SIZE) : 16)

#ifdef CONFIG_RANDOM_KMALLOC_CACHES
#define RANDOM_KMALLOC_CACHES_NR        15 // # of cache copies
#else
#define RANDOM_KMALLOC_CACHES_NR        0
#endif

/*
 * Whenever changing this, take care that kmalloc_type() and
 * create_kmalloc_caches() still work as intended.
 *
 * KMALLOC_NORMAL can contain only unaccounted objects whereas KMALLOC_CGROUP
 * is for accounted but unreclaimable and non-dma objects. All the other
 * kmem caches can have both accounted and unaccounted objects.
 */
enum kmalloc_cache_type {
        KMALLOC_NORMAL = 0,
#ifndef CONFIG_ZONE_DMA
        KMALLOC_DMA = KMALLOC_NORMAL,
#endif
#ifndef CONFIG_MEMCG_KMEM
        KMALLOC_CGROUP = KMALLOC_NORMAL,
#endif
        KMALLOC_RANDOM_START = KMALLOC_NORMAL,
        KMALLOC_RANDOM_END = KMALLOC_RANDOM_START + RANDOM_KMALLOC_CACHES_NR,
#ifdef CONFIG_SLUB_TINY
        KMALLOC_RECLAIM = KMALLOC_NORMAL,
#else
        KMALLOC_RECLAIM,
#endif
#ifdef CONFIG_ZONE_DMA
        KMALLOC_DMA,
#endif
#ifdef CONFIG_MEMCG_KMEM
        KMALLOC_CGROUP,
#endif
        NR_KMALLOC_TYPES
};

extern struct kmem_cache *
kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];

/*
 * Define gfp bits that should not be set for KMALLOC_NORMAL.
 */
#define KMALLOC_NOT_NORMAL_BITS                                 \
        (__GFP_RECLAIMABLE |                                    \
        (IS_ENABLED(CONFIG_ZONE_DMA)   ? __GFP_DMA : 0) |       \
        (IS_ENABLED(CONFIG_MEMCG_KMEM) ? __GFP_ACCOUNT : 0))

extern unsigned long random_kmalloc_seed;

static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, unsigned long caller)
{
        /*
         * The most common case is KMALLOC_NORMAL, so test for it
         * with a single branch for all the relevant flags.
         */
        if (likely((flags & KMALLOC_NOT_NORMAL_BITS) == 0))
#ifdef CONFIG_RANDOM_KMALLOC_CACHES
                /* RANDOM_KMALLOC_CACHES_NR (=15) copies + the KMALLOC_NORMAL */
                return KMALLOC_RANDOM_START + hash_64(caller ^ random_kmalloc_seed,
                                                      ilog2(RANDOM_KMALLOC_CACHES_NR + 1));
#else
                return KMALLOC_NORMAL;
#endif

        /*
         * At least one of the flags has to be set. Their priorities in
         * decreasing order are:
         *  1) __GFP_DMA
         *  2) __GFP_RECLAIMABLE
         *  3) __GFP_ACCOUNT
         */
        if (IS_ENABLED(CONFIG_ZONE_DMA) && (flags & __GFP_DMA))
                return KMALLOC_DMA;
        if (!IS_ENABLED(CONFIG_MEMCG_KMEM) || (flags & __GFP_RECLAIMABLE))
                return KMALLOC_RECLAIM;
        else
                return KMALLOC_CGROUP;
}

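/*
 * For illustration (assuming !CONFIG_RANDOM_KMALLOC_CACHES and the relevant
 * config options enabled), the mapping above works out as:
 *
 *  kmalloc_type(GFP_KERNEL, _RET_IP_)                      == KMALLOC_NORMAL
 *  kmalloc_type(GFP_KERNEL | __GFP_DMA, _RET_IP_)          == KMALLOC_DMA
 *  kmalloc_type(GFP_KERNEL | __GFP_RECLAIMABLE, _RET_IP_)  == KMALLOC_RECLAIM
 *  kmalloc_type(GFP_KERNEL | __GFP_ACCOUNT, _RET_IP_)      == KMALLOC_CGROUP
 */
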
/*
 * Figure out which kmalloc slab an allocation of a certain size
 * belongs to.
 * 0 = zero alloc
 * 1 =  65 .. 96 bytes
 * 2 = 129 .. 192 bytes
 * n = 2^(n-1)+1 .. 2^n
 *
 * Note: __kmalloc_index() is compile-time optimized, and not runtime optimized;
 * typical usage is via kmalloc_index() and therefore evaluated at compile-time.
 * Callers where !size_is_constant should only be test modules, where runtime
 * overheads of __kmalloc_index() can be tolerated. Also see kmalloc_slab().
 */
static __always_inline unsigned int __kmalloc_index(size_t size,
                                                    bool size_is_constant)
{
        if (!size)
                return 0;

        if (size <= KMALLOC_MIN_SIZE)
                return KMALLOC_SHIFT_LOW;

        if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96)
                return 1;
        if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192)
                return 2;
        if (size <=          8) return 3;
        if (size <=         16) return 4;
        if (size <=         32) return 5;
        if (size <=         64) return 6;
        if (size <=        128) return 7;
        if (size <=        256) return 8;
        if (size <=        512) return 9;
        if (size <=       1024) return 10;
        if (size <=   2 * 1024) return 11;
        if (size <=   4 * 1024) return 12;
        if (size <=   8 * 1024) return 13;
        if (size <=  16 * 1024) return 14;
        if (size <=  32 * 1024) return 15;
        if (size <=  64 * 1024) return 16;
        if (size <= 128 * 1024) return 17;
        if (size <= 256 * 1024) return 18;
        if (size <= 512 * 1024) return 19;
        if (size <= 1024 * 1024) return 20;
        if (size <=  2 * 1024 * 1024) return 21;

        if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant)
                BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()");
        else
                BUG();

        /* Will never be reached. Needed because the compiler may complain */
        return -1;
}
static_assert(PAGE_SHIFT <= 20);
#define kmalloc_index(s) __kmalloc_index(s, true)
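/*
 * Illustrative values (assuming KMALLOC_MIN_SIZE == 8, i.e.
 * KMALLOC_SHIFT_LOW == 3):
 *
 *  kmalloc_index(1)   == 3   // smallest cache: 8 bytes
 *  kmalloc_index(80)  == 1   // served by the 96-byte cache
 *  kmalloc_index(100) == 7   // served by the 128-byte cache
 *  kmalloc_index(200) == 8   // served by the 256-byte cache
 */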

void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1);

/**
 * kmem_cache_alloc - Allocate an object
 * @cachep: The cache to allocate from.
 * @flags: See kmalloc().
 *
 * Allocate an object from this cache.
 * See kmem_cache_zalloc() for a shortcut of adding __GFP_ZERO to flags.
 *
 * Return: pointer to the new object or %NULL in case of error
 */
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) __assume_slab_alignment __malloc;
void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
                           gfp_t gfpflags) __assume_slab_alignment __malloc;
void kmem_cache_free(struct kmem_cache *s, void *objp);

/*
 * Bulk allocation and freeing operations. These are accelerated in an
 * allocator specific way to avoid taking locks repeatedly or building
 * metadata structures unnecessarily.
 *
 * Note that interrupts must be enabled when calling these functions.
 */
void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p);
int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p);

static __always_inline void kfree_bulk(size_t size, void **p)
{
        kmem_cache_free_bulk(NULL, size, p);
}

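/*
 * Usage sketch for the bulk API (hypothetical cache and objects array, not
 * part of this header): allocate a batch of objects in one call and later
 * release them all at once.
 *
 *  void *objs[16];
 *  int nr;
 *
 *  nr = kmem_cache_alloc_bulk(foo_cachep, GFP_KERNEL, ARRAY_SIZE(objs), objs);
 *  if (!nr)
 *    return -ENOMEM;
 *  ...
 *  kmem_cache_free_bulk(foo_cachep, nr, objs);
 */
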
void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment
                                                         __alloc_size(1);
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment
                                                                         __malloc;

void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size)
                    __assume_kmalloc_alignment __alloc_size(3);

void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
                         int node, size_t size) __assume_kmalloc_alignment
                                                __alloc_size(4);
void *kmalloc_large(size_t size, gfp_t flags) __assume_page_alignment
                                              __alloc_size(1);

void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_alignment
                                                             __alloc_size(1);

/**
 * kmalloc - allocate kernel memory
 * @size: how many bytes of memory are required.
 * @flags: describe the allocation context
 *
 * kmalloc is the normal method of allocating memory
 * for objects smaller than page size in the kernel.
 *
 * The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN
 * bytes. For @size of power of two bytes, the alignment is also guaranteed
 * to be at least the size.
 *
 * The @flags argument may be one of the GFP flags defined at
 * include/linux/gfp_types.h and described at
 * :ref:`Documentation/core-api/mm-api.rst <mm-api-gfp-flags>`
 *
 * The recommended usage of the @flags is described at
 * :ref:`Documentation/core-api/memory-allocation.rst <memory_allocation>`
 *
 * Below is a brief outline of the most useful GFP flags
 *
 * %GFP_KERNEL
 *      Allocate normal kernel ram. May sleep.
 *
 * %GFP_NOWAIT
 *      Allocation will not sleep.
 *
 * %GFP_ATOMIC
 *      Allocation will not sleep. May use emergency pools.
 *
 * Also it is possible to set different flags by OR'ing
 * in one or more of the following additional @flags:
 *
 * %__GFP_ZERO
 *      Zero the allocated memory before returning. Also see kzalloc().
 *
 * %__GFP_HIGH
 *      This allocation has high priority and may use emergency pools.
 *
 * %__GFP_NOFAIL
 *      Indicate that this allocation is in no way allowed to fail
 *      (think twice before using).
 *
 * %__GFP_NORETRY
 *      If memory is not immediately available,
 *      then give up at once.
 *
 * %__GFP_NOWARN
 *      If allocation fails, don't issue any warnings.
 *
 * %__GFP_RETRY_MAYFAIL
 *      Try really hard to succeed the allocation but fail
 *      eventually.
 */
static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
{
        if (__builtin_constant_p(size) && size) {
                unsigned int index;

                if (size > KMALLOC_MAX_CACHE_SIZE)
                        return kmalloc_large(size, flags);

                index = kmalloc_index(size);
                return kmalloc_trace(
                                kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
                                flags, size);
        }
        return __kmalloc(size, flags);
}

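/*
 * Typical call sites look something like this (hypothetical caller, shown
 * only for illustration):
 *
 *  struct foo *f = kmalloc(sizeof(*f), GFP_KERNEL);
 *  if (!f)
 *    return -ENOMEM;
 *  ...
 *  kfree(f);
 *
 * or, when the memory must start out zeroed, kzalloc(sizeof(*f), GFP_KERNEL).
 */
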
static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
{
        if (__builtin_constant_p(size) && size) {
                unsigned int index;

                if (size > KMALLOC_MAX_CACHE_SIZE)
                        return kmalloc_large_node(size, flags, node);

                index = kmalloc_index(size);
                return kmalloc_node_trace(
                                kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
                                flags, node, size);
        }
        return __kmalloc_node(size, flags, node);
}

/**
 * kmalloc_array - allocate memory for an array.
 * @n: number of elements.
 * @size: element size.
 * @flags: the type of memory to allocate (see kmalloc).
 */
static inline __alloc_size(1, 2) void *kmalloc_array(size_t n, size_t size, gfp_t flags)
{
        size_t bytes;

        if (unlikely(check_mul_overflow(n, size, &bytes)))
                return NULL;
        if (__builtin_constant_p(n) && __builtin_constant_p(size))
                return kmalloc(bytes, flags);
        return __kmalloc(bytes, flags);
}

/**
 * krealloc_array - reallocate memory for an array.
 * @p: pointer to the memory chunk to reallocate
 * @new_n: new number of elements to alloc
 * @new_size: new size of a single member of the array
 * @flags: the type of memory to allocate (see kmalloc)
 */
static inline __realloc_size(2, 3) void * __must_check krealloc_array(void *p,
                                                                      size_t new_n,
                                                                      size_t new_size,
                                                                      gfp_t flags)
{
        size_t bytes;

        if (unlikely(check_mul_overflow(new_n, new_size, &bytes)))
                return NULL;

        return krealloc(p, bytes, flags);
}

/**
 * kcalloc - allocate memory for an array. The memory is set to zero.
 * @n: number of elements.
 * @size: element size.
 * @flags: the type of memory to allocate (see kmalloc).
 */
static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flags)
{
        return kmalloc_array(n, size, flags | __GFP_ZERO);
}

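/*
 * Sketch of the overflow-safe array helpers (hypothetical caller): for n
 * elements, prefer these over open-coding n * size, which can overflow.
 *
 *  struct foo *arr = kcalloc(n, sizeof(*arr), GFP_KERNEL);   // zeroed
 *  if (!arr)
 *    return -ENOMEM;
 *  ...
 *  new = krealloc_array(arr, 2 * n, sizeof(*arr), GFP_KERNEL);
 *  if (!new)
 *    goto err_free_arr;
 *  arr = new;
 *  ...
 *  kfree(arr);
 */
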
void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node,
                                  unsigned long caller) __alloc_size(1);
#define kmalloc_node_track_caller(size, flags, node) \
        __kmalloc_node_track_caller(size, flags, node, \
                                    _RET_IP_)

/*
 * kmalloc_track_caller is a special version of kmalloc that records the
 * calling function of the routine calling it for slab leak tracking instead
 * of just the calling function (confusing, eh?).
 * It's useful when the call to kmalloc comes from a widely-used standard
 * allocator where we care about the real place the memory allocation
 * request comes from.
 */
#define kmalloc_track_caller(size, flags) \
        __kmalloc_node_track_caller(size, flags, \
                                    NUMA_NO_NODE, _RET_IP_)

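/*
 * Sketch of the intended pattern (hypothetical wrapper, not part of this
 * header): a widely used helper allocates on behalf of its callers, so
 * leak reports should point at those callers rather than at the helper.
 *
 *  void *my_subsys_alloc(size_t len)
 *  {
 *    return kmalloc_track_caller(len, GFP_KERNEL);
 *  }
 */
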
static inline __alloc_size(1, 2) void *kmalloc_array_node(size_t n, size_t size, gfp_t flags,
                                                          int node)
{
        size_t bytes;

        if (unlikely(check_mul_overflow(n, size, &bytes)))
                return NULL;
        if (__builtin_constant_p(n) && __builtin_constant_p(size))
                return kmalloc_node(bytes, flags, node);
        return __kmalloc_node(bytes, flags, node);
}

static inline __alloc_size(1, 2) void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node)
{
        return kmalloc_array_node(n, size, flags | __GFP_ZERO, node);
}

/*
 * Shortcuts
 */
static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
{
        return kmem_cache_alloc(k, flags | __GFP_ZERO);
}

/**
 * kzalloc - allocate memory. The memory is set to zero.
 * @size: how many bytes of memory are required.
 * @flags: the type of memory to allocate (see kmalloc).
 */
static inline __alloc_size(1) void *kzalloc(size_t size, gfp_t flags)
{
        return kmalloc(size, flags | __GFP_ZERO);
}

/**
 * kzalloc_node - allocate zeroed memory from a particular memory node.
 * @size: how many bytes of memory are required.
 * @flags: the type of memory to allocate (see kmalloc).
 * @node: memory node from which to allocate
 */
static inline __alloc_size(1) void *kzalloc_node(size_t size, gfp_t flags, int node)
{
        return kmalloc_node(size, flags | __GFP_ZERO, node);
}

extern void *kvmalloc_node(size_t size, gfp_t flags, int node) __alloc_size(1);
static inline __alloc_size(1) void *kvmalloc(size_t size, gfp_t flags)
{
        return kvmalloc_node(size, flags, NUMA_NO_NODE);
}
static inline __alloc_size(1) void *kvzalloc_node(size_t size, gfp_t flags, int node)
{
        return kvmalloc_node(size, flags | __GFP_ZERO, node);
}
static inline __alloc_size(1) void *kvzalloc(size_t size, gfp_t flags)
{
        return kvmalloc(size, flags | __GFP_ZERO);
}

static inline __alloc_size(1, 2) void *kvmalloc_array(size_t n, size_t size, gfp_t flags)
{
        size_t bytes;

        if (unlikely(check_mul_overflow(n, size, &bytes)))
                return NULL;

        return kvmalloc(bytes, flags);
}

static inline __alloc_size(1, 2) void *kvcalloc(size_t n, size_t size, gfp_t flags)
{
        return kvmalloc_array(n, size, flags | __GFP_ZERO);
}

extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
                      __realloc_size(3);
extern void kvfree(const void *addr);
DEFINE_FREE(kvfree, void *, if (_T) kvfree(_T))

extern void kvfree_sensitive(const void *addr, size_t len);

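/*
 * Usage sketch for the kv* helpers (hypothetical caller): kvmalloc() tries
 * kmalloc() first and falls back to vmalloc() for larger buffers, so the
 * result must be released with kvfree(), never plain kfree() or vfree().
 *
 *  struct entry *tbl = kvcalloc(nr_entries, sizeof(*tbl), GFP_KERNEL);
 *  if (!tbl)
 *    return -ENOMEM;
 *  ...
 *  kvfree(tbl);
 */
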
unsigned int kmem_cache_size(struct kmem_cache *s);

/**
 * kmalloc_size_roundup - Report allocation bucket size for the given size
 *
 * @size: Number of bytes to round up from.
 *
 * This returns the number of bytes that would be available in a kmalloc()
 * allocation of @size bytes. For example, a 126 byte request would be
 * rounded up to the next sized kmalloc bucket, 128 bytes. (This is strictly
 * for the general-purpose kmalloc()-based allocations, and is not for the
 * pre-sized kmem_cache_alloc()-based allocations.)
 *
 * Use this to kmalloc() the full bucket size ahead of time instead of using
 * ksize() to query the size after an allocation.
 */
size_t kmalloc_size_roundup(size_t size);

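/*
 * Sketch of the recommended pattern (hypothetical caller): round the
 * request up front so the whole bucket may legitimately be used, instead
 * of querying ksize() after the fact.
 *
 *  size_t alloc_len = kmalloc_size_roundup(needed);   // e.g. 126 -> 128
 *  char *buf = kmalloc(alloc_len, GFP_KERNEL);
 *  if (!buf)
 *    return -ENOMEM;
 */
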
void __init kmem_cache_init_late(void);

#endif  /* _LINUX_SLAB_H */
1da177e4 | 819 | #endif /* _LINUX_SLAB_H */ |