Make sure shmhuge allocations are huge page size aligned
[fio.git] / smalloc.c
CommitLineData
d24c33a4
JA
1/*
2 * simple memory allocator, backed by mmap() so that it hands out memory
3 * that can be shared across processes and threads
4 */
5#include <sys/mman.h>
6#include <stdio.h>
7#include <stdlib.h>
8#include <assert.h>
9#include <string.h>
10#include <unistd.h>
11#include <sys/types.h>
12#include <limits.h>
13
6548f47f 14#include "mutex.h"
b3268b92 15#include "arch/arch.h"
d24c33a4 16
#define SMALLOC_REDZONE		/* define to detect memory corruption */

#define SMALLOC_BPB	32	/* block size, bytes-per-bit in bitmap */
#define SMALLOC_BPI	(sizeof(unsigned int) * 8)	/* bits per bitmap word */
#define SMALLOC_BPL	(SMALLOC_BPB * SMALLOC_BPI)	/* bytes tracked per bitmap word */

/*
 * Kept in parentheses so the macro behaves as a single value wherever it
 * is expanded (e.g. as the right-hand operand of a division).
 */
#define INITIAL_SIZE	(8192 * 1024)	/* new pool size */
#define MAX_POOLS	128		/* maximum number of pools to setup */

/* guard words written in front of and behind each allocation */
#define SMALLOC_PRE_RED		0xdeadbeefU
#define SMALLOC_POST_RED	0x5aa55aa5U

unsigned int smalloc_pool_size = INITIAL_SIZE;

/* low-bit mask used to round sizes and addresses up to int alignment */
const int int_mask = sizeof(int) - 1;
2b386d25 31
/*
 * One memory pool: a single mapping holding the data blocks, followed by
 * the bitmap that records which blocks are in use.
 */
struct pool {
	struct fio_mutex *lock;		/* serialises access to this pool */
	void *map;			/* start of the mapped block area */
	unsigned int *bitmap;		/* one bit per block, set == busy */
	unsigned int free_blocks;	/* count of blocks not handed out */
	unsigned int nr_blocks;		/* number of words in the bitmap */
	unsigned int next_non_full;	/* bitmap word where searches start */
	int fd;				/* fd of the file backing the mapping */
	unsigned int mmap_size;		/* length of the mapping, in bytes */
};
42
/*
 * Header stored immediately in front of the memory returned to the caller.
 */
struct block_hdr {
	unsigned int size;	/* allocation size, header (and redzone) included */
#ifdef SMALLOC_REDZONE
	unsigned int prered;	/* guard word, holds SMALLOC_PRE_RED while intact */
#endif
};
49
50static struct pool mp[MAX_POOLS];
51static unsigned int nr_pools;
52static unsigned int last_pool;
6548f47f 53static struct fio_mutex *lock;
d24c33a4 54
d24c33a4
JA
55static inline void pool_lock(struct pool *pool)
56{
2e3e31e3 57 fio_mutex_down(pool->lock);
d24c33a4
JA
58}
59
60static inline void pool_unlock(struct pool *pool)
61{
2e3e31e3 62 fio_mutex_up(pool->lock);
d24c33a4
JA
63}
64
65864cf7 65static inline void global_read_lock(void)
d24c33a4 66{
2e3e31e3 67 fio_mutex_down_read(lock);
d24c33a4
JA
68}
69
65864cf7 70static inline void global_read_unlock(void)
d24c33a4 71{
2e3e31e3 72 fio_mutex_up_read(lock);
65864cf7
JA
73}
74
75static inline void global_write_lock(void)
76{
2e3e31e3 77 fio_mutex_down_write(lock);
65864cf7
JA
78}
79
80static inline void global_write_unlock(void)
81{
2e3e31e3 82 fio_mutex_up_write(lock);
d24c33a4
JA
83}
84
d24c33a4
JA
85static inline int ptr_valid(struct pool *pool, void *ptr)
86{
dcb69098 87 unsigned int pool_size = pool->nr_blocks * SMALLOC_BPL;
ec996e9c
JA
88
89 return (ptr >= pool->map) && (ptr < pool->map + pool_size);
d24c33a4
JA
90}
91
808e9ea8
JA
92static inline unsigned int size_to_blocks(unsigned int size)
93{
94 return (size + SMALLOC_BPB - 1) / SMALLOC_BPB;
95}
96
dcb69098
JA
97static int blocks_iter(struct pool *pool, unsigned int pool_idx,
98 unsigned int idx, unsigned int nr_blocks,
ec996e9c 99 int (*func)(unsigned int *map, unsigned int mask))
d24c33a4 100{
dcb69098 101
ec996e9c
JA
102 while (nr_blocks) {
103 unsigned int this_blocks, mask;
dcb69098
JA
104 unsigned int *map;
105
106 if (pool_idx >= pool->nr_blocks)
107 return 0;
108
109 map = &pool->bitmap[pool_idx];
ec996e9c
JA
110
111 this_blocks = nr_blocks;
112 if (this_blocks + idx > SMALLOC_BPI) {
113 this_blocks = SMALLOC_BPI - idx;
114 idx = SMALLOC_BPI - this_blocks;
115 }
116
117 if (this_blocks == SMALLOC_BPI)
118 mask = -1U;
119 else
120 mask = ((1U << this_blocks) - 1) << idx;
121
122 if (!func(map, mask))
123 return 0;
124
125 nr_blocks -= this_blocks;
126 idx = 0;
dcb69098 127 pool_idx++;
ec996e9c
JA
128 }
129
130 return 1;
d24c33a4
JA
131}
132
/*
 * blocks_iter() callback: report whether none of the bits in 'mask' are
 * set in *map. Returns 1 if they are all clear, 0 otherwise.
 */
static int mask_cmp(unsigned int *map, unsigned int mask)
{
	unsigned int busy = *map & mask;

	return busy == 0;
}
137
/*
 * blocks_iter() callback: clear the bits in 'mask' from *map. All of them
 * are expected to be set beforehand. Always returns 1.
 */
static int mask_clear(unsigned int *map, unsigned int mask)
{
	assert((*map & mask) == mask);

	*map = *map & ~mask;
	return 1;
}
144
/*
 * blocks_iter() callback: set the bits in 'mask' in *map. None of them
 * are expected to be set beforehand. Always returns 1.
 */
static int mask_set(unsigned int *map, unsigned int mask)
{
	assert(!(*map & mask));

	*map = *map | mask;
	return 1;
}
151
dcb69098
JA
152static int blocks_free(struct pool *pool, unsigned int pool_idx,
153 unsigned int idx, unsigned int nr_blocks)
d24c33a4 154{
dcb69098 155 return blocks_iter(pool, pool_idx, idx, nr_blocks, mask_cmp);
d24c33a4
JA
156}
157
dcb69098
JA
158static void set_blocks(struct pool *pool, unsigned int pool_idx,
159 unsigned int idx, unsigned int nr_blocks)
d24c33a4 160{
dcb69098 161 blocks_iter(pool, pool_idx, idx, nr_blocks, mask_set);
d24c33a4
JA
162}
163
dcb69098
JA
164static void clear_blocks(struct pool *pool, unsigned int pool_idx,
165 unsigned int idx, unsigned int nr_blocks)
d24c33a4 166{
dcb69098 167 blocks_iter(pool, pool_idx, idx, nr_blocks, mask_clear);
d24c33a4
JA
168}
169
ec996e9c
JA
170static int find_next_zero(int word, int start)
171{
172 assert(word != -1U);
173 word >>= (start + 1);
b3268b92 174 return ffz(word) + start + 1;
d24c33a4
JA
175}
176
adf57099 177static int add_pool(struct pool *pool, unsigned int alloc_size)
d24c33a4 178{
ec996e9c 179 int fd, bitmap_blocks;
b8a6582e
JA
180 char file[] = "/tmp/.fio_smalloc.XXXXXX";
181 void *ptr;
ec996e9c 182
b8a6582e 183 fd = mkstemp(file);
d24c33a4
JA
184 if (fd < 0)
185 goto out_close;
186
55f6491d 187#ifdef SMALLOC_REDZONE
ec996e9c 188 alloc_size += sizeof(unsigned int);
55f6491d 189#endif
ec996e9c
JA
190 alloc_size += sizeof(struct block_hdr);
191 if (alloc_size < INITIAL_SIZE)
192 alloc_size = INITIAL_SIZE;
193
194 /* round up to nearest full number of blocks */
195 alloc_size = (alloc_size + SMALLOC_BPL - 1) & ~(SMALLOC_BPL - 1);
196 bitmap_blocks = alloc_size / SMALLOC_BPL;
197 alloc_size += bitmap_blocks * sizeof(unsigned int);
198 pool->mmap_size = alloc_size;
0b9d69ec 199
ec996e9c
JA
200 pool->nr_blocks = bitmap_blocks;
201 pool->free_blocks = bitmap_blocks * SMALLOC_BPB;
adf57099 202
ec996e9c 203 if (ftruncate(fd, alloc_size) < 0)
d24c33a4
JA
204 goto out_unlink;
205
ec996e9c 206 ptr = mmap(NULL, alloc_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
d24c33a4
JA
207 if (ptr == MAP_FAILED)
208 goto out_unlink;
209
ec996e9c
JA
210 memset(ptr, 0, alloc_size);
211 pool->map = ptr;
212 pool->bitmap = (void *) ptr + (pool->nr_blocks * SMALLOC_BPL);
d24c33a4 213
6548f47f 214 pool->lock = fio_mutex_init(1);
d24c33a4
JA
215 if (!pool->lock)
216 goto out_unlink;
d24c33a4 217
443bb114
JA
218 /*
219 * Unlink pool file now. It wont get deleted until the fd is closed,
220 * which happens both for cleanup or unexpected quit. This way we
221 * don't leave temp files around in case of a crash.
222 */
b8a6582e 223 unlink(file);
d24c33a4
JA
224 pool->fd = fd;
225
d24c33a4
JA
226 nr_pools++;
227 return 0;
228out_unlink:
ec996e9c 229 fprintf(stderr, "smalloc: failed adding pool\n");
d24c33a4 230 if (pool->map)
ec996e9c 231 munmap(pool->map, pool->mmap_size);
b8a6582e 232 unlink(file);
d24c33a4 233out_close:
b8a6582e 234 close(fd);
d24c33a4
JA
235 return 1;
236}
237
238void sinit(void)
239{
4d4e80f2 240 int ret;
d24c33a4 241
6548f47f 242 lock = fio_mutex_rw_init();
adf57099 243 ret = add_pool(&mp[0], INITIAL_SIZE);
d24c33a4
JA
244 assert(!ret);
245}
246
247static void cleanup_pool(struct pool *pool)
248{
443bb114
JA
249 /*
250 * This will also remove the temporary file we used as a backing
251 * store, it was already unlinked
252 */
d24c33a4 253 close(pool->fd);
ec996e9c 254 munmap(pool->map, pool->mmap_size);
6548f47f
JA
255
256 if (pool->lock)
257 fio_mutex_remove(pool->lock);
d24c33a4
JA
258}
259
260void scleanup(void)
261{
262 unsigned int i;
263
264 for (i = 0; i < nr_pools; i++)
265 cleanup_pool(&mp[i]);
266
6548f47f
JA
267 if (lock)
268 fio_mutex_remove(lock);
d24c33a4
JA
269}
270
89da54e8 271#ifdef SMALLOC_REDZONE
cf98708d
JA
272static void *postred_ptr(struct block_hdr *hdr)
273{
cf98708d
JA
274 unsigned long ptr;
275
276 ptr = (unsigned long) hdr + hdr->size - sizeof(unsigned int);
277 ptr = (ptr + int_mask) & ~int_mask;
278
279 return (void *) ptr;
280}
281
ec996e9c 282static void fill_redzone(struct block_hdr *hdr)
55f6491d 283{
cf98708d 284 unsigned int *postred = postred_ptr(hdr);
55f6491d 285
ec996e9c
JA
286 hdr->prered = SMALLOC_PRE_RED;
287 *postred = SMALLOC_POST_RED;
ec996e9c 288}
55f6491d 289
ec996e9c
JA
290static void sfree_check_redzone(struct block_hdr *hdr)
291{
cf98708d 292 unsigned int *postred = postred_ptr(hdr);
ec996e9c
JA
293
294 if (hdr->prered != SMALLOC_PRE_RED) {
55f6491d
JA
295 fprintf(stderr, "smalloc pre redzone destroyed!\n");
296 fprintf(stderr, " ptr=%p, prered=%x, expected %x\n",
ec996e9c 297 hdr, hdr->prered, SMALLOC_PRE_RED);
55f6491d
JA
298 assert(0);
299 }
300 if (*postred != SMALLOC_POST_RED) {
301 fprintf(stderr, "smalloc post redzone destroyed!\n");
302 fprintf(stderr, " ptr=%p, postred=%x, expected %x\n",
ec996e9c 303 hdr, *postred, SMALLOC_POST_RED);
55f6491d
JA
304 assert(0);
305 }
89da54e8
JA
306}
307#else
/*
 * Redzones are compiled out: nothing to write.
 */
static void fill_redzone(struct block_hdr *hdr)
{
	(void) hdr;
}
311
89da54e8
JA
/*
 * Redzones are compiled out: nothing to verify.
 */
static void sfree_check_redzone(struct block_hdr *hdr)
{
	(void) hdr;
}
315#endif
316
d24c33a4
JA
317static void sfree_pool(struct pool *pool, void *ptr)
318{
ec996e9c 319 struct block_hdr *hdr;
179446e0 320 unsigned int i, idx;
ec996e9c 321 unsigned long offset;
d24c33a4
JA
322
323 if (!ptr)
324 return;
325
ec996e9c
JA
326 ptr -= sizeof(*hdr);
327 hdr = ptr;
55f6491d 328
d24c33a4
JA
329 assert(ptr_valid(pool, ptr));
330
ec996e9c 331 sfree_check_redzone(hdr);
d24c33a4 332
ec996e9c
JA
333 offset = ptr - pool->map;
334 i = offset / SMALLOC_BPL;
335 idx = (offset % SMALLOC_BPL) / SMALLOC_BPB;
d24c33a4 336
ec996e9c 337 pool_lock(pool);
dcb69098 338 clear_blocks(pool, i, idx, size_to_blocks(hdr->size));
ec996e9c
JA
339 if (i < pool->next_non_full)
340 pool->next_non_full = i;
179446e0 341 pool->free_blocks += size_to_blocks(hdr->size);
d24c33a4
JA
342 pool_unlock(pool);
343}
344
345void sfree(void *ptr)
346{
347 struct pool *pool = NULL;
348 unsigned int i;
349
8e5732e5
JA
350 if (!ptr)
351 return;
352
65864cf7 353 global_read_lock();
d24c33a4
JA
354
355 for (i = 0; i < nr_pools; i++) {
356 if (ptr_valid(&mp[i], ptr)) {
357 pool = &mp[i];
358 break;
359 }
360 }
361
65864cf7 362 global_read_unlock();
d24c33a4
JA
363
364 assert(pool);
365 sfree_pool(pool, ptr);
366}
367
55f6491d 368static void *__smalloc_pool(struct pool *pool, unsigned int size)
d24c33a4 369{
ec996e9c
JA
370 unsigned int nr_blocks;
371 unsigned int i;
372 unsigned int offset;
373 unsigned int last_idx;
374 void *ret = NULL;
d24c33a4 375
d24c33a4 376 pool_lock(pool);
179446e0
JA
377
378 nr_blocks = size_to_blocks(size);
ec996e9c 379 if (nr_blocks > pool->free_blocks)
8e5732e5 380 goto fail;
5ec10eaa 381
ec996e9c
JA
382 i = pool->next_non_full;
383 last_idx = 0;
384 offset = -1U;
385 while (i < pool->nr_blocks) {
386 unsigned int idx;
d24c33a4 387
ec996e9c
JA
388 if (pool->bitmap[i] == -1U) {
389 i++;
390 pool->next_non_full = i;
391 last_idx = 0;
392 continue;
393 }
d24c33a4 394
ec996e9c 395 idx = find_next_zero(pool->bitmap[i], last_idx);
dcb69098 396 if (!blocks_free(pool, i, idx, nr_blocks)) {
ec996e9c
JA
397 idx += nr_blocks;
398 if (idx < SMALLOC_BPI)
399 last_idx = idx;
400 else {
401 last_idx = 0;
402 while (idx >= SMALLOC_BPI) {
403 i++;
404 idx -= SMALLOC_BPI;
405 }
406 }
407 continue;
d24c33a4 408 }
dcb69098 409 set_blocks(pool, i, idx, nr_blocks);
ec996e9c
JA
410 offset = i * SMALLOC_BPL + idx * SMALLOC_BPB;
411 break;
412 }
413
414 if (i < pool->nr_blocks) {
415 pool->free_blocks -= nr_blocks;
416 ret = pool->map + offset;
d24c33a4 417 }
ec996e9c 418fail:
d24c33a4 419 pool_unlock(pool);
ec996e9c 420 return ret;
d24c33a4
JA
421}
422
55f6491d
JA
423static void *smalloc_pool(struct pool *pool, unsigned int size)
424{
89da54e8 425 unsigned int alloc_size = size + sizeof(struct block_hdr);
55f6491d
JA
426 void *ptr;
427
cf98708d 428 /*
122426da
JA
429 * Round to int alignment, so that the postred pointer will
430 * be naturally aligned as well.
cf98708d 431 */
ec996e9c 432#ifdef SMALLOC_REDZONE
122426da
JA
433 alloc_size += sizeof(unsigned int);
434 alloc_size = (alloc_size + int_mask) & ~int_mask;
ec996e9c
JA
435#endif
436
437 ptr = __smalloc_pool(pool, alloc_size);
89da54e8
JA
438 if (ptr) {
439 struct block_hdr *hdr = ptr;
55f6491d 440
89da54e8
JA
441 hdr->size = alloc_size;
442 fill_redzone(hdr);
55f6491d 443
89da54e8
JA
444 ptr += sizeof(*hdr);
445 memset(ptr, 0, size);
446 }
ec996e9c 447
55f6491d 448 return ptr;
55f6491d
JA
449}
450
d24c33a4
JA
451void *smalloc(unsigned int size)
452{
453 unsigned int i;
454
d1271dc1 455 global_write_lock();
d24c33a4
JA
456 i = last_pool;
457
458 do {
459 for (; i < nr_pools; i++) {
460 void *ptr = smalloc_pool(&mp[i], size);
461
462 if (ptr) {
463 last_pool = i;
d1271dc1 464 global_write_unlock();
d24c33a4
JA
465 return ptr;
466 }
467 }
468 if (last_pool) {
469 last_pool = 0;
470 continue;
471 }
472
ec996e9c 473 if (nr_pools + 1 > MAX_POOLS)
d24c33a4
JA
474 break;
475 else {
476 i = nr_pools;
adf57099 477 if (add_pool(&mp[nr_pools], size))
65864cf7 478 goto out;
d24c33a4
JA
479 }
480 } while (1);
481
65864cf7 482out:
d1271dc1 483 global_write_unlock();
d24c33a4
JA
484 return NULL;
485}
486
/*
 * Duplicate 'str' into memory obtained from smalloc().
 *
 * Returns the copy, to be released with sfree(), or NULL if the string is
 * too long for smalloc() or the allocation failed.
 */
char *smalloc_strdup(const char *str)
{
	size_t len = strlen(str) + 1;	/* include the terminating NUL */
	char *ptr;

	/* smalloc() takes an unsigned int, do not let the length truncate */
	if (len > UINT_MAX)
		return NULL;

	ptr = smalloc(len);
	if (ptr)
		memcpy(ptr, str, len);

	return ptr;
}