glusterfs: update for new API
[fio.git] / smalloc.c
index 9a7c25b..a2ad25a 100644 (file)
--- a/smalloc.c
+++ b/smalloc.c
  * that can be shared across processes and threads
  */
 #include <sys/mman.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <assert.h>
 #include <string.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <limits.h>
 
-#include "mutex.h"
+#include "fio.h"
+#include "fio_sem.h"
+#include "os/os.h"
+#include "smalloc.h"
+#include "log.h"
 
-#undef ENABLE_RESIZE           /* define to enable pool resizing */
-#define MP_SAFE                        /* define to made allocator thread safe */
+#define SMALLOC_REDZONE                /* define to detect memory corruption */
 
-#define INITIAL_SIZE   65536   /* new pool size */
-#define MAX_POOLS      32      /* maximum number of pools to setup */
+#define SMALLOC_BPB    32      /* block size, bytes-per-bit in bitmap */
+#define SMALLOC_BPI    (sizeof(unsigned int) * 8)
+#define SMALLOC_BPL    (SMALLOC_BPB * SMALLOC_BPI)
 
-#ifdef ENABLE_RESIZE
-#define MAX_SIZE       8 * INITIAL_SIZE
-static unsigned int resize_error;
+#define INITIAL_SIZE   16*1024*1024    /* default and minimum pool size, in bytes */
+#define INITIAL_POOLS  8               /* number of pools sinit() tries to set up */
+
+/* capacity of mp[]; add_pool() refuses once nr_pools reaches this */
+#define MAX_POOLS      16
+
+#define SMALLOC_PRE_RED                0xdeadbeefU
+#define SMALLOC_POST_RED       0x5aa55aa5U
+
+unsigned int smalloc_pool_size = INITIAL_SIZE;
+#ifdef SMALLOC_REDZONE
+static const int int_mask = sizeof(int) - 1;
 #endif
 
 struct pool {
-       struct fio_mutex *lock;                 /* protects this pool */
+       struct fio_sem *lock;                   /* protects this pool */
        void *map;                              /* map of blocks */
-       void *last;                             /* next free block hint */
-       unsigned int size;                      /* size of pool */
-       unsigned int room;                      /* size left in pool */
-       unsigned int largest_block;             /* largest block free */
-       unsigned int free_since_compact;        /* sfree() since compact() */
-       int fd;                                 /* memory backing fd */
-       char file[PATH_MAX];                    /* filename for fd */
+       unsigned int *bitmap;                   /* blocks free/busy map */
+       size_t free_blocks;             /* free blocks */
+       size_t nr_blocks;                       /* total blocks */
+       size_t next_non_full;
+       size_t mmap_size;
+};
+
+struct block_hdr {
+       size_t size;
+#ifdef SMALLOC_REDZONE
+       unsigned int prered;
+#endif
 };
 
 static struct pool mp[MAX_POOLS];
 static unsigned int nr_pools;
 static unsigned int last_pool;
-static struct fio_mutex *lock;
-
-struct mem_hdr {
-       unsigned int size;
-};
 
-static inline void pool_lock(struct pool *pool)
+static inline int ptr_valid(struct pool *pool, void *ptr)
 {
-       if (pool->lock)
-               fio_mutex_down(pool->lock);
-}
+       unsigned int pool_size = pool->nr_blocks * SMALLOC_BPL;
 
-static inline void pool_unlock(struct pool *pool)
-{
-       if (pool->lock)
-               fio_mutex_up(pool->lock);
+       return (ptr >= pool->map) && (ptr < pool->map + pool_size);
 }
 
-static inline void global_read_lock(void)
+/*
+ * Number of SMALLOC_BPB-byte blocks needed to hold 'size' bytes,
+ * rounding any partial block up to a whole one.
+ */
+static inline size_t size_to_blocks(size_t size)
 {
-       if (lock)
-               fio_mutex_down_read(lock);
+       return (size + SMALLOC_BPB - 1) / SMALLOC_BPB;
 }
 
-static inline void global_read_unlock(void)
+static int blocks_iter(struct pool *pool, unsigned int pool_idx,
+                      unsigned int idx, size_t nr_blocks,
+                      int (*func)(unsigned int *map, unsigned int mask))
 {
-       if (lock)
-               fio_mutex_up_read(lock);
-}
 
-static inline void global_write_lock(void)
-{
-       if (lock)
-               fio_mutex_down_write(lock);
-}
+       while (nr_blocks) {
+               unsigned int this_blocks, mask;
+               unsigned int *map;
 
-static inline void global_write_unlock(void)
-{
-       if (lock)
-               fio_mutex_up_write(lock);
-}
+               if (pool_idx >= pool->nr_blocks)
+                       return 0;
 
-#define hdr_free(hdr)          ((hdr)->size & 0x80000000)
-#define hdr_size(hdr)          ((hdr)->size & ~0x80000000)
-#define hdr_mark_free(hdr)     ((hdr)->size |= 0x80000000)
+               map = &pool->bitmap[pool_idx];
 
-static inline int ptr_valid(struct pool *pool, void *ptr)
-{
-       return (ptr >= pool->map) && (ptr < pool->map + pool->size);
-}
+               this_blocks = nr_blocks;
+               if (this_blocks + idx > SMALLOC_BPI) {
+                       this_blocks = SMALLOC_BPI - idx;
+                       idx = SMALLOC_BPI - this_blocks;
+               }
 
-static inline int __hdr_valid(struct pool *pool, struct mem_hdr *hdr,
-                             unsigned int size)
-{
-       return ptr_valid(pool, hdr) && ptr_valid(pool, (void *) hdr + size - 1);
+               if (this_blocks == SMALLOC_BPI)
+                       mask = -1U;
+               else
+                       mask = ((1U << this_blocks) - 1) << idx;
+
+               if (!func(map, mask))
+                       return 0;
+
+               nr_blocks -= this_blocks;
+               idx = 0;
+               pool_idx++;
+       }
+
+       return 1;
 }
 
-static inline int hdr_valid(struct pool *pool, struct mem_hdr *hdr)
+static int mask_cmp(unsigned int *map, unsigned int mask)
 {
-       return __hdr_valid(pool, hdr, hdr_size(hdr));
+       return !(*map & mask);
 }
 
-static inline int region_free(struct mem_hdr *hdr)
+static int mask_clear(unsigned int *map, unsigned int mask)
 {
-       return hdr_free(hdr) || (!hdr_free(hdr) && !hdr_size(hdr));
+       assert((*map & mask) == mask);
+       *map &= ~mask;
+       return 1;
 }
 
-static inline struct mem_hdr *__hdr_nxt(struct pool *pool, struct mem_hdr *hdr,
-                                       unsigned int size)
+static int mask_set(unsigned int *map, unsigned int mask)
 {
-       struct mem_hdr *nxt = (void *) hdr + size + sizeof(*hdr);
-
-       if (__hdr_valid(pool, nxt, size))
-               return nxt;
-
-       return NULL;
+       assert(!(*map & mask));
+       *map |= mask;
+       return 1;
 }
 
-static inline struct mem_hdr *hdr_nxt(struct pool *pool, struct mem_hdr *hdr)
+static int blocks_free(struct pool *pool, unsigned int pool_idx,
+                      unsigned int idx, size_t nr_blocks)
 {
-       return __hdr_nxt(pool, hdr, hdr_size(hdr));
+       return blocks_iter(pool, pool_idx, idx, nr_blocks, mask_cmp);
 }
 
-static void merge(struct pool *pool, struct mem_hdr *hdr, struct mem_hdr *nxt)
+static void set_blocks(struct pool *pool, unsigned int pool_idx,
+                      unsigned int idx, size_t nr_blocks)
 {
-       unsigned int hfree = hdr_free(hdr);
-       unsigned int nfree = hdr_free(nxt);
-
-       hdr->size = hdr_size(hdr) + hdr_size(nxt) + sizeof(*nxt);
-       nxt->size = 0;
-
-       if (hfree)
-               hdr_mark_free(hdr);
-       if (nfree)
-               hdr_mark_free(nxt);
-
-       if (pool->last == nxt)
-               pool->last = hdr;
+       blocks_iter(pool, pool_idx, idx, nr_blocks, mask_set);
 }
 
-static int combine(struct pool *pool, struct mem_hdr *prv, struct mem_hdr *hdr)
+static void clear_blocks(struct pool *pool, unsigned int pool_idx,
+                        unsigned int idx, size_t nr_blocks)
 {
-       if (prv && hdr_free(prv) && hdr_free(hdr)) {
-               merge(pool, prv, hdr);
-               return 1;
-       }
-
-       return 0;
+       blocks_iter(pool, pool_idx, idx, nr_blocks, mask_clear);
 }
 
-static int compact_pool(struct pool *pool)
+static int find_next_zero(int word, int start)
 {
-       struct mem_hdr *hdr = pool->map, *nxt;
-       unsigned int compacted = 0;
-
-       if (pool->free_since_compact < 50)
-               return 1;
-
-       while (hdr) {
-               nxt = hdr_nxt(pool, hdr);
-               if (!nxt)
-                       break;
-               if (hdr_free(nxt) && hdr_free(hdr)) {
-                       merge(pool, hdr, nxt);
-                       compacted++;
-                       continue;
-               }
-               hdr = hdr_nxt(pool, hdr);
-       }
-
-       pool->free_since_compact = 0;
-       return !!compacted;
+       assert(word != -1U);
+       word >>= start;
+       return ffz(word) + start;
 }
 
-static int resize_pool(struct pool *pool)
+static bool add_pool(struct pool *pool, unsigned int alloc_size)
 {
-#ifdef ENABLE_RESIZE
-       unsigned int new_size = pool->size << 1;
-       struct mem_hdr *hdr, *last_hdr;
+       int bitmap_blocks;
+       int mmap_flags;
        void *ptr;
 
-       if (new_size >= MAX_SIZE || resize_error)
-               return 1;
-
-       if (ftruncate(pool->fd, new_size) < 0)
-               goto fail;
+       if (nr_pools == MAX_POOLS)
+               return false;
 
-       ptr = mremap(pool->map, pool->size, new_size, 0);
-       if (ptr == MAP_FAILED)
-               goto fail;
-
-       pool->map = ptr;
-       hdr = pool;
-       do {
-               last_hdr = hdr;
-       } while ((hdr = hdr_nxt(hdr)) != NULL);
-
-       if (hdr_free(last_hdr)) {
-               last_hdr->size = hdr_size(last_hdr) + new_size - pool_size;
-               hdr_mark_free(last_hdr);
-       } else {
-               struct mem_hdr *nxt;
-
-               nxt = (void *) last_hdr + hdr_size(last_hdr) + sizeof(*hdr);
-               nxt->size = new_size - pool_size - sizeof(*hdr);
-               hdr_mark_free(nxt);
-       }
-
-       pool_room += new_size - pool_size;
-       pool_size = new_size;
-       return 0;
-fail:
-       perror("resize");
-       resize_error = 1;
+#ifdef SMALLOC_REDZONE
+       alloc_size += sizeof(unsigned int);
+#endif
+       alloc_size += sizeof(struct block_hdr);
+       if (alloc_size < INITIAL_SIZE)
+               alloc_size = INITIAL_SIZE;
+
+       /* round up to nearest full number of blocks */
+       alloc_size = (alloc_size + SMALLOC_BPL - 1) & ~(SMALLOC_BPL - 1);
+       bitmap_blocks = alloc_size / SMALLOC_BPL;
+       alloc_size += bitmap_blocks * sizeof(unsigned int);
+       pool->mmap_size = alloc_size;
+
+       pool->nr_blocks = bitmap_blocks;
+       pool->free_blocks = bitmap_blocks * SMALLOC_BPB;
+
+       mmap_flags = OS_MAP_ANON;
+#ifdef CONFIG_ESX
+       mmap_flags |= MAP_PRIVATE;
 #else
-       return 1;
+       mmap_flags |= MAP_SHARED;
 #endif
-}
+       ptr = mmap(NULL, alloc_size, PROT_READ|PROT_WRITE, mmap_flags, -1, 0);
 
-static int add_pool(struct pool *pool)
-{
-       struct mem_hdr *hdr;
-       void *ptr;
-       int fd;
-
-       strcpy(pool->file, "/tmp/.fio_smalloc.XXXXXX");
-       fd = mkstemp(pool->file);
-       if (fd < 0)
-               goto out_close;
-
-       pool->size = INITIAL_SIZE;
-       if (ftruncate(fd, pool->size) < 0)
-               goto out_unlink;
-
-       ptr = mmap(NULL, pool->size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
        if (ptr == MAP_FAILED)
-               goto out_unlink;
+               goto out_fail;
 
-       memset(ptr, 0, pool->size);
-       pool->map = pool->last = ptr;
+       pool->map = ptr;
+       pool->bitmap = (unsigned int *)((char *) ptr + (pool->nr_blocks * SMALLOC_BPL));
+       memset(pool->bitmap, 0, bitmap_blocks * sizeof(unsigned int));
 
-#ifdef MP_SAFE
-       pool->lock = fio_mutex_init(1);
+       pool->lock = fio_sem_init(FIO_SEM_UNLOCKED);
        if (!pool->lock)
-               goto out_unlink;
-#endif
-
-       pool->fd = fd;
+               goto out_fail;
 
-       hdr = pool->map;
-       pool->room = hdr->size = pool->size - sizeof(*hdr);
-       pool->largest_block = pool->room;
-       hdr_mark_free(hdr);
-       global_write_lock();
        nr_pools++;
-       global_write_unlock();
-       return 0;
-out_unlink:
+       return true;
+out_fail:
+       log_err("smalloc: failed adding pool\n");
        if (pool->map)
-               munmap(pool->map, pool->size);
-       unlink(pool->file);
-out_close:
-       if (fd >= 0)
-               close(fd);
-       return 1;
+               munmap(pool->map, pool->mmap_size);
+       return false;
 }
 
 void sinit(void)
 {
-       int ret = add_pool(&mp[0]);
+       bool ret;
+       int i;
 
-#ifdef MP_SAFE
-       lock = fio_mutex_rw_init();
-#endif
-       assert(!ret);
+       for (i = 0; i < INITIAL_POOLS; i++) {
+               ret = add_pool(&mp[nr_pools], smalloc_pool_size);
+               if (!ret)
+                       break;
+       }
+
+       /*
+        * If we added at least one pool, we should be OK for most
+        * cases.
+        */
+       assert(i);
 }
 
 static void cleanup_pool(struct pool *pool)
 {
-       unlink(pool->file);
-       close(pool->fd);
-       munmap(pool->map, pool->size);
+       /*
+        * Unmap the whole pool: data blocks plus the bitmap stored after
+        * them. add_pool() maps anonymous memory (OS_MAP_ANON, fd -1), so
+        * there is no backing file or descriptor left to clean up.
+        */
+       munmap(pool->map, pool->mmap_size);
 
        if (pool->lock)
-               fio_mutex_remove(pool->lock);
+               fio_sem_remove(pool->lock);
 }
 
 void scleanup(void)
@@ -292,35 +233,80 @@ void scleanup(void)
 
        for (i = 0; i < nr_pools; i++)
                cleanup_pool(&mp[i]);
+}
+
+#ifdef SMALLOC_REDZONE
+/*
+ * Return the address of the trailing ("post") redzone word of an
+ * allocation. The candidate address is the last sizeof(unsigned int)
+ * bytes of the hdr->size byte region starting at hdr; it is then passed
+ * through PTR_ALIGN with int_mask.
+ * NOTE(review): PTR_ALIGN is defined outside this file; presumably it
+ * rounds the address to an int boundary - confirm against its definition.
+ */
+static void *postred_ptr(struct block_hdr *hdr)
+{
+       uintptr_t tail = (uintptr_t) hdr + hdr->size - sizeof(unsigned int);
+
+       return (void *) (uintptr_t) PTR_ALIGN(tail, int_mask);
+}
+
+static void fill_redzone(struct block_hdr *hdr)
+{
+       unsigned int *postred = postred_ptr(hdr);
 
-       if (lock)
-               fio_mutex_remove(lock);
+       hdr->prered = SMALLOC_PRE_RED;
+       *postred = SMALLOC_POST_RED;
+}
+
+/*
+ * Verify the guard words around an allocation that is being freed.
+ * The pre word is hdr->prered; the post word is located through
+ * postred_ptr(). A word that no longer holds its fill pattern is
+ * reported with log_err() and then trips assert(0).
+ */
+static void sfree_check_redzone(struct block_hdr *hdr)
+{
+       /* address only; the post word is not read until the pre check is done */
+       unsigned int *postred = postred_ptr(hdr);
+
+       if (hdr->prered != SMALLOC_PRE_RED) {
+               log_err("smalloc pre redzone destroyed!\n"
+                       " ptr=%p, prered=%x, expected %x\n",
+                               hdr, hdr->prered, SMALLOC_PRE_RED);
+               assert(0);
+       }
+       if (*postred != SMALLOC_POST_RED) {
+               log_err("smalloc post redzone destroyed!\n"
+                       "  ptr=%p, postred=%x, expected %x\n",
+                               hdr, *postred, SMALLOC_POST_RED);
+               assert(0);
+       }
+}
+#else
+static void fill_redzone(struct block_hdr *hdr)
+{
 }
 
+static void sfree_check_redzone(struct block_hdr *hdr)
+{
+}
+#endif
+
 static void sfree_pool(struct pool *pool, void *ptr)
 {
-       struct mem_hdr *hdr, *nxt;
+       struct block_hdr *hdr;
+       unsigned int i, idx;
+       unsigned long offset;
 
        if (!ptr)
                return;
 
-       assert(ptr_valid(pool, ptr));
+       ptr -= sizeof(*hdr);
+       hdr = ptr;
 
-       pool_lock(pool);
-       hdr = ptr - sizeof(*hdr);
-       assert(!hdr_free(hdr));
-       hdr_mark_free(hdr);
-       pool->room -= hdr_size(hdr);
+       assert(ptr_valid(pool, ptr));
 
-       nxt = hdr_nxt(pool, hdr);
-       if (nxt && hdr_free(nxt))
-               merge(pool, hdr, nxt);
+       sfree_check_redzone(hdr);
 
-       if (hdr_size(hdr) > pool->largest_block)
-               pool->largest_block = hdr_size(hdr);
+       offset = ptr - pool->map;
+       i = offset / SMALLOC_BPL;
+       idx = (offset % SMALLOC_BPL) / SMALLOC_BPB;
 
-       pool->free_since_compact++;
-       pool_unlock(pool);
+       fio_sem_down(pool->lock);
+       clear_blocks(pool, i, idx, size_to_blocks(hdr->size));
+       if (i < pool->next_non_full)
+               pool->next_non_full = i;
+       pool->free_blocks += size_to_blocks(hdr->size);
+       fio_sem_up(pool->lock);
 }
 
 void sfree(void *ptr)
@@ -328,7 +314,8 @@ void sfree(void *ptr)
        struct pool *pool = NULL;
        unsigned int i;
 
-       global_read_lock();
+       if (!ptr)
+               return;
 
        for (i = 0; i < nr_pools; i++) {
                if (ptr_valid(&mp[i], ptr)) {
@@ -337,140 +324,141 @@ void sfree(void *ptr)
                }
        }
 
-       global_read_unlock();
+       if (pool) {
+               sfree_pool(pool, ptr);
+               return;
+       }
 
-       assert(pool);
-       sfree_pool(pool, ptr);
+       log_err("smalloc: ptr %p not from smalloc pool\n", ptr);
 }
 
-static void *smalloc_pool(struct pool *pool, unsigned int size)
+static void *__smalloc_pool(struct pool *pool, size_t size)
 {
-       struct mem_hdr *hdr, *prv;
-       int did_restart = 0;
-       void *ret;
+       size_t nr_blocks;
+       unsigned int i;
+       unsigned int offset;
+       unsigned int last_idx;
+       void *ret = NULL;
 
-       /*
-        * slight chance of race with sfree() here, but acceptable
-        */
-       if (!size || size > pool->room + sizeof(*hdr) ||
-           ((size > pool->largest_block) && pool->largest_block))
-               return NULL;
+       fio_sem_down(pool->lock);
 
-       pool_lock(pool);
-restart:
-       hdr = pool->last;
-       prv = NULL;
-       do {
-               if (combine(pool, prv, hdr))
-                       hdr = prv;
-                       
-               if (hdr_free(hdr) && hdr_size(hdr) >= size)
-                       break;
+       nr_blocks = size_to_blocks(size);
+       if (nr_blocks > pool->free_blocks)
+               goto fail;
 
-               prv = hdr;
-       } while ((hdr = hdr_nxt(pool, hdr)) != NULL);
+       i = pool->next_non_full;
+       last_idx = 0;
+       offset = -1U;
+       while (i < pool->nr_blocks) {
+               unsigned int idx;
 
-       if (!hdr)
-               goto fail;
+               if (pool->bitmap[i] == -1U) {
+                       i++;
+                       pool->next_non_full = i;
+                       last_idx = 0;
+                       continue;
+               }
 
-       /*
-        * more room, adjust next header if any
-        */
-       if (hdr_size(hdr) - size >= 2 * sizeof(*hdr)) {
-               struct mem_hdr *nxt = __hdr_nxt(pool, hdr, size);
-
-               if (nxt) {
-                       nxt->size = hdr_size(hdr) - size - sizeof(*hdr);
-                       if (hdr_size(hdr) == pool->largest_block)
-                               pool->largest_block = hdr_size(nxt);
-                       hdr_mark_free(nxt);
-               } else
-                       size = hdr_size(hdr);
-       } else
-               size = hdr_size(hdr);
-
-       if (size == hdr_size(hdr) && size == pool->largest_block)
-               pool->largest_block = 0;
+               idx = find_next_zero(pool->bitmap[i], last_idx);
+               if (!blocks_free(pool, i, idx, nr_blocks)) {
+                       idx += nr_blocks;
+                       if (idx < SMALLOC_BPI)
+                               last_idx = idx;
+                       else {
+                               last_idx = 0;
+                               while (idx >= SMALLOC_BPI) {
+                                       i++;
+                                       idx -= SMALLOC_BPI;
+                               }
+                       }
+                       continue;
+               }
+               set_blocks(pool, i, idx, nr_blocks);
+               offset = i * SMALLOC_BPL + idx * SMALLOC_BPB;
+               break;
+       }
 
-       /*
-        * also clears free bit
-        */
-       hdr->size = size;
-       pool->last = hdr_nxt(pool, hdr);
-       if (!pool->last)
-               pool->last = pool->map;
-       pool->room -= size;
-       pool_unlock(pool);
-
-       ret = (void *) hdr + sizeof(*hdr);
-       memset(ret, 0, size);
-       return ret;
+       if (i < pool->nr_blocks) {
+               pool->free_blocks -= nr_blocks;
+               ret = pool->map + offset;
+       }
 fail:
+       fio_sem_up(pool->lock);
+       return ret;
+}
+
+static void *smalloc_pool(struct pool *pool, size_t size)
+{
+       size_t alloc_size = size + sizeof(struct block_hdr);
+       void *ptr;
+
        /*
-        * if we fail to allocate, first compact the entries that we missed.
-        * if that also fails, increase the size of the pool
+        * Round to int alignment, so that the postred pointer will
+        * be naturally aligned as well.
         */
-       ++did_restart;
-       if (did_restart <= 1) {
-               if (!compact_pool(pool)) {
-                       pool->last = pool->map;
-                       goto restart;
-               }
-       }
-       ++did_restart;
-       if (did_restart <= 2) {
-               if (!resize_pool(pool)) {
-                       pool->last = pool->map;
-                       goto restart;
-               }
+#ifdef SMALLOC_REDZONE
+       alloc_size += sizeof(unsigned int);
+       alloc_size = (alloc_size + int_mask) & ~int_mask;
+#endif
+
+       ptr = __smalloc_pool(pool, alloc_size);
+       if (ptr) {
+               struct block_hdr *hdr = ptr;
+
+               hdr->size = alloc_size;
+               fill_redzone(hdr);
+
+               ptr += sizeof(*hdr);
+               memset(ptr, 0, size);
        }
-       pool_unlock(pool);
-       return NULL;
+
+       return ptr;
 }
 
-void *smalloc(unsigned int size)
+void *smalloc(size_t size)
 {
-       unsigned int i;
+       unsigned int i, end_pool;
+
+       if (size != (unsigned int) size)
+               return NULL;
 
-       global_read_lock();
        i = last_pool;
+       end_pool = nr_pools;
 
        do {
-               for (; i < nr_pools; i++) {
+               for (; i < end_pool; i++) {
                        void *ptr = smalloc_pool(&mp[i], size);
 
                        if (ptr) {
                                last_pool = i;
-                               global_read_unlock();
                                return ptr;
                        }
                }
                if (last_pool) {
-                       last_pool = 0;
+                       end_pool = last_pool;
+                       last_pool = i = 0;
                        continue;
                }
 
-               if (nr_pools + 1 >= MAX_POOLS)
-                       break;
-               else {
-                       i = nr_pools;
-                       global_read_unlock();
-                       if (add_pool(&mp[nr_pools]))
-                               goto out;
-                       global_read_lock();
-               }
+               break;
        } while (1);
 
-       global_read_unlock();
-out:
+       log_err("smalloc: OOM. Consider using --alloc-size to increase the "
+               "shared memory available.\n");
        return NULL;
 }
 
+/*
+ * Allocate shared memory for an array of 'nmemb' elements of 'size'
+ * bytes each. The memory comes from smalloc(), whose pool allocator
+ * zeroes what it hands out.
+ *
+ * Returns NULL if nmemb * size would overflow size_t, or if smalloc()
+ * cannot satisfy the request.
+ */
+void *scalloc(size_t nmemb, size_t size)
+{
+       /*
+        * Reject products that wrap: a wrapped value would make smalloc()
+        * return a block smaller than the caller believes it has.
+        * (size_t) -1 is the largest size_t value.
+        */
+       if (size && nmemb > (size_t) -1 / size)
+               return NULL;
+
+       return smalloc(nmemb * size);
+}
+
+/*
+ * Duplicate a NUL-terminated string into memory obtained from smalloc().
+ * Returns the copy, or NULL if smalloc() fails. 'str' is passed straight
+ * to strlen(), so it must not be NULL.
+ */
 char *smalloc_strdup(const char *str)
 {
-       char *ptr;
+       char *ptr = NULL;
 
        ptr = smalloc(strlen(str) + 1);
-       strcpy(ptr, str);
+       if (ptr)
+               strcpy(ptr, str);
        return ptr;
 }