mm/slab: refill cpu cache through a new slab without holding a node lock
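The heart of the change is the new cache_grow_begin()/cache_grow_end() pair and the
alloc_block() helper used by cache_alloc_refill(): a new slab is allocated and the
per-cpu array is refilled from it while no node lock is held, and n->list_lock is
then taken only briefly in cache_grow_end() to attach the slab to the node lists.
As a rough illustration of that ordering only, here is a self-contained userspace
sketch; the toy_* names, the simplified data layout and the pthread mutex standing
in for the node spinlock are all hypothetical, not kernel code.

/*
 * Minimal model of the patched refill ordering: grow a slab and refill
 * the cpu cache from it with no node lock held, then take the lock only
 * to publish the (already partially used) slab on the node lists.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define OBJS_PER_SLAB 8
#define CPU_CACHE_CAP 4

struct toy_slab {
        void *objs[OBJS_PER_SLAB];
        int active;                     /* objects already handed out */
        struct toy_slab *next;
};

struct toy_node {
        pthread_mutex_t list_lock;      /* stands in for n->list_lock */
        struct toy_slab *slabs;         /* stands in for the slab lists */
        int free_objects;
};

struct toy_cpu_cache {
        void *entry[CPU_CACHE_CAP];
        int avail;
};

/* Like cache_grow_begin(): allocate a fresh slab, no node lock held. */
static struct toy_slab *toy_grow_begin(void)
{
        struct toy_slab *slab = calloc(1, sizeof(*slab));

        if (!slab)
                return NULL;
        for (int i = 0; i < OBJS_PER_SLAB; i++)
                slab->objs[i] = malloc(16);
        return slab;
}

/* Like alloc_block(): refill the cpu cache straight from the new slab. */
static void toy_alloc_block(struct toy_cpu_cache *ac, struct toy_slab *slab)
{
        while (slab->active < OBJS_PER_SLAB && ac->avail < CPU_CACHE_CAP)
                ac->entry[ac->avail++] = slab->objs[slab->active++];
}

/* Like cache_grow_end(): only now take the node lock and publish the slab. */
static void toy_grow_end(struct toy_node *n, struct toy_slab *slab)
{
        if (!slab)
                return;
        pthread_mutex_lock(&n->list_lock);
        slab->next = n->slabs;
        n->slabs = slab;
        n->free_objects += OBJS_PER_SLAB - slab->active;
        pthread_mutex_unlock(&n->list_lock);
}

int main(void)
{
        struct toy_node n = { .list_lock = PTHREAD_MUTEX_INITIALIZER };
        struct toy_cpu_cache ac = { .avail = 0 };
        struct toy_slab *slab = toy_grow_begin();

        if (!slab)
                return 1;
        toy_alloc_block(&ac, slab);     /* refill before locking the node */
        toy_grow_end(&n, slab);
        printf("cpu cache avail=%d, node free_objects=%d\n",
               ac.avail, n.free_objects);
        return 0;
}

The sketch only makes the ordering point; colouring, freelist management and the
debug fixups in the real patch are deliberately omitted.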
diff --git a/mm/slab.c b/mm/slab.c
index 17e2848979c53a369ad9d7d766d9872173c1787f..37600e91742f6f087402422337992433b80b13e4 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -213,6 +213,11 @@ static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list);
 static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
 static void cache_reap(struct work_struct *unused);
 
+static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
+                                               void **list);
+static inline void fixup_slab_list(struct kmem_cache *cachep,
+                               struct kmem_cache_node *n, struct page *page,
+                               void **list);
 static int slab_early_init = 1;
 
 #define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node))
@@ -421,8 +426,6 @@ static struct kmem_cache kmem_cache_boot = {
        .name = "kmem_cache",
 };
 
-#define BAD_ALIEN_MAGIC 0x01020304ul
-
 static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
 
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
@@ -644,7 +647,7 @@ static int transfer_objects(struct array_cache *to,
 static inline struct alien_cache **alloc_alien_cache(int node,
                                                int limit, gfp_t gfp)
 {
-       return (struct alien_cache **)BAD_ALIEN_MAGIC;
+       return NULL;
 }
 
 static inline void free_alien_cache(struct alien_cache **ac_ptr)
@@ -850,6 +853,46 @@ static inline gfp_t gfp_exact_node(gfp_t flags)
 }
 #endif
 
+static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
+{
+       struct kmem_cache_node *n;
+
+       /*
+        * Set up the kmem_cache_node for this cpu before we can
+        * begin anything. Make sure some other cpu on this
+        * node has not already allocated it.
+        */
+       n = get_node(cachep, node);
+       if (n) {
+               spin_lock_irq(&n->list_lock);
+               n->free_limit = (1 + nr_cpus_node(node)) * cachep->batchcount +
+                               cachep->num;
+               spin_unlock_irq(&n->list_lock);
+
+               return 0;
+       }
+
+       n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node);
+       if (!n)
+               return -ENOMEM;
+
+       kmem_cache_node_init(n);
+       n->next_reap = jiffies + REAPTIMEOUT_NODE +
+                   ((unsigned long)cachep) % REAPTIMEOUT_NODE;
+
+       n->free_limit =
+               (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num;
+
+       /*
+        * The kmem_cache_nodes don't come and go as CPUs
+        * come and go.  slab_mutex is sufficient
+        * protection here.
+        */
+       cachep->node[node] = n;
+
+       return 0;
+}
+
 /*
  * Allocates and initializes node for a node on each slab cache, used for
  * either memory or cpu hotplug.  If memory is being hot-added, the kmem_cache_node
@@ -861,46 +904,73 @@ static inline gfp_t gfp_exact_node(gfp_t flags)
  */
 static int init_cache_node_node(int node)
 {
+       int ret;
        struct kmem_cache *cachep;
-       struct kmem_cache_node *n;
-       const size_t memsize = sizeof(struct kmem_cache_node);
 
        list_for_each_entry(cachep, &slab_caches, list) {
-               /*
-                * Set up the kmem_cache_node for cpu before we can
-                * begin anything. Make sure some other cpu on this
-                * node has not already allocated this
-                */
-               n = get_node(cachep, node);
-               if (!n) {
-                       n = kmalloc_node(memsize, GFP_KERNEL, node);
-                       if (!n)
-                               return -ENOMEM;
-                       kmem_cache_node_init(n);
-                       n->next_reap = jiffies + REAPTIMEOUT_NODE +
-                           ((unsigned long)cachep) % REAPTIMEOUT_NODE;
-
-                       /*
-                        * The kmem_cache_nodes don't come and go as CPUs
-                        * come and go.  slab_mutex is sufficient
-                        * protection here.
-                        */
-                       cachep->node[node] = n;
-               }
-
-               spin_lock_irq(&n->list_lock);
-               n->free_limit =
-                       (1 + nr_cpus_node(node)) *
-                       cachep->batchcount + cachep->num;
-               spin_unlock_irq(&n->list_lock);
+               ret = init_cache_node(cachep, node, GFP_KERNEL);
+               if (ret)
+                       return ret;
        }
+
        return 0;
 }
 
-static inline int slabs_tofree(struct kmem_cache *cachep,
-                                               struct kmem_cache_node *n)
+static int setup_kmem_cache_node(struct kmem_cache *cachep,
+                               int node, gfp_t gfp, bool force_change)
 {
-       return (n->free_objects + cachep->num - 1) / cachep->num;
+       int ret = -ENOMEM;
+       struct kmem_cache_node *n;
+       struct array_cache *old_shared = NULL;
+       struct array_cache *new_shared = NULL;
+       struct alien_cache **new_alien = NULL;
+       LIST_HEAD(list);
+
+       if (use_alien_caches) {
+               new_alien = alloc_alien_cache(node, cachep->limit, gfp);
+               if (!new_alien)
+                       goto fail;
+       }
+
+       if (cachep->shared) {
+               new_shared = alloc_arraycache(node,
+                       cachep->shared * cachep->batchcount, 0xbaadf00d, gfp);
+               if (!new_shared)
+                       goto fail;
+       }
+
+       ret = init_cache_node(cachep, node, gfp);
+       if (ret)
+               goto fail;
+
+       n = get_node(cachep, node);
+       spin_lock_irq(&n->list_lock);
+       if (n->shared && force_change) {
+               free_block(cachep, n->shared->entry,
+                               n->shared->avail, node, &list);
+               n->shared->avail = 0;
+       }
+
+       if (!n->shared || force_change) {
+               old_shared = n->shared;
+               n->shared = new_shared;
+               new_shared = NULL;
+       }
+
+       if (!n->alien) {
+               n->alien = new_alien;
+               new_alien = NULL;
+       }
+
+       spin_unlock_irq(&n->list_lock);
+       slabs_destroy(cachep, &list);
+
+fail:
+       kfree(old_shared);
+       kfree(new_shared);
+       free_alien_cache(new_alien);
+
+       return ret;
 }
 
 static void cpuup_canceled(long cpu)
@@ -967,14 +1037,13 @@ free_slab:
                n = get_node(cachep, node);
                if (!n)
                        continue;
-               drain_freelist(cachep, n, slabs_tofree(cachep, n));
+               drain_freelist(cachep, n, INT_MAX);
        }
 }
 
 static int cpuup_prepare(long cpu)
 {
        struct kmem_cache *cachep;
-       struct kmem_cache_node *n = NULL;
        int node = cpu_to_mem(cpu);
        int err;
 
@@ -993,44 +1062,9 @@ static int cpuup_prepare(long cpu)
         * array caches
         */
        list_for_each_entry(cachep, &slab_caches, list) {
-               struct array_cache *shared = NULL;
-               struct alien_cache **alien = NULL;
-
-               if (cachep->shared) {
-                       shared = alloc_arraycache(node,
-                               cachep->shared * cachep->batchcount,
-                               0xbaadf00d, GFP_KERNEL);
-                       if (!shared)
-                               goto bad;
-               }
-               if (use_alien_caches) {
-                       alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
-                       if (!alien) {
-                               kfree(shared);
-                               goto bad;
-                       }
-               }
-               n = get_node(cachep, node);
-               BUG_ON(!n);
-
-               spin_lock_irq(&n->list_lock);
-               if (!n->shared) {
-                       /*
-                        * We are serialised from CPU_DEAD or
-                        * CPU_UP_CANCELLED by the cpucontrol lock
-                        */
-                       n->shared = shared;
-                       shared = NULL;
-               }
-#ifdef CONFIG_NUMA
-               if (!n->alien) {
-                       n->alien = alien;
-                       alien = NULL;
-               }
-#endif
-               spin_unlock_irq(&n->list_lock);
-               kfree(shared);
-               free_alien_cache(alien);
+               err = setup_kmem_cache_node(cachep, node, GFP_KERNEL, false);
+               if (err)
+                       goto bad;
        }
 
        return 0;
@@ -1119,7 +1153,7 @@ static int __meminit drain_cache_node_node(int node)
                if (!n)
                        continue;
 
-               drain_freelist(cachep, n, slabs_tofree(cachep, n));
+               drain_freelist(cachep, n, INT_MAX);
 
                if (!list_empty(&n->slabs_full) ||
                    !list_empty(&n->slabs_partial)) {
@@ -1212,7 +1246,7 @@ void __init kmem_cache_init(void)
                                        sizeof(struct rcu_head));
        kmem_cache = &kmem_cache_boot;
 
-       if (num_possible_nodes() == 1)
+       if (!IS_ENABLED(CONFIG_NUMA) || num_possible_nodes() == 1)
                use_alien_caches = 0;
 
        for (i = 0; i < NUM_INIT_LISTS; i++)
@@ -1781,7 +1815,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
 
                        /*
                         * Needed to avoid possible looping condition
-                        * in cache_grow()
+                        * in cache_grow_begin()
                         */
                        if (OFF_SLAB(freelist_cache))
                                continue;
@@ -2180,6 +2214,11 @@ static void check_irq_on(void)
        BUG_ON(irqs_disabled());
 }
 
+static void check_mutex_acquired(void)
+{
+       BUG_ON(!mutex_is_locked(&slab_mutex));
+}
+
 static void check_spinlock_acquired(struct kmem_cache *cachep)
 {
 #ifdef CONFIG_SMP
@@ -2199,13 +2238,27 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
 #else
 #define check_irq_off()        do { } while(0)
 #define check_irq_on() do { } while(0)
+#define check_mutex_acquired() do { } while(0)
 #define check_spinlock_acquired(x) do { } while(0)
 #define check_spinlock_acquired_node(x, y) do { } while(0)
 #endif
 
-static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
-                       struct array_cache *ac,
-                       int force, int node);
+static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac,
+                               int node, bool free_all, struct list_head *list)
+{
+       int tofree;
+
+       if (!ac || !ac->avail)
+               return;
+
+       tofree = free_all ? ac->avail : (ac->limit + 4) / 5;
+       if (tofree > ac->avail)
+               tofree = (ac->avail + 1) / 2;
+
+       free_block(cachep, ac->entry, tofree, node, list);
+       ac->avail -= tofree;
+       memmove(ac->entry, &(ac->entry[tofree]), sizeof(void *) * ac->avail);
+}
 
 static void do_drain(void *arg)
 {
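The new drain_array_locked() helper above also centralizes the "how many objects
to drain" arithmetic: everything when free_all is set, otherwise roughly a fifth
of the limit, capped at half of what is actually available. A tiny standalone
check of just that arithmetic (illustrative only, plain ints in place of
struct array_cache):

#include <stdio.h>

/* Mirrors the tofree computation in drain_array_locked() (illustrative). */
static int tofree(int avail, int limit, int free_all)
{
        int n = free_all ? avail : (limit + 4) / 5;

        if (n > avail)
                n = (avail + 1) / 2;
        return n;
}

int main(void)
{
        printf("%d %d %d\n",
               tofree(100, 120, 0),     /* (120+4)/5 = 24 */
               tofree(10, 120, 0),      /* 24 > 10, so (10+1)/2 = 5 */
               tofree(100, 120, 1));    /* free_all: 100 */
        return 0;
}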
@@ -2229,6 +2282,7 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
 {
        struct kmem_cache_node *n;
        int node;
+       LIST_HEAD(list);
 
        on_each_cpu(do_drain, cachep, 1);
        check_irq_on();
@@ -2236,8 +2290,13 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
                if (n->alien)
                        drain_alien_cache(cachep, n->alien);
 
-       for_each_kmem_cache_node(cachep, node, n)
-               drain_array(cachep, n, n->shared, 1, node);
+       for_each_kmem_cache_node(cachep, node, n) {
+               spin_lock_irq(&n->list_lock);
+               drain_array_locked(cachep, n->shared, node, true, &list);
+               spin_unlock_irq(&n->list_lock);
+
+               slabs_destroy(cachep, &list);
+       }
 }
 
 /*
@@ -2288,7 +2347,7 @@ int __kmem_cache_shrink(struct kmem_cache *cachep, bool deactivate)
 
        check_irq_on();
        for_each_kmem_cache_node(cachep, node, n) {
-               drain_freelist(cachep, n, slabs_tofree(cachep, n));
+               drain_freelist(cachep, n, INT_MAX);
 
                ret += !list_empty(&n->slabs_full) ||
                        !list_empty(&n->slabs_partial);
@@ -2502,13 +2561,15 @@ static void slab_map_pages(struct kmem_cache *cache, struct page *page,
  * Grow (by 1) the number of slabs within a cache.  This is called by
  * kmem_cache_alloc() when there are no active objs left in a cache.
  */
-static int cache_grow(struct kmem_cache *cachep,
-               gfp_t flags, int nodeid, struct page *page)
+static struct page *cache_grow_begin(struct kmem_cache *cachep,
+                               gfp_t flags, int nodeid)
 {
        void *freelist;
        size_t offset;
        gfp_t local_flags;
+       int page_node;
        struct kmem_cache_node *n;
+       struct page *page;
 
        /*
         * Be lazy and only check for valid flags here,  keeping it out of the
@@ -2520,20 +2581,7 @@ static int cache_grow(struct kmem_cache *cachep,
        }
        local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
 
-       /* Take the node list lock to change the colour_next on this node */
        check_irq_off();
-       n = get_node(cachep, nodeid);
-       spin_lock(&n->list_lock);
-
-       /* Get colour for the slab, and cal the next value. */
-       offset = n->colour_next;
-       n->colour_next++;
-       if (n->colour_next >= cachep->colour)
-               n->colour_next = 0;
-       spin_unlock(&n->list_lock);
-
-       offset *= cachep->colour_off;
-
        if (gfpflags_allow_blocking(local_flags))
                local_irq_enable();
 
@@ -2549,14 +2597,27 @@ static int cache_grow(struct kmem_cache *cachep,
         * Get mem for the objs.  Attempt to allocate a physical page from
         * 'nodeid'.
         */
-       if (!page)
-               page = kmem_getpages(cachep, local_flags, nodeid);
+       page = kmem_getpages(cachep, local_flags, nodeid);
        if (!page)
                goto failed;
 
+       page_node = page_to_nid(page);
+       n = get_node(cachep, page_node);
+
+       /* Get colour for the slab, and calculate the next value. */
+       n->colour_next++;
+       if (n->colour_next >= cachep->colour)
+               n->colour_next = 0;
+
+       offset = n->colour_next;
+       if (offset >= cachep->colour)
+               offset = 0;
+
+       offset *= cachep->colour_off;
+
        /* Get slab management. */
        freelist = alloc_slabmgmt(cachep, page, offset,
-                       local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
+                       local_flags & ~GFP_CONSTRAINT_MASK, page_node);
        if (OFF_SLAB(cachep) && !freelist)
                goto opps1;
 
@@ -2567,21 +2628,40 @@ static int cache_grow(struct kmem_cache *cachep,
 
        if (gfpflags_allow_blocking(local_flags))
                local_irq_disable();
-       check_irq_off();
-       spin_lock(&n->list_lock);
 
-       /* Make slab active. */
-       list_add_tail(&page->lru, &(n->slabs_free));
-       STATS_INC_GROWN(cachep);
-       n->free_objects += cachep->num;
-       spin_unlock(&n->list_lock);
-       return 1;
+       return page;
+
 opps1:
        kmem_freepages(cachep, page);
 failed:
        if (gfpflags_allow_blocking(local_flags))
                local_irq_disable();
-       return 0;
+       return NULL;
+}
+
+static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
+{
+       struct kmem_cache_node *n;
+       void *list = NULL;
+
+       check_irq_off();
+
+       if (!page)
+               return;
+
+       INIT_LIST_HEAD(&page->lru);
+       n = get_node(cachep, page_to_nid(page));
+
+       spin_lock(&n->list_lock);
+       if (!page->active)
+               list_add_tail(&page->lru, &(n->slabs_free));
+       else
+               fixup_slab_list(cachep, n, page, &list);
+       STATS_INC_GROWN(cachep);
+       n->free_objects += cachep->num - page->active;
+       spin_unlock(&n->list_lock);
+
+       fixup_objfreelist_debug(cachep, &list);
 }
 
 #if DEBUG
@@ -2785,6 +2865,30 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
        return obj;
 }
 
+/*
+ * The slab list should be fixed up by fixup_slab_list() for an existing
+ * slab, or by cache_grow_end() for a new slab.
+ */
+static __always_inline int alloc_block(struct kmem_cache *cachep,
+               struct array_cache *ac, struct page *page, int batchcount)
+{
+       /*
+        * There must be at least one object available for
+        * allocation.
+        */
+       BUG_ON(page->active >= cachep->num);
+
+       while (page->active < cachep->num && batchcount--) {
+               STATS_INC_ALLOCED(cachep);
+               STATS_INC_ACTIVE(cachep);
+               STATS_SET_HIGH(cachep);
+
+               ac->entry[ac->avail++] = slab_get_obj(cachep, page);
+       }
+
+       return batchcount;
+}
+
 static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
 {
        int batchcount;
@@ -2792,11 +2896,11 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
        struct array_cache *ac;
        int node;
        void *list = NULL;
+       struct page *page;
 
        check_irq_off();
        node = numa_mem_id();
 
-retry:
        ac = cpu_cache_get(cachep);
        batchcount = ac->batchcount;
        if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
@@ -2819,7 +2923,6 @@ retry:
        }
 
        while (batchcount > 0) {
-               struct page *page;
                /* Get slab alloc is to come from. */
                page = get_first_slab(n, false);
                if (!page)
@@ -2827,21 +2930,7 @@ retry:
 
                check_spinlock_acquired(cachep);
 
-               /*
-                * The slab was either on partial or free list so
-                * there must be at least one object available for
-                * allocation.
-                */
-               BUG_ON(page->active >= cachep->num);
-
-               while (page->active < cachep->num && batchcount--) {
-                       STATS_INC_ALLOCED(cachep);
-                       STATS_INC_ACTIVE(cachep);
-                       STATS_SET_HIGH(cachep);
-
-                       ac->entry[ac->avail++] = slab_get_obj(cachep, page);
-               }
-
+               batchcount = alloc_block(cachep, ac, page, batchcount);
                fixup_slab_list(cachep, n, page, &list);
        }
 
@@ -2852,8 +2941,6 @@ alloc_done:
        fixup_objfreelist_debug(cachep, &list);
 
        if (unlikely(!ac->avail)) {
-               int x;
-
                /* Check if we can use obj in pfmemalloc slab */
                if (sk_memalloc_socks()) {
                        void *obj = cache_alloc_pfmemalloc(cachep, n, flags);
@@ -2862,18 +2949,19 @@ alloc_done:
                                return obj;
                }
 
-               x = cache_grow(cachep, gfp_exact_node(flags), node, NULL);
+               page = cache_grow_begin(cachep, gfp_exact_node(flags), node);
 
-               /* cache_grow can reenable interrupts, then ac could change. */
+               /*
+                * cache_grow_begin() can reenable interrupts,
+                * then ac could change.
+                */
                ac = cpu_cache_get(cachep);
-               node = numa_mem_id();
+               if (!ac->avail && page)
+                       alloc_block(cachep, ac, page, batchcount);
+               cache_grow_end(cachep, page);
 
-               /* no objects in sight? abort */
-               if (!x && ac->avail == 0)
+               if (!ac->avail)
                        return NULL;
-
-               if (!ac->avail)         /* objects refilled by interrupt? */
-                       goto retry;
        }
        ac->touched = 1;
 
@@ -2998,19 +3086,17 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
 static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
 {
        struct zonelist *zonelist;
-       gfp_t local_flags;
        struct zoneref *z;
        struct zone *zone;
        enum zone_type high_zoneidx = gfp_zone(flags);
        void *obj = NULL;
+       struct page *page;
        int nid;
        unsigned int cpuset_mems_cookie;
 
        if (flags & __GFP_THISNODE)
                return NULL;
 
-       local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
-
 retry_cpuset:
        cpuset_mems_cookie = read_mems_allowed_begin();
        zonelist = node_zonelist(mempolicy_slab_node(), flags);
@@ -3040,33 +3126,19 @@ retry:
                 * We may trigger various forms of reclaim on the allowed
                 * set and go into memory reserves if necessary.
                 */
-               struct page *page;
-
-               if (gfpflags_allow_blocking(local_flags))
-                       local_irq_enable();
-               kmem_flagcheck(cache, flags);
-               page = kmem_getpages(cache, local_flags, numa_mem_id());
-               if (gfpflags_allow_blocking(local_flags))
-                       local_irq_disable();
+               page = cache_grow_begin(cache, flags, numa_mem_id());
+               cache_grow_end(cache, page);
                if (page) {
+                       nid = page_to_nid(page);
+                       obj = ____cache_alloc_node(cache,
+                               gfp_exact_node(flags), nid);
+
                        /*
-                        * Insert into the appropriate per node queues
+                        * Another processor may allocate the objects in
+                        * the slab since we are not holding any locks.
                         */
-                       nid = page_to_nid(page);
-                       if (cache_grow(cache, flags, nid, page)) {
-                               obj = ____cache_alloc_node(cache,
-                                       gfp_exact_node(flags), nid);
-                               if (!obj)
-                                       /*
-                                        * Another processor may allocate the
-                                        * objects in the slab since we are
-                                        * not holding any locks.
-                                        */
-                                       goto retry;
-                       } else {
-                               /* cache_grow already freed obj */
-                               obj = NULL;
-                       }
+                       if (!obj)
+                               goto retry;
                }
        }
 
@@ -3083,15 +3155,13 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
 {
        struct page *page;
        struct kmem_cache_node *n;
-       void *obj;
+       void *obj = NULL;
        void *list = NULL;
-       int x;
 
        VM_BUG_ON(nodeid < 0 || nodeid >= MAX_NUMNODES);
        n = get_node(cachep, nodeid);
        BUG_ON(!n);
 
-retry:
        check_irq_off();
        spin_lock(&n->list_lock);
        page = get_first_slab(n, false);
@@ -3113,18 +3183,18 @@ retry:
 
        spin_unlock(&n->list_lock);
        fixup_objfreelist_debug(cachep, &list);
-       goto done;
+       return obj;
 
 must_grow:
        spin_unlock(&n->list_lock);
-       x = cache_grow(cachep, gfp_exact_node(flags), nodeid, NULL);
-       if (x)
-               goto retry;
-
-       return fallback_alloc(cachep, flags);
+       page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
+       if (page) {
+               /* This slab isn't counted yet so don't update free_objects */
+               obj = slab_get_obj(cachep, page);
+       }
+       cache_grow_end(cachep, page);
 
-done:
-       return obj;
+       return obj ? obj : fallback_alloc(cachep, flags);
 }
 
 static __always_inline void *
@@ -3242,6 +3312,9 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
 {
        int i;
        struct kmem_cache_node *n = get_node(cachep, node);
+       struct page *page;
+
+       n->free_objects += nr_objects;
 
        for (i = 0; i < nr_objects; i++) {
                void *objp;
@@ -3254,17 +3327,11 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
                check_spinlock_acquired_node(cachep, node);
                slab_put_obj(cachep, page, objp);
                STATS_DEC_ACTIVE(cachep);
-               n->free_objects++;
 
                /* fixup slab chains */
-               if (page->active == 0) {
-                       if (n->free_objects > n->free_limit) {
-                               n->free_objects -= cachep->num;
-                               list_add_tail(&page->lru, list);
-                       } else {
-                               list_add(&page->lru, &n->slabs_free);
-                       }
-               } else {
+               if (page->active == 0)
+                       list_add(&page->lru, &n->slabs_free);
+               else {
                        /* Unconditionally move a slab to the end of the
                         * partial list on free - maximum time for the
                         * other objects to be freed, too.
@@ -3272,6 +3339,14 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
                        list_add_tail(&page->lru, &n->slabs_partial);
                }
        }
+
+       while (n->free_objects > n->free_limit && !list_empty(&n->slabs_free)) {
+               n->free_objects -= cachep->num;
+
+               page = list_last_entry(&n->slabs_free, struct page, lru);
+               list_del(&page->lru);
+               list_add(&page->lru, list);
+       }
 }
 
 static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
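The free_block() hunks above also change how free_limit is enforced: the freed
count is added to n->free_objects up front, completely free slabs always go onto
n->slabs_free, and a single pass at the end peels whole slabs off the tail of
slabs_free while free_objects still exceeds free_limit, queueing them on the
caller's list so they can be destroyed outside the lock. A minimal standalone
model of that trimming pass (toy counters instead of the struct page lists,
purely illustrative):

#include <stdio.h>

#define OBJS_PER_SLAB 8

int main(void)
{
        int nr_free_slabs = 5;                  /* slabs on n->slabs_free */
        int free_objects = 5 * OBJS_PER_SLAB;   /* n->free_objects after the bulk add */
        int free_limit = 20;                    /* n->free_limit */
        int to_destroy = 0;                     /* slabs moved onto the caller's list */

        /*
         * Mirror of the new trimming loop: while the node holds more free
         * objects than free_limit allows, detach whole slabs from the tail
         * of slabs_free and queue them for destruction outside the lock.
         */
        while (free_objects > free_limit && nr_free_slabs > 0) {
                free_objects -= OBJS_PER_SLAB;  /* n->free_objects -= cachep->num */
                nr_free_slabs--;                /* list_last_entry() + list_del() */
                to_destroy++;                   /* list_add(&page->lru, list) */
        }

        printf("kept %d free slabs (%d objects), queued %d for destruction\n",
               nr_free_slabs, free_objects, to_destroy);
        return 0;
}

With eight objects per slab, five free slabs and free_limit = 20, the loop queues
three slabs for destruction and keeps two (16 objects), which is the invariant
the real loop restores.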
@@ -3645,72 +3720,19 @@ EXPORT_SYMBOL(kfree);
 /*
  * This initializes kmem_cache_node or resizes various caches for all nodes.
  */
-static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
+static int setup_kmem_cache_nodes(struct kmem_cache *cachep, gfp_t gfp)
 {
+       int ret;
        int node;
        struct kmem_cache_node *n;
-       struct array_cache *new_shared;
-       struct alien_cache **new_alien = NULL;
 
        for_each_online_node(node) {
-
-               if (use_alien_caches) {
-                       new_alien = alloc_alien_cache(node, cachep->limit, gfp);
-                       if (!new_alien)
-                               goto fail;
-               }
-
-               new_shared = NULL;
-               if (cachep->shared) {
-                       new_shared = alloc_arraycache(node,
-                               cachep->shared*cachep->batchcount,
-                                       0xbaadf00d, gfp);
-                       if (!new_shared) {
-                               free_alien_cache(new_alien);
-                               goto fail;
-                       }
-               }
-
-               n = get_node(cachep, node);
-               if (n) {
-                       struct array_cache *shared = n->shared;
-                       LIST_HEAD(list);
-
-                       spin_lock_irq(&n->list_lock);
-
-                       if (shared)
-                               free_block(cachep, shared->entry,
-                                               shared->avail, node, &list);
-
-                       n->shared = new_shared;
-                       if (!n->alien) {
-                               n->alien = new_alien;
-                               new_alien = NULL;
-                       }
-                       n->free_limit = (1 + nr_cpus_node(node)) *
-                                       cachep->batchcount + cachep->num;
-                       spin_unlock_irq(&n->list_lock);
-                       slabs_destroy(cachep, &list);
-                       kfree(shared);
-                       free_alien_cache(new_alien);
-                       continue;
-               }
-               n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node);
-               if (!n) {
-                       free_alien_cache(new_alien);
-                       kfree(new_shared);
+               ret = setup_kmem_cache_node(cachep, node, gfp, true);
+               if (ret)
                        goto fail;
-               }
 
-               kmem_cache_node_init(n);
-               n->next_reap = jiffies + REAPTIMEOUT_NODE +
-                               ((unsigned long)cachep) % REAPTIMEOUT_NODE;
-               n->shared = new_shared;
-               n->alien = new_alien;
-               n->free_limit = (1 + nr_cpus_node(node)) *
-                                       cachep->batchcount + cachep->num;
-               cachep->node[node] = n;
        }
+
        return 0;
 
 fail:
@@ -3752,7 +3774,7 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
        cachep->shared = shared;
 
        if (!prev)
-               goto alloc_node;
+               goto setup_node;
 
        for_each_online_cpu(cpu) {
                LIST_HEAD(list);
@@ -3769,8 +3791,8 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
        }
        free_percpu(prev);
 
-alloc_node:
-       return alloc_kmem_cache_node(cachep, gfp);
+setup_node:
+       return setup_kmem_cache_nodes(cachep, gfp);
 }
 
 static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
@@ -3869,29 +3891,26 @@ skip_setup:
  * if drain_array() is used on the shared array.
  */
 static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
-                        struct array_cache *ac, int force, int node)
+                        struct array_cache *ac, int node)
 {
        LIST_HEAD(list);
-       int tofree;
+
+       /* ac from n->shared can be freed if we don't hold the slab_mutex. */
+       check_mutex_acquired();
 
        if (!ac || !ac->avail)
                return;
-       if (ac->touched && !force) {
+
+       if (ac->touched) {
                ac->touched = 0;
-       } else {
-               spin_lock_irq(&n->list_lock);
-               if (ac->avail) {
-                       tofree = force ? ac->avail : (ac->limit + 4) / 5;
-                       if (tofree > ac->avail)
-                               tofree = (ac->avail + 1) / 2;
-                       free_block(cachep, ac->entry, tofree, node, &list);
-                       ac->avail -= tofree;
-                       memmove(ac->entry, &(ac->entry[tofree]),
-                               sizeof(void *) * ac->avail);
-               }
-               spin_unlock_irq(&n->list_lock);
-               slabs_destroy(cachep, &list);
+               return;
        }
+
+       spin_lock_irq(&n->list_lock);
+       drain_array_locked(cachep, ac, node, false, &list);
+       spin_unlock_irq(&n->list_lock);
+
+       slabs_destroy(cachep, &list);
 }
 
 /**
@@ -3929,7 +3948,7 @@ static void cache_reap(struct work_struct *w)
 
                reap_alien(searchp, n);
 
-               drain_array(searchp, n, cpu_cache_get(searchp), 0, node);
+               drain_array(searchp, n, cpu_cache_get(searchp), node);
 
                /*
                 * These are racy checks but it does not matter
@@ -3940,7 +3959,7 @@ static void cache_reap(struct work_struct *w)
 
                n->next_reap = jiffies + REAPTIMEOUT_NODE;
 
-               drain_array(searchp, n, n->shared, 0, node);
+               drain_array(searchp, n, n->shared, node);
 
                if (n->free_touched)
                        n->free_touched = 0;