pr_cont(" are going to be killed due to memory.oom.group set\n");
}
+/*
+ * The value of NR_MEMCG_STOCK is selected to keep the cached memcgs and their
+ * nr_pages in a single cacheline. This may change in the future.
+ */
+#define NR_MEMCG_STOCK 7
struct memcg_stock_pcp {
local_trylock_t stock_lock;
- struct mem_cgroup *cached; /* this never be root cgroup */
- unsigned int nr_pages;
+ uint8_t nr_pages[NR_MEMCG_STOCK];
+ struct mem_cgroup *cached[NR_MEMCG_STOCK];
struct obj_cgroup *cached_objcg;
struct pglist_data *cached_pgdat;
unsigned long flags;
#define FLUSHING_CACHED_CHARGE 0
};
-static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock) = {
+static DEFINE_PER_CPU_ALIGNED(struct memcg_stock_pcp, memcg_stock) = {
.stock_lock = INIT_LOCAL_TRYLOCK(stock_lock),
};
static DEFINE_MUTEX(percpu_charge_mutex);
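The single-cacheline claim in the NR_MEMCG_STOCK comment above can be
sanity-checked with a quick user-space model. This is only a sketch, assuming
64-bit pointers and a 64-byte cacheline (the common case, not something the
patch itself guarantees); struct stock_model is a made-up stand-in that leaves
out stock_lock, cached_objcg and the rest of struct memcg_stock_pcp.

/* Illustrative sizing model only, not kernel code. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define NR_MEMCG_STOCK	7
#define CACHELINE_BYTES	64	/* assumption: typical cacheline size */

struct stock_model {
	uint8_t nr_pages[NR_MEMCG_STOCK];	/* 7 bytes */
	void *cached[NR_MEMCG_STOCK];		/* 56 bytes with 8-byte pointers */
};

int main(void)
{
	/* 7 + 1 byte of padding + 56 = 64 bytes: both arrays share one line. */
	static_assert(sizeof(struct stock_model) <= CACHELINE_BYTES,
		      "cached[] and nr_pages[] spill past one cacheline");
	printf("model size: %zu bytes\n", sizeof(struct stock_model));
	return 0;
}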
gfp_t gfp_mask)
{
struct memcg_stock_pcp *stock;
- unsigned int stock_pages;
+ uint8_t stock_pages;
unsigned long flags;
bool ret = false;
+ int i;
if (nr_pages > MEMCG_CHARGE_BATCH)
return ret;
if (!local_trylock_irqsave(&memcg_stock.stock_lock, flags))
return ret;
stock = this_cpu_ptr(&memcg_stock);
- stock_pages = READ_ONCE(stock->nr_pages);
- if (memcg == READ_ONCE(stock->cached) && stock_pages >= nr_pages) {
- WRITE_ONCE(stock->nr_pages, stock_pages - nr_pages);
- ret = true;
+
+ for (i = 0; i < NR_MEMCG_STOCK; ++i) {
+ if (memcg != READ_ONCE(stock->cached[i]))
+ continue;
+
+ stock_pages = READ_ONCE(stock->nr_pages[i]);
+ if (stock_pages >= nr_pages) {
+ WRITE_ONCE(stock->nr_pages[i], stock_pages - nr_pages);
+ ret = true;
+ }
+ break;
}
local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
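To make the new consume path easier to follow, here is a hedged user-space
model of just the slot scan. consume_from_slots() and struct stock_model are
illustrative names, the READ_ONCE()/WRITE_ONCE() annotations are dropped, and
the local_trylock_irqsave()/local_unlock_irqrestore() protection the kernel
code relies on is assumed to be handled by the caller.

/* Illustrative model of the per-slot consume scan; not kernel code. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_MEMCG_STOCK 7

struct stock_model {
	uint8_t nr_pages[NR_MEMCG_STOCK];
	const void *cached[NR_MEMCG_STOCK];
};

/* Return true iff @memcg is cached with at least @nr_pages pages stocked. */
static bool consume_from_slots(struct stock_model *stock,
			       const void *memcg, uint8_t nr_pages)
{
	for (int i = 0; i < NR_MEMCG_STOCK; i++) {
		if (stock->cached[i] != memcg)
			continue;
		if (stock->nr_pages[i] >= nr_pages) {
			stock->nr_pages[i] -= nr_pages;
			return true;
		}
		/* Matching slot found but underfilled: give up, as the
		 * kernel loop does with its break. */
		return false;
	}
	return false;
}

int main(void)
{
	int memcg_a, memcg_b;	/* addresses stand in for memcg pointers */
	struct stock_model stock = {
		.nr_pages = { 32, 4 },
		.cached = { &memcg_a, &memcg_b },
	};

	printf("consume 16 from A: %d\n", consume_from_slots(&stock, &memcg_a, 16)); /* 1 */
	printf("consume  8 from B: %d\n", consume_from_slots(&stock, &memcg_b, 8));  /* 0 */
	return 0;
}

Like the kernel loop, the scan stops at the first matching slot even when it
is underfilled; a memcg is cached in at most one slot per CPU, so there is
nothing further to search.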
/*
* Returns stocks cached in percpu and reset cached information.
*/
-static void drain_stock(struct memcg_stock_pcp *stock)
+static void drain_stock(struct memcg_stock_pcp *stock, int i)
{
- unsigned int stock_pages = READ_ONCE(stock->nr_pages);
- struct mem_cgroup *old = READ_ONCE(stock->cached);
+ struct mem_cgroup *old = READ_ONCE(stock->cached[i]);
+ uint8_t stock_pages;
if (!old)
return;
+ stock_pages = READ_ONCE(stock->nr_pages[i]);
if (stock_pages) {
memcg_uncharge(old, stock_pages);
- WRITE_ONCE(stock->nr_pages, 0);
+ WRITE_ONCE(stock->nr_pages[i], 0);
}
css_put(&old->css);
- WRITE_ONCE(stock->cached, NULL);
+ WRITE_ONCE(stock->cached[i], NULL);
+}
+
+static void drain_stock_fully(struct memcg_stock_pcp *stock)
+{
+ int i;
+
+ for (i = 0; i < NR_MEMCG_STOCK; ++i)
+ drain_stock(stock, i);
}
static void drain_local_stock(struct work_struct *dummy)
stock = this_cpu_ptr(&memcg_stock);
drain_obj_stock(stock);
- drain_stock(stock);
+ drain_stock_fully(stock);
clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
{
struct memcg_stock_pcp *stock;
- unsigned int stock_pages;
+ struct mem_cgroup *cached;
+ uint8_t stock_pages;
unsigned long flags;
+ bool success = false;
+ int empty_slot = -1;
+ int i;
+
+ /*
+ * For now, limit MEMCG_CHARGE_BATCH to 127 or less. If in the future we
+ * decide to increase it beyond 127, we will need more careful handling
+ * of nr_pages[] in struct memcg_stock_pcp.
+ */
+ BUILD_BUG_ON(MEMCG_CHARGE_BATCH > S8_MAX);
VM_WARN_ON_ONCE(mem_cgroup_is_root(memcg));
- if (!local_trylock_irqsave(&memcg_stock.stock_lock, flags)) {
+ if (nr_pages > MEMCG_CHARGE_BATCH ||
+ !local_trylock_irqsave(&memcg_stock.stock_lock, flags)) {
/*
- * In case of unlikely failure to lock percpu stock_lock
- * uncharge memcg directly.
+ * In case of a larger-than-batch refill or an unlikely failure to
+ * lock the percpu stock_lock, uncharge the memcg directly.
*/
memcg_uncharge(memcg, nr_pages);
return;
}
stock = this_cpu_ptr(&memcg_stock);
- if (READ_ONCE(stock->cached) != memcg) { /* reset if necessary */
- drain_stock(stock);
- css_get(&memcg->css);
- WRITE_ONCE(stock->cached, memcg);
+ for (i = 0; i < NR_MEMCG_STOCK; ++i) {
+ cached = READ_ONCE(stock->cached[i]);
+ if (!cached && empty_slot == -1)
+ empty_slot = i;
+ if (memcg == cached) {
+ stock_pages = READ_ONCE(stock->nr_pages[i]) + nr_pages;
+ WRITE_ONCE(stock->nr_pages[i], stock_pages);
+ if (stock_pages > MEMCG_CHARGE_BATCH)
+ drain_stock(stock, i);
+ success = true;
+ break;
+ }
}
- stock_pages = READ_ONCE(stock->nr_pages) + nr_pages;
- WRITE_ONCE(stock->nr_pages, stock_pages);
- if (stock_pages > MEMCG_CHARGE_BATCH)
- drain_stock(stock);
+ if (!success) {
+ i = empty_slot;
+ if (i == -1) {
+ i = get_random_u32_below(NR_MEMCG_STOCK);
+ drain_stock(stock, i);
+ }
+ css_get(&memcg->css);
+ WRITE_ONCE(stock->cached[i], memcg);
+ WRITE_ONCE(stock->nr_pages[i], nr_pages);
+ }
local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
}
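The BUILD_BUG_ON(MEMCG_CHARGE_BATCH > S8_MAX) above is what makes the one-byte
nr_pages[] slots safe: a slot that survives refill_stock() holds at most
MEMCG_CHARGE_BATCH pages (anything above the batch is drained immediately),
and a single refill adds at most MEMCG_CHARGE_BATCH more, so the transient sum
is bounded by 2 * 127 = 254 and cannot wrap a u8. A small user-space sketch of
that arithmetic (MEMCG_CHARGE_BATCH is 64 in current kernels; the value here
is only for illustration):

/* Illustrative check of the u8 headroom argument; not kernel code. */
#include <assert.h>
#include <stdint.h>

#define S8_MAX			127
#define U8_MAX			255
#define MEMCG_CHARGE_BATCH	64	/* 64 in current kernels; shown for illustration */

int main(void)
{
	/* The patch requires the batch to fit in a signed byte. */
	static_assert(MEMCG_CHARGE_BATCH <= S8_MAX, "batch too large for u8 slots");

	/*
	 * Worst case inside refill_stock(): a slot already holds up to
	 * MEMCG_CHARGE_BATCH pages and the refill adds the same again.
	 * 127 + 127 = 254 <= 255, so the u8 addition never wraps before
	 * the overfull slot is drained.
	 */
	static_assert(2 * S8_MAX <= U8_MAX, "u8 sum could wrap");

	uint8_t slot = MEMCG_CHARGE_BATCH;
	uint8_t sum = slot + MEMCG_CHARGE_BATCH;	/* 128, still representable */
	assert(sum > MEMCG_CHARGE_BATCH);		/* would trigger drain_stock() */
	return 0;
}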
+static bool is_drain_needed(struct memcg_stock_pcp *stock,
+ struct mem_cgroup *root_memcg)
+{
+ struct mem_cgroup *memcg;
+ bool flush = false;
+ int i;
+
+ rcu_read_lock();
+
+ if (obj_stock_flush_required(stock, root_memcg)) {
+ flush = true;
+ goto out;
+ }
+
+ for (i = 0; i < NR_MEMCG_STOCK; ++i) {
+ memcg = READ_ONCE(stock->cached[i]);
+ if (!memcg)
+ continue;
+
+ if (READ_ONCE(stock->nr_pages[i]) &&
+ mem_cgroup_is_descendant(memcg, root_memcg)) {
+ flush = true;
+ break;
+ }
+ }
+out:
+ rcu_read_unlock();
+ return flush;
+}
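The helper above decides whether a remote CPU's cache is worth flushing: it is
as soon as any slot holds pages for a memcg inside root_memcg's subtree (or
the objcg side needs a flush, which is not modelled here). Below is a hedged
user-space model with a toy parent-pointer hierarchy standing in for
mem_cgroup_is_descendant(); all names are illustrative.

/* Illustrative model of the "does any slot belong to this subtree" test. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define NR_MEMCG_STOCK 7

struct toy_memcg {
	struct toy_memcg *parent;
};

struct stock_model {
	uint8_t nr_pages[NR_MEMCG_STOCK];
	struct toy_memcg *cached[NR_MEMCG_STOCK];
};

/* Stand-in for mem_cgroup_is_descendant(): walk the parent chain. */
static bool is_descendant(struct toy_memcg *memcg, struct toy_memcg *root)
{
	for (; memcg; memcg = memcg->parent)
		if (memcg == root)
			return true;
	return false;
}

static bool drain_needed(struct stock_model *stock, struct toy_memcg *root)
{
	for (int i = 0; i < NR_MEMCG_STOCK; i++) {
		struct toy_memcg *memcg = stock->cached[i];

		if (memcg && stock->nr_pages[i] && is_descendant(memcg, root))
			return true;
	}
	return false;
}

int main(void)
{
	struct toy_memcg root = { NULL }, child = { &root }, other = { NULL };
	struct stock_model stock = {
		.nr_pages = { 8 },
		.cached = { &child },
	};

	printf("drain for root:  %d\n", drain_needed(&stock, &root));  /* 1 */
	printf("drain for other: %d\n", drain_needed(&stock, &other)); /* 0 */
	return 0;
}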
+
/*
* Drains all per-CPU charge caches for given root_memcg resp. subtree
* of the hierarchy under it.
curcpu = smp_processor_id();
for_each_online_cpu(cpu) {
struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
- struct mem_cgroup *memcg;
- bool flush = false;
-
- rcu_read_lock();
- memcg = READ_ONCE(stock->cached);
- if (memcg && READ_ONCE(stock->nr_pages) &&
- mem_cgroup_is_descendant(memcg, root_memcg))
- flush = true;
- else if (obj_stock_flush_required(stock, root_memcg))
- flush = true;
- rcu_read_unlock();
+ bool flush = is_drain_needed(stock, root_memcg);
if (flush &&
!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
drain_obj_stock(stock);
local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
- drain_stock(stock);
+ drain_stock_fully(stock);
return 0;
}