tcp_memcontrol: Remove the per netns control.
[linux-2.6-block.git] / net / ipv4 / tcp_memcontrol.c
CommitLineData
d1a4c0b3
GC
1#include <net/tcp.h>
2#include <net/tcp_memcontrol.h>
3#include <net/sock.h>
3dc43e3e
GC
4#include <net/ip.h>
5#include <linux/nsproxy.h>
d1a4c0b3
GC
6#include <linux/memcontrol.h>
7#include <linux/module.h>
8
/* Map a cg_proto member pointer back to its enclosing tcp_memcontrol. */
static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto)
{
	return container_of(cg_proto, struct tcp_memcontrol, cg_proto);
}
13
14static void memcg_tcp_enter_memory_pressure(struct sock *sk)
15{
c48e074c 16 if (sk->sk_cgrp->memory_pressure)
d1a4c0b3
GC
17 *sk->sk_cgrp->memory_pressure = 1;
18}
19EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure);
20
1d62e436 21int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
d1a4c0b3
GC
22{
23 /*
24 * The root cgroup does not use res_counters, but rather,
25 * rely on the data already collected by the network
26 * subsystem
27 */
28 struct res_counter *res_parent = NULL;
29 struct cg_proto *cg_proto, *parent_cg;
30 struct tcp_memcontrol *tcp;
d1a4c0b3
GC
31 struct mem_cgroup *parent = parent_mem_cgroup(memcg);
32
33 cg_proto = tcp_prot.proto_cgroup(memcg);
34 if (!cg_proto)
6bc10349 35 return 0;
d1a4c0b3
GC
36
37 tcp = tcp_from_cgproto(cg_proto);
38
a4fe34bf
EB
39 tcp->tcp_prot_mem[0] = sysctl_tcp_mem[0];
40 tcp->tcp_prot_mem[1] = sysctl_tcp_mem[1];
41 tcp->tcp_prot_mem[2] = sysctl_tcp_mem[2];
d1a4c0b3
GC
42 tcp->tcp_memory_pressure = 0;
43
44 parent_cg = tcp_prot.proto_cgroup(parent);
45 if (parent_cg)
46 res_parent = parent_cg->memory_allocated;
47
48 res_counter_init(&tcp->tcp_memory_allocated, res_parent);
49 percpu_counter_init(&tcp->tcp_sockets_allocated, 0);
50
51 cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure;
52 cg_proto->memory_pressure = &tcp->tcp_memory_pressure;
53 cg_proto->sysctl_mem = tcp->tcp_prot_mem;
54 cg_proto->memory_allocated = &tcp->tcp_memory_allocated;
55 cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated;
56 cg_proto->memcg = memcg;
57
6bc10349 58 return 0;
d1a4c0b3
GC
59}
60EXPORT_SYMBOL(tcp_init_cgroup);
61
1d62e436 62void tcp_destroy_cgroup(struct mem_cgroup *memcg)
d1a4c0b3 63{
d1a4c0b3
GC
64 struct cg_proto *cg_proto;
65 struct tcp_memcontrol *tcp;
66
67 cg_proto = tcp_prot.proto_cgroup(memcg);
68 if (!cg_proto)
69 return;
70
71 tcp = tcp_from_cgproto(cg_proto);
72 percpu_counter_destroy(&tcp->tcp_sockets_allocated);
73}
74EXPORT_SYMBOL(tcp_destroy_cgroup);
3aaabe23
GC
75
76static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
77{
3aaabe23
GC
78 struct tcp_memcontrol *tcp;
79 struct cg_proto *cg_proto;
80 u64 old_lim;
81 int i;
82 int ret;
83
84 cg_proto = tcp_prot.proto_cgroup(memcg);
85 if (!cg_proto)
86 return -EINVAL;
87
6de5a8bf
SZ
88 if (val > RES_COUNTER_MAX)
89 val = RES_COUNTER_MAX;
3aaabe23
GC
90
91 tcp = tcp_from_cgproto(cg_proto);
92
93 old_lim = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
94 ret = res_counter_set_limit(&tcp->tcp_memory_allocated, val);
95 if (ret)
96 return ret;
97
98 for (i = 0; i < 3; i++)
99 tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT,
a4fe34bf 100 sysctl_tcp_mem[i]);
3aaabe23 101
6de5a8bf 102 if (val == RES_COUNTER_MAX)
3f134619 103 clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
6de5a8bf 104 else if (val != RES_COUNTER_MAX) {
3f134619
GC
105 /*
106 * The active bit needs to be written after the static_key
107 * update. This is what guarantees that the socket activation
108 * function is the last one to run. See sock_update_memcg() for
109 * details, and note that we don't mark any socket as belonging
110 * to this memcg until that flag is up.
111 *
112 * We need to do this, because static_keys will span multiple
113 * sites, but we can't control their order. If we mark a socket
114 * as accounted, but the accounting functions are not patched in
115 * yet, we'll lose accounting.
116 *
117 * We never race with the readers in sock_update_memcg(),
118 * because when this value change, the code to process it is not
119 * patched in yet.
120 *
121 * The activated bit is used to guarantee that no two writers
122 * will do the update in the same memcg. Without that, we can't
123 * properly shutdown the static key.
124 */
125 if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags))
126 static_key_slow_inc(&memcg_socket_limit_enabled);
127 set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
128 }
3aaabe23
GC
129
130 return 0;
131}
132
182446d0 133static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
3aaabe23
GC
134 const char *buffer)
135{
182446d0 136 struct mem_cgroup *memcg = mem_cgroup_from_css(css);
3aaabe23
GC
137 unsigned long long val;
138 int ret = 0;
139
140 switch (cft->private) {
141 case RES_LIMIT:
142 /* see memcontrol.c */
143 ret = res_counter_memparse_write_strategy(buffer, &val);
144 if (ret)
145 break;
146 ret = tcp_update_limit(memcg, val);
147 break;
148 default:
149 ret = -EINVAL;
150 break;
151 }
152 return ret;
153}
154
155static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val)
156{
157 struct tcp_memcontrol *tcp;
158 struct cg_proto *cg_proto;
159
160 cg_proto = tcp_prot.proto_cgroup(memcg);
161 if (!cg_proto)
162 return default_val;
163
164 tcp = tcp_from_cgproto(cg_proto);
165 return res_counter_read_u64(&tcp->tcp_memory_allocated, type);
166}
167
5a6dd343
GC
168static u64 tcp_read_usage(struct mem_cgroup *memcg)
169{
170 struct tcp_memcontrol *tcp;
171 struct cg_proto *cg_proto;
172
173 cg_proto = tcp_prot.proto_cgroup(memcg);
174 if (!cg_proto)
175 return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT;
176
177 tcp = tcp_from_cgproto(cg_proto);
178 return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE);
179}
180
182446d0 181static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
3aaabe23 182{
182446d0 183 struct mem_cgroup *memcg = mem_cgroup_from_css(css);
3aaabe23
GC
184 u64 val;
185
186 switch (cft->private) {
187 case RES_LIMIT:
6de5a8bf 188 val = tcp_read_stat(memcg, RES_LIMIT, RES_COUNTER_MAX);
3aaabe23 189 break;
5a6dd343
GC
190 case RES_USAGE:
191 val = tcp_read_usage(memcg);
192 break;
ffea59e5 193 case RES_FAILCNT:
0850f0f5
GC
194 case RES_MAX_USAGE:
195 val = tcp_read_stat(memcg, cft->private, 0);
ffea59e5 196 break;
3aaabe23
GC
197 default:
198 BUG();
199 }
200 return val;
201}
202
182446d0 203static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
ffea59e5
GC
204{
205 struct mem_cgroup *memcg;
206 struct tcp_memcontrol *tcp;
207 struct cg_proto *cg_proto;
208
182446d0 209 memcg = mem_cgroup_from_css(css);
ffea59e5
GC
210 cg_proto = tcp_prot.proto_cgroup(memcg);
211 if (!cg_proto)
212 return 0;
213 tcp = tcp_from_cgproto(cg_proto);
214
215 switch (event) {
0850f0f5
GC
216 case RES_MAX_USAGE:
217 res_counter_reset_max(&tcp->tcp_memory_allocated);
218 break;
ffea59e5
GC
219 case RES_FAILCNT:
220 res_counter_reset_failcnt(&tcp->tcp_memory_allocated);
221 break;
222 }
223
224 return 0;
225}
226
676f7c8f
TH
/* Control files exposed in every memcg directory in cgroupfs. */
static struct cftype tcp_files[] = {
	{
		.name = "kmem.tcp.limit_in_bytes",
		.write_string = tcp_cgroup_write,
		.read_u64 = tcp_cgroup_read,
		.private = RES_LIMIT,
	},
	{
		.name = "kmem.tcp.usage_in_bytes",
		.read_u64 = tcp_cgroup_read,
		.private = RES_USAGE,
	},
	{
		.name = "kmem.tcp.failcnt",
		.private = RES_FAILCNT,
		.trigger = tcp_cgroup_reset,
		.read_u64 = tcp_cgroup_read,
	},
	{
		.name = "kmem.tcp.max_usage_in_bytes",
		.private = RES_MAX_USAGE,
		.trigger = tcp_cgroup_reset,
		.read_u64 = tcp_cgroup_read,
	},
	{ }	/* terminate */
};
6bc10349
TH
253
/* Register the kmem.tcp.* control files with the memory cgroup subsystem. */
static int __init tcp_memcontrol_init(void)
{
	WARN_ON(cgroup_add_cftypes(&mem_cgroup_subsys, tcp_files));
	return 0;
}
__initcall(tcp_memcontrol_init);