Commit | Line | Data |
---|---|---|
d1a4c0b3 GC |
1 | #include <net/tcp.h> |
2 | #include <net/tcp_memcontrol.h> | |
3 | #include <net/sock.h> | |
3dc43e3e GC |
4 | #include <net/ip.h> |
5 | #include <linux/nsproxy.h> | |
d1a4c0b3 GC |
6 | #include <linux/memcontrol.h> |
7 | #include <linux/module.h> | |
8 | ||
9 | static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto) | |
10 | { | |
11 | return container_of(cg_proto, struct tcp_memcontrol, cg_proto); | |
12 | } | |
13 | ||
14 | static void memcg_tcp_enter_memory_pressure(struct sock *sk) | |
15 | { | |
c48e074c | 16 | if (sk->sk_cgrp->memory_pressure) |
d1a4c0b3 GC |
17 | *sk->sk_cgrp->memory_pressure = 1; |
18 | } | |
19 | EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure); | |
20 | ||
1d62e436 | 21 | int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) |
d1a4c0b3 GC |
22 | { |
23 | /* | |
24 | * The root cgroup does not use res_counters, but rather, | |
25 | * rely on the data already collected by the network | |
26 | * subsystem | |
27 | */ | |
28 | struct res_counter *res_parent = NULL; | |
29 | struct cg_proto *cg_proto, *parent_cg; | |
30 | struct tcp_memcontrol *tcp; | |
d1a4c0b3 | 31 | struct mem_cgroup *parent = parent_mem_cgroup(memcg); |
3dc43e3e | 32 | struct net *net = current->nsproxy->net_ns; |
d1a4c0b3 GC |
33 | |
34 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
35 | if (!cg_proto) | |
6bc10349 | 36 | return 0; |
d1a4c0b3 GC |
37 | |
38 | tcp = tcp_from_cgproto(cg_proto); | |
39 | ||
3dc43e3e GC |
40 | tcp->tcp_prot_mem[0] = net->ipv4.sysctl_tcp_mem[0]; |
41 | tcp->tcp_prot_mem[1] = net->ipv4.sysctl_tcp_mem[1]; | |
42 | tcp->tcp_prot_mem[2] = net->ipv4.sysctl_tcp_mem[2]; | |
d1a4c0b3 GC |
43 | tcp->tcp_memory_pressure = 0; |
44 | ||
45 | parent_cg = tcp_prot.proto_cgroup(parent); | |
46 | if (parent_cg) | |
47 | res_parent = parent_cg->memory_allocated; | |
48 | ||
49 | res_counter_init(&tcp->tcp_memory_allocated, res_parent); | |
50 | percpu_counter_init(&tcp->tcp_sockets_allocated, 0); | |
51 | ||
52 | cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure; | |
53 | cg_proto->memory_pressure = &tcp->tcp_memory_pressure; | |
54 | cg_proto->sysctl_mem = tcp->tcp_prot_mem; | |
55 | cg_proto->memory_allocated = &tcp->tcp_memory_allocated; | |
56 | cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated; | |
57 | cg_proto->memcg = memcg; | |
58 | ||
6bc10349 | 59 | return 0; |
d1a4c0b3 GC |
60 | } |
61 | EXPORT_SYMBOL(tcp_init_cgroup); | |
62 | ||
1d62e436 | 63 | void tcp_destroy_cgroup(struct mem_cgroup *memcg) |
d1a4c0b3 | 64 | { |
d1a4c0b3 GC |
65 | struct cg_proto *cg_proto; |
66 | struct tcp_memcontrol *tcp; | |
67 | ||
68 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
69 | if (!cg_proto) | |
70 | return; | |
71 | ||
72 | tcp = tcp_from_cgproto(cg_proto); | |
73 | percpu_counter_destroy(&tcp->tcp_sockets_allocated); | |
74 | } | |
75 | EXPORT_SYMBOL(tcp_destroy_cgroup); | |
3aaabe23 GC |
76 | |
77 | static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) | |
78 | { | |
79 | struct net *net = current->nsproxy->net_ns; | |
80 | struct tcp_memcontrol *tcp; | |
81 | struct cg_proto *cg_proto; | |
82 | u64 old_lim; | |
83 | int i; | |
84 | int ret; | |
85 | ||
86 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
87 | if (!cg_proto) | |
88 | return -EINVAL; | |
89 | ||
90 | if (val > RESOURCE_MAX) | |
91 | val = RESOURCE_MAX; | |
92 | ||
93 | tcp = tcp_from_cgproto(cg_proto); | |
94 | ||
95 | old_lim = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); | |
96 | ret = res_counter_set_limit(&tcp->tcp_memory_allocated, val); | |
97 | if (ret) | |
98 | return ret; | |
99 | ||
100 | for (i = 0; i < 3; i++) | |
101 | tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT, | |
102 | net->ipv4.sysctl_tcp_mem[i]); | |
103 | ||
3f134619 GC |
104 | if (val == RESOURCE_MAX) |
105 | clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); | |
106 | else if (val != RESOURCE_MAX) { | |
107 | /* | |
108 | * The active bit needs to be written after the static_key | |
109 | * update. This is what guarantees that the socket activation | |
110 | * function is the last one to run. See sock_update_memcg() for | |
111 | * details, and note that we don't mark any socket as belonging | |
112 | * to this memcg until that flag is up. | |
113 | * | |
114 | * We need to do this, because static_keys will span multiple | |
115 | * sites, but we can't control their order. If we mark a socket | |
116 | * as accounted, but the accounting functions are not patched in | |
117 | * yet, we'll lose accounting. | |
118 | * | |
119 | * We never race with the readers in sock_update_memcg(), | |
120 | * because when this value change, the code to process it is not | |
121 | * patched in yet. | |
122 | * | |
123 | * The activated bit is used to guarantee that no two writers | |
124 | * will do the update in the same memcg. Without that, we can't | |
125 | * properly shutdown the static key. | |
126 | */ | |
127 | if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags)) | |
128 | static_key_slow_inc(&memcg_socket_limit_enabled); | |
129 | set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); | |
130 | } | |
3aaabe23 GC |
131 | |
132 | return 0; | |
133 | } | |
134 | ||
135 | static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft, | |
136 | const char *buffer) | |
137 | { | |
138 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | |
139 | unsigned long long val; | |
140 | int ret = 0; | |
141 | ||
142 | switch (cft->private) { | |
143 | case RES_LIMIT: | |
144 | /* see memcontrol.c */ | |
145 | ret = res_counter_memparse_write_strategy(buffer, &val); | |
146 | if (ret) | |
147 | break; | |
148 | ret = tcp_update_limit(memcg, val); | |
149 | break; | |
150 | default: | |
151 | ret = -EINVAL; | |
152 | break; | |
153 | } | |
154 | return ret; | |
155 | } | |
156 | ||
157 | static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val) | |
158 | { | |
159 | struct tcp_memcontrol *tcp; | |
160 | struct cg_proto *cg_proto; | |
161 | ||
162 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
163 | if (!cg_proto) | |
164 | return default_val; | |
165 | ||
166 | tcp = tcp_from_cgproto(cg_proto); | |
167 | return res_counter_read_u64(&tcp->tcp_memory_allocated, type); | |
168 | } | |
169 | ||
5a6dd343 GC |
170 | static u64 tcp_read_usage(struct mem_cgroup *memcg) |
171 | { | |
172 | struct tcp_memcontrol *tcp; | |
173 | struct cg_proto *cg_proto; | |
174 | ||
175 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
176 | if (!cg_proto) | |
177 | return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT; | |
178 | ||
179 | tcp = tcp_from_cgproto(cg_proto); | |
180 | return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE); | |
181 | } | |
182 | ||
3aaabe23 GC |
183 | static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft) |
184 | { | |
185 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | |
186 | u64 val; | |
187 | ||
188 | switch (cft->private) { | |
189 | case RES_LIMIT: | |
190 | val = tcp_read_stat(memcg, RES_LIMIT, RESOURCE_MAX); | |
191 | break; | |
5a6dd343 GC |
192 | case RES_USAGE: |
193 | val = tcp_read_usage(memcg); | |
194 | break; | |
ffea59e5 | 195 | case RES_FAILCNT: |
0850f0f5 GC |
196 | case RES_MAX_USAGE: |
197 | val = tcp_read_stat(memcg, cft->private, 0); | |
ffea59e5 | 198 | break; |
3aaabe23 GC |
199 | default: |
200 | BUG(); | |
201 | } | |
202 | return val; | |
203 | } | |
204 | ||
ffea59e5 GC |
205 | static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event) |
206 | { | |
207 | struct mem_cgroup *memcg; | |
208 | struct tcp_memcontrol *tcp; | |
209 | struct cg_proto *cg_proto; | |
210 | ||
211 | memcg = mem_cgroup_from_cont(cont); | |
212 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
213 | if (!cg_proto) | |
214 | return 0; | |
215 | tcp = tcp_from_cgproto(cg_proto); | |
216 | ||
217 | switch (event) { | |
0850f0f5 GC |
218 | case RES_MAX_USAGE: |
219 | res_counter_reset_max(&tcp->tcp_memory_allocated); | |
220 | break; | |
ffea59e5 GC |
221 | case RES_FAILCNT: |
222 | res_counter_reset_failcnt(&tcp->tcp_memory_allocated); | |
223 | break; | |
224 | } | |
225 | ||
226 | return 0; | |
227 | } | |
228 | ||
3aaabe23 GC |
229 | unsigned long long tcp_max_memory(const struct mem_cgroup *memcg) |
230 | { | |
231 | struct tcp_memcontrol *tcp; | |
232 | struct cg_proto *cg_proto; | |
233 | ||
234 | cg_proto = tcp_prot.proto_cgroup((struct mem_cgroup *)memcg); | |
235 | if (!cg_proto) | |
236 | return 0; | |
237 | ||
238 | tcp = tcp_from_cgproto(cg_proto); | |
239 | return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); | |
240 | } | |
241 | ||
242 | void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx) | |
243 | { | |
244 | struct tcp_memcontrol *tcp; | |
245 | struct cg_proto *cg_proto; | |
246 | ||
247 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
248 | if (!cg_proto) | |
249 | return; | |
250 | ||
251 | tcp = tcp_from_cgproto(cg_proto); | |
252 | ||
253 | tcp->tcp_prot_mem[idx] = val; | |
254 | } | |
676f7c8f TH |
255 | |
256 | static struct cftype tcp_files[] = { | |
257 | { | |
258 | .name = "kmem.tcp.limit_in_bytes", | |
259 | .write_string = tcp_cgroup_write, | |
260 | .read_u64 = tcp_cgroup_read, | |
261 | .private = RES_LIMIT, | |
262 | }, | |
263 | { | |
264 | .name = "kmem.tcp.usage_in_bytes", | |
265 | .read_u64 = tcp_cgroup_read, | |
266 | .private = RES_USAGE, | |
267 | }, | |
268 | { | |
269 | .name = "kmem.tcp.failcnt", | |
270 | .private = RES_FAILCNT, | |
271 | .trigger = tcp_cgroup_reset, | |
272 | .read_u64 = tcp_cgroup_read, | |
273 | }, | |
274 | { | |
275 | .name = "kmem.tcp.max_usage_in_bytes", | |
276 | .private = RES_MAX_USAGE, | |
277 | .trigger = tcp_cgroup_reset, | |
278 | .read_u64 = tcp_cgroup_read, | |
279 | }, | |
6bc10349 | 280 | { } /* terminate */ |
676f7c8f | 281 | }; |
6bc10349 TH |
282 | |
283 | static int __init tcp_memcontrol_init(void) | |
284 | { | |
285 | WARN_ON(cgroup_add_cftypes(&mem_cgroup_subsys, tcp_files)); | |
286 | return 0; | |
287 | } | |
288 | __initcall(tcp_memcontrol_init); |