Commit | Line | Data |
---|---|---|
d1a4c0b3 GC |
1 | #include <net/tcp.h> |
2 | #include <net/tcp_memcontrol.h> | |
3 | #include <net/sock.h> | |
3dc43e3e GC |
4 | #include <net/ip.h> |
5 | #include <linux/nsproxy.h> | |
d1a4c0b3 GC |
6 | #include <linux/memcontrol.h> |
7 | #include <linux/module.h> | |
8 | ||
/* Map a generic cg_proto back to its containing tcp_memcontrol. */
static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto)
{
	return container_of(cg_proto, struct tcp_memcontrol, cg_proto);
}
13 | ||
/*
 * Per-memcg memory-pressure hook: latch the cgroup's pressure flag so
 * sockets accounted to this memcg start throttling their buffer growth.
 * The flag pointer is only set for non-root memcgs (see tcp_init_cgroup).
 */
static void memcg_tcp_enter_memory_pressure(struct sock *sk)
{
	if (sk->sk_cgrp->memory_pressure)
		*sk->sk_cgrp->memory_pressure = 1;
}
EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure);
20 | ||
/*
 * Set up TCP memory accounting for @memcg: initialize its counters and
 * wire them into the cg_proto so the networking core consults them.
 * Per-memcg thresholds start mirrored from the global sysctl_tcp_mem[].
 *
 * Returns 0, including for the root cgroup (which has no cg_proto and
 * relies on the global accounting instead).
 */
int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
	/*
	 * The root cgroup does not use res_counters, but rather,
	 * rely on the data already collected by the network
	 * subsystem
	 */
	struct res_counter *res_parent = NULL;
	struct cg_proto *cg_proto, *parent_cg;
	struct tcp_memcontrol *tcp;
	struct mem_cgroup *parent = parent_mem_cgroup(memcg);

	cg_proto = tcp_prot.proto_cgroup(memcg);
	if (!cg_proto)
		return 0;

	tcp = tcp_from_cgproto(cg_proto);

	/* Seed the min/pressure/max thresholds from the global sysctls. */
	tcp->tcp_prot_mem[0] = sysctl_tcp_mem[0];
	tcp->tcp_prot_mem[1] = sysctl_tcp_mem[1];
	tcp->tcp_prot_mem[2] = sysctl_tcp_mem[2];
	tcp->tcp_memory_pressure = 0;

	/* Chain our res_counter to the parent memcg's for hierarchical charging. */
	parent_cg = tcp_prot.proto_cgroup(parent);
	if (parent_cg)
		res_parent = parent_cg->memory_allocated;

	res_counter_init(&tcp->tcp_memory_allocated, res_parent);
	/*
	 * NOTE(review): percpu_counter_init() can fail (-ENOMEM) but its
	 * return value is ignored here — confirm this is acceptable.
	 */
	percpu_counter_init(&tcp->tcp_sockets_allocated, 0);

	cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure;
	cg_proto->memory_pressure = &tcp->tcp_memory_pressure;
	cg_proto->sysctl_mem = tcp->tcp_prot_mem;
	cg_proto->memory_allocated = &tcp->tcp_memory_allocated;
	cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated;
	cg_proto->memcg = memcg;

	return 0;
}
EXPORT_SYMBOL(tcp_init_cgroup);
61 | ||
/*
 * Tear down the per-memcg TCP accounting state created by
 * tcp_init_cgroup(). Only the percpu counter needs explicit release;
 * the res_counter has no destructor. No-op for the root cgroup.
 */
void tcp_destroy_cgroup(struct mem_cgroup *memcg)
{
	struct cg_proto *cg_proto;
	struct tcp_memcontrol *tcp;

	cg_proto = tcp_prot.proto_cgroup(memcg);
	if (!cg_proto)
		return;

	tcp = tcp_from_cgproto(cg_proto);
	percpu_counter_destroy(&tcp->tcp_sockets_allocated);
}
EXPORT_SYMBOL(tcp_destroy_cgroup);
3aaabe23 GC |
75 | |
76 | static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) | |
77 | { | |
3aaabe23 GC |
78 | struct tcp_memcontrol *tcp; |
79 | struct cg_proto *cg_proto; | |
80 | u64 old_lim; | |
81 | int i; | |
82 | int ret; | |
83 | ||
84 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
85 | if (!cg_proto) | |
86 | return -EINVAL; | |
87 | ||
6de5a8bf SZ |
88 | if (val > RES_COUNTER_MAX) |
89 | val = RES_COUNTER_MAX; | |
3aaabe23 GC |
90 | |
91 | tcp = tcp_from_cgproto(cg_proto); | |
92 | ||
93 | old_lim = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); | |
94 | ret = res_counter_set_limit(&tcp->tcp_memory_allocated, val); | |
95 | if (ret) | |
96 | return ret; | |
97 | ||
98 | for (i = 0; i < 3; i++) | |
99 | tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT, | |
a4fe34bf | 100 | sysctl_tcp_mem[i]); |
3aaabe23 | 101 | |
6de5a8bf | 102 | if (val == RES_COUNTER_MAX) |
3f134619 | 103 | clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); |
6de5a8bf | 104 | else if (val != RES_COUNTER_MAX) { |
3f134619 GC |
105 | /* |
106 | * The active bit needs to be written after the static_key | |
107 | * update. This is what guarantees that the socket activation | |
108 | * function is the last one to run. See sock_update_memcg() for | |
109 | * details, and note that we don't mark any socket as belonging | |
110 | * to this memcg until that flag is up. | |
111 | * | |
112 | * We need to do this, because static_keys will span multiple | |
113 | * sites, but we can't control their order. If we mark a socket | |
114 | * as accounted, but the accounting functions are not patched in | |
115 | * yet, we'll lose accounting. | |
116 | * | |
117 | * We never race with the readers in sock_update_memcg(), | |
118 | * because when this value change, the code to process it is not | |
119 | * patched in yet. | |
120 | * | |
121 | * The activated bit is used to guarantee that no two writers | |
122 | * will do the update in the same memcg. Without that, we can't | |
123 | * properly shutdown the static key. | |
124 | */ | |
125 | if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags)) | |
126 | static_key_slow_inc(&memcg_socket_limit_enabled); | |
127 | set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); | |
128 | } | |
3aaabe23 GC |
129 | |
130 | return 0; | |
131 | } | |
132 | ||
182446d0 | 133 | static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft, |
3aaabe23 GC |
134 | const char *buffer) |
135 | { | |
182446d0 | 136 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
3aaabe23 GC |
137 | unsigned long long val; |
138 | int ret = 0; | |
139 | ||
140 | switch (cft->private) { | |
141 | case RES_LIMIT: | |
142 | /* see memcontrol.c */ | |
143 | ret = res_counter_memparse_write_strategy(buffer, &val); | |
144 | if (ret) | |
145 | break; | |
146 | ret = tcp_update_limit(memcg, val); | |
147 | break; | |
148 | default: | |
149 | ret = -EINVAL; | |
150 | break; | |
151 | } | |
152 | return ret; | |
153 | } | |
154 | ||
155 | static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val) | |
156 | { | |
157 | struct tcp_memcontrol *tcp; | |
158 | struct cg_proto *cg_proto; | |
159 | ||
160 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
161 | if (!cg_proto) | |
162 | return default_val; | |
163 | ||
164 | tcp = tcp_from_cgproto(cg_proto); | |
165 | return res_counter_read_u64(&tcp->tcp_memory_allocated, type); | |
166 | } | |
167 | ||
5a6dd343 GC |
168 | static u64 tcp_read_usage(struct mem_cgroup *memcg) |
169 | { | |
170 | struct tcp_memcontrol *tcp; | |
171 | struct cg_proto *cg_proto; | |
172 | ||
173 | cg_proto = tcp_prot.proto_cgroup(memcg); | |
174 | if (!cg_proto) | |
175 | return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT; | |
176 | ||
177 | tcp = tcp_from_cgproto(cg_proto); | |
178 | return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE); | |
179 | } | |
180 | ||
/*
 * read_u64 handler for the kmem.tcp.* files; dispatches on the
 * cftype's private tag. BUG() on an unknown tag — only the entries
 * registered in tcp_files[] can reach here.
 */
static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
	u64 val;

	switch (cft->private) {
	case RES_LIMIT:
		/* An unset limit reads back as RES_COUNTER_MAX ("unlimited"). */
		val = tcp_read_stat(memcg, RES_LIMIT, RES_COUNTER_MAX);
		break;
	case RES_USAGE:
		val = tcp_read_usage(memcg);
		break;
	case RES_FAILCNT:
	case RES_MAX_USAGE:
		val = tcp_read_stat(memcg, cft->private, 0);
		break;
	default:
		BUG();
	}
	return val;
}
202 | ||
/*
 * trigger handler for the resettable kmem.tcp.* files: clears either
 * the max-usage watermark or the failure count. Returns 0 always,
 * including for the root cgroup (no cg_proto, nothing to reset).
 */
static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
{
	struct mem_cgroup *memcg;
	struct tcp_memcontrol *tcp;
	struct cg_proto *cg_proto;

	memcg = mem_cgroup_from_css(css);
	cg_proto = tcp_prot.proto_cgroup(memcg);
	if (!cg_proto)
		return 0;
	tcp = tcp_from_cgproto(cg_proto);

	switch (event) {
	case RES_MAX_USAGE:
		res_counter_reset_max(&tcp->tcp_memory_allocated);
		break;
	case RES_FAILCNT:
		res_counter_reset_failcnt(&tcp->tcp_memory_allocated);
		break;
	}

	return 0;
}
226 | ||
676f7c8f TH |
/*
 * Control files exposed under the memory cgroup as memory.kmem.tcp.*;
 * each entry's .private tag selects the res_counter member the shared
 * read/write/reset handlers operate on.
 */
static struct cftype tcp_files[] = {
	{
		.name = "kmem.tcp.limit_in_bytes",
		.write_string = tcp_cgroup_write,
		.read_u64 = tcp_cgroup_read,
		.private = RES_LIMIT,
	},
	{
		.name = "kmem.tcp.usage_in_bytes",
		.read_u64 = tcp_cgroup_read,
		.private = RES_USAGE,
	},
	{
		.name = "kmem.tcp.failcnt",
		.private = RES_FAILCNT,
		.trigger = tcp_cgroup_reset,
		.read_u64 = tcp_cgroup_read,
	},
	{
		.name = "kmem.tcp.max_usage_in_bytes",
		.private = RES_MAX_USAGE,
		.trigger = tcp_cgroup_reset,
		.read_u64 = tcp_cgroup_read,
	},
	{ }	/* terminate */
};
6bc10349 TH |
253 | |
/*
 * Register the kmem.tcp.* files with the memory cgroup subsystem at
 * boot. Registration failure is unexpected; warn but keep booting.
 */
static int __init tcp_memcontrol_init(void)
{
	WARN_ON(cgroup_add_cftypes(&mem_cgroup_subsys, tcp_files));
	return 0;
}
__initcall(tcp_memcontrol_init);