Commit | Line | Data |
---|---|---|
6ac99e8f MKL |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Copyright (c) 2019 Facebook */ | |
3 | #include <linux/rculist.h> | |
4 | #include <linux/list.h> | |
5 | #include <linux/hash.h> | |
6 | #include <linux/types.h> | |
7 | #include <linux/spinlock.h> | |
8 | #include <linux/bpf.h> | |
8e4597c6 | 9 | #include <linux/btf.h> |
5ce6e77c | 10 | #include <linux/btf_ids.h> |
450af8d0 | 11 | #include <linux/bpf_local_storage.h> |
6ac99e8f MKL |
12 | #include <net/bpf_sk_storage.h> |
13 | #include <net/sock.h> | |
1ed4d924 | 14 | #include <uapi/linux/sock_diag.h> |
6ac99e8f | 15 | #include <uapi/linux/btf.h> |
0fe4b381 | 16 | #include <linux/rcupdate_trace.h> |
6ac99e8f | 17 | |
4cc9ce4e KS |
/* Storage cache object for sk storage maps; its address is handed to
 * bpf_local_storage_map_alloc() and bpf_local_storage_map_free() by the
 * map_alloc/map_free callbacks in this file.
 */
18 | DEFINE_BPF_STORAGE_CACHE(sk_cache); |
19 | ||
1f00d375 | 20 | static struct bpf_local_storage_data * |
e794bfdd | 21 | bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit) |
6ac99e8f | 22 | { |
1f00d375 KS |
23 | struct bpf_local_storage *sk_storage; |
24 | struct bpf_local_storage_map *smap; | |
6ac99e8f | 25 | |
0fe4b381 KS |
26 | sk_storage = |
27 | rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held()); | |
6ac99e8f MKL |
28 | if (!sk_storage) |
29 | return NULL; | |
30 | ||
1f00d375 KS |
31 | smap = (struct bpf_local_storage_map *)map; |
32 | return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit); | |
6ac99e8f MKL |
33 | } |
34 | ||
e794bfdd | 35 | static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map) |
6ac99e8f | 36 | { |
1f00d375 | 37 | struct bpf_local_storage_data *sdata; |
6ac99e8f | 38 | |
e794bfdd | 39 | sdata = bpf_sk_storage_lookup(sk, map, false); |
6ac99e8f MKL |
40 | if (!sdata) |
41 | return -ENOENT; | |
42 | ||
a47eabf2 | 43 | bpf_selem_unlink(SELEM(sdata), false); |
6ac99e8f MKL |
44 | |
45 | return 0; | |
46 | } | |
47 | ||
8f51dfc7 | 48 | /* Called by __sk_destruct() & bpf_sk_storage_clone() */ |
6ac99e8f MKL |
49 | void bpf_sk_storage_free(struct sock *sk) |
50 | { | |
1f00d375 | 51 | struct bpf_local_storage *sk_storage; |
6ac99e8f MKL |
52 | |
53 | rcu_read_lock(); | |
54 | sk_storage = rcu_dereference(sk->sk_bpf_storage); | |
55 | if (!sk_storage) { | |
56 | rcu_read_unlock(); | |
57 | return; | |
58 | } | |
59 | ||
2ffcb6fc | 60 | bpf_local_storage_destroy(sk_storage); |
6ac99e8f | 61 | rcu_read_unlock(); |
6ac99e8f MKL |
62 | } |
63 | ||
e794bfdd | 64 | static void bpf_sk_storage_map_free(struct bpf_map *map) |
f836a56e | 65 | { |
c83597fa | 66 | bpf_local_storage_map_free(map, &sk_cache, NULL); |
6ac99e8f MKL |
67 | } |
68 | ||
e794bfdd | 69 | static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) |
f836a56e | 70 | { |
08a7ce38 | 71 | return bpf_local_storage_map_alloc(attr, &sk_cache, false); |
6ac99e8f MKL |
72 | } |
73 | ||
74 | static int notsupp_get_next_key(struct bpf_map *map, void *key, | |
75 | void *next_key) | |
76 | { | |
77 | return -ENOTSUPP; | |
78 | } | |
79 | ||
6ac99e8f MKL |
80 | static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key) |
81 | { | |
1f00d375 | 82 | struct bpf_local_storage_data *sdata; |
6ac99e8f MKL |
83 | struct socket *sock; |
84 | int fd, err; | |
85 | ||
86 | fd = *(int *)key; | |
87 | sock = sockfd_lookup(fd, &err); | |
88 | if (sock) { | |
e794bfdd | 89 | sdata = bpf_sk_storage_lookup(sock->sk, map, true); |
6ac99e8f MKL |
90 | sockfd_put(sock); |
91 | return sdata ? sdata->data : NULL; | |
92 | } | |
93 | ||
94 | return ERR_PTR(err); | |
95 | } | |
96 | ||
d7ba4cc9 JK |
97 | static long bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key, |
98 | void *value, u64 map_flags) | |
6ac99e8f | 99 | { |
1f00d375 | 100 | struct bpf_local_storage_data *sdata; |
6ac99e8f MKL |
101 | struct socket *sock; |
102 | int fd, err; | |
103 | ||
104 | fd = *(int *)key; | |
105 | sock = sockfd_lookup(fd, &err); | |
106 | if (sock) { | |
f836a56e KS |
107 | sdata = bpf_local_storage_update( |
108 | sock->sk, (struct bpf_local_storage_map *)map, value, | |
b00fa38a | 109 | map_flags, GFP_ATOMIC); |
6ac99e8f | 110 | sockfd_put(sock); |
71f150f4 | 111 | return PTR_ERR_OR_ZERO(sdata); |
6ac99e8f MKL |
112 | } |
113 | ||
114 | return err; | |
115 | } | |
116 | ||
d7ba4cc9 | 117 | static long bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key) |
6ac99e8f MKL |
118 | { |
119 | struct socket *sock; | |
120 | int fd, err; | |
121 | ||
122 | fd = *(int *)key; | |
123 | sock = sockfd_lookup(fd, &err); | |
124 | if (sock) { | |
e794bfdd | 125 | err = bpf_sk_storage_del(sock->sk, map); |
6ac99e8f MKL |
126 | sockfd_put(sock); |
127 | return err; | |
128 | } | |
129 | ||
130 | return err; | |
131 | } | |
132 | ||
1f00d375 | 133 | static struct bpf_local_storage_elem * |
8f51dfc7 | 134 | bpf_sk_storage_clone_elem(struct sock *newsk, |
1f00d375 KS |
135 | struct bpf_local_storage_map *smap, |
136 | struct bpf_local_storage_elem *selem) | |
8f51dfc7 | 137 | { |
1f00d375 | 138 | struct bpf_local_storage_elem *copy_selem; |
8f51dfc7 | 139 | |
b00fa38a | 140 | copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, GFP_ATOMIC); |
8f51dfc7 SF |
141 | if (!copy_selem) |
142 | return NULL; | |
143 | ||
db559117 | 144 | if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK)) |
8f51dfc7 SF |
145 | copy_map_value_locked(&smap->map, SDATA(copy_selem)->data, |
146 | SDATA(selem)->data, true); | |
147 | else | |
148 | copy_map_value(&smap->map, SDATA(copy_selem)->data, | |
149 | SDATA(selem)->data); | |
150 | ||
151 | return copy_selem; | |
152 | } | |
153 | ||
154 | int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) | |
155 | { | |
1f00d375 KS |
156 | struct bpf_local_storage *new_sk_storage = NULL; |
157 | struct bpf_local_storage *sk_storage; | |
158 | struct bpf_local_storage_elem *selem; | |
8f51dfc7 SF |
159 | int ret = 0; |
160 | ||
161 | RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL); | |
162 | ||
163 | rcu_read_lock(); | |
164 | sk_storage = rcu_dereference(sk->sk_bpf_storage); | |
165 | ||
166 | if (!sk_storage || hlist_empty(&sk_storage->list)) | |
167 | goto out; | |
168 | ||
169 | hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) { | |
1f00d375 KS |
170 | struct bpf_local_storage_elem *copy_selem; |
171 | struct bpf_local_storage_map *smap; | |
8f51dfc7 SF |
172 | struct bpf_map *map; |
173 | ||
174 | smap = rcu_dereference(SDATA(selem)->smap); | |
175 | if (!(smap->map.map_flags & BPF_F_CLONE)) | |
176 | continue; | |
177 | ||
178 | /* Note that for lockless listeners adding new element | |
1f00d375 | 179 | * here can race with cleanup in bpf_local_storage_map_free. |
8f51dfc7 SF |
180 | * Try to grab map refcnt to make sure that it's still |
181 | * alive and prevent concurrent removal. | |
182 | */ | |
1e0bd5a0 | 183 | map = bpf_map_inc_not_zero(&smap->map); |
8f51dfc7 SF |
184 | if (IS_ERR(map)) |
185 | continue; | |
186 | ||
187 | copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem); | |
188 | if (!copy_selem) { | |
189 | ret = -ENOMEM; | |
190 | bpf_map_put(map); | |
191 | goto out; | |
192 | } | |
193 | ||
194 | if (new_sk_storage) { | |
1f00d375 KS |
195 | bpf_selem_link_map(smap, copy_selem); |
196 | bpf_selem_link_storage_nolock(new_sk_storage, copy_selem); | |
8f51dfc7 | 197 | } else { |
b00fa38a | 198 | ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC); |
8f51dfc7 | 199 | if (ret) { |
c0d63f30 | 200 | bpf_selem_free(copy_selem, smap, true); |
8f51dfc7 SF |
201 | atomic_sub(smap->elem_size, |
202 | &newsk->sk_omem_alloc); | |
203 | bpf_map_put(map); | |
204 | goto out; | |
205 | } | |
206 | ||
1f00d375 KS |
207 | new_sk_storage = |
208 | rcu_dereference(copy_selem->local_storage); | |
8f51dfc7 SF |
209 | } |
210 | bpf_map_put(map); | |
211 | } | |
212 | ||
213 | out: | |
214 | rcu_read_unlock(); | |
215 | ||
216 | /* In case of an error, don't free anything explicitly here, the | |
217 | * caller is responsible to call bpf_sk_storage_free. | |
218 | */ | |
219 | ||
220 | return ret; | |
221 | } | |
222 | ||
b00fa38a JK |
223 | /* *gfp_flags* is a hidden argument provided by the verifier */ |
224 | BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk, | |
225 | void *, value, u64, flags, gfp_t, gfp_flags) | |
6ac99e8f | 226 | { |
1f00d375 | 227 | struct bpf_local_storage_data *sdata; |
6ac99e8f | 228 | |
0fe4b381 | 229 | WARN_ON_ONCE(!bpf_rcu_lock_held()); |
592a3498 | 230 | if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE) |
6ac99e8f MKL |
231 | return (unsigned long)NULL; |
232 | ||
e794bfdd | 233 | sdata = bpf_sk_storage_lookup(sk, map, true); |
6ac99e8f MKL |
234 | if (sdata) |
235 | return (unsigned long)sdata->data; | |
236 | ||
237 | if (flags == BPF_SK_STORAGE_GET_F_CREATE && | |
238 | /* Cannot add new elem to a going away sk. | |
239 | * Otherwise, the new elem may become a leak | |
240 | * (and also other memory issues during map | |
241 | * destruction). | |
242 | */ | |
243 | refcount_inc_not_zero(&sk->sk_refcnt)) { | |
f836a56e KS |
244 | sdata = bpf_local_storage_update( |
245 | sk, (struct bpf_local_storage_map *)map, value, | |
b00fa38a | 246 | BPF_NOEXIST, gfp_flags); |
6ac99e8f MKL |
247 | /* sk must be a fullsock (guaranteed by verifier), |
248 | * so sock_gen_put() is unnecessary. | |
249 | */ | |
250 | sock_put(sk); | |
251 | return IS_ERR(sdata) ? | |
252 | (unsigned long)NULL : (unsigned long)sdata->data; | |
253 | } | |
254 | ||
255 | return (unsigned long)NULL; | |
256 | } | |
257 | ||
258 | BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) | |
259 | { | |
0fe4b381 | 260 | WARN_ON_ONCE(!bpf_rcu_lock_held()); |
592a3498 MKL |
261 | if (!sk || !sk_fullsock(sk)) |
262 | return -EINVAL; | |
263 | ||
6ac99e8f MKL |
264 | if (refcount_inc_not_zero(&sk->sk_refcnt)) { |
265 | int err; | |
266 | ||
e794bfdd | 267 | err = bpf_sk_storage_del(sk, map); |
6ac99e8f MKL |
268 | sock_put(sk); |
269 | return err; | |
270 | } | |
271 | ||
272 | return -ENOENT; | |
273 | } | |
274 | ||
e794bfdd MKL |
275 | static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap, |
276 | void *owner, u32 size) | |
f836a56e | 277 | { |
7de6d09f | 278 | int optmem_max = READ_ONCE(sysctl_optmem_max); |
9e838b02 MKL |
279 | struct sock *sk = (struct sock *)owner; |
280 | ||
281 | /* same check as in sock_kmalloc() */ | |
7de6d09f KI |
282 | if (size <= optmem_max && |
283 | atomic_read(&sk->sk_omem_alloc) + size < optmem_max) { | |
9e838b02 MKL |
284 | atomic_add(size, &sk->sk_omem_alloc); |
285 | return 0; | |
286 | } | |
287 | ||
288 | return -ENOMEM; | |
f836a56e KS |
289 | } |
290 | ||
e794bfdd MKL |
291 | static void bpf_sk_storage_uncharge(struct bpf_local_storage_map *smap, |
292 | void *owner, u32 size) | |
f836a56e KS |
293 | { |
294 | struct sock *sk = owner; | |
295 | ||
296 | atomic_sub(size, &sk->sk_omem_alloc); | |
297 | } | |
298 | ||
299 | static struct bpf_local_storage __rcu ** | |
e794bfdd | 300 | bpf_sk_storage_ptr(void *owner) |
f836a56e KS |
301 | { |
302 | struct sock *sk = owner; | |
303 | ||
304 | return &sk->sk_bpf_storage; | |
305 | } | |
306 | ||
6ac99e8f | 307 | const struct bpf_map_ops sk_storage_map_ops = { |
f4d05259 | 308 | .map_meta_equal = bpf_map_meta_equal, |
1f00d375 | 309 | .map_alloc_check = bpf_local_storage_map_alloc_check, |
e794bfdd MKL |
310 | .map_alloc = bpf_sk_storage_map_alloc, |
311 | .map_free = bpf_sk_storage_map_free, | |
6ac99e8f MKL |
312 | .map_get_next_key = notsupp_get_next_key, |
313 | .map_lookup_elem = bpf_fd_sk_storage_lookup_elem, | |
314 | .map_update_elem = bpf_fd_sk_storage_update_elem, | |
315 | .map_delete_elem = bpf_fd_sk_storage_delete_elem, | |
1f00d375 | 316 | .map_check_btf = bpf_local_storage_map_check_btf, |
3144bfa5 | 317 | .map_btf_id = &bpf_local_storage_map_btf_id[0], |
e794bfdd MKL |
318 | .map_local_storage_charge = bpf_sk_storage_charge, |
319 | .map_local_storage_uncharge = bpf_sk_storage_uncharge, | |
320 | .map_owner_storage_ptr = bpf_sk_storage_ptr, | |
7490b7f1 | 321 | .map_mem_usage = bpf_local_storage_map_mem_usage, |
6ac99e8f MKL |
322 | }; |
323 | ||
324 | const struct bpf_func_proto bpf_sk_storage_get_proto = { | |
325 | .func = bpf_sk_storage_get, | |
326 | .gpl_only = false, | |
327 | .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, | |
328 | .arg1_type = ARG_CONST_MAP_PTR, | |
592a3498 | 329 | .arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, |
6ac99e8f MKL |
330 | .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, |
331 | .arg4_type = ARG_ANYTHING, | |
332 | }; | |
333 | ||
f7c6cb1d SF |
334 | const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = { |
335 | .func = bpf_sk_storage_get, | |
336 | .gpl_only = false, | |
337 | .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, | |
338 | .arg1_type = ARG_CONST_MAP_PTR, | |
339 | .arg2_type = ARG_PTR_TO_CTX, /* context is 'struct sock' */ | |
340 | .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, | |
341 | .arg4_type = ARG_ANYTHING, | |
342 | }; | |
343 | ||
6ac99e8f MKL |
344 | const struct bpf_func_proto bpf_sk_storage_delete_proto = { |
345 | .func = bpf_sk_storage_delete, | |
346 | .gpl_only = false, | |
347 | .ret_type = RET_INTEGER, | |
348 | .arg1_type = ARG_CONST_MAP_PTR, | |
592a3498 | 349 | .arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, |
30897832 KS |
350 | }; |
351 | ||
8e4597c6 MKL |
352 | static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog) |
353 | { | |
354 | const struct btf *btf_vmlinux; | |
355 | const struct btf_type *t; | |
356 | const char *tname; | |
357 | u32 btf_id; | |
358 | ||
359 | if (prog->aux->dst_prog) | |
360 | return false; | |
361 | ||
362 | /* Ensure the tracing program is not tracing | |
363 | * any bpf_sk_storage*() function and also | |
364 | * use the bpf_sk_storage_(get|delete) helper. | |
365 | */ | |
366 | switch (prog->expected_attach_type) { | |
a50a85e4 | 367 | case BPF_TRACE_ITER: |
8e4597c6 MKL |
368 | case BPF_TRACE_RAW_TP: |
369 | /* bpf_sk_storage has no trace point */ | |
370 | return true; | |
371 | case BPF_TRACE_FENTRY: | |
372 | case BPF_TRACE_FEXIT: | |
373 | btf_vmlinux = bpf_get_btf_vmlinux(); | |
7ada3787 KKD |
374 | if (IS_ERR_OR_NULL(btf_vmlinux)) |
375 | return false; | |
8e4597c6 MKL |
376 | btf_id = prog->aux->attach_btf_id; |
377 | t = btf_type_by_id(btf_vmlinux, btf_id); | |
378 | tname = btf_name_by_offset(btf_vmlinux, t->name_off); | |
379 | return !!strncmp(tname, "bpf_sk_storage", | |
380 | strlen("bpf_sk_storage")); | |
381 | default: | |
382 | return false; | |
383 | } | |
384 | ||
385 | return false; | |
386 | } | |
387 | ||
b00fa38a JK |
388 | /* *gfp_flags* is a hidden argument provided by the verifier */ |
389 | BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk, | |
390 | void *, value, u64, flags, gfp_t, gfp_flags) | |
8e4597c6 | 391 | { |
0fe4b381 | 392 | WARN_ON_ONCE(!bpf_rcu_lock_held()); |
afa79d08 | 393 | if (in_hardirq() || in_nmi()) |
8e4597c6 MKL |
394 | return (unsigned long)NULL; |
395 | ||
b00fa38a JK |
396 | return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags, |
397 | gfp_flags); | |
8e4597c6 MKL |
398 | } |
399 | ||
400 | BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map, | |
401 | struct sock *, sk) | |
402 | { | |
0fe4b381 | 403 | WARN_ON_ONCE(!bpf_rcu_lock_held()); |
afa79d08 | 404 | if (in_hardirq() || in_nmi()) |
8e4597c6 MKL |
405 | return -EPERM; |
406 | ||
407 | return ____bpf_sk_storage_delete(map, sk); | |
408 | } | |
409 | ||
410 | const struct bpf_func_proto bpf_sk_storage_get_tracing_proto = { | |
411 | .func = bpf_sk_storage_get_tracing, | |
412 | .gpl_only = false, | |
413 | .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, | |
414 | .arg1_type = ARG_CONST_MAP_PTR, | |
91571a51 | 415 | .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, |
8e4597c6 MKL |
416 | .arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], |
417 | .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, | |
418 | .arg4_type = ARG_ANYTHING, | |
419 | .allowed = bpf_sk_storage_tracing_allowed, | |
420 | }; | |
421 | ||
422 | const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto = { | |
423 | .func = bpf_sk_storage_delete_tracing, | |
424 | .gpl_only = false, | |
425 | .ret_type = RET_INTEGER, | |
426 | .arg1_type = ARG_CONST_MAP_PTR, | |
91571a51 | 427 | .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, |
8e4597c6 MKL |
428 | .arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], |
429 | .allowed = bpf_sk_storage_tracing_allowed, | |
430 | }; | |
431 | ||
1ed4d924 MKL |
432 | struct bpf_sk_storage_diag { |
433 | u32 nr_maps; | |
434 | struct bpf_map *maps[]; | |
435 | }; | |
436 | ||
437 | /* The reply will be like: | |
438 | * INET_DIAG_BPF_SK_STORAGES (nla_nest) | |
439 | * SK_DIAG_BPF_STORAGE (nla_nest) | |
440 | * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) | |
441 | * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) | |
442 | * SK_DIAG_BPF_STORAGE (nla_nest) | |
443 | * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) | |
444 | * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) | |
445 | * .... | |
446 | */ | |
447 | static int nla_value_size(u32 value_size) | |
448 | { | |
449 | /* SK_DIAG_BPF_STORAGE (nla_nest) | |
450 | * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) | |
451 | * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) | |
452 | */ | |
453 | return nla_total_size(0) + nla_total_size(sizeof(u32)) + | |
454 | nla_total_size_64bit(value_size); | |
455 | } | |
456 | ||
457 | void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag) | |
458 | { | |
459 | u32 i; | |
460 | ||
461 | if (!diag) | |
462 | return; | |
463 | ||
464 | for (i = 0; i < diag->nr_maps; i++) | |
465 | bpf_map_put(diag->maps[i]); | |
466 | ||
467 | kfree(diag); | |
468 | } | |
469 | EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_free); | |
470 | ||
471 | static bool diag_check_dup(const struct bpf_sk_storage_diag *diag, | |
472 | const struct bpf_map *map) | |
473 | { | |
474 | u32 i; | |
475 | ||
476 | for (i = 0; i < diag->nr_maps; i++) { | |
477 | if (diag->maps[i] == map) | |
478 | return true; | |
479 | } | |
480 | ||
481 | return false; | |
482 | } | |
483 | ||
484 | struct bpf_sk_storage_diag * | |
485 | bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs) | |
486 | { | |
487 | struct bpf_sk_storage_diag *diag; | |
488 | struct nlattr *nla; | |
489 | u32 nr_maps = 0; | |
490 | int rem, err; | |
491 | ||
1f00d375 | 492 | /* bpf_local_storage_map is currently limited to CAP_SYS_ADMIN as |
1ed4d924 MKL |
493 | * the map_alloc_check() side also does. |
494 | */ | |
2c78ee89 | 495 | if (!bpf_capable()) |
1ed4d924 MKL |
496 | return ERR_PTR(-EPERM); |
497 | ||
498 | nla_for_each_nested(nla, nla_stgs, rem) { | |
499 | if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) | |
500 | nr_maps++; | |
501 | } | |
502 | ||
fe0bdaec | 503 | diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL); |
1ed4d924 MKL |
504 | if (!diag) |
505 | return ERR_PTR(-ENOMEM); | |
506 | ||
507 | nla_for_each_nested(nla, nla_stgs, rem) { | |
508 | struct bpf_map *map; | |
509 | int map_fd; | |
510 | ||
511 | if (nla_type(nla) != SK_DIAG_BPF_STORAGE_REQ_MAP_FD) | |
512 | continue; | |
513 | ||
514 | map_fd = nla_get_u32(nla); | |
515 | map = bpf_map_get(map_fd); | |
516 | if (IS_ERR(map)) { | |
517 | err = PTR_ERR(map); | |
518 | goto err_free; | |
519 | } | |
520 | if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) { | |
521 | bpf_map_put(map); | |
522 | err = -EINVAL; | |
523 | goto err_free; | |
524 | } | |
525 | if (diag_check_dup(diag, map)) { | |
526 | bpf_map_put(map); | |
527 | err = -EEXIST; | |
528 | goto err_free; | |
529 | } | |
530 | diag->maps[diag->nr_maps++] = map; | |
531 | } | |
532 | ||
533 | return diag; | |
534 | ||
535 | err_free: | |
536 | bpf_sk_storage_diag_free(diag); | |
537 | return ERR_PTR(err); | |
538 | } | |
539 | EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc); | |
540 | ||
1f00d375 | 541 | static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb) |
1ed4d924 MKL |
542 | { |
543 | struct nlattr *nla_stg, *nla_value; | |
1f00d375 | 544 | struct bpf_local_storage_map *smap; |
1ed4d924 MKL |
545 | |
546 | /* It cannot exceed max nlattr's payload */ | |
1f00d375 | 547 | BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE); |
1ed4d924 MKL |
548 | |
549 | nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE); | |
550 | if (!nla_stg) | |
551 | return -EMSGSIZE; | |
552 | ||
553 | smap = rcu_dereference(sdata->smap); | |
554 | if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id)) | |
555 | goto errout; | |
556 | ||
557 | nla_value = nla_reserve_64bit(skb, SK_DIAG_BPF_STORAGE_MAP_VALUE, | |
558 | smap->map.value_size, | |
559 | SK_DIAG_BPF_STORAGE_PAD); | |
560 | if (!nla_value) | |
561 | goto errout; | |
562 | ||
db559117 | 563 | if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK)) |
1ed4d924 MKL |
564 | copy_map_value_locked(&smap->map, nla_data(nla_value), |
565 | sdata->data, true); | |
566 | else | |
567 | copy_map_value(&smap->map, nla_data(nla_value), sdata->data); | |
568 | ||
569 | nla_nest_end(skb, nla_stg); | |
570 | return 0; | |
571 | ||
572 | errout: | |
573 | nla_nest_cancel(skb, nla_stg); | |
574 | return -EMSGSIZE; | |
575 | } | |
576 | ||
577 | static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb, | |
578 | int stg_array_type, | |
579 | unsigned int *res_diag_size) | |
580 | { | |
581 | /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */ | |
582 | unsigned int diag_size = nla_total_size(0); | |
1f00d375 KS |
583 | struct bpf_local_storage *sk_storage; |
584 | struct bpf_local_storage_elem *selem; | |
585 | struct bpf_local_storage_map *smap; | |
1ed4d924 MKL |
586 | struct nlattr *nla_stgs; |
587 | unsigned int saved_len; | |
588 | int err = 0; | |
589 | ||
590 | rcu_read_lock(); | |
591 | ||
592 | sk_storage = rcu_dereference(sk->sk_bpf_storage); | |
593 | if (!sk_storage || hlist_empty(&sk_storage->list)) { | |
594 | rcu_read_unlock(); | |
595 | return 0; | |
596 | } | |
597 | ||
598 | nla_stgs = nla_nest_start(skb, stg_array_type); | |
599 | if (!nla_stgs) | |
600 | /* Continue to learn diag_size */ | |
601 | err = -EMSGSIZE; | |
602 | ||
603 | saved_len = skb->len; | |
604 | hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) { | |
605 | smap = rcu_dereference(SDATA(selem)->smap); | |
606 | diag_size += nla_value_size(smap->map.value_size); | |
607 | ||
608 | if (nla_stgs && diag_get(SDATA(selem), skb)) | |
609 | /* Continue to learn diag_size */ | |
610 | err = -EMSGSIZE; | |
611 | } | |
612 | ||
613 | rcu_read_unlock(); | |
614 | ||
615 | if (nla_stgs) { | |
616 | if (saved_len == skb->len) | |
617 | nla_nest_cancel(skb, nla_stgs); | |
618 | else | |
619 | nla_nest_end(skb, nla_stgs); | |
620 | } | |
621 | ||
622 | if (diag_size == nla_total_size(0)) { | |
623 | *res_diag_size = 0; | |
624 | return 0; | |
625 | } | |
626 | ||
627 | *res_diag_size = diag_size; | |
628 | return err; | |
629 | } | |
630 | ||
631 | int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, | |
632 | struct sock *sk, struct sk_buff *skb, | |
633 | int stg_array_type, | |
634 | unsigned int *res_diag_size) | |
635 | { | |
636 | /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */ | |
637 | unsigned int diag_size = nla_total_size(0); | |
1f00d375 KS |
638 | struct bpf_local_storage *sk_storage; |
639 | struct bpf_local_storage_data *sdata; | |
1ed4d924 MKL |
640 | struct nlattr *nla_stgs; |
641 | unsigned int saved_len; | |
642 | int err = 0; | |
643 | u32 i; | |
644 | ||
645 | *res_diag_size = 0; | |
646 | ||
647 | /* No map has been specified. Dump all. */ | |
648 | if (!diag->nr_maps) | |
649 | return bpf_sk_storage_diag_put_all(sk, skb, stg_array_type, | |
650 | res_diag_size); | |
651 | ||
652 | rcu_read_lock(); | |
653 | sk_storage = rcu_dereference(sk->sk_bpf_storage); | |
654 | if (!sk_storage || hlist_empty(&sk_storage->list)) { | |
655 | rcu_read_unlock(); | |
656 | return 0; | |
657 | } | |
658 | ||
659 | nla_stgs = nla_nest_start(skb, stg_array_type); | |
660 | if (!nla_stgs) | |
661 | /* Continue to learn diag_size */ | |
662 | err = -EMSGSIZE; | |
663 | ||
664 | saved_len = skb->len; | |
665 | for (i = 0; i < diag->nr_maps; i++) { | |
1f00d375 KS |
666 | sdata = bpf_local_storage_lookup(sk_storage, |
667 | (struct bpf_local_storage_map *)diag->maps[i], | |
1ed4d924 MKL |
668 | false); |
669 | ||
670 | if (!sdata) | |
671 | continue; | |
672 | ||
673 | diag_size += nla_value_size(diag->maps[i]->value_size); | |
674 | ||
675 | if (nla_stgs && diag_get(sdata, skb)) | |
676 | /* Continue to learn diag_size */ | |
677 | err = -EMSGSIZE; | |
678 | } | |
679 | rcu_read_unlock(); | |
680 | ||
681 | if (nla_stgs) { | |
682 | if (saved_len == skb->len) | |
683 | nla_nest_cancel(skb, nla_stgs); | |
684 | else | |
685 | nla_nest_end(skb, nla_stgs); | |
686 | } | |
687 | ||
688 | if (diag_size == nla_total_size(0)) { | |
689 | *res_diag_size = 0; | |
690 | return 0; | |
691 | } | |
692 | ||
693 | *res_diag_size = diag_size; | |
694 | return err; | |
695 | } | |
696 | EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put); | |
5ce6e77c YS |
697 | |
/* Per-seq_file cursor of the sk storage map iterator. */
struct bpf_iter_seq_sk_storage_map_info {
	struct bpf_map *map;	/* map being walked */
	unsigned int bucket_id;	/* bucket to resume scanning from */
	unsigned skip_elems;	/* elements of that bucket to skip on resume */
};
703 | ||
1f00d375 | 704 | static struct bpf_local_storage_elem * |
5ce6e77c | 705 | bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info, |
1f00d375 | 706 | struct bpf_local_storage_elem *prev_selem) |
c69d2ddb | 707 | __acquires(RCU) __releases(RCU) |
5ce6e77c | 708 | { |
1f00d375 KS |
709 | struct bpf_local_storage *sk_storage; |
710 | struct bpf_local_storage_elem *selem; | |
5ce6e77c | 711 | u32 skip_elems = info->skip_elems; |
1f00d375 | 712 | struct bpf_local_storage_map *smap; |
5ce6e77c YS |
713 | u32 bucket_id = info->bucket_id; |
714 | u32 i, count, n_buckets; | |
1f00d375 | 715 | struct bpf_local_storage_map_bucket *b; |
5ce6e77c | 716 | |
1f00d375 | 717 | smap = (struct bpf_local_storage_map *)info->map; |
5ce6e77c YS |
718 | n_buckets = 1U << smap->bucket_log; |
719 | if (bucket_id >= n_buckets) | |
720 | return NULL; | |
721 | ||
722 | /* try to find next selem in the same bucket */ | |
723 | selem = prev_selem; | |
724 | count = 0; | |
725 | while (selem) { | |
c69d2ddb | 726 | selem = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&selem->map_node)), |
1f00d375 | 727 | struct bpf_local_storage_elem, map_node); |
5ce6e77c YS |
728 | if (!selem) { |
729 | /* not found, unlock and go to the next bucket */ | |
730 | b = &smap->buckets[bucket_id++]; | |
c69d2ddb | 731 | rcu_read_unlock(); |
5ce6e77c YS |
732 | skip_elems = 0; |
733 | break; | |
734 | } | |
c69d2ddb | 735 | sk_storage = rcu_dereference(selem->local_storage); |
5ce6e77c YS |
736 | if (sk_storage) { |
737 | info->skip_elems = skip_elems + count; | |
738 | return selem; | |
739 | } | |
740 | count++; | |
741 | } | |
742 | ||
743 | for (i = bucket_id; i < (1U << smap->bucket_log); i++) { | |
744 | b = &smap->buckets[i]; | |
c69d2ddb | 745 | rcu_read_lock(); |
5ce6e77c | 746 | count = 0; |
c69d2ddb YS |
747 | hlist_for_each_entry_rcu(selem, &b->list, map_node) { |
748 | sk_storage = rcu_dereference(selem->local_storage); | |
5ce6e77c YS |
749 | if (sk_storage && count >= skip_elems) { |
750 | info->bucket_id = i; | |
751 | info->skip_elems = count; | |
752 | return selem; | |
753 | } | |
754 | count++; | |
755 | } | |
c69d2ddb | 756 | rcu_read_unlock(); |
5ce6e77c YS |
757 | skip_elems = 0; |
758 | } | |
759 | ||
760 | info->bucket_id = i; | |
761 | info->skip_elems = 0; | |
762 | return NULL; | |
763 | } | |
764 | ||
765 | static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos) | |
766 | { | |
1f00d375 | 767 | struct bpf_local_storage_elem *selem; |
5ce6e77c YS |
768 | |
769 | selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL); | |
770 | if (!selem) | |
771 | return NULL; | |
772 | ||
773 | if (*pos == 0) | |
774 | ++*pos; | |
775 | return selem; | |
776 | } | |
777 | ||
778 | static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v, | |
779 | loff_t *pos) | |
780 | { | |
781 | struct bpf_iter_seq_sk_storage_map_info *info = seq->private; | |
782 | ||
783 | ++*pos; | |
784 | ++info->skip_elems; | |
785 | return bpf_sk_storage_map_seq_find_next(seq->private, v); | |
786 | } | |
787 | ||
788 | struct bpf_iter__bpf_sk_storage_map { | |
789 | __bpf_md_ptr(struct bpf_iter_meta *, meta); | |
790 | __bpf_md_ptr(struct bpf_map *, map); | |
791 | __bpf_md_ptr(struct sock *, sk); | |
792 | __bpf_md_ptr(void *, value); | |
793 | }; | |
794 | ||
/* Iterator function definition for the "bpf_sk_storage_map" target; the
 * argument list mirrors the fields of struct bpf_iter__bpf_sk_storage_map
 * (meta, map, sk, value).
 */
795 | DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta, | |
796 | struct bpf_map *map, struct sock *sk, | |
797 | void *value) | |
798 | ||
799 | static int __bpf_sk_storage_map_seq_show(struct seq_file *seq, | |
1f00d375 | 800 | struct bpf_local_storage_elem *selem) |
5ce6e77c YS |
801 | { |
802 | struct bpf_iter_seq_sk_storage_map_info *info = seq->private; | |
803 | struct bpf_iter__bpf_sk_storage_map ctx = {}; | |
1f00d375 | 804 | struct bpf_local_storage *sk_storage; |
5ce6e77c YS |
805 | struct bpf_iter_meta meta; |
806 | struct bpf_prog *prog; | |
807 | int ret = 0; | |
808 | ||
809 | meta.seq = seq; | |
810 | prog = bpf_iter_get_info(&meta, selem == NULL); | |
811 | if (prog) { | |
812 | ctx.meta = &meta; | |
813 | ctx.map = info->map; | |
814 | if (selem) { | |
c69d2ddb | 815 | sk_storage = rcu_dereference(selem->local_storage); |
1f00d375 | 816 | ctx.sk = sk_storage->owner; |
5ce6e77c YS |
817 | ctx.value = SDATA(selem)->data; |
818 | } | |
819 | ret = bpf_iter_run_prog(prog, &ctx); | |
820 | } | |
821 | ||
822 | return ret; | |
823 | } | |
824 | ||
/* seq_file .show: run the iterator program on the element @v. */
static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v)
{
	struct bpf_local_storage_elem *selem = v;

	return __bpf_sk_storage_map_seq_show(seq, selem);
}
829 | ||
830 | static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v) | |
c69d2ddb | 831 | __releases(RCU) |
5ce6e77c | 832 | { |
c69d2ddb | 833 | if (!v) |
5ce6e77c | 834 | (void)__bpf_sk_storage_map_seq_show(seq, v); |
c69d2ddb YS |
835 | else |
836 | rcu_read_unlock(); | |
5ce6e77c YS |
837 | } |
838 | ||
839 | static int bpf_iter_init_sk_storage_map(void *priv_data, | |
840 | struct bpf_iter_aux_info *aux) | |
841 | { | |
842 | struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data; | |
843 | ||
3c5f6e69 | 844 | bpf_map_inc_with_uref(aux->map); |
5ce6e77c YS |
845 | seq_info->map = aux->map; |
846 | return 0; | |
847 | } | |
848 | ||
3c5f6e69 HT |
849 | static void bpf_iter_fini_sk_storage_map(void *priv_data) |
850 | { | |
851 | struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data; | |
852 | ||
853 | bpf_map_put_with_uref(seq_info->map); | |
854 | } | |
855 | ||
5e7b3020 YS |
856 | static int bpf_iter_attach_map(struct bpf_prog *prog, |
857 | union bpf_iter_link_info *linfo, | |
858 | struct bpf_iter_aux_info *aux) | |
5ce6e77c | 859 | { |
5e7b3020 YS |
860 | struct bpf_map *map; |
861 | int err = -EINVAL; | |
862 | ||
863 | if (!linfo->map.map_fd) | |
864 | return -EBADF; | |
865 | ||
866 | map = bpf_map_get_with_uref(linfo->map.map_fd); | |
867 | if (IS_ERR(map)) | |
868 | return PTR_ERR(map); | |
5ce6e77c YS |
869 | |
870 | if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) | |
5e7b3020 | 871 | goto put_map; |
5ce6e77c | 872 | |
52bd05eb | 873 | if (prog->aux->max_rdwr_access > map->value_size) { |
5e7b3020 YS |
874 | err = -EACCES; |
875 | goto put_map; | |
876 | } | |
5ce6e77c | 877 | |
5e7b3020 | 878 | aux->map = map; |
5ce6e77c | 879 | return 0; |
5e7b3020 YS |
880 | |
881 | put_map: | |
882 | bpf_map_put_with_uref(map); | |
883 | return err; | |
884 | } | |
885 | ||
886 | static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux) | |
887 | { | |
888 | bpf_map_put_with_uref(aux->map); | |
5ce6e77c YS |
889 | } |
890 | ||
891 | static const struct seq_operations bpf_sk_storage_map_seq_ops = { | |
892 | .start = bpf_sk_storage_map_seq_start, | |
893 | .next = bpf_sk_storage_map_seq_next, | |
894 | .stop = bpf_sk_storage_map_seq_stop, | |
895 | .show = bpf_sk_storage_map_seq_show, | |
896 | }; | |
897 | ||
898 | static const struct bpf_iter_seq_info iter_seq_info = { | |
899 | .seq_ops = &bpf_sk_storage_map_seq_ops, | |
900 | .init_seq_private = bpf_iter_init_sk_storage_map, | |
3c5f6e69 | 901 | .fini_seq_private = bpf_iter_fini_sk_storage_map, |
5ce6e77c YS |
902 | .seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info), |
903 | }; | |
904 | ||
905 | static struct bpf_iter_reg bpf_sk_storage_map_reg_info = { | |
906 | .target = "bpf_sk_storage_map", | |
5e7b3020 YS |
907 | .attach_target = bpf_iter_attach_map, |
908 | .detach_target = bpf_iter_detach_map, | |
b76f2226 YS |
909 | .show_fdinfo = bpf_iter_map_show_fdinfo, |
910 | .fill_link_info = bpf_iter_map_fill_link_info, | |
5ce6e77c YS |
911 | .ctx_arg_info_size = 2, |
912 | .ctx_arg_info = { | |
913 | { offsetof(struct bpf_iter__bpf_sk_storage_map, sk), | |
914 | PTR_TO_BTF_ID_OR_NULL }, | |
915 | { offsetof(struct bpf_iter__bpf_sk_storage_map, value), | |
20b2aff4 | 916 | PTR_TO_BUF | PTR_MAYBE_NULL }, |
5ce6e77c YS |
917 | }, |
918 | .seq_info = &iter_seq_info, | |
919 | }; | |
920 | ||
921 | static int __init bpf_sk_storage_map_iter_init(void) | |
922 | { | |
923 | bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id = | |
924 | btf_sock_ids[BTF_SOCK_TYPE_SOCK]; | |
925 | return bpf_iter_reg_target(&bpf_sk_storage_map_reg_info); | |
926 | } | |
927 | late_initcall(bpf_sk_storage_map_iter_init); |