Commit | Line | Data |
---|---|---|
27ae7997 MKL |
1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* Copyright (c) 2019 Facebook */ | |
3 | ||
4 | #include <linux/bpf.h> | |
5 | #include <linux/bpf_verifier.h> | |
6 | #include <linux/btf.h> | |
7 | #include <linux/filter.h> | |
8 | #include <linux/slab.h> | |
9 | #include <linux/numa.h> | |
10 | #include <linux/seq_file.h> | |
11 | #include <linux/refcount.h> | |
85d33df3 | 12 | #include <linux/mutex.h> |
27ae7997 | 13 | |
85d33df3 MKL |
/* Lifecycle of the single value stored in a struct_ops map. */
enum bpf_struct_ops_state {
	/* Zero value: nothing has been registered through the map. */
	BPF_STRUCT_OPS_STATE_INIT,
	/* Published by map_update once st_ops->reg() has succeeded. */
	BPF_STRUCT_OPS_STATE_INUSE,
	/* Set by map_delete right before st_ops->unreg() is called. */
	BPF_STRUCT_OPS_STATE_TOBEFREE,
};
19 | ||
/* Header fields placed in front of the kernel struct in every
 * struct_ops map value.
 */
#define BPF_STRUCT_OPS_COMMON_VALUE			\
	refcount_t refcnt;				\
	enum bpf_struct_ops_state state
23 | ||
24 | struct bpf_struct_ops_value { | |
25 | BPF_STRUCT_OPS_COMMON_VALUE; | |
d7f10df8 | 26 | char data[] ____cacheline_aligned_in_smp; |
85d33df3 MKL |
27 | }; |
28 | ||
29 | struct bpf_struct_ops_map { | |
30 | struct bpf_map map; | |
31 | const struct bpf_struct_ops *st_ops; | |
32 | /* protect map_update */ | |
33 | struct mutex lock; | |
34 | /* progs has all the bpf_prog that is populated | |
35 | * to the func ptr of the kernel's struct | |
36 | * (in kvalue.data). | |
37 | */ | |
38 | struct bpf_prog **progs; | |
39 | /* image is a page that has all the trampolines | |
40 | * that stores the func args before calling the bpf_prog. | |
41 | * A PAGE_SIZE "image" is enough to store all trampoline for | |
42 | * "progs[]". | |
43 | */ | |
44 | void *image; | |
45 | /* uvalue->data stores the kernel struct | |
46 | * (e.g. tcp_congestion_ops) that is more useful | |
47 | * to userspace than the kvalue. For example, | |
48 | * the bpf_prog's id is stored instead of the kernel | |
49 | * address of a func ptr. | |
50 | */ | |
51 | struct bpf_struct_ops_value *uvalue; | |
52 | /* kvalue.data stores the actual kernel's struct | |
53 | * (e.g. tcp_congestion_ops) that will be | |
54 | * registered to the kernel subsystem. | |
55 | */ | |
56 | struct bpf_struct_ops_value kvalue; | |
57 | }; | |
58 | ||
/* Name prefix of the map value struct and its length without the NUL. */
#define VALUE_PREFIX		"bpf_struct_ops_"
#define VALUE_PREFIX_LEN	(sizeof(VALUE_PREFIX) - 1)
61 | ||
62 | /* bpf_struct_ops_##_name (e.g. bpf_struct_ops_tcp_congestion_ops) is | |
63 | * the map's value exposed to the userspace and its btf-type-id is | |
64 | * stored at the map->btf_vmlinux_value_type_id. | |
65 | * | |
66 | */ | |
27ae7997 | 67 | #define BPF_STRUCT_OPS_TYPE(_name) \ |
85d33df3 MKL |
68 | extern struct bpf_struct_ops bpf_##_name; \ |
69 | \ | |
70 | struct bpf_struct_ops_##_name { \ | |
71 | BPF_STRUCT_OPS_COMMON_VALUE; \ | |
72 | struct _name data ____cacheline_aligned_in_smp; \ | |
73 | }; | |
27ae7997 MKL |
74 | #include "bpf_struct_ops_types.h" |
75 | #undef BPF_STRUCT_OPS_TYPE | |
76 | ||
77 | enum { | |
78 | #define BPF_STRUCT_OPS_TYPE(_name) BPF_STRUCT_OPS_TYPE_##_name, | |
79 | #include "bpf_struct_ops_types.h" | |
80 | #undef BPF_STRUCT_OPS_TYPE | |
81 | __NR_BPF_STRUCT_OPS_TYPE, | |
82 | }; | |
83 | ||
84 | static struct bpf_struct_ops * const bpf_struct_ops[] = { | |
85 | #define BPF_STRUCT_OPS_TYPE(_name) \ | |
86 | [BPF_STRUCT_OPS_TYPE_##_name] = &bpf_##_name, | |
87 | #include "bpf_struct_ops_types.h" | |
88 | #undef BPF_STRUCT_OPS_TYPE | |
89 | }; | |
90 | ||
91 | const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = { | |
92 | }; | |
93 | ||
94 | const struct bpf_prog_ops bpf_struct_ops_prog_ops = { | |
95 | }; | |
96 | ||
85d33df3 MKL |
/* BTF type of "struct module", looked up in bpf_struct_ops_init() and
 * compared against member pointer types in map_update.
 */
static const struct btf_type *module_type;
98 | ||
d3e42bb0 | 99 | void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log) |
27ae7997 | 100 | { |
85d33df3 | 101 | s32 type_id, value_id, module_id; |
27ae7997 MKL |
102 | const struct btf_member *member; |
103 | struct bpf_struct_ops *st_ops; | |
27ae7997 | 104 | const struct btf_type *t; |
85d33df3 | 105 | char value_name[128]; |
27ae7997 | 106 | const char *mname; |
27ae7997 MKL |
107 | u32 i, j; |
108 | ||
85d33df3 MKL |
109 | /* Ensure BTF type is emitted for "struct bpf_struct_ops_##_name" */ |
110 | #define BPF_STRUCT_OPS_TYPE(_name) BTF_TYPE_EMIT(struct bpf_struct_ops_##_name); | |
111 | #include "bpf_struct_ops_types.h" | |
112 | #undef BPF_STRUCT_OPS_TYPE | |
113 | ||
114 | module_id = btf_find_by_name_kind(btf, "module", BTF_KIND_STRUCT); | |
115 | if (module_id < 0) { | |
116 | pr_warn("Cannot find struct module in btf_vmlinux\n"); | |
117 | return; | |
118 | } | |
119 | module_type = btf_type_by_id(btf, module_id); | |
120 | ||
27ae7997 MKL |
121 | for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { |
122 | st_ops = bpf_struct_ops[i]; | |
123 | ||
85d33df3 MKL |
124 | if (strlen(st_ops->name) + VALUE_PREFIX_LEN >= |
125 | sizeof(value_name)) { | |
126 | pr_warn("struct_ops name %s is too long\n", | |
127 | st_ops->name); | |
128 | continue; | |
129 | } | |
130 | sprintf(value_name, "%s%s", VALUE_PREFIX, st_ops->name); | |
131 | ||
132 | value_id = btf_find_by_name_kind(btf, value_name, | |
133 | BTF_KIND_STRUCT); | |
134 | if (value_id < 0) { | |
135 | pr_warn("Cannot find struct %s in btf_vmlinux\n", | |
136 | value_name); | |
137 | continue; | |
138 | } | |
139 | ||
27ae7997 MKL |
140 | type_id = btf_find_by_name_kind(btf, st_ops->name, |
141 | BTF_KIND_STRUCT); | |
142 | if (type_id < 0) { | |
143 | pr_warn("Cannot find struct %s in btf_vmlinux\n", | |
144 | st_ops->name); | |
145 | continue; | |
146 | } | |
147 | t = btf_type_by_id(btf, type_id); | |
148 | if (btf_type_vlen(t) > BPF_STRUCT_OPS_MAX_NR_MEMBERS) { | |
149 | pr_warn("Cannot support #%u members in struct %s\n", | |
150 | btf_type_vlen(t), st_ops->name); | |
151 | continue; | |
152 | } | |
153 | ||
154 | for_each_member(j, t, member) { | |
155 | const struct btf_type *func_proto; | |
156 | ||
157 | mname = btf_name_by_offset(btf, member->name_off); | |
158 | if (!*mname) { | |
159 | pr_warn("anon member in struct %s is not supported\n", | |
160 | st_ops->name); | |
161 | break; | |
162 | } | |
163 | ||
164 | if (btf_member_bitfield_size(t, member)) { | |
165 | pr_warn("bit field member %s in struct %s is not supported\n", | |
166 | mname, st_ops->name); | |
167 | break; | |
168 | } | |
169 | ||
170 | func_proto = btf_type_resolve_func_ptr(btf, | |
171 | member->type, | |
172 | NULL); | |
173 | if (func_proto && | |
d3e42bb0 | 174 | btf_distill_func_proto(log, btf, |
27ae7997 MKL |
175 | func_proto, mname, |
176 | &st_ops->func_models[j])) { | |
177 | pr_warn("Error in parsing func ptr %s in struct %s\n", | |
178 | mname, st_ops->name); | |
179 | break; | |
180 | } | |
181 | } | |
182 | ||
183 | if (j == btf_type_vlen(t)) { | |
184 | if (st_ops->init(btf)) { | |
185 | pr_warn("Error in init bpf_struct_ops %s\n", | |
186 | st_ops->name); | |
187 | } else { | |
188 | st_ops->type_id = type_id; | |
189 | st_ops->type = t; | |
85d33df3 MKL |
190 | st_ops->value_id = value_id; |
191 | st_ops->value_type = btf_type_by_id(btf, | |
192 | value_id); | |
27ae7997 MKL |
193 | } |
194 | } | |
195 | } | |
196 | } | |
197 | ||
/* Defined outside this file; the lookups below return NULL while it is unset. */
extern struct btf *btf_vmlinux;
199 | ||
85d33df3 MKL |
200 | static const struct bpf_struct_ops * |
201 | bpf_struct_ops_find_value(u32 value_id) | |
202 | { | |
203 | unsigned int i; | |
204 | ||
205 | if (!value_id || !btf_vmlinux) | |
206 | return NULL; | |
207 | ||
208 | for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { | |
209 | if (bpf_struct_ops[i]->value_id == value_id) | |
210 | return bpf_struct_ops[i]; | |
211 | } | |
212 | ||
213 | return NULL; | |
214 | } | |
215 | ||
27ae7997 MKL |
216 | const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) |
217 | { | |
218 | unsigned int i; | |
219 | ||
220 | if (!type_id || !btf_vmlinux) | |
221 | return NULL; | |
222 | ||
223 | for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { | |
224 | if (bpf_struct_ops[i]->type_id == type_id) | |
225 | return bpf_struct_ops[i]; | |
226 | } | |
227 | ||
228 | return NULL; | |
229 | } | |
85d33df3 MKL |
230 | |
231 | static int bpf_struct_ops_map_get_next_key(struct bpf_map *map, void *key, | |
232 | void *next_key) | |
233 | { | |
234 | if (key && *(u32 *)key == 0) | |
235 | return -ENOENT; | |
236 | ||
237 | *(u32 *)next_key = 0; | |
238 | return 0; | |
239 | } | |
240 | ||
241 | int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, | |
242 | void *value) | |
243 | { | |
244 | struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; | |
245 | struct bpf_struct_ops_value *uvalue, *kvalue; | |
246 | enum bpf_struct_ops_state state; | |
247 | ||
248 | if (unlikely(*(u32 *)key != 0)) | |
249 | return -ENOENT; | |
250 | ||
251 | kvalue = &st_map->kvalue; | |
252 | /* Pair with smp_store_release() during map_update */ | |
253 | state = smp_load_acquire(&kvalue->state); | |
254 | if (state == BPF_STRUCT_OPS_STATE_INIT) { | |
255 | memset(value, 0, map->value_size); | |
256 | return 0; | |
257 | } | |
258 | ||
259 | /* No lock is needed. state and refcnt do not need | |
260 | * to be updated together under atomic context. | |
261 | */ | |
262 | uvalue = (struct bpf_struct_ops_value *)value; | |
263 | memcpy(uvalue, st_map->uvalue, map->value_size); | |
264 | uvalue->state = state; | |
265 | refcount_set(&uvalue->refcnt, refcount_read(&kvalue->refcnt)); | |
266 | ||
267 | return 0; | |
268 | } | |
269 | ||
270 | static void *bpf_struct_ops_map_lookup_elem(struct bpf_map *map, void *key) | |
271 | { | |
272 | return ERR_PTR(-EINVAL); | |
273 | } | |
274 | ||
275 | static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map) | |
276 | { | |
277 | const struct btf_type *t = st_map->st_ops->type; | |
278 | u32 i; | |
279 | ||
280 | for (i = 0; i < btf_type_vlen(t); i++) { | |
281 | if (st_map->progs[i]) { | |
282 | bpf_prog_put(st_map->progs[i]); | |
283 | st_map->progs[i] = NULL; | |
284 | } | |
285 | } | |
286 | } | |
287 | ||
288 | static int check_zero_holes(const struct btf_type *t, void *data) | |
289 | { | |
290 | const struct btf_member *member; | |
291 | u32 i, moff, msize, prev_mend = 0; | |
292 | const struct btf_type *mtype; | |
293 | ||
294 | for_each_member(i, t, member) { | |
295 | moff = btf_member_bit_offset(t, member) / 8; | |
296 | if (moff > prev_mend && | |
297 | memchr_inv(data + prev_mend, 0, moff - prev_mend)) | |
298 | return -EINVAL; | |
299 | ||
300 | mtype = btf_type_by_id(btf_vmlinux, member->type); | |
301 | mtype = btf_resolve_size(btf_vmlinux, mtype, &msize, | |
302 | NULL, NULL); | |
303 | if (IS_ERR(mtype)) | |
304 | return PTR_ERR(mtype); | |
305 | prev_mend = moff + msize; | |
306 | } | |
307 | ||
308 | if (t->size > prev_mend && | |
309 | memchr_inv(data + prev_mend, 0, t->size - prev_mend)) | |
310 | return -EINVAL; | |
311 | ||
312 | return 0; | |
313 | } | |
314 | ||
315 | static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, | |
316 | void *value, u64 flags) | |
317 | { | |
318 | struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; | |
319 | const struct bpf_struct_ops *st_ops = st_map->st_ops; | |
320 | struct bpf_struct_ops_value *uvalue, *kvalue; | |
321 | const struct btf_member *member; | |
322 | const struct btf_type *t = st_ops->type; | |
88fd9e53 | 323 | struct bpf_tramp_progs *tprogs = NULL; |
85d33df3 MKL |
324 | void *udata, *kdata; |
325 | int prog_fd, err = 0; | |
326 | void *image; | |
327 | u32 i; | |
328 | ||
329 | if (flags) | |
330 | return -EINVAL; | |
331 | ||
332 | if (*(u32 *)key != 0) | |
333 | return -E2BIG; | |
334 | ||
335 | err = check_zero_holes(st_ops->value_type, value); | |
336 | if (err) | |
337 | return err; | |
338 | ||
339 | uvalue = (struct bpf_struct_ops_value *)value; | |
340 | err = check_zero_holes(t, uvalue->data); | |
341 | if (err) | |
342 | return err; | |
343 | ||
344 | if (uvalue->state || refcount_read(&uvalue->refcnt)) | |
345 | return -EINVAL; | |
346 | ||
88fd9e53 KS |
347 | tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL); |
348 | if (!tprogs) | |
349 | return -ENOMEM; | |
350 | ||
85d33df3 MKL |
351 | uvalue = (struct bpf_struct_ops_value *)st_map->uvalue; |
352 | kvalue = (struct bpf_struct_ops_value *)&st_map->kvalue; | |
353 | ||
354 | mutex_lock(&st_map->lock); | |
355 | ||
356 | if (kvalue->state != BPF_STRUCT_OPS_STATE_INIT) { | |
357 | err = -EBUSY; | |
358 | goto unlock; | |
359 | } | |
360 | ||
361 | memcpy(uvalue, value, map->value_size); | |
362 | ||
363 | udata = &uvalue->data; | |
364 | kdata = &kvalue->data; | |
365 | image = st_map->image; | |
366 | ||
367 | for_each_member(i, t, member) { | |
368 | const struct btf_type *mtype, *ptype; | |
369 | struct bpf_prog *prog; | |
370 | u32 moff; | |
371 | ||
372 | moff = btf_member_bit_offset(t, member) / 8; | |
373 | ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL); | |
374 | if (ptype == module_type) { | |
375 | if (*(void **)(udata + moff)) | |
376 | goto reset_unlock; | |
377 | *(void **)(kdata + moff) = BPF_MODULE_OWNER; | |
378 | continue; | |
379 | } | |
380 | ||
381 | err = st_ops->init_member(t, member, kdata, udata); | |
382 | if (err < 0) | |
383 | goto reset_unlock; | |
384 | ||
385 | /* The ->init_member() has handled this member */ | |
386 | if (err > 0) | |
387 | continue; | |
388 | ||
389 | /* If st_ops->init_member does not handle it, | |
390 | * we will only handle func ptrs and zero-ed members | |
391 | * here. Reject everything else. | |
392 | */ | |
393 | ||
394 | /* All non func ptr member must be 0 */ | |
395 | if (!ptype || !btf_type_is_func_proto(ptype)) { | |
396 | u32 msize; | |
397 | ||
398 | mtype = btf_type_by_id(btf_vmlinux, member->type); | |
399 | mtype = btf_resolve_size(btf_vmlinux, mtype, &msize, | |
400 | NULL, NULL); | |
401 | if (IS_ERR(mtype)) { | |
402 | err = PTR_ERR(mtype); | |
403 | goto reset_unlock; | |
404 | } | |
405 | ||
406 | if (memchr_inv(udata + moff, 0, msize)) { | |
407 | err = -EINVAL; | |
408 | goto reset_unlock; | |
409 | } | |
410 | ||
411 | continue; | |
412 | } | |
413 | ||
414 | prog_fd = (int)(*(unsigned long *)(udata + moff)); | |
415 | /* Similar check as the attr->attach_prog_fd */ | |
416 | if (!prog_fd) | |
417 | continue; | |
418 | ||
419 | prog = bpf_prog_get(prog_fd); | |
420 | if (IS_ERR(prog)) { | |
421 | err = PTR_ERR(prog); | |
422 | goto reset_unlock; | |
423 | } | |
424 | st_map->progs[i] = prog; | |
425 | ||
426 | if (prog->type != BPF_PROG_TYPE_STRUCT_OPS || | |
427 | prog->aux->attach_btf_id != st_ops->type_id || | |
428 | prog->expected_attach_type != i) { | |
429 | err = -EINVAL; | |
430 | goto reset_unlock; | |
431 | } | |
432 | ||
88fd9e53 KS |
433 | tprogs[BPF_TRAMP_FENTRY].progs[0] = prog; |
434 | tprogs[BPF_TRAMP_FENTRY].nr_progs = 1; | |
85d33df3 MKL |
435 | err = arch_prepare_bpf_trampoline(image, |
436 | st_map->image + PAGE_SIZE, | |
437 | &st_ops->func_models[i], 0, | |
88fd9e53 | 438 | tprogs, NULL); |
85d33df3 MKL |
439 | if (err < 0) |
440 | goto reset_unlock; | |
441 | ||
442 | *(void **)(kdata + moff) = image; | |
443 | image += err; | |
444 | ||
445 | /* put prog_id to udata */ | |
446 | *(unsigned long *)(udata + moff) = prog->aux->id; | |
447 | } | |
448 | ||
449 | refcount_set(&kvalue->refcnt, 1); | |
450 | bpf_map_inc(map); | |
451 | ||
452 | set_memory_ro((long)st_map->image, 1); | |
453 | set_memory_x((long)st_map->image, 1); | |
454 | err = st_ops->reg(kdata); | |
455 | if (likely(!err)) { | |
456 | /* Pair with smp_load_acquire() during lookup_elem(). | |
457 | * It ensures the above udata updates (e.g. prog->aux->id) | |
458 | * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set. | |
459 | */ | |
460 | smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_INUSE); | |
461 | goto unlock; | |
462 | } | |
463 | ||
464 | /* Error during st_ops->reg(). It is very unlikely since | |
465 | * the above init_member() should have caught it earlier | |
466 | * before reg(). The only possibility is if there was a race | |
467 | * in registering the struct_ops (under the same name) to | |
468 | * a sub-system through different struct_ops's maps. | |
469 | */ | |
470 | set_memory_nx((long)st_map->image, 1); | |
471 | set_memory_rw((long)st_map->image, 1); | |
472 | bpf_map_put(map); | |
473 | ||
474 | reset_unlock: | |
475 | bpf_struct_ops_map_put_progs(st_map); | |
476 | memset(uvalue, 0, map->value_size); | |
477 | memset(kvalue, 0, map->value_size); | |
478 | unlock: | |
88fd9e53 | 479 | kfree(tprogs); |
85d33df3 MKL |
480 | mutex_unlock(&st_map->lock); |
481 | return err; | |
482 | } | |
483 | ||
484 | static int bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key) | |
485 | { | |
486 | enum bpf_struct_ops_state prev_state; | |
487 | struct bpf_struct_ops_map *st_map; | |
488 | ||
489 | st_map = (struct bpf_struct_ops_map *)map; | |
490 | prev_state = cmpxchg(&st_map->kvalue.state, | |
491 | BPF_STRUCT_OPS_STATE_INUSE, | |
492 | BPF_STRUCT_OPS_STATE_TOBEFREE); | |
8e5290e7 MKL |
493 | switch (prev_state) { |
494 | case BPF_STRUCT_OPS_STATE_INUSE: | |
85d33df3 MKL |
495 | st_map->st_ops->unreg(&st_map->kvalue.data); |
496 | if (refcount_dec_and_test(&st_map->kvalue.refcnt)) | |
497 | bpf_map_put(map); | |
8e5290e7 MKL |
498 | return 0; |
499 | case BPF_STRUCT_OPS_STATE_TOBEFREE: | |
500 | return -EINPROGRESS; | |
501 | case BPF_STRUCT_OPS_STATE_INIT: | |
502 | return -ENOENT; | |
503 | default: | |
504 | WARN_ON_ONCE(1); | |
505 | /* Should never happen. Treat it as not found. */ | |
506 | return -ENOENT; | |
85d33df3 | 507 | } |
85d33df3 MKL |
508 | } |
509 | ||
510 | static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key, | |
511 | struct seq_file *m) | |
512 | { | |
513 | void *value; | |
3b413041 | 514 | int err; |
85d33df3 | 515 | |
3b413041 | 516 | value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN); |
85d33df3 MKL |
517 | if (!value) |
518 | return; | |
519 | ||
3b413041 MKL |
520 | err = bpf_struct_ops_map_sys_lookup_elem(map, key, value); |
521 | if (!err) { | |
522 | btf_type_seq_show(btf_vmlinux, map->btf_vmlinux_value_type_id, | |
523 | value, m); | |
524 | seq_puts(m, "\n"); | |
525 | } | |
526 | ||
527 | kfree(value); | |
85d33df3 MKL |
528 | } |
529 | ||
530 | static void bpf_struct_ops_map_free(struct bpf_map *map) | |
531 | { | |
532 | struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; | |
533 | ||
534 | if (st_map->progs) | |
535 | bpf_struct_ops_map_put_progs(st_map); | |
536 | bpf_map_area_free(st_map->progs); | |
537 | bpf_jit_free_exec(st_map->image); | |
538 | bpf_map_area_free(st_map->uvalue); | |
539 | bpf_map_area_free(st_map); | |
540 | } | |
541 | ||
542 | static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr) | |
543 | { | |
544 | if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 || | |
545 | attr->map_flags || !attr->btf_vmlinux_value_type_id) | |
546 | return -EINVAL; | |
547 | return 0; | |
548 | } | |
549 | ||
550 | static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) | |
551 | { | |
552 | const struct bpf_struct_ops *st_ops; | |
553 | size_t map_total_size, st_map_size; | |
554 | struct bpf_struct_ops_map *st_map; | |
555 | const struct btf_type *t, *vt; | |
556 | struct bpf_map_memory mem; | |
557 | struct bpf_map *map; | |
558 | int err; | |
559 | ||
2c78ee89 | 560 | if (!bpf_capable()) |
85d33df3 MKL |
561 | return ERR_PTR(-EPERM); |
562 | ||
563 | st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id); | |
564 | if (!st_ops) | |
565 | return ERR_PTR(-ENOTSUPP); | |
566 | ||
567 | vt = st_ops->value_type; | |
568 | if (attr->value_size != vt->size) | |
569 | return ERR_PTR(-EINVAL); | |
570 | ||
571 | t = st_ops->type; | |
572 | ||
573 | st_map_size = sizeof(*st_map) + | |
574 | /* kvalue stores the | |
575 | * struct bpf_struct_ops_tcp_congestions_ops | |
576 | */ | |
577 | (vt->size - sizeof(struct bpf_struct_ops_value)); | |
578 | map_total_size = st_map_size + | |
579 | /* uvalue */ | |
580 | sizeof(vt->size) + | |
581 | /* struct bpf_progs **progs */ | |
582 | btf_type_vlen(t) * sizeof(struct bpf_prog *); | |
583 | err = bpf_map_charge_init(&mem, map_total_size); | |
584 | if (err < 0) | |
585 | return ERR_PTR(err); | |
586 | ||
587 | st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE); | |
588 | if (!st_map) { | |
589 | bpf_map_charge_finish(&mem); | |
590 | return ERR_PTR(-ENOMEM); | |
591 | } | |
592 | st_map->st_ops = st_ops; | |
593 | map = &st_map->map; | |
594 | ||
595 | st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE); | |
596 | st_map->progs = | |
597 | bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_prog *), | |
598 | NUMA_NO_NODE); | |
599 | st_map->image = bpf_jit_alloc_exec(PAGE_SIZE); | |
600 | if (!st_map->uvalue || !st_map->progs || !st_map->image) { | |
601 | bpf_struct_ops_map_free(map); | |
602 | bpf_map_charge_finish(&mem); | |
603 | return ERR_PTR(-ENOMEM); | |
604 | } | |
605 | ||
606 | mutex_init(&st_map->lock); | |
607 | set_vm_flush_reset_perms(st_map->image); | |
608 | bpf_map_init_from_attr(map, attr); | |
609 | bpf_map_charge_move(&map->memory, &mem); | |
610 | ||
611 | return map; | |
612 | } | |
613 | ||
614 | const struct bpf_map_ops bpf_struct_ops_map_ops = { | |
615 | .map_alloc_check = bpf_struct_ops_map_alloc_check, | |
616 | .map_alloc = bpf_struct_ops_map_alloc, | |
617 | .map_free = bpf_struct_ops_map_free, | |
618 | .map_get_next_key = bpf_struct_ops_map_get_next_key, | |
619 | .map_lookup_elem = bpf_struct_ops_map_lookup_elem, | |
620 | .map_delete_elem = bpf_struct_ops_map_delete_elem, | |
621 | .map_update_elem = bpf_struct_ops_map_update_elem, | |
622 | .map_seq_show_elem = bpf_struct_ops_map_seq_show_elem, | |
623 | }; | |
624 | ||
625 | /* "const void *" because some subsystem is | |
626 | * passing a const (e.g. const struct tcp_congestion_ops *) | |
627 | */ | |
628 | bool bpf_struct_ops_get(const void *kdata) | |
629 | { | |
630 | struct bpf_struct_ops_value *kvalue; | |
631 | ||
632 | kvalue = container_of(kdata, struct bpf_struct_ops_value, data); | |
633 | ||
634 | return refcount_inc_not_zero(&kvalue->refcnt); | |
635 | } | |
636 | ||
637 | void bpf_struct_ops_put(const void *kdata) | |
638 | { | |
639 | struct bpf_struct_ops_value *kvalue; | |
640 | ||
641 | kvalue = container_of(kdata, struct bpf_struct_ops_value, data); | |
642 | if (refcount_dec_and_test(&kvalue->refcnt)) { | |
643 | struct bpf_struct_ops_map *st_map; | |
644 | ||
645 | st_map = container_of(kvalue, struct bpf_struct_ops_map, | |
646 | kvalue); | |
647 | bpf_map_put(&st_map->map); | |
648 | } | |
649 | } |