Commit | Line | Data |
---|---|---|
27ae7997 MKL |
1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* Copyright (c) 2019 Facebook */ | |
3 | ||
4 | #include <linux/bpf.h> | |
5 | #include <linux/bpf_verifier.h> | |
6 | #include <linux/btf.h> | |
7 | #include <linux/filter.h> | |
8 | #include <linux/slab.h> | |
9 | #include <linux/numa.h> | |
10 | #include <linux/seq_file.h> | |
11 | #include <linux/refcount.h> | |
85d33df3 | 12 | #include <linux/mutex.h> |
c317ab71 | 13 | #include <linux/btf_ids.h> |
b671c206 | 14 | #include <linux/rcupdate_wait.h> |
1adddc97 | 15 | #include <linux/poll.h> |
27ae7997 | 16 | |
85d33df3 | 17 | struct bpf_struct_ops_value { |
612d087d | 18 | struct bpf_struct_ops_common_value common; |
d7f10df8 | 19 | char data[] ____cacheline_aligned_in_smp; |
85d33df3 MKL |
20 | }; |
21 | ||
187e2af0 KFL |
22 | #define MAX_TRAMP_IMAGE_PAGES 8 |
23 | ||
85d33df3 MKL |
24 | struct bpf_struct_ops_map { |
25 | struct bpf_map map; | |
eb18b49e | 26 | struct rcu_head rcu; |
4c5763ed | 27 | const struct bpf_struct_ops_desc *st_ops_desc; |
85d33df3 MKL |
28 | /* protect map_update */ |
29 | struct mutex lock; | |
f7e0beaf | 30 | /* link has all the bpf_links that is populated |
85d33df3 MKL |
31 | * to the func ptr of the kernel's struct |
32 | * (in kvalue.data). | |
33 | */ | |
f7e0beaf | 34 | struct bpf_link **links; |
e3f87fdf | 35 | u32 links_cnt; |
187e2af0 KFL |
36 | u32 image_pages_cnt; |
37 | /* image_pages is an array of pages that has all the trampolines | |
85d33df3 | 38 | * that stores the func args before calling the bpf_prog. |
85d33df3 | 39 | */ |
187e2af0 | 40 | void *image_pages[MAX_TRAMP_IMAGE_PAGES]; |
47f4f657 KFL |
41 | /* The owner moduler's btf. */ |
42 | struct btf *btf; | |
85d33df3 MKL |
43 | /* uvalue->data stores the kernel struct |
44 | * (e.g. tcp_congestion_ops) that is more useful | |
45 | * to userspace than the kvalue. For example, | |
46 | * the bpf_prog's id is stored instead of the kernel | |
47 | * address of a func ptr. | |
48 | */ | |
49 | struct bpf_struct_ops_value *uvalue; | |
50 | /* kvalue.data stores the actual kernel's struct | |
51 | * (e.g. tcp_congestion_ops) that will be | |
52 | * registered to the kernel subsystem. | |
53 | */ | |
54 | struct bpf_struct_ops_value kvalue; | |
55 | }; | |
56 | ||
68b04864 KFL |
57 | struct bpf_struct_ops_link { |
58 | struct bpf_link link; | |
59 | struct bpf_map __rcu *map; | |
1adddc97 | 60 | wait_queue_head_t wait_hup; |
68b04864 KFL |
61 | }; |
62 | ||
aef56f2e KFL |
63 | static DEFINE_MUTEX(update_mutex); |
64 | ||
85d33df3 MKL |
65 | #define VALUE_PREFIX "bpf_struct_ops_" |
66 | #define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1) | |
67 | ||
27ae7997 MKL |
68 | const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = { |
69 | }; | |
70 | ||
71 | const struct bpf_prog_ops bpf_struct_ops_prog_ops = { | |
c196906d HT |
72 | #ifdef CONFIG_NET |
73 | .test_run = bpf_struct_ops_test_run, | |
74 | #endif | |
27ae7997 MKL |
75 | }; |
76 | ||
95678395 KFL |
77 | BTF_ID_LIST(st_ops_ids) |
78 | BTF_ID(struct, module) | |
612d087d | 79 | BTF_ID(struct, bpf_struct_ops_common_value) |
95678395 KFL |
80 | |
81 | enum { | |
82 | IDX_MODULE_ID, | |
612d087d | 83 | IDX_ST_OPS_COMMON_VALUE_ID, |
95678395 | 84 | }; |
85d33df3 | 85 | |
612d087d KFL |
86 | extern struct btf *btf_vmlinux; |
87 | ||
88 | static bool is_valid_value_type(struct btf *btf, s32 value_id, | |
89 | const struct btf_type *type, | |
90 | const char *value_name) | |
91 | { | |
92 | const struct btf_type *common_value_type; | |
93 | const struct btf_member *member; | |
94 | const struct btf_type *vt, *mt; | |
95 | ||
96 | vt = btf_type_by_id(btf, value_id); | |
97 | if (btf_vlen(vt) != 2) { | |
98 | pr_warn("The number of %s's members should be 2, but we get %d\n", | |
99 | value_name, btf_vlen(vt)); | |
100 | return false; | |
101 | } | |
102 | member = btf_type_member(vt); | |
103 | mt = btf_type_by_id(btf, member->type); | |
104 | common_value_type = btf_type_by_id(btf_vmlinux, | |
105 | st_ops_ids[IDX_ST_OPS_COMMON_VALUE_ID]); | |
106 | if (mt != common_value_type) { | |
107 | pr_warn("The first member of %s should be bpf_struct_ops_common_value\n", | |
108 | value_name); | |
109 | return false; | |
110 | } | |
111 | member++; | |
112 | mt = btf_type_by_id(btf, member->type); | |
113 | if (mt != type) { | |
114 | pr_warn("The second member of %s should be %s\n", | |
115 | value_name, btf_name_by_offset(btf, type->name_off)); | |
116 | return false; | |
117 | } | |
118 | ||
119 | return true; | |
120 | } | |
121 | ||
187e2af0 KFL |
122 | static void *bpf_struct_ops_image_alloc(void) |
123 | { | |
124 | void *image; | |
125 | int err; | |
126 | ||
127 | err = bpf_jit_charge_modmem(PAGE_SIZE); | |
128 | if (err) | |
129 | return ERR_PTR(err); | |
130 | image = arch_alloc_bpf_trampoline(PAGE_SIZE); | |
131 | if (!image) { | |
132 | bpf_jit_uncharge_modmem(PAGE_SIZE); | |
133 | return ERR_PTR(-ENOMEM); | |
134 | } | |
135 | ||
136 | return image; | |
137 | } | |
138 | ||
139 | void bpf_struct_ops_image_free(void *image) | |
140 | { | |
141 | if (image) { | |
142 | arch_free_bpf_trampoline(image, PAGE_SIZE); | |
143 | bpf_jit_uncharge_modmem(PAGE_SIZE); | |
144 | } | |
145 | } | |
146 | ||
16116035 KFL |
147 | #define MAYBE_NULL_SUFFIX "__nullable" |
148 | #define MAX_STUB_NAME 128 | |
149 | ||
150 | /* Return the type info of a stub function, if it exists. | |
151 | * | |
152 | * The name of a stub function is made up of the name of the struct_ops and | |
153 | * the name of the function pointer member, separated by "__". For example, | |
154 | * if the struct_ops type is named "foo_ops" and the function pointer | |
155 | * member is named "bar", the stub function name would be "foo_ops__bar". | |
156 | */ | |
157 | static const struct btf_type * | |
158 | find_stub_func_proto(const struct btf *btf, const char *st_op_name, | |
159 | const char *member_name) | |
160 | { | |
161 | char stub_func_name[MAX_STUB_NAME]; | |
162 | const struct btf_type *func_type; | |
163 | s32 btf_id; | |
164 | int cp; | |
165 | ||
166 | cp = snprintf(stub_func_name, MAX_STUB_NAME, "%s__%s", | |
167 | st_op_name, member_name); | |
168 | if (cp >= MAX_STUB_NAME) { | |
169 | pr_warn("Stub function name too long\n"); | |
170 | return NULL; | |
171 | } | |
172 | btf_id = btf_find_by_name_kind(btf, stub_func_name, BTF_KIND_FUNC); | |
173 | if (btf_id < 0) | |
174 | return NULL; | |
175 | func_type = btf_type_by_id(btf, btf_id); | |
176 | if (!func_type) | |
177 | return NULL; | |
178 | ||
179 | return btf_type_by_id(btf, func_type->type); /* FUNC_PROTO */ | |
180 | } | |
181 | ||
182 | /* Prepare argument info for every nullable argument of a member of a | |
183 | * struct_ops type. | |
184 | * | |
185 | * Initialize a struct bpf_struct_ops_arg_info according to type info of | |
186 | * the arguments of a stub function. (Check kCFI for more information about | |
187 | * stub functions.) | |
188 | * | |
189 | * Each member in the struct_ops type has a struct bpf_struct_ops_arg_info | |
190 | * to provide an array of struct bpf_ctx_arg_aux, which in turn provides | |
191 | * the information that used by the verifier to check the arguments of the | |
192 | * BPF struct_ops program assigned to the member. Here, we only care about | |
193 | * the arguments that are marked as __nullable. | |
194 | * | |
195 | * The array of struct bpf_ctx_arg_aux is eventually assigned to | |
196 | * prog->aux->ctx_arg_info of BPF struct_ops programs and passed to the | |
197 | * verifier. (See check_struct_ops_btf_id()) | |
198 | * | |
199 | * arg_info->info will be the list of struct bpf_ctx_arg_aux if success. If | |
200 | * fails, it will be kept untouched. | |
201 | */ | |
202 | static int prepare_arg_info(struct btf *btf, | |
203 | const char *st_ops_name, | |
204 | const char *member_name, | |
205 | const struct btf_type *func_proto, | |
206 | struct bpf_struct_ops_arg_info *arg_info) | |
207 | { | |
208 | const struct btf_type *stub_func_proto, *pointed_type; | |
209 | const struct btf_param *stub_args, *args; | |
210 | struct bpf_ctx_arg_aux *info, *info_buf; | |
211 | u32 nargs, arg_no, info_cnt = 0; | |
212 | u32 arg_btf_id; | |
213 | int offset; | |
214 | ||
215 | stub_func_proto = find_stub_func_proto(btf, st_ops_name, member_name); | |
216 | if (!stub_func_proto) | |
217 | return 0; | |
218 | ||
219 | /* Check if the number of arguments of the stub function is the same | |
220 | * as the number of arguments of the function pointer. | |
221 | */ | |
222 | nargs = btf_type_vlen(func_proto); | |
223 | if (nargs != btf_type_vlen(stub_func_proto)) { | |
224 | pr_warn("the number of arguments of the stub function %s__%s does not match the number of arguments of the member %s of struct %s\n", | |
225 | st_ops_name, member_name, member_name, st_ops_name); | |
226 | return -EINVAL; | |
227 | } | |
228 | ||
229 | if (!nargs) | |
230 | return 0; | |
231 | ||
232 | args = btf_params(func_proto); | |
233 | stub_args = btf_params(stub_func_proto); | |
234 | ||
235 | info_buf = kcalloc(nargs, sizeof(*info_buf), GFP_KERNEL); | |
236 | if (!info_buf) | |
237 | return -ENOMEM; | |
238 | ||
239 | /* Prepare info for every nullable argument */ | |
240 | info = info_buf; | |
241 | for (arg_no = 0; arg_no < nargs; arg_no++) { | |
242 | /* Skip arguments that is not suffixed with | |
243 | * "__nullable". | |
244 | */ | |
245 | if (!btf_param_match_suffix(btf, &stub_args[arg_no], | |
246 | MAYBE_NULL_SUFFIX)) | |
247 | continue; | |
248 | ||
249 | /* Should be a pointer to struct */ | |
250 | pointed_type = btf_type_resolve_ptr(btf, | |
251 | args[arg_no].type, | |
252 | &arg_btf_id); | |
253 | if (!pointed_type || | |
254 | !btf_type_is_struct(pointed_type)) { | |
255 | pr_warn("stub function %s__%s has %s tagging to an unsupported type\n", | |
256 | st_ops_name, member_name, MAYBE_NULL_SUFFIX); | |
257 | goto err_out; | |
258 | } | |
259 | ||
260 | offset = btf_ctx_arg_offset(btf, func_proto, arg_no); | |
261 | if (offset < 0) { | |
262 | pr_warn("stub function %s__%s has an invalid trampoline ctx offset for arg#%u\n", | |
263 | st_ops_name, member_name, arg_no); | |
264 | goto err_out; | |
265 | } | |
266 | ||
267 | if (args[arg_no].type != stub_args[arg_no].type) { | |
268 | pr_warn("arg#%u type in stub function %s__%s does not match with its original func_proto\n", | |
269 | arg_no, st_ops_name, member_name); | |
270 | goto err_out; | |
271 | } | |
272 | ||
273 | /* Fill the information of the new argument */ | |
274 | info->reg_type = | |
275 | PTR_TRUSTED | PTR_TO_BTF_ID | PTR_MAYBE_NULL; | |
276 | info->btf_id = arg_btf_id; | |
277 | info->btf = btf; | |
278 | info->offset = offset; | |
279 | ||
280 | info++; | |
281 | info_cnt++; | |
282 | } | |
283 | ||
284 | if (info_cnt) { | |
285 | arg_info->info = info_buf; | |
286 | arg_info->cnt = info_cnt; | |
287 | } else { | |
288 | kfree(info_buf); | |
289 | } | |
290 | ||
291 | return 0; | |
292 | ||
293 | err_out: | |
294 | kfree(info_buf); | |
295 | ||
296 | return -EINVAL; | |
297 | } | |
298 | ||
299 | /* Clean up the arg_info in a struct bpf_struct_ops_desc. */ | |
300 | void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc) | |
301 | { | |
302 | struct bpf_struct_ops_arg_info *arg_info; | |
303 | int i; | |
304 | ||
305 | arg_info = st_ops_desc->arg_info; | |
306 | for (i = 0; i < btf_type_vlen(st_ops_desc->type); i++) | |
307 | kfree(arg_info[i].info); | |
308 | ||
309 | kfree(arg_info); | |
310 | } | |
311 | ||
f6be98d1 KFL |
312 | int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, |
313 | struct btf *btf, | |
314 | struct bpf_verifier_log *log) | |
27ae7997 | 315 | { |
4c5763ed | 316 | struct bpf_struct_ops *st_ops = st_ops_desc->st_ops; |
16116035 | 317 | struct bpf_struct_ops_arg_info *arg_info; |
27ae7997 | 318 | const struct btf_member *member; |
27ae7997 | 319 | const struct btf_type *t; |
3b1f89e7 | 320 | s32 type_id, value_id; |
85d33df3 | 321 | char value_name[128]; |
27ae7997 | 322 | const char *mname; |
16116035 | 323 | int i, err; |
27ae7997 | 324 | |
3b1f89e7 KFL |
325 | if (strlen(st_ops->name) + VALUE_PREFIX_LEN >= |
326 | sizeof(value_name)) { | |
327 | pr_warn("struct_ops name %s is too long\n", | |
328 | st_ops->name); | |
f6be98d1 | 329 | return -EINVAL; |
3b1f89e7 KFL |
330 | } |
331 | sprintf(value_name, "%s%s", VALUE_PREFIX, st_ops->name); | |
85d33df3 | 332 | |
3e000833 KFL |
333 | if (!st_ops->cfi_stubs) { |
334 | pr_warn("struct_ops for %s has no cfi_stubs\n", st_ops->name); | |
335 | return -EINVAL; | |
336 | } | |
337 | ||
3b1f89e7 KFL |
338 | type_id = btf_find_by_name_kind(btf, st_ops->name, |
339 | BTF_KIND_STRUCT); | |
340 | if (type_id < 0) { | |
341 | pr_warn("Cannot find struct %s in %s\n", | |
342 | st_ops->name, btf_get_name(btf)); | |
f6be98d1 | 343 | return -EINVAL; |
3b1f89e7 KFL |
344 | } |
345 | t = btf_type_by_id(btf, type_id); | |
346 | if (btf_type_vlen(t) > BPF_STRUCT_OPS_MAX_NR_MEMBERS) { | |
347 | pr_warn("Cannot support #%u members in struct %s\n", | |
348 | btf_type_vlen(t), st_ops->name); | |
f6be98d1 | 349 | return -EINVAL; |
3b1f89e7 | 350 | } |
27ae7997 | 351 | |
612d087d KFL |
352 | value_id = btf_find_by_name_kind(btf, value_name, |
353 | BTF_KIND_STRUCT); | |
354 | if (value_id < 0) { | |
355 | pr_warn("Cannot find struct %s in %s\n", | |
356 | value_name, btf_get_name(btf)); | |
f6be98d1 | 357 | return -EINVAL; |
612d087d KFL |
358 | } |
359 | if (!is_valid_value_type(btf, value_id, t, value_name)) | |
f6be98d1 | 360 | return -EINVAL; |
612d087d | 361 | |
16116035 KFL |
362 | arg_info = kcalloc(btf_type_vlen(t), sizeof(*arg_info), |
363 | GFP_KERNEL); | |
364 | if (!arg_info) | |
365 | return -ENOMEM; | |
366 | ||
367 | st_ops_desc->arg_info = arg_info; | |
368 | st_ops_desc->type = t; | |
369 | st_ops_desc->type_id = type_id; | |
370 | st_ops_desc->value_id = value_id; | |
371 | st_ops_desc->value_type = btf_type_by_id(btf, value_id); | |
372 | ||
3b1f89e7 KFL |
373 | for_each_member(i, t, member) { |
374 | const struct btf_type *func_proto; | |
85d33df3 | 375 | |
3b1f89e7 KFL |
376 | mname = btf_name_by_offset(btf, member->name_off); |
377 | if (!*mname) { | |
378 | pr_warn("anon member in struct %s is not supported\n", | |
27ae7997 | 379 | st_ops->name); |
16116035 KFL |
380 | err = -EOPNOTSUPP; |
381 | goto errout; | |
27ae7997 | 382 | } |
3b1f89e7 KFL |
383 | |
384 | if (__btf_member_bitfield_size(t, member)) { | |
385 | pr_warn("bit field member %s in struct %s is not supported\n", | |
386 | mname, st_ops->name); | |
16116035 KFL |
387 | err = -EOPNOTSUPP; |
388 | goto errout; | |
27ae7997 MKL |
389 | } |
390 | ||
3b1f89e7 KFL |
391 | func_proto = btf_type_resolve_func_ptr(btf, |
392 | member->type, | |
393 | NULL); | |
16116035 KFL |
394 | if (!func_proto) |
395 | continue; | |
396 | ||
397 | if (btf_distill_func_proto(log, btf, | |
3b1f89e7 KFL |
398 | func_proto, mname, |
399 | &st_ops->func_models[i])) { | |
400 | pr_warn("Error in parsing func ptr %s in struct %s\n", | |
401 | mname, st_ops->name); | |
16116035 KFL |
402 | err = -EINVAL; |
403 | goto errout; | |
27ae7997 | 404 | } |
16116035 KFL |
405 | |
406 | err = prepare_arg_info(btf, st_ops->name, mname, | |
407 | func_proto, | |
408 | arg_info + i); | |
409 | if (err) | |
410 | goto errout; | |
3b1f89e7 | 411 | } |
27ae7997 | 412 | |
df9705ea KFL |
413 | if (st_ops->init(btf)) { |
414 | pr_warn("Error in init bpf_struct_ops %s\n", | |
415 | st_ops->name); | |
16116035 KFL |
416 | err = -EINVAL; |
417 | goto errout; | |
27ae7997 | 418 | } |
3b1f89e7 | 419 | |
f6be98d1 | 420 | return 0; |
16116035 KFL |
421 | |
422 | errout: | |
423 | bpf_struct_ops_desc_release(st_ops_desc); | |
424 | ||
425 | return err; | |
27ae7997 | 426 | } |
85d33df3 MKL |
427 | |
428 | static int bpf_struct_ops_map_get_next_key(struct bpf_map *map, void *key, | |
429 | void *next_key) | |
430 | { | |
431 | if (key && *(u32 *)key == 0) | |
432 | return -ENOENT; | |
433 | ||
434 | *(u32 *)next_key = 0; | |
435 | return 0; | |
436 | } | |
437 | ||
438 | int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, | |
439 | void *value) | |
440 | { | |
441 | struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; | |
442 | struct bpf_struct_ops_value *uvalue, *kvalue; | |
443 | enum bpf_struct_ops_state state; | |
b671c206 | 444 | s64 refcnt; |
85d33df3 MKL |
445 | |
446 | if (unlikely(*(u32 *)key != 0)) | |
447 | return -ENOENT; | |
448 | ||
449 | kvalue = &st_map->kvalue; | |
450 | /* Pair with smp_store_release() during map_update */ | |
612d087d | 451 | state = smp_load_acquire(&kvalue->common.state); |
85d33df3 MKL |
452 | if (state == BPF_STRUCT_OPS_STATE_INIT) { |
453 | memset(value, 0, map->value_size); | |
454 | return 0; | |
455 | } | |
456 | ||
457 | /* No lock is needed. state and refcnt do not need | |
458 | * to be updated together under atomic context. | |
459 | */ | |
241d50ec | 460 | uvalue = value; |
85d33df3 | 461 | memcpy(uvalue, st_map->uvalue, map->value_size); |
612d087d | 462 | uvalue->common.state = state; |
b671c206 KFL |
463 | |
464 | /* This value offers the user space a general estimate of how | |
465 | * many sockets are still utilizing this struct_ops for TCP | |
466 | * congestion control. The number might not be exact, but it | |
467 | * should sufficiently meet our present goals. | |
468 | */ | |
469 | refcnt = atomic64_read(&map->refcnt) - atomic64_read(&map->usercnt); | |
612d087d | 470 | refcount_set(&uvalue->common.refcnt, max_t(s64, refcnt, 0)); |
85d33df3 MKL |
471 | |
472 | return 0; | |
473 | } | |
474 | ||
475 | static void *bpf_struct_ops_map_lookup_elem(struct bpf_map *map, void *key) | |
476 | { | |
477 | return ERR_PTR(-EINVAL); | |
478 | } | |
479 | ||
480 | static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map) | |
481 | { | |
85d33df3 MKL |
482 | u32 i; |
483 | ||
e3f87fdf | 484 | for (i = 0; i < st_map->links_cnt; i++) { |
f7e0beaf KFL |
485 | if (st_map->links[i]) { |
486 | bpf_link_put(st_map->links[i]); | |
487 | st_map->links[i] = NULL; | |
85d33df3 MKL |
488 | } |
489 | } | |
490 | } | |
491 | ||
187e2af0 KFL |
492 | static void bpf_struct_ops_map_free_image(struct bpf_struct_ops_map *st_map) |
493 | { | |
494 | int i; | |
495 | ||
496 | for (i = 0; i < st_map->image_pages_cnt; i++) | |
497 | bpf_struct_ops_image_free(st_map->image_pages[i]); | |
498 | st_map->image_pages_cnt = 0; | |
499 | } | |
500 | ||
47f4f657 | 501 | static int check_zero_holes(const struct btf *btf, const struct btf_type *t, void *data) |
85d33df3 MKL |
502 | { |
503 | const struct btf_member *member; | |
504 | u32 i, moff, msize, prev_mend = 0; | |
505 | const struct btf_type *mtype; | |
506 | ||
507 | for_each_member(i, t, member) { | |
8293eb99 | 508 | moff = __btf_member_bit_offset(t, member) / 8; |
85d33df3 MKL |
509 | if (moff > prev_mend && |
510 | memchr_inv(data + prev_mend, 0, moff - prev_mend)) | |
511 | return -EINVAL; | |
512 | ||
47f4f657 KFL |
513 | mtype = btf_type_by_id(btf, member->type); |
514 | mtype = btf_resolve_size(btf, mtype, &msize); | |
85d33df3 MKL |
515 | if (IS_ERR(mtype)) |
516 | return PTR_ERR(mtype); | |
517 | prev_mend = moff + msize; | |
518 | } | |
519 | ||
520 | if (t->size > prev_mend && | |
521 | memchr_inv(data + prev_mend, 0, t->size - prev_mend)) | |
522 | return -EINVAL; | |
523 | ||
524 | return 0; | |
525 | } | |
526 | ||
/* Intentionally empty: nothing is done when the link is released. */
static void bpf_struct_ops_link_release(struct bpf_link *link)
{
}
530 | ||
531 | static void bpf_struct_ops_link_dealloc(struct bpf_link *link) | |
532 | { | |
533 | struct bpf_tramp_link *tlink = container_of(link, struct bpf_tramp_link, link); | |
534 | ||
535 | kfree(tlink); | |
536 | } | |
537 | ||
538 | const struct bpf_link_ops bpf_struct_ops_link_lops = { | |
539 | .release = bpf_struct_ops_link_release, | |
540 | .dealloc = bpf_struct_ops_link_dealloc, | |
541 | }; | |
542 | ||
543 | int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, | |
544 | struct bpf_tramp_link *link, | |
31a645ae | 545 | const struct btf_func_model *model, |
187e2af0 KFL |
546 | void *stub_func, |
547 | void **_image, u32 *_image_off, | |
548 | bool allow_alloc) | |
31a645ae | 549 | { |
187e2af0 KFL |
550 | u32 image_off = *_image_off, flags = BPF_TRAMP_F_INDIRECT; |
551 | void *image = *_image; | |
26ef208c | 552 | int size; |
31a645ae | 553 | |
f7e0beaf KFL |
554 | tlinks[BPF_TRAMP_FENTRY].links[0] = link; |
555 | tlinks[BPF_TRAMP_FENTRY].nr_links = 1; | |
2cd3e377 PZ |
556 | |
557 | if (model->ret_size > 0) | |
558 | flags |= BPF_TRAMP_F_RET_FENTRY_RET; | |
26ef208c SL |
559 | |
560 | size = arch_bpf_trampoline_size(model, flags, tlinks, NULL); | |
187e2af0 KFL |
561 | if (size <= 0) |
562 | return size ? : -EFAULT; | |
563 | ||
564 | /* Allocate image buffer if necessary */ | |
565 | if (!image || size > PAGE_SIZE - image_off) { | |
566 | if (!allow_alloc) | |
567 | return -E2BIG; | |
568 | ||
569 | image = bpf_struct_ops_image_alloc(); | |
570 | if (IS_ERR(image)) | |
571 | return PTR_ERR(image); | |
572 | image_off = 0; | |
573 | } | |
574 | ||
575 | size = arch_prepare_bpf_trampoline(NULL, image + image_off, | |
d1a42617 | 576 | image + image_off + size, |
2cd3e377 | 577 | model, flags, tlinks, stub_func); |
187e2af0 KFL |
578 | if (size <= 0) { |
579 | if (image != *_image) | |
580 | bpf_struct_ops_image_free(image); | |
581 | return size ? : -EFAULT; | |
582 | } | |
583 | ||
584 | *_image = image; | |
585 | *_image_off = image_off + size; | |
586 | return 0; | |
31a645ae HT |
587 | } |
588 | ||
d7ba4cc9 JK |
589 | static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, |
590 | void *value, u64 flags) | |
85d33df3 MKL |
591 | { |
592 | struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; | |
4c5763ed KFL |
593 | const struct bpf_struct_ops_desc *st_ops_desc = st_map->st_ops_desc; |
594 | const struct bpf_struct_ops *st_ops = st_ops_desc->st_ops; | |
85d33df3 | 595 | struct bpf_struct_ops_value *uvalue, *kvalue; |
95678395 | 596 | const struct btf_type *module_type; |
85d33df3 | 597 | const struct btf_member *member; |
4c5763ed | 598 | const struct btf_type *t = st_ops_desc->type; |
5964d1e4 | 599 | struct bpf_tramp_links *tlinks; |
85d33df3 | 600 | void *udata, *kdata; |
5964d1e4 | 601 | int prog_fd, err; |
187e2af0 KFL |
602 | u32 i, trampoline_start, image_off = 0; |
603 | void *cur_image = NULL, *image = NULL; | |
85d33df3 MKL |
604 | |
605 | if (flags) | |
606 | return -EINVAL; | |
607 | ||
608 | if (*(u32 *)key != 0) | |
609 | return -E2BIG; | |
610 | ||
47f4f657 | 611 | err = check_zero_holes(st_map->btf, st_ops_desc->value_type, value); |
85d33df3 MKL |
612 | if (err) |
613 | return err; | |
614 | ||
241d50ec | 615 | uvalue = value; |
47f4f657 | 616 | err = check_zero_holes(st_map->btf, t, uvalue->data); |
85d33df3 MKL |
617 | if (err) |
618 | return err; | |
619 | ||
612d087d | 620 | if (uvalue->common.state || refcount_read(&uvalue->common.refcnt)) |
85d33df3 MKL |
621 | return -EINVAL; |
622 | ||
f7e0beaf KFL |
623 | tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL); |
624 | if (!tlinks) | |
88fd9e53 KS |
625 | return -ENOMEM; |
626 | ||
85d33df3 MKL |
627 | uvalue = (struct bpf_struct_ops_value *)st_map->uvalue; |
628 | kvalue = (struct bpf_struct_ops_value *)&st_map->kvalue; | |
629 | ||
630 | mutex_lock(&st_map->lock); | |
631 | ||
612d087d | 632 | if (kvalue->common.state != BPF_STRUCT_OPS_STATE_INIT) { |
85d33df3 MKL |
633 | err = -EBUSY; |
634 | goto unlock; | |
635 | } | |
636 | ||
637 | memcpy(uvalue, value, map->value_size); | |
638 | ||
639 | udata = &uvalue->data; | |
640 | kdata = &kvalue->data; | |
85d33df3 | 641 | |
95678395 | 642 | module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]); |
85d33df3 MKL |
643 | for_each_member(i, t, member) { |
644 | const struct btf_type *mtype, *ptype; | |
645 | struct bpf_prog *prog; | |
f7e0beaf | 646 | struct bpf_tramp_link *link; |
85d33df3 MKL |
647 | u32 moff; |
648 | ||
8293eb99 | 649 | moff = __btf_member_bit_offset(t, member) / 8; |
47f4f657 | 650 | ptype = btf_type_resolve_ptr(st_map->btf, member->type, NULL); |
85d33df3 MKL |
651 | if (ptype == module_type) { |
652 | if (*(void **)(udata + moff)) | |
653 | goto reset_unlock; | |
654 | *(void **)(kdata + moff) = BPF_MODULE_OWNER; | |
655 | continue; | |
656 | } | |
657 | ||
658 | err = st_ops->init_member(t, member, kdata, udata); | |
659 | if (err < 0) | |
660 | goto reset_unlock; | |
661 | ||
662 | /* The ->init_member() has handled this member */ | |
663 | if (err > 0) | |
664 | continue; | |
665 | ||
666 | /* If st_ops->init_member does not handle it, | |
667 | * we will only handle func ptrs and zero-ed members | |
668 | * here. Reject everything else. | |
669 | */ | |
670 | ||
671 | /* All non func ptr member must be 0 */ | |
672 | if (!ptype || !btf_type_is_func_proto(ptype)) { | |
673 | u32 msize; | |
674 | ||
47f4f657 KFL |
675 | mtype = btf_type_by_id(st_map->btf, member->type); |
676 | mtype = btf_resolve_size(st_map->btf, mtype, &msize); | |
85d33df3 MKL |
677 | if (IS_ERR(mtype)) { |
678 | err = PTR_ERR(mtype); | |
679 | goto reset_unlock; | |
680 | } | |
681 | ||
682 | if (memchr_inv(udata + moff, 0, msize)) { | |
683 | err = -EINVAL; | |
684 | goto reset_unlock; | |
685 | } | |
686 | ||
687 | continue; | |
688 | } | |
689 | ||
690 | prog_fd = (int)(*(unsigned long *)(udata + moff)); | |
691 | /* Similar check as the attr->attach_prog_fd */ | |
692 | if (!prog_fd) | |
693 | continue; | |
694 | ||
695 | prog = bpf_prog_get(prog_fd); | |
696 | if (IS_ERR(prog)) { | |
697 | err = PTR_ERR(prog); | |
698 | goto reset_unlock; | |
699 | } | |
85d33df3 MKL |
700 | |
701 | if (prog->type != BPF_PROG_TYPE_STRUCT_OPS || | |
4c5763ed | 702 | prog->aux->attach_btf_id != st_ops_desc->type_id || |
85d33df3 | 703 | prog->expected_attach_type != i) { |
f7e0beaf | 704 | bpf_prog_put(prog); |
85d33df3 MKL |
705 | err = -EINVAL; |
706 | goto reset_unlock; | |
707 | } | |
708 | ||
f7e0beaf KFL |
709 | link = kzalloc(sizeof(*link), GFP_USER); |
710 | if (!link) { | |
711 | bpf_prog_put(prog); | |
712 | err = -ENOMEM; | |
713 | goto reset_unlock; | |
714 | } | |
715 | bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, | |
716 | &bpf_struct_ops_link_lops, prog); | |
717 | st_map->links[i] = &link->link; | |
718 | ||
187e2af0 | 719 | trampoline_start = image_off; |
f7e0beaf | 720 | err = bpf_struct_ops_prepare_trampoline(tlinks, link, |
187e2af0 KFL |
721 | &st_ops->func_models[i], |
722 | *(void **)(st_ops->cfi_stubs + moff), | |
723 | &image, &image_off, | |
724 | st_map->image_pages_cnt < MAX_TRAMP_IMAGE_PAGES); | |
725 | if (err) | |
726 | goto reset_unlock; | |
727 | ||
728 | if (cur_image != image) { | |
729 | st_map->image_pages[st_map->image_pages_cnt++] = image; | |
730 | cur_image = image; | |
731 | trampoline_start = 0; | |
732 | } | |
85d33df3 | 733 | |
187e2af0 | 734 | *(void **)(kdata + moff) = image + trampoline_start + cfi_get_offset(); |
85d33df3 MKL |
735 | |
736 | /* put prog_id to udata */ | |
737 | *(unsigned long *)(udata + moff) = prog->aux->id; | |
738 | } | |
739 | ||
73e4f9e6 KFL |
740 | if (st_ops->validate) { |
741 | err = st_ops->validate(kdata); | |
742 | if (err) | |
743 | goto reset_unlock; | |
744 | } | |
c733239f CL |
745 | for (i = 0; i < st_map->image_pages_cnt; i++) { |
746 | err = arch_protect_bpf_trampoline(st_map->image_pages[i], | |
747 | PAGE_SIZE); | |
748 | if (err) | |
749 | goto reset_unlock; | |
750 | } | |
73e4f9e6 | 751 | |
68b04864 | 752 | if (st_map->map.map_flags & BPF_F_LINK) { |
8ba651ed | 753 | err = 0; |
68b04864 KFL |
754 | /* Let bpf_link handle registration & unregistration. |
755 | * | |
756 | * Pair with smp_load_acquire() during lookup_elem(). | |
757 | */ | |
612d087d | 758 | smp_store_release(&kvalue->common.state, BPF_STRUCT_OPS_STATE_READY); |
68b04864 KFL |
759 | goto unlock; |
760 | } | |
85d33df3 | 761 | |
73287fe2 | 762 | err = st_ops->reg(kdata, NULL); |
85d33df3 | 763 | if (likely(!err)) { |
68b04864 KFL |
764 | /* This refcnt increment on the map here after |
765 | * 'st_ops->reg()' is secure since the state of the | |
766 | * map must be set to INIT at this moment, and thus | |
767 | * bpf_struct_ops_map_delete_elem() can't unregister | |
768 | * or transition it to TOBEFREE concurrently. | |
769 | */ | |
770 | bpf_map_inc(map); | |
85d33df3 MKL |
771 | /* Pair with smp_load_acquire() during lookup_elem(). |
772 | * It ensures the above udata updates (e.g. prog->aux->id) | |
773 | * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set. | |
774 | */ | |
612d087d | 775 | smp_store_release(&kvalue->common.state, BPF_STRUCT_OPS_STATE_INUSE); |
85d33df3 MKL |
776 | goto unlock; |
777 | } | |
778 | ||
9f0265e9 JTH |
779 | /* Error during st_ops->reg(). Can happen if this struct_ops needs to be |
780 | * verified as a whole, after all init_member() calls. Can also happen if | |
781 | * there was a race in registering the struct_ops (under the same name) to | |
85d33df3 MKL |
782 | * a sub-system through different struct_ops's maps. |
783 | */ | |
85d33df3 MKL |
784 | |
785 | reset_unlock: | |
187e2af0 | 786 | bpf_struct_ops_map_free_image(st_map); |
85d33df3 MKL |
787 | bpf_struct_ops_map_put_progs(st_map); |
788 | memset(uvalue, 0, map->value_size); | |
789 | memset(kvalue, 0, map->value_size); | |
790 | unlock: | |
f7e0beaf | 791 | kfree(tlinks); |
85d33df3 MKL |
792 | mutex_unlock(&st_map->lock); |
793 | return err; | |
794 | } | |
795 | ||
d7ba4cc9 | 796 | static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key) |
85d33df3 MKL |
797 | { |
798 | enum bpf_struct_ops_state prev_state; | |
799 | struct bpf_struct_ops_map *st_map; | |
800 | ||
801 | st_map = (struct bpf_struct_ops_map *)map; | |
68b04864 KFL |
802 | if (st_map->map.map_flags & BPF_F_LINK) |
803 | return -EOPNOTSUPP; | |
804 | ||
612d087d | 805 | prev_state = cmpxchg(&st_map->kvalue.common.state, |
85d33df3 MKL |
806 | BPF_STRUCT_OPS_STATE_INUSE, |
807 | BPF_STRUCT_OPS_STATE_TOBEFREE); | |
8e5290e7 MKL |
808 | switch (prev_state) { |
809 | case BPF_STRUCT_OPS_STATE_INUSE: | |
73287fe2 | 810 | st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, NULL); |
b671c206 | 811 | bpf_map_put(map); |
8e5290e7 MKL |
812 | return 0; |
813 | case BPF_STRUCT_OPS_STATE_TOBEFREE: | |
814 | return -EINPROGRESS; | |
815 | case BPF_STRUCT_OPS_STATE_INIT: | |
816 | return -ENOENT; | |
817 | default: | |
818 | WARN_ON_ONCE(1); | |
819 | /* Should never happen. Treat it as not found. */ | |
820 | return -ENOENT; | |
85d33df3 | 821 | } |
85d33df3 MKL |
822 | } |
823 | ||
824 | static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key, | |
825 | struct seq_file *m) | |
826 | { | |
47f4f657 | 827 | struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; |
85d33df3 | 828 | void *value; |
3b413041 | 829 | int err; |
85d33df3 | 830 | |
3b413041 | 831 | value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN); |
85d33df3 MKL |
832 | if (!value) |
833 | return; | |
834 | ||
3b413041 MKL |
835 | err = bpf_struct_ops_map_sys_lookup_elem(map, key, value); |
836 | if (!err) { | |
47f4f657 KFL |
837 | btf_type_seq_show(st_map->btf, |
838 | map->btf_vmlinux_value_type_id, | |
3b413041 | 839 | value, m); |
df862de4 | 840 | seq_putc(m, '\n'); |
3b413041 MKL |
841 | } |
842 | ||
843 | kfree(value); | |
85d33df3 MKL |
844 | } |
845 | ||
b671c206 | 846 | static void __bpf_struct_ops_map_free(struct bpf_map *map) |
85d33df3 MKL |
847 | { |
848 | struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; | |
849 | ||
f7e0beaf | 850 | if (st_map->links) |
85d33df3 | 851 | bpf_struct_ops_map_put_progs(st_map); |
f7e0beaf | 852 | bpf_map_area_free(st_map->links); |
187e2af0 | 853 | bpf_struct_ops_map_free_image(st_map); |
85d33df3 MKL |
854 | bpf_map_area_free(st_map->uvalue); |
855 | bpf_map_area_free(st_map); | |
856 | } | |
857 | ||
b671c206 KFL |
858 | static void bpf_struct_ops_map_free(struct bpf_map *map) |
859 | { | |
e3f87fdf KFL |
860 | struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; |
861 | ||
862 | /* st_ops->owner was acquired during map_alloc to implicitly holds | |
863 | * the btf's refcnt. The acquire was only done when btf_is_module() | |
864 | * st_map->btf cannot be NULL here. | |
865 | */ | |
866 | if (btf_is_module(st_map->btf)) | |
867 | module_put(st_map->st_ops_desc->st_ops->owner); | |
868 | ||
b671c206 KFL |
869 | /* The struct_ops's function may switch to another struct_ops. |
870 | * | |
871 | * For example, bpf_tcp_cc_x->init() may switch to | |
872 | * another tcp_cc_y by calling | |
873 | * setsockopt(TCP_CONGESTION, "tcp_cc_y"). | |
874 | * During the switch, bpf_struct_ops_put(tcp_cc_x) is called | |
875 | * and its refcount may reach 0 which then free its | |
876 | * trampoline image while tcp_cc_x is still running. | |
877 | * | |
878 | * A vanilla rcu gp is to wait for all bpf-tcp-cc prog | |
879 | * to finish. bpf-tcp-cc prog is non sleepable. | |
880 | * A rcu_tasks gp is to wait for the last few insn | |
881 | * in the tramopline image to finish before releasing | |
882 | * the trampoline image. | |
883 | */ | |
884 | synchronize_rcu_mult(call_rcu, call_rcu_tasks); | |
885 | ||
886 | __bpf_struct_ops_map_free(map); | |
887 | } | |
888 | ||
85d33df3 MKL |
889 | static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr) |
890 | { | |
891 | if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 || | |
fcc2c1fb KFL |
892 | (attr->map_flags & ~(BPF_F_LINK | BPF_F_VTYPE_BTF_OBJ_FD)) || |
893 | !attr->btf_vmlinux_value_type_id) | |
85d33df3 MKL |
894 | return -EINVAL; |
895 | return 0; | |
896 | } | |
897 | ||
898 | static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) | |
899 | { | |
4c5763ed | 900 | const struct bpf_struct_ops_desc *st_ops_desc; |
f043733f | 901 | size_t st_map_size; |
85d33df3 MKL |
902 | struct bpf_struct_ops_map *st_map; |
903 | const struct btf_type *t, *vt; | |
e3f87fdf | 904 | struct module *mod = NULL; |
85d33df3 | 905 | struct bpf_map *map; |
fcc2c1fb | 906 | struct btf *btf; |
5c04433d | 907 | int ret; |
85d33df3 | 908 | |
fcc2c1fb KFL |
909 | if (attr->map_flags & BPF_F_VTYPE_BTF_OBJ_FD) { |
910 | /* The map holds btf for its whole life time. */ | |
911 | btf = btf_get_by_fd(attr->value_type_btf_obj_fd); | |
912 | if (IS_ERR(btf)) | |
913 | return ERR_CAST(btf); | |
914 | if (!btf_is_module(btf)) { | |
915 | btf_put(btf); | |
916 | return ERR_PTR(-EINVAL); | |
917 | } | |
e3f87fdf KFL |
918 | |
919 | mod = btf_try_get_module(btf); | |
920 | /* mod holds a refcnt to btf. We don't need an extra refcnt | |
921 | * here. | |
922 | */ | |
923 | btf_put(btf); | |
924 | if (!mod) | |
925 | return ERR_PTR(-EINVAL); | |
fcc2c1fb KFL |
926 | } else { |
927 | btf = bpf_get_btf_vmlinux(); | |
928 | if (IS_ERR(btf)) | |
929 | return ERR_CAST(btf); | |
e6be8cd5 KFL |
930 | if (!btf) |
931 | return ERR_PTR(-ENOTSUPP); | |
fcc2c1fb KFL |
932 | } |
933 | ||
934 | st_ops_desc = bpf_struct_ops_find_value(btf, attr->btf_vmlinux_value_type_id); | |
935 | if (!st_ops_desc) { | |
936 | ret = -ENOTSUPP; | |
937 | goto errout; | |
938 | } | |
85d33df3 | 939 | |
4c5763ed | 940 | vt = st_ops_desc->value_type; |
fcc2c1fb KFL |
941 | if (attr->value_size != vt->size) { |
942 | ret = -EINVAL; | |
943 | goto errout; | |
944 | } | |
85d33df3 | 945 | |
4c5763ed | 946 | t = st_ops_desc->type; |
85d33df3 MKL |
947 | |
948 | st_map_size = sizeof(*st_map) + | |
949 | /* kvalue stores the | |
950 | * struct bpf_struct_ops_tcp_congestions_ops | |
951 | */ | |
952 | (vt->size - sizeof(struct bpf_struct_ops_value)); | |
85d33df3 MKL |
953 | |
954 | st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE); | |
fcc2c1fb KFL |
955 | if (!st_map) { |
956 | ret = -ENOMEM; | |
957 | goto errout; | |
958 | } | |
f043733f | 959 | |
4c5763ed | 960 | st_map->st_ops_desc = st_ops_desc; |
85d33df3 MKL |
961 | map = &st_map->map; |
962 | ||
963 | st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE); | |
e3f87fdf | 964 | st_map->links_cnt = btf_type_vlen(t); |
f7e0beaf | 965 | st_map->links = |
e3f87fdf | 966 | bpf_map_area_alloc(st_map->links_cnt * sizeof(struct bpf_links *), |
85d33df3 | 967 | NUMA_NO_NODE); |
5c04433d | 968 | if (!st_map->uvalue || !st_map->links) { |
fcc2c1fb KFL |
969 | ret = -ENOMEM; |
970 | goto errout_free; | |
85d33df3 | 971 | } |
fcc2c1fb | 972 | st_map->btf = btf; |
47f4f657 | 973 | |
85d33df3 | 974 | mutex_init(&st_map->lock); |
85d33df3 | 975 | bpf_map_init_from_attr(map, attr); |
85d33df3 MKL |
976 | |
977 | return map; | |
fcc2c1fb KFL |
978 | |
979 | errout_free: | |
980 | __bpf_struct_ops_map_free(map); | |
981 | errout: | |
e3f87fdf | 982 | module_put(mod); |
fcc2c1fb KFL |
983 | |
984 | return ERR_PTR(ret); | |
85d33df3 MKL |
985 | } |
986 | ||
f062226d YS |
987 | static u64 bpf_struct_ops_map_mem_usage(const struct bpf_map *map) |
988 | { | |
989 | struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; | |
4c5763ed KFL |
990 | const struct bpf_struct_ops_desc *st_ops_desc = st_map->st_ops_desc; |
991 | const struct btf_type *vt = st_ops_desc->value_type; | |
f062226d YS |
992 | u64 usage; |
993 | ||
994 | usage = sizeof(*st_map) + | |
995 | vt->size - sizeof(struct bpf_struct_ops_value); | |
996 | usage += vt->size; | |
997 | usage += btf_type_vlen(vt) * sizeof(struct bpf_links *); | |
998 | usage += PAGE_SIZE; | |
999 | return usage; | |
1000 | } | |
1001 | ||
c317ab71 | 1002 | BTF_ID_LIST_SINGLE(bpf_struct_ops_map_btf_ids, struct, bpf_struct_ops_map) |
85d33df3 MKL |
1003 | const struct bpf_map_ops bpf_struct_ops_map_ops = { |
1004 | .map_alloc_check = bpf_struct_ops_map_alloc_check, | |
1005 | .map_alloc = bpf_struct_ops_map_alloc, | |
1006 | .map_free = bpf_struct_ops_map_free, | |
1007 | .map_get_next_key = bpf_struct_ops_map_get_next_key, | |
1008 | .map_lookup_elem = bpf_struct_ops_map_lookup_elem, | |
1009 | .map_delete_elem = bpf_struct_ops_map_delete_elem, | |
1010 | .map_update_elem = bpf_struct_ops_map_update_elem, | |
1011 | .map_seq_show_elem = bpf_struct_ops_map_seq_show_elem, | |
f062226d | 1012 | .map_mem_usage = bpf_struct_ops_map_mem_usage, |
c317ab71 | 1013 | .map_btf_id = &bpf_struct_ops_map_btf_ids[0], |
85d33df3 MKL |
1014 | }; |
1015 | ||
1016 | /* "const void *" because some subsystem is | |
1017 | * passing a const (e.g. const struct tcp_congestion_ops *) | |
1018 | */ | |
1019 | bool bpf_struct_ops_get(const void *kdata) | |
1020 | { | |
1021 | struct bpf_struct_ops_value *kvalue; | |
b671c206 KFL |
1022 | struct bpf_struct_ops_map *st_map; |
1023 | struct bpf_map *map; | |
85d33df3 MKL |
1024 | |
1025 | kvalue = container_of(kdata, struct bpf_struct_ops_value, data); | |
b671c206 | 1026 | st_map = container_of(kvalue, struct bpf_struct_ops_map, kvalue); |
85d33df3 | 1027 | |
b671c206 KFL |
1028 | map = __bpf_map_inc_not_zero(&st_map->map, false); |
1029 | return !IS_ERR(map); | |
eb18b49e MKL |
1030 | } |
1031 | ||
85d33df3 MKL |
1032 | void bpf_struct_ops_put(const void *kdata) |
1033 | { | |
1034 | struct bpf_struct_ops_value *kvalue; | |
b671c206 | 1035 | struct bpf_struct_ops_map *st_map; |
85d33df3 MKL |
1036 | |
1037 | kvalue = container_of(kdata, struct bpf_struct_ops_value, data); | |
b671c206 KFL |
1038 | st_map = container_of(kvalue, struct bpf_struct_ops_map, kvalue); |
1039 | ||
1040 | bpf_map_put(&st_map->map); | |
85d33df3 | 1041 | } |
68b04864 | 1042 | |
e42ac141 MKL |
1043 | int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff) |
1044 | { | |
1045 | void *func_ptr = *(void **)(st_ops->cfi_stubs + moff); | |
1046 | ||
1047 | return func_ptr ? 0 : -ENOTSUPP; | |
1048 | } | |
1049 | ||
68b04864 KFL |
1050 | static bool bpf_struct_ops_valid_to_reg(struct bpf_map *map) |
1051 | { | |
1052 | struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; | |
1053 | ||
1054 | return map->map_type == BPF_MAP_TYPE_STRUCT_OPS && | |
1055 | map->map_flags & BPF_F_LINK && | |
1056 | /* Pair with smp_store_release() during map_update */ | |
612d087d | 1057 | smp_load_acquire(&st_map->kvalue.common.state) == BPF_STRUCT_OPS_STATE_READY; |
68b04864 KFL |
1058 | } |
1059 | ||
1060 | static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link) | |
1061 | { | |
1062 | struct bpf_struct_ops_link *st_link; | |
1063 | struct bpf_struct_ops_map *st_map; | |
1064 | ||
1065 | st_link = container_of(link, struct bpf_struct_ops_link, link); | |
1066 | st_map = (struct bpf_struct_ops_map *) | |
1067 | rcu_dereference_protected(st_link->map, true); | |
1068 | if (st_map) { | |
73287fe2 | 1069 | st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link); |
68b04864 KFL |
1070 | bpf_map_put(&st_map->map); |
1071 | } | |
1072 | kfree(st_link); | |
1073 | } | |
1074 | ||
1075 | static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link, | |
1076 | struct seq_file *seq) | |
1077 | { | |
1078 | struct bpf_struct_ops_link *st_link; | |
1079 | struct bpf_map *map; | |
1080 | ||
1081 | st_link = container_of(link, struct bpf_struct_ops_link, link); | |
1082 | rcu_read_lock(); | |
1083 | map = rcu_dereference(st_link->map); | |
6fb2544e KFL |
1084 | if (map) |
1085 | seq_printf(seq, "map_id:\t%d\n", map->id); | |
68b04864 KFL |
1086 | rcu_read_unlock(); |
1087 | } | |
1088 | ||
1089 | static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link, | |
1090 | struct bpf_link_info *info) | |
1091 | { | |
1092 | struct bpf_struct_ops_link *st_link; | |
1093 | struct bpf_map *map; | |
1094 | ||
1095 | st_link = container_of(link, struct bpf_struct_ops_link, link); | |
1096 | rcu_read_lock(); | |
1097 | map = rcu_dereference(st_link->map); | |
6fb2544e KFL |
1098 | if (map) |
1099 | info->struct_ops.map_id = map->id; | |
68b04864 KFL |
1100 | rcu_read_unlock(); |
1101 | return 0; | |
1102 | } | |
1103 | ||
aef56f2e KFL |
1104 | static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map *new_map, |
1105 | struct bpf_map *expected_old_map) | |
1106 | { | |
1107 | struct bpf_struct_ops_map *st_map, *old_st_map; | |
1108 | struct bpf_map *old_map; | |
1109 | struct bpf_struct_ops_link *st_link; | |
5964d1e4 | 1110 | int err; |
aef56f2e KFL |
1111 | |
1112 | st_link = container_of(link, struct bpf_struct_ops_link, link); | |
1113 | st_map = container_of(new_map, struct bpf_struct_ops_map, map); | |
1114 | ||
1115 | if (!bpf_struct_ops_valid_to_reg(new_map)) | |
1116 | return -EINVAL; | |
1117 | ||
4c5763ed | 1118 | if (!st_map->st_ops_desc->st_ops->update) |
8ba651ed DV |
1119 | return -EOPNOTSUPP; |
1120 | ||
aef56f2e KFL |
1121 | mutex_lock(&update_mutex); |
1122 | ||
1123 | old_map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex)); | |
6fb2544e KFL |
1124 | if (!old_map) { |
1125 | err = -ENOLINK; | |
1126 | goto err_out; | |
1127 | } | |
aef56f2e KFL |
1128 | if (expected_old_map && old_map != expected_old_map) { |
1129 | err = -EPERM; | |
1130 | goto err_out; | |
1131 | } | |
1132 | ||
1133 | old_st_map = container_of(old_map, struct bpf_struct_ops_map, map); | |
1134 | /* The new and old struct_ops must be the same type. */ | |
4c5763ed | 1135 | if (st_map->st_ops_desc != old_st_map->st_ops_desc) { |
aef56f2e KFL |
1136 | err = -EINVAL; |
1137 | goto err_out; | |
1138 | } | |
1139 | ||
73287fe2 | 1140 | err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data, link); |
aef56f2e KFL |
1141 | if (err) |
1142 | goto err_out; | |
1143 | ||
1144 | bpf_map_inc(new_map); | |
1145 | rcu_assign_pointer(st_link->map, new_map); | |
1146 | bpf_map_put(old_map); | |
1147 | ||
1148 | err_out: | |
1149 | mutex_unlock(&update_mutex); | |
1150 | ||
1151 | return err; | |
1152 | } | |
1153 | ||
6fb2544e KFL |
1154 | static int bpf_struct_ops_map_link_detach(struct bpf_link *link) |
1155 | { | |
1156 | struct bpf_struct_ops_link *st_link = container_of(link, struct bpf_struct_ops_link, link); | |
1157 | struct bpf_struct_ops_map *st_map; | |
1158 | struct bpf_map *map; | |
1159 | ||
1160 | mutex_lock(&update_mutex); | |
1161 | ||
1162 | map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex)); | |
1163 | if (!map) { | |
1164 | mutex_unlock(&update_mutex); | |
1165 | return 0; | |
1166 | } | |
1167 | st_map = container_of(map, struct bpf_struct_ops_map, map); | |
1168 | ||
1169 | st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link); | |
1170 | ||
1171 | RCU_INIT_POINTER(st_link->map, NULL); | |
1172 | /* Pair with bpf_map_get() in bpf_struct_ops_link_create() or | |
1173 | * bpf_map_inc() in bpf_struct_ops_map_link_update(). | |
1174 | */ | |
1175 | bpf_map_put(&st_map->map); | |
1176 | ||
1177 | mutex_unlock(&update_mutex); | |
1178 | ||
1adddc97 KFL |
1179 | wake_up_interruptible_poll(&st_link->wait_hup, EPOLLHUP); |
1180 | ||
6fb2544e KFL |
1181 | return 0; |
1182 | } | |
1183 | ||
1adddc97 KFL |
1184 | static __poll_t bpf_struct_ops_map_link_poll(struct file *file, |
1185 | struct poll_table_struct *pts) | |
1186 | { | |
1187 | struct bpf_struct_ops_link *st_link = file->private_data; | |
1188 | ||
1189 | poll_wait(file, &st_link->wait_hup, pts); | |
1190 | ||
1191 | return rcu_access_pointer(st_link->map) ? 0 : EPOLLHUP; | |
1192 | } | |
1193 | ||
68b04864 KFL |
1194 | static const struct bpf_link_ops bpf_struct_ops_map_lops = { |
1195 | .dealloc = bpf_struct_ops_map_link_dealloc, | |
6fb2544e | 1196 | .detach = bpf_struct_ops_map_link_detach, |
68b04864 KFL |
1197 | .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo, |
1198 | .fill_link_info = bpf_struct_ops_map_link_fill_link_info, | |
aef56f2e | 1199 | .update_map = bpf_struct_ops_map_link_update, |
1adddc97 | 1200 | .poll = bpf_struct_ops_map_link_poll, |
68b04864 KFL |
1201 | }; |
1202 | ||
1203 | int bpf_struct_ops_link_create(union bpf_attr *attr) | |
1204 | { | |
1205 | struct bpf_struct_ops_link *link = NULL; | |
1206 | struct bpf_link_primer link_primer; | |
1207 | struct bpf_struct_ops_map *st_map; | |
1208 | struct bpf_map *map; | |
1209 | int err; | |
1210 | ||
1211 | map = bpf_map_get(attr->link_create.map_fd); | |
55fbae05 MKL |
1212 | if (IS_ERR(map)) |
1213 | return PTR_ERR(map); | |
68b04864 KFL |
1214 | |
1215 | st_map = (struct bpf_struct_ops_map *)map; | |
1216 | ||
1217 | if (!bpf_struct_ops_valid_to_reg(map)) { | |
1218 | err = -EINVAL; | |
1219 | goto err_out; | |
1220 | } | |
1221 | ||
1222 | link = kzalloc(sizeof(*link), GFP_USER); | |
1223 | if (!link) { | |
1224 | err = -ENOMEM; | |
1225 | goto err_out; | |
1226 | } | |
1227 | bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_map_lops, NULL); | |
1228 | ||
1229 | err = bpf_link_prime(&link->link, &link_primer); | |
1230 | if (err) | |
1231 | goto err_out; | |
1232 | ||
1adddc97 KFL |
1233 | init_waitqueue_head(&link->wait_hup); |
1234 | ||
6fb2544e KFL |
1235 | /* Hold the update_mutex such that the subsystem cannot |
1236 | * do link->ops->detach() before the link is fully initialized. | |
1237 | */ | |
1238 | mutex_lock(&update_mutex); | |
73287fe2 | 1239 | err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link); |
68b04864 | 1240 | if (err) { |
6fb2544e | 1241 | mutex_unlock(&update_mutex); |
68b04864 KFL |
1242 | bpf_link_cleanup(&link_primer); |
1243 | link = NULL; | |
1244 | goto err_out; | |
1245 | } | |
1246 | RCU_INIT_POINTER(link->map, map); | |
6fb2544e | 1247 | mutex_unlock(&update_mutex); |
68b04864 KFL |
1248 | |
1249 | return bpf_link_settle(&link_primer); | |
1250 | ||
1251 | err_out: | |
1252 | bpf_map_put(map); | |
1253 | kfree(link); | |
1254 | return err; | |
1255 | } | |
1338b933 KFL |
1256 | |
1257 | void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map) | |
1258 | { | |
1259 | struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; | |
1260 | ||
1261 | info->btf_vmlinux_id = btf_obj_id(st_map->btf); | |
1262 | } |