Commit | Line | Data |
---|---|---|
ae24345d YS |
1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* Copyright (c) 2020 Facebook */ | |
3 | ||
4 | #include <linux/fs.h> | |
ac51d99b | 5 | #include <linux/anon_inodes.h> |
ae24345d YS |
6 | #include <linux/filter.h> |
7 | #include <linux/bpf.h> | |
b77fb25d | 8 | #include <linux/rcupdate_trace.h> |
ae24345d YS |
9 | |
/* One registered iterator target; entries live on the file-scope
 * "targets" list and are created by bpf_iter_reg_target().
 */
struct bpf_iter_target_info {
	struct list_head list;			/* linkage in the targets list */
	const struct bpf_iter_reg *reg_info;	/* registration data supplied by the target */
	u32 btf_id;				/* cached value */
};
15 | ||
de4e05ca YS |
/* bpf_link flavour used for iterators; recovered from the embedded
 * struct bpf_link with container_of().
 */
struct bpf_iter_link {
	struct bpf_link link;			/* generic link, must stay embedded */
	struct bpf_iter_aux_info aux;		/* handed to the target's attach/detach/init callbacks */
	struct bpf_iter_target_info *tinfo;	/* target selected in bpf_iter_link_attach() */
};
21 | ||
ac51d99b YS |
/* Per-open-file iterator state.  seq_file->private points at
 * target_private, so this header is reached with container_of().
 */
struct bpf_iter_priv_data {
	struct bpf_iter_target_info *tinfo;	/* target this session iterates */
	const struct bpf_iter_seq_info *seq_info; /* seq ops and private-data hooks in use */
	struct bpf_prog *prog;			/* reference taken in prepare_seq_file() */
	u64 session_id;				/* value drawn from the global session_id counter */
	u64 seq_num;				/* counter adjusted by bpf_seq_read() */
	bool done_stop;				/* set once stop() ran with a NULL object and no overflow */
	u8 target_private[] __aligned(8);	/* target-owned area, seq_priv_size bytes */
};
31 | ||
ae24345d YS |
32 | static struct list_head targets = LIST_HEAD_INIT(targets); |
33 | static DEFINE_MUTEX(targets_mutex); | |
34 | ||
2057c92b YS |
35 | /* protect bpf_iter_link changes */ |
36 | static DEFINE_MUTEX(link_mutex); | |
37 | ||
ac51d99b YS |
38 | /* incremented on every opened seq_file */ |
39 | static atomic64_t session_id; | |
40 | ||
a5cbe05a YS |
41 | static int prepare_seq_file(struct file *file, struct bpf_iter_link *link, |
42 | const struct bpf_iter_seq_info *seq_info); | |
367ec3e4 | 43 | |
e5158d98 YS |
44 | static void bpf_iter_inc_seq_num(struct seq_file *seq) |
45 | { | |
46 | struct bpf_iter_priv_data *iter_priv; | |
47 | ||
48 | iter_priv = container_of(seq->private, struct bpf_iter_priv_data, | |
49 | target_private); | |
50 | iter_priv->seq_num++; | |
51 | } | |
52 | ||
53 | static void bpf_iter_dec_seq_num(struct seq_file *seq) | |
54 | { | |
55 | struct bpf_iter_priv_data *iter_priv; | |
56 | ||
57 | iter_priv = container_of(seq->private, struct bpf_iter_priv_data, | |
58 | target_private); | |
59 | iter_priv->seq_num--; | |
60 | } | |
61 | ||
62 | static void bpf_iter_done_stop(struct seq_file *seq) | |
63 | { | |
64 | struct bpf_iter_priv_data *iter_priv; | |
65 | ||
66 | iter_priv = container_of(seq->private, struct bpf_iter_priv_data, | |
67 | target_private); | |
68 | iter_priv->done_stop = true; | |
69 | } | |
70 | ||
d247049f HT |
71 | static inline bool bpf_iter_target_support_resched(const struct bpf_iter_target_info *tinfo) |
72 | { | |
73 | return tinfo->reg_info->feature & BPF_ITER_RESCHED; | |
74 | } | |
75 | ||
cf83b2d2 YS |
76 | static bool bpf_iter_support_resched(struct seq_file *seq) |
77 | { | |
78 | struct bpf_iter_priv_data *iter_priv; | |
79 | ||
80 | iter_priv = container_of(seq->private, struct bpf_iter_priv_data, | |
81 | target_private); | |
d247049f | 82 | return bpf_iter_target_support_resched(iter_priv->tinfo); |
cf83b2d2 YS |
83 | } |
84 | ||
e679654a YS |
/* maximum visited objects before bailing out */
#define MAX_ITER_OBJECTS	1000000

/* bpf_seq_read, a customized and simpler version for bpf iterator.
 * The following are differences from seq_read():
 *  . fixed buffer size (PAGE_SIZE << 3), allocated on first read
 *  . no seeking (the file operations use no_llseek)
 *  . stop() may call bpf program, handling potential overflow there
 *
 * Returns the number of bytes copied to @buf; when nothing was copied
 * the value of err is returned instead (0, or a negative errno such as
 * -ENOMEM, -EFAULT, -E2BIG, -EAGAIN or whatever start()/show()/next()
 * reported).  *ppos is advanced only when bytes were copied.
 */
static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
			    loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	size_t n, offs, copied = 0;
	int err = 0, num_objs = 0;
	bool can_resched;
	void *p;

	mutex_lock(&seq->lock);

	/* lazily allocate the output buffer */
	if (!seq->buf) {
		seq->size = PAGE_SIZE << 3;
		seq->buf = kvmalloc(seq->size, GFP_KERNEL);
		if (!seq->buf) {
			err = -ENOMEM;
			goto done;
		}
	}

	/* data left over from a previous call: hand that out first and
	 * return without producing anything new
	 */
	if (seq->count) {
		n = min(seq->count, size);
		err = copy_to_user(buf, seq->buf + seq->from, n);
		if (err) {
			err = -EFAULT;
			goto done;
		}
		seq->count -= n;
		seq->from += n;
		copied = n;
		goto done;
	}

	seq->from = 0;
	p = seq->op->start(seq, &seq->index);
	if (!p)
		goto stop;
	if (IS_ERR(p)) {
		err = PTR_ERR(p);
		seq->op->stop(seq, p);
		seq->count = 0;
		goto done;
	}

	err = seq->op->show(seq, p);
	if (err > 0) {
		/* object is skipped, decrease seq_num, so next
		 * valid object can reuse the same seq_num.
		 */
		bpf_iter_dec_seq_num(seq);
		seq->count = 0;
	} else if (err < 0 || seq_has_overflowed(seq)) {
		/* the very first object failed or did not fit: nothing
		 * can be returned, so report the error
		 */
		if (!err)
			err = -E2BIG;
		seq->op->stop(seq, p);
		seq->count = 0;
		goto done;
	}

	can_resched = bpf_iter_support_resched(seq);
	while (1) {
		loff_t pos = seq->index;

		num_objs++;
		/* remember the fill level so a failed show() can be undone */
		offs = seq->count;
		p = seq->op->next(seq, p, &seq->index);
		if (pos == seq->index) {
			pr_info_ratelimited("buggy seq_file .next function %ps "
				"did not updated position index\n",
				seq->op->next);
			seq->index++;
		}

		if (IS_ERR_OR_NULL(p))
			break;

		/* got a valid next object, increase seq_num */
		bpf_iter_inc_seq_num(seq);

		/* enough output for this read() already */
		if (seq->count >= size)
			break;

		if (num_objs >= MAX_ITER_OBJECTS) {
			/* visited too many objects without producing any
			 * output: bail out with -EAGAIN
			 */
			if (offs == 0) {
				err = -EAGAIN;
				seq->op->stop(seq, p);
				goto done;
			}
			break;
		}

		err = seq->op->show(seq, p);
		if (err > 0) {
			/* skipped object: give its seq_num back and drop
			 * anything it wrote
			 */
			bpf_iter_dec_seq_num(seq);
			seq->count = offs;
		} else if (err < 0 || seq_has_overflowed(seq)) {
			/* roll back the partial record; only an error if
			 * there is no earlier output to return
			 */
			seq->count = offs;
			if (offs == 0) {
				if (!err)
					err = -E2BIG;
				seq->op->stop(seq, p);
				goto done;
			}
			break;
		}

		if (can_resched)
			cond_resched();
	}
stop:
	offs = seq->count;
	if (IS_ERR(p)) {
		/* next() failed: stop with NULL and report its error */
		seq->op->stop(seq, NULL);
		err = PTR_ERR(p);
		goto done;
	}
	/* bpf program called if !p */
	seq->op->stop(seq, p);
	if (!p) {
		if (!seq_has_overflowed(seq)) {
			bpf_iter_done_stop(seq);
		} else {
			/* output written during stop() did not fit: discard it */
			seq->count = offs;
			if (offs == 0) {
				err = -E2BIG;
				goto done;
			}
		}
	}

	n = min(seq->count, size);
	err = copy_to_user(buf, seq->buf, n);
	if (err) {
		err = -EFAULT;
		goto done;
	}
	copied = n;
	/* whatever did not fit in @buf is kept for the next call */
	seq->count -= n;
	seq->from = n;
done:
	if (!copied)
		copied = err;
	else
		*ppos += copied;
	mutex_unlock(&seq->lock);
	return copied;
}
241 | ||
a5cbe05a YS |
242 | static const struct bpf_iter_seq_info * |
243 | __get_seq_info(struct bpf_iter_link *link) | |
244 | { | |
245 | const struct bpf_iter_seq_info *seq_info; | |
246 | ||
247 | if (link->aux.map) { | |
248 | seq_info = link->aux.map->ops->iter_seq_info; | |
249 | if (seq_info) | |
250 | return seq_info; | |
251 | } | |
252 | ||
253 | return link->tinfo->reg_info->seq_info; | |
254 | } | |
255 | ||
367ec3e4 YS |
256 | static int iter_open(struct inode *inode, struct file *file) |
257 | { | |
258 | struct bpf_iter_link *link = inode->i_private; | |
259 | ||
a5cbe05a | 260 | return prepare_seq_file(file, link, __get_seq_info(link)); |
367ec3e4 YS |
261 | } |
262 | ||
ac51d99b YS |
263 | static int iter_release(struct inode *inode, struct file *file) |
264 | { | |
265 | struct bpf_iter_priv_data *iter_priv; | |
266 | struct seq_file *seq; | |
267 | ||
268 | seq = file->private_data; | |
269 | if (!seq) | |
270 | return 0; | |
271 | ||
272 | iter_priv = container_of(seq->private, struct bpf_iter_priv_data, | |
273 | target_private); | |
274 | ||
a5cbe05a YS |
275 | if (iter_priv->seq_info->fini_seq_private) |
276 | iter_priv->seq_info->fini_seq_private(seq->private); | |
ac51d99b YS |
277 | |
278 | bpf_prog_put(iter_priv->prog); | |
279 | seq->private = iter_priv; | |
280 | ||
281 | return seq_release_private(inode, file); | |
282 | } | |
283 | ||
367ec3e4 YS |
/* File operations of an iterator file.  bpf_iter_get_info() compares a
 * file's f_op against this table to recognise iterator files.
 */
const struct file_operations bpf_iter_fops = {
	.open		= iter_open,
	.llseek		= no_llseek,
	.read		= bpf_seq_read,
	.release	= iter_release,
};
290 | ||
15172a46 YS |
291 | /* The argument reg_info will be cached in bpf_iter_target_info. |
292 | * The common practice is to declare target reg_info as | |
293 | * a const static variable and passed as an argument to | |
294 | * bpf_iter_reg_target(). | |
295 | */ | |
296 | int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info) | |
ae24345d YS |
297 | { |
298 | struct bpf_iter_target_info *tinfo; | |
299 | ||
17d8beda | 300 | tinfo = kzalloc(sizeof(*tinfo), GFP_KERNEL); |
ae24345d YS |
301 | if (!tinfo) |
302 | return -ENOMEM; | |
303 | ||
15172a46 | 304 | tinfo->reg_info = reg_info; |
ae24345d YS |
305 | INIT_LIST_HEAD(&tinfo->list); |
306 | ||
307 | mutex_lock(&targets_mutex); | |
308 | list_add(&tinfo->list, &targets); | |
309 | mutex_unlock(&targets_mutex); | |
310 | ||
311 | return 0; | |
312 | } | |
313 | ||
ab2ee4fc | 314 | void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info) |
ae24345d YS |
315 | { |
316 | struct bpf_iter_target_info *tinfo; | |
317 | bool found = false; | |
318 | ||
319 | mutex_lock(&targets_mutex); | |
320 | list_for_each_entry(tinfo, &targets, list) { | |
ab2ee4fc | 321 | if (reg_info == tinfo->reg_info) { |
ae24345d YS |
322 | list_del(&tinfo->list); |
323 | kfree(tinfo); | |
324 | found = true; | |
325 | break; | |
326 | } | |
327 | } | |
328 | mutex_unlock(&targets_mutex); | |
329 | ||
330 | WARN_ON(found == false); | |
331 | } | |
15d83c4d YS |
332 | |
333 | static void cache_btf_id(struct bpf_iter_target_info *tinfo, | |
334 | struct bpf_prog *prog) | |
335 | { | |
336 | tinfo->btf_id = prog->aux->attach_btf_id; | |
337 | } | |
338 | ||
339 | bool bpf_iter_prog_supported(struct bpf_prog *prog) | |
340 | { | |
341 | const char *attach_fname = prog->aux->attach_func_name; | |
185da3da | 342 | struct bpf_iter_target_info *tinfo = NULL, *iter; |
15d83c4d YS |
343 | u32 prog_btf_id = prog->aux->attach_btf_id; |
344 | const char *prefix = BPF_ITER_FUNC_PREFIX; | |
15d83c4d | 345 | int prefix_len = strlen(prefix); |
15d83c4d YS |
346 | |
347 | if (strncmp(attach_fname, prefix, prefix_len)) | |
348 | return false; | |
349 | ||
350 | mutex_lock(&targets_mutex); | |
185da3da JK |
351 | list_for_each_entry(iter, &targets, list) { |
352 | if (iter->btf_id && iter->btf_id == prog_btf_id) { | |
353 | tinfo = iter; | |
15d83c4d YS |
354 | break; |
355 | } | |
185da3da JK |
356 | if (!strcmp(attach_fname + prefix_len, iter->reg_info->target)) { |
357 | cache_btf_id(iter, prog); | |
358 | tinfo = iter; | |
15d83c4d YS |
359 | break; |
360 | } | |
361 | } | |
362 | mutex_unlock(&targets_mutex); | |
363 | ||
185da3da | 364 | if (tinfo) { |
3c32cc1b YS |
365 | prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size; |
366 | prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info; | |
367 | } | |
368 | ||
185da3da | 369 | return tinfo != NULL; |
15d83c4d | 370 | } |
de4e05ca | 371 | |
3cee6fb8 MKL |
372 | const struct bpf_func_proto * |
373 | bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |
374 | { | |
375 | const struct bpf_iter_target_info *tinfo; | |
376 | const struct bpf_func_proto *fn = NULL; | |
377 | ||
378 | mutex_lock(&targets_mutex); | |
379 | list_for_each_entry(tinfo, &targets, list) { | |
380 | if (tinfo->btf_id == prog->aux->attach_btf_id) { | |
381 | const struct bpf_iter_reg *reg_info; | |
382 | ||
383 | reg_info = tinfo->reg_info; | |
384 | if (reg_info->get_func_proto) | |
385 | fn = reg_info->get_func_proto(func_id, prog); | |
386 | break; | |
387 | } | |
388 | } | |
389 | mutex_unlock(&targets_mutex); | |
390 | ||
391 | return fn; | |
392 | } | |
393 | ||
de4e05ca YS |
394 | static void bpf_iter_link_release(struct bpf_link *link) |
395 | { | |
a5cbe05a YS |
396 | struct bpf_iter_link *iter_link = |
397 | container_of(link, struct bpf_iter_link, link); | |
398 | ||
5e7b3020 YS |
399 | if (iter_link->tinfo->reg_info->detach_target) |
400 | iter_link->tinfo->reg_info->detach_target(&iter_link->aux); | |
de4e05ca YS |
401 | } |
402 | ||
403 | static void bpf_iter_link_dealloc(struct bpf_link *link) | |
404 | { | |
405 | struct bpf_iter_link *iter_link = | |
406 | container_of(link, struct bpf_iter_link, link); | |
407 | ||
408 | kfree(iter_link); | |
409 | } | |
410 | ||
2057c92b YS |
411 | static int bpf_iter_link_replace(struct bpf_link *link, |
412 | struct bpf_prog *new_prog, | |
413 | struct bpf_prog *old_prog) | |
414 | { | |
415 | int ret = 0; | |
416 | ||
417 | mutex_lock(&link_mutex); | |
418 | if (old_prog && link->prog != old_prog) { | |
419 | ret = -EPERM; | |
420 | goto out_unlock; | |
421 | } | |
422 | ||
423 | if (link->prog->type != new_prog->type || | |
424 | link->prog->expected_attach_type != new_prog->expected_attach_type || | |
425 | link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) { | |
426 | ret = -EINVAL; | |
427 | goto out_unlock; | |
428 | } | |
429 | ||
430 | old_prog = xchg(&link->prog, new_prog); | |
431 | bpf_prog_put(old_prog); | |
432 | ||
433 | out_unlock: | |
434 | mutex_unlock(&link_mutex); | |
435 | return ret; | |
436 | } | |
437 | ||
6b0a249a YS |
438 | static void bpf_iter_link_show_fdinfo(const struct bpf_link *link, |
439 | struct seq_file *seq) | |
440 | { | |
441 | struct bpf_iter_link *iter_link = | |
442 | container_of(link, struct bpf_iter_link, link); | |
443 | bpf_iter_show_fdinfo_t show_fdinfo; | |
444 | ||
445 | seq_printf(seq, | |
446 | "target_name:\t%s\n", | |
447 | iter_link->tinfo->reg_info->target); | |
448 | ||
449 | show_fdinfo = iter_link->tinfo->reg_info->show_fdinfo; | |
450 | if (show_fdinfo) | |
451 | show_fdinfo(&iter_link->aux, seq); | |
452 | } | |
453 | ||
454 | static int bpf_iter_link_fill_link_info(const struct bpf_link *link, | |
455 | struct bpf_link_info *info) | |
456 | { | |
457 | struct bpf_iter_link *iter_link = | |
458 | container_of(link, struct bpf_iter_link, link); | |
459 | char __user *ubuf = u64_to_user_ptr(info->iter.target_name); | |
460 | bpf_iter_fill_link_info_t fill_link_info; | |
461 | u32 ulen = info->iter.target_name_len; | |
462 | const char *target_name; | |
463 | u32 target_len; | |
464 | ||
465 | if (!ulen ^ !ubuf) | |
466 | return -EINVAL; | |
467 | ||
468 | target_name = iter_link->tinfo->reg_info->target; | |
469 | target_len = strlen(target_name); | |
470 | info->iter.target_name_len = target_len + 1; | |
471 | ||
472 | if (ubuf) { | |
473 | if (ulen >= target_len + 1) { | |
474 | if (copy_to_user(ubuf, target_name, target_len + 1)) | |
475 | return -EFAULT; | |
476 | } else { | |
477 | char zero = '\0'; | |
478 | ||
479 | if (copy_to_user(ubuf, target_name, ulen - 1)) | |
480 | return -EFAULT; | |
481 | if (put_user(zero, ubuf + ulen - 1)) | |
482 | return -EFAULT; | |
483 | return -ENOSPC; | |
484 | } | |
485 | } | |
486 | ||
487 | fill_link_info = iter_link->tinfo->reg_info->fill_link_info; | |
488 | if (fill_link_info) | |
489 | return fill_link_info(&iter_link->aux, info); | |
490 | ||
491 | return 0; | |
492 | } | |
493 | ||
de4e05ca YS |
/* Link operations for iterator links.  The address of this table is
 * also what identifies a bpf_link as an iterator link
 * (see bpf_link_is_iter() and bpf_iter_new_fd()).
 */
static const struct bpf_link_ops bpf_iter_link_lops = {
	.release = bpf_iter_link_release,
	.dealloc = bpf_iter_link_dealloc,
	.update_prog = bpf_iter_link_replace,
	.show_fdinfo = bpf_iter_link_show_fdinfo,
	.fill_link_info = bpf_iter_link_fill_link_info,
};
501 | ||
367ec3e4 YS |
502 | bool bpf_link_is_iter(struct bpf_link *link) |
503 | { | |
504 | return link->ops == &bpf_iter_link_lops; | |
505 | } | |
506 | ||
af2ac3e1 AS |
507 | int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, |
508 | struct bpf_prog *prog) | |
de4e05ca | 509 | { |
185da3da | 510 | struct bpf_iter_target_info *tinfo = NULL, *iter; |
de4e05ca | 511 | struct bpf_link_primer link_primer; |
5e7b3020 | 512 | union bpf_iter_link_info linfo; |
de4e05ca | 513 | struct bpf_iter_link *link; |
5e7b3020 | 514 | u32 prog_btf_id, linfo_len; |
af2ac3e1 | 515 | bpfptr_t ulinfo; |
de4e05ca YS |
516 | int err; |
517 | ||
5e7b3020 YS |
518 | if (attr->link_create.target_fd || attr->link_create.flags) |
519 | return -EINVAL; | |
520 | ||
521 | memset(&linfo, 0, sizeof(union bpf_iter_link_info)); | |
522 | ||
af2ac3e1 | 523 | ulinfo = make_bpfptr(attr->link_create.iter_info, uattr.is_kernel); |
5e7b3020 | 524 | linfo_len = attr->link_create.iter_info_len; |
af2ac3e1 | 525 | if (bpfptr_is_null(ulinfo) ^ !linfo_len) |
5e7b3020 YS |
526 | return -EINVAL; |
527 | ||
af2ac3e1 | 528 | if (!bpfptr_is_null(ulinfo)) { |
5e7b3020 YS |
529 | err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo), |
530 | linfo_len); | |
531 | if (err) | |
532 | return err; | |
533 | linfo_len = min_t(u32, linfo_len, sizeof(linfo)); | |
af2ac3e1 | 534 | if (copy_from_bpfptr(&linfo, ulinfo, linfo_len)) |
5e7b3020 YS |
535 | return -EFAULT; |
536 | } | |
537 | ||
de4e05ca YS |
538 | prog_btf_id = prog->aux->attach_btf_id; |
539 | mutex_lock(&targets_mutex); | |
185da3da JK |
540 | list_for_each_entry(iter, &targets, list) { |
541 | if (iter->btf_id == prog_btf_id) { | |
542 | tinfo = iter; | |
de4e05ca YS |
543 | break; |
544 | } | |
545 | } | |
546 | mutex_unlock(&targets_mutex); | |
185da3da | 547 | if (!tinfo) |
de4e05ca YS |
548 | return -ENOENT; |
549 | ||
d247049f HT |
550 | /* Only allow sleepable program for resched-able iterator */ |
551 | if (prog->aux->sleepable && !bpf_iter_target_support_resched(tinfo)) | |
552 | return -EINVAL; | |
553 | ||
de4e05ca YS |
554 | link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN); |
555 | if (!link) | |
556 | return -ENOMEM; | |
557 | ||
558 | bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog); | |
559 | link->tinfo = tinfo; | |
560 | ||
aa1b02e6 | 561 | err = bpf_link_prime(&link->link, &link_primer); |
de4e05ca YS |
562 | if (err) { |
563 | kfree(link); | |
564 | return err; | |
565 | } | |
566 | ||
5e7b3020 YS |
567 | if (tinfo->reg_info->attach_target) { |
568 | err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux); | |
a5cbe05a | 569 | if (err) { |
5e7b3020 YS |
570 | bpf_link_cleanup(&link_primer); |
571 | return err; | |
a5cbe05a | 572 | } |
a5cbe05a YS |
573 | } |
574 | ||
de4e05ca YS |
575 | return bpf_link_settle(&link_primer); |
576 | } | |
ac51d99b YS |
577 | |
578 | static void init_seq_meta(struct bpf_iter_priv_data *priv_data, | |
579 | struct bpf_iter_target_info *tinfo, | |
a5cbe05a | 580 | const struct bpf_iter_seq_info *seq_info, |
ac51d99b YS |
581 | struct bpf_prog *prog) |
582 | { | |
583 | priv_data->tinfo = tinfo; | |
a5cbe05a | 584 | priv_data->seq_info = seq_info; |
ac51d99b YS |
585 | priv_data->prog = prog; |
586 | priv_data->session_id = atomic64_inc_return(&session_id); | |
587 | priv_data->seq_num = 0; | |
588 | priv_data->done_stop = false; | |
589 | } | |
590 | ||
a5cbe05a YS |
591 | static int prepare_seq_file(struct file *file, struct bpf_iter_link *link, |
592 | const struct bpf_iter_seq_info *seq_info) | |
ac51d99b YS |
593 | { |
594 | struct bpf_iter_priv_data *priv_data; | |
595 | struct bpf_iter_target_info *tinfo; | |
596 | struct bpf_prog *prog; | |
597 | u32 total_priv_dsize; | |
598 | struct seq_file *seq; | |
599 | int err = 0; | |
600 | ||
601 | mutex_lock(&link_mutex); | |
602 | prog = link->link.prog; | |
603 | bpf_prog_inc(prog); | |
604 | mutex_unlock(&link_mutex); | |
605 | ||
606 | tinfo = link->tinfo; | |
607 | total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) + | |
a5cbe05a YS |
608 | seq_info->seq_priv_size; |
609 | priv_data = __seq_open_private(file, seq_info->seq_ops, | |
15172a46 | 610 | total_priv_dsize); |
ac51d99b YS |
611 | if (!priv_data) { |
612 | err = -ENOMEM; | |
613 | goto release_prog; | |
614 | } | |
615 | ||
a5cbe05a YS |
616 | if (seq_info->init_seq_private) { |
617 | err = seq_info->init_seq_private(priv_data->target_private, &link->aux); | |
ac51d99b YS |
618 | if (err) |
619 | goto release_seq_file; | |
620 | } | |
621 | ||
a5cbe05a | 622 | init_seq_meta(priv_data, tinfo, seq_info, prog); |
ac51d99b YS |
623 | seq = file->private_data; |
624 | seq->private = priv_data->target_private; | |
625 | ||
626 | return 0; | |
627 | ||
628 | release_seq_file: | |
629 | seq_release_private(file->f_inode, file); | |
630 | file->private_data = NULL; | |
631 | release_prog: | |
632 | bpf_prog_put(prog); | |
633 | return err; | |
634 | } | |
635 | ||
636 | int bpf_iter_new_fd(struct bpf_link *link) | |
637 | { | |
a5cbe05a | 638 | struct bpf_iter_link *iter_link; |
ac51d99b YS |
639 | struct file *file; |
640 | unsigned int flags; | |
641 | int err, fd; | |
642 | ||
643 | if (link->ops != &bpf_iter_link_lops) | |
644 | return -EINVAL; | |
645 | ||
646 | flags = O_RDONLY | O_CLOEXEC; | |
647 | fd = get_unused_fd_flags(flags); | |
648 | if (fd < 0) | |
649 | return fd; | |
650 | ||
651 | file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags); | |
652 | if (IS_ERR(file)) { | |
653 | err = PTR_ERR(file); | |
654 | goto free_fd; | |
655 | } | |
656 | ||
a5cbe05a YS |
657 | iter_link = container_of(link, struct bpf_iter_link, link); |
658 | err = prepare_seq_file(file, iter_link, __get_seq_info(iter_link)); | |
ac51d99b YS |
659 | if (err) |
660 | goto free_file; | |
661 | ||
662 | fd_install(fd, file); | |
663 | return fd; | |
664 | ||
665 | free_file: | |
666 | fput(file); | |
667 | free_fd: | |
668 | put_unused_fd(fd); | |
669 | return err; | |
670 | } | |
e5158d98 YS |
671 | |
672 | struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop) | |
673 | { | |
674 | struct bpf_iter_priv_data *iter_priv; | |
675 | struct seq_file *seq; | |
676 | void *seq_priv; | |
677 | ||
678 | seq = meta->seq; | |
679 | if (seq->file->f_op != &bpf_iter_fops) | |
680 | return NULL; | |
681 | ||
682 | seq_priv = seq->private; | |
683 | iter_priv = container_of(seq_priv, struct bpf_iter_priv_data, | |
684 | target_private); | |
685 | ||
686 | if (in_stop && iter_priv->done_stop) | |
687 | return NULL; | |
688 | ||
689 | meta->session_id = iter_priv->session_id; | |
690 | meta->seq_num = iter_priv->seq_num; | |
691 | ||
692 | return iter_priv->prog; | |
693 | } | |
694 | ||
695 | int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx) | |
696 | { | |
2b5a2ecb | 697 | struct bpf_run_ctx run_ctx, *old_run_ctx; |
e5158d98 YS |
698 | int ret; |
699 | ||
b77fb25d KY |
700 | if (prog->aux->sleepable) { |
701 | rcu_read_lock_trace(); | |
702 | migrate_disable(); | |
703 | might_fault(); | |
2b5a2ecb | 704 | old_run_ctx = bpf_set_run_ctx(&run_ctx); |
b77fb25d | 705 | ret = bpf_prog_run(prog, ctx); |
2b5a2ecb | 706 | bpf_reset_run_ctx(old_run_ctx); |
b77fb25d KY |
707 | migrate_enable(); |
708 | rcu_read_unlock_trace(); | |
709 | } else { | |
710 | rcu_read_lock(); | |
711 | migrate_disable(); | |
2b5a2ecb | 712 | old_run_ctx = bpf_set_run_ctx(&run_ctx); |
b77fb25d | 713 | ret = bpf_prog_run(prog, ctx); |
2b5a2ecb | 714 | bpf_reset_run_ctx(old_run_ctx); |
b77fb25d KY |
715 | migrate_enable(); |
716 | rcu_read_unlock(); | |
717 | } | |
e5158d98 | 718 | |
2e3ed68b YS |
719 | /* bpf program can only return 0 or 1: |
720 | * 0 : okay | |
721 | * 1 : retry the same object | |
722 | * The bpf_iter_run_prog() return value | |
723 | * will be seq_ops->show() return value. | |
724 | */ | |
e5158d98 YS |
725 | return ret == 0 ? 0 : -EAGAIN; |
726 | } | |
69c087ba YS |
727 | |
/* Helper body: forwards all four arguments unchanged to the map's own
 * map_for_each_callback operation and returns its result.
 */
BPF_CALL_4(bpf_for_each_map_elem, struct bpf_map *, map, void *, callback_fn,
	   void *, callback_ctx, u64, flags)
{
	return map->ops->map_for_each_callback(map, callback_fn, callback_ctx, flags);
}
733 | ||
/* Prototype of bpf_for_each_map_elem(); the argument types follow the
 * order of the helper's parameters: map, callback_fn, callback_ctx, flags.
 */
const struct bpf_func_proto bpf_for_each_map_elem_proto = {
	.func		= bpf_for_each_map_elem,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_PTR_TO_FUNC,
	.arg3_type	= ARG_PTR_TO_STACK_OR_NULL,
	.arg4_type	= ARG_ANYTHING,
};
e6f2dd0f | 743 | |
e6f2dd0f JK |
744 | BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx, |
745 | u64, flags) | |
746 | { | |
747 | bpf_callback_t callback = (bpf_callback_t)callback_fn; | |
748 | u64 ret; | |
749 | u32 i; | |
750 | ||
1ade2371 EZ |
751 | /* Note: these safety checks are also verified when bpf_loop |
752 | * is inlined, be careful to modify this code in sync. See | |
753 | * function verifier.c:inline_bpf_loop. | |
754 | */ | |
e6f2dd0f JK |
755 | if (flags) |
756 | return -EINVAL; | |
1ade2371 | 757 | if (nr_loops > BPF_MAX_LOOPS) |
e6f2dd0f JK |
758 | return -E2BIG; |
759 | ||
760 | for (i = 0; i < nr_loops; i++) { | |
761 | ret = callback((u64)i, (u64)(long)callback_ctx, 0, 0, 0); | |
762 | /* return value: 0 - continue, 1 - stop and return */ | |
763 | if (ret) | |
764 | return i + 1; | |
765 | } | |
766 | ||
767 | return i; | |
768 | } | |
769 | ||
/* Prototype of bpf_loop(); the argument types follow the order of the
 * helper's parameters: nr_loops, callback_fn, callback_ctx, flags.
 */
const struct bpf_func_proto bpf_loop_proto = {
	.func		= bpf_loop,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_PTR_TO_FUNC,
	.arg3_type	= ARG_PTR_TO_STACK_OR_NULL,
	.arg4_type	= ARG_ANYTHING,
};
6018e1f4 AN |
779 | |
/* Kernel-side layout of struct bpf_iter_num; the bpf_iter_num_*()
 * kfuncs cast their opaque argument to this type.
 */
struct bpf_iter_num_kern {
	int cur; /* current value, inclusive */
	int end; /* final value, exclusive */
} __aligned(8);
784 | ||
785 | __diag_push(); | |
786 | __diag_ignore_all("-Wmissing-prototypes", | |
787 | "Global functions as their definitions will be in vmlinux BTF"); | |
788 | ||
789 | __bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) | |
790 | { | |
791 | struct bpf_iter_num_kern *s = (void *)it; | |
792 | ||
793 | BUILD_BUG_ON(sizeof(struct bpf_iter_num_kern) != sizeof(struct bpf_iter_num)); | |
794 | BUILD_BUG_ON(__alignof__(struct bpf_iter_num_kern) != __alignof__(struct bpf_iter_num)); | |
795 | ||
796 | BTF_TYPE_EMIT(struct btf_iter_num); | |
797 | ||
798 | /* start == end is legit, it's an empty range and we'll just get NULL | |
799 | * on first (and any subsequent) bpf_iter_num_next() call | |
800 | */ | |
801 | if (start > end) { | |
802 | s->cur = s->end = 0; | |
803 | return -EINVAL; | |
804 | } | |
805 | ||
806 | /* avoid overflows, e.g., if start == INT_MIN and end == INT_MAX */ | |
807 | if ((s64)end - (s64)start > BPF_MAX_LOOPS) { | |
808 | s->cur = s->end = 0; | |
809 | return -E2BIG; | |
810 | } | |
811 | ||
812 | /* user will call bpf_iter_num_next() first, | |
813 | * which will set s->cur to exactly start value; | |
814 | * underflow shouldn't matter | |
815 | */ | |
816 | s->cur = start - 1; | |
817 | s->end = end; | |
818 | ||
819 | return 0; | |
820 | } | |
821 | ||
822 | __bpf_kfunc int *bpf_iter_num_next(struct bpf_iter_num* it) | |
823 | { | |
824 | struct bpf_iter_num_kern *s = (void *)it; | |
825 | ||
826 | /* check failed initialization or if we are done (same behavior); | |
827 | * need to be careful about overflow, so convert to s64 for checks, | |
828 | * e.g., if s->cur == s->end == INT_MAX, we can't just do | |
829 | * s->cur + 1 >= s->end | |
830 | */ | |
831 | if ((s64)(s->cur + 1) >= s->end) { | |
832 | s->cur = s->end = 0; | |
833 | return NULL; | |
834 | } | |
835 | ||
836 | s->cur++; | |
837 | ||
838 | return &s->cur; | |
839 | } | |
840 | ||
841 | __bpf_kfunc void bpf_iter_num_destroy(struct bpf_iter_num *it) | |
842 | { | |
843 | struct bpf_iter_num_kern *s = (void *)it; | |
844 | ||
845 | s->cur = s->end = 0; | |
846 | } | |
847 | ||
848 | __diag_pop(); |