Commit | Line | Data |
---|---|---|
ae24345d YS |
1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* Copyright (c) 2020 Facebook */ | |
3 | ||
4 | #include <linux/fs.h> | |
ac51d99b | 5 | #include <linux/anon_inodes.h> |
ae24345d YS |
6 | #include <linux/filter.h> |
7 | #include <linux/bpf.h> | |
8 | ||
9 | struct bpf_iter_target_info { | |
10 | struct list_head list; | |
11 | const char *target; | |
12 | const struct seq_operations *seq_ops; | |
13 | bpf_iter_init_seq_priv_t init_seq_private; | |
14 | bpf_iter_fini_seq_priv_t fini_seq_private; | |
15 | u32 seq_priv_size; | |
15d83c4d | 16 | u32 btf_id; /* cached value */ |
ae24345d YS |
17 | }; |
18 | ||
de4e05ca YS |
19 | struct bpf_iter_link { |
20 | struct bpf_link link; | |
21 | struct bpf_iter_target_info *tinfo; | |
22 | }; | |
23 | ||
ac51d99b YS |
24 | struct bpf_iter_priv_data { |
25 | struct bpf_iter_target_info *tinfo; | |
26 | struct bpf_prog *prog; | |
27 | u64 session_id; | |
28 | u64 seq_num; | |
29 | bool done_stop; | |
30 | u8 target_private[] __aligned(8); | |
31 | }; | |
32 | ||
ae24345d YS |
33 | static struct list_head targets = LIST_HEAD_INIT(targets); |
34 | static DEFINE_MUTEX(targets_mutex); | |
35 | ||
2057c92b YS |
36 | /* protect bpf_iter_link changes */ |
37 | static DEFINE_MUTEX(link_mutex); | |
38 | ||
ac51d99b YS |
39 | /* incremented on every opened seq_file */ |
40 | static atomic64_t session_id; | |
41 | ||
367ec3e4 YS |
42 | static int prepare_seq_file(struct file *file, struct bpf_iter_link *link); |
43 | ||
fd4f12bc YS |
44 | /* bpf_seq_read, a customized and simpler version for bpf iterator. |
45 | * no_llseek is assumed for this file. | |
46 | * The following are differences from seq_read(): | |
47 | * . fixed buffer size (PAGE_SIZE) | |
48 | * . assuming no_llseek | |
49 | * . stop() may call bpf program, handling potential overflow there | |
50 | */ | |
51 | static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, | |
52 | loff_t *ppos) | |
53 | { | |
54 | struct seq_file *seq = file->private_data; | |
55 | size_t n, offs, copied = 0; | |
56 | int err = 0; | |
57 | void *p; | |
58 | ||
59 | mutex_lock(&seq->lock); | |
60 | ||
61 | if (!seq->buf) { | |
62 | seq->size = PAGE_SIZE; | |
63 | seq->buf = kmalloc(seq->size, GFP_KERNEL); | |
64 | if (!seq->buf) { | |
65 | err = -ENOMEM; | |
66 | goto done; | |
67 | } | |
68 | } | |
69 | ||
70 | if (seq->count) { | |
71 | n = min(seq->count, size); | |
72 | err = copy_to_user(buf, seq->buf + seq->from, n); | |
73 | if (err) { | |
74 | err = -EFAULT; | |
75 | goto done; | |
76 | } | |
77 | seq->count -= n; | |
78 | seq->from += n; | |
79 | copied = n; | |
80 | goto done; | |
81 | } | |
82 | ||
83 | seq->from = 0; | |
84 | p = seq->op->start(seq, &seq->index); | |
85 | if (!p) | |
86 | goto stop; | |
87 | if (IS_ERR(p)) { | |
88 | err = PTR_ERR(p); | |
89 | seq->op->stop(seq, p); | |
90 | seq->count = 0; | |
91 | goto done; | |
92 | } | |
93 | ||
94 | err = seq->op->show(seq, p); | |
95 | if (err > 0) { | |
96 | seq->count = 0; | |
97 | } else if (err < 0 || seq_has_overflowed(seq)) { | |
98 | if (!err) | |
99 | err = -E2BIG; | |
100 | seq->op->stop(seq, p); | |
101 | seq->count = 0; | |
102 | goto done; | |
103 | } | |
104 | ||
105 | while (1) { | |
106 | loff_t pos = seq->index; | |
107 | ||
108 | offs = seq->count; | |
109 | p = seq->op->next(seq, p, &seq->index); | |
110 | if (pos == seq->index) { | |
111 | pr_info_ratelimited("buggy seq_file .next function %ps " | |
112 | "did not updated position index\n", | |
113 | seq->op->next); | |
114 | seq->index++; | |
115 | } | |
116 | ||
117 | if (IS_ERR_OR_NULL(p)) | |
118 | break; | |
119 | ||
120 | if (seq->count >= size) | |
121 | break; | |
122 | ||
123 | err = seq->op->show(seq, p); | |
124 | if (err > 0) { | |
125 | seq->count = offs; | |
126 | } else if (err < 0 || seq_has_overflowed(seq)) { | |
127 | seq->count = offs; | |
128 | if (offs == 0) { | |
129 | if (!err) | |
130 | err = -E2BIG; | |
131 | seq->op->stop(seq, p); | |
132 | goto done; | |
133 | } | |
134 | break; | |
135 | } | |
136 | } | |
137 | stop: | |
138 | offs = seq->count; | |
139 | /* bpf program called if !p */ | |
140 | seq->op->stop(seq, p); | |
141 | if (!p && seq_has_overflowed(seq)) { | |
142 | seq->count = offs; | |
143 | if (offs == 0) { | |
144 | err = -E2BIG; | |
145 | goto done; | |
146 | } | |
147 | } | |
148 | ||
149 | n = min(seq->count, size); | |
150 | err = copy_to_user(buf, seq->buf, n); | |
151 | if (err) { | |
152 | err = -EFAULT; | |
153 | goto done; | |
154 | } | |
155 | copied = n; | |
156 | seq->count -= n; | |
157 | seq->from = n; | |
158 | done: | |
159 | if (!copied) | |
160 | copied = err; | |
161 | else | |
162 | *ppos += copied; | |
163 | mutex_unlock(&seq->lock); | |
164 | return copied; | |
165 | } | |
166 | ||
367ec3e4 YS |
167 | static int iter_open(struct inode *inode, struct file *file) |
168 | { | |
169 | struct bpf_iter_link *link = inode->i_private; | |
170 | ||
171 | return prepare_seq_file(file, link); | |
172 | } | |
173 | ||
ac51d99b YS |
174 | static int iter_release(struct inode *inode, struct file *file) |
175 | { | |
176 | struct bpf_iter_priv_data *iter_priv; | |
177 | struct seq_file *seq; | |
178 | ||
179 | seq = file->private_data; | |
180 | if (!seq) | |
181 | return 0; | |
182 | ||
183 | iter_priv = container_of(seq->private, struct bpf_iter_priv_data, | |
184 | target_private); | |
185 | ||
186 | if (iter_priv->tinfo->fini_seq_private) | |
187 | iter_priv->tinfo->fini_seq_private(seq->private); | |
188 | ||
189 | bpf_prog_put(iter_priv->prog); | |
190 | seq->private = iter_priv; | |
191 | ||
192 | return seq_release_private(inode, file); | |
193 | } | |
194 | ||
367ec3e4 YS |
195 | const struct file_operations bpf_iter_fops = { |
196 | .open = iter_open, | |
ac51d99b YS |
197 | .llseek = no_llseek, |
198 | .read = bpf_seq_read, | |
199 | .release = iter_release, | |
200 | }; | |
201 | ||
ae24345d YS |
202 | int bpf_iter_reg_target(struct bpf_iter_reg *reg_info) |
203 | { | |
204 | struct bpf_iter_target_info *tinfo; | |
205 | ||
206 | tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); | |
207 | if (!tinfo) | |
208 | return -ENOMEM; | |
209 | ||
210 | tinfo->target = reg_info->target; | |
211 | tinfo->seq_ops = reg_info->seq_ops; | |
212 | tinfo->init_seq_private = reg_info->init_seq_private; | |
213 | tinfo->fini_seq_private = reg_info->fini_seq_private; | |
214 | tinfo->seq_priv_size = reg_info->seq_priv_size; | |
215 | INIT_LIST_HEAD(&tinfo->list); | |
216 | ||
217 | mutex_lock(&targets_mutex); | |
218 | list_add(&tinfo->list, &targets); | |
219 | mutex_unlock(&targets_mutex); | |
220 | ||
221 | return 0; | |
222 | } | |
223 | ||
224 | void bpf_iter_unreg_target(const char *target) | |
225 | { | |
226 | struct bpf_iter_target_info *tinfo; | |
227 | bool found = false; | |
228 | ||
229 | mutex_lock(&targets_mutex); | |
230 | list_for_each_entry(tinfo, &targets, list) { | |
231 | if (!strcmp(target, tinfo->target)) { | |
232 | list_del(&tinfo->list); | |
233 | kfree(tinfo); | |
234 | found = true; | |
235 | break; | |
236 | } | |
237 | } | |
238 | mutex_unlock(&targets_mutex); | |
239 | ||
240 | WARN_ON(found == false); | |
241 | } | |
15d83c4d YS |
242 | |
243 | static void cache_btf_id(struct bpf_iter_target_info *tinfo, | |
244 | struct bpf_prog *prog) | |
245 | { | |
246 | tinfo->btf_id = prog->aux->attach_btf_id; | |
247 | } | |
248 | ||
249 | bool bpf_iter_prog_supported(struct bpf_prog *prog) | |
250 | { | |
251 | const char *attach_fname = prog->aux->attach_func_name; | |
252 | u32 prog_btf_id = prog->aux->attach_btf_id; | |
253 | const char *prefix = BPF_ITER_FUNC_PREFIX; | |
254 | struct bpf_iter_target_info *tinfo; | |
255 | int prefix_len = strlen(prefix); | |
256 | bool supported = false; | |
257 | ||
258 | if (strncmp(attach_fname, prefix, prefix_len)) | |
259 | return false; | |
260 | ||
261 | mutex_lock(&targets_mutex); | |
262 | list_for_each_entry(tinfo, &targets, list) { | |
263 | if (tinfo->btf_id && tinfo->btf_id == prog_btf_id) { | |
264 | supported = true; | |
265 | break; | |
266 | } | |
267 | if (!strcmp(attach_fname + prefix_len, tinfo->target)) { | |
268 | cache_btf_id(tinfo, prog); | |
269 | supported = true; | |
270 | break; | |
271 | } | |
272 | } | |
273 | mutex_unlock(&targets_mutex); | |
274 | ||
275 | return supported; | |
276 | } | |
de4e05ca YS |
277 | |
/* ->release of bpf_iter_link_lops: intentionally does nothing. */
static void bpf_iter_link_release(struct bpf_link *link)
{
	/* empty */
}
281 | ||
282 | static void bpf_iter_link_dealloc(struct bpf_link *link) | |
283 | { | |
284 | struct bpf_iter_link *iter_link = | |
285 | container_of(link, struct bpf_iter_link, link); | |
286 | ||
287 | kfree(iter_link); | |
288 | } | |
289 | ||
2057c92b YS |
290 | static int bpf_iter_link_replace(struct bpf_link *link, |
291 | struct bpf_prog *new_prog, | |
292 | struct bpf_prog *old_prog) | |
293 | { | |
294 | int ret = 0; | |
295 | ||
296 | mutex_lock(&link_mutex); | |
297 | if (old_prog && link->prog != old_prog) { | |
298 | ret = -EPERM; | |
299 | goto out_unlock; | |
300 | } | |
301 | ||
302 | if (link->prog->type != new_prog->type || | |
303 | link->prog->expected_attach_type != new_prog->expected_attach_type || | |
304 | link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) { | |
305 | ret = -EINVAL; | |
306 | goto out_unlock; | |
307 | } | |
308 | ||
309 | old_prog = xchg(&link->prog, new_prog); | |
310 | bpf_prog_put(old_prog); | |
311 | ||
312 | out_unlock: | |
313 | mutex_unlock(&link_mutex); | |
314 | return ret; | |
315 | } | |
316 | ||
de4e05ca YS |
317 | static const struct bpf_link_ops bpf_iter_link_lops = { |
318 | .release = bpf_iter_link_release, | |
319 | .dealloc = bpf_iter_link_dealloc, | |
2057c92b | 320 | .update_prog = bpf_iter_link_replace, |
de4e05ca YS |
321 | }; |
322 | ||
367ec3e4 YS |
323 | bool bpf_link_is_iter(struct bpf_link *link) |
324 | { | |
325 | return link->ops == &bpf_iter_link_lops; | |
326 | } | |
327 | ||
de4e05ca YS |
328 | int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) |
329 | { | |
330 | struct bpf_link_primer link_primer; | |
331 | struct bpf_iter_target_info *tinfo; | |
332 | struct bpf_iter_link *link; | |
333 | bool existed = false; | |
334 | u32 prog_btf_id; | |
335 | int err; | |
336 | ||
337 | if (attr->link_create.target_fd || attr->link_create.flags) | |
338 | return -EINVAL; | |
339 | ||
340 | prog_btf_id = prog->aux->attach_btf_id; | |
341 | mutex_lock(&targets_mutex); | |
342 | list_for_each_entry(tinfo, &targets, list) { | |
343 | if (tinfo->btf_id == prog_btf_id) { | |
344 | existed = true; | |
345 | break; | |
346 | } | |
347 | } | |
348 | mutex_unlock(&targets_mutex); | |
349 | if (!existed) | |
350 | return -ENOENT; | |
351 | ||
352 | link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN); | |
353 | if (!link) | |
354 | return -ENOMEM; | |
355 | ||
356 | bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog); | |
357 | link->tinfo = tinfo; | |
358 | ||
359 | err = bpf_link_prime(&link->link, &link_primer); | |
360 | if (err) { | |
361 | kfree(link); | |
362 | return err; | |
363 | } | |
364 | ||
365 | return bpf_link_settle(&link_primer); | |
366 | } | |
ac51d99b YS |
367 | |
368 | static void init_seq_meta(struct bpf_iter_priv_data *priv_data, | |
369 | struct bpf_iter_target_info *tinfo, | |
370 | struct bpf_prog *prog) | |
371 | { | |
372 | priv_data->tinfo = tinfo; | |
373 | priv_data->prog = prog; | |
374 | priv_data->session_id = atomic64_inc_return(&session_id); | |
375 | priv_data->seq_num = 0; | |
376 | priv_data->done_stop = false; | |
377 | } | |
378 | ||
379 | static int prepare_seq_file(struct file *file, struct bpf_iter_link *link) | |
380 | { | |
381 | struct bpf_iter_priv_data *priv_data; | |
382 | struct bpf_iter_target_info *tinfo; | |
383 | struct bpf_prog *prog; | |
384 | u32 total_priv_dsize; | |
385 | struct seq_file *seq; | |
386 | int err = 0; | |
387 | ||
388 | mutex_lock(&link_mutex); | |
389 | prog = link->link.prog; | |
390 | bpf_prog_inc(prog); | |
391 | mutex_unlock(&link_mutex); | |
392 | ||
393 | tinfo = link->tinfo; | |
394 | total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) + | |
395 | tinfo->seq_priv_size; | |
396 | priv_data = __seq_open_private(file, tinfo->seq_ops, total_priv_dsize); | |
397 | if (!priv_data) { | |
398 | err = -ENOMEM; | |
399 | goto release_prog; | |
400 | } | |
401 | ||
402 | if (tinfo->init_seq_private) { | |
403 | err = tinfo->init_seq_private(priv_data->target_private); | |
404 | if (err) | |
405 | goto release_seq_file; | |
406 | } | |
407 | ||
408 | init_seq_meta(priv_data, tinfo, prog); | |
409 | seq = file->private_data; | |
410 | seq->private = priv_data->target_private; | |
411 | ||
412 | return 0; | |
413 | ||
414 | release_seq_file: | |
415 | seq_release_private(file->f_inode, file); | |
416 | file->private_data = NULL; | |
417 | release_prog: | |
418 | bpf_prog_put(prog); | |
419 | return err; | |
420 | } | |
421 | ||
422 | int bpf_iter_new_fd(struct bpf_link *link) | |
423 | { | |
424 | struct file *file; | |
425 | unsigned int flags; | |
426 | int err, fd; | |
427 | ||
428 | if (link->ops != &bpf_iter_link_lops) | |
429 | return -EINVAL; | |
430 | ||
431 | flags = O_RDONLY | O_CLOEXEC; | |
432 | fd = get_unused_fd_flags(flags); | |
433 | if (fd < 0) | |
434 | return fd; | |
435 | ||
436 | file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags); | |
437 | if (IS_ERR(file)) { | |
438 | err = PTR_ERR(file); | |
439 | goto free_fd; | |
440 | } | |
441 | ||
442 | err = prepare_seq_file(file, | |
443 | container_of(link, struct bpf_iter_link, link)); | |
444 | if (err) | |
445 | goto free_file; | |
446 | ||
447 | fd_install(fd, file); | |
448 | return fd; | |
449 | ||
450 | free_file: | |
451 | fput(file); | |
452 | free_fd: | |
453 | put_unused_fd(fd); | |
454 | return err; | |
455 | } |