bpf: Create file bpf iterator
[linux-2.6-block.git] / kernel / bpf / bpf_iter.c
CommitLineData
ae24345d
YS
1// SPDX-License-Identifier: GPL-2.0-only
2/* Copyright (c) 2020 Facebook */
3
4#include <linux/fs.h>
ac51d99b 5#include <linux/anon_inodes.h>
ae24345d
YS
6#include <linux/filter.h>
7#include <linux/bpf.h>
8
9struct bpf_iter_target_info {
10 struct list_head list;
11 const char *target;
12 const struct seq_operations *seq_ops;
13 bpf_iter_init_seq_priv_t init_seq_private;
14 bpf_iter_fini_seq_priv_t fini_seq_private;
15 u32 seq_priv_size;
15d83c4d 16 u32 btf_id; /* cached value */
ae24345d
YS
17};
18
de4e05ca
YS
19struct bpf_iter_link {
20 struct bpf_link link;
21 struct bpf_iter_target_info *tinfo;
22};
23
ac51d99b
YS
24struct bpf_iter_priv_data {
25 struct bpf_iter_target_info *tinfo;
26 struct bpf_prog *prog;
27 u64 session_id;
28 u64 seq_num;
29 bool done_stop;
30 u8 target_private[] __aligned(8);
31};
32
ae24345d
YS
33static struct list_head targets = LIST_HEAD_INIT(targets);
34static DEFINE_MUTEX(targets_mutex);
35
2057c92b
YS
36/* protect bpf_iter_link changes */
37static DEFINE_MUTEX(link_mutex);
38
ac51d99b
YS
39/* incremented on every opened seq_file */
40static atomic64_t session_id;
41
367ec3e4
YS
42static int prepare_seq_file(struct file *file, struct bpf_iter_link *link);
43
fd4f12bc
YS
44/* bpf_seq_read, a customized and simpler version for bpf iterator.
45 * no_llseek is assumed for this file.
46 * The following are differences from seq_read():
47 * . fixed buffer size (PAGE_SIZE)
48 * . assuming no_llseek
49 * . stop() may call bpf program, handling potential overflow there
50 */
51static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
52 loff_t *ppos)
53{
54 struct seq_file *seq = file->private_data;
55 size_t n, offs, copied = 0;
56 int err = 0;
57 void *p;
58
59 mutex_lock(&seq->lock);
60
61 if (!seq->buf) {
62 seq->size = PAGE_SIZE;
63 seq->buf = kmalloc(seq->size, GFP_KERNEL);
64 if (!seq->buf) {
65 err = -ENOMEM;
66 goto done;
67 }
68 }
69
70 if (seq->count) {
71 n = min(seq->count, size);
72 err = copy_to_user(buf, seq->buf + seq->from, n);
73 if (err) {
74 err = -EFAULT;
75 goto done;
76 }
77 seq->count -= n;
78 seq->from += n;
79 copied = n;
80 goto done;
81 }
82
83 seq->from = 0;
84 p = seq->op->start(seq, &seq->index);
85 if (!p)
86 goto stop;
87 if (IS_ERR(p)) {
88 err = PTR_ERR(p);
89 seq->op->stop(seq, p);
90 seq->count = 0;
91 goto done;
92 }
93
94 err = seq->op->show(seq, p);
95 if (err > 0) {
96 seq->count = 0;
97 } else if (err < 0 || seq_has_overflowed(seq)) {
98 if (!err)
99 err = -E2BIG;
100 seq->op->stop(seq, p);
101 seq->count = 0;
102 goto done;
103 }
104
105 while (1) {
106 loff_t pos = seq->index;
107
108 offs = seq->count;
109 p = seq->op->next(seq, p, &seq->index);
110 if (pos == seq->index) {
111 pr_info_ratelimited("buggy seq_file .next function %ps "
112 "did not updated position index\n",
113 seq->op->next);
114 seq->index++;
115 }
116
117 if (IS_ERR_OR_NULL(p))
118 break;
119
120 if (seq->count >= size)
121 break;
122
123 err = seq->op->show(seq, p);
124 if (err > 0) {
125 seq->count = offs;
126 } else if (err < 0 || seq_has_overflowed(seq)) {
127 seq->count = offs;
128 if (offs == 0) {
129 if (!err)
130 err = -E2BIG;
131 seq->op->stop(seq, p);
132 goto done;
133 }
134 break;
135 }
136 }
137stop:
138 offs = seq->count;
139 /* bpf program called if !p */
140 seq->op->stop(seq, p);
141 if (!p && seq_has_overflowed(seq)) {
142 seq->count = offs;
143 if (offs == 0) {
144 err = -E2BIG;
145 goto done;
146 }
147 }
148
149 n = min(seq->count, size);
150 err = copy_to_user(buf, seq->buf, n);
151 if (err) {
152 err = -EFAULT;
153 goto done;
154 }
155 copied = n;
156 seq->count -= n;
157 seq->from = n;
158done:
159 if (!copied)
160 copied = err;
161 else
162 *ppos += copied;
163 mutex_unlock(&seq->lock);
164 return copied;
165}
166
367ec3e4
YS
167static int iter_open(struct inode *inode, struct file *file)
168{
169 struct bpf_iter_link *link = inode->i_private;
170
171 return prepare_seq_file(file, link);
172}
173
ac51d99b
YS
174static int iter_release(struct inode *inode, struct file *file)
175{
176 struct bpf_iter_priv_data *iter_priv;
177 struct seq_file *seq;
178
179 seq = file->private_data;
180 if (!seq)
181 return 0;
182
183 iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
184 target_private);
185
186 if (iter_priv->tinfo->fini_seq_private)
187 iter_priv->tinfo->fini_seq_private(seq->private);
188
189 bpf_prog_put(iter_priv->prog);
190 seq->private = iter_priv;
191
192 return seq_release_private(inode, file);
193}
194
367ec3e4
YS
195const struct file_operations bpf_iter_fops = {
196 .open = iter_open,
ac51d99b
YS
197 .llseek = no_llseek,
198 .read = bpf_seq_read,
199 .release = iter_release,
200};
201
ae24345d
YS
202int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
203{
204 struct bpf_iter_target_info *tinfo;
205
206 tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
207 if (!tinfo)
208 return -ENOMEM;
209
210 tinfo->target = reg_info->target;
211 tinfo->seq_ops = reg_info->seq_ops;
212 tinfo->init_seq_private = reg_info->init_seq_private;
213 tinfo->fini_seq_private = reg_info->fini_seq_private;
214 tinfo->seq_priv_size = reg_info->seq_priv_size;
215 INIT_LIST_HEAD(&tinfo->list);
216
217 mutex_lock(&targets_mutex);
218 list_add(&tinfo->list, &targets);
219 mutex_unlock(&targets_mutex);
220
221 return 0;
222}
223
224void bpf_iter_unreg_target(const char *target)
225{
226 struct bpf_iter_target_info *tinfo;
227 bool found = false;
228
229 mutex_lock(&targets_mutex);
230 list_for_each_entry(tinfo, &targets, list) {
231 if (!strcmp(target, tinfo->target)) {
232 list_del(&tinfo->list);
233 kfree(tinfo);
234 found = true;
235 break;
236 }
237 }
238 mutex_unlock(&targets_mutex);
239
240 WARN_ON(found == false);
241}
15d83c4d
YS
242
243static void cache_btf_id(struct bpf_iter_target_info *tinfo,
244 struct bpf_prog *prog)
245{
246 tinfo->btf_id = prog->aux->attach_btf_id;
247}
248
249bool bpf_iter_prog_supported(struct bpf_prog *prog)
250{
251 const char *attach_fname = prog->aux->attach_func_name;
252 u32 prog_btf_id = prog->aux->attach_btf_id;
253 const char *prefix = BPF_ITER_FUNC_PREFIX;
254 struct bpf_iter_target_info *tinfo;
255 int prefix_len = strlen(prefix);
256 bool supported = false;
257
258 if (strncmp(attach_fname, prefix, prefix_len))
259 return false;
260
261 mutex_lock(&targets_mutex);
262 list_for_each_entry(tinfo, &targets, list) {
263 if (tinfo->btf_id && tinfo->btf_id == prog_btf_id) {
264 supported = true;
265 break;
266 }
267 if (!strcmp(attach_fname + prefix_len, tinfo->target)) {
268 cache_btf_id(tinfo, prog);
269 supported = true;
270 break;
271 }
272 }
273 mutex_unlock(&targets_mutex);
274
275 return supported;
276}
de4e05ca
YS
277
/* ->release callback of the iterator link ops; intentionally does nothing. */
static void bpf_iter_link_release(struct bpf_link *link)
{
}
281
282static void bpf_iter_link_dealloc(struct bpf_link *link)
283{
284 struct bpf_iter_link *iter_link =
285 container_of(link, struct bpf_iter_link, link);
286
287 kfree(iter_link);
288}
289
2057c92b
YS
290static int bpf_iter_link_replace(struct bpf_link *link,
291 struct bpf_prog *new_prog,
292 struct bpf_prog *old_prog)
293{
294 int ret = 0;
295
296 mutex_lock(&link_mutex);
297 if (old_prog && link->prog != old_prog) {
298 ret = -EPERM;
299 goto out_unlock;
300 }
301
302 if (link->prog->type != new_prog->type ||
303 link->prog->expected_attach_type != new_prog->expected_attach_type ||
304 link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) {
305 ret = -EINVAL;
306 goto out_unlock;
307 }
308
309 old_prog = xchg(&link->prog, new_prog);
310 bpf_prog_put(old_prog);
311
312out_unlock:
313 mutex_unlock(&link_mutex);
314 return ret;
315}
316
de4e05ca
YS
317static const struct bpf_link_ops bpf_iter_link_lops = {
318 .release = bpf_iter_link_release,
319 .dealloc = bpf_iter_link_dealloc,
2057c92b 320 .update_prog = bpf_iter_link_replace,
de4e05ca
YS
321};
322
367ec3e4
YS
323bool bpf_link_is_iter(struct bpf_link *link)
324{
325 return link->ops == &bpf_iter_link_lops;
326}
327
de4e05ca
YS
328int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
329{
330 struct bpf_link_primer link_primer;
331 struct bpf_iter_target_info *tinfo;
332 struct bpf_iter_link *link;
333 bool existed = false;
334 u32 prog_btf_id;
335 int err;
336
337 if (attr->link_create.target_fd || attr->link_create.flags)
338 return -EINVAL;
339
340 prog_btf_id = prog->aux->attach_btf_id;
341 mutex_lock(&targets_mutex);
342 list_for_each_entry(tinfo, &targets, list) {
343 if (tinfo->btf_id == prog_btf_id) {
344 existed = true;
345 break;
346 }
347 }
348 mutex_unlock(&targets_mutex);
349 if (!existed)
350 return -ENOENT;
351
352 link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
353 if (!link)
354 return -ENOMEM;
355
356 bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog);
357 link->tinfo = tinfo;
358
359 err = bpf_link_prime(&link->link, &link_primer);
360 if (err) {
361 kfree(link);
362 return err;
363 }
364
365 return bpf_link_settle(&link_primer);
366}
ac51d99b
YS
367
368static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
369 struct bpf_iter_target_info *tinfo,
370 struct bpf_prog *prog)
371{
372 priv_data->tinfo = tinfo;
373 priv_data->prog = prog;
374 priv_data->session_id = atomic64_inc_return(&session_id);
375 priv_data->seq_num = 0;
376 priv_data->done_stop = false;
377}
378
379static int prepare_seq_file(struct file *file, struct bpf_iter_link *link)
380{
381 struct bpf_iter_priv_data *priv_data;
382 struct bpf_iter_target_info *tinfo;
383 struct bpf_prog *prog;
384 u32 total_priv_dsize;
385 struct seq_file *seq;
386 int err = 0;
387
388 mutex_lock(&link_mutex);
389 prog = link->link.prog;
390 bpf_prog_inc(prog);
391 mutex_unlock(&link_mutex);
392
393 tinfo = link->tinfo;
394 total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) +
395 tinfo->seq_priv_size;
396 priv_data = __seq_open_private(file, tinfo->seq_ops, total_priv_dsize);
397 if (!priv_data) {
398 err = -ENOMEM;
399 goto release_prog;
400 }
401
402 if (tinfo->init_seq_private) {
403 err = tinfo->init_seq_private(priv_data->target_private);
404 if (err)
405 goto release_seq_file;
406 }
407
408 init_seq_meta(priv_data, tinfo, prog);
409 seq = file->private_data;
410 seq->private = priv_data->target_private;
411
412 return 0;
413
414release_seq_file:
415 seq_release_private(file->f_inode, file);
416 file->private_data = NULL;
417release_prog:
418 bpf_prog_put(prog);
419 return err;
420}
421
422int bpf_iter_new_fd(struct bpf_link *link)
423{
424 struct file *file;
425 unsigned int flags;
426 int err, fd;
427
428 if (link->ops != &bpf_iter_link_lops)
429 return -EINVAL;
430
431 flags = O_RDONLY | O_CLOEXEC;
432 fd = get_unused_fd_flags(flags);
433 if (fd < 0)
434 return fd;
435
436 file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags);
437 if (IS_ERR(file)) {
438 err = PTR_ERR(file);
439 goto free_fd;
440 }
441
442 err = prepare_seq_file(file,
443 container_of(link, struct bpf_iter_link, link));
444 if (err)
445 goto free_file;
446
447 fd_install(fd, file);
448 return fd;
449
450free_file:
451 fput(file);
452free_fd:
453 put_unused_fd(fd);
454 return err;
455}