Commit | Line | Data |
---|---|---|
d2912cb1 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
b2197755 DB |
2 | /* |
3 | * Minimal file system backend for holding eBPF maps and programs, | |
4 | * used by bpf(2) object pinning. | |
5 | * | |
6 | * Authors: | |
7 | * | |
8 | * Daniel Borkmann <daniel@iogearbox.net> | |
b2197755 DB |
9 | */ |
10 | ||
a536a6e1 | 11 | #include <linux/init.h> |
b2197755 DB |
12 | #include <linux/magic.h> |
13 | #include <linux/major.h> | |
14 | #include <linux/mount.h> | |
15 | #include <linux/namei.h> | |
16 | #include <linux/fs.h> | |
d2935de7 DH |
17 | #include <linux/fs_context.h> |
18 | #include <linux/fs_parser.h> | |
b2197755 DB |
19 | #include <linux/kdev_t.h> |
20 | #include <linux/filter.h> | |
21 | #include <linux/bpf.h> | |
a67edbf4 | 22 | #include <linux/bpf_trace.h> |
d71fa5c9 | 23 | #include "preload/bpf_preload.h" |
b2197755 DB |
24 | |
/* Kind of BPF object pinned at a bpffs inode; selects the refcount and
 * fd-creation helpers used on that object.
 */
enum bpf_type {
	BPF_TYPE_UNSPEC	= 0,
	BPF_TYPE_PROG,
	BPF_TYPE_MAP,
	BPF_TYPE_LINK,
};
31 | ||
32 | static void *bpf_any_get(void *raw, enum bpf_type type) | |
33 | { | |
34 | switch (type) { | |
35 | case BPF_TYPE_PROG: | |
85192dbf | 36 | bpf_prog_inc(raw); |
b2197755 DB |
37 | break; |
38 | case BPF_TYPE_MAP: | |
1e0bd5a0 | 39 | bpf_map_inc_with_uref(raw); |
b2197755 | 40 | break; |
70ed506c AN |
41 | case BPF_TYPE_LINK: |
42 | bpf_link_inc(raw); | |
43 | break; | |
b2197755 DB |
44 | default: |
45 | WARN_ON_ONCE(1); | |
46 | break; | |
47 | } | |
48 | ||
49 | return raw; | |
50 | } | |
51 | ||
52 | static void bpf_any_put(void *raw, enum bpf_type type) | |
53 | { | |
54 | switch (type) { | |
55 | case BPF_TYPE_PROG: | |
56 | bpf_prog_put(raw); | |
57 | break; | |
58 | case BPF_TYPE_MAP: | |
c9da161c | 59 | bpf_map_put_with_uref(raw); |
b2197755 | 60 | break; |
70ed506c AN |
61 | case BPF_TYPE_LINK: |
62 | bpf_link_put(raw); | |
63 | break; | |
b2197755 DB |
64 | default: |
65 | WARN_ON_ONCE(1); | |
66 | break; | |
67 | } | |
68 | } | |
69 | ||
70 | static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) | |
71 | { | |
72 | void *raw; | |
73 | ||
c9da161c | 74 | raw = bpf_map_get_with_uref(ufd); |
70ed506c AN |
75 | if (!IS_ERR(raw)) { |
76 | *type = BPF_TYPE_MAP; | |
77 | return raw; | |
78 | } | |
79 | ||
80 | raw = bpf_prog_get(ufd); | |
81 | if (!IS_ERR(raw)) { | |
b2197755 | 82 | *type = BPF_TYPE_PROG; |
70ed506c | 83 | return raw; |
b2197755 DB |
84 | } |
85 | ||
70ed506c AN |
86 | raw = bpf_link_get_from_fd(ufd); |
87 | if (!IS_ERR(raw)) { | |
88 | *type = BPF_TYPE_LINK; | |
89 | return raw; | |
90 | } | |
91 | ||
92 | return ERR_PTR(-EINVAL); | |
b2197755 DB |
93 | } |
94 | ||
95 | static const struct inode_operations bpf_dir_iops; | |
96 | ||
97 | static const struct inode_operations bpf_prog_iops = { }; | |
98 | static const struct inode_operations bpf_map_iops = { }; | |
70ed506c | 99 | static const struct inode_operations bpf_link_iops = { }; |
b2197755 DB |
100 | |
101 | static struct inode *bpf_get_inode(struct super_block *sb, | |
102 | const struct inode *dir, | |
103 | umode_t mode) | |
104 | { | |
105 | struct inode *inode; | |
106 | ||
107 | switch (mode & S_IFMT) { | |
108 | case S_IFDIR: | |
109 | case S_IFREG: | |
0f98621b | 110 | case S_IFLNK: |
b2197755 DB |
111 | break; |
112 | default: | |
113 | return ERR_PTR(-EINVAL); | |
114 | } | |
115 | ||
116 | inode = new_inode(sb); | |
117 | if (!inode) | |
118 | return ERR_PTR(-ENOSPC); | |
119 | ||
120 | inode->i_ino = get_next_ino(); | |
078cd827 | 121 | inode->i_atime = current_time(inode); |
b2197755 DB |
122 | inode->i_mtime = inode->i_atime; |
123 | inode->i_ctime = inode->i_atime; | |
124 | ||
21cb47be | 125 | inode_init_owner(&init_user_ns, inode, dir, mode); |
b2197755 DB |
126 | |
127 | return inode; | |
128 | } | |
129 | ||
130 | static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) | |
131 | { | |
132 | *type = BPF_TYPE_UNSPEC; | |
133 | if (inode->i_op == &bpf_prog_iops) | |
134 | *type = BPF_TYPE_PROG; | |
135 | else if (inode->i_op == &bpf_map_iops) | |
136 | *type = BPF_TYPE_MAP; | |
70ed506c AN |
137 | else if (inode->i_op == &bpf_link_iops) |
138 | *type = BPF_TYPE_LINK; | |
b2197755 DB |
139 | else |
140 | return -EACCES; | |
141 | ||
142 | return 0; | |
143 | } | |
144 | ||
0f98621b DB |
145 | static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode, |
146 | struct inode *dir) | |
147 | { | |
148 | d_instantiate(dentry, inode); | |
149 | dget(dentry); | |
150 | ||
151 | dir->i_mtime = current_time(dir); | |
152 | dir->i_ctime = dir->i_mtime; | |
153 | } | |
154 | ||
549c7297 CB |
155 | static int bpf_mkdir(struct user_namespace *mnt_userns, struct inode *dir, |
156 | struct dentry *dentry, umode_t mode) | |
b2197755 DB |
157 | { |
158 | struct inode *inode; | |
159 | ||
b2197755 DB |
160 | inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); |
161 | if (IS_ERR(inode)) | |
162 | return PTR_ERR(inode); | |
163 | ||
164 | inode->i_op = &bpf_dir_iops; | |
165 | inode->i_fop = &simple_dir_operations; | |
166 | ||
167 | inc_nlink(inode); | |
168 | inc_nlink(dir); | |
169 | ||
0f98621b | 170 | bpf_dentry_finalize(dentry, inode, dir); |
b2197755 DB |
171 | return 0; |
172 | } | |
173 | ||
a26ca7c9 MKL |
174 | struct map_iter { |
175 | void *key; | |
176 | bool done; | |
177 | }; | |
178 | ||
179 | static struct map_iter *map_iter(struct seq_file *m) | |
180 | { | |
181 | return m->private; | |
182 | } | |
183 | ||
184 | static struct bpf_map *seq_file_to_map(struct seq_file *m) | |
185 | { | |
186 | return file_inode(m->file)->i_private; | |
187 | } | |
188 | ||
189 | static void map_iter_free(struct map_iter *iter) | |
190 | { | |
191 | if (iter) { | |
192 | kfree(iter->key); | |
193 | kfree(iter); | |
194 | } | |
195 | } | |
196 | ||
197 | static struct map_iter *map_iter_alloc(struct bpf_map *map) | |
198 | { | |
199 | struct map_iter *iter; | |
200 | ||
201 | iter = kzalloc(sizeof(*iter), GFP_KERNEL | __GFP_NOWARN); | |
202 | if (!iter) | |
203 | goto error; | |
204 | ||
205 | iter->key = kzalloc(map->key_size, GFP_KERNEL | __GFP_NOWARN); | |
206 | if (!iter->key) | |
207 | goto error; | |
208 | ||
209 | return iter; | |
210 | ||
211 | error: | |
212 | map_iter_free(iter); | |
213 | return NULL; | |
214 | } | |
215 | ||
216 | static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos) | |
217 | { | |
218 | struct bpf_map *map = seq_file_to_map(m); | |
219 | void *key = map_iter(m)->key; | |
dc1508a5 | 220 | void *prev_key; |
a26ca7c9 | 221 | |
90435a78 | 222 | (*pos)++; |
a26ca7c9 MKL |
223 | if (map_iter(m)->done) |
224 | return NULL; | |
225 | ||
226 | if (unlikely(v == SEQ_START_TOKEN)) | |
dc1508a5 YS |
227 | prev_key = NULL; |
228 | else | |
229 | prev_key = key; | |
a26ca7c9 | 230 | |
ce880cb8 | 231 | rcu_read_lock(); |
dc1508a5 | 232 | if (map->ops->map_get_next_key(map, prev_key, key)) { |
a26ca7c9 | 233 | map_iter(m)->done = true; |
ce880cb8 | 234 | key = NULL; |
a26ca7c9 | 235 | } |
ce880cb8 | 236 | rcu_read_unlock(); |
a26ca7c9 MKL |
237 | return key; |
238 | } | |
239 | ||
240 | static void *map_seq_start(struct seq_file *m, loff_t *pos) | |
241 | { | |
242 | if (map_iter(m)->done) | |
243 | return NULL; | |
244 | ||
245 | return *pos ? map_iter(m)->key : SEQ_START_TOKEN; | |
246 | } | |
247 | ||
248 | static void map_seq_stop(struct seq_file *m, void *v) | |
249 | { | |
250 | } | |
251 | ||
252 | static int map_seq_show(struct seq_file *m, void *v) | |
253 | { | |
254 | struct bpf_map *map = seq_file_to_map(m); | |
255 | void *key = map_iter(m)->key; | |
256 | ||
257 | if (unlikely(v == SEQ_START_TOKEN)) { | |
258 | seq_puts(m, "# WARNING!! The output is for debug purpose only\n"); | |
259 | seq_puts(m, "# WARNING!! The output format will change\n"); | |
260 | } else { | |
261 | map->ops->map_seq_show_elem(map, key, m); | |
262 | } | |
263 | ||
264 | return 0; | |
265 | } | |
266 | ||
267 | static const struct seq_operations bpffs_map_seq_ops = { | |
268 | .start = map_seq_start, | |
269 | .next = map_seq_next, | |
270 | .show = map_seq_show, | |
271 | .stop = map_seq_stop, | |
272 | }; | |
273 | ||
274 | static int bpffs_map_open(struct inode *inode, struct file *file) | |
275 | { | |
276 | struct bpf_map *map = inode->i_private; | |
277 | struct map_iter *iter; | |
278 | struct seq_file *m; | |
279 | int err; | |
280 | ||
281 | iter = map_iter_alloc(map); | |
282 | if (!iter) | |
283 | return -ENOMEM; | |
284 | ||
285 | err = seq_open(file, &bpffs_map_seq_ops); | |
286 | if (err) { | |
287 | map_iter_free(iter); | |
288 | return err; | |
289 | } | |
290 | ||
291 | m = file->private_data; | |
292 | m->private = iter; | |
293 | ||
294 | return 0; | |
295 | } | |
296 | ||
297 | static int bpffs_map_release(struct inode *inode, struct file *file) | |
298 | { | |
299 | struct seq_file *m = file->private_data; | |
300 | ||
301 | map_iter_free(map_iter(m)); | |
302 | ||
303 | return seq_release(inode, file); | |
304 | } | |
305 | ||
306 | /* bpffs_map_fops should only implement the basic | |
307 | * read operation for a BPF map. The purpose is to | |
308 | * provide a simple user intuitive way to do | |
309 | * "cat bpffs/pathto/a-pinned-map". | |
310 | * | |
311 | * Other operations (e.g. write, lookup...) should be realized by | |
312 | * the userspace tools (e.g. bpftool) through the | |
313 | * BPF_OBJ_GET_INFO_BY_FD and the map's lookup/update | |
314 | * interface. | |
315 | */ | |
316 | static const struct file_operations bpffs_map_fops = { | |
317 | .open = bpffs_map_open, | |
318 | .read = seq_read, | |
319 | .release = bpffs_map_release, | |
320 | }; | |
321 | ||
b1655857 DB |
322 | static int bpffs_obj_open(struct inode *inode, struct file *file) |
323 | { | |
324 | return -EIO; | |
325 | } | |
326 | ||
327 | static const struct file_operations bpffs_obj_fops = { | |
328 | .open = bpffs_obj_open, | |
329 | }; | |
330 | ||
a4a0683f | 331 | static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw, |
a26ca7c9 MKL |
332 | const struct inode_operations *iops, |
333 | const struct file_operations *fops) | |
b2197755 | 334 | { |
a4a0683f AV |
335 | struct inode *dir = dentry->d_parent->d_inode; |
336 | struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode); | |
b2197755 DB |
337 | if (IS_ERR(inode)) |
338 | return PTR_ERR(inode); | |
339 | ||
340 | inode->i_op = iops; | |
a26ca7c9 | 341 | inode->i_fop = fops; |
a4a0683f | 342 | inode->i_private = raw; |
b2197755 | 343 | |
0f98621b | 344 | bpf_dentry_finalize(dentry, inode, dir); |
b2197755 DB |
345 | return 0; |
346 | } | |
347 | ||
a4a0683f | 348 | static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg) |
b2197755 | 349 | { |
b1655857 DB |
350 | return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops, |
351 | &bpffs_obj_fops); | |
a4a0683f | 352 | } |
b2197755 | 353 | |
a4a0683f AV |
354 | static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg) |
355 | { | |
a26ca7c9 MKL |
356 | struct bpf_map *map = arg; |
357 | ||
358 | return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops, | |
e8d2bec0 DB |
359 | bpf_map_support_seq_show(map) ? |
360 | &bpffs_map_fops : &bpffs_obj_fops); | |
b2197755 DB |
361 | } |
362 | ||
70ed506c AN |
363 | static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg) |
364 | { | |
367ec3e4 YS |
365 | struct bpf_link *link = arg; |
366 | ||
70ed506c | 367 | return bpf_mkobj_ops(dentry, mode, arg, &bpf_link_iops, |
367ec3e4 YS |
368 | bpf_link_is_iter(link) ? |
369 | &bpf_iter_fops : &bpffs_obj_fops); | |
70ed506c AN |
370 | } |
371 | ||
0c93b7d8 AV |
372 | static struct dentry * |
373 | bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) | |
bb35a6ef | 374 | { |
6d8cb045 | 375 | /* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future |
d71fa5c9 | 376 | * extensions. That allows popoulate_bpffs() create special files. |
6d8cb045 | 377 | */ |
d71fa5c9 AS |
378 | if ((dir->i_mode & S_IALLUGO) && |
379 | strchr(dentry->d_name.name, '.')) | |
0c93b7d8 | 380 | return ERR_PTR(-EPERM); |
0f98621b | 381 | |
0c93b7d8 | 382 | return simple_lookup(dir, dentry, flags); |
bb35a6ef DB |
383 | } |
384 | ||
549c7297 CB |
385 | static int bpf_symlink(struct user_namespace *mnt_userns, struct inode *dir, |
386 | struct dentry *dentry, const char *target) | |
0f98621b DB |
387 | { |
388 | char *link = kstrdup(target, GFP_USER | __GFP_NOWARN); | |
389 | struct inode *inode; | |
390 | ||
391 | if (!link) | |
392 | return -ENOMEM; | |
393 | ||
394 | inode = bpf_get_inode(dir->i_sb, dir, S_IRWXUGO | S_IFLNK); | |
395 | if (IS_ERR(inode)) { | |
396 | kfree(link); | |
397 | return PTR_ERR(inode); | |
398 | } | |
399 | ||
400 | inode->i_op = &simple_symlink_inode_operations; | |
401 | inode->i_link = link; | |
402 | ||
403 | bpf_dentry_finalize(dentry, inode, dir); | |
404 | return 0; | |
405 | } | |
406 | ||
b2197755 | 407 | static const struct inode_operations bpf_dir_iops = { |
0c93b7d8 | 408 | .lookup = bpf_lookup, |
b2197755 | 409 | .mkdir = bpf_mkdir, |
0f98621b | 410 | .symlink = bpf_symlink, |
b2197755 | 411 | .rmdir = simple_rmdir, |
0c93b7d8 AV |
412 | .rename = simple_rename, |
413 | .link = simple_link, | |
b2197755 DB |
414 | .unlink = simple_unlink, |
415 | }; | |
416 | ||
d71fa5c9 AS |
417 | /* pin iterator link into bpffs */ |
418 | static int bpf_iter_link_pin_kernel(struct dentry *parent, | |
419 | const char *name, struct bpf_link *link) | |
420 | { | |
421 | umode_t mode = S_IFREG | S_IRUSR; | |
422 | struct dentry *dentry; | |
423 | int ret; | |
424 | ||
425 | inode_lock(parent->d_inode); | |
426 | dentry = lookup_one_len(name, parent, strlen(name)); | |
427 | if (IS_ERR(dentry)) { | |
428 | inode_unlock(parent->d_inode); | |
429 | return PTR_ERR(dentry); | |
430 | } | |
431 | ret = bpf_mkobj_ops(dentry, mode, link, &bpf_link_iops, | |
432 | &bpf_iter_fops); | |
433 | dput(dentry); | |
434 | inode_unlock(parent->d_inode); | |
435 | return ret; | |
436 | } | |
437 | ||
b87121dd | 438 | static int bpf_obj_do_pin(const char __user *pathname, void *raw, |
b2197755 DB |
439 | enum bpf_type type) |
440 | { | |
441 | struct dentry *dentry; | |
442 | struct inode *dir; | |
443 | struct path path; | |
444 | umode_t mode; | |
b2197755 DB |
445 | int ret; |
446 | ||
b87121dd | 447 | dentry = user_path_create(AT_FDCWD, pathname, &path, 0); |
b2197755 DB |
448 | if (IS_ERR(dentry)) |
449 | return PTR_ERR(dentry); | |
450 | ||
451 | mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); | |
b2197755 | 452 | |
a4a0683f | 453 | ret = security_path_mknod(&path, dentry, mode, 0); |
b2197755 DB |
454 | if (ret) |
455 | goto out; | |
456 | ||
457 | dir = d_inode(path.dentry); | |
458 | if (dir->i_op != &bpf_dir_iops) { | |
459 | ret = -EPERM; | |
460 | goto out; | |
461 | } | |
462 | ||
a4a0683f AV |
463 | switch (type) { |
464 | case BPF_TYPE_PROG: | |
465 | ret = vfs_mkobj(dentry, mode, bpf_mkprog, raw); | |
466 | break; | |
467 | case BPF_TYPE_MAP: | |
468 | ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw); | |
469 | break; | |
70ed506c AN |
470 | case BPF_TYPE_LINK: |
471 | ret = vfs_mkobj(dentry, mode, bpf_mklink, raw); | |
472 | break; | |
a4a0683f AV |
473 | default: |
474 | ret = -EPERM; | |
475 | } | |
b2197755 DB |
476 | out: |
477 | done_path_create(&path, dentry); | |
478 | return ret; | |
479 | } | |
480 | ||
481 | int bpf_obj_pin_user(u32 ufd, const char __user *pathname) | |
482 | { | |
b2197755 DB |
483 | enum bpf_type type; |
484 | void *raw; | |
485 | int ret; | |
486 | ||
b2197755 | 487 | raw = bpf_fd_probe_obj(ufd, &type); |
b87121dd AV |
488 | if (IS_ERR(raw)) |
489 | return PTR_ERR(raw); | |
b2197755 | 490 | |
b87121dd | 491 | ret = bpf_obj_do_pin(pathname, raw, type); |
b2197755 DB |
492 | if (ret != 0) |
493 | bpf_any_put(raw, type); | |
b87121dd | 494 | |
b2197755 DB |
495 | return ret; |
496 | } | |
497 | ||
b87121dd | 498 | static void *bpf_obj_do_get(const char __user *pathname, |
6e71b04a | 499 | enum bpf_type *type, int flags) |
b2197755 DB |
500 | { |
501 | struct inode *inode; | |
502 | struct path path; | |
503 | void *raw; | |
504 | int ret; | |
505 | ||
b87121dd | 506 | ret = user_path_at(AT_FDCWD, pathname, LOOKUP_FOLLOW, &path); |
b2197755 DB |
507 | if (ret) |
508 | return ERR_PTR(ret); | |
509 | ||
510 | inode = d_backing_inode(path.dentry); | |
02f92b38 | 511 | ret = path_permission(&path, ACC_MODE(flags)); |
b2197755 DB |
512 | if (ret) |
513 | goto out; | |
514 | ||
515 | ret = bpf_inode_type(inode, type); | |
516 | if (ret) | |
517 | goto out; | |
518 | ||
519 | raw = bpf_any_get(inode->i_private, *type); | |
92117d84 AS |
520 | if (!IS_ERR(raw)) |
521 | touch_atime(&path); | |
b2197755 DB |
522 | |
523 | path_put(&path); | |
524 | return raw; | |
525 | out: | |
526 | path_put(&path); | |
527 | return ERR_PTR(ret); | |
528 | } | |
529 | ||
6e71b04a | 530 | int bpf_obj_get_user(const char __user *pathname, int flags) |
b2197755 DB |
531 | { |
532 | enum bpf_type type = BPF_TYPE_UNSPEC; | |
6e71b04a | 533 | int f_flags; |
b2197755 | 534 | void *raw; |
b87121dd | 535 | int ret; |
b2197755 | 536 | |
6e71b04a CF |
537 | f_flags = bpf_get_file_flag(flags); |
538 | if (f_flags < 0) | |
539 | return f_flags; | |
540 | ||
b87121dd AV |
541 | raw = bpf_obj_do_get(pathname, &type, f_flags); |
542 | if (IS_ERR(raw)) | |
543 | return PTR_ERR(raw); | |
b2197755 DB |
544 | |
545 | if (type == BPF_TYPE_PROG) | |
d37300ed | 546 | ret = (f_flags != O_RDWR) ? -EINVAL : bpf_prog_new_fd(raw); |
b2197755 | 547 | else if (type == BPF_TYPE_MAP) |
6e71b04a | 548 | ret = bpf_map_new_fd(raw, f_flags); |
70ed506c | 549 | else if (type == BPF_TYPE_LINK) |
25fc94b2 | 550 | ret = (f_flags != O_RDWR) ? -EINVAL : bpf_link_new_fd(raw); |
b2197755 | 551 | else |
b87121dd | 552 | return -ENOENT; |
b2197755 | 553 | |
4d220ed0 | 554 | if (ret < 0) |
b2197755 | 555 | bpf_any_put(raw, type); |
b2197755 DB |
556 | return ret; |
557 | } | |
040ee692 AV |
558 | |
559 | static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type) | |
560 | { | |
561 | struct bpf_prog *prog; | |
47291baa | 562 | int ret = inode_permission(&init_user_ns, inode, MAY_READ); |
040ee692 AV |
563 | if (ret) |
564 | return ERR_PTR(ret); | |
565 | ||
566 | if (inode->i_op == &bpf_map_iops) | |
567 | return ERR_PTR(-EINVAL); | |
70ed506c AN |
568 | if (inode->i_op == &bpf_link_iops) |
569 | return ERR_PTR(-EINVAL); | |
040ee692 AV |
570 | if (inode->i_op != &bpf_prog_iops) |
571 | return ERR_PTR(-EACCES); | |
572 | ||
573 | prog = inode->i_private; | |
574 | ||
575 | ret = security_bpf_prog(prog); | |
576 | if (ret < 0) | |
577 | return ERR_PTR(ret); | |
578 | ||
579 | if (!bpf_prog_get_ok(prog, &type, false)) | |
580 | return ERR_PTR(-EINVAL); | |
581 | ||
85192dbf AN |
582 | bpf_prog_inc(prog); |
583 | return prog; | |
040ee692 AV |
584 | } |
585 | ||
586 | struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) | |
587 | { | |
588 | struct bpf_prog *prog; | |
589 | struct path path; | |
590 | int ret = kern_path(name, LOOKUP_FOLLOW, &path); | |
591 | if (ret) | |
592 | return ERR_PTR(ret); | |
593 | prog = __get_prog_inode(d_backing_inode(path.dentry), type); | |
594 | if (!IS_ERR(prog)) | |
595 | touch_atime(&path); | |
596 | path_put(&path); | |
597 | return prog; | |
598 | } | |
599 | EXPORT_SYMBOL(bpf_prog_get_type_path); | |
b2197755 | 600 | |
4cc7c186 DH |
601 | /* |
602 | * Display the mount options in /proc/mounts. | |
603 | */ | |
604 | static int bpf_show_options(struct seq_file *m, struct dentry *root) | |
605 | { | |
606 | umode_t mode = d_inode(root)->i_mode & S_IALLUGO & ~S_ISVTX; | |
607 | ||
608 | if (mode != S_IRWXUGO) | |
609 | seq_printf(m, ",mode=%o", mode); | |
610 | return 0; | |
611 | } | |
612 | ||
524845ff | 613 | static void bpf_free_inode(struct inode *inode) |
1da6c4d9 | 614 | { |
1da6c4d9 DB |
615 | enum bpf_type type; |
616 | ||
617 | if (S_ISLNK(inode->i_mode)) | |
618 | kfree(inode->i_link); | |
619 | if (!bpf_inode_type(inode, &type)) | |
620 | bpf_any_put(inode->i_private, type); | |
621 | free_inode_nonrcu(inode); | |
622 | } | |
623 | ||
b2197755 DB |
624 | static const struct super_operations bpf_super_ops = { |
625 | .statfs = simple_statfs, | |
626 | .drop_inode = generic_delete_inode, | |
4cc7c186 | 627 | .show_options = bpf_show_options, |
524845ff | 628 | .free_inode = bpf_free_inode, |
b2197755 DB |
629 | }; |
630 | ||
a3af5f80 DB |
631 | enum { |
632 | OPT_MODE, | |
a3af5f80 DB |
633 | }; |
634 | ||
d7167b14 | 635 | static const struct fs_parameter_spec bpf_fs_parameters[] = { |
d2935de7 DH |
636 | fsparam_u32oct ("mode", OPT_MODE), |
637 | {} | |
638 | }; | |
639 | ||
a3af5f80 DB |
640 | struct bpf_mount_opts { |
641 | umode_t mode; | |
642 | }; | |
643 | ||
d2935de7 | 644 | static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) |
a3af5f80 | 645 | { |
d2935de7 DH |
646 | struct bpf_mount_opts *opts = fc->fs_private; |
647 | struct fs_parse_result result; | |
648 | int opt; | |
a3af5f80 | 649 | |
d7167b14 | 650 | opt = fs_parse(fc, bpf_fs_parameters, param, &result); |
d2935de7 | 651 | if (opt < 0) |
a3af5f80 DB |
652 | /* We might like to report bad mount options here, but |
653 | * traditionally we've ignored all mount options, so we'd | |
654 | * better continue to ignore non-existing options for bpf. | |
655 | */ | |
d2935de7 DH |
656 | return opt == -ENOPARAM ? 0 : opt; |
657 | ||
658 | switch (opt) { | |
659 | case OPT_MODE: | |
660 | opts->mode = result.uint_32 & S_IALLUGO; | |
661 | break; | |
a3af5f80 DB |
662 | } |
663 | ||
664 | return 0; | |
665 | } | |
666 | ||
d71fa5c9 AS |
667 | struct bpf_preload_ops *bpf_preload_ops; |
668 | EXPORT_SYMBOL_GPL(bpf_preload_ops); | |
669 | ||
670 | static bool bpf_preload_mod_get(void) | |
671 | { | |
672 | /* If bpf_preload.ko wasn't loaded earlier then load it now. | |
673 | * When bpf_preload is built into vmlinux the module's __init | |
674 | * function will populate it. | |
675 | */ | |
676 | if (!bpf_preload_ops) { | |
677 | request_module("bpf_preload"); | |
678 | if (!bpf_preload_ops) | |
679 | return false; | |
680 | } | |
681 | /* And grab the reference, so the module doesn't disappear while the | |
682 | * kernel is interacting with the kernel module and its UMD. | |
683 | */ | |
684 | if (!try_module_get(bpf_preload_ops->owner)) { | |
685 | pr_err("bpf_preload module get failed.\n"); | |
686 | return false; | |
687 | } | |
688 | return true; | |
689 | } | |
690 | ||
691 | static void bpf_preload_mod_put(void) | |
692 | { | |
693 | if (bpf_preload_ops) | |
694 | /* now user can "rmmod bpf_preload" if necessary */ | |
695 | module_put(bpf_preload_ops->owner); | |
696 | } | |
697 | ||
698 | static DEFINE_MUTEX(bpf_preload_lock); | |
699 | ||
700 | static int populate_bpffs(struct dentry *parent) | |
701 | { | |
702 | struct bpf_preload_info objs[BPF_PRELOAD_LINKS] = {}; | |
703 | struct bpf_link *links[BPF_PRELOAD_LINKS] = {}; | |
704 | int err = 0, i; | |
705 | ||
706 | /* grab the mutex to make sure the kernel interactions with bpf_preload | |
707 | * UMD are serialized | |
708 | */ | |
709 | mutex_lock(&bpf_preload_lock); | |
710 | ||
711 | /* if bpf_preload.ko wasn't built into vmlinux then load it */ | |
712 | if (!bpf_preload_mod_get()) | |
713 | goto out; | |
714 | ||
715 | if (!bpf_preload_ops->info.tgid) { | |
716 | /* preload() will start UMD that will load BPF iterator programs */ | |
717 | err = bpf_preload_ops->preload(objs); | |
718 | if (err) | |
719 | goto out_put; | |
720 | for (i = 0; i < BPF_PRELOAD_LINKS; i++) { | |
721 | links[i] = bpf_link_by_id(objs[i].link_id); | |
722 | if (IS_ERR(links[i])) { | |
723 | err = PTR_ERR(links[i]); | |
724 | goto out_put; | |
725 | } | |
726 | } | |
727 | for (i = 0; i < BPF_PRELOAD_LINKS; i++) { | |
728 | err = bpf_iter_link_pin_kernel(parent, | |
729 | objs[i].link_name, links[i]); | |
730 | if (err) | |
731 | goto out_put; | |
732 | /* do not unlink successfully pinned links even | |
733 | * if later link fails to pin | |
734 | */ | |
735 | links[i] = NULL; | |
736 | } | |
737 | /* finish() will tell UMD process to exit */ | |
738 | err = bpf_preload_ops->finish(); | |
739 | if (err) | |
740 | goto out_put; | |
741 | } | |
742 | out_put: | |
743 | bpf_preload_mod_put(); | |
744 | out: | |
745 | mutex_unlock(&bpf_preload_lock); | |
746 | for (i = 0; i < BPF_PRELOAD_LINKS && err; i++) | |
747 | if (!IS_ERR_OR_NULL(links[i])) | |
748 | bpf_link_put(links[i]); | |
749 | return err; | |
750 | } | |
751 | ||
d2935de7 | 752 | static int bpf_fill_super(struct super_block *sb, struct fs_context *fc) |
b2197755 | 753 | { |
cda37124 | 754 | static const struct tree_descr bpf_rfiles[] = { { "" } }; |
d2935de7 | 755 | struct bpf_mount_opts *opts = fc->fs_private; |
b2197755 DB |
756 | struct inode *inode; |
757 | int ret; | |
758 | ||
759 | ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); | |
760 | if (ret) | |
761 | return ret; | |
762 | ||
763 | sb->s_op = &bpf_super_ops; | |
764 | ||
765 | inode = sb->s_root->d_inode; | |
766 | inode->i_op = &bpf_dir_iops; | |
767 | inode->i_mode &= ~S_IALLUGO; | |
d71fa5c9 | 768 | populate_bpffs(sb->s_root); |
d2935de7 | 769 | inode->i_mode |= S_ISVTX | opts->mode; |
b2197755 DB |
770 | return 0; |
771 | } | |
772 | ||
d2935de7 DH |
773 | static int bpf_get_tree(struct fs_context *fc) |
774 | { | |
775 | return get_tree_nodev(fc, bpf_fill_super); | |
776 | } | |
777 | ||
778 | static void bpf_free_fc(struct fs_context *fc) | |
b2197755 | 779 | { |
d2935de7 DH |
780 | kfree(fc->fs_private); |
781 | } | |
782 | ||
783 | static const struct fs_context_operations bpf_context_ops = { | |
784 | .free = bpf_free_fc, | |
785 | .parse_param = bpf_parse_param, | |
786 | .get_tree = bpf_get_tree, | |
787 | }; | |
788 | ||
789 | /* | |
790 | * Set up the filesystem mount context. | |
791 | */ | |
792 | static int bpf_init_fs_context(struct fs_context *fc) | |
793 | { | |
794 | struct bpf_mount_opts *opts; | |
795 | ||
796 | opts = kzalloc(sizeof(struct bpf_mount_opts), GFP_KERNEL); | |
797 | if (!opts) | |
798 | return -ENOMEM; | |
799 | ||
800 | opts->mode = S_IRWXUGO; | |
801 | ||
802 | fc->fs_private = opts; | |
803 | fc->ops = &bpf_context_ops; | |
804 | return 0; | |
b2197755 DB |
805 | } |
806 | ||
807 | static struct file_system_type bpf_fs_type = { | |
808 | .owner = THIS_MODULE, | |
809 | .name = "bpf", | |
d2935de7 | 810 | .init_fs_context = bpf_init_fs_context, |
d7167b14 | 811 | .parameters = bpf_fs_parameters, |
b2197755 | 812 | .kill_sb = kill_litter_super, |
b2197755 DB |
813 | }; |
814 | ||
b2197755 DB |
815 | static int __init bpf_init(void) |
816 | { | |
817 | int ret; | |
818 | ||
819 | ret = sysfs_create_mount_point(fs_kobj, "bpf"); | |
820 | if (ret) | |
821 | return ret; | |
822 | ||
823 | ret = register_filesystem(&bpf_fs_type); | |
824 | if (ret) | |
825 | sysfs_remove_mount_point(fs_kobj, "bpf"); | |
826 | ||
827 | return ret; | |
828 | } | |
829 | fs_initcall(bpf_init); |