Commit | Line | Data |
---|---|---|
d2912cb1 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
b2197755 DB |
2 | /* |
3 | * Minimal file system backend for holding eBPF maps and programs, | |
4 | * used by bpf(2) object pinning. | |
5 | * | |
6 | * Authors: | |
7 | * | |
8 | * Daniel Borkmann <daniel@iogearbox.net> | |
b2197755 DB |
9 | */ |
10 | ||
a536a6e1 | 11 | #include <linux/init.h> |
b2197755 DB |
12 | #include <linux/magic.h> |
13 | #include <linux/major.h> | |
14 | #include <linux/mount.h> | |
15 | #include <linux/namei.h> | |
16 | #include <linux/fs.h> | |
d2935de7 DH |
17 | #include <linux/fs_context.h> |
18 | #include <linux/fs_parser.h> | |
b2197755 DB |
19 | #include <linux/kdev_t.h> |
20 | #include <linux/filter.h> | |
21 | #include <linux/bpf.h> | |
a67edbf4 | 22 | #include <linux/bpf_trace.h> |
d71fa5c9 | 23 | #include "preload/bpf_preload.h" |
b2197755 DB |
24 | |
/* Kinds of BPF object that this file knows how to pin and retrieve. */
enum bpf_type {
	BPF_TYPE_UNSPEC	= 0,	/* not (yet) identified */
	BPF_TYPE_PROG	= 1,
	BPF_TYPE_MAP	= 2,
	BPF_TYPE_LINK	= 3,
};
31 | ||
32 | static void *bpf_any_get(void *raw, enum bpf_type type) | |
33 | { | |
34 | switch (type) { | |
35 | case BPF_TYPE_PROG: | |
85192dbf | 36 | bpf_prog_inc(raw); |
b2197755 DB |
37 | break; |
38 | case BPF_TYPE_MAP: | |
1e0bd5a0 | 39 | bpf_map_inc_with_uref(raw); |
b2197755 | 40 | break; |
70ed506c AN |
41 | case BPF_TYPE_LINK: |
42 | bpf_link_inc(raw); | |
43 | break; | |
b2197755 DB |
44 | default: |
45 | WARN_ON_ONCE(1); | |
46 | break; | |
47 | } | |
48 | ||
49 | return raw; | |
50 | } | |
51 | ||
52 | static void bpf_any_put(void *raw, enum bpf_type type) | |
53 | { | |
54 | switch (type) { | |
55 | case BPF_TYPE_PROG: | |
56 | bpf_prog_put(raw); | |
57 | break; | |
58 | case BPF_TYPE_MAP: | |
c9da161c | 59 | bpf_map_put_with_uref(raw); |
b2197755 | 60 | break; |
70ed506c AN |
61 | case BPF_TYPE_LINK: |
62 | bpf_link_put(raw); | |
63 | break; | |
b2197755 DB |
64 | default: |
65 | WARN_ON_ONCE(1); | |
66 | break; | |
67 | } | |
68 | } | |
69 | ||
70 | static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) | |
71 | { | |
72 | void *raw; | |
73 | ||
c9da161c | 74 | raw = bpf_map_get_with_uref(ufd); |
70ed506c AN |
75 | if (!IS_ERR(raw)) { |
76 | *type = BPF_TYPE_MAP; | |
77 | return raw; | |
78 | } | |
79 | ||
80 | raw = bpf_prog_get(ufd); | |
81 | if (!IS_ERR(raw)) { | |
b2197755 | 82 | *type = BPF_TYPE_PROG; |
70ed506c | 83 | return raw; |
b2197755 DB |
84 | } |
85 | ||
70ed506c AN |
86 | raw = bpf_link_get_from_fd(ufd); |
87 | if (!IS_ERR(raw)) { | |
88 | *type = BPF_TYPE_LINK; | |
89 | return raw; | |
90 | } | |
91 | ||
92 | return ERR_PTR(-EINVAL); | |
b2197755 DB |
93 | } |
94 | ||
95 | static const struct inode_operations bpf_dir_iops; | |
96 | ||
97 | static const struct inode_operations bpf_prog_iops = { }; | |
98 | static const struct inode_operations bpf_map_iops = { }; | |
70ed506c | 99 | static const struct inode_operations bpf_link_iops = { }; |
b2197755 DB |
100 | |
101 | static struct inode *bpf_get_inode(struct super_block *sb, | |
102 | const struct inode *dir, | |
103 | umode_t mode) | |
104 | { | |
105 | struct inode *inode; | |
106 | ||
107 | switch (mode & S_IFMT) { | |
108 | case S_IFDIR: | |
109 | case S_IFREG: | |
0f98621b | 110 | case S_IFLNK: |
b2197755 DB |
111 | break; |
112 | default: | |
113 | return ERR_PTR(-EINVAL); | |
114 | } | |
115 | ||
116 | inode = new_inode(sb); | |
117 | if (!inode) | |
118 | return ERR_PTR(-ENOSPC); | |
119 | ||
120 | inode->i_ino = get_next_ino(); | |
1bc628a7 | 121 | simple_inode_init_ts(inode); |
b2197755 | 122 | |
f2d40141 | 123 | inode_init_owner(&nop_mnt_idmap, inode, dir, mode); |
b2197755 DB |
124 | |
125 | return inode; | |
126 | } | |
127 | ||
128 | static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) | |
129 | { | |
130 | *type = BPF_TYPE_UNSPEC; | |
131 | if (inode->i_op == &bpf_prog_iops) | |
132 | *type = BPF_TYPE_PROG; | |
133 | else if (inode->i_op == &bpf_map_iops) | |
134 | *type = BPF_TYPE_MAP; | |
70ed506c AN |
135 | else if (inode->i_op == &bpf_link_iops) |
136 | *type = BPF_TYPE_LINK; | |
b2197755 DB |
137 | else |
138 | return -EACCES; | |
139 | ||
140 | return 0; | |
141 | } | |
142 | ||
0f98621b DB |
143 | static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode, |
144 | struct inode *dir) | |
145 | { | |
146 | d_instantiate(dentry, inode); | |
147 | dget(dentry); | |
148 | ||
1bc628a7 | 149 | inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); |
0f98621b DB |
150 | } |
151 | ||
c54bd91e | 152 | static int bpf_mkdir(struct mnt_idmap *idmap, struct inode *dir, |
549c7297 | 153 | struct dentry *dentry, umode_t mode) |
b2197755 DB |
154 | { |
155 | struct inode *inode; | |
156 | ||
b2197755 DB |
157 | inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); |
158 | if (IS_ERR(inode)) | |
159 | return PTR_ERR(inode); | |
160 | ||
161 | inode->i_op = &bpf_dir_iops; | |
162 | inode->i_fop = &simple_dir_operations; | |
163 | ||
164 | inc_nlink(inode); | |
165 | inc_nlink(dir); | |
166 | ||
0f98621b | 167 | bpf_dentry_finalize(dentry, inode, dir); |
b2197755 DB |
168 | return 0; |
169 | } | |
170 | ||
a26ca7c9 MKL |
/* Per-open iteration state used when a pinned map is read as a seq_file. */
struct map_iter {
	void *key;	/* buffer holding the current key */
	bool done;	/* set once the map reports no further key */
};
175 | ||
176 | static struct map_iter *map_iter(struct seq_file *m) | |
177 | { | |
178 | return m->private; | |
179 | } | |
180 | ||
181 | static struct bpf_map *seq_file_to_map(struct seq_file *m) | |
182 | { | |
183 | return file_inode(m->file)->i_private; | |
184 | } | |
185 | ||
186 | static void map_iter_free(struct map_iter *iter) | |
187 | { | |
188 | if (iter) { | |
189 | kfree(iter->key); | |
190 | kfree(iter); | |
191 | } | |
192 | } | |
193 | ||
194 | static struct map_iter *map_iter_alloc(struct bpf_map *map) | |
195 | { | |
196 | struct map_iter *iter; | |
197 | ||
198 | iter = kzalloc(sizeof(*iter), GFP_KERNEL | __GFP_NOWARN); | |
199 | if (!iter) | |
200 | goto error; | |
201 | ||
202 | iter->key = kzalloc(map->key_size, GFP_KERNEL | __GFP_NOWARN); | |
203 | if (!iter->key) | |
204 | goto error; | |
205 | ||
206 | return iter; | |
207 | ||
208 | error: | |
209 | map_iter_free(iter); | |
210 | return NULL; | |
211 | } | |
212 | ||
213 | static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos) | |
214 | { | |
215 | struct bpf_map *map = seq_file_to_map(m); | |
216 | void *key = map_iter(m)->key; | |
dc1508a5 | 217 | void *prev_key; |
a26ca7c9 | 218 | |
90435a78 | 219 | (*pos)++; |
a26ca7c9 MKL |
220 | if (map_iter(m)->done) |
221 | return NULL; | |
222 | ||
223 | if (unlikely(v == SEQ_START_TOKEN)) | |
dc1508a5 YS |
224 | prev_key = NULL; |
225 | else | |
226 | prev_key = key; | |
a26ca7c9 | 227 | |
ce880cb8 | 228 | rcu_read_lock(); |
dc1508a5 | 229 | if (map->ops->map_get_next_key(map, prev_key, key)) { |
a26ca7c9 | 230 | map_iter(m)->done = true; |
ce880cb8 | 231 | key = NULL; |
a26ca7c9 | 232 | } |
ce880cb8 | 233 | rcu_read_unlock(); |
a26ca7c9 MKL |
234 | return key; |
235 | } | |
236 | ||
237 | static void *map_seq_start(struct seq_file *m, loff_t *pos) | |
238 | { | |
239 | if (map_iter(m)->done) | |
240 | return NULL; | |
241 | ||
242 | return *pos ? map_iter(m)->key : SEQ_START_TOKEN; | |
243 | } | |
244 | ||
/* seq_file ->stop(): intentionally empty, nothing is done here. */
static void map_seq_stop(struct seq_file *m, void *v)
{
}
248 | ||
249 | static int map_seq_show(struct seq_file *m, void *v) | |
250 | { | |
251 | struct bpf_map *map = seq_file_to_map(m); | |
252 | void *key = map_iter(m)->key; | |
253 | ||
254 | if (unlikely(v == SEQ_START_TOKEN)) { | |
255 | seq_puts(m, "# WARNING!! The output is for debug purpose only\n"); | |
256 | seq_puts(m, "# WARNING!! The output format will change\n"); | |
257 | } else { | |
258 | map->ops->map_seq_show_elem(map, key, m); | |
259 | } | |
260 | ||
261 | return 0; | |
262 | } | |
263 | ||
264 | static const struct seq_operations bpffs_map_seq_ops = { | |
265 | .start = map_seq_start, | |
266 | .next = map_seq_next, | |
267 | .show = map_seq_show, | |
268 | .stop = map_seq_stop, | |
269 | }; | |
270 | ||
271 | static int bpffs_map_open(struct inode *inode, struct file *file) | |
272 | { | |
273 | struct bpf_map *map = inode->i_private; | |
274 | struct map_iter *iter; | |
275 | struct seq_file *m; | |
276 | int err; | |
277 | ||
278 | iter = map_iter_alloc(map); | |
279 | if (!iter) | |
280 | return -ENOMEM; | |
281 | ||
282 | err = seq_open(file, &bpffs_map_seq_ops); | |
283 | if (err) { | |
284 | map_iter_free(iter); | |
285 | return err; | |
286 | } | |
287 | ||
288 | m = file->private_data; | |
289 | m->private = iter; | |
290 | ||
291 | return 0; | |
292 | } | |
293 | ||
294 | static int bpffs_map_release(struct inode *inode, struct file *file) | |
295 | { | |
296 | struct seq_file *m = file->private_data; | |
297 | ||
298 | map_iter_free(map_iter(m)); | |
299 | ||
300 | return seq_release(inode, file); | |
301 | } | |
302 | ||
303 | /* bpffs_map_fops should only implement the basic | |
304 | * read operation for a BPF map. The purpose is to | |
305 | * provide a simple user intuitive way to do | |
306 | * "cat bpffs/pathto/a-pinned-map". | |
307 | * | |
308 | * Other operations (e.g. write, lookup...) should be realized by | |
309 | * the userspace tools (e.g. bpftool) through the | |
310 | * BPF_OBJ_GET_INFO_BY_FD and the map's lookup/update | |
311 | * interface. | |
312 | */ | |
313 | static const struct file_operations bpffs_map_fops = { | |
314 | .open = bpffs_map_open, | |
315 | .read = seq_read, | |
316 | .release = bpffs_map_release, | |
317 | }; | |
318 | ||
b1655857 DB |
319 | static int bpffs_obj_open(struct inode *inode, struct file *file) |
320 | { | |
321 | return -EIO; | |
322 | } | |
323 | ||
324 | static const struct file_operations bpffs_obj_fops = { | |
325 | .open = bpffs_obj_open, | |
326 | }; | |
327 | ||
a4a0683f | 328 | static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw, |
a26ca7c9 MKL |
329 | const struct inode_operations *iops, |
330 | const struct file_operations *fops) | |
b2197755 | 331 | { |
a4a0683f AV |
332 | struct inode *dir = dentry->d_parent->d_inode; |
333 | struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode); | |
b2197755 DB |
334 | if (IS_ERR(inode)) |
335 | return PTR_ERR(inode); | |
336 | ||
337 | inode->i_op = iops; | |
a26ca7c9 | 338 | inode->i_fop = fops; |
a4a0683f | 339 | inode->i_private = raw; |
b2197755 | 340 | |
0f98621b | 341 | bpf_dentry_finalize(dentry, inode, dir); |
b2197755 DB |
342 | return 0; |
343 | } | |
344 | ||
a4a0683f | 345 | static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg) |
b2197755 | 346 | { |
b1655857 DB |
347 | return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops, |
348 | &bpffs_obj_fops); | |
a4a0683f | 349 | } |
b2197755 | 350 | |
a4a0683f AV |
351 | static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg) |
352 | { | |
a26ca7c9 MKL |
353 | struct bpf_map *map = arg; |
354 | ||
355 | return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops, | |
e8d2bec0 DB |
356 | bpf_map_support_seq_show(map) ? |
357 | &bpffs_map_fops : &bpffs_obj_fops); | |
b2197755 DB |
358 | } |
359 | ||
70ed506c AN |
360 | static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg) |
361 | { | |
367ec3e4 YS |
362 | struct bpf_link *link = arg; |
363 | ||
70ed506c | 364 | return bpf_mkobj_ops(dentry, mode, arg, &bpf_link_iops, |
367ec3e4 YS |
365 | bpf_link_is_iter(link) ? |
366 | &bpf_iter_fops : &bpffs_obj_fops); | |
70ed506c AN |
367 | } |
368 | ||
0c93b7d8 AV |
369 | static struct dentry * |
370 | bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) | |
bb35a6ef | 371 | { |
6d8cb045 | 372 | /* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future |
d71fa5c9 | 373 | * extensions. That allows popoulate_bpffs() create special files. |
6d8cb045 | 374 | */ |
d71fa5c9 AS |
375 | if ((dir->i_mode & S_IALLUGO) && |
376 | strchr(dentry->d_name.name, '.')) | |
0c93b7d8 | 377 | return ERR_PTR(-EPERM); |
0f98621b | 378 | |
0c93b7d8 | 379 | return simple_lookup(dir, dentry, flags); |
bb35a6ef DB |
380 | } |
381 | ||
7a77db95 | 382 | static int bpf_symlink(struct mnt_idmap *idmap, struct inode *dir, |
549c7297 | 383 | struct dentry *dentry, const char *target) |
0f98621b DB |
384 | { |
385 | char *link = kstrdup(target, GFP_USER | __GFP_NOWARN); | |
386 | struct inode *inode; | |
387 | ||
388 | if (!link) | |
389 | return -ENOMEM; | |
390 | ||
391 | inode = bpf_get_inode(dir->i_sb, dir, S_IRWXUGO | S_IFLNK); | |
392 | if (IS_ERR(inode)) { | |
393 | kfree(link); | |
394 | return PTR_ERR(inode); | |
395 | } | |
396 | ||
397 | inode->i_op = &simple_symlink_inode_operations; | |
398 | inode->i_link = link; | |
399 | ||
400 | bpf_dentry_finalize(dentry, inode, dir); | |
401 | return 0; | |
402 | } | |
403 | ||
b2197755 | 404 | static const struct inode_operations bpf_dir_iops = { |
0c93b7d8 | 405 | .lookup = bpf_lookup, |
b2197755 | 406 | .mkdir = bpf_mkdir, |
0f98621b | 407 | .symlink = bpf_symlink, |
b2197755 | 408 | .rmdir = simple_rmdir, |
0c93b7d8 AV |
409 | .rename = simple_rename, |
410 | .link = simple_link, | |
b2197755 DB |
411 | .unlink = simple_unlink, |
412 | }; | |
413 | ||
d71fa5c9 AS |
414 | /* pin iterator link into bpffs */ |
415 | static int bpf_iter_link_pin_kernel(struct dentry *parent, | |
416 | const char *name, struct bpf_link *link) | |
417 | { | |
418 | umode_t mode = S_IFREG | S_IRUSR; | |
419 | struct dentry *dentry; | |
420 | int ret; | |
421 | ||
422 | inode_lock(parent->d_inode); | |
423 | dentry = lookup_one_len(name, parent, strlen(name)); | |
424 | if (IS_ERR(dentry)) { | |
425 | inode_unlock(parent->d_inode); | |
426 | return PTR_ERR(dentry); | |
427 | } | |
428 | ret = bpf_mkobj_ops(dentry, mode, link, &bpf_link_iops, | |
429 | &bpf_iter_fops); | |
430 | dput(dentry); | |
431 | inode_unlock(parent->d_inode); | |
432 | return ret; | |
433 | } | |
434 | ||
cb8edce2 | 435 | static int bpf_obj_do_pin(int path_fd, const char __user *pathname, void *raw, |
b2197755 DB |
436 | enum bpf_type type) |
437 | { | |
438 | struct dentry *dentry; | |
439 | struct inode *dir; | |
440 | struct path path; | |
441 | umode_t mode; | |
b2197755 DB |
442 | int ret; |
443 | ||
cb8edce2 | 444 | dentry = user_path_create(path_fd, pathname, &path, 0); |
b2197755 DB |
445 | if (IS_ERR(dentry)) |
446 | return PTR_ERR(dentry); | |
447 | ||
b2197755 DB |
448 | dir = d_inode(path.dentry); |
449 | if (dir->i_op != &bpf_dir_iops) { | |
450 | ret = -EPERM; | |
451 | goto out; | |
452 | } | |
453 | ||
e7d85427 AN |
454 | mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); |
455 | ret = security_path_mknod(&path, dentry, mode, 0); | |
456 | if (ret) | |
457 | goto out; | |
458 | ||
a4a0683f AV |
459 | switch (type) { |
460 | case BPF_TYPE_PROG: | |
461 | ret = vfs_mkobj(dentry, mode, bpf_mkprog, raw); | |
462 | break; | |
463 | case BPF_TYPE_MAP: | |
464 | ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw); | |
465 | break; | |
70ed506c AN |
466 | case BPF_TYPE_LINK: |
467 | ret = vfs_mkobj(dentry, mode, bpf_mklink, raw); | |
468 | break; | |
a4a0683f AV |
469 | default: |
470 | ret = -EPERM; | |
471 | } | |
b2197755 DB |
472 | out: |
473 | done_path_create(&path, dentry); | |
474 | return ret; | |
475 | } | |
476 | ||
cb8edce2 | 477 | int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname) |
b2197755 | 478 | { |
b2197755 DB |
479 | enum bpf_type type; |
480 | void *raw; | |
481 | int ret; | |
482 | ||
b2197755 | 483 | raw = bpf_fd_probe_obj(ufd, &type); |
b87121dd AV |
484 | if (IS_ERR(raw)) |
485 | return PTR_ERR(raw); | |
b2197755 | 486 | |
cb8edce2 | 487 | ret = bpf_obj_do_pin(path_fd, pathname, raw, type); |
b2197755 DB |
488 | if (ret != 0) |
489 | bpf_any_put(raw, type); | |
b87121dd | 490 | |
b2197755 DB |
491 | return ret; |
492 | } | |
493 | ||
cb8edce2 | 494 | static void *bpf_obj_do_get(int path_fd, const char __user *pathname, |
6e71b04a | 495 | enum bpf_type *type, int flags) |
b2197755 DB |
496 | { |
497 | struct inode *inode; | |
498 | struct path path; | |
499 | void *raw; | |
500 | int ret; | |
501 | ||
cb8edce2 | 502 | ret = user_path_at(path_fd, pathname, LOOKUP_FOLLOW, &path); |
b2197755 DB |
503 | if (ret) |
504 | return ERR_PTR(ret); | |
505 | ||
506 | inode = d_backing_inode(path.dentry); | |
02f92b38 | 507 | ret = path_permission(&path, ACC_MODE(flags)); |
b2197755 DB |
508 | if (ret) |
509 | goto out; | |
510 | ||
511 | ret = bpf_inode_type(inode, type); | |
512 | if (ret) | |
513 | goto out; | |
514 | ||
515 | raw = bpf_any_get(inode->i_private, *type); | |
92117d84 AS |
516 | if (!IS_ERR(raw)) |
517 | touch_atime(&path); | |
b2197755 DB |
518 | |
519 | path_put(&path); | |
520 | return raw; | |
521 | out: | |
522 | path_put(&path); | |
523 | return ERR_PTR(ret); | |
524 | } | |
525 | ||
cb8edce2 | 526 | int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags) |
b2197755 DB |
527 | { |
528 | enum bpf_type type = BPF_TYPE_UNSPEC; | |
6e71b04a | 529 | int f_flags; |
b2197755 | 530 | void *raw; |
b87121dd | 531 | int ret; |
b2197755 | 532 | |
6e71b04a CF |
533 | f_flags = bpf_get_file_flag(flags); |
534 | if (f_flags < 0) | |
535 | return f_flags; | |
536 | ||
cb8edce2 | 537 | raw = bpf_obj_do_get(path_fd, pathname, &type, f_flags); |
b87121dd AV |
538 | if (IS_ERR(raw)) |
539 | return PTR_ERR(raw); | |
b2197755 DB |
540 | |
541 | if (type == BPF_TYPE_PROG) | |
5dec6d96 | 542 | ret = bpf_prog_new_fd(raw); |
b2197755 | 543 | else if (type == BPF_TYPE_MAP) |
6e71b04a | 544 | ret = bpf_map_new_fd(raw, f_flags); |
70ed506c | 545 | else if (type == BPF_TYPE_LINK) |
25fc94b2 | 546 | ret = (f_flags != O_RDWR) ? -EINVAL : bpf_link_new_fd(raw); |
b2197755 | 547 | else |
b87121dd | 548 | return -ENOENT; |
b2197755 | 549 | |
4d220ed0 | 550 | if (ret < 0) |
b2197755 | 551 | bpf_any_put(raw, type); |
b2197755 DB |
552 | return ret; |
553 | } | |
040ee692 AV |
554 | |
555 | static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type) | |
556 | { | |
557 | struct bpf_prog *prog; | |
4609e1f1 | 558 | int ret = inode_permission(&nop_mnt_idmap, inode, MAY_READ); |
040ee692 AV |
559 | if (ret) |
560 | return ERR_PTR(ret); | |
561 | ||
562 | if (inode->i_op == &bpf_map_iops) | |
563 | return ERR_PTR(-EINVAL); | |
70ed506c AN |
564 | if (inode->i_op == &bpf_link_iops) |
565 | return ERR_PTR(-EINVAL); | |
040ee692 AV |
566 | if (inode->i_op != &bpf_prog_iops) |
567 | return ERR_PTR(-EACCES); | |
568 | ||
569 | prog = inode->i_private; | |
570 | ||
571 | ret = security_bpf_prog(prog); | |
572 | if (ret < 0) | |
573 | return ERR_PTR(ret); | |
574 | ||
575 | if (!bpf_prog_get_ok(prog, &type, false)) | |
576 | return ERR_PTR(-EINVAL); | |
577 | ||
85192dbf AN |
578 | bpf_prog_inc(prog); |
579 | return prog; | |
040ee692 AV |
580 | } |
581 | ||
582 | struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) | |
583 | { | |
584 | struct bpf_prog *prog; | |
585 | struct path path; | |
586 | int ret = kern_path(name, LOOKUP_FOLLOW, &path); | |
587 | if (ret) | |
588 | return ERR_PTR(ret); | |
589 | prog = __get_prog_inode(d_backing_inode(path.dentry), type); | |
590 | if (!IS_ERR(prog)) | |
591 | touch_atime(&path); | |
592 | path_put(&path); | |
593 | return prog; | |
594 | } | |
595 | EXPORT_SYMBOL(bpf_prog_get_type_path); | |
b2197755 | 596 | |
4cc7c186 DH |
597 | /* |
598 | * Display the mount options in /proc/mounts. | |
599 | */ | |
600 | static int bpf_show_options(struct seq_file *m, struct dentry *root) | |
601 | { | |
602 | umode_t mode = d_inode(root)->i_mode & S_IALLUGO & ~S_ISVTX; | |
603 | ||
604 | if (mode != S_IRWXUGO) | |
605 | seq_printf(m, ",mode=%o", mode); | |
606 | return 0; | |
607 | } | |
608 | ||
524845ff | 609 | static void bpf_free_inode(struct inode *inode) |
1da6c4d9 | 610 | { |
1da6c4d9 DB |
611 | enum bpf_type type; |
612 | ||
613 | if (S_ISLNK(inode->i_mode)) | |
614 | kfree(inode->i_link); | |
615 | if (!bpf_inode_type(inode, &type)) | |
616 | bpf_any_put(inode->i_private, type); | |
617 | free_inode_nonrcu(inode); | |
618 | } | |
619 | ||
b2197755 DB |
620 | static const struct super_operations bpf_super_ops = { |
621 | .statfs = simple_statfs, | |
622 | .drop_inode = generic_delete_inode, | |
4cc7c186 | 623 | .show_options = bpf_show_options, |
524845ff | 624 | .free_inode = bpf_free_inode, |
b2197755 DB |
625 | }; |
626 | ||
a3af5f80 DB |
/* Identifiers for the mount parameters understood by bpffs. */
enum {
	OPT_MODE = 0,
};
630 | ||
d7167b14 | 631 | static const struct fs_parameter_spec bpf_fs_parameters[] = { |
d2935de7 DH |
632 | fsparam_u32oct ("mode", OPT_MODE), |
633 | {} | |
634 | }; | |
635 | ||
a3af5f80 DB |
636 | struct bpf_mount_opts { |
637 | umode_t mode; | |
638 | }; | |
639 | ||
d2935de7 | 640 | static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) |
a3af5f80 | 641 | { |
d2935de7 DH |
642 | struct bpf_mount_opts *opts = fc->fs_private; |
643 | struct fs_parse_result result; | |
644 | int opt; | |
a3af5f80 | 645 | |
d7167b14 | 646 | opt = fs_parse(fc, bpf_fs_parameters, param, &result); |
1e9d7466 | 647 | if (opt < 0) { |
a3af5f80 DB |
648 | /* We might like to report bad mount options here, but |
649 | * traditionally we've ignored all mount options, so we'd | |
650 | * better continue to ignore non-existing options for bpf. | |
651 | */ | |
1e9d7466 YS |
652 | if (opt == -ENOPARAM) { |
653 | opt = vfs_parse_fs_param_source(fc, param); | |
654 | if (opt != -ENOPARAM) | |
655 | return opt; | |
656 | ||
657 | return 0; | |
658 | } | |
659 | ||
660 | if (opt < 0) | |
661 | return opt; | |
662 | } | |
d2935de7 DH |
663 | |
664 | switch (opt) { | |
665 | case OPT_MODE: | |
666 | opts->mode = result.uint_32 & S_IALLUGO; | |
667 | break; | |
a3af5f80 DB |
668 | } |
669 | ||
670 | return 0; | |
671 | } | |
672 | ||
d71fa5c9 AS |
/*
 * Operations provided by bpf_preload; NULL until that code registers them
 * (see bpf_preload_mod_get()).
 */
struct bpf_preload_ops *bpf_preload_ops;
EXPORT_SYMBOL_GPL(bpf_preload_ops);
675 | ||
676 | static bool bpf_preload_mod_get(void) | |
677 | { | |
678 | /* If bpf_preload.ko wasn't loaded earlier then load it now. | |
679 | * When bpf_preload is built into vmlinux the module's __init | |
680 | * function will populate it. | |
681 | */ | |
682 | if (!bpf_preload_ops) { | |
683 | request_module("bpf_preload"); | |
684 | if (!bpf_preload_ops) | |
685 | return false; | |
686 | } | |
687 | /* And grab the reference, so the module doesn't disappear while the | |
688 | * kernel is interacting with the kernel module and its UMD. | |
689 | */ | |
690 | if (!try_module_get(bpf_preload_ops->owner)) { | |
691 | pr_err("bpf_preload module get failed.\n"); | |
692 | return false; | |
693 | } | |
694 | return true; | |
695 | } | |
696 | ||
697 | static void bpf_preload_mod_put(void) | |
698 | { | |
699 | if (bpf_preload_ops) | |
700 | /* now user can "rmmod bpf_preload" if necessary */ | |
701 | module_put(bpf_preload_ops->owner); | |
702 | } | |
703 | ||
/* Serializes the kernel's interactions with bpf_preload. */
static DEFINE_MUTEX(bpf_preload_lock);
705 | ||
706 | static int populate_bpffs(struct dentry *parent) | |
707 | { | |
708 | struct bpf_preload_info objs[BPF_PRELOAD_LINKS] = {}; | |
d71fa5c9 AS |
709 | int err = 0, i; |
710 | ||
711 | /* grab the mutex to make sure the kernel interactions with bpf_preload | |
cb80ddc6 | 712 | * are serialized |
d71fa5c9 AS |
713 | */ |
714 | mutex_lock(&bpf_preload_lock); | |
715 | ||
716 | /* if bpf_preload.ko wasn't built into vmlinux then load it */ | |
717 | if (!bpf_preload_mod_get()) | |
718 | goto out; | |
719 | ||
cb80ddc6 AS |
720 | err = bpf_preload_ops->preload(objs); |
721 | if (err) | |
722 | goto out_put; | |
723 | for (i = 0; i < BPF_PRELOAD_LINKS; i++) { | |
724 | bpf_link_inc(objs[i].link); | |
725 | err = bpf_iter_link_pin_kernel(parent, | |
726 | objs[i].link_name, objs[i].link); | |
727 | if (err) { | |
728 | bpf_link_put(objs[i].link); | |
d71fa5c9 | 729 | goto out_put; |
d71fa5c9 | 730 | } |
d71fa5c9 AS |
731 | } |
732 | out_put: | |
733 | bpf_preload_mod_put(); | |
734 | out: | |
735 | mutex_unlock(&bpf_preload_lock); | |
d71fa5c9 AS |
736 | return err; |
737 | } | |
738 | ||
d2935de7 | 739 | static int bpf_fill_super(struct super_block *sb, struct fs_context *fc) |
b2197755 | 740 | { |
cda37124 | 741 | static const struct tree_descr bpf_rfiles[] = { { "" } }; |
d2935de7 | 742 | struct bpf_mount_opts *opts = fc->fs_private; |
b2197755 DB |
743 | struct inode *inode; |
744 | int ret; | |
745 | ||
746 | ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); | |
747 | if (ret) | |
748 | return ret; | |
749 | ||
750 | sb->s_op = &bpf_super_ops; | |
751 | ||
752 | inode = sb->s_root->d_inode; | |
753 | inode->i_op = &bpf_dir_iops; | |
754 | inode->i_mode &= ~S_IALLUGO; | |
d71fa5c9 | 755 | populate_bpffs(sb->s_root); |
d2935de7 | 756 | inode->i_mode |= S_ISVTX | opts->mode; |
b2197755 DB |
757 | return 0; |
758 | } | |
759 | ||
d2935de7 DH |
760 | static int bpf_get_tree(struct fs_context *fc) |
761 | { | |
762 | return get_tree_nodev(fc, bpf_fill_super); | |
763 | } | |
764 | ||
765 | static void bpf_free_fc(struct fs_context *fc) | |
b2197755 | 766 | { |
d2935de7 DH |
767 | kfree(fc->fs_private); |
768 | } | |
769 | ||
770 | static const struct fs_context_operations bpf_context_ops = { | |
771 | .free = bpf_free_fc, | |
772 | .parse_param = bpf_parse_param, | |
773 | .get_tree = bpf_get_tree, | |
774 | }; | |
775 | ||
776 | /* | |
777 | * Set up the filesystem mount context. | |
778 | */ | |
779 | static int bpf_init_fs_context(struct fs_context *fc) | |
780 | { | |
781 | struct bpf_mount_opts *opts; | |
782 | ||
783 | opts = kzalloc(sizeof(struct bpf_mount_opts), GFP_KERNEL); | |
784 | if (!opts) | |
785 | return -ENOMEM; | |
786 | ||
787 | opts->mode = S_IRWXUGO; | |
788 | ||
789 | fc->fs_private = opts; | |
790 | fc->ops = &bpf_context_ops; | |
791 | return 0; | |
b2197755 DB |
792 | } |
793 | ||
794 | static struct file_system_type bpf_fs_type = { | |
795 | .owner = THIS_MODULE, | |
796 | .name = "bpf", | |
d2935de7 | 797 | .init_fs_context = bpf_init_fs_context, |
d7167b14 | 798 | .parameters = bpf_fs_parameters, |
b2197755 | 799 | .kill_sb = kill_litter_super, |
b2197755 DB |
800 | }; |
801 | ||
b2197755 DB |
802 | static int __init bpf_init(void) |
803 | { | |
804 | int ret; | |
805 | ||
806 | ret = sysfs_create_mount_point(fs_kobj, "bpf"); | |
807 | if (ret) | |
808 | return ret; | |
809 | ||
810 | ret = register_filesystem(&bpf_fs_type); | |
811 | if (ret) | |
812 | sysfs_remove_mount_point(fs_kobj, "bpf"); | |
813 | ||
814 | return ret; | |
815 | } | |
816 | fs_initcall(bpf_init); |