bpf: introduce percpu_freelist
kernel/bpf/syscall.c (linux-2.6-block.git)
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>

DEFINE_PER_CPU(int, bpf_prog_active);

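/* bpf_prog_active is a per-cpu recursion guard: map_update_elem() and
 * map_delete_elem() below bump it around their critical sections, and
 * the tracing side (trace_call_bpf()) refuses to run a program while it
 * is non-zero, so a kprobe-attached program cannot re-enter a map
 * operation already in progress on this cpu and deadlock.
 */
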
int sysctl_unprivileged_bpf_disabled __read_mostly;

static LIST_HEAD(bpf_map_types);

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map_type_list *tl;
	struct bpf_map *map;

	list_for_each_entry(tl, &bpf_map_types, list_node) {
		if (tl->type == attr->map_type) {
			map = tl->ops->map_alloc(attr);
			if (IS_ERR(map))
				return map;
			map->ops = tl->ops;
			map->map_type = attr->map_type;
			return map;
		}
	}
	return ERR_PTR(-EINVAL);
}

/* boot time registration of different map implementations */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
	list_add(&tl->list_node, &bpf_map_types);
}

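/* A map implementation registers itself at boot along these lines
 * (sketch modeled on the hash map implementation; the names are that
 * implementation's, not part of this file):
 *
 *	static struct bpf_map_type_list htab_type __read_mostly = {
 *		.ops	= &htab_ops,
 *		.type	= BPF_MAP_TYPE_HASH,
 *	};
 *
 *	static int __init register_htab_map(void)
 *	{
 *		bpf_register_map_type(&htab_type);
 *		return 0;
 *	}
 *	late_initcall(register_htab_map);
 */
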
static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

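/* Illustrative numbers: with RLIMIT_MEMLOCK at 64 MiB and 4 KiB pages,
 * memlock_limit is 16384 pages, so the sum of ->pages across all maps
 * charged to one user must stay at or below 16384, else map creation
 * fails with -EPERM.
 */
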
/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	bpf_map_put_with_uref(filp->private_data);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries);
}
#endif

static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_map_show_fdinfo,
#endif
	.release = bpf_map_release,
};

int bpf_map_new_fd(struct bpf_map *map)
{
	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				O_RDWR | O_CLOEXEC);
}

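/* The fd is created O_RDWR | O_CLOEXEC, so a map handle is not
 * inherited across exec() by default and cannot leak into unrelated
 * programs; user space must pass fds around explicitly.
 */
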
/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

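/* For example, with BPF_MAP_CREATE_LAST_FIELD being max_entries below,
 * CHECK_ATTR(BPF_MAP_CREATE) scans every byte of the union after
 * attr->max_entries and evaluates to true (reject with -EINVAL) if any
 * of them is non-zero; this is what lets 'union bpf_attr' grow over
 * time without old commands silently accepting attributes they do not
 * understand.
 */
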
#define BPF_MAP_CREATE_LAST_FIELD max_entries
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map;

	err = bpf_map_new_fd(map);
	if (err < 0)
		/* failed to allocate fd */
		goto free_map;

	return err;

free_map:
	map->ops->map_free(map);
	return err;
}

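/* Example invocation from user space (illustrative only; a real caller
 * checks the returned fd):
 *
 *	union bpf_attr attr = {
 *		.map_type    = BPF_MAP_TYPE_HASH,
 *		.key_size    = 4,
 *		.value_size  = 8,
 *		.max_entries = 1024,
 *	};
 *	int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 */
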
/* If an error is returned, the fd is released.
 * On success, the caller should complete its fd access with a matching
 * fdput().
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

void bpf_map_inc(struct bpf_map *map, bool uref)
{
	atomic_inc(&map->refcnt);
	if (uref)
		atomic_inc(&map->usercnt);
}

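/* A map carries two counts: refcnt covers all references, usercnt only
 * the user-visible ones (fds, bpf fs pins). Clearing a prog array once
 * usercnt hits zero breaks the reference cycle that would otherwise
 * keep a tail-call map and the programs it points to alive forever.
 */
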
struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	bpf_map_inc(map, true);
	fdput(f);

	return map;
}

/* helper to convert user pointers passed inside __aligned_u64 fields */
static void __user *u64_to_ptr(__u64 val)
{
	return (void __user *) (unsigned long) val;
}

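/* Pointers cross the ABI as __aligned_u64 instead of native pointers,
 * so 'union bpf_attr' has the same size and layout for 32-bit and
 * 64-bit user space and no compat conversion is needed.
 */
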
/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

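/* For a per-cpu map the user buffer receives one value slot per
 * possible cpu, each rounded up to 8 bytes. Illustrative: a 12-byte
 * value on a 4-cpu system comes back as 4 slots of 16 bytes (64 bytes
 * total), slot i holding cpu i's copy.
 */
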
#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside a bpf map update or delete; otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *unext_key = u64_to_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

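/* User space walks a map by chaining these calls until -ENOENT, e.g.
 * (illustrative; key_buf/next_key_buf are caller-provided buffers of
 * map->key_size bytes that attr.key/attr.next_key point at):
 *
 *	while (syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr,
 *		       sizeof(attr)) == 0) {
 *		... process next_key_buf ...
 *		memcpy(key_buf, next_key_buf, key_size);
 *	}
 */
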
static LIST_HEAD(bpf_prog_types);

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	struct bpf_prog_type_list *tl;

	list_for_each_entry(tl, &bpf_prog_types, list_node) {
		if (tl->type == type) {
			prog->aux->ops = tl->ops;
			prog->type = type;
			return 0;
		}
	}

	return -EINVAL;
}

void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
	list_add(&tl->list_node, &bpf_prog_types);
}

/* fixup insn->imm field of bpf_call instructions:
 * if (insn->imm == BPF_FUNC_map_lookup_elem)
 *      insn->imm = bpf_map_lookup_elem - __bpf_call_base;
 * else if (insn->imm == BPF_FUNC_map_update_elem)
 *      insn->imm = bpf_map_update_elem - __bpf_call_base;
 * else ...
 *
 * this function is called after the eBPF program has passed verification
 */
static void fixup_bpf_calls(struct bpf_prog *prog)
{
	const struct bpf_func_proto *fn;
	int i;

	for (i = 0; i < prog->len; i++) {
		struct bpf_insn *insn = &prog->insnsi[i];

		if (insn->code == (BPF_JMP | BPF_CALL)) {
			/* we reach here when the program has bpf_call
			 * instructions and it passed bpf_check(), which means
			 * ops->get_func_proto must have been supplied; check it
			 */
			BUG_ON(!prog->aux->ops->get_func_proto);

			if (insn->imm == BPF_FUNC_get_route_realm)
				prog->dst_needed = 1;
			if (insn->imm == BPF_FUNC_get_prandom_u32)
				bpf_user_rnd_init_once();
			if (insn->imm == BPF_FUNC_tail_call) {
				/* mark bpf_tail_call as different opcode
				 * to avoid conditional branch in
				 * interpreter for every normal call
				 * and to prevent accidental JITing by
				 * JIT compiler that doesn't support
				 * bpf_tail_call yet
				 */
				insn->imm = 0;
				insn->code |= BPF_X;
				continue;
			}

			fn = prog->aux->ops->get_func_proto(insn->imm);
			/* all functions that have a prototype and that the
			 * verifier allowed programs to call must be real
			 * in-kernel functions
			 */
			BUG_ON(!fn->func);
			insn->imm = fn->func - __bpf_call_base;
		}
	}
}

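/* Concretely: before the fixup a call instruction carries the helper's
 * enum value, e.g. insn->imm == BPF_FUNC_map_lookup_elem; afterwards
 * imm holds the offset of the real kernel function relative to
 * __bpf_call_base, so the interpreter dispatches with one relative
 * call and no per-call lookup.
 */
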
/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(prog->pages, &user->locked_vm);
	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(prog->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	atomic_long_sub(prog->pages, &user->locked_vm);
	free_uid(user);
}

static void __prog_put_common(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	bpf_prog_free(aux->prog);
}

/* version of bpf_prog_put() that is called after a grace period */
void bpf_prog_put_rcu(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt))
		call_rcu(&prog->aux->rcu, __prog_put_common);
}

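/* The RCU variant matters when the last reference can be dropped while
 * the program may still be running under rcu_read_lock() on another
 * cpu (e.g. a filter being detached from its hook); freeing is then
 * deferred until a grace period has elapsed.
 */
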
void bpf_prog_put(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt))
		__prog_put_common(&prog->aux->rcu);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put_rcu(prog);
	return 0;
}

static const struct file_operations bpf_prog_fops = {
	.release = bpf_prog_release,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *__bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* called by sockets/tracing/seccomp before attaching program to an event;
 * pairs with bpf_prog_put()
 */
struct bpf_prog *bpf_prog_get(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = __bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;

	atomic_inc(&prog->aux->refcnt);
	fdput(f);

	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get);

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD kern_version

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt >= BPF_MAXINSNS)
		return -EINVAL;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_nouncharge;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
			   prog->len * sizeof(struct bpf_insn)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* fixup BPF_CALL->imm field */
	fixup_bpf_calls(prog);

	/* eBPF program is ready to be JITed */
	err = bpf_prog_select_runtime(prog);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

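/* Example invocation from user space (illustrative; 'insns' is the
 * caller's array of struct bpf_insn, and kern_version is only required
 * for BPF_PROG_TYPE_KPROBE):
 *
 *	union bpf_attr attr = {
 *		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
 *		.insn_cnt  = insn_cnt,
 *		.insns     = (__u64)(unsigned long)insns,
 *		.license   = (__u64)(unsigned long)"GPL",
 *	};
 *	int prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 */
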
#define BPF_OBJ_LAST_FIELD bpf_fd

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ))
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_ptr(attr->pathname));
}

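/* Pinning attaches an object to a path inside a bpf filesystem mount
 * (conventionally /sys/fs/bpf) so it outlives the creating process,
 * e.g. (illustrative):
 *
 *	attr.bpf_fd   = map_fd;
 *	attr.pathname = (__u64)(unsigned long)"/sys/fs/bpf/my_map";
 *	syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr));
 *
 * BPF_OBJ_GET with the same pathname later returns a fresh fd.
 */
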
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}