bpf: export bpf_prog_inc_not_zero
[linux-2.6-block.git] / kernel / bpf / syscall.c
CommitLineData
99c55f7d
AS
1/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful, but
8 * WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 * General Public License for more details.
11 */
12#include <linux/bpf.h>
a67edbf4 13#include <linux/bpf_trace.h>
99c55f7d
AS
14#include <linux/syscalls.h>
15#include <linux/slab.h>
3f07c014 16#include <linux/sched/signal.h>
d407bd25
DB
17#include <linux/vmalloc.h>
18#include <linux/mmzone.h>
99c55f7d 19#include <linux/anon_inodes.h>
db20fd2b 20#include <linux/file.h>
09756af4
AS
21#include <linux/license.h>
22#include <linux/filter.h>
2541517c 23#include <linux/version.h>
535e7b4b 24#include <linux/kernel.h>
dc4bb0e2 25#include <linux/idr.h>
99c55f7d 26
14dc6f04
MKL
/* Array-type maps that this file routes through the bpf_fd_array_map_*()
 * helpers for lookup and update.
 */
#define IS_FD_ARRAY(map) \
	((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
	 (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
	 (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
	 (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
/* Hash-type maps that this file routes through the bpf_fd_htab_map_*()
 * helpers for lookup and update.
 */
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
/* Either of the two classes above. */
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))
33
b121d1e7 34DEFINE_PER_CPU(int, bpf_prog_active);
dc4bb0e2
MKL
35static DEFINE_IDR(prog_idr);
36static DEFINE_SPINLOCK(prog_idr_lock);
f3f1c054
MKL
37static DEFINE_IDR(map_idr);
38static DEFINE_SPINLOCK(map_idr_lock);
b121d1e7 39
1be7f75d
AS
40int sysctl_unprivileged_bpf_disabled __read_mostly;
41
40077e0c
JB
42static const struct bpf_map_ops * const bpf_map_types[] = {
43#define BPF_PROG_TYPE(_id, _ops)
44#define BPF_MAP_TYPE(_id, _ops) \
45 [_id] = &_ops,
46#include <linux/bpf_types.h>
47#undef BPF_PROG_TYPE
48#undef BPF_MAP_TYPE
49};
99c55f7d 50
752ba56f
MS
51/*
52 * If we're handed a bigger struct than we know of, ensure all the unknown bits
53 * are 0 - i.e. new user-space does not rely on any kernel feature extensions
54 * we don't know about yet.
55 *
56 * There is a ToCToU between this function call and the following
57 * copy_from_user() call. However, this is not a concern since this function is
58 * meant to be a future-proofing of bits.
59 */
58291a74
MS
60static int check_uarg_tail_zero(void __user *uaddr,
61 size_t expected_size,
62 size_t actual_size)
63{
64 unsigned char __user *addr;
65 unsigned char __user *end;
66 unsigned char val;
67 int err;
68
752ba56f
MS
69 if (unlikely(actual_size > PAGE_SIZE)) /* silly large */
70 return -E2BIG;
71
72 if (unlikely(!access_ok(VERIFY_READ, uaddr, actual_size)))
73 return -EFAULT;
74
58291a74
MS
75 if (actual_size <= expected_size)
76 return 0;
77
78 addr = uaddr + expected_size;
79 end = uaddr + actual_size;
80
81 for (; addr < end; addr++) {
82 err = get_user(val, addr);
83 if (err)
84 return err;
85 if (val)
86 return -E2BIG;
87 }
88
89 return 0;
90}
91
99c55f7d
AS
92static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
93{
99c55f7d
AS
94 struct bpf_map *map;
95
40077e0c
JB
96 if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
97 !bpf_map_types[attr->map_type])
98 return ERR_PTR(-EINVAL);
99c55f7d 99
40077e0c
JB
100 map = bpf_map_types[attr->map_type]->map_alloc(attr);
101 if (IS_ERR(map))
102 return map;
103 map->ops = bpf_map_types[attr->map_type];
104 map->map_type = attr->map_type;
105 return map;
99c55f7d
AS
106}
107
d407bd25
DB
108void *bpf_map_area_alloc(size_t size)
109{
110 /* We definitely need __GFP_NORETRY, so OOM killer doesn't
111 * trigger under memory pressure as we really just want to
112 * fail instead.
113 */
114 const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
115 void *area;
116
117 if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
118 area = kmalloc(size, GFP_USER | flags);
119 if (area != NULL)
120 return area;
121 }
122
19809c2d 123 return __vmalloc(size, GFP_KERNEL | flags, PAGE_KERNEL);
d407bd25
DB
124}
125
/* Release an area via kvfree(); counterpart of bpf_map_area_alloc(). */
void bpf_map_area_free(void *area)
{
	kvfree(area);
}
130
6c905981
AS
131int bpf_map_precharge_memlock(u32 pages)
132{
133 struct user_struct *user = get_current_user();
134 unsigned long memlock_limit, cur;
135
136 memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
137 cur = atomic_long_read(&user->locked_vm);
138 free_uid(user);
139 if (cur + pages > memlock_limit)
140 return -EPERM;
141 return 0;
142}
143
aaac3ba9
AS
144static int bpf_map_charge_memlock(struct bpf_map *map)
145{
146 struct user_struct *user = get_current_user();
147 unsigned long memlock_limit;
148
149 memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
150
151 atomic_long_add(map->pages, &user->locked_vm);
152
153 if (atomic_long_read(&user->locked_vm) > memlock_limit) {
154 atomic_long_sub(map->pages, &user->locked_vm);
155 free_uid(user);
156 return -EPERM;
157 }
158 map->user = user;
159 return 0;
160}
161
162static void bpf_map_uncharge_memlock(struct bpf_map *map)
163{
164 struct user_struct *user = map->user;
165
166 atomic_long_sub(map->pages, &user->locked_vm);
167 free_uid(user);
168}
169
f3f1c054
MKL
170static int bpf_map_alloc_id(struct bpf_map *map)
171{
172 int id;
173
174 spin_lock_bh(&map_idr_lock);
175 id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
176 if (id > 0)
177 map->id = id;
178 spin_unlock_bh(&map_idr_lock);
179
180 if (WARN_ON_ONCE(!id))
181 return -ENOSPC;
182
183 return id > 0 ? 0 : id;
184}
185
bd5f5f4e 186static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
f3f1c054 187{
bd5f5f4e
MKL
188 if (do_idr_lock)
189 spin_lock_bh(&map_idr_lock);
190 else
191 __acquire(&map_idr_lock);
192
f3f1c054 193 idr_remove(&map_idr, map->id);
bd5f5f4e
MKL
194
195 if (do_idr_lock)
196 spin_unlock_bh(&map_idr_lock);
197 else
198 __release(&map_idr_lock);
f3f1c054
MKL
199}
200
99c55f7d
AS
201/* called from workqueue */
202static void bpf_map_free_deferred(struct work_struct *work)
203{
204 struct bpf_map *map = container_of(work, struct bpf_map, work);
205
aaac3ba9 206 bpf_map_uncharge_memlock(map);
99c55f7d
AS
207 /* implementation dependent freeing */
208 map->ops->map_free(map);
209}
210
c9da161c
DB
211static void bpf_map_put_uref(struct bpf_map *map)
212{
213 if (atomic_dec_and_test(&map->usercnt)) {
214 if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
215 bpf_fd_array_map_clear(map);
216 }
217}
218
99c55f7d
AS
219/* decrement map refcnt and schedule it for freeing via workqueue
220 * (unrelying map implementation ops->map_free() might sleep)
221 */
bd5f5f4e 222static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
99c55f7d
AS
223{
224 if (atomic_dec_and_test(&map->refcnt)) {
34ad5580 225 /* bpf_map_free_id() must be called first */
bd5f5f4e 226 bpf_map_free_id(map, do_idr_lock);
99c55f7d
AS
227 INIT_WORK(&map->work, bpf_map_free_deferred);
228 schedule_work(&map->work);
229 }
230}
231
bd5f5f4e
MKL
232void bpf_map_put(struct bpf_map *map)
233{
234 __bpf_map_put(map, true);
235}
236
/* Drop one usercnt reference and then one refcnt reference. */
void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}
242
243static int bpf_map_release(struct inode *inode, struct file *filp)
244{
61d1b6a4
DB
245 struct bpf_map *map = filp->private_data;
246
247 if (map->ops->map_release)
248 map->ops->map_release(map, filp);
249
250 bpf_map_put_with_uref(map);
99c55f7d
AS
251 return 0;
252}
253
f99bf205
DB
#ifdef CONFIG_PROC_FS
/* ->show_fdinfo() of bpf_map_fops: print the map's attributes and memlock
 * footprint; for prog arrays with a non-zero owner_prog_type the owner
 * fields are printed as well.
 */
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	u32 owner_prog_type = 0;
	u32 owner_jited = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		const struct bpf_array *array =
			container_of(map, struct bpf_array, map);

		owner_prog_type = array->owner_prog_type;
		owner_jited = array->owner_jited;
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   map->pages * 1ULL << PAGE_SHIFT);

	if (!owner_prog_type)
		return;

	seq_printf(m, "owner_prog_type:\t%u\n", owner_prog_type);
	seq_printf(m, "owner_jited:\t%u\n", owner_jited);
}
#endif
290
99c55f7d 291static const struct file_operations bpf_map_fops = {
f99bf205
DB
292#ifdef CONFIG_PROC_FS
293 .show_fdinfo = bpf_map_show_fdinfo,
294#endif
295 .release = bpf_map_release,
99c55f7d
AS
296};
297
b2197755 298int bpf_map_new_fd(struct bpf_map *map)
aa79781b
DB
299{
300 return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
301 O_RDWR | O_CLOEXEC);
302}
303
99c55f7d
AS
/* helper macro to check that unused fields 'union bpf_attr' are zero
 *
 * Expects a 'union bpf_attr *attr' in the caller's scope and a
 * CMD##_LAST_FIELD macro naming the last attr member the command uses.
 * Evaluates to true when memchr_inv() finds a non-zero byte between the
 * end of that member and the end of the union.
 *
 * The whole expansion is parenthesized so the result can be safely
 * negated or combined with other operators at the call site.
 */
#define CHECK_ATTR(CMD) \
	(memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		    sizeof(attr->CMD##_LAST_FIELD), 0, \
		    sizeof(*attr) - \
		    offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		    sizeof(attr->CMD##_LAST_FIELD)) != NULL)
311
56f668df 312#define BPF_MAP_CREATE_LAST_FIELD inner_map_fd
99c55f7d
AS
313/* called via syscall */
314static int map_create(union bpf_attr *attr)
315{
316 struct bpf_map *map;
317 int err;
318
319 err = CHECK_ATTR(BPF_MAP_CREATE);
320 if (err)
321 return -EINVAL;
322
323 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
324 map = find_and_alloc_map(attr);
325 if (IS_ERR(map))
326 return PTR_ERR(map);
327
328 atomic_set(&map->refcnt, 1);
c9da161c 329 atomic_set(&map->usercnt, 1);
99c55f7d 330
aaac3ba9
AS
331 err = bpf_map_charge_memlock(map);
332 if (err)
20b2b24f 333 goto free_map_nouncharge;
aaac3ba9 334
f3f1c054
MKL
335 err = bpf_map_alloc_id(map);
336 if (err)
337 goto free_map;
338
aa79781b 339 err = bpf_map_new_fd(map);
bd5f5f4e
MKL
340 if (err < 0) {
341 /* failed to allocate fd.
342 * bpf_map_put() is needed because the above
343 * bpf_map_alloc_id() has published the map
344 * to the userspace and the userspace may
345 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
346 */
347 bpf_map_put(map);
348 return err;
349 }
99c55f7d 350
a67edbf4 351 trace_bpf_map_create(map, err);
99c55f7d
AS
352 return err;
353
354free_map:
20b2b24f
DB
355 bpf_map_uncharge_memlock(map);
356free_map_nouncharge:
99c55f7d
AS
357 map->ops->map_free(map);
358 return err;
359}
360
db20fd2b
AS
361/* if error is returned, fd is released.
362 * On success caller should complete fd access with matching fdput()
363 */
c2101297 364struct bpf_map *__bpf_map_get(struct fd f)
db20fd2b 365{
db20fd2b
AS
366 if (!f.file)
367 return ERR_PTR(-EBADF);
db20fd2b
AS
368 if (f.file->f_op != &bpf_map_fops) {
369 fdput(f);
370 return ERR_PTR(-EINVAL);
371 }
372
c2101297
DB
373 return f.file->private_data;
374}
375
92117d84
AS
376/* prog's and map's refcnt limit */
377#define BPF_MAX_REFCNT 32768
378
379struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
c9da161c 380{
92117d84
AS
381 if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
382 atomic_dec(&map->refcnt);
383 return ERR_PTR(-EBUSY);
384 }
c9da161c
DB
385 if (uref)
386 atomic_inc(&map->usercnt);
92117d84 387 return map;
c9da161c
DB
388}
389
390struct bpf_map *bpf_map_get_with_uref(u32 ufd)
c2101297
DB
391{
392 struct fd f = fdget(ufd);
393 struct bpf_map *map;
394
395 map = __bpf_map_get(f);
396 if (IS_ERR(map))
397 return map;
398
92117d84 399 map = bpf_map_inc(map, true);
c2101297 400 fdput(f);
db20fd2b
AS
401
402 return map;
403}
404
bd5f5f4e
MKL
405/* map_idr_lock should have been held */
406static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
407 bool uref)
408{
409 int refold;
410
411 refold = __atomic_add_unless(&map->refcnt, 1, 0);
412
413 if (refold >= BPF_MAX_REFCNT) {
414 __bpf_map_put(map, false);
415 return ERR_PTR(-EBUSY);
416 }
417
418 if (!refold)
419 return ERR_PTR(-ENOENT);
420
421 if (uref)
422 atomic_inc(&map->usercnt);
423
424 return map;
425}
426
b8cdc051
AS
427int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
428{
429 return -ENOTSUPP;
430}
431
db20fd2b
AS
432/* last field in 'union bpf_attr' used by this command */
433#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
434
435static int map_lookup_elem(union bpf_attr *attr)
436{
535e7b4b
MS
437 void __user *ukey = u64_to_user_ptr(attr->key);
438 void __user *uvalue = u64_to_user_ptr(attr->value);
db20fd2b 439 int ufd = attr->map_fd;
db20fd2b 440 struct bpf_map *map;
8ebe667c 441 void *key, *value, *ptr;
15a07b33 442 u32 value_size;
592867bf 443 struct fd f;
db20fd2b
AS
444 int err;
445
446 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
447 return -EINVAL;
448
592867bf 449 f = fdget(ufd);
c2101297 450 map = __bpf_map_get(f);
db20fd2b
AS
451 if (IS_ERR(map))
452 return PTR_ERR(map);
453
e4448ed8
AV
454 key = memdup_user(ukey, map->key_size);
455 if (IS_ERR(key)) {
456 err = PTR_ERR(key);
db20fd2b 457 goto err_put;
e4448ed8 458 }
db20fd2b 459
15a07b33 460 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
8f844938 461 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
15a07b33
AS
462 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
463 value_size = round_up(map->value_size, 8) * num_possible_cpus();
14dc6f04
MKL
464 else if (IS_FD_MAP(map))
465 value_size = sizeof(u32);
15a07b33
AS
466 else
467 value_size = map->value_size;
468
8ebe667c 469 err = -ENOMEM;
15a07b33 470 value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
db20fd2b 471 if (!value)
8ebe667c
AS
472 goto free_key;
473
8f844938
MKL
474 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
475 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
15a07b33
AS
476 err = bpf_percpu_hash_copy(map, key, value);
477 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
478 err = bpf_percpu_array_copy(map, key, value);
557c0c6e
AS
479 } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
480 err = bpf_stackmap_copy(map, key, value);
14dc6f04
MKL
481 } else if (IS_FD_ARRAY(map)) {
482 err = bpf_fd_array_map_lookup_elem(map, key, value);
483 } else if (IS_FD_HASH(map)) {
484 err = bpf_fd_htab_map_lookup_elem(map, key, value);
15a07b33
AS
485 } else {
486 rcu_read_lock();
487 ptr = map->ops->map_lookup_elem(map, key);
488 if (ptr)
489 memcpy(value, ptr, value_size);
490 rcu_read_unlock();
491 err = ptr ? 0 : -ENOENT;
492 }
8ebe667c 493
15a07b33 494 if (err)
8ebe667c 495 goto free_value;
db20fd2b
AS
496
497 err = -EFAULT;
15a07b33 498 if (copy_to_user(uvalue, value, value_size) != 0)
8ebe667c 499 goto free_value;
db20fd2b 500
a67edbf4 501 trace_bpf_map_lookup_elem(map, ufd, key, value);
db20fd2b
AS
502 err = 0;
503
8ebe667c
AS
504free_value:
505 kfree(value);
db20fd2b
AS
506free_key:
507 kfree(key);
508err_put:
509 fdput(f);
510 return err;
511}
512
3274f520 513#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
db20fd2b
AS
514
515static int map_update_elem(union bpf_attr *attr)
516{
535e7b4b
MS
517 void __user *ukey = u64_to_user_ptr(attr->key);
518 void __user *uvalue = u64_to_user_ptr(attr->value);
db20fd2b 519 int ufd = attr->map_fd;
db20fd2b
AS
520 struct bpf_map *map;
521 void *key, *value;
15a07b33 522 u32 value_size;
592867bf 523 struct fd f;
db20fd2b
AS
524 int err;
525
526 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
527 return -EINVAL;
528
592867bf 529 f = fdget(ufd);
c2101297 530 map = __bpf_map_get(f);
db20fd2b
AS
531 if (IS_ERR(map))
532 return PTR_ERR(map);
533
e4448ed8
AV
534 key = memdup_user(ukey, map->key_size);
535 if (IS_ERR(key)) {
536 err = PTR_ERR(key);
db20fd2b 537 goto err_put;
e4448ed8 538 }
db20fd2b 539
15a07b33 540 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
8f844938 541 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
15a07b33
AS
542 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
543 value_size = round_up(map->value_size, 8) * num_possible_cpus();
544 else
545 value_size = map->value_size;
546
db20fd2b 547 err = -ENOMEM;
15a07b33 548 value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
db20fd2b
AS
549 if (!value)
550 goto free_key;
551
552 err = -EFAULT;
15a07b33 553 if (copy_from_user(value, uvalue, value_size) != 0)
db20fd2b
AS
554 goto free_value;
555
b121d1e7
AS
556 /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
557 * inside bpf map update or delete otherwise deadlocks are possible
558 */
559 preempt_disable();
560 __this_cpu_inc(bpf_prog_active);
8f844938
MKL
561 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
562 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
15a07b33
AS
563 err = bpf_percpu_hash_update(map, key, value, attr->flags);
564 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
565 err = bpf_percpu_array_update(map, key, value, attr->flags);
d056a788 566 } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
4ed8ec52 567 map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
56f668df
MKL
568 map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
569 map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
d056a788
DB
570 rcu_read_lock();
571 err = bpf_fd_array_map_update_elem(map, f.file, key, value,
572 attr->flags);
573 rcu_read_unlock();
bcc6b1b7
MKL
574 } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
575 rcu_read_lock();
576 err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
577 attr->flags);
578 rcu_read_unlock();
15a07b33
AS
579 } else {
580 rcu_read_lock();
581 err = map->ops->map_update_elem(map, key, value, attr->flags);
582 rcu_read_unlock();
583 }
b121d1e7
AS
584 __this_cpu_dec(bpf_prog_active);
585 preempt_enable();
db20fd2b 586
a67edbf4
DB
587 if (!err)
588 trace_bpf_map_update_elem(map, ufd, key, value);
db20fd2b
AS
589free_value:
590 kfree(value);
591free_key:
592 kfree(key);
593err_put:
594 fdput(f);
595 return err;
596}
597
598#define BPF_MAP_DELETE_ELEM_LAST_FIELD key
599
600static int map_delete_elem(union bpf_attr *attr)
601{
535e7b4b 602 void __user *ukey = u64_to_user_ptr(attr->key);
db20fd2b 603 int ufd = attr->map_fd;
db20fd2b 604 struct bpf_map *map;
592867bf 605 struct fd f;
db20fd2b
AS
606 void *key;
607 int err;
608
609 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
610 return -EINVAL;
611
592867bf 612 f = fdget(ufd);
c2101297 613 map = __bpf_map_get(f);
db20fd2b
AS
614 if (IS_ERR(map))
615 return PTR_ERR(map);
616
e4448ed8
AV
617 key = memdup_user(ukey, map->key_size);
618 if (IS_ERR(key)) {
619 err = PTR_ERR(key);
db20fd2b 620 goto err_put;
e4448ed8 621 }
db20fd2b 622
b121d1e7
AS
623 preempt_disable();
624 __this_cpu_inc(bpf_prog_active);
db20fd2b
AS
625 rcu_read_lock();
626 err = map->ops->map_delete_elem(map, key);
627 rcu_read_unlock();
b121d1e7
AS
628 __this_cpu_dec(bpf_prog_active);
629 preempt_enable();
db20fd2b 630
a67edbf4
DB
631 if (!err)
632 trace_bpf_map_delete_elem(map, ufd, key);
db20fd2b
AS
633 kfree(key);
634err_put:
635 fdput(f);
636 return err;
637}
638
639/* last field in 'union bpf_attr' used by this command */
640#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
641
642static int map_get_next_key(union bpf_attr *attr)
643{
535e7b4b
MS
644 void __user *ukey = u64_to_user_ptr(attr->key);
645 void __user *unext_key = u64_to_user_ptr(attr->next_key);
db20fd2b 646 int ufd = attr->map_fd;
db20fd2b
AS
647 struct bpf_map *map;
648 void *key, *next_key;
592867bf 649 struct fd f;
db20fd2b
AS
650 int err;
651
652 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
653 return -EINVAL;
654
592867bf 655 f = fdget(ufd);
c2101297 656 map = __bpf_map_get(f);
db20fd2b
AS
657 if (IS_ERR(map))
658 return PTR_ERR(map);
659
8fe45924 660 if (ukey) {
e4448ed8
AV
661 key = memdup_user(ukey, map->key_size);
662 if (IS_ERR(key)) {
663 err = PTR_ERR(key);
8fe45924 664 goto err_put;
e4448ed8 665 }
8fe45924
TQ
666 } else {
667 key = NULL;
668 }
db20fd2b
AS
669
670 err = -ENOMEM;
671 next_key = kmalloc(map->key_size, GFP_USER);
672 if (!next_key)
673 goto free_key;
674
675 rcu_read_lock();
676 err = map->ops->map_get_next_key(map, key, next_key);
677 rcu_read_unlock();
678 if (err)
679 goto free_next_key;
680
681 err = -EFAULT;
682 if (copy_to_user(unext_key, next_key, map->key_size) != 0)
683 goto free_next_key;
684
a67edbf4 685 trace_bpf_map_next_key(map, ufd, key, next_key);
db20fd2b
AS
686 err = 0;
687
688free_next_key:
689 kfree(next_key);
690free_key:
691 kfree(key);
692err_put:
693 fdput(f);
694 return err;
695}
696
be9370a7
JB
697static const struct bpf_verifier_ops * const bpf_prog_types[] = {
698#define BPF_PROG_TYPE(_id, _ops) \
699 [_id] = &_ops,
40077e0c 700#define BPF_MAP_TYPE(_id, _ops)
be9370a7
JB
701#include <linux/bpf_types.h>
702#undef BPF_PROG_TYPE
40077e0c 703#undef BPF_MAP_TYPE
be9370a7 704};
09756af4
AS
705
706static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
707{
be9370a7
JB
708 if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
709 return -EINVAL;
09756af4 710
be9370a7
JB
711 prog->aux->ops = bpf_prog_types[type];
712 prog->type = type;
713 return 0;
09756af4
AS
714}
715
716/* drop refcnt on maps used by eBPF program and free auxilary data */
717static void free_used_maps(struct bpf_prog_aux *aux)
718{
719 int i;
720
721 for (i = 0; i < aux->used_map_cnt; i++)
722 bpf_map_put(aux->used_maps[i]);
723
724 kfree(aux->used_maps);
725}
726
5ccb071e
DB
727int __bpf_prog_charge(struct user_struct *user, u32 pages)
728{
729 unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
730 unsigned long user_bufs;
731
732 if (user) {
733 user_bufs = atomic_long_add_return(pages, &user->locked_vm);
734 if (user_bufs > memlock_limit) {
735 atomic_long_sub(pages, &user->locked_vm);
736 return -EPERM;
737 }
738 }
739
740 return 0;
741}
742
743void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
744{
745 if (user)
746 atomic_long_sub(pages, &user->locked_vm);
747}
748
aaac3ba9
AS
749static int bpf_prog_charge_memlock(struct bpf_prog *prog)
750{
751 struct user_struct *user = get_current_user();
5ccb071e 752 int ret;
aaac3ba9 753
5ccb071e
DB
754 ret = __bpf_prog_charge(user, prog->pages);
755 if (ret) {
aaac3ba9 756 free_uid(user);
5ccb071e 757 return ret;
aaac3ba9 758 }
5ccb071e 759
aaac3ba9
AS
760 prog->aux->user = user;
761 return 0;
762}
763
764static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
765{
766 struct user_struct *user = prog->aux->user;
767
5ccb071e 768 __bpf_prog_uncharge(user, prog->pages);
aaac3ba9
AS
769 free_uid(user);
770}
771
dc4bb0e2
MKL
772static int bpf_prog_alloc_id(struct bpf_prog *prog)
773{
774 int id;
775
776 spin_lock_bh(&prog_idr_lock);
777 id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
778 if (id > 0)
779 prog->aux->id = id;
780 spin_unlock_bh(&prog_idr_lock);
781
782 /* id is in [1, INT_MAX) */
783 if (WARN_ON_ONCE(!id))
784 return -ENOSPC;
785
786 return id > 0 ? 0 : id;
787}
788
b16d9aa4 789static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
dc4bb0e2
MKL
790{
791 /* cBPF to eBPF migrations are currently not in the idr store. */
792 if (!prog->aux->id)
793 return;
794
b16d9aa4
MKL
795 if (do_idr_lock)
796 spin_lock_bh(&prog_idr_lock);
797 else
798 __acquire(&prog_idr_lock);
799
dc4bb0e2 800 idr_remove(&prog_idr, prog->aux->id);
b16d9aa4
MKL
801
802 if (do_idr_lock)
803 spin_unlock_bh(&prog_idr_lock);
804 else
805 __release(&prog_idr_lock);
dc4bb0e2
MKL
806}
807
1aacde3d 808static void __bpf_prog_put_rcu(struct rcu_head *rcu)
abf2e7d6
AS
809{
810 struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
811
812 free_used_maps(aux);
aaac3ba9 813 bpf_prog_uncharge_memlock(aux->prog);
abf2e7d6
AS
814 bpf_prog_free(aux->prog);
815}
816
b16d9aa4 817static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
09756af4 818{
a67edbf4
DB
819 if (atomic_dec_and_test(&prog->aux->refcnt)) {
820 trace_bpf_prog_put_rcu(prog);
34ad5580 821 /* bpf_prog_free_id() must be called first */
b16d9aa4 822 bpf_prog_free_id(prog, do_idr_lock);
74451e66 823 bpf_prog_kallsyms_del(prog);
1aacde3d 824 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
a67edbf4 825 }
09756af4 826}
b16d9aa4
MKL
827
828void bpf_prog_put(struct bpf_prog *prog)
829{
830 __bpf_prog_put(prog, true);
831}
e2e9b654 832EXPORT_SYMBOL_GPL(bpf_prog_put);
09756af4
AS
833
834static int bpf_prog_release(struct inode *inode, struct file *filp)
835{
836 struct bpf_prog *prog = filp->private_data;
837
1aacde3d 838 bpf_prog_put(prog);
09756af4
AS
839 return 0;
840}
841
7bd509e3
DB
#ifdef CONFIG_PROC_FS
/* ->show_fdinfo() of bpf_prog_fops: print the program's type, jited flag,
 * hex-encoded tag and memlock footprint.
 */
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	/* two hex digits per tag byte plus the terminating NUL */
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));

	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n",
		   prog->type,
		   prog->jited,
		   prog_tag,
		   prog->pages * 1ULL << PAGE_SHIFT);
}
#endif
860
09756af4 861static const struct file_operations bpf_prog_fops = {
7bd509e3
DB
862#ifdef CONFIG_PROC_FS
863 .show_fdinfo = bpf_prog_show_fdinfo,
864#endif
865 .release = bpf_prog_release,
09756af4
AS
866};
867
b2197755 868int bpf_prog_new_fd(struct bpf_prog *prog)
aa79781b
DB
869{
870 return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
871 O_RDWR | O_CLOEXEC);
872}
873
113214be 874static struct bpf_prog *____bpf_prog_get(struct fd f)
09756af4 875{
09756af4
AS
876 if (!f.file)
877 return ERR_PTR(-EBADF);
09756af4
AS
878 if (f.file->f_op != &bpf_prog_fops) {
879 fdput(f);
880 return ERR_PTR(-EINVAL);
881 }
882
c2101297 883 return f.file->private_data;
09756af4
AS
884}
885
59d3656d 886struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
92117d84 887{
59d3656d
BB
888 if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
889 atomic_sub(i, &prog->aux->refcnt);
92117d84
AS
890 return ERR_PTR(-EBUSY);
891 }
892 return prog;
893}
59d3656d
BB
894EXPORT_SYMBOL_GPL(bpf_prog_add);
895
c540594f
DB
896void bpf_prog_sub(struct bpf_prog *prog, int i)
897{
898 /* Only to be used for undoing previous bpf_prog_add() in some
899 * error path. We still know that another entity in our call
900 * path holds a reference to the program, thus atomic_sub() can
901 * be safely used in such cases!
902 */
903 WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
904}
905EXPORT_SYMBOL_GPL(bpf_prog_sub);
906
59d3656d
BB
/* Take a single reference on @prog; see bpf_prog_add() for the result. */
struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);
92117d84 912
b16d9aa4 913/* prog_idr_lock should have been held */
a6f6df69 914struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
b16d9aa4
MKL
915{
916 int refold;
917
918 refold = __atomic_add_unless(&prog->aux->refcnt, 1, 0);
919
920 if (refold >= BPF_MAX_REFCNT) {
921 __bpf_prog_put(prog, false);
922 return ERR_PTR(-EBUSY);
923 }
924
925 if (!refold)
926 return ERR_PTR(-ENOENT);
927
928 return prog;
929}
a6f6df69 930EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);
b16d9aa4 931
113214be 932static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
09756af4
AS
933{
934 struct fd f = fdget(ufd);
935 struct bpf_prog *prog;
936
113214be 937 prog = ____bpf_prog_get(f);
09756af4
AS
938 if (IS_ERR(prog))
939 return prog;
113214be
DB
940 if (type && prog->type != *type) {
941 prog = ERR_PTR(-EINVAL);
942 goto out;
943 }
09756af4 944
92117d84 945 prog = bpf_prog_inc(prog);
113214be 946out:
09756af4
AS
947 fdput(f);
948 return prog;
949}
113214be
DB
950
951struct bpf_prog *bpf_prog_get(u32 ufd)
952{
953 return __bpf_prog_get(ufd, NULL);
954}
955
956struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
957{
a67edbf4
DB
958 struct bpf_prog *prog = __bpf_prog_get(ufd, &type);
959
960 if (!IS_ERR(prog))
961 trace_bpf_prog_get_type(prog);
962 return prog;
113214be
DB
963}
964EXPORT_SYMBOL_GPL(bpf_prog_get_type);
09756af4
AS
965
966/* last field in 'union bpf_attr' used by this command */
e07b98d9 967#define BPF_PROG_LOAD_LAST_FIELD prog_flags
09756af4
AS
968
969static int bpf_prog_load(union bpf_attr *attr)
970{
971 enum bpf_prog_type type = attr->prog_type;
972 struct bpf_prog *prog;
973 int err;
974 char license[128];
975 bool is_gpl;
976
977 if (CHECK_ATTR(BPF_PROG_LOAD))
978 return -EINVAL;
979
e07b98d9
DM
980 if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT)
981 return -EINVAL;
982
09756af4 983 /* copy eBPF program license from user space */
535e7b4b 984 if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
09756af4
AS
985 sizeof(license) - 1) < 0)
986 return -EFAULT;
987 license[sizeof(license) - 1] = 0;
988
989 /* eBPF programs must be GPL compatible to use GPL-ed functions */
990 is_gpl = license_is_gpl_compatible(license);
991
ef0915ca
DB
992 if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
993 return -E2BIG;
09756af4 994
2541517c
AS
995 if (type == BPF_PROG_TYPE_KPROBE &&
996 attr->kern_version != LINUX_VERSION_CODE)
997 return -EINVAL;
998
80b7d819
CF
999 if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
1000 type != BPF_PROG_TYPE_CGROUP_SKB &&
1001 !capable(CAP_SYS_ADMIN))
1be7f75d
AS
1002 return -EPERM;
1003
09756af4
AS
1004 /* plain bpf_prog allocation */
1005 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
1006 if (!prog)
1007 return -ENOMEM;
1008
aaac3ba9
AS
1009 err = bpf_prog_charge_memlock(prog);
1010 if (err)
1011 goto free_prog_nouncharge;
1012
09756af4
AS
1013 prog->len = attr->insn_cnt;
1014
1015 err = -EFAULT;
535e7b4b 1016 if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
aafe6ae9 1017 bpf_prog_insn_size(prog)) != 0)
09756af4
AS
1018 goto free_prog;
1019
1020 prog->orig_prog = NULL;
a91263d5 1021 prog->jited = 0;
09756af4
AS
1022
1023 atomic_set(&prog->aux->refcnt, 1);
a91263d5 1024 prog->gpl_compatible = is_gpl ? 1 : 0;
09756af4
AS
1025
1026 /* find program type: socket_filter vs tracing_filter */
1027 err = find_prog_type(type, prog);
1028 if (err < 0)
1029 goto free_prog;
1030
1031 /* run eBPF verifier */
9bac3d6d 1032 err = bpf_check(&prog, attr);
09756af4
AS
1033 if (err < 0)
1034 goto free_used_maps;
1035
1036 /* eBPF program is ready to be JITed */
d1c55ab5 1037 prog = bpf_prog_select_runtime(prog, &err);
04fd61ab
AS
1038 if (err < 0)
1039 goto free_used_maps;
09756af4 1040
dc4bb0e2
MKL
1041 err = bpf_prog_alloc_id(prog);
1042 if (err)
1043 goto free_used_maps;
1044
aa79781b 1045 err = bpf_prog_new_fd(prog);
b16d9aa4
MKL
1046 if (err < 0) {
1047 /* failed to allocate fd.
1048 * bpf_prog_put() is needed because the above
1049 * bpf_prog_alloc_id() has published the prog
1050 * to the userspace and the userspace may
1051 * have refcnt-ed it through BPF_PROG_GET_FD_BY_ID.
1052 */
1053 bpf_prog_put(prog);
1054 return err;
1055 }
09756af4 1056
74451e66 1057 bpf_prog_kallsyms_add(prog);
a67edbf4 1058 trace_bpf_prog_load(prog, err);
09756af4
AS
1059 return err;
1060
1061free_used_maps:
1062 free_used_maps(prog->aux);
1063free_prog:
aaac3ba9
AS
1064 bpf_prog_uncharge_memlock(prog);
1065free_prog_nouncharge:
09756af4
AS
1066 bpf_prog_free(prog);
1067 return err;
1068}
1069
b2197755
DB
1070#define BPF_OBJ_LAST_FIELD bpf_fd
1071
1072static int bpf_obj_pin(const union bpf_attr *attr)
1073{
1074 if (CHECK_ATTR(BPF_OBJ))
1075 return -EINVAL;
1076
535e7b4b 1077 return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
b2197755
DB
1078}
1079
1080static int bpf_obj_get(const union bpf_attr *attr)
1081{
1082 if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
1083 return -EINVAL;
1084
535e7b4b 1085 return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
b2197755
DB
1086}
1087
f4324551
DM
1088#ifdef CONFIG_CGROUP_BPF
1089
7f677633 1090#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
f4324551
DM
1091
1092static int bpf_prog_attach(const union bpf_attr *attr)
1093{
7f677633 1094 enum bpf_prog_type ptype;
f4324551
DM
1095 struct bpf_prog *prog;
1096 struct cgroup *cgrp;
7f677633 1097 int ret;
f4324551
DM
1098
1099 if (!capable(CAP_NET_ADMIN))
1100 return -EPERM;
1101
1102 if (CHECK_ATTR(BPF_PROG_ATTACH))
1103 return -EINVAL;
1104
7f677633
AS
1105 if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
1106 return -EINVAL;
1107
f4324551
DM
1108 switch (attr->attach_type) {
1109 case BPF_CGROUP_INET_INGRESS:
1110 case BPF_CGROUP_INET_EGRESS:
b2cd1257 1111 ptype = BPF_PROG_TYPE_CGROUP_SKB;
f4324551 1112 break;
61023658
DA
1113 case BPF_CGROUP_INET_SOCK_CREATE:
1114 ptype = BPF_PROG_TYPE_CGROUP_SOCK;
1115 break;
40304b2a
LB
1116 case BPF_CGROUP_SOCK_OPS:
1117 ptype = BPF_PROG_TYPE_SOCK_OPS;
1118 break;
f4324551
DM
1119 default:
1120 return -EINVAL;
1121 }
1122
b2cd1257
DA
1123 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
1124 if (IS_ERR(prog))
1125 return PTR_ERR(prog);
1126
1127 cgrp = cgroup_get_from_fd(attr->target_fd);
1128 if (IS_ERR(cgrp)) {
1129 bpf_prog_put(prog);
1130 return PTR_ERR(cgrp);
1131 }
1132
7f677633
AS
1133 ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
1134 attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
1135 if (ret)
1136 bpf_prog_put(prog);
b2cd1257
DA
1137 cgroup_put(cgrp);
1138
7f677633 1139 return ret;
f4324551
DM
1140}
1141
1142#define BPF_PROG_DETACH_LAST_FIELD attach_type
1143
1144static int bpf_prog_detach(const union bpf_attr *attr)
1145{
1146 struct cgroup *cgrp;
7f677633 1147 int ret;
f4324551
DM
1148
1149 if (!capable(CAP_NET_ADMIN))
1150 return -EPERM;
1151
1152 if (CHECK_ATTR(BPF_PROG_DETACH))
1153 return -EINVAL;
1154
1155 switch (attr->attach_type) {
1156 case BPF_CGROUP_INET_INGRESS:
1157 case BPF_CGROUP_INET_EGRESS:
61023658 1158 case BPF_CGROUP_INET_SOCK_CREATE:
40304b2a 1159 case BPF_CGROUP_SOCK_OPS:
f4324551
DM
1160 cgrp = cgroup_get_from_fd(attr->target_fd);
1161 if (IS_ERR(cgrp))
1162 return PTR_ERR(cgrp);
1163
7f677633 1164 ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
f4324551
DM
1165 cgroup_put(cgrp);
1166 break;
1167
1168 default:
1169 return -EINVAL;
1170 }
1171
7f677633 1172 return ret;
f4324551 1173}
40304b2a 1174
f4324551
DM
1175#endif /* CONFIG_CGROUP_BPF */
1176
1cf1cae9
AS
1177#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
1178
1179static int bpf_prog_test_run(const union bpf_attr *attr,
1180 union bpf_attr __user *uattr)
1181{
1182 struct bpf_prog *prog;
1183 int ret = -ENOTSUPP;
1184
1185 if (CHECK_ATTR(BPF_PROG_TEST_RUN))
1186 return -EINVAL;
1187
1188 prog = bpf_prog_get(attr->test.prog_fd);
1189 if (IS_ERR(prog))
1190 return PTR_ERR(prog);
1191
1192 if (prog->aux->ops->test_run)
1193 ret = prog->aux->ops->test_run(prog, attr, uattr);
1194
1195 bpf_prog_put(prog);
1196 return ret;
1197}
1198
34ad5580
MKL
1199#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id
1200
1201static int bpf_obj_get_next_id(const union bpf_attr *attr,
1202 union bpf_attr __user *uattr,
1203 struct idr *idr,
1204 spinlock_t *lock)
1205{
1206 u32 next_id = attr->start_id;
1207 int err = 0;
1208
1209 if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX)
1210 return -EINVAL;
1211
1212 if (!capable(CAP_SYS_ADMIN))
1213 return -EPERM;
1214
1215 next_id++;
1216 spin_lock_bh(lock);
1217 if (!idr_get_next(idr, &next_id))
1218 err = -ENOENT;
1219 spin_unlock_bh(lock);
1220
1221 if (!err)
1222 err = put_user(next_id, &uattr->next_id);
1223
1224 return err;
1225}
1226
b16d9aa4
MKL
1227#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
1228
1229static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
1230{
1231 struct bpf_prog *prog;
1232 u32 id = attr->prog_id;
1233 int fd;
1234
1235 if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
1236 return -EINVAL;
1237
1238 if (!capable(CAP_SYS_ADMIN))
1239 return -EPERM;
1240
1241 spin_lock_bh(&prog_idr_lock);
1242 prog = idr_find(&prog_idr, id);
1243 if (prog)
1244 prog = bpf_prog_inc_not_zero(prog);
1245 else
1246 prog = ERR_PTR(-ENOENT);
1247 spin_unlock_bh(&prog_idr_lock);
1248
1249 if (IS_ERR(prog))
1250 return PTR_ERR(prog);
1251
1252 fd = bpf_prog_new_fd(prog);
1253 if (fd < 0)
1254 bpf_prog_put(prog);
1255
1256 return fd;
1257}
1258
bd5f5f4e
MKL
1259#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD map_id
1260
1261static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
1262{
1263 struct bpf_map *map;
1264 u32 id = attr->map_id;
1265 int fd;
1266
1267 if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID))
1268 return -EINVAL;
1269
1270 if (!capable(CAP_SYS_ADMIN))
1271 return -EPERM;
1272
1273 spin_lock_bh(&map_idr_lock);
1274 map = idr_find(&map_idr, id);
1275 if (map)
1276 map = bpf_map_inc_not_zero(map, true);
1277 else
1278 map = ERR_PTR(-ENOENT);
1279 spin_unlock_bh(&map_idr_lock);
1280
1281 if (IS_ERR(map))
1282 return PTR_ERR(map);
1283
1284 fd = bpf_map_new_fd(map);
1285 if (fd < 0)
1286 bpf_map_put(map);
1287
1288 return fd;
1289}
1290
1e270976
MKL
1291static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
1292 const union bpf_attr *attr,
1293 union bpf_attr __user *uattr)
1294{
1295 struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
1296 struct bpf_prog_info info = {};
1297 u32 info_len = attr->info.info_len;
1298 char __user *uinsns;
1299 u32 ulen;
1300 int err;
1301
1302 err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
1303 if (err)
1304 return err;
1305 info_len = min_t(u32, sizeof(info), info_len);
1306
1307 if (copy_from_user(&info, uinfo, info_len))
89b09689 1308 return -EFAULT;
1e270976
MKL
1309
1310 info.type = prog->type;
1311 info.id = prog->aux->id;
1312
1313 memcpy(info.tag, prog->tag, sizeof(prog->tag));
1314
1315 if (!capable(CAP_SYS_ADMIN)) {
1316 info.jited_prog_len = 0;
1317 info.xlated_prog_len = 0;
1318 goto done;
1319 }
1320
1321 ulen = info.jited_prog_len;
1322 info.jited_prog_len = prog->jited_len;
1323 if (info.jited_prog_len && ulen) {
1324 uinsns = u64_to_user_ptr(info.jited_prog_insns);
1325 ulen = min_t(u32, info.jited_prog_len, ulen);
1326 if (copy_to_user(uinsns, prog->bpf_func, ulen))
1327 return -EFAULT;
1328 }
1329
1330 ulen = info.xlated_prog_len;
9975a54b 1331 info.xlated_prog_len = bpf_prog_insn_size(prog);
1e270976
MKL
1332 if (info.xlated_prog_len && ulen) {
1333 uinsns = u64_to_user_ptr(info.xlated_prog_insns);
1334 ulen = min_t(u32, info.xlated_prog_len, ulen);
1335 if (copy_to_user(uinsns, prog->insnsi, ulen))
1336 return -EFAULT;
1337 }
1338
1339done:
1340 if (copy_to_user(uinfo, &info, info_len) ||
1341 put_user(info_len, &uattr->info.info_len))
1342 return -EFAULT;
1343
1344 return 0;
1345}
1346
1347static int bpf_map_get_info_by_fd(struct bpf_map *map,
1348 const union bpf_attr *attr,
1349 union bpf_attr __user *uattr)
1350{
1351 struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
1352 struct bpf_map_info info = {};
1353 u32 info_len = attr->info.info_len;
1354 int err;
1355
1356 err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
1357 if (err)
1358 return err;
1359 info_len = min_t(u32, sizeof(info), info_len);
1360
1361 info.type = map->map_type;
1362 info.id = map->id;
1363 info.key_size = map->key_size;
1364 info.value_size = map->value_size;
1365 info.max_entries = map->max_entries;
1366 info.map_flags = map->map_flags;
1367
1368 if (copy_to_user(uinfo, &info, info_len) ||
1369 put_user(info_len, &uattr->info.info_len))
1370 return -EFAULT;
1371
1372 return 0;
1373}
1374
1375#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info
1376
1377static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
1378 union bpf_attr __user *uattr)
1379{
1380 int ufd = attr->info.bpf_fd;
1381 struct fd f;
1382 int err;
1383
1384 if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
1385 return -EINVAL;
1386
1387 f = fdget(ufd);
1388 if (!f.file)
1389 return -EBADFD;
1390
1391 if (f.file->f_op == &bpf_prog_fops)
1392 err = bpf_prog_get_info_by_fd(f.file->private_data, attr,
1393 uattr);
1394 else if (f.file->f_op == &bpf_map_fops)
1395 err = bpf_map_get_info_by_fd(f.file->private_data, attr,
1396 uattr);
1397 else
1398 err = -EINVAL;
1399
1400 fdput(f);
1401 return err;
1402}
1403
99c55f7d
AS
1404SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
1405{
1406 union bpf_attr attr = {};
1407 int err;
1408
1be7f75d 1409 if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
99c55f7d
AS
1410 return -EPERM;
1411
1e270976
MKL
1412 err = check_uarg_tail_zero(uattr, sizeof(attr), size);
1413 if (err)
1414 return err;
1415 size = min_t(u32, size, sizeof(attr));
99c55f7d
AS
1416
1417 /* copy attributes from user space, may be less than sizeof(bpf_attr) */
1418 if (copy_from_user(&attr, uattr, size) != 0)
1419 return -EFAULT;
1420
1421 switch (cmd) {
1422 case BPF_MAP_CREATE:
1423 err = map_create(&attr);
1424 break;
db20fd2b
AS
1425 case BPF_MAP_LOOKUP_ELEM:
1426 err = map_lookup_elem(&attr);
1427 break;
1428 case BPF_MAP_UPDATE_ELEM:
1429 err = map_update_elem(&attr);
1430 break;
1431 case BPF_MAP_DELETE_ELEM:
1432 err = map_delete_elem(&attr);
1433 break;
1434 case BPF_MAP_GET_NEXT_KEY:
1435 err = map_get_next_key(&attr);
1436 break;
09756af4
AS
1437 case BPF_PROG_LOAD:
1438 err = bpf_prog_load(&attr);
1439 break;
b2197755
DB
1440 case BPF_OBJ_PIN:
1441 err = bpf_obj_pin(&attr);
1442 break;
1443 case BPF_OBJ_GET:
1444 err = bpf_obj_get(&attr);
1445 break;
f4324551
DM
1446#ifdef CONFIG_CGROUP_BPF
1447 case BPF_PROG_ATTACH:
1448 err = bpf_prog_attach(&attr);
1449 break;
1450 case BPF_PROG_DETACH:
1451 err = bpf_prog_detach(&attr);
1452 break;
1453#endif
1cf1cae9
AS
1454 case BPF_PROG_TEST_RUN:
1455 err = bpf_prog_test_run(&attr, uattr);
1456 break;
34ad5580
MKL
1457 case BPF_PROG_GET_NEXT_ID:
1458 err = bpf_obj_get_next_id(&attr, uattr,
1459 &prog_idr, &prog_idr_lock);
1460 break;
1461 case BPF_MAP_GET_NEXT_ID:
1462 err = bpf_obj_get_next_id(&attr, uattr,
1463 &map_idr, &map_idr_lock);
1464 break;
b16d9aa4
MKL
1465 case BPF_PROG_GET_FD_BY_ID:
1466 err = bpf_prog_get_fd_by_id(&attr);
1467 break;
bd5f5f4e
MKL
1468 case BPF_MAP_GET_FD_BY_ID:
1469 err = bpf_map_get_fd_by_id(&attr);
1470 break;
1e270976
MKL
1471 case BPF_OBJ_GET_INFO_BY_FD:
1472 err = bpf_obj_get_info_by_fd(&attr, uattr);
1473 break;
99c55f7d
AS
1474 default:
1475 err = -EINVAL;
1476 break;
1477 }
1478
1479 return err;
1480}