// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 */
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <linux/bpf_trace.h>
#include <linux/bpf_lirc.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>
#include <linux/nospec.h>
#include <linux/audit.h>
#include <uapi/linux/btf.h>
#include <linux/pgtable.h>
#include <linux/bpf_lsm.h>
#include <linux/poll.h>
#include <linux/bpf-netns.h>
#include <linux/rcupdate_trace.h>
#include <linux/memcontrol.h>

#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
			  (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
#define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \
			IS_FD_HASH(map))

#define BPF_OBJ_FLAG_MASK   (BPF_F_RDONLY | BPF_F_WRONLY)

DEFINE_PER_CPU(int, bpf_prog_active);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);
static DEFINE_IDR(link_idr);
static DEFINE_SPINLOCK(link_idr_lock);

int sysctl_unprivileged_bpf_disabled __read_mostly =
	IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;

static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};

/*
 * If we're handed a bigger struct than we know of, ensure all the unknown bits
 * are 0 - i.e. new user-space does not rely on any kernel feature extensions
 * we don't know about yet.
 *
 * There is a ToCToU between this function call and the following
 * copy_from_user() call. However, this is not a concern since this function is
 * meant to be a future-proofing of bits.
 */
int bpf_check_uarg_tail_zero(bpfptr_t uaddr,
			     size_t expected_size,
			     size_t actual_size)
{
	int res;

	if (unlikely(actual_size > PAGE_SIZE))	/* silly large */
		return -E2BIG;

	if (actual_size <= expected_size)
		return 0;

	if (uaddr.is_kernel)
		res = memchr_inv(uaddr.kernel + expected_size, 0,
				 actual_size - expected_size) == NULL;
	else
		res = check_zeroed_user(uaddr.user + expected_size,
					actual_size - expected_size);
	if (res < 0)
		return res;
	return res ? 0 : -E2BIG;
}
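
/* Illustrative example (editor's note, not part of the original source):
 * if the kernel knows a 112-byte attribute struct for some command
 * (expected_size == 112) and a newer userspace passes 120 bytes
 * (actual_size == 120), the call succeeds only if bytes 112..119 are all
 * zero; otherwise -E2BIG is returned.
 */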

const struct bpf_map_ops bpf_map_offload_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc = bpf_map_offload_map_alloc,
	.map_free = bpf_map_offload_map_free,
	.map_check_btf = map_check_no_btf,
};

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	const struct bpf_map_ops *ops;
	u32 type = attr->map_type;
	struct bpf_map *map;
	int err;

	if (type >= ARRAY_SIZE(bpf_map_types))
		return ERR_PTR(-EINVAL);
	type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types));
	ops = bpf_map_types[type];
	if (!ops)
		return ERR_PTR(-EINVAL);

	if (ops->map_alloc_check) {
		err = ops->map_alloc_check(attr);
		if (err)
			return ERR_PTR(err);
	}
	if (attr->map_ifindex)
		ops = &bpf_map_offload_ops;
	map = ops->map_alloc(attr);
	if (IS_ERR(map))
		return map;
	map->ops = ops;
	map->map_type = type;
	return map;
}

static void bpf_map_write_active_inc(struct bpf_map *map)
{
	atomic64_inc(&map->writecnt);
}

static void bpf_map_write_active_dec(struct bpf_map *map)
{
	atomic64_dec(&map->writecnt);
}

bool bpf_map_write_active(const struct bpf_map *map)
{
	return atomic64_read(&map->writecnt) != 0;
}

static u32 bpf_map_value_size(const struct bpf_map *map)
{
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
		return round_up(map->value_size, 8) * num_possible_cpus();
	else if (IS_FD_MAP(map))
		return sizeof(u32);
	else
		return map->value_size;
}
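
/* Worked example (editor's note, not part of the original source): for a
 * BPF_MAP_TYPE_PERCPU_ARRAY with value_size == 12 on a machine with 4
 * possible CPUs, syscall-side copies use
 * round_up(12, 8) * 4 == 16 * 4 == 64 bytes per element.
 */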

static void maybe_wait_bpf_programs(struct bpf_map *map)
{
	/* Wait for any running BPF programs to complete so that
	 * userspace, when we return to it, knows that all programs
	 * that could be running use the new map value.
	 */
	if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
	    map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
		synchronize_rcu();
}

static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
				void *value, __u64 flags)
{
	int err;

	/* Need to create a kthread, thus must support schedule */
	if (bpf_map_is_dev_bound(map)) {
		return bpf_map_offload_update_elem(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
		   map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
		return map->ops->map_update_elem(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_SOCKHASH ||
		   map->map_type == BPF_MAP_TYPE_SOCKMAP) {
		return sock_map_update_elem_sys(map, key, value, flags);
	} else if (IS_FD_PROG_ARRAY(map)) {
		return bpf_fd_array_map_update_elem(map, f.file, key, value,
						    flags);
	}

	bpf_disable_instrumentation();
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
		err = bpf_percpu_cgroup_storage_update(map, key, value,
						       flags);
	} else if (IS_FD_ARRAY(map)) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
		rcu_read_lock();
		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
						  flags);
		rcu_read_unlock();
	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
		/* rcu_read_lock() is not needed */
		err = bpf_fd_reuseport_array_update_elem(map, key, value,
							 flags);
	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
		   map->map_type == BPF_MAP_TYPE_STACK ||
		   map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
		err = map->ops->map_push_elem(map, value, flags);
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, flags);
		rcu_read_unlock();
	}
	bpf_enable_instrumentation();
	maybe_wait_bpf_programs(map);

	return err;
}

static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
			      __u64 flags)
{
	void *ptr;
	int err;

	if (bpf_map_is_dev_bound(map))
		return bpf_map_offload_lookup_elem(map, key, value);

	bpf_disable_instrumentation();
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
		err = bpf_percpu_cgroup_storage_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
		err = bpf_fd_array_map_lookup_elem(map, key, value);
	} else if (IS_FD_HASH(map)) {
		err = bpf_fd_htab_map_lookup_elem(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
		err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
		   map->map_type == BPF_MAP_TYPE_STACK ||
		   map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
		err = map->ops->map_peek_elem(map, value);
	} else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
		/* struct_ops map requires directly updating "value" */
		err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
	} else {
		rcu_read_lock();
		if (map->ops->map_lookup_elem_sys_only)
			ptr = map->ops->map_lookup_elem_sys_only(map, key);
		else
			ptr = map->ops->map_lookup_elem(map, key);
		if (IS_ERR(ptr)) {
			err = PTR_ERR(ptr);
		} else if (!ptr) {
			err = -ENOENT;
		} else {
			err = 0;
			if (flags & BPF_F_LOCK)
				/* lock 'ptr' and copy everything but lock */
				copy_map_value_locked(map, value, ptr, true);
			else
				copy_map_value(map, value, ptr);
			/* mask lock and timer, since value wasn't zero inited */
			check_and_init_map_value(map, value);
		}
		rcu_read_unlock();
	}

	bpf_enable_instrumentation();
	maybe_wait_bpf_programs(map);

	return err;
}

/* Please, do not use this function outside of the map creation path
 * (e.g. in map update path) without taking care of setting the active
 * memory cgroup (see at bpf_map_kmalloc_node() for example).
 */
static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
{
	/* We really just want to fail instead of triggering OOM killer
	 * under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
	 * which is used for lower order allocation requests.
	 *
	 * It has been observed that higher order allocation requests done by
	 * vmalloc with __GFP_NORETRY being set might fail due to not trying
	 * to reclaim memory from the page cache, thus we set
	 * __GFP_RETRY_MAYFAIL to avoid such situations.
	 */

	const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_ACCOUNT;
	unsigned int flags = 0;
	unsigned long align = 1;
	void *area;

	if (size >= SIZE_MAX)
		return NULL;

	/* kmalloc()'ed memory can't be mmap()'ed */
	if (mmapable) {
		BUG_ON(!PAGE_ALIGNED(size));
		align = SHMLBA;
		flags = VM_USERMAP;
	} else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY,
				    numa_node);
		if (area != NULL)
			return area;
	}

	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
			gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL,
			flags, numa_node, __builtin_return_address(0));
}

void *bpf_map_area_alloc(u64 size, int numa_node)
{
	return __bpf_map_area_alloc(size, numa_node, false);
}

void *bpf_map_area_mmapable_alloc(u64 size, int numa_node)
{
	return __bpf_map_area_alloc(size, numa_node, true);
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}
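
/* Illustrative note (editor's note, not part of the original source): with
 * 4 KiB pages and PAGE_ALLOC_COSTLY_ORDER == 3, a non-mmapable request of up
 * to 32 KiB is first tried with kmalloc_node() and only falls back to vmalloc
 * on failure, while larger (or mmapable) requests go straight to
 * __vmalloc_node_range().
 */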

static u32 bpf_map_flags_retain_permanent(u32 flags)
{
	/* Some map creation flags are not tied to the map object but
	 * rather to the map fd instead, so they have no meaning upon
	 * map object inspection since multiple file descriptors with
	 * different (access) properties can exist here. Thus, given
	 * this has zero meaning for the map itself, let's clear these
	 * from here.
	 */
	return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY);
}

void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
{
	map->map_type = attr->map_type;
	map->key_size = attr->key_size;
	map->value_size = attr->value_size;
	map->max_entries = attr->max_entries;
	map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags);
	map->numa_node = bpf_map_attr_numa_node(attr);
	map->map_extra = attr->map_extra;
}

static int bpf_map_alloc_id(struct bpf_map *map)
{
	int id;

	idr_preload(GFP_KERNEL);
	spin_lock_bh(&map_idr_lock);
	id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
	if (id > 0)
		map->id = id;
	spin_unlock_bh(&map_idr_lock);
	idr_preload_end();

	if (WARN_ON_ONCE(!id))
		return -ENOSPC;

	return id > 0 ? 0 : id;
}

void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
{
	unsigned long flags;

	/* Offloaded maps are removed from the IDR store when their device
	 * disappears - even if someone holds an fd to them they are unusable,
	 * the memory is gone, all ops will fail; they are simply waiting for
	 * refcnt to drop to be freed.
	 */
	if (!map->id)
		return;

	if (do_idr_lock)
		spin_lock_irqsave(&map_idr_lock, flags);
	else
		__acquire(&map_idr_lock);

	idr_remove(&map_idr, map->id);
	map->id = 0;

	if (do_idr_lock)
		spin_unlock_irqrestore(&map_idr_lock, flags);
	else
		__release(&map_idr_lock);
}

#ifdef CONFIG_MEMCG_KMEM
static void bpf_map_save_memcg(struct bpf_map *map)
{
	map->memcg = get_mem_cgroup_from_mm(current->mm);
}

static void bpf_map_release_memcg(struct bpf_map *map)
{
	mem_cgroup_put(map->memcg);
}

void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
			   int node)
{
	struct mem_cgroup *old_memcg;
	void *ptr;

	old_memcg = set_active_memcg(map->memcg);
	ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
	set_active_memcg(old_memcg);

	return ptr;
}

void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
{
	struct mem_cgroup *old_memcg;
	void *ptr;

	old_memcg = set_active_memcg(map->memcg);
	ptr = kzalloc(size, flags | __GFP_ACCOUNT);
	set_active_memcg(old_memcg);

	return ptr;
}

void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
				    size_t align, gfp_t flags)
{
	struct mem_cgroup *old_memcg;
	void __percpu *ptr;

	old_memcg = set_active_memcg(map->memcg);
	ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT);
	set_active_memcg(old_memcg);

	return ptr;
}

#else
static void bpf_map_save_memcg(struct bpf_map *map)
{
}

static void bpf_map_release_memcg(struct bpf_map *map)
{
}
#endif

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	security_bpf_map_free(map);
	bpf_map_release_memcg(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic64_dec_and_test(&map->usercnt)) {
		if (map->ops->map_release_uref)
			map->ops->map_release_uref(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
{
	if (atomic64_dec_and_test(&map->refcnt)) {
		/* bpf_map_free_id() must be called first */
		bpf_map_free_id(map, do_idr_lock);
		btf_put(map->btf);
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put(struct bpf_map *map)
{
	__bpf_map_put(map, true);
}
EXPORT_SYMBOL_GPL(bpf_map_put);

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
{
	fmode_t mode = f.file->f_mode;

	/* Our file permissions may have been overridden by global
	 * map permissions facing syscall side.
	 */
	if (READ_ONCE(map->frozen))
		mode &= ~FMODE_CAN_WRITE;
	return mode;
}

#ifdef CONFIG_PROC_FS
/* Provides an approximation of the map's memory footprint.
 * Used only to provide backward compatibility and display
 * a reasonable "memlock" info.
 */
static unsigned long bpf_map_memory_footprint(const struct bpf_map *map)
{
	unsigned long size;

	size = round_up(map->key_size + bpf_map_value_size(map), 8);

	return round_up(map->max_entries * size, PAGE_SIZE);
}

static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	struct bpf_map *map = filp->private_data;
	u32 type = 0, jited = 0;

	if (map_type_contains_progs(map)) {
		spin_lock(&map->owner.lock);
		type = map->owner.type;
		jited = map->owner.jited;
		spin_unlock(&map->owner.lock);
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "map_extra:\t%#llx\n"
		   "memlock:\t%lu\n"
		   "map_id:\t%u\n"
		   "frozen:\t%u\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   (unsigned long long)map->map_extra,
		   bpf_map_memory_footprint(map),
		   map->id,
		   READ_ONCE(map->frozen));
	if (type) {
		seq_printf(m, "owner_prog_type:\t%u\n", type);
		seq_printf(m, "owner_jited:\t%u\n", jited);
	}
}
#endif

static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz,
			      loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_READ.
	 */
	return -EINVAL;
}

static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
			       size_t siz, loff_t *ppos)
{
	/* We need this handler such that alloc_file() enables
	 * f_mode with FMODE_CAN_WRITE.
	 */
	return -EINVAL;
}

/* called for any extra memory-mapped regions (except initial) */
static void bpf_map_mmap_open(struct vm_area_struct *vma)
{
	struct bpf_map *map = vma->vm_file->private_data;

	if (vma->vm_flags & VM_MAYWRITE)
		bpf_map_write_active_inc(map);
}

/* called for all unmapped memory regions (including initial) */
static void bpf_map_mmap_close(struct vm_area_struct *vma)
{
	struct bpf_map *map = vma->vm_file->private_data;

	if (vma->vm_flags & VM_MAYWRITE)
		bpf_map_write_active_dec(map);
}

static const struct vm_operations_struct bpf_map_default_vmops = {
	.open = bpf_map_mmap_open,
	.close = bpf_map_mmap_close,
};

static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct bpf_map *map = filp->private_data;
	int err;

	if (!map->ops->map_mmap || map_value_has_spin_lock(map) ||
	    map_value_has_timer(map))
		return -ENOTSUPP;

	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	mutex_lock(&map->freeze_mutex);

	if (vma->vm_flags & VM_WRITE) {
		if (map->frozen) {
			err = -EPERM;
			goto out;
		}
		/* map is meant to be read-only, so do not allow mapping as
		 * writable, because it's possible to leak a writable page
		 * reference and allow user-space to still modify it after
		 * freezing, while verifier will assume contents do not change
		 */
		if (map->map_flags & BPF_F_RDONLY_PROG) {
			err = -EACCES;
			goto out;
		}
	}

	/* set default open/close callbacks */
	vma->vm_ops = &bpf_map_default_vmops;
	vma->vm_private_data = map;
	vma->vm_flags &= ~VM_MAYEXEC;
	if (!(vma->vm_flags & VM_WRITE))
		/* disallow re-mapping with PROT_WRITE */
		vma->vm_flags &= ~VM_MAYWRITE;

	err = map->ops->map_mmap(map, vma);
	if (err)
		goto out;

	if (vma->vm_flags & VM_MAYWRITE)
		bpf_map_write_active_inc(map);
out:
	mutex_unlock(&map->freeze_mutex);
	return err;
}
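
/* Illustrative note (editor's note, not part of the original source):
 * assuming a map type that supports mmap (e.g. an array created with
 * BPF_F_MMAPABLE), a PROT_READ, MAP_SHARED mmap() of a frozen map succeeds
 * with VM_MAYWRITE cleared, so the mapping can never be upgraded via
 * mprotect(PROT_WRITE), while a PROT_WRITE mapping of the same frozen map
 * fails with -EPERM.
 */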

static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_poll)
		return map->ops->map_poll(map, filp, pts);

	return EPOLLERR;
}

const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_map_show_fdinfo,
#endif
	.release = bpf_map_release,
	.read = bpf_dummy_read,
	.write = bpf_dummy_write,
	.mmap = bpf_map_mmap,
	.poll = bpf_map_poll,
};

int bpf_map_new_fd(struct bpf_map *map, int flags)
{
	int ret;

	ret = security_bpf_map(map, OPEN_FMODE(flags));
	if (ret < 0)
		return ret;

	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				flags | O_CLOEXEC);
}

int bpf_get_file_flag(int flags)
{
	if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
		return -EINVAL;
	if (flags & BPF_F_RDONLY)
		return O_RDONLY;
	if (flags & BPF_F_WRONLY)
		return O_WRONLY;
	return O_RDWR;
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL
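
/* Illustrative expansion (editor's note, not part of the original source):
 * for the BPF_MAP_CREATE command, whose last used field is map_extra
 * (BPF_MAP_CREATE_LAST_FIELD below), CHECK_ATTR(BPF_MAP_CREATE) verifies that
 * every byte between the end of attr->map_extra and the end of the union is
 * zero, so attributes the kernel does not know about are rejected.
 */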

/* dst and src must have at least "size" number of bytes.
 * Return strlen on success and < 0 on error.
 */
int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
{
	const char *end = src + size;
	const char *orig_src = src;

	memset(dst, 0, size);
	/* Copy all isalnum(), '_' and '.' chars. */
	while (src < end && *src) {
		if (!isalnum(*src) &&
		    *src != '_' && *src != '.')
			return -EINVAL;
		*dst++ = *src++;
	}

	/* No '\0' found in "size" number of bytes */
	if (src == end)
		return -EINVAL;

	return src - orig_src;
}
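
/* Illustrative examples (editor's note, not part of the original source):
 * "my_map.v2" is accepted and its length returned; "my map" fails with
 * -EINVAL since a space is neither alphanumeric nor '_'/'.'; a name that
 * fills all "size" bytes without a terminating NUL also fails with -EINVAL.
 */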

int map_check_no_btf(const struct bpf_map *map,
		     const struct btf *btf,
		     const struct btf_type *key_type,
		     const struct btf_type *value_type)
{
	return -ENOTSUPP;
}

static int map_check_btf(struct bpf_map *map, const struct btf *btf,
			 u32 btf_key_id, u32 btf_value_id)
{
	const struct btf_type *key_type, *value_type;
	u32 key_size, value_size;
	int ret = 0;

	/* Some maps allow key to be unspecified. */
	if (btf_key_id) {
		key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
		if (!key_type || key_size != map->key_size)
			return -EINVAL;
	} else {
		key_type = btf_type_by_id(btf, 0);
		if (!map->ops->map_check_btf)
			return -EINVAL;
	}

	value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
	if (!value_type || value_size != map->value_size)
		return -EINVAL;

	map->spin_lock_off = btf_find_spin_lock(btf, value_type);

	if (map_value_has_spin_lock(map)) {
		if (map->map_flags & BPF_F_RDONLY_PROG)
			return -EACCES;
		if (map->map_type != BPF_MAP_TYPE_HASH &&
		    map->map_type != BPF_MAP_TYPE_ARRAY &&
		    map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
		    map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
		    map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
		    map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
			return -ENOTSUPP;
		if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
		    map->value_size) {
			WARN_ONCE(1,
				  "verifier bug spin_lock_off %d value_size %d\n",
				  map->spin_lock_off, map->value_size);
			return -EFAULT;
		}
	}

	map->timer_off = btf_find_timer(btf, value_type);
	if (map_value_has_timer(map)) {
		if (map->map_flags & BPF_F_RDONLY_PROG)
			return -EACCES;
		if (map->map_type != BPF_MAP_TYPE_HASH &&
		    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
		    map->map_type != BPF_MAP_TYPE_ARRAY)
			return -EOPNOTSUPP;
	}

	if (map->ops->map_check_btf)
		ret = map->ops->map_check_btf(map, btf, key_type, value_type);

	return ret;
}

#define BPF_MAP_CREATE_LAST_FIELD map_extra
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_map *map;
	int f_flags;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	if (attr->btf_vmlinux_value_type_id) {
		if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS ||
		    attr->btf_key_type_id || attr->btf_value_type_id)
			return -EINVAL;
	} else if (attr->btf_key_type_id && !attr->btf_value_type_id) {
		return -EINVAL;
	}

	if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER &&
	    attr->map_extra != 0)
		return -EINVAL;

	f_flags = bpf_get_file_flag(attr->map_flags);
	if (f_flags < 0)
		return f_flags;

	if (numa_node != NUMA_NO_NODE &&
	    ((unsigned int)numa_node >= nr_node_ids ||
	     !node_online(numa_node)))
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = bpf_obj_name_cpy(map->name, attr->map_name,
			       sizeof(attr->map_name));
	if (err < 0)
		goto free_map;

	atomic64_set(&map->refcnt, 1);
	atomic64_set(&map->usercnt, 1);
	mutex_init(&map->freeze_mutex);
	spin_lock_init(&map->owner.lock);

	map->spin_lock_off = -EINVAL;
	map->timer_off = -EINVAL;
	if (attr->btf_key_type_id || attr->btf_value_type_id ||
	    /* Even if the map's value is a kernel struct,
	     * the bpf_prog.o must have BTF to begin with
	     * to figure out the corresponding kernel
	     * counterpart. Thus, attr->btf_fd has
	     * to be valid also.
	     */
	    attr->btf_vmlinux_value_type_id) {
		struct btf *btf;

		btf = btf_get_by_fd(attr->btf_fd);
		if (IS_ERR(btf)) {
			err = PTR_ERR(btf);
			goto free_map;
		}
		if (btf_is_kernel(btf)) {
			btf_put(btf);
			err = -EACCES;
			goto free_map;
		}
		map->btf = btf;

		if (attr->btf_value_type_id) {
			err = map_check_btf(map, btf, attr->btf_key_type_id,
					    attr->btf_value_type_id);
			if (err)
				goto free_map;
		}

		map->btf_key_type_id = attr->btf_key_type_id;
		map->btf_value_type_id = attr->btf_value_type_id;
		map->btf_vmlinux_value_type_id =
			attr->btf_vmlinux_value_type_id;
	}

	err = security_bpf_map_alloc(map);
	if (err)
		goto free_map;

	err = bpf_map_alloc_id(map);
	if (err)
		goto free_map_sec;

	bpf_map_save_memcg(map);

	err = bpf_map_new_fd(map, f_flags);
	if (err < 0) {
		/* failed to allocate fd.
		 * bpf_map_put_with_uref() is needed because the above
		 * bpf_map_alloc_id() has published the map
		 * to the userspace and the userspace may
		 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
		 */
		bpf_map_put_with_uref(map);
		return err;
	}

	return err;

free_map_sec:
	security_bpf_map_free(map);
free_map:
	btf_put(map->btf);
	map->ops->map_free(map);
	return err;
}
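
/* Hedged usage sketch (editor's note, not part of this file): from userspace,
 * map creation goes through the bpf(2) syscall, roughly:
 *
 *	union bpf_attr attr = {
 *		.map_type    = BPF_MAP_TYPE_ARRAY,
 *		.key_size    = sizeof(__u32),
 *		.value_size  = sizeof(__u64),
 *		.max_entries = 256,
 *	};
 *	int fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *
 * A failure surfaces as -1 with errno set to the negative return values
 * produced above (e.g. EINVAL from CHECK_ATTR, EPERM from the LSM hook).
 */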

/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

void bpf_map_inc(struct bpf_map *map)
{
	atomic64_inc(&map->refcnt);
}
EXPORT_SYMBOL_GPL(bpf_map_inc);

void bpf_map_inc_with_uref(struct bpf_map *map)
{
	atomic64_inc(&map->refcnt);
	atomic64_inc(&map->usercnt);
}
EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref);

struct bpf_map *bpf_map_get(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	bpf_map_inc(map);
	fdput(f);

	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	bpf_map_inc_with_uref(map);
	fdput(f);

	return map;
}

/* map_idr_lock should have been held */
static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
{
	int refold;

	refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0);
	if (!refold)
		return ERR_PTR(-ENOENT);
	if (uref)
		atomic64_inc(&map->usercnt);

	return map;
}

struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
{
	spin_lock_bh(&map_idr_lock);
	map = __bpf_map_inc_not_zero(map, false);
	spin_unlock_bh(&map_idr_lock);

	return map;
}
EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

static void *__bpf_copy_key(void __user *ukey, u64 key_size)
{
	if (key_size)
		return vmemdup_user(ukey, key_size);

	if (ukey)
		return ERR_PTR(-EINVAL);

	return NULL;
}

static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size)
{
	if (key_size)
		return kvmemdup_bpfptr(ukey, key_size);

	if (!bpfptr_is_null(ukey))
		return ERR_PTR(-EINVAL);

	return NULL;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	if (attr->flags & ~BPF_F_LOCK)
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}

	if ((attr->flags & BPF_F_LOCK) &&
	    !map_value_has_spin_lock(map)) {
		err = -EINVAL;
		goto err_put;
	}

	key = __bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	value_size = bpf_map_value_size(map);

	err = -ENOMEM;
	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
		if (copy_from_user(value, uvalue, value_size))
			err = -EFAULT;
		else
			err = bpf_map_copy_value(map, key, value, attr->flags);
		goto free_value;
	}

	err = bpf_map_copy_value(map, key, value, attr->flags);
	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	err = 0;

free_value:
	kvfree(value);
free_key:
	kvfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
{
	bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel);
	bpfptr_t uvalue = make_bpfptr(attr->value, uattr.is_kernel);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	bpf_map_write_active_inc(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	if ((attr->flags & BPF_F_LOCK) &&
	    !map_value_has_spin_lock(map)) {
		err = -EINVAL;
		goto err_put;
	}

	key = ___bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	value_size = bpf_map_value_size(map);

	err = -ENOMEM;
	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_bpfptr(value, uvalue, value_size) != 0)
		goto free_value;

	err = bpf_map_update_value(map, f, key, value, attr->flags);

free_value:
	kvfree(value);
free_key:
	kvfree(key);
err_put:
	bpf_map_write_active_dec(map);
	fdput(f);
	return err;
}

1191
1192#define BPF_MAP_DELETE_ELEM_LAST_FIELD key
1193
1194static int map_delete_elem(union bpf_attr *attr)
1195{
535e7b4b 1196 void __user *ukey = u64_to_user_ptr(attr->key);
db20fd2b 1197 int ufd = attr->map_fd;
db20fd2b 1198 struct bpf_map *map;
592867bf 1199 struct fd f;
db20fd2b
AS
1200 void *key;
1201 int err;
1202
1203 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
1204 return -EINVAL;
1205
592867bf 1206 f = fdget(ufd);
c2101297 1207 map = __bpf_map_get(f);
db20fd2b
AS
1208 if (IS_ERR(map))
1209 return PTR_ERR(map);
353050be 1210 bpf_map_write_active_inc(map);
87df15de 1211 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
6e71b04a
CF
1212 err = -EPERM;
1213 goto err_put;
1214 }
1215
c9d29f46 1216 key = __bpf_copy_key(ukey, map->key_size);
e4448ed8
AV
1217 if (IS_ERR(key)) {
1218 err = PTR_ERR(key);
db20fd2b 1219 goto err_put;
e4448ed8 1220 }
db20fd2b 1221
a3884572
JK
1222 if (bpf_map_is_dev_bound(map)) {
1223 err = bpf_map_offload_delete_elem(map, key);
1224 goto out;
85d33df3
MKL
1225 } else if (IS_FD_PROG_ARRAY(map) ||
1226 map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
1227 /* These maps require sleepable context */
da765a2f
DB
1228 err = map->ops->map_delete_elem(map, key);
1229 goto out;
a3884572
JK
1230 }
1231
b6e5dae1 1232 bpf_disable_instrumentation();
db20fd2b
AS
1233 rcu_read_lock();
1234 err = map->ops->map_delete_elem(map, key);
1235 rcu_read_unlock();
b6e5dae1 1236 bpf_enable_instrumentation();
1ae80cf3 1237 maybe_wait_bpf_programs(map);
a3884572 1238out:
44779a4b 1239 kvfree(key);
db20fd2b 1240err_put:
353050be 1241 bpf_map_write_active_dec(map);
db20fd2b
AS
1242 fdput(f);
1243 return err;
1244}
1245
1246/* last field in 'union bpf_attr' used by this command */
1247#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
1248
1249static int map_get_next_key(union bpf_attr *attr)
1250{
535e7b4b
MS
1251 void __user *ukey = u64_to_user_ptr(attr->key);
1252 void __user *unext_key = u64_to_user_ptr(attr->next_key);
db20fd2b 1253 int ufd = attr->map_fd;
db20fd2b
AS
1254 struct bpf_map *map;
1255 void *key, *next_key;
592867bf 1256 struct fd f;
db20fd2b
AS
1257 int err;
1258
1259 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
1260 return -EINVAL;
1261
592867bf 1262 f = fdget(ufd);
c2101297 1263 map = __bpf_map_get(f);
db20fd2b
AS
1264 if (IS_ERR(map))
1265 return PTR_ERR(map);
87df15de 1266 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
6e71b04a
CF
1267 err = -EPERM;
1268 goto err_put;
1269 }
1270
8fe45924 1271 if (ukey) {
c9d29f46 1272 key = __bpf_copy_key(ukey, map->key_size);
e4448ed8
AV
1273 if (IS_ERR(key)) {
1274 err = PTR_ERR(key);
8fe45924 1275 goto err_put;
e4448ed8 1276 }
8fe45924
TQ
1277 } else {
1278 key = NULL;
1279 }
db20fd2b
AS
1280
1281 err = -ENOMEM;
44779a4b 1282 next_key = kvmalloc(map->key_size, GFP_USER);
db20fd2b
AS
1283 if (!next_key)
1284 goto free_key;
1285
a3884572
JK
1286 if (bpf_map_is_dev_bound(map)) {
1287 err = bpf_map_offload_get_next_key(map, key, next_key);
1288 goto out;
1289 }
1290
db20fd2b
AS
1291 rcu_read_lock();
1292 err = map->ops->map_get_next_key(map, key, next_key);
1293 rcu_read_unlock();
a3884572 1294out:
db20fd2b
AS
1295 if (err)
1296 goto free_next_key;
1297
1298 err = -EFAULT;
1299 if (copy_to_user(unext_key, next_key, map->key_size) != 0)
1300 goto free_next_key;
1301
1302 err = 0;
1303
1304free_next_key:
44779a4b 1305 kvfree(next_key);
db20fd2b 1306free_key:
44779a4b 1307 kvfree(key);
db20fd2b
AS
1308err_put:
1309 fdput(f);
1310 return err;
1311}
1312
aa2e93b8
BV
1313int generic_map_delete_batch(struct bpf_map *map,
1314 const union bpf_attr *attr,
1315 union bpf_attr __user *uattr)
1316{
1317 void __user *keys = u64_to_user_ptr(attr->batch.keys);
1318 u32 cp, max_count;
1319 int err = 0;
1320 void *key;
1321
1322 if (attr->batch.elem_flags & ~BPF_F_LOCK)
1323 return -EINVAL;
1324
1325 if ((attr->batch.elem_flags & BPF_F_LOCK) &&
1326 !map_value_has_spin_lock(map)) {
1327 return -EINVAL;
1328 }
1329
1330 max_count = attr->batch.count;
1331 if (!max_count)
1332 return 0;
1333
44779a4b 1334 key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
2e3a94aa
BV
1335 if (!key)
1336 return -ENOMEM;
1337
aa2e93b8 1338 for (cp = 0; cp < max_count; cp++) {
2e3a94aa
BV
1339 err = -EFAULT;
1340 if (copy_from_user(key, keys + cp * map->key_size,
1341 map->key_size))
aa2e93b8 1342 break;
aa2e93b8
BV
1343
1344 if (bpf_map_is_dev_bound(map)) {
1345 err = bpf_map_offload_delete_elem(map, key);
1346 break;
1347 }
1348
b6e5dae1 1349 bpf_disable_instrumentation();
aa2e93b8
BV
1350 rcu_read_lock();
1351 err = map->ops->map_delete_elem(map, key);
1352 rcu_read_unlock();
b6e5dae1 1353 bpf_enable_instrumentation();
aa2e93b8
BV
1354 maybe_wait_bpf_programs(map);
1355 if (err)
1356 break;
1357 }
1358 if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
1359 err = -EFAULT;
2e3a94aa 1360
44779a4b 1361 kvfree(key);
aa2e93b8
BV
1362 return err;
1363}
1364
int generic_map_update_batch(struct bpf_map *map,
			     const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	void __user *values = u64_to_user_ptr(attr->batch.values);
	void __user *keys = u64_to_user_ptr(attr->batch.keys);
	u32 value_size, cp, max_count;
	int ufd = attr->batch.map_fd;
	void *key, *value;
	struct fd f;
	int err = 0;

	if (attr->batch.elem_flags & ~BPF_F_LOCK)
		return -EINVAL;

	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
	    !map_value_has_spin_lock(map)) {
		return -EINVAL;
	}

	value_size = bpf_map_value_size(map);

	max_count = attr->batch.count;
	if (!max_count)
		return 0;

	key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
	if (!key)
		return -ENOMEM;

	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value) {
		kvfree(key);
		return -ENOMEM;
	}

	f = fdget(ufd); /* bpf_map_do_batch() guarantees ufd is valid */
	for (cp = 0; cp < max_count; cp++) {
		err = -EFAULT;
		if (copy_from_user(key, keys + cp * map->key_size,
				   map->key_size) ||
		    copy_from_user(value, values + cp * value_size, value_size))
			break;

		err = bpf_map_update_value(map, f, key, value,
					   attr->batch.elem_flags);

		if (err)
			break;
	}

	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
		err = -EFAULT;

	kvfree(value);
	kvfree(key);
	fdput(f);
	return err;
}

#define MAP_LOOKUP_RETRIES 3

int generic_map_lookup_batch(struct bpf_map *map,
			     const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch);
	void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
	void __user *values = u64_to_user_ptr(attr->batch.values);
	void __user *keys = u64_to_user_ptr(attr->batch.keys);
	void *buf, *buf_prevkey, *prev_key, *key, *value;
	int err, retry = MAP_LOOKUP_RETRIES;
	u32 value_size, cp, max_count;

	if (attr->batch.elem_flags & ~BPF_F_LOCK)
		return -EINVAL;

	if ((attr->batch.elem_flags & BPF_F_LOCK) &&
	    !map_value_has_spin_lock(map))
		return -EINVAL;

	value_size = bpf_map_value_size(map);

	max_count = attr->batch.count;
	if (!max_count)
		return 0;

	if (put_user(0, &uattr->batch.count))
		return -EFAULT;

	buf_prevkey = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
	if (!buf_prevkey)
		return -ENOMEM;

	buf = kvmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
	if (!buf) {
		kvfree(buf_prevkey);
		return -ENOMEM;
	}

	err = -EFAULT;
	prev_key = NULL;
	if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size))
		goto free_buf;
	key = buf;
	value = key + map->key_size;
	if (ubatch)
		prev_key = buf_prevkey;

	for (cp = 0; cp < max_count;) {
		rcu_read_lock();
		err = map->ops->map_get_next_key(map, prev_key, key);
		rcu_read_unlock();
		if (err)
			break;
		err = bpf_map_copy_value(map, key, value,
					 attr->batch.elem_flags);

		if (err == -ENOENT) {
			if (retry) {
				retry--;
				continue;
			}
			err = -EINTR;
			break;
		}

		if (err)
			goto free_buf;

		if (copy_to_user(keys + cp * map->key_size, key,
				 map->key_size)) {
			err = -EFAULT;
			goto free_buf;
		}
		if (copy_to_user(values + cp * value_size, value, value_size)) {
			err = -EFAULT;
			goto free_buf;
		}

		if (!prev_key)
			prev_key = buf_prevkey;

		swap(prev_key, key);
		retry = MAP_LOOKUP_RETRIES;
		cp++;
	}

	if (err == -EFAULT)
		goto free_buf;

	if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) ||
	    (cp && copy_to_user(uobatch, prev_key, map->key_size))))
		err = -EFAULT;

free_buf:
	kvfree(buf_prevkey);
	kvfree(buf);
	return err;
}
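
/* Illustrative note (editor's note, not part of the original source):
 * -ENOENT from bpf_map_copy_value() here means the key just returned by
 * map_get_next_key() was deleted concurrently. The loop retries from the
 * same prev_key up to MAP_LOOKUP_RETRIES (3) times before giving up with
 * -EINTR; userspace can simply re-issue the batch starting at out_batch.
 */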

#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags

static int map_lookup_and_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_user_ptr(attr->key);
	void __user *uvalue = u64_to_user_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
		return -EINVAL;

	if (attr->flags & ~BPF_F_LOCK)
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	bpf_map_write_active_inc(map);
	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) ||
	    !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	if (attr->flags &&
	    (map->map_type == BPF_MAP_TYPE_QUEUE ||
	     map->map_type == BPF_MAP_TYPE_STACK)) {
		err = -EINVAL;
		goto err_put;
	}

	if ((attr->flags & BPF_F_LOCK) &&
	    !map_value_has_spin_lock(map)) {
		err = -EINVAL;
		goto err_put;
	}

	key = __bpf_copy_key(ukey, map->key_size);
	if (IS_ERR(key)) {
		err = PTR_ERR(key);
		goto err_put;
	}

	value_size = bpf_map_value_size(map);

	err = -ENOMEM;
	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -ENOTSUPP;
	if (map->map_type == BPF_MAP_TYPE_QUEUE ||
	    map->map_type == BPF_MAP_TYPE_STACK) {
		err = map->ops->map_pop_elem(map, value);
	} else if (map->map_type == BPF_MAP_TYPE_HASH ||
		   map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		   map->map_type == BPF_MAP_TYPE_LRU_HASH ||
		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		if (!bpf_map_is_dev_bound(map)) {
			bpf_disable_instrumentation();
			rcu_read_lock();
			err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags);
			rcu_read_unlock();
			bpf_enable_instrumentation();
		}
	}

	if (err)
		goto free_value;

	if (copy_to_user(uvalue, value, value_size) != 0) {
		err = -EFAULT;
		goto free_value;
	}

	err = 0;

free_value:
	kvfree(value);
free_key:
	kvfree(key);
err_put:
	bpf_map_write_active_dec(map);
	fdput(f);
	return err;
}

#define BPF_MAP_FREEZE_LAST_FIELD map_fd

static int map_freeze(const union bpf_attr *attr)
{
	int err = 0, ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;

	if (CHECK_ATTR(BPF_MAP_FREEZE))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS ||
	    map_value_has_timer(map)) {
		fdput(f);
		return -ENOTSUPP;
	}

	mutex_lock(&map->freeze_mutex);
	if (bpf_map_write_active(map)) {
		err = -EBUSY;
		goto err_put;
	}
	if (READ_ONCE(map->frozen)) {
		err = -EBUSY;
		goto err_put;
	}
	if (!bpf_capable()) {
		err = -EPERM;
		goto err_put;
	}

	WRITE_ONCE(map->frozen, true);
err_put:
	mutex_unlock(&map->freeze_mutex);
	fdput(f);
	return err;
}
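
/* Illustrative note (editor's note, not part of the original source):
 * freezing forbids future syscall-side writes (map_get_sys_perms() drops
 * FMODE_CAN_WRITE once map->frozen is set), and the -EBUSY check against
 * bpf_map_write_active() under freeze_mutex rejects the freeze while a
 * writable mmap() or an in-flight write syscall still holds the write count.
 */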

static const struct bpf_prog_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = & _name ## _prog_ops,
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	const struct bpf_prog_ops *ops;

	if (type >= ARRAY_SIZE(bpf_prog_types))
		return -EINVAL;
	type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types));
	ops = bpf_prog_types[type];
	if (!ops)
		return -EINVAL;

	if (!bpf_prog_is_dev_bound(prog->aux))
		prog->aux->ops = ops;
	else
		prog->aux->ops = &bpf_offload_prog_ops;
	prog->type = type;
	return 0;
}

enum bpf_audit {
	BPF_AUDIT_LOAD,
	BPF_AUDIT_UNLOAD,
	BPF_AUDIT_MAX,
};

static const char * const bpf_audit_str[BPF_AUDIT_MAX] = {
	[BPF_AUDIT_LOAD] = "LOAD",
	[BPF_AUDIT_UNLOAD] = "UNLOAD",
};

static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op)
{
	struct audit_context *ctx = NULL;
	struct audit_buffer *ab;

	if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX))
		return;
	if (audit_enabled == AUDIT_OFF)
		return;
	if (op == BPF_AUDIT_LOAD)
		ctx = audit_context();
	ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF);
	if (unlikely(!ab))
		return;
	audit_log_format(ab, "prog-id=%u op=%s",
			 prog->aux->id, bpf_audit_str[op]);
	audit_log_end(ab);
}

1721
dc4bb0e2
MKL
1722static int bpf_prog_alloc_id(struct bpf_prog *prog)
1723{
1724 int id;
1725
b76354cd 1726 idr_preload(GFP_KERNEL);
dc4bb0e2
MKL
1727 spin_lock_bh(&prog_idr_lock);
1728 id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
1729 if (id > 0)
1730 prog->aux->id = id;
1731 spin_unlock_bh(&prog_idr_lock);
b76354cd 1732 idr_preload_end();
dc4bb0e2
MKL
1733
1734 /* id is in [1, INT_MAX) */
1735 if (WARN_ON_ONCE(!id))
1736 return -ENOSPC;
1737
1738 return id > 0 ? 0 : id;
1739}
1740
ad8ad79f 1741void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
dc4bb0e2 1742{
d809e134
AS
1743 unsigned long flags;
1744
ad8ad79f
JK
1745 /* cBPF to eBPF migrations are currently not in the idr store.
1746 * Offloaded programs are removed from the store when their device
1747 * disappears - even if someone grabs an fd to them they are unusable,
1748 * simply waiting for refcnt to drop to be freed.
1749 */
dc4bb0e2
MKL
1750 if (!prog->aux->id)
1751 return;
1752
b16d9aa4 1753 if (do_idr_lock)
d809e134 1754 spin_lock_irqsave(&prog_idr_lock, flags);
b16d9aa4
MKL
1755 else
1756 __acquire(&prog_idr_lock);
1757
dc4bb0e2 1758 idr_remove(&prog_idr, prog->aux->id);
ad8ad79f 1759 prog->aux->id = 0;
b16d9aa4
MKL
1760
1761 if (do_idr_lock)
d809e134 1762 spin_unlock_irqrestore(&prog_idr_lock, flags);
b16d9aa4
MKL
1763 else
1764 __release(&prog_idr_lock);
dc4bb0e2
MKL
1765}
1766
static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	kvfree(aux->func_info);
	kfree(aux->func_info_aux);
	free_uid(aux->user);
	security_bpf_prog_free(aux);
	bpf_prog_free(aux->prog);
}

static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
{
	bpf_prog_kallsyms_del_all(prog);
	btf_put(prog->aux->btf);
	kvfree(prog->aux->jited_linfo);
	kvfree(prog->aux->linfo);
	kfree(prog->aux->kfunc_tab);
	if (prog->aux->attach_btf)
		btf_put(prog->aux->attach_btf);

	if (deferred) {
		if (prog->aux->sleepable)
			call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu);
		else
			call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
	} else {
		__bpf_prog_put_rcu(&prog->aux->rcu);
	}
}

static void bpf_prog_put_deferred(struct work_struct *work)
{
	struct bpf_prog_aux *aux;
	struct bpf_prog *prog;

	aux = container_of(work, struct bpf_prog_aux, work);
	prog = aux->prog;
	perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
	bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
	__bpf_prog_put_noref(prog, true);
}

static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
	struct bpf_prog_aux *aux = prog->aux;

	if (atomic64_dec_and_test(&aux->refcnt)) {
		/* bpf_prog_free_id() must be called first */
		bpf_prog_free_id(prog, do_idr_lock);

		if (in_irq() || irqs_disabled()) {
			INIT_WORK(&aux->work, bpf_prog_put_deferred);
			schedule_work(&aux->work);
		} else {
			bpf_prog_put_deferred(&aux->work);
		}
	}
}

void bpf_prog_put(struct bpf_prog *prog)
{
	__bpf_prog_put(prog, true);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

struct bpf_prog_kstats {
	u64 nsecs;
	u64 cnt;
	u64 misses;
};

static void bpf_prog_get_stats(const struct bpf_prog *prog,
			       struct bpf_prog_kstats *stats)
{
	u64 nsecs = 0, cnt = 0, misses = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		const struct bpf_prog_stats *st;
		unsigned int start;
		u64 tnsecs, tcnt, tmisses;

		st = per_cpu_ptr(prog->stats, cpu);
		do {
			start = u64_stats_fetch_begin_irq(&st->syncp);
			tnsecs = u64_stats_read(&st->nsecs);
			tcnt = u64_stats_read(&st->cnt);
			tmisses = u64_stats_read(&st->misses);
		} while (u64_stats_fetch_retry_irq(&st->syncp, start));
		nsecs += tnsecs;
		cnt += tcnt;
		misses += tmisses;
	}
	stats->nsecs = nsecs;
	stats->cnt = cnt;
	stats->misses = misses;
}
1873
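/* Example (illustrative sketch, not part of syscall.c): the update side of
 * these per-CPU stats lives on the program-run path (roughly what the
 * run-time stats code in include/linux/filter.h does), which is why the
 * reader above needs the u64_stats_fetch_begin_irq()/retry loop:
 */
static void example_update_stats(struct bpf_prog *prog, u64 start)
{
        struct bpf_prog_stats *st = this_cpu_ptr(prog->stats);
        unsigned long flags;

        flags = u64_stats_update_begin_irqsave(&st->syncp);
        u64_stats_inc(&st->cnt);
        u64_stats_add(&st->nsecs, sched_clock() - start);
        u64_stats_update_end_irqrestore(&st->syncp, flags);
}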
7bd509e3
DB
1874#ifdef CONFIG_PROC_FS
1875static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
1876{
1877 const struct bpf_prog *prog = filp->private_data;
f1f7714e 1878 char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
61a0abae 1879 struct bpf_prog_kstats stats;
7bd509e3 1880
492ecee8 1881 bpf_prog_get_stats(prog, &stats);
f1f7714e 1882 bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
7bd509e3
DB
1883 seq_printf(m,
1884 "prog_type:\t%u\n"
1885 "prog_jited:\t%u\n"
f1f7714e 1886 "prog_tag:\t%s\n"
4316b409 1887 "memlock:\t%llu\n"
492ecee8
AS
1888 "prog_id:\t%u\n"
1889 "run_time_ns:\t%llu\n"
9ed9e9ba 1890 "run_cnt:\t%llu\n"
aba64c7d
DM
1891 "recursion_misses:\t%llu\n"
1892 "verified_insns:\t%u\n",
7bd509e3
DB
1893 prog->type,
1894 prog->jited,
f1f7714e 1895 prog_tag,
4316b409 1896 prog->pages * 1ULL << PAGE_SHIFT,
492ecee8
AS
1897 prog->aux->id,
1898 stats.nsecs,
9ed9e9ba 1899 stats.cnt,
aba64c7d
DM
1900 stats.misses,
1901 prog->aux->verified_insns);
7bd509e3
DB
1902}
1903#endif
1904
f66e448c 1905const struct file_operations bpf_prog_fops = {
7bd509e3
DB
1906#ifdef CONFIG_PROC_FS
1907 .show_fdinfo = bpf_prog_show_fdinfo,
1908#endif
1909 .release = bpf_prog_release,
6e71b04a
CF
1910 .read = bpf_dummy_read,
1911 .write = bpf_dummy_write,
09756af4
AS
1912};
1913
b2197755 1914int bpf_prog_new_fd(struct bpf_prog *prog)
aa79781b 1915{
afdb09c7
CF
1916 int ret;
1917
1918 ret = security_bpf_prog(prog);
1919 if (ret < 0)
1920 return ret;
1921
aa79781b
DB
1922 return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
1923 O_RDWR | O_CLOEXEC);
1924}
1925
113214be 1926static struct bpf_prog *____bpf_prog_get(struct fd f)
09756af4 1927{
09756af4
AS
1928 if (!f.file)
1929 return ERR_PTR(-EBADF);
09756af4
AS
1930 if (f.file->f_op != &bpf_prog_fops) {
1931 fdput(f);
1932 return ERR_PTR(-EINVAL);
1933 }
1934
c2101297 1935 return f.file->private_data;
09756af4
AS
1936}
1937
85192dbf 1938void bpf_prog_add(struct bpf_prog *prog, int i)
92117d84 1939{
85192dbf 1940 atomic64_add(i, &prog->aux->refcnt);
92117d84 1941}
59d3656d
BB
1942EXPORT_SYMBOL_GPL(bpf_prog_add);
1943
c540594f
DB
1944void bpf_prog_sub(struct bpf_prog *prog, int i)
1945{
1946 /* Only to be used for undoing previous bpf_prog_add() in some
1947 * error path. We still know that another entity in our call
1948 * path holds a reference to the program, thus atomic_sub() can
1949 * be safely used in such cases!
1950 */
85192dbf 1951 WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0);
c540594f
DB
1952}
1953EXPORT_SYMBOL_GPL(bpf_prog_sub);
1954
85192dbf 1955void bpf_prog_inc(struct bpf_prog *prog)
59d3656d 1956{
85192dbf 1957 atomic64_inc(&prog->aux->refcnt);
59d3656d 1958}
97bc402d 1959EXPORT_SYMBOL_GPL(bpf_prog_inc);
92117d84 1960
b16d9aa4 1961/* prog_idr_lock should have been held */
a6f6df69 1962struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
b16d9aa4
MKL
1963{
1964 int refold;
1965
85192dbf 1966 refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0);
b16d9aa4
MKL
1967
1968 if (!refold)
1969 return ERR_PTR(-ENOENT);
1970
1971 return prog;
1972}
a6f6df69 1973EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);
b16d9aa4 1974
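/* Example (illustrative sketch, not part of syscall.c): the intended caller
 * pattern for bpf_prog_inc_not_zero() is an idr lookup under prog_idr_lock,
 * where the refcount may concurrently drop to zero; bpf_prog_by_id() further
 * below is the in-tree version of this:
 */
static struct bpf_prog *example_prog_lookup(u32 id)
{
        struct bpf_prog *prog;

        spin_lock_bh(&prog_idr_lock);
        prog = idr_find(&prog_idr, id);
        if (prog)
                prog = bpf_prog_inc_not_zero(prog); /* ERR_PTR(-ENOENT) if dying */
        else
                prog = ERR_PTR(-ENOENT);
        spin_unlock_bh(&prog_idr_lock);

        return prog;
}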
040ee692 1975bool bpf_prog_get_ok(struct bpf_prog *prog,
288b3de5 1976 enum bpf_prog_type *attach_type, bool attach_drv)
248f346f 1977{
288b3de5
JK
1978 /* not an attachment, just a refcount inc, always allow */
1979 if (!attach_type)
1980 return true;
248f346f
JK
1981
1982 if (prog->type != *attach_type)
1983 return false;
288b3de5 1984 if (bpf_prog_is_dev_bound(prog->aux) && !attach_drv)
248f346f
JK
1985 return false;
1986
1987 return true;
1988}
1989
1990static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
288b3de5 1991 bool attach_drv)
09756af4
AS
1992{
1993 struct fd f = fdget(ufd);
1994 struct bpf_prog *prog;
1995
113214be 1996 prog = ____bpf_prog_get(f);
09756af4
AS
1997 if (IS_ERR(prog))
1998 return prog;
288b3de5 1999 if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) {
113214be
DB
2000 prog = ERR_PTR(-EINVAL);
2001 goto out;
2002 }
09756af4 2003
85192dbf 2004 bpf_prog_inc(prog);
113214be 2005out:
09756af4
AS
2006 fdput(f);
2007 return prog;
2008}
113214be
DB
2009
2010struct bpf_prog *bpf_prog_get(u32 ufd)
2011{
288b3de5 2012 return __bpf_prog_get(ufd, NULL, false);
113214be
DB
2013}
2014
248f346f 2015struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
288b3de5 2016 bool attach_drv)
248f346f 2017{
4d220ed0 2018 return __bpf_prog_get(ufd, &type, attach_drv);
248f346f 2019}
6c8dfe21 2020EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
248f346f 2021
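/* Example (illustrative sketch, not part of syscall.c): a device driver that
 * accepts a program fd from user space typically takes its own reference via
 * bpf_prog_get_type_dev() and drops it with bpf_prog_put() on teardown.
 * "struct example_dev" is a hypothetical driver structure:
 */
struct example_dev {                    /* hypothetical driver state */
        struct bpf_prog *xdp_prog;
};

static int example_dev_set_xdp(struct example_dev *dev, u32 prog_fd)
{
        struct bpf_prog *prog;

        prog = bpf_prog_get_type_dev(prog_fd, BPF_PROG_TYPE_XDP, false);
        if (IS_ERR(prog))
                return PTR_ERR(prog);

        if (dev->xdp_prog)
                bpf_prog_put(dev->xdp_prog);    /* drop the old reference */
        dev->xdp_prog = prog;                   /* keep ours until detach */

        return 0;
}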
aac3fc32
AI
2022/* Initially all BPF programs could be loaded w/o specifying
2023 * expected_attach_type. Later for some of them specifying expected_attach_type
 2024 * at load time became required so that the program could be validated properly.
 2025 * Programs of types that are allowed to be loaded both w/ and w/o (for
 2026 * backward compatibility) expected_attach_type should have the default attach
2027 * type assigned to expected_attach_type for the latter case, so that it can be
2028 * validated later at attach time.
2029 *
2030 * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if
2031 * prog type requires it but has some attach types that have to be backward
2032 * compatible.
2033 */
2034static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
2035{
2036 switch (attr->prog_type) {
2037 case BPF_PROG_TYPE_CGROUP_SOCK:
2038 /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't
2039 * exist so checking for non-zero is the way to go here.
2040 */
2041 if (!attr->expected_attach_type)
2042 attr->expected_attach_type =
2043 BPF_CGROUP_INET_SOCK_CREATE;
2044 break;
d5e4ddae
KI
2045 case BPF_PROG_TYPE_SK_REUSEPORT:
2046 if (!attr->expected_attach_type)
2047 attr->expected_attach_type =
2048 BPF_SK_REUSEPORT_SELECT;
2049 break;
aac3fc32
AI
2050 }
2051}
2052
5e43f899 2053static int
ccfe29eb
AS
2054bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
2055 enum bpf_attach_type expected_attach_type,
290248a5
AN
2056 struct btf *attach_btf, u32 btf_id,
2057 struct bpf_prog *dst_prog)
5e43f899 2058{
27ae7997 2059 if (btf_id) {
c108e3c1
AS
2060 if (btf_id > BTF_MAX_TYPE)
2061 return -EINVAL;
27ae7997 2062
290248a5
AN
2063 if (!attach_btf && !dst_prog)
2064 return -EINVAL;
2065
27ae7997
MKL
2066 switch (prog_type) {
2067 case BPF_PROG_TYPE_TRACING:
9e4e01df 2068 case BPF_PROG_TYPE_LSM:
27ae7997 2069 case BPF_PROG_TYPE_STRUCT_OPS:
be8704ff 2070 case BPF_PROG_TYPE_EXT:
27ae7997
MKL
2071 break;
2072 default:
c108e3c1 2073 return -EINVAL;
27ae7997 2074 }
c108e3c1
AS
2075 }
2076
290248a5
AN
2077 if (attach_btf && (!btf_id || dst_prog))
2078 return -EINVAL;
2079
2080 if (dst_prog && prog_type != BPF_PROG_TYPE_TRACING &&
be8704ff 2081 prog_type != BPF_PROG_TYPE_EXT)
27ae7997
MKL
2082 return -EINVAL;
2083
4fbac77d 2084 switch (prog_type) {
aac3fc32
AI
2085 case BPF_PROG_TYPE_CGROUP_SOCK:
2086 switch (expected_attach_type) {
2087 case BPF_CGROUP_INET_SOCK_CREATE:
f5836749 2088 case BPF_CGROUP_INET_SOCK_RELEASE:
aac3fc32
AI
2089 case BPF_CGROUP_INET4_POST_BIND:
2090 case BPF_CGROUP_INET6_POST_BIND:
2091 return 0;
2092 default:
2093 return -EINVAL;
2094 }
4fbac77d
AI
2095 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
2096 switch (expected_attach_type) {
2097 case BPF_CGROUP_INET4_BIND:
2098 case BPF_CGROUP_INET6_BIND:
d74bad4e
AI
2099 case BPF_CGROUP_INET4_CONNECT:
2100 case BPF_CGROUP_INET6_CONNECT:
1b66d253
DB
2101 case BPF_CGROUP_INET4_GETPEERNAME:
2102 case BPF_CGROUP_INET6_GETPEERNAME:
2103 case BPF_CGROUP_INET4_GETSOCKNAME:
2104 case BPF_CGROUP_INET6_GETSOCKNAME:
1cedee13
AI
2105 case BPF_CGROUP_UDP4_SENDMSG:
2106 case BPF_CGROUP_UDP6_SENDMSG:
983695fa
DB
2107 case BPF_CGROUP_UDP4_RECVMSG:
2108 case BPF_CGROUP_UDP6_RECVMSG:
4fbac77d
AI
2109 return 0;
2110 default:
2111 return -EINVAL;
2112 }
5cf1e914 2113 case BPF_PROG_TYPE_CGROUP_SKB:
2114 switch (expected_attach_type) {
2115 case BPF_CGROUP_INET_INGRESS:
2116 case BPF_CGROUP_INET_EGRESS:
2117 return 0;
2118 default:
2119 return -EINVAL;
2120 }
0d01da6a
SF
2121 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2122 switch (expected_attach_type) {
2123 case BPF_CGROUP_SETSOCKOPT:
2124 case BPF_CGROUP_GETSOCKOPT:
2125 return 0;
2126 default:
2127 return -EINVAL;
2128 }
e9ddbb77
JS
2129 case BPF_PROG_TYPE_SK_LOOKUP:
2130 if (expected_attach_type == BPF_SK_LOOKUP)
2131 return 0;
2132 return -EINVAL;
d5e4ddae
KI
2133 case BPF_PROG_TYPE_SK_REUSEPORT:
2134 switch (expected_attach_type) {
2135 case BPF_SK_REUSEPORT_SELECT:
2136 case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE:
2137 return 0;
2138 default:
2139 return -EINVAL;
2140 }
79a7f8bd 2141 case BPF_PROG_TYPE_SYSCALL:
be8704ff
AS
2142 case BPF_PROG_TYPE_EXT:
2143 if (expected_attach_type)
2144 return -EINVAL;
df561f66 2145 fallthrough;
4fbac77d
AI
2146 default:
2147 return 0;
2148 }
5e43f899
AI
2149}
2150
2c78ee89
AS
2151static bool is_net_admin_prog_type(enum bpf_prog_type prog_type)
2152{
2153 switch (prog_type) {
2154 case BPF_PROG_TYPE_SCHED_CLS:
2155 case BPF_PROG_TYPE_SCHED_ACT:
2156 case BPF_PROG_TYPE_XDP:
2157 case BPF_PROG_TYPE_LWT_IN:
2158 case BPF_PROG_TYPE_LWT_OUT:
2159 case BPF_PROG_TYPE_LWT_XMIT:
2160 case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2161 case BPF_PROG_TYPE_SK_SKB:
2162 case BPF_PROG_TYPE_SK_MSG:
2163 case BPF_PROG_TYPE_LIRC_MODE2:
2164 case BPF_PROG_TYPE_FLOW_DISSECTOR:
2165 case BPF_PROG_TYPE_CGROUP_DEVICE:
2166 case BPF_PROG_TYPE_CGROUP_SOCK:
2167 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
2168 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2169 case BPF_PROG_TYPE_CGROUP_SYSCTL:
2170 case BPF_PROG_TYPE_SOCK_OPS:
2171 case BPF_PROG_TYPE_EXT: /* extends any prog */
2172 return true;
2173 case BPF_PROG_TYPE_CGROUP_SKB:
2174 /* always unpriv */
2175 case BPF_PROG_TYPE_SK_REUSEPORT:
2176 /* equivalent to SOCKET_FILTER. need CAP_BPF only */
2177 default:
2178 return false;
2179 }
2180}
2181
2182static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
2183{
2184 switch (prog_type) {
2185 case BPF_PROG_TYPE_KPROBE:
2186 case BPF_PROG_TYPE_TRACEPOINT:
2187 case BPF_PROG_TYPE_PERF_EVENT:
2188 case BPF_PROG_TYPE_RAW_TRACEPOINT:
2189 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
2190 case BPF_PROG_TYPE_TRACING:
2191 case BPF_PROG_TYPE_LSM:
2192 case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */
2193 case BPF_PROG_TYPE_EXT: /* extends any prog */
2194 return true;
2195 default:
2196 return false;
2197 }
2198}
2199
09756af4 2200/* last field in 'union bpf_attr' used by this command */
fbd94c7a 2201#define BPF_PROG_LOAD_LAST_FIELD core_relo_rec_size
09756af4 2202
af2ac3e1 2203static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
09756af4
AS
2204{
2205 enum bpf_prog_type type = attr->prog_type;
290248a5
AN
2206 struct bpf_prog *prog, *dst_prog = NULL;
2207 struct btf *attach_btf = NULL;
09756af4
AS
2208 int err;
2209 char license[128];
2210 bool is_gpl;
2211
2212 if (CHECK_ATTR(BPF_PROG_LOAD))
2213 return -EINVAL;
2214
c240eff6
JW
2215 if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
2216 BPF_F_ANY_ALIGNMENT |
10d274e8 2217 BPF_F_TEST_STATE_FREQ |
1e6c62a8 2218 BPF_F_SLEEPABLE |
c2f2cdbe
LB
2219 BPF_F_TEST_RND_HI32 |
2220 BPF_F_XDP_HAS_FRAGS))
e07b98d9
DM
2221 return -EINVAL;
2222
e9ee9efc
DM
2223 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
2224 (attr->prog_flags & BPF_F_ANY_ALIGNMENT) &&
2c78ee89 2225 !bpf_capable())
e9ee9efc
DM
2226 return -EPERM;
2227
09756af4 2228 /* copy eBPF program license from user space */
af2ac3e1
AS
2229 if (strncpy_from_bpfptr(license,
2230 make_bpfptr(attr->license, uattr.is_kernel),
2231 sizeof(license) - 1) < 0)
09756af4
AS
2232 return -EFAULT;
2233 license[sizeof(license) - 1] = 0;
2234
2235 /* eBPF programs must be GPL compatible to use GPL-ed functions */
2236 is_gpl = license_is_gpl_compatible(license);
2237
c04c0d2b 2238 if (attr->insn_cnt == 0 ||
2c78ee89 2239 attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
ef0915ca 2240 return -E2BIG;
80b7d819
CF
2241 if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
2242 type != BPF_PROG_TYPE_CGROUP_SKB &&
2c78ee89
AS
2243 !bpf_capable())
2244 return -EPERM;
2245
b338cb92 2246 if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN))
2c78ee89
AS
2247 return -EPERM;
2248 if (is_perfmon_prog_type(type) && !perfmon_capable())
1be7f75d
AS
2249 return -EPERM;
2250
290248a5
AN
2251 /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog
2252 * or btf, we need to check which one it is
2253 */
2254 if (attr->attach_prog_fd) {
2255 dst_prog = bpf_prog_get(attr->attach_prog_fd);
2256 if (IS_ERR(dst_prog)) {
2257 dst_prog = NULL;
2258 attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd);
2259 if (IS_ERR(attach_btf))
2260 return -EINVAL;
2261 if (!btf_is_kernel(attach_btf)) {
8bdd8e27
AN
2262 /* attaching through specifying bpf_prog's BTF
2263 * objects directly might be supported eventually
2264 */
290248a5 2265 btf_put(attach_btf);
8bdd8e27 2266 return -ENOTSUPP;
290248a5
AN
2267 }
2268 }
2269 } else if (attr->attach_btf_id) {
2270 /* fall back to vmlinux BTF, if BTF type ID is specified */
2271 attach_btf = bpf_get_btf_vmlinux();
2272 if (IS_ERR(attach_btf))
2273 return PTR_ERR(attach_btf);
2274 if (!attach_btf)
2275 return -EINVAL;
2276 btf_get(attach_btf);
2277 }
2278
aac3fc32 2279 bpf_prog_load_fixup_attach_type(attr);
ccfe29eb 2280 if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
290248a5
AN
2281 attach_btf, attr->attach_btf_id,
2282 dst_prog)) {
2283 if (dst_prog)
2284 bpf_prog_put(dst_prog);
2285 if (attach_btf)
2286 btf_put(attach_btf);
5e43f899 2287 return -EINVAL;
290248a5 2288 }
5e43f899 2289
09756af4
AS
2290 /* plain bpf_prog allocation */
2291 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
290248a5
AN
2292 if (!prog) {
2293 if (dst_prog)
2294 bpf_prog_put(dst_prog);
2295 if (attach_btf)
2296 btf_put(attach_btf);
09756af4 2297 return -ENOMEM;
290248a5 2298 }
09756af4 2299
5e43f899 2300 prog->expected_attach_type = attr->expected_attach_type;
290248a5 2301 prog->aux->attach_btf = attach_btf;
ccfe29eb 2302 prog->aux->attach_btf_id = attr->attach_btf_id;
290248a5 2303 prog->aux->dst_prog = dst_prog;
9a18eedb 2304 prog->aux->offload_requested = !!attr->prog_ifindex;
1e6c62a8 2305 prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
c2f2cdbe 2306 prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS;
9a18eedb 2307
afdb09c7 2308 err = security_bpf_prog_alloc(prog->aux);
aaac3ba9 2309 if (err)
3ac1f01b 2310 goto free_prog;
afdb09c7 2311
3ac1f01b 2312 prog->aux->user = get_current_user();
09756af4
AS
2313 prog->len = attr->insn_cnt;
2314
2315 err = -EFAULT;
af2ac3e1
AS
2316 if (copy_from_bpfptr(prog->insns,
2317 make_bpfptr(attr->insns, uattr.is_kernel),
2318 bpf_prog_insn_size(prog)) != 0)
3ac1f01b 2319 goto free_prog_sec;
09756af4
AS
2320
2321 prog->orig_prog = NULL;
a91263d5 2322 prog->jited = 0;
09756af4 2323
85192dbf 2324 atomic64_set(&prog->aux->refcnt, 1);
a91263d5 2325 prog->gpl_compatible = is_gpl ? 1 : 0;
09756af4 2326
9a18eedb 2327 if (bpf_prog_is_dev_bound(prog->aux)) {
ab3f0063
JK
2328 err = bpf_prog_offload_init(prog, attr);
2329 if (err)
3ac1f01b 2330 goto free_prog_sec;
ab3f0063
JK
2331 }
2332
09756af4
AS
2333 /* find program type: socket_filter vs tracing_filter */
2334 err = find_prog_type(type, prog);
2335 if (err < 0)
3ac1f01b 2336 goto free_prog_sec;
09756af4 2337
9285ec4c 2338 prog->aux->load_time = ktime_get_boottime_ns();
8e7ae251
MKL
2339 err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name,
2340 sizeof(attr->prog_name));
2341 if (err < 0)
3ac1f01b 2342 goto free_prog_sec;
cb4d2b3f 2343
09756af4 2344 /* run eBPF verifier */
838e9690 2345 err = bpf_check(&prog, attr, uattr);
09756af4
AS
2346 if (err < 0)
2347 goto free_used_maps;
2348
9facc336 2349 prog = bpf_prog_select_runtime(prog, &err);
04fd61ab
AS
2350 if (err < 0)
2351 goto free_used_maps;
09756af4 2352
dc4bb0e2
MKL
2353 err = bpf_prog_alloc_id(prog);
2354 if (err)
2355 goto free_used_maps;
2356
c751798a
DB
2357 /* Upon success of bpf_prog_alloc_id(), the BPF prog is
2358 * effectively publicly exposed. However, retrieving via
2359 * bpf_prog_get_fd_by_id() will take another reference,
 2360 * therefore it cannot vanish underneath us.
2361 *
2362 * Only for the time /after/ successful bpf_prog_new_fd()
2363 * and before returning to userspace, we might just hold
2364 * one reference and any parallel close on that fd could
2365 * rip everything out. Hence, below notifications must
2366 * happen before bpf_prog_new_fd().
2367 *
2368 * Also, any failure handling from this point onwards must
2369 * be using bpf_prog_put() given the program is exposed.
2370 */
74451e66 2371 bpf_prog_kallsyms_add(prog);
6ee52e2a 2372 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
bae141f5 2373 bpf_audit_prog(prog, BPF_AUDIT_LOAD);
c751798a
DB
2374
2375 err = bpf_prog_new_fd(prog);
2376 if (err < 0)
2377 bpf_prog_put(prog);
09756af4
AS
2378 return err;
2379
2380free_used_maps:
cd7455f1
DB
2381 /* In case we have subprogs, we need to wait for a grace
2382 * period before we can tear down JIT memory since symbols
2383 * are already exposed under kallsyms.
2384 */
2385 __bpf_prog_put_noref(prog, prog->aux->func_cnt);
2386 return err;
afdb09c7 2387free_prog_sec:
3ac1f01b 2388 free_uid(prog->aux->user);
afdb09c7 2389 security_bpf_prog_free(prog->aux);
3ac1f01b 2390free_prog:
22dc4a0f
AN
2391 if (prog->aux->attach_btf)
2392 btf_put(prog->aux->attach_btf);
09756af4
AS
2393 bpf_prog_free(prog);
2394 return err;
2395}
2396
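/* Example (illustrative sketch, not part of syscall.c): a minimal user-space
 * caller of the command handled by bpf_prog_load() above, loading a
 * two-instruction "return 0" socket filter via the raw bpf(2) syscall.
 * Assumes <linux/bpf.h>, <string.h>, <sys/syscall.h> and <unistd.h>:
 */
static int load_return_zero(void)
{
        struct bpf_insn insns[] = {
                { .code = BPF_ALU64 | BPF_MOV | BPF_K,
                  .dst_reg = BPF_REG_0 },               /* r0 = 0 */
                { .code = BPF_JMP | BPF_EXIT },         /* return r0 */
        };
        union bpf_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
        attr.insns     = (__u64)(unsigned long)insns;
        attr.insn_cnt  = 2;
        attr.license   = (__u64)(unsigned long)"GPL";

        return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}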
6e71b04a 2397#define BPF_OBJ_LAST_FIELD file_flags
b2197755
DB
2398
2399static int bpf_obj_pin(const union bpf_attr *attr)
2400{
6e71b04a 2401 if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0)
b2197755
DB
2402 return -EINVAL;
2403
535e7b4b 2404 return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
b2197755
DB
2405}
2406
2407static int bpf_obj_get(const union bpf_attr *attr)
2408{
6e71b04a
CF
2409 if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 ||
2410 attr->file_flags & ~BPF_OBJ_FLAG_MASK)
b2197755
DB
2411 return -EINVAL;
2412
6e71b04a
CF
2413 return bpf_obj_get_user(u64_to_user_ptr(attr->pathname),
2414 attr->file_flags);
b2197755
DB
2415}
2416
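/* Example (illustrative sketch, not part of syscall.c): user-space pinning a
 * prog fd into bpffs and re-opening it later, matching bpf_obj_pin() and
 * bpf_obj_get() above; "/sys/fs/bpf/my_prog" is an arbitrary sample path and
 * the same headers as the earlier user-space sketch are assumed:
 */
static int pin_and_reopen(int prog_fd)
{
        union bpf_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.pathname = (__u64)(unsigned long)"/sys/fs/bpf/my_prog";
        attr.bpf_fd   = prog_fd;
        if (syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr)))
                return -1;

        memset(&attr, 0, sizeof(attr));
        attr.pathname = (__u64)(unsigned long)"/sys/fs/bpf/my_prog";
        return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr)); /* new fd */
}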
f2e10bff 2417void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
a3b80e10 2418 const struct bpf_link_ops *ops, struct bpf_prog *prog)
fec56f58 2419{
70ed506c 2420 atomic64_set(&link->refcnt, 1);
f2e10bff 2421 link->type = type;
a3b80e10 2422 link->id = 0;
70ed506c
AN
2423 link->ops = ops;
2424 link->prog = prog;
2425}
2426
a3b80e10
AN
2427static void bpf_link_free_id(int id)
2428{
2429 if (!id)
2430 return;
2431
2432 spin_lock_bh(&link_idr_lock);
2433 idr_remove(&link_idr, id);
2434 spin_unlock_bh(&link_idr_lock);
2435}
2436
98868668
AN
2437/* Clean up bpf_link and corresponding anon_inode file and FD. After
2438 * anon_inode is created, bpf_link can't be just kfree()'d due to deferred
a3b80e10
AN
 2439 * anon_inode's release() call. This helper marks bpf_link as
 2440 * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt
 2441 * is not decremented, it's the responsibility of the calling code that failed
2442 * to complete bpf_link initialization.
98868668 2443 */
a3b80e10 2444void bpf_link_cleanup(struct bpf_link_primer *primer)
babf3164 2445{
a3b80e10
AN
2446 primer->link->prog = NULL;
2447 bpf_link_free_id(primer->id);
2448 fput(primer->file);
2449 put_unused_fd(primer->fd);
babf3164
AN
2450}
2451
70ed506c
AN
2452void bpf_link_inc(struct bpf_link *link)
2453{
2454 atomic64_inc(&link->refcnt);
2455}
2456
2457/* bpf_link_free is guaranteed to be called from process context */
2458static void bpf_link_free(struct bpf_link *link)
2459{
a3b80e10 2460 bpf_link_free_id(link->id);
babf3164
AN
2461 if (link->prog) {
2462 /* detach BPF program, clean up used resources */
2463 link->ops->release(link);
2464 bpf_prog_put(link->prog);
2465 }
2466 /* free bpf_link and its containing memory */
2467 link->ops->dealloc(link);
70ed506c
AN
2468}
2469
2470static void bpf_link_put_deferred(struct work_struct *work)
2471{
2472 struct bpf_link *link = container_of(work, struct bpf_link, work);
2473
2474 bpf_link_free(link);
2475}
2476
2477/* bpf_link_put can be called from atomic context, but ensures that resources
2478 * are freed from process context
2479 */
2480void bpf_link_put(struct bpf_link *link)
2481{
2482 if (!atomic64_dec_and_test(&link->refcnt))
2483 return;
2484
f00f2f7f
AS
2485 if (in_atomic()) {
2486 INIT_WORK(&link->work, bpf_link_put_deferred);
2487 schedule_work(&link->work);
2488 } else {
2489 bpf_link_free(link);
2490 }
70ed506c
AN
2491}
2492
2493static int bpf_link_release(struct inode *inode, struct file *filp)
2494{
2495 struct bpf_link *link = filp->private_data;
2496
2497 bpf_link_put(link);
fec56f58
AS
2498 return 0;
2499}
2500
70ed506c 2501#ifdef CONFIG_PROC_FS
f2e10bff
AN
2502#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
2503#define BPF_MAP_TYPE(_id, _ops)
2504#define BPF_LINK_TYPE(_id, _name) [_id] = #_name,
2505static const char *bpf_link_type_strs[] = {
2506 [BPF_LINK_TYPE_UNSPEC] = "<invalid>",
2507#include <linux/bpf_types.h>
2508};
2509#undef BPF_PROG_TYPE
2510#undef BPF_MAP_TYPE
2511#undef BPF_LINK_TYPE
70ed506c
AN
2512
2513static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
2514{
2515 const struct bpf_link *link = filp->private_data;
2516 const struct bpf_prog *prog = link->prog;
2517 char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
70ed506c
AN
2518
2519 bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
2520 seq_printf(m,
2521 "link_type:\t%s\n"
a3b80e10 2522 "link_id:\t%u\n"
70ed506c
AN
2523 "prog_tag:\t%s\n"
2524 "prog_id:\t%u\n",
f2e10bff 2525 bpf_link_type_strs[link->type],
a3b80e10 2526 link->id,
70ed506c
AN
2527 prog_tag,
2528 prog->aux->id);
f2e10bff
AN
2529 if (link->ops->show_fdinfo)
2530 link->ops->show_fdinfo(link, m);
70ed506c
AN
2531}
2532#endif
2533
6f302bfb 2534static const struct file_operations bpf_link_fops = {
70ed506c
AN
2535#ifdef CONFIG_PROC_FS
2536 .show_fdinfo = bpf_link_show_fdinfo,
2537#endif
2538 .release = bpf_link_release,
fec56f58
AS
2539 .read = bpf_dummy_read,
2540 .write = bpf_dummy_write,
2541};
2542
a3b80e10 2543static int bpf_link_alloc_id(struct bpf_link *link)
70ed506c 2544{
a3b80e10 2545 int id;
70ed506c 2546
a3b80e10
AN
2547 idr_preload(GFP_KERNEL);
2548 spin_lock_bh(&link_idr_lock);
2549 id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC);
2550 spin_unlock_bh(&link_idr_lock);
2551 idr_preload_end();
70ed506c 2552
a3b80e10
AN
2553 return id;
2554}
2555
2556/* Prepare bpf_link to be exposed to user-space by allocating anon_inode file,
2557 * reserving unused FD and allocating ID from link_idr. This is to be paired
2558 * with bpf_link_settle() to install FD and ID and expose bpf_link to
2559 * user-space, if bpf_link is successfully attached. If not, bpf_link and
 2560 * pre-allocated resources are to be freed with bpf_link_cleanup() call. All the
 2561 * transient state is passed around in struct bpf_link_primer.
 2562 * This is the preferred way to create and initialize bpf_link, especially when
 2563 * there are complicated and expensive operations in between creating bpf_link
 2564 * itself and attaching it to a BPF hook. By using bpf_link_prime() and
 2565 * bpf_link_settle(), kernel code using bpf_link doesn't have to perform
 2566 * expensive (and potentially failing) roll back operations in the rare case
 2567 * that file, FD, or ID can't be allocated.
babf3164 2568 */
a3b80e10 2569int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer)
babf3164
AN
2570{
2571 struct file *file;
a3b80e10 2572 int fd, id;
babf3164
AN
2573
2574 fd = get_unused_fd_flags(O_CLOEXEC);
2575 if (fd < 0)
a3b80e10 2576 return fd;
babf3164 2577
babf3164 2578
a3b80e10
AN
2579 id = bpf_link_alloc_id(link);
2580 if (id < 0) {
2581 put_unused_fd(fd);
a3b80e10
AN
2582 return id;
2583 }
babf3164
AN
2584
2585 file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
2586 if (IS_ERR(file)) {
138c6767 2587 bpf_link_free_id(id);
babf3164 2588 put_unused_fd(fd);
138c6767 2589 return PTR_ERR(file);
babf3164
AN
2590 }
2591
a3b80e10
AN
2592 primer->link = link;
2593 primer->file = file;
2594 primer->fd = fd;
2595 primer->id = id;
2596 return 0;
2597}
2598
2599int bpf_link_settle(struct bpf_link_primer *primer)
2600{
2601 /* make bpf_link fetchable by ID */
2602 spin_lock_bh(&link_idr_lock);
2603 primer->link->id = primer->id;
2604 spin_unlock_bh(&link_idr_lock);
2605 /* make bpf_link fetchable by FD */
2606 fd_install(primer->fd, primer->file);
2607 /* pass through installed FD */
2608 return primer->fd;
2609}
2610
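/* Example (illustrative sketch, not part of syscall.c): the canonical
 * prime/attach/settle sequence described above, as used by e.g.
 * bpf_raw_tracepoint_open() further below. "struct example_link",
 * "example_link_ops" and "example_attach" are hypothetical:
 */
struct example_link {                           /* hypothetical link type */
        struct bpf_link link;
};
static const struct bpf_link_ops example_link_ops; /* .release/.dealloc assumed */
static int example_attach(struct example_link *l);  /* real hook-up, assumed */

static int example_link_create(struct bpf_prog *prog)
{
        struct bpf_link_primer primer;
        struct example_link *link;
        int err;

        link = kzalloc(sizeof(*link), GFP_USER);
        if (!link)
                return -ENOMEM;
        bpf_link_init(&link->link, BPF_LINK_TYPE_UNSPEC,
                      &example_link_ops, prog);

        err = bpf_link_prime(&link->link, &primer);
        if (err) {
                kfree(link);            /* not primed yet, plain kfree() is fine */
                return err;
        }

        err = example_attach(link);     /* the expensive, failure-prone step */
        if (err) {
                bpf_link_cleanup(&primer); /* frees link via deferred release */
                return err;
        }

        return bpf_link_settle(&primer); /* install FD + ID, return the fd */
}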
2611int bpf_link_new_fd(struct bpf_link *link)
2612{
2613 return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
babf3164
AN
2614}
2615
70ed506c
AN
2616struct bpf_link *bpf_link_get_from_fd(u32 ufd)
2617{
2618 struct fd f = fdget(ufd);
2619 struct bpf_link *link;
2620
2621 if (!f.file)
2622 return ERR_PTR(-EBADF);
2623 if (f.file->f_op != &bpf_link_fops) {
2624 fdput(f);
2625 return ERR_PTR(-EINVAL);
2626 }
2627
2628 link = f.file->private_data;
2629 bpf_link_inc(link);
2630 fdput(f);
2631
2632 return link;
2633}
2634
2635struct bpf_tracing_link {
2636 struct bpf_link link;
f2e10bff 2637 enum bpf_attach_type attach_type;
3aac1ead
THJ
2638 struct bpf_trampoline *trampoline;
2639 struct bpf_prog *tgt_prog;
70ed506c
AN
2640};
2641
2642static void bpf_tracing_link_release(struct bpf_link *link)
babf3164 2643{
3aac1ead
THJ
2644 struct bpf_tracing_link *tr_link =
2645 container_of(link, struct bpf_tracing_link, link);
2646
2647 WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog,
2648 tr_link->trampoline));
2649
2650 bpf_trampoline_put(tr_link->trampoline);
2651
2652 /* tgt_prog is NULL if target is a kernel function */
2653 if (tr_link->tgt_prog)
2654 bpf_prog_put(tr_link->tgt_prog);
babf3164
AN
2655}
2656
2657static void bpf_tracing_link_dealloc(struct bpf_link *link)
70ed506c
AN
2658{
2659 struct bpf_tracing_link *tr_link =
2660 container_of(link, struct bpf_tracing_link, link);
2661
70ed506c
AN
2662 kfree(tr_link);
2663}
2664
f2e10bff
AN
2665static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link,
2666 struct seq_file *seq)
2667{
2668 struct bpf_tracing_link *tr_link =
2669 container_of(link, struct bpf_tracing_link, link);
2670
2671 seq_printf(seq,
2672 "attach_type:\t%d\n",
2673 tr_link->attach_type);
2674}
2675
2676static int bpf_tracing_link_fill_link_info(const struct bpf_link *link,
2677 struct bpf_link_info *info)
2678{
2679 struct bpf_tracing_link *tr_link =
2680 container_of(link, struct bpf_tracing_link, link);
2681
2682 info->tracing.attach_type = tr_link->attach_type;
441e8c66
THJ
2683 bpf_trampoline_unpack_key(tr_link->trampoline->key,
2684 &info->tracing.target_obj_id,
2685 &info->tracing.target_btf_id);
f2e10bff
AN
2686
2687 return 0;
2688}
2689
70ed506c
AN
2690static const struct bpf_link_ops bpf_tracing_link_lops = {
2691 .release = bpf_tracing_link_release,
babf3164 2692 .dealloc = bpf_tracing_link_dealloc,
f2e10bff
AN
2693 .show_fdinfo = bpf_tracing_link_show_fdinfo,
2694 .fill_link_info = bpf_tracing_link_fill_link_info,
70ed506c
AN
2695};
2696
4a1e7c0c
THJ
2697static int bpf_tracing_prog_attach(struct bpf_prog *prog,
2698 int tgt_prog_fd,
2699 u32 btf_id)
fec56f58 2700{
a3b80e10 2701 struct bpf_link_primer link_primer;
3aac1ead 2702 struct bpf_prog *tgt_prog = NULL;
4a1e7c0c 2703 struct bpf_trampoline *tr = NULL;
70ed506c 2704 struct bpf_tracing_link *link;
4a1e7c0c 2705 u64 key = 0;
a3b80e10 2706 int err;
fec56f58 2707
9e4e01df
KS
2708 switch (prog->type) {
2709 case BPF_PROG_TYPE_TRACING:
2710 if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
2711 prog->expected_attach_type != BPF_TRACE_FEXIT &&
2712 prog->expected_attach_type != BPF_MODIFY_RETURN) {
2713 err = -EINVAL;
2714 goto out_put_prog;
2715 }
2716 break;
2717 case BPF_PROG_TYPE_EXT:
2718 if (prog->expected_attach_type != 0) {
2719 err = -EINVAL;
2720 goto out_put_prog;
2721 }
2722 break;
2723 case BPF_PROG_TYPE_LSM:
2724 if (prog->expected_attach_type != BPF_LSM_MAC) {
2725 err = -EINVAL;
2726 goto out_put_prog;
2727 }
2728 break;
2729 default:
fec56f58
AS
2730 err = -EINVAL;
2731 goto out_put_prog;
2732 }
2733
4a1e7c0c
THJ
2734 if (!!tgt_prog_fd != !!btf_id) {
2735 err = -EINVAL;
2736 goto out_put_prog;
2737 }
2738
2739 if (tgt_prog_fd) {
2740 /* For now we only allow new targets for BPF_PROG_TYPE_EXT */
2741 if (prog->type != BPF_PROG_TYPE_EXT) {
2742 err = -EINVAL;
2743 goto out_put_prog;
2744 }
2745
2746 tgt_prog = bpf_prog_get(tgt_prog_fd);
2747 if (IS_ERR(tgt_prog)) {
2748 err = PTR_ERR(tgt_prog);
2749 tgt_prog = NULL;
2750 goto out_put_prog;
2751 }
2752
22dc4a0f 2753 key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id);
4a1e7c0c
THJ
2754 }
2755
70ed506c
AN
2756 link = kzalloc(sizeof(*link), GFP_USER);
2757 if (!link) {
2758 err = -ENOMEM;
2759 goto out_put_prog;
2760 }
f2e10bff
AN
2761 bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING,
2762 &bpf_tracing_link_lops, prog);
2763 link->attach_type = prog->expected_attach_type;
70ed506c 2764
3aac1ead
THJ
2765 mutex_lock(&prog->aux->dst_mutex);
2766
4a1e7c0c
THJ
2767 /* There are a few possible cases here:
2768 *
2769 * - if prog->aux->dst_trampoline is set, the program was just loaded
2770 * and not yet attached to anything, so we can use the values stored
2771 * in prog->aux
2772 *
2773 * - if prog->aux->dst_trampoline is NULL, the program has already been
2774 * attached to a target and its initial target was cleared (below)
2775 *
2776 * - if tgt_prog != NULL, the caller specified tgt_prog_fd +
2777 * target_btf_id using the link_create API.
2778 *
 2779 * - if tgt_prog == NULL, this function was called using the old
f3a95075
JO
2780 * raw_tracepoint_open API, and we need a target from prog->aux
2781 *
2782 * - if prog->aux->dst_trampoline and tgt_prog is NULL, the program
2783 * was detached and is going for re-attachment.
4a1e7c0c
THJ
2784 */
2785 if (!prog->aux->dst_trampoline && !tgt_prog) {
f3a95075
JO
2786 /*
2787 * Allow re-attach for TRACING and LSM programs. If it's
2788 * currently linked, bpf_trampoline_link_prog will fail.
2789 * EXT programs need to specify tgt_prog_fd, so they
 2790 * re-attach in a separate code path.
2791 */
2792 if (prog->type != BPF_PROG_TYPE_TRACING &&
2793 prog->type != BPF_PROG_TYPE_LSM) {
2794 err = -EINVAL;
2795 goto out_unlock;
2796 }
2797 btf_id = prog->aux->attach_btf_id;
2798 key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, btf_id);
babf3164 2799 }
4a1e7c0c
THJ
2800
2801 if (!prog->aux->dst_trampoline ||
2802 (key && key != prog->aux->dst_trampoline->key)) {
2803 /* If there is no saved target, or the specified target is
2804 * different from the destination specified at load time, we
2805 * need a new trampoline and a check for compatibility
2806 */
2807 struct bpf_attach_target_info tgt_info = {};
2808
2809 err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id,
2810 &tgt_info);
2811 if (err)
2812 goto out_unlock;
2813
2814 tr = bpf_trampoline_get(key, &tgt_info);
2815 if (!tr) {
2816 err = -ENOMEM;
2817 goto out_unlock;
2818 }
2819 } else {
2820 /* The caller didn't specify a target, or the target was the
2821 * same as the destination supplied during program load. This
2822 * means we can reuse the trampoline and reference from program
2823 * load time, and there is no need to allocate a new one. This
2824 * can only happen once for any program, as the saved values in
2825 * prog->aux are cleared below.
2826 */
2827 tr = prog->aux->dst_trampoline;
2828 tgt_prog = prog->aux->dst_prog;
2829 }
3aac1ead
THJ
2830
2831 err = bpf_link_prime(&link->link, &link_primer);
2832 if (err)
2833 goto out_unlock;
fec56f58 2834
3aac1ead 2835 err = bpf_trampoline_link_prog(prog, tr);
babf3164 2836 if (err) {
a3b80e10 2837 bpf_link_cleanup(&link_primer);
3aac1ead
THJ
2838 link = NULL;
2839 goto out_unlock;
fec56f58 2840 }
babf3164 2841
3aac1ead
THJ
2842 link->tgt_prog = tgt_prog;
2843 link->trampoline = tr;
2844
4a1e7c0c
THJ
2845 /* Always clear the trampoline and target prog from prog->aux to make
2846 * sure the original attach destination is not kept alive after a
2847 * program is (re-)attached to another target.
2848 */
2849 if (prog->aux->dst_prog &&
2850 (tgt_prog_fd || tr != prog->aux->dst_trampoline))
2851 /* got extra prog ref from syscall, or attaching to different prog */
2852 bpf_prog_put(prog->aux->dst_prog);
2853 if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline)
2854 /* we allocated a new trampoline, so free the old one */
2855 bpf_trampoline_put(prog->aux->dst_trampoline);
2856
3aac1ead
THJ
2857 prog->aux->dst_prog = NULL;
2858 prog->aux->dst_trampoline = NULL;
2859 mutex_unlock(&prog->aux->dst_mutex);
2860
a3b80e10 2861 return bpf_link_settle(&link_primer);
3aac1ead 2862out_unlock:
4a1e7c0c
THJ
2863 if (tr && tr != prog->aux->dst_trampoline)
2864 bpf_trampoline_put(tr);
3aac1ead
THJ
2865 mutex_unlock(&prog->aux->dst_mutex);
2866 kfree(link);
fec56f58 2867out_put_prog:
4a1e7c0c
THJ
2868 if (tgt_prog_fd && tgt_prog)
2869 bpf_prog_put(tgt_prog);
fec56f58
AS
2870 return err;
2871}
2872
70ed506c
AN
2873struct bpf_raw_tp_link {
2874 struct bpf_link link;
c4f6699d 2875 struct bpf_raw_event_map *btp;
c4f6699d
AS
2876};
2877
70ed506c 2878static void bpf_raw_tp_link_release(struct bpf_link *link)
c4f6699d 2879{
70ed506c
AN
2880 struct bpf_raw_tp_link *raw_tp =
2881 container_of(link, struct bpf_raw_tp_link, link);
c4f6699d 2882
70ed506c 2883 bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
a38d1107 2884 bpf_put_raw_tracepoint(raw_tp->btp);
babf3164
AN
2885}
2886
2887static void bpf_raw_tp_link_dealloc(struct bpf_link *link)
2888{
2889 struct bpf_raw_tp_link *raw_tp =
2890 container_of(link, struct bpf_raw_tp_link, link);
2891
c4f6699d 2892 kfree(raw_tp);
c4f6699d
AS
2893}
2894
f2e10bff
AN
2895static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link,
2896 struct seq_file *seq)
2897{
2898 struct bpf_raw_tp_link *raw_tp_link =
2899 container_of(link, struct bpf_raw_tp_link, link);
2900
2901 seq_printf(seq,
2902 "tp_name:\t%s\n",
2903 raw_tp_link->btp->tp->name);
2904}
2905
2906static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
2907 struct bpf_link_info *info)
2908{
2909 struct bpf_raw_tp_link *raw_tp_link =
2910 container_of(link, struct bpf_raw_tp_link, link);
2911 char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name);
2912 const char *tp_name = raw_tp_link->btp->tp->name;
2913 u32 ulen = info->raw_tracepoint.tp_name_len;
2914 size_t tp_len = strlen(tp_name);
2915
b474959d 2916 if (!ulen ^ !ubuf)
f2e10bff
AN
2917 return -EINVAL;
2918
2919 info->raw_tracepoint.tp_name_len = tp_len + 1;
2920
2921 if (!ubuf)
2922 return 0;
2923
2924 if (ulen >= tp_len + 1) {
2925 if (copy_to_user(ubuf, tp_name, tp_len + 1))
2926 return -EFAULT;
2927 } else {
2928 char zero = '\0';
2929
2930 if (copy_to_user(ubuf, tp_name, ulen - 1))
2931 return -EFAULT;
2932 if (put_user(zero, ubuf + ulen - 1))
2933 return -EFAULT;
2934 return -ENOSPC;
2935 }
2936
2937 return 0;
2938}
2939
a3b80e10 2940static const struct bpf_link_ops bpf_raw_tp_link_lops = {
70ed506c 2941 .release = bpf_raw_tp_link_release,
babf3164 2942 .dealloc = bpf_raw_tp_link_dealloc,
f2e10bff
AN
2943 .show_fdinfo = bpf_raw_tp_link_show_fdinfo,
2944 .fill_link_info = bpf_raw_tp_link_fill_link_info,
c4f6699d
AS
2945};
2946
b89fbfbb
AN
2947#ifdef CONFIG_PERF_EVENTS
2948struct bpf_perf_link {
2949 struct bpf_link link;
2950 struct file *perf_file;
2951};
2952
2953static void bpf_perf_link_release(struct bpf_link *link)
2954{
2955 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link);
2956 struct perf_event *event = perf_link->perf_file->private_data;
2957
2958 perf_event_free_bpf_prog(event);
2959 fput(perf_link->perf_file);
2960}
2961
2962static void bpf_perf_link_dealloc(struct bpf_link *link)
2963{
2964 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link);
2965
2966 kfree(perf_link);
2967}
2968
2969static const struct bpf_link_ops bpf_perf_link_lops = {
2970 .release = bpf_perf_link_release,
2971 .dealloc = bpf_perf_link_dealloc,
2972};
2973
2974static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
2975{
2976 struct bpf_link_primer link_primer;
2977 struct bpf_perf_link *link;
2978 struct perf_event *event;
2979 struct file *perf_file;
2980 int err;
2981
2982 if (attr->link_create.flags)
2983 return -EINVAL;
2984
2985 perf_file = perf_event_get(attr->link_create.target_fd);
2986 if (IS_ERR(perf_file))
2987 return PTR_ERR(perf_file);
2988
2989 link = kzalloc(sizeof(*link), GFP_USER);
2990 if (!link) {
2991 err = -ENOMEM;
2992 goto out_put_file;
2993 }
2994 bpf_link_init(&link->link, BPF_LINK_TYPE_PERF_EVENT, &bpf_perf_link_lops, prog);
2995 link->perf_file = perf_file;
2996
2997 err = bpf_link_prime(&link->link, &link_primer);
2998 if (err) {
2999 kfree(link);
3000 goto out_put_file;
3001 }
3002
3003 event = perf_file->private_data;
82e6b1ee 3004 err = perf_event_set_bpf_prog(event, prog, attr->link_create.perf_event.bpf_cookie);
b89fbfbb
AN
3005 if (err) {
3006 bpf_link_cleanup(&link_primer);
3007 goto out_put_file;
3008 }
3009 /* perf_event_set_bpf_prog() doesn't take its own refcnt on prog */
3010 bpf_prog_inc(prog);
3011
3012 return bpf_link_settle(&link_primer);
3013
3014out_put_file:
3015 fput(perf_file);
3016 return err;
3017}
3018#endif /* CONFIG_PERF_EVENTS */
3019
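/* Example (illustrative sketch, not part of syscall.c): user-space attaching
 * a loaded program to a perf event fd through bpf_perf_link_attach() above,
 * i.e. BPF_LINK_CREATE with the BPF_PERF_EVENT attach type; same headers as
 * the earlier user-space sketches are assumed:
 */
static int perf_link_create(int prog_fd, int perf_fd, __u64 cookie)
{
        union bpf_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.link_create.prog_fd     = prog_fd;
        attr.link_create.target_fd   = perf_fd;
        attr.link_create.attach_type = BPF_PERF_EVENT;
        attr.link_create.perf_event.bpf_cookie = cookie;

        return syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr)); /* link fd */
}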
c4f6699d
AS
3020#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
3021
3022static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
3023{
a3b80e10 3024 struct bpf_link_primer link_primer;
babf3164 3025 struct bpf_raw_tp_link *link;
c4f6699d
AS
3026 struct bpf_raw_event_map *btp;
3027 struct bpf_prog *prog;
ac4414b5
AS
3028 const char *tp_name;
3029 char buf[128];
a3b80e10 3030 int err;
c4f6699d 3031
ac4414b5
AS
3032 if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
3033 return -EINVAL;
3034
3035 prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
3036 if (IS_ERR(prog))
3037 return PTR_ERR(prog);
3038
9e4e01df
KS
3039 switch (prog->type) {
3040 case BPF_PROG_TYPE_TRACING:
3041 case BPF_PROG_TYPE_EXT:
3042 case BPF_PROG_TYPE_LSM:
ac4414b5 3043 if (attr->raw_tracepoint.name) {
fec56f58
AS
3044 /* The attach point for this category of programs
3045 * should be specified via btf_id during program load.
ac4414b5
AS
3046 */
3047 err = -EINVAL;
3048 goto out_put_prog;
3049 }
9e4e01df
KS
3050 if (prog->type == BPF_PROG_TYPE_TRACING &&
3051 prog->expected_attach_type == BPF_TRACE_RAW_TP) {
fec56f58 3052 tp_name = prog->aux->attach_func_name;
9e4e01df
KS
3053 break;
3054 }
5541075a
JO
3055 err = bpf_tracing_prog_attach(prog, 0, 0);
3056 if (err >= 0)
3057 return err;
3058 goto out_put_prog;
9e4e01df
KS
3059 case BPF_PROG_TYPE_RAW_TRACEPOINT:
3060 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
ac4414b5
AS
3061 if (strncpy_from_user(buf,
3062 u64_to_user_ptr(attr->raw_tracepoint.name),
3063 sizeof(buf) - 1) < 0) {
3064 err = -EFAULT;
3065 goto out_put_prog;
3066 }
3067 buf[sizeof(buf) - 1] = 0;
3068 tp_name = buf;
9e4e01df
KS
3069 break;
3070 default:
3071 err = -EINVAL;
3072 goto out_put_prog;
ac4414b5 3073 }
c4f6699d 3074
a38d1107 3075 btp = bpf_get_raw_tracepoint(tp_name);
ac4414b5
AS
3076 if (!btp) {
3077 err = -ENOENT;
3078 goto out_put_prog;
3079 }
c4f6699d 3080
babf3164
AN
3081 link = kzalloc(sizeof(*link), GFP_USER);
3082 if (!link) {
a38d1107
MM
3083 err = -ENOMEM;
3084 goto out_put_btp;
3085 }
f2e10bff
AN
3086 bpf_link_init(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT,
3087 &bpf_raw_tp_link_lops, prog);
babf3164 3088 link->btp = btp;
c4f6699d 3089
a3b80e10
AN
3090 err = bpf_link_prime(&link->link, &link_primer);
3091 if (err) {
babf3164 3092 kfree(link);
babf3164
AN
3093 goto out_put_btp;
3094 }
c4f6699d 3095
babf3164
AN
3096 err = bpf_probe_register(link->btp, prog);
3097 if (err) {
a3b80e10 3098 bpf_link_cleanup(&link_primer);
babf3164 3099 goto out_put_btp;
c4f6699d 3100 }
babf3164 3101
a3b80e10 3102 return bpf_link_settle(&link_primer);
c4f6699d 3103
a38d1107
MM
3104out_put_btp:
3105 bpf_put_raw_tracepoint(btp);
ac4414b5
AS
3106out_put_prog:
3107 bpf_prog_put(prog);
c4f6699d
AS
3108 return err;
3109}
3110
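/* Example (illustrative sketch, not part of syscall.c): user-space opening a
 * raw tracepoint for a BPF_PROG_TYPE_RAW_TRACEPOINT program via the command
 * handled above; "sched_switch" is just a sample tracepoint name:
 */
static int raw_tp_open(int prog_fd)
{
        union bpf_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.raw_tracepoint.name    = (__u64)(unsigned long)"sched_switch";
        attr.raw_tracepoint.prog_fd = prog_fd;

        return syscall(__NR_bpf, BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
}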
33491588
AR
3111static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
3112 enum bpf_attach_type attach_type)
3113{
3114 switch (prog->type) {
3115 case BPF_PROG_TYPE_CGROUP_SOCK:
3116 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
0d01da6a 3117 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
e9ddbb77 3118 case BPF_PROG_TYPE_SK_LOOKUP:
33491588 3119 return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
5cf1e914 3120 case BPF_PROG_TYPE_CGROUP_SKB:
2c78ee89
AS
3121 if (!capable(CAP_NET_ADMIN))
3122 /* cg-skb progs can be loaded by unpriv user.
3123 * check permissions at attach time.
3124 */
3125 return -EPERM;
5cf1e914 3126 return prog->enforce_expected_attach_type &&
3127 prog->expected_attach_type != attach_type ?
3128 -EINVAL : 0;
33491588
AR
3129 default:
3130 return 0;
3131 }
3132}
3133
e28784e3
AN
3134static enum bpf_prog_type
3135attach_type_to_prog_type(enum bpf_attach_type attach_type)
f4324551 3136{
e28784e3 3137 switch (attach_type) {
f4324551
DM
3138 case BPF_CGROUP_INET_INGRESS:
3139 case BPF_CGROUP_INET_EGRESS:
e28784e3 3140 return BPF_PROG_TYPE_CGROUP_SKB;
61023658 3141 case BPF_CGROUP_INET_SOCK_CREATE:
f5836749 3142 case BPF_CGROUP_INET_SOCK_RELEASE:
aac3fc32
AI
3143 case BPF_CGROUP_INET4_POST_BIND:
3144 case BPF_CGROUP_INET6_POST_BIND:
e28784e3 3145 return BPF_PROG_TYPE_CGROUP_SOCK;
4fbac77d
AI
3146 case BPF_CGROUP_INET4_BIND:
3147 case BPF_CGROUP_INET6_BIND:
d74bad4e
AI
3148 case BPF_CGROUP_INET4_CONNECT:
3149 case BPF_CGROUP_INET6_CONNECT:
1b66d253
DB
3150 case BPF_CGROUP_INET4_GETPEERNAME:
3151 case BPF_CGROUP_INET6_GETPEERNAME:
3152 case BPF_CGROUP_INET4_GETSOCKNAME:
3153 case BPF_CGROUP_INET6_GETSOCKNAME:
1cedee13
AI
3154 case BPF_CGROUP_UDP4_SENDMSG:
3155 case BPF_CGROUP_UDP6_SENDMSG:
983695fa
DB
3156 case BPF_CGROUP_UDP4_RECVMSG:
3157 case BPF_CGROUP_UDP6_RECVMSG:
e28784e3 3158 return BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
40304b2a 3159 case BPF_CGROUP_SOCK_OPS:
e28784e3 3160 return BPF_PROG_TYPE_SOCK_OPS;
ebc614f6 3161 case BPF_CGROUP_DEVICE:
e28784e3 3162 return BPF_PROG_TYPE_CGROUP_DEVICE;
4f738adb 3163 case BPF_SK_MSG_VERDICT:
e28784e3 3164 return BPF_PROG_TYPE_SK_MSG;
464bc0fd
JF
3165 case BPF_SK_SKB_STREAM_PARSER:
3166 case BPF_SK_SKB_STREAM_VERDICT:
a7ba4558 3167 case BPF_SK_SKB_VERDICT:
e28784e3 3168 return BPF_PROG_TYPE_SK_SKB;
f4364dcf 3169 case BPF_LIRC_MODE2:
e28784e3 3170 return BPF_PROG_TYPE_LIRC_MODE2;
d58e468b 3171 case BPF_FLOW_DISSECTOR:
e28784e3 3172 return BPF_PROG_TYPE_FLOW_DISSECTOR;
7b146ceb 3173 case BPF_CGROUP_SYSCTL:
e28784e3 3174 return BPF_PROG_TYPE_CGROUP_SYSCTL;
0d01da6a
SF
3175 case BPF_CGROUP_GETSOCKOPT:
3176 case BPF_CGROUP_SETSOCKOPT:
e28784e3 3177 return BPF_PROG_TYPE_CGROUP_SOCKOPT;
de4e05ca
YS
3178 case BPF_TRACE_ITER:
3179 return BPF_PROG_TYPE_TRACING;
e9ddbb77
JS
3180 case BPF_SK_LOOKUP:
3181 return BPF_PROG_TYPE_SK_LOOKUP;
aa8d3a71
AN
3182 case BPF_XDP:
3183 return BPF_PROG_TYPE_XDP;
f4324551 3184 default:
e28784e3 3185 return BPF_PROG_TYPE_UNSPEC;
f4324551 3186 }
e28784e3
AN
3187}
3188
3189#define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd
3190
3191#define BPF_F_ATTACH_MASK \
3192 (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE)
3193
3194static int bpf_prog_attach(const union bpf_attr *attr)
3195{
3196 enum bpf_prog_type ptype;
3197 struct bpf_prog *prog;
3198 int ret;
3199
e28784e3
AN
3200 if (CHECK_ATTR(BPF_PROG_ATTACH))
3201 return -EINVAL;
3202
3203 if (attr->attach_flags & ~BPF_F_ATTACH_MASK)
3204 return -EINVAL;
3205
3206 ptype = attach_type_to_prog_type(attr->attach_type);
3207 if (ptype == BPF_PROG_TYPE_UNSPEC)
3208 return -EINVAL;
f4324551 3209
b2cd1257
DA
3210 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
3211 if (IS_ERR(prog))
3212 return PTR_ERR(prog);
3213
5e43f899
AI
3214 if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) {
3215 bpf_prog_put(prog);
3216 return -EINVAL;
3217 }
3218
fdb5c453
SY
3219 switch (ptype) {
3220 case BPF_PROG_TYPE_SK_SKB:
3221 case BPF_PROG_TYPE_SK_MSG:
604326b4 3222 ret = sock_map_get_from_fd(attr, prog);
fdb5c453
SY
3223 break;
3224 case BPF_PROG_TYPE_LIRC_MODE2:
3225 ret = lirc_prog_attach(attr, prog);
3226 break;
d58e468b 3227 case BPF_PROG_TYPE_FLOW_DISSECTOR:
a3fd7cee 3228 ret = netns_bpf_prog_attach(attr, prog);
d58e468b 3229 break;
e28784e3
AN
3230 case BPF_PROG_TYPE_CGROUP_DEVICE:
3231 case BPF_PROG_TYPE_CGROUP_SKB:
3232 case BPF_PROG_TYPE_CGROUP_SOCK:
3233 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
3234 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3235 case BPF_PROG_TYPE_CGROUP_SYSCTL:
3236 case BPF_PROG_TYPE_SOCK_OPS:
fdb5c453 3237 ret = cgroup_bpf_prog_attach(attr, ptype, prog);
e28784e3
AN
3238 break;
3239 default:
3240 ret = -EINVAL;
b2cd1257
DA
3241 }
3242
7f677633
AS
3243 if (ret)
3244 bpf_prog_put(prog);
7f677633 3245 return ret;
f4324551
DM
3246}
3247
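/* Example (illustrative sketch, not part of syscall.c): user-space attaching
 * a cgroup-skb program to a cgroup directory fd via the BPF_PROG_ATTACH
 * command handled above:
 */
static int cgroup_attach_egress(int cgroup_fd, int prog_fd)
{
        union bpf_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.target_fd     = cgroup_fd; /* open("/sys/fs/cgroup/...", O_RDONLY) */
        attr.attach_bpf_fd = prog_fd;
        attr.attach_type   = BPF_CGROUP_INET_EGRESS;
        attr.attach_flags  = BPF_F_ALLOW_MULTI;

        return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
}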
3248#define BPF_PROG_DETACH_LAST_FIELD attach_type
3249
3250static int bpf_prog_detach(const union bpf_attr *attr)
3251{
324bda9e 3252 enum bpf_prog_type ptype;
f4324551 3253
f4324551
DM
3254 if (CHECK_ATTR(BPF_PROG_DETACH))
3255 return -EINVAL;
3256
e28784e3
AN
3257 ptype = attach_type_to_prog_type(attr->attach_type);
3258
3259 switch (ptype) {
3260 case BPF_PROG_TYPE_SK_MSG:
3261 case BPF_PROG_TYPE_SK_SKB:
bb0de313 3262 return sock_map_prog_detach(attr, ptype);
e28784e3 3263 case BPF_PROG_TYPE_LIRC_MODE2:
f4364dcf 3264 return lirc_prog_detach(attr);
e28784e3 3265 case BPF_PROG_TYPE_FLOW_DISSECTOR:
4ac2add6 3266 return netns_bpf_prog_detach(attr, ptype);
e28784e3
AN
3267 case BPF_PROG_TYPE_CGROUP_DEVICE:
3268 case BPF_PROG_TYPE_CGROUP_SKB:
3269 case BPF_PROG_TYPE_CGROUP_SOCK:
3270 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
3271 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3272 case BPF_PROG_TYPE_CGROUP_SYSCTL:
3273 case BPF_PROG_TYPE_SOCK_OPS:
3274 return cgroup_bpf_prog_detach(attr, ptype);
f4324551
DM
3275 default:
3276 return -EINVAL;
3277 }
f4324551 3278}
40304b2a 3279
468e2f64
AS
3280#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt
3281
3282static int bpf_prog_query(const union bpf_attr *attr,
3283 union bpf_attr __user *uattr)
3284{
468e2f64
AS
3285 if (!capable(CAP_NET_ADMIN))
3286 return -EPERM;
3287 if (CHECK_ATTR(BPF_PROG_QUERY))
3288 return -EINVAL;
3289 if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE)
3290 return -EINVAL;
3291
3292 switch (attr->query.attach_type) {
3293 case BPF_CGROUP_INET_INGRESS:
3294 case BPF_CGROUP_INET_EGRESS:
3295 case BPF_CGROUP_INET_SOCK_CREATE:
f5836749 3296 case BPF_CGROUP_INET_SOCK_RELEASE:
4fbac77d
AI
3297 case BPF_CGROUP_INET4_BIND:
3298 case BPF_CGROUP_INET6_BIND:
aac3fc32
AI
3299 case BPF_CGROUP_INET4_POST_BIND:
3300 case BPF_CGROUP_INET6_POST_BIND:
d74bad4e
AI
3301 case BPF_CGROUP_INET4_CONNECT:
3302 case BPF_CGROUP_INET6_CONNECT:
1b66d253
DB
3303 case BPF_CGROUP_INET4_GETPEERNAME:
3304 case BPF_CGROUP_INET6_GETPEERNAME:
3305 case BPF_CGROUP_INET4_GETSOCKNAME:
3306 case BPF_CGROUP_INET6_GETSOCKNAME:
1cedee13
AI
3307 case BPF_CGROUP_UDP4_SENDMSG:
3308 case BPF_CGROUP_UDP6_SENDMSG:
983695fa
DB
3309 case BPF_CGROUP_UDP4_RECVMSG:
3310 case BPF_CGROUP_UDP6_RECVMSG:
468e2f64 3311 case BPF_CGROUP_SOCK_OPS:
ebc614f6 3312 case BPF_CGROUP_DEVICE:
7b146ceb 3313 case BPF_CGROUP_SYSCTL:
0d01da6a
SF
3314 case BPF_CGROUP_GETSOCKOPT:
3315 case BPF_CGROUP_SETSOCKOPT:
e28784e3 3316 return cgroup_bpf_prog_query(attr, uattr);
f4364dcf
SY
3317 case BPF_LIRC_MODE2:
3318 return lirc_prog_query(attr, uattr);
118c8e9a 3319 case BPF_FLOW_DISSECTOR:
e9ddbb77 3320 case BPF_SK_LOOKUP:
a3fd7cee 3321 return netns_bpf_prog_query(attr, uattr);
748cd572
DZ
3322 case BPF_SK_SKB_STREAM_PARSER:
3323 case BPF_SK_SKB_STREAM_VERDICT:
3324 case BPF_SK_MSG_VERDICT:
3325 case BPF_SK_SKB_VERDICT:
3326 return sock_map_bpf_prog_query(attr, uattr);
468e2f64
AS
3327 default:
3328 return -EINVAL;
3329 }
468e2f64 3330}
f4324551 3331
1b4d60ec 3332#define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu
1cf1cae9
AS
3333
3334static int bpf_prog_test_run(const union bpf_attr *attr,
3335 union bpf_attr __user *uattr)
3336{
3337 struct bpf_prog *prog;
3338 int ret = -ENOTSUPP;
3339
3340 if (CHECK_ATTR(BPF_PROG_TEST_RUN))
3341 return -EINVAL;
3342
b0b9395d
SF
3343 if ((attr->test.ctx_size_in && !attr->test.ctx_in) ||
3344 (!attr->test.ctx_size_in && attr->test.ctx_in))
3345 return -EINVAL;
3346
3347 if ((attr->test.ctx_size_out && !attr->test.ctx_out) ||
3348 (!attr->test.ctx_size_out && attr->test.ctx_out))
3349 return -EINVAL;
3350
1cf1cae9
AS
3351 prog = bpf_prog_get(attr->test.prog_fd);
3352 if (IS_ERR(prog))
3353 return PTR_ERR(prog);
3354
3355 if (prog->aux->ops->test_run)
3356 ret = prog->aux->ops->test_run(prog, attr, uattr);
3357
3358 bpf_prog_put(prog);
3359 return ret;
3360}
3361
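/* Example (illustrative sketch, not part of syscall.c): user-space running a
 * program once over a packet buffer via the BPF_PROG_TEST_RUN command handled
 * above; the program's return value comes back in the same attr:
 */
static int test_run_once(int prog_fd, void *pkt, __u32 pkt_len, __u32 *retval)
{
        union bpf_attr attr;
        int err;

        memset(&attr, 0, sizeof(attr));
        attr.test.prog_fd      = prog_fd;
        attr.test.data_in      = (__u64)(unsigned long)pkt;
        attr.test.data_size_in = pkt_len;
        attr.test.repeat       = 1;

        err = syscall(__NR_bpf, BPF_PROG_TEST_RUN, &attr, sizeof(attr));
        if (!err)
                *retval = attr.test.retval;
        return err;
}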
34ad5580
MKL
3362#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id
3363
3364static int bpf_obj_get_next_id(const union bpf_attr *attr,
3365 union bpf_attr __user *uattr,
3366 struct idr *idr,
3367 spinlock_t *lock)
3368{
3369 u32 next_id = attr->start_id;
3370 int err = 0;
3371
3372 if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX)
3373 return -EINVAL;
3374
3375 if (!capable(CAP_SYS_ADMIN))
3376 return -EPERM;
3377
3378 next_id++;
3379 spin_lock_bh(lock);
3380 if (!idr_get_next(idr, &next_id))
3381 err = -ENOENT;
3382 spin_unlock_bh(lock);
3383
3384 if (!err)
3385 err = put_user(next_id, &uattr->next_id);
3386
3387 return err;
3388}
3389
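/* Example (illustrative sketch, not part of syscall.c): user-space walking
 * all loaded program IDs with the BPF_PROG_GET_NEXT_ID command handled by
 * bpf_obj_get_next_id() above (needs CAP_SYS_ADMIN, as enforced there;
 * iteration ends when the syscall fails with -ENOENT):
 */
static void for_each_prog_id(void (*cb)(__u32 id))
{
        union bpf_attr attr;

        memset(&attr, 0, sizeof(attr));
        while (!syscall(__NR_bpf, BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr))) {
                cb(attr.next_id);
                attr.start_id = attr.next_id;
        }
}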
6086d29d
YS
3390struct bpf_map *bpf_map_get_curr_or_next(u32 *id)
3391{
3392 struct bpf_map *map;
3393
3394 spin_lock_bh(&map_idr_lock);
3395again:
3396 map = idr_get_next(&map_idr, id);
3397 if (map) {
3398 map = __bpf_map_inc_not_zero(map, false);
3399 if (IS_ERR(map)) {
3400 (*id)++;
3401 goto again;
3402 }
3403 }
3404 spin_unlock_bh(&map_idr_lock);
3405
3406 return map;
3407}
3408
a228a64f
AS
3409struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id)
3410{
3411 struct bpf_prog *prog;
3412
3413 spin_lock_bh(&prog_idr_lock);
3414again:
3415 prog = idr_get_next(&prog_idr, id);
3416 if (prog) {
3417 prog = bpf_prog_inc_not_zero(prog);
3418 if (IS_ERR(prog)) {
3419 (*id)++;
3420 goto again;
3421 }
3422 }
3423 spin_unlock_bh(&prog_idr_lock);
3424
3425 return prog;
3426}
3427
b16d9aa4
MKL
3428#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
3429
7e6897f9 3430struct bpf_prog *bpf_prog_by_id(u32 id)
b16d9aa4
MKL
3431{
3432 struct bpf_prog *prog;
b16d9aa4 3433
7e6897f9
BT
3434 if (!id)
3435 return ERR_PTR(-ENOENT);
b16d9aa4
MKL
3436
3437 spin_lock_bh(&prog_idr_lock);
3438 prog = idr_find(&prog_idr, id);
3439 if (prog)
3440 prog = bpf_prog_inc_not_zero(prog);
3441 else
3442 prog = ERR_PTR(-ENOENT);
3443 spin_unlock_bh(&prog_idr_lock);
7e6897f9
BT
3444 return prog;
3445}
3446
3447static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
3448{
3449 struct bpf_prog *prog;
3450 u32 id = attr->prog_id;
3451 int fd;
3452
3453 if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
3454 return -EINVAL;
3455
3456 if (!capable(CAP_SYS_ADMIN))
3457 return -EPERM;
b16d9aa4 3458
7e6897f9 3459 prog = bpf_prog_by_id(id);
b16d9aa4
MKL
3460 if (IS_ERR(prog))
3461 return PTR_ERR(prog);
3462
3463 fd = bpf_prog_new_fd(prog);
3464 if (fd < 0)
3465 bpf_prog_put(prog);
3466
3467 return fd;
3468}
3469
6e71b04a 3470#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags
bd5f5f4e
MKL
3471
3472static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
3473{
3474 struct bpf_map *map;
3475 u32 id = attr->map_id;
6e71b04a 3476 int f_flags;
bd5f5f4e
MKL
3477 int fd;
3478
6e71b04a
CF
3479 if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) ||
3480 attr->open_flags & ~BPF_OBJ_FLAG_MASK)
bd5f5f4e
MKL
3481 return -EINVAL;
3482
3483 if (!capable(CAP_SYS_ADMIN))
3484 return -EPERM;
3485
6e71b04a
CF
3486 f_flags = bpf_get_file_flag(attr->open_flags);
3487 if (f_flags < 0)
3488 return f_flags;
3489
bd5f5f4e
MKL
3490 spin_lock_bh(&map_idr_lock);
3491 map = idr_find(&map_idr, id);
3492 if (map)
b0e4701c 3493 map = __bpf_map_inc_not_zero(map, true);
bd5f5f4e
MKL
3494 else
3495 map = ERR_PTR(-ENOENT);
3496 spin_unlock_bh(&map_idr_lock);
3497
3498 if (IS_ERR(map))
3499 return PTR_ERR(map);
3500
6e71b04a 3501 fd = bpf_map_new_fd(map, f_flags);
bd5f5f4e 3502 if (fd < 0)
781e6282 3503 bpf_map_put_with_uref(map);
bd5f5f4e
MKL
3504
3505 return fd;
3506}
3507
7105e828 3508static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
d8eca5bb
DB
3509 unsigned long addr, u32 *off,
3510 u32 *type)
7105e828 3511{
d8eca5bb 3512 const struct bpf_map *map;
7105e828
DB
3513 int i;
3514
984fe94f 3515 mutex_lock(&prog->aux->used_maps_mutex);
d8eca5bb
DB
3516 for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
3517 map = prog->aux->used_maps[i];
3518 if (map == (void *)addr) {
3519 *type = BPF_PSEUDO_MAP_FD;
984fe94f 3520 goto out;
d8eca5bb
DB
3521 }
3522 if (!map->ops->map_direct_value_meta)
3523 continue;
3524 if (!map->ops->map_direct_value_meta(map, addr, off)) {
3525 *type = BPF_PSEUDO_MAP_VALUE;
984fe94f 3526 goto out;
d8eca5bb
DB
3527 }
3528 }
984fe94f 3529 map = NULL;
d8eca5bb 3530
984fe94f
YZ
3531out:
3532 mutex_unlock(&prog->aux->used_maps_mutex);
3533 return map;
7105e828
DB
3534}
3535
63960260
KC
3536static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
3537 const struct cred *f_cred)
7105e828
DB
3538{
3539 const struct bpf_map *map;
3540 struct bpf_insn *insns;
d8eca5bb 3541 u32 off, type;
7105e828 3542 u64 imm;
29fcb05b 3543 u8 code;
7105e828
DB
3544 int i;
3545
3546 insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog),
3547 GFP_USER);
3548 if (!insns)
3549 return insns;
3550
3551 for (i = 0; i < prog->len; i++) {
29fcb05b
AN
3552 code = insns[i].code;
3553
3554 if (code == (BPF_JMP | BPF_TAIL_CALL)) {
7105e828
DB
3555 insns[i].code = BPF_JMP | BPF_CALL;
3556 insns[i].imm = BPF_FUNC_tail_call;
3557 /* fall-through */
3558 }
29fcb05b
AN
3559 if (code == (BPF_JMP | BPF_CALL) ||
3560 code == (BPF_JMP | BPF_CALL_ARGS)) {
3561 if (code == (BPF_JMP | BPF_CALL_ARGS))
7105e828 3562 insns[i].code = BPF_JMP | BPF_CALL;
63960260 3563 if (!bpf_dump_raw_ok(f_cred))
7105e828
DB
3564 insns[i].imm = 0;
3565 continue;
3566 }
29fcb05b
AN
3567 if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) {
3568 insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM;
3569 continue;
3570 }
7105e828 3571
29fcb05b 3572 if (code != (BPF_LD | BPF_IMM | BPF_DW))
7105e828
DB
3573 continue;
3574
3575 imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
d8eca5bb 3576 map = bpf_map_from_imm(prog, imm, &off, &type);
7105e828 3577 if (map) {
d8eca5bb 3578 insns[i].src_reg = type;
7105e828 3579 insns[i].imm = map->id;
d8eca5bb 3580 insns[i + 1].imm = off;
7105e828
DB
3581 continue;
3582 }
7105e828
DB
3583 }
3584
3585 return insns;
3586}
3587
c454a46b
MKL
3588static int set_info_rec_size(struct bpf_prog_info *info)
3589{
3590 /*
3591 * Ensure info.*_rec_size is the same as kernel expected size
3592 *
3593 * or
3594 *
3595 * Only allow zero *_rec_size if both _rec_size and _cnt are
3596 * zero. In this case, the kernel will set the expected
3597 * _rec_size back to the info.
3598 */
3599
11d8b82d 3600 if ((info->nr_func_info || info->func_info_rec_size) &&
c454a46b
MKL
3601 info->func_info_rec_size != sizeof(struct bpf_func_info))
3602 return -EINVAL;
3603
11d8b82d 3604 if ((info->nr_line_info || info->line_info_rec_size) &&
c454a46b
MKL
3605 info->line_info_rec_size != sizeof(struct bpf_line_info))
3606 return -EINVAL;
3607
11d8b82d 3608 if ((info->nr_jited_line_info || info->jited_line_info_rec_size) &&
c454a46b
MKL
3609 info->jited_line_info_rec_size != sizeof(__u64))
3610 return -EINVAL;
3611
3612 info->func_info_rec_size = sizeof(struct bpf_func_info);
3613 info->line_info_rec_size = sizeof(struct bpf_line_info);
3614 info->jited_line_info_rec_size = sizeof(__u64);
3615
3616 return 0;
3617}
3618
63960260
KC
static int bpf_prog_get_info_by_fd(struct file *file,
				   struct bpf_prog *prog,
				   const union bpf_attr *attr,
				   union bpf_attr __user *uattr)
{
	struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_prog_info info;
	u32 info_len = attr->info.info_len;
	struct bpf_prog_kstats stats;
	char __user *uinsns;
	u32 ulen;
	int err;

	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	memset(&info, 0, sizeof(info));
	if (copy_from_user(&info, uinfo, info_len))
		return -EFAULT;

	info.type = prog->type;
	info.id = prog->aux->id;
	info.load_time = prog->aux->load_time;
	info.created_by_uid = from_kuid_munged(current_user_ns(),
					       prog->aux->user->uid);
	info.gpl_compatible = prog->gpl_compatible;

	memcpy(info.tag, prog->tag, sizeof(prog->tag));
	memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));

	mutex_lock(&prog->aux->used_maps_mutex);
	ulen = info.nr_map_ids;
	info.nr_map_ids = prog->aux->used_map_cnt;
	ulen = min_t(u32, info.nr_map_ids, ulen);
	if (ulen) {
		u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
		u32 i;

		for (i = 0; i < ulen; i++)
			if (put_user(prog->aux->used_maps[i]->id,
				     &user_map_ids[i])) {
				mutex_unlock(&prog->aux->used_maps_mutex);
				return -EFAULT;
			}
	}
	mutex_unlock(&prog->aux->used_maps_mutex);

	err = set_info_rec_size(&info);
	if (err)
		return err;

	bpf_prog_get_stats(prog, &stats);
	info.run_time_ns = stats.nsecs;
	info.run_cnt = stats.cnt;
	info.recursion_misses = stats.misses;

	info.verified_insns = prog->aux->verified_insns;

	if (!bpf_capable()) {
		info.jited_prog_len = 0;
		info.xlated_prog_len = 0;
		info.nr_jited_ksyms = 0;
		info.nr_jited_func_lens = 0;
		info.nr_func_info = 0;
		info.nr_line_info = 0;
		info.nr_jited_line_info = 0;
		goto done;
	}

	ulen = info.xlated_prog_len;
	info.xlated_prog_len = bpf_prog_insn_size(prog);
	if (info.xlated_prog_len && ulen) {
		struct bpf_insn *insns_sanitized;
		bool fault;

		if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) {
			info.xlated_prog_insns = 0;
			goto done;
		}
		insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred);
		if (!insns_sanitized)
			return -ENOMEM;
		uinsns = u64_to_user_ptr(info.xlated_prog_insns);
		ulen = min_t(u32, info.xlated_prog_len, ulen);
		fault = copy_to_user(uinsns, insns_sanitized, ulen);
		kfree(insns_sanitized);
		if (fault)
			return -EFAULT;
	}

	if (bpf_prog_is_dev_bound(prog->aux)) {
		err = bpf_prog_offload_info_fill(&info, prog);
		if (err)
			return err;
		goto done;
	}

	/* NOTE: the following code is supposed to be skipped for offload.
	 * bpf_prog_offload_info_fill() is the place to fill similar fields
	 * for offload.
	 */
	ulen = info.jited_prog_len;
	if (prog->aux->func_cnt) {
		u32 i;

		info.jited_prog_len = 0;
		for (i = 0; i < prog->aux->func_cnt; i++)
			info.jited_prog_len += prog->aux->func[i]->jited_len;
	} else {
		info.jited_prog_len = prog->jited_len;
	}

	if (info.jited_prog_len && ulen) {
		if (bpf_dump_raw_ok(file->f_cred)) {
			uinsns = u64_to_user_ptr(info.jited_prog_insns);
			ulen = min_t(u32, info.jited_prog_len, ulen);

			/* for multi-function programs, copy the JITed
			 * instructions for all the functions
			 */
			if (prog->aux->func_cnt) {
				u32 len, free, i;
				u8 *img;

				free = ulen;
				for (i = 0; i < prog->aux->func_cnt; i++) {
					len = prog->aux->func[i]->jited_len;
					len = min_t(u32, len, free);
					img = (u8 *) prog->aux->func[i]->bpf_func;
					if (copy_to_user(uinsns, img, len))
						return -EFAULT;
					uinsns += len;
					free -= len;
					if (!free)
						break;
				}
			} else {
				if (copy_to_user(uinsns, prog->bpf_func, ulen))
					return -EFAULT;
			}
		} else {
			info.jited_prog_insns = 0;
		}
	}

	ulen = info.nr_jited_ksyms;
	info.nr_jited_ksyms = prog->aux->func_cnt ? : 1;
	if (ulen) {
		if (bpf_dump_raw_ok(file->f_cred)) {
			unsigned long ksym_addr;
			u64 __user *user_ksyms;
			u32 i;

			/* copy the address of the kernel symbol
			 * corresponding to each function
			 */
			ulen = min_t(u32, info.nr_jited_ksyms, ulen);
			user_ksyms = u64_to_user_ptr(info.jited_ksyms);
			if (prog->aux->func_cnt) {
				for (i = 0; i < ulen; i++) {
					ksym_addr = (unsigned long)
						prog->aux->func[i]->bpf_func;
					if (put_user((u64) ksym_addr,
						     &user_ksyms[i]))
						return -EFAULT;
				}
			} else {
				ksym_addr = (unsigned long) prog->bpf_func;
				if (put_user((u64) ksym_addr, &user_ksyms[0]))
					return -EFAULT;
			}
		} else {
			info.jited_ksyms = 0;
		}
	}

	ulen = info.nr_jited_func_lens;
	info.nr_jited_func_lens = prog->aux->func_cnt ? : 1;
	if (ulen) {
		if (bpf_dump_raw_ok(file->f_cred)) {
			u32 __user *user_lens;
			u32 func_len, i;

			/* copy the JITed image lengths for each function */
			ulen = min_t(u32, info.nr_jited_func_lens, ulen);
			user_lens = u64_to_user_ptr(info.jited_func_lens);
			if (prog->aux->func_cnt) {
				for (i = 0; i < ulen; i++) {
					func_len =
						prog->aux->func[i]->jited_len;
					if (put_user(func_len, &user_lens[i]))
						return -EFAULT;
				}
			} else {
				func_len = prog->jited_len;
				if (put_user(func_len, &user_lens[0]))
					return -EFAULT;
			}
		} else {
			info.jited_func_lens = 0;
		}
	}

	if (prog->aux->btf)
		info.btf_id = btf_obj_id(prog->aux->btf);

	ulen = info.nr_func_info;
	info.nr_func_info = prog->aux->func_info_cnt;
	if (info.nr_func_info && ulen) {
		char __user *user_finfo;

		user_finfo = u64_to_user_ptr(info.func_info);
		ulen = min_t(u32, info.nr_func_info, ulen);
		if (copy_to_user(user_finfo, prog->aux->func_info,
				 info.func_info_rec_size * ulen))
			return -EFAULT;
	}

	ulen = info.nr_line_info;
	info.nr_line_info = prog->aux->nr_linfo;
	if (info.nr_line_info && ulen) {
		__u8 __user *user_linfo;

		user_linfo = u64_to_user_ptr(info.line_info);
		ulen = min_t(u32, info.nr_line_info, ulen);
		if (copy_to_user(user_linfo, prog->aux->linfo,
				 info.line_info_rec_size * ulen))
			return -EFAULT;
	}

	ulen = info.nr_jited_line_info;
	if (prog->aux->jited_linfo)
		info.nr_jited_line_info = prog->aux->nr_linfo;
	else
		info.nr_jited_line_info = 0;
	if (info.nr_jited_line_info && ulen) {
		if (bpf_dump_raw_ok(file->f_cred)) {
			__u64 __user *user_linfo;
			u32 i;

			user_linfo = u64_to_user_ptr(info.jited_line_info);
			ulen = min_t(u32, info.nr_jited_line_info, ulen);
			for (i = 0; i < ulen; i++) {
				if (put_user((__u64)(long)prog->aux->jited_linfo[i],
					     &user_linfo[i]))
					return -EFAULT;
			}
		} else {
			info.jited_line_info = 0;
		}
	}

	ulen = info.nr_prog_tags;
	info.nr_prog_tags = prog->aux->func_cnt ? : 1;
	if (ulen) {
		__u8 __user (*user_prog_tags)[BPF_TAG_SIZE];
		u32 i;

		user_prog_tags = u64_to_user_ptr(info.prog_tags);
		ulen = min_t(u32, info.nr_prog_tags, ulen);
		if (prog->aux->func_cnt) {
			for (i = 0; i < ulen; i++) {
				if (copy_to_user(user_prog_tags[i],
						 prog->aux->func[i]->tag,
						 BPF_TAG_SIZE))
					return -EFAULT;
			}
		} else {
			if (copy_to_user(user_prog_tags[0],
					 prog->tag, BPF_TAG_SIZE))
				return -EFAULT;
		}
	}

done:
	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}

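/* Example (illustrative sketch, not part of the original source): the
 * common user-space pattern is two BPF_OBJ_GET_INFO_BY_FD calls -- one to
 * learn the counts, one with sized buffers. Fetching a program's map ids:
 *
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <linux/bpf.h>
 *
 *	__u32 ids[16];
 *	struct bpf_prog_info info = {};
 *	union bpf_attr attr = {};
 *	long err;
 *
 *	info.nr_map_ids = 16;			// capacity of ids[]
 *	info.map_ids = (__u64)(unsigned long)ids;
 *	attr.info.bpf_fd = prog_fd;
 *	attr.info.info_len = sizeof(info);
 *	attr.info.info = (__u64)(unsigned long)&info;
 *	err = syscall(SYS_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
 *	// on success info.nr_map_ids holds the real used_map_cnt and
 *	// min(16, used_map_cnt) entries of ids[] are valid
 */
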
static int bpf_map_get_info_by_fd(struct file *file,
				  struct bpf_map *map,
				  const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_map_info info;
	u32 info_len = attr->info.info_len;
	int err;

	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	memset(&info, 0, sizeof(info));
	info.type = map->map_type;
	info.id = map->id;
	info.key_size = map->key_size;
	info.value_size = map->value_size;
	info.max_entries = map->max_entries;
	info.map_flags = map->map_flags;
	info.map_extra = map->map_extra;
	memcpy(info.name, map->name, sizeof(map->name));

	if (map->btf) {
		info.btf_id = btf_obj_id(map->btf);
		info.btf_key_type_id = map->btf_key_type_id;
		info.btf_value_type_id = map->btf_value_type_id;
	}
	info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;

	if (bpf_map_is_dev_bound(map)) {
		err = bpf_map_offload_info_fill(&info, map);
		if (err)
			return err;
	}

	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}

static int bpf_btf_get_info_by_fd(struct file *file,
				  struct btf *btf,
				  const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	u32 info_len = attr->info.info_len;
	int err;

	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len);
	if (err)
		return err;

	return btf_get_info_by_fd(btf, attr, uattr);
}

static int bpf_link_get_info_by_fd(struct file *file,
				   struct bpf_link *link,
				   const union bpf_attr *attr,
				   union bpf_attr __user *uattr)
{
	struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	struct bpf_link_info info;
	u32 info_len = attr->info.info_len;
	int err;

	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);

	memset(&info, 0, sizeof(info));
	if (copy_from_user(&info, uinfo, info_len))
		return -EFAULT;

	info.type = link->type;
	info.id = link->id;
	info.prog_id = link->prog->aux->id;

	if (link->ops->fill_link_info) {
		err = link->ops->fill_link_info(link, &info);
		if (err)
			return err;
	}

	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
		return -EFAULT;

	return 0;
}

#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info

static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
				  union bpf_attr __user *uattr)
{
	int ufd = attr->info.bpf_fd;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
		return -EINVAL;

	f = fdget(ufd);
	if (!f.file)
		return -EBADFD;

	if (f.file->f_op == &bpf_prog_fops)
		err = bpf_prog_get_info_by_fd(f.file, f.file->private_data, attr,
					      uattr);
	else if (f.file->f_op == &bpf_map_fops)
		err = bpf_map_get_info_by_fd(f.file, f.file->private_data, attr,
					     uattr);
	else if (f.file->f_op == &btf_fops)
		err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr);
	else if (f.file->f_op == &bpf_link_fops)
		err = bpf_link_get_info_by_fd(f.file, f.file->private_data,
					      attr, uattr);
	else
		err = -EINVAL;

	fdput(f);
	return err;
}

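/* Usage sketch (illustrative, not part of the original source): because
 * dispatch keys on f.file->f_op, one command serves prog, map, BTF and
 * link fds alike. Fetching a map's id, for instance:
 *
 *	struct bpf_map_info info = {};
 *	union bpf_attr attr = {};
 *
 *	attr.info.bpf_fd = map_fd;
 *	attr.info.info_len = sizeof(info);
 *	attr.info.info = (__u64)(unsigned long)&info;
 *	if (!syscall(SYS_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)))
 *		printf("map id %u\n", info.id);
 */
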
#define BPF_BTF_LOAD_LAST_FIELD btf_log_level

static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr)
{
	if (CHECK_ATTR(BPF_BTF_LOAD))
		return -EINVAL;

	if (!bpf_capable())
		return -EPERM;

	return btf_new_fd(attr, uattr);
}

#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id

static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return btf_get_fd_by_id(attr->btf_id);
}

static int bpf_task_fd_query_copy(const union bpf_attr *attr,
				  union bpf_attr __user *uattr,
				  u32 prog_id, u32 fd_type,
				  const char *buf, u64 probe_offset,
				  u64 probe_addr)
{
	char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
	u32 len = buf ? strlen(buf) : 0, input_len;
	int err = 0;

	if (put_user(len, &uattr->task_fd_query.buf_len))
		return -EFAULT;
	input_len = attr->task_fd_query.buf_len;
	if (input_len && ubuf) {
		if (!len) {
			/* nothing to copy, just make ubuf NULL terminated */
			char zero = '\0';

			if (put_user(zero, ubuf))
				return -EFAULT;
		} else if (input_len >= len + 1) {
			/* ubuf can hold the string with NULL terminator */
			if (copy_to_user(ubuf, buf, len + 1))
				return -EFAULT;
		} else {
			/* ubuf cannot hold the string with NULL terminator,
			 * do a partial copy with NULL terminator.
			 */
			char zero = '\0';

			err = -ENOSPC;
			if (copy_to_user(ubuf, buf, input_len - 1))
				return -EFAULT;
			if (put_user(zero, ubuf + input_len - 1))
				return -EFAULT;
		}
	}

	if (put_user(prog_id, &uattr->task_fd_query.prog_id) ||
	    put_user(fd_type, &uattr->task_fd_query.fd_type) ||
	    put_user(probe_offset, &uattr->task_fd_query.probe_offset) ||
	    put_user(probe_addr, &uattr->task_fd_query.probe_addr))
		return -EFAULT;

	return err;
}

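/* Worked example (illustrative, not part of the original source): buf_len
 * is always set to strlen(buf) first, so callers can detect truncation.
 * With buf == "sys_enter" (len 9) and a 4-byte ubuf:
 *
 *	uattr->task_fd_query.buf_len = 9;	// full length reported
 *	ubuf = "sys\0";				// 3 chars + NUL copied
 *	return -ENOSPC;				// partial copy signalled
 *
 * The caller can then retry with buf_len of at least len + 1.
 */
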
#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr

static int bpf_task_fd_query(const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	pid_t pid = attr->task_fd_query.pid;
	u32 fd = attr->task_fd_query.fd;
	const struct perf_event *event;
	struct task_struct *task;
	struct file *file;
	int err;

	if (CHECK_ATTR(BPF_TASK_FD_QUERY))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (attr->task_fd_query.flags != 0)
		return -EINVAL;

	task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
	if (!task)
		return -ENOENT;

	err = 0;
	file = fget_task(task, fd);
	put_task_struct(task);
	if (!file)
		return -EBADF;

	if (file->f_op == &bpf_link_fops) {
		struct bpf_link *link = file->private_data;

		if (link->ops == &bpf_raw_tp_link_lops) {
			struct bpf_raw_tp_link *raw_tp =
				container_of(link, struct bpf_raw_tp_link, link);
			struct bpf_raw_event_map *btp = raw_tp->btp;

			err = bpf_task_fd_query_copy(attr, uattr,
						     raw_tp->link.prog->aux->id,
						     BPF_FD_TYPE_RAW_TRACEPOINT,
						     btp->tp->name, 0, 0);
			goto put_file;
		}
		goto out_not_supp;
	}

	event = perf_get_event(file);
	if (!IS_ERR(event)) {
		u64 probe_offset, probe_addr;
		u32 prog_id, fd_type;
		const char *buf;

		err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
					      &buf, &probe_offset,
					      &probe_addr);
		if (!err)
			err = bpf_task_fd_query_copy(attr, uattr, prog_id,
						     fd_type, buf,
						     probe_offset,
						     probe_addr);
		goto put_file;
	}

out_not_supp:
	err = -ENOTSUPP;
put_file:
	fput(file);
	return err;
}

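/* Usage sketch (illustrative, not part of the original source): given a
 * target pid and one of its fds, BPF_TASK_FD_QUERY reports which BPF
 * program, if any, sits behind that fd:
 *
 *	union bpf_attr attr = {};
 *	char buf[256];
 *
 *	attr.task_fd_query.pid = pid;
 *	attr.task_fd_query.fd = fd;
 *	attr.task_fd_query.buf = (__u64)(unsigned long)buf;
 *	attr.task_fd_query.buf_len = sizeof(buf);
 *	if (!syscall(SYS_bpf, BPF_TASK_FD_QUERY, &attr, sizeof(attr)))
 *		printf("prog %u fd_type %u name %s\n",
 *		       attr.task_fd_query.prog_id,
 *		       attr.task_fd_query.fd_type, buf);
 */
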
#define BPF_MAP_BATCH_LAST_FIELD batch.flags

#define BPF_DO_BATCH(fn)			\
	do {					\
		if (!fn) {			\
			err = -ENOTSUPP;	\
			goto err_put;		\
		}				\
		err = fn(map, attr, uattr);	\
	} while (0)

static int bpf_map_do_batch(const union bpf_attr *attr,
			    union bpf_attr __user *uattr,
			    int cmd)
{
	bool has_read  = cmd == BPF_MAP_LOOKUP_BATCH ||
			 cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH;
	bool has_write = cmd != BPF_MAP_LOOKUP_BATCH;
	struct bpf_map *map;
	int err, ufd;
	struct fd f;

	if (CHECK_ATTR(BPF_MAP_BATCH))
		return -EINVAL;

	ufd = attr->batch.map_fd;
	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);
	if (has_write)
		bpf_map_write_active_inc(map);
	if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
		err = -EPERM;
		goto err_put;
	}
	if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
		err = -EPERM;
		goto err_put;
	}

	if (cmd == BPF_MAP_LOOKUP_BATCH)
		BPF_DO_BATCH(map->ops->map_lookup_batch);
	else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH)
		BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch);
	else if (cmd == BPF_MAP_UPDATE_BATCH)
		BPF_DO_BATCH(map->ops->map_update_batch);
	else
		BPF_DO_BATCH(map->ops->map_delete_batch);
err_put:
	if (has_write)
		bpf_map_write_active_dec(map);
	fdput(f);
	return err;
}

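/* Usage sketch (illustrative, not part of the original source): a batched
 * lookup walks the map with an opaque batch token until -ENOENT:
 *
 *	__u32 in_batch, out_batch;
 *	__u32 keys[64], vals[64];
 *	union bpf_attr attr = {};
 *	long err;
 *
 *	attr.batch.map_fd = map_fd;
 *	attr.batch.keys = (__u64)(unsigned long)keys;
 *	attr.batch.values = (__u64)(unsigned long)vals;
 *	attr.batch.out_batch = (__u64)(unsigned long)&out_batch;
 *	for (;;) {
 *		attr.batch.count = 64;	// in: capacity, out: copied
 *		err = syscall(SYS_bpf, BPF_MAP_LOOKUP_BATCH,
 *			      &attr, sizeof(attr));
 *		// process attr.batch.count entries even on -ENOENT,
 *		// which just marks the end of the walk
 *		if (err)
 *			break;
 *		in_batch = out_batch;	// resume where the kernel stopped
 *		attr.batch.in_batch = (__u64)(unsigned long)&in_batch;
 *	}
 */
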
static int tracing_bpf_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
				   struct bpf_prog *prog)
{
	if (attr->link_create.attach_type != prog->expected_attach_type)
		return -EINVAL;

	if (prog->expected_attach_type == BPF_TRACE_ITER)
		return bpf_iter_link_attach(attr, uattr, prog);
	else if (prog->type == BPF_PROG_TYPE_EXT)
		return bpf_tracing_prog_attach(prog,
					       attr->link_create.target_fd,
					       attr->link_create.target_btf_id);
	return -EINVAL;
}

#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len
static int link_create(union bpf_attr *attr, bpfptr_t uattr)
{
	enum bpf_prog_type ptype;
	struct bpf_prog *prog;
	int ret;

	if (CHECK_ATTR(BPF_LINK_CREATE))
		return -EINVAL;

	prog = bpf_prog_get(attr->link_create.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	ret = bpf_prog_attach_check_attach_type(prog,
						attr->link_create.attach_type);
	if (ret)
		goto out;

	switch (prog->type) {
	case BPF_PROG_TYPE_EXT:
		ret = tracing_bpf_link_attach(attr, uattr, prog);
		goto out;
	case BPF_PROG_TYPE_PERF_EVENT:
	case BPF_PROG_TYPE_KPROBE:
	case BPF_PROG_TYPE_TRACEPOINT:
		if (attr->link_create.attach_type != BPF_PERF_EVENT) {
			ret = -EINVAL;
			goto out;
		}
		ptype = prog->type;
		break;
	default:
		ptype = attach_type_to_prog_type(attr->link_create.attach_type);
		if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
			ret = -EINVAL;
			goto out;
		}
		break;
	}

	switch (ptype) {
	case BPF_PROG_TYPE_CGROUP_SKB:
	case BPF_PROG_TYPE_CGROUP_SOCK:
	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
	case BPF_PROG_TYPE_SOCK_OPS:
	case BPF_PROG_TYPE_CGROUP_DEVICE:
	case BPF_PROG_TYPE_CGROUP_SYSCTL:
	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
		ret = cgroup_bpf_link_attach(attr, prog);
		break;
	case BPF_PROG_TYPE_TRACING:
		ret = tracing_bpf_link_attach(attr, uattr, prog);
		break;
	case BPF_PROG_TYPE_FLOW_DISSECTOR:
	case BPF_PROG_TYPE_SK_LOOKUP:
		ret = netns_bpf_link_create(attr, prog);
		break;
#ifdef CONFIG_NET
	case BPF_PROG_TYPE_XDP:
		ret = bpf_xdp_link_attach(attr, prog);
		break;
#endif
#ifdef CONFIG_PERF_EVENTS
	case BPF_PROG_TYPE_PERF_EVENT:
	case BPF_PROG_TYPE_TRACEPOINT:
	case BPF_PROG_TYPE_KPROBE:
		ret = bpf_perf_link_attach(attr, prog);
		break;
#endif
	default:
		ret = -EINVAL;
	}

out:
	if (ret < 0)
		bpf_prog_put(prog);
	return ret;
}

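/* Usage sketch (illustrative, not part of the original source): attaching a
 * loaded cgroup program through a link rather than BPF_PROG_ATTACH:
 *
 *	union bpf_attr attr = {};
 *	int link_fd;
 *
 *	attr.link_create.prog_fd = prog_fd;	// e.g. BPF_PROG_TYPE_CGROUP_SKB
 *	attr.link_create.target_fd = cgroup_fd;
 *	attr.link_create.attach_type = BPF_CGROUP_INET_INGRESS;
 *	link_fd = syscall(SYS_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));
 *
 * The returned fd owns the attachment: dropping the last reference detaches
 * the program, which is the ownership model links were introduced for.
 */
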
#define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd

static int link_update(union bpf_attr *attr)
{
	struct bpf_prog *old_prog = NULL, *new_prog;
	struct bpf_link *link;
	u32 flags;
	int ret;

	if (CHECK_ATTR(BPF_LINK_UPDATE))
		return -EINVAL;

	flags = attr->link_update.flags;
	if (flags & ~BPF_F_REPLACE)
		return -EINVAL;

	link = bpf_link_get_from_fd(attr->link_update.link_fd);
	if (IS_ERR(link))
		return PTR_ERR(link);

	new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
	if (IS_ERR(new_prog)) {
		ret = PTR_ERR(new_prog);
		goto out_put_link;
	}

	if (flags & BPF_F_REPLACE) {
		old_prog = bpf_prog_get(attr->link_update.old_prog_fd);
		if (IS_ERR(old_prog)) {
			ret = PTR_ERR(old_prog);
			old_prog = NULL;
			goto out_put_progs;
		}
	} else if (attr->link_update.old_prog_fd) {
		ret = -EINVAL;
		goto out_put_progs;
	}

	if (link->ops->update_prog)
		ret = link->ops->update_prog(link, new_prog, old_prog);
	else
		ret = -EINVAL;

out_put_progs:
	if (old_prog)
		bpf_prog_put(old_prog);
	if (ret)
		bpf_prog_put(new_prog);
out_put_link:
	bpf_link_put(link);
	return ret;
}

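/* Usage sketch (illustrative, not part of the original source): a
 * compare-and-swap style replacement guards against racing updaters:
 *
 *	union bpf_attr attr = {};
 *
 *	attr.link_update.link_fd = link_fd;
 *	attr.link_update.new_prog_fd = new_fd;
 *	attr.link_update.old_prog_fd = old_fd;	// swap only if this still
 *	attr.link_update.flags = BPF_F_REPLACE;	// matches the attached prog
 *	syscall(SYS_bpf, BPF_LINK_UPDATE, &attr, sizeof(attr));
 *
 * Without BPF_F_REPLACE, old_prog_fd must be zero and the swap is
 * unconditional; whether a mismatch is reported (typically -EPERM) is up
 * to the link type's update_prog callback.
 */
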
#define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd

static int link_detach(union bpf_attr *attr)
{
	struct bpf_link *link;
	int ret;

	if (CHECK_ATTR(BPF_LINK_DETACH))
		return -EINVAL;

	link = bpf_link_get_from_fd(attr->link_detach.link_fd);
	if (IS_ERR(link))
		return PTR_ERR(link);

	if (link->ops->detach)
		ret = link->ops->detach(link);
	else
		ret = -EOPNOTSUPP;

	bpf_link_put(link);
	return ret;
}

static struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
{
	return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT);
}

struct bpf_link *bpf_link_by_id(u32 id)
{
	struct bpf_link *link;

	if (!id)
		return ERR_PTR(-ENOENT);

	spin_lock_bh(&link_idr_lock);
	/* before link is "settled", ID is 0, pretend it doesn't exist yet */
	link = idr_find(&link_idr, id);
	if (link) {
		if (link->id)
			link = bpf_link_inc_not_zero(link);
		else
			link = ERR_PTR(-EAGAIN);
	} else {
		link = ERR_PTR(-ENOENT);
	}
	spin_unlock_bh(&link_idr_lock);
	return link;
}

#define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id

static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
{
	struct bpf_link *link;
	u32 id = attr->link_id;
	int fd;

	if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	link = bpf_link_by_id(id);
	if (IS_ERR(link))
		return PTR_ERR(link);

	fd = bpf_link_new_fd(link);
	if (fd < 0)
		bpf_link_put(link);

	return fd;
}

DEFINE_MUTEX(bpf_stats_enabled_mutex);

static int bpf_stats_release(struct inode *inode, struct file *file)
{
	mutex_lock(&bpf_stats_enabled_mutex);
	static_key_slow_dec(&bpf_stats_enabled_key.key);
	mutex_unlock(&bpf_stats_enabled_mutex);
	return 0;
}

static const struct file_operations bpf_stats_fops = {
	.release = bpf_stats_release,
};

static int bpf_enable_runtime_stats(void)
{
	int fd;

	mutex_lock(&bpf_stats_enabled_mutex);

	/* Set a very high limit to avoid overflow */
	if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) {
		mutex_unlock(&bpf_stats_enabled_mutex);
		return -EBUSY;
	}

	fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC);
	if (fd >= 0)
		static_key_slow_inc(&bpf_stats_enabled_key.key);

	mutex_unlock(&bpf_stats_enabled_mutex);
	return fd;
}

#define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type

static int bpf_enable_stats(union bpf_attr *attr)
{

	if (CHECK_ATTR(BPF_ENABLE_STATS))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (attr->enable_stats.type) {
	case BPF_STATS_RUN_TIME:
		return bpf_enable_runtime_stats();
	default:
		break;
	}
	return -EINVAL;
}

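/* Usage sketch (illustrative, not part of the original source): run-time
 * stats stay enabled exactly as long as the returned fd is alive:
 *
 *	union bpf_attr attr = {};
 *	int stats_fd;
 *
 *	attr.enable_stats.type = BPF_STATS_RUN_TIME;
 *	stats_fd = syscall(SYS_bpf, BPF_ENABLE_STATS, &attr, sizeof(attr));
 *	// ... sample run_time_ns/run_cnt via BPF_OBJ_GET_INFO_BY_FD ...
 *	close(stats_fd);	// release() decrements the static key
 */
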
#define BPF_ITER_CREATE_LAST_FIELD iter_create.flags

static int bpf_iter_create(union bpf_attr *attr)
{
	struct bpf_link *link;
	int err;

	if (CHECK_ATTR(BPF_ITER_CREATE))
		return -EINVAL;

	if (attr->iter_create.flags)
		return -EINVAL;

	link = bpf_link_get_from_fd(attr->iter_create.link_fd);
	if (IS_ERR(link))
		return PTR_ERR(link);

	err = bpf_iter_new_fd(link);
	bpf_link_put(link);

	return err;
}

#define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags

static int bpf_prog_bind_map(union bpf_attr *attr)
{
	struct bpf_prog *prog;
	struct bpf_map *map;
	struct bpf_map **used_maps_old, **used_maps_new;
	int i, ret = 0;

	if (CHECK_ATTR(BPF_PROG_BIND_MAP))
		return -EINVAL;

	if (attr->prog_bind_map.flags)
		return -EINVAL;

	prog = bpf_prog_get(attr->prog_bind_map.prog_fd);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	map = bpf_map_get(attr->prog_bind_map.map_fd);
	if (IS_ERR(map)) {
		ret = PTR_ERR(map);
		goto out_prog_put;
	}

	mutex_lock(&prog->aux->used_maps_mutex);

	used_maps_old = prog->aux->used_maps;

	for (i = 0; i < prog->aux->used_map_cnt; i++)
		if (used_maps_old[i] == map) {
			bpf_map_put(map);
			goto out_unlock;
		}

	used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1,
				      sizeof(used_maps_new[0]),
				      GFP_KERNEL);
	if (!used_maps_new) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	memcpy(used_maps_new, used_maps_old,
	       sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
	used_maps_new[prog->aux->used_map_cnt] = map;

	prog->aux->used_map_cnt++;
	prog->aux->used_maps = used_maps_new;

	kfree(used_maps_old);

out_unlock:
	mutex_unlock(&prog->aux->used_maps_mutex);

	if (ret)
		bpf_map_put(map);
out_prog_put:
	bpf_prog_put(prog);
	return ret;
}

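/* Usage sketch (illustrative, not part of the original source): binding
 * ties a map's lifetime to a program that never references it in its
 * bytecode, e.g. a metadata map consumed only by tooling:
 *
 *	union bpf_attr attr = {};
 *
 *	attr.prog_bind_map.prog_fd = prog_fd;
 *	attr.prog_bind_map.map_fd = map_fd;
 *	if (!syscall(SYS_bpf, BPF_PROG_BIND_MAP, &attr, sizeof(attr)))
 *		close(map_fd);	// map now lives as long as the program
 */
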
static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
{
	union bpf_attr attr;
	int err;

	if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
		return -EPERM;

	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(attr));

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	memset(&attr, 0, sizeof(attr));
	if (copy_from_bpfptr(&attr, uattr, size) != 0)
		return -EFAULT;

	err = security_bpf(cmd, &attr, size);
	if (err < 0)
		return err;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr, uattr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_MAP_FREEZE:
		err = map_freeze(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr, uattr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
	case BPF_PROG_QUERY:
		err = bpf_prog_query(&attr, uattr.user);
		break;
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr.user);
		break;
	case BPF_PROG_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &prog_idr, &prog_idr_lock);
		break;
	case BPF_MAP_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &map_idr, &map_idr_lock);
		break;
	case BPF_BTF_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &btf_idr, &btf_idr_lock);
		break;
	case BPF_PROG_GET_FD_BY_ID:
		err = bpf_prog_get_fd_by_id(&attr);
		break;
	case BPF_MAP_GET_FD_BY_ID:
		err = bpf_map_get_fd_by_id(&attr);
		break;
	case BPF_OBJ_GET_INFO_BY_FD:
		err = bpf_obj_get_info_by_fd(&attr, uattr.user);
		break;
	case BPF_RAW_TRACEPOINT_OPEN:
		err = bpf_raw_tracepoint_open(&attr);
		break;
	case BPF_BTF_LOAD:
		err = bpf_btf_load(&attr, uattr);
		break;
	case BPF_BTF_GET_FD_BY_ID:
		err = bpf_btf_get_fd_by_id(&attr);
		break;
	case BPF_TASK_FD_QUERY:
		err = bpf_task_fd_query(&attr, uattr.user);
		break;
	case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
		err = map_lookup_and_delete_elem(&attr);
		break;
	case BPF_MAP_LOOKUP_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_LOOKUP_BATCH);
		break;
	case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user,
				       BPF_MAP_LOOKUP_AND_DELETE_BATCH);
		break;
	case BPF_MAP_UPDATE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_UPDATE_BATCH);
		break;
	case BPF_MAP_DELETE_BATCH:
		err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_DELETE_BATCH);
		break;
	case BPF_LINK_CREATE:
		err = link_create(&attr, uattr);
		break;
	case BPF_LINK_UPDATE:
		err = link_update(&attr);
		break;
	case BPF_LINK_GET_FD_BY_ID:
		err = bpf_link_get_fd_by_id(&attr);
		break;
	case BPF_LINK_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr.user,
					  &link_idr, &link_idr_lock);
		break;
	case BPF_ENABLE_STATS:
		err = bpf_enable_stats(&attr);
		break;
	case BPF_ITER_CREATE:
		err = bpf_iter_create(&attr);
		break;
	case BPF_LINK_DETACH:
		err = link_detach(&attr);
		break;
	case BPF_PROG_BIND_MAP:
		err = bpf_prog_bind_map(&attr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	return __sys_bpf(cmd, USER_BPFPTR(uattr), size);
}

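/* Note (illustrative, not part of the original source): bpfptr_t lets the
 * same __sys_bpf() body serve two callers:
 *
 *	__sys_bpf(cmd, USER_BPFPTR(uattr), size);	// syscall path above
 *	__sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);	// bpf_sys_bpf() below
 *
 * copy_from_bpfptr() then selects copy_from_user() or a plain memcpy()
 * based on the is_kernel tag, mirroring how sockptr_t works.
 */
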
static bool syscall_prog_is_valid_access(int off, int size,
					 enum bpf_access_type type,
					 const struct bpf_prog *prog,
					 struct bpf_insn_access_aux *info)
{
	if (off < 0 || off >= U16_MAX)
		return false;
	if (off % size != 0)
		return false;
	return true;
}

BPF_CALL_3(bpf_sys_bpf, int, cmd, void *, attr, u32, attr_size)
{
	switch (cmd) {
	case BPF_MAP_CREATE:
	case BPF_MAP_UPDATE_ELEM:
	case BPF_MAP_FREEZE:
	case BPF_PROG_LOAD:
	case BPF_BTF_LOAD:
		break;
	/* case BPF_PROG_TEST_RUN:
	 * is not part of this list to prevent recursive test_run
	 */
	default:
		return -EINVAL;
	}
	return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
}

static const struct bpf_func_proto bpf_sys_bpf_proto = {
	.func		= bpf_sys_bpf,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE,
};

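/* Usage sketch (illustrative, not part of the original source): a
 * BPF_PROG_TYPE_SYSCALL program run via BPF_PROG_TEST_RUN may issue the
 * commands whitelisted above from kernel context, e.g.:
 *
 *	SEC("syscall")
 *	int create_map(void *ctx)
 *	{
 *		union bpf_attr attr = {
 *			.map_type = BPF_MAP_TYPE_ARRAY,
 *			.key_size = 4,
 *			.value_size = 8,
 *			.max_entries = 1,
 *		};
 *
 *		return bpf_sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 *	}
 *
 * This is the mechanism light skeletons use to load BPF from within BPF.
 */
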
const struct bpf_func_proto * __weak
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	return bpf_base_func_proto(func_id);
}

BPF_CALL_1(bpf_sys_close, u32, fd)
{
	/* When bpf program calls this helper there should not be
	 * an fdget() without matching completed fdput().
	 * This helper is allowed in the following callchain only:
	 * sys_bpf->prog_test_run->bpf_prog->bpf_sys_close
	 */
	return close_fd(fd);
}

static const struct bpf_func_proto bpf_sys_close_proto = {
	.func		= bpf_sys_close,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res)
{
	if (flags)
		return -EINVAL;

	if (name_sz <= 1 || name[name_sz - 1])
		return -EINVAL;

	if (!bpf_dump_raw_ok(current_cred()))
		return -EPERM;

	*res = kallsyms_lookup_name(name);
	return *res ? 0 : -ENOENT;
}

const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
	.func		= bpf_kallsyms_lookup_name,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_LONG,
};

static const struct bpf_func_proto *
syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_sys_bpf:
		return &bpf_sys_bpf_proto;
	case BPF_FUNC_btf_find_by_name_kind:
		return &bpf_btf_find_by_name_kind_proto;
	case BPF_FUNC_sys_close:
		return &bpf_sys_close_proto;
	case BPF_FUNC_kallsyms_lookup_name:
		return &bpf_kallsyms_lookup_name_proto;
	default:
		return tracing_prog_func_proto(func_id, prog);
	}
}

const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
	.get_func_proto		= syscall_prog_func_proto,
	.is_valid_access	= syscall_prog_is_valid_access,
};

const struct bpf_prog_ops bpf_syscall_prog_ops = {
	.test_run	= bpf_prog_test_run_syscall,
};