bpf: Switch BPF_F_KPROBE_MULTI_RETURN macro to enum
[linux-block.git] / kernel / bpf / syscall.c
CommitLineData
5b497af4 1// SPDX-License-Identifier: GPL-2.0-only
99c55f7d 2/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
99c55f7d
AS
3 */
4#include <linux/bpf.h>
aef2feda 5#include <linux/bpf-cgroup.h>
a67edbf4 6#include <linux/bpf_trace.h>
f4364dcf 7#include <linux/bpf_lirc.h>
4a1e7c0c 8#include <linux/bpf_verifier.h>
61df10c7 9#include <linux/bsearch.h>
f56a653c 10#include <linux/btf.h>
99c55f7d
AS
11#include <linux/syscalls.h>
12#include <linux/slab.h>
3f07c014 13#include <linux/sched/signal.h>
d407bd25
DB
14#include <linux/vmalloc.h>
15#include <linux/mmzone.h>
99c55f7d 16#include <linux/anon_inodes.h>
41bdc4b4 17#include <linux/fdtable.h>
db20fd2b 18#include <linux/file.h>
41bdc4b4 19#include <linux/fs.h>
09756af4
AS
20#include <linux/license.h>
21#include <linux/filter.h>
535e7b4b 22#include <linux/kernel.h>
dc4bb0e2 23#include <linux/idr.h>
cb4d2b3f
MKL
24#include <linux/cred.h>
25#include <linux/timekeeping.h>
26#include <linux/ctype.h>
9ef09e35 27#include <linux/nospec.h>
bae141f5 28#include <linux/audit.h>
ccfe29eb 29#include <uapi/linux/btf.h>
ca5999fd 30#include <linux/pgtable.h>
9e4e01df 31#include <linux/bpf_lsm.h>
457f4436 32#include <linux/poll.h>
4d7d7f69 33#include <linux/sort.h>
a3fd7cee 34#include <linux/bpf-netns.h>
1e6c62a8 35#include <linux/rcupdate_trace.h>
48edc1f7 36#include <linux/memcontrol.h>
0dcac272 37#include <linux/trace_events.h>
84601d6e 38#include <net/netfilter/nf_bpf_link.h>
99c55f7d 39
e420bed0
DB
40#include <net/tcx.h>
41
da765a2f
DB
42#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
43 (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
44 (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
45#define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY)
14dc6f04 46#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
da765a2f
DB
47#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \
48 IS_FD_HASH(map))
14dc6f04 49
6e71b04a
CF
50#define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY)
51
b121d1e7 52DEFINE_PER_CPU(int, bpf_prog_active);
dc4bb0e2
MKL
53static DEFINE_IDR(prog_idr);
54static DEFINE_SPINLOCK(prog_idr_lock);
f3f1c054
MKL
55static DEFINE_IDR(map_idr);
56static DEFINE_SPINLOCK(map_idr_lock);
a3b80e10
AN
57static DEFINE_IDR(link_idr);
58static DEFINE_SPINLOCK(link_idr_lock);
b121d1e7 59
08389d88
DB
60int sysctl_unprivileged_bpf_disabled __read_mostly =
61 IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;
1be7f75d 62
40077e0c 63static const struct bpf_map_ops * const bpf_map_types[] = {
91cc1a99 64#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
40077e0c
JB
65#define BPF_MAP_TYPE(_id, _ops) \
66 [_id] = &_ops,
f2e10bff 67#define BPF_LINK_TYPE(_id, _name)
40077e0c
JB
68#include <linux/bpf_types.h>
69#undef BPF_PROG_TYPE
70#undef BPF_MAP_TYPE
f2e10bff 71#undef BPF_LINK_TYPE
40077e0c 72};
99c55f7d 73
752ba56f
MS
74/*
75 * If we're handed a bigger struct than we know of, ensure all the unknown bits
76 * are 0 - i.e. new user-space does not rely on any kernel feature extensions
77 * we don't know about yet.
78 *
79 * There is a ToCToU between this function call and the following
80 * copy_from_user() call. However, this is not a concern since this function is
81 * meant to be a future-proofing of bits.
82 */
af2ac3e1 83int bpf_check_uarg_tail_zero(bpfptr_t uaddr,
dcab51f1
MKL
84 size_t expected_size,
85 size_t actual_size)
58291a74 86{
b7e4b65f 87 int res;
58291a74 88
752ba56f
MS
89 if (unlikely(actual_size > PAGE_SIZE)) /* silly large */
90 return -E2BIG;
91
58291a74
MS
92 if (actual_size <= expected_size)
93 return 0;
94
af2ac3e1
AS
95 if (uaddr.is_kernel)
96 res = memchr_inv(uaddr.kernel + expected_size, 0,
97 actual_size - expected_size) == NULL;
98 else
99 res = check_zeroed_user(uaddr.user + expected_size,
100 actual_size - expected_size);
b7e4b65f
AV
101 if (res < 0)
102 return res;
103 return res ? 0 : -E2BIG;
58291a74
MS
104}
105
a3884572 106const struct bpf_map_ops bpf_map_offload_ops = {
f4d05259 107 .map_meta_equal = bpf_map_meta_equal,
a3884572
JK
108 .map_alloc = bpf_map_offload_map_alloc,
109 .map_free = bpf_map_offload_map_free,
e8d2bec0 110 .map_check_btf = map_check_no_btf,
9629363c 111 .map_mem_usage = bpf_map_offload_map_mem_usage,
a3884572
JK
112};
113
353050be
DB
114static void bpf_map_write_active_inc(struct bpf_map *map)
115{
116 atomic64_inc(&map->writecnt);
117}
118
119static void bpf_map_write_active_dec(struct bpf_map *map)
120{
121 atomic64_dec(&map->writecnt);
122}
123
124bool bpf_map_write_active(const struct bpf_map *map)
125{
126 return atomic64_read(&map->writecnt) != 0;
127}
128
80ee81e0 129static u32 bpf_map_value_size(const struct bpf_map *map)
15c14a3d
BV
130{
131 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
132 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
133 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
134 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
135 return round_up(map->value_size, 8) * num_possible_cpus();
136 else if (IS_FD_MAP(map))
137 return sizeof(u32);
138 else
139 return map->value_size;
140}
141
142static void maybe_wait_bpf_programs(struct bpf_map *map)
143{
144 /* Wait for any running BPF programs to complete so that
145 * userspace, when we return to it, knows that all programs
146 * that could be running use the new map value.
147 */
148 if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
149 map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
150 synchronize_rcu();
151}
152
3af43ba4
HT
153static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
154 void *key, void *value, __u64 flags)
15c14a3d
BV
155{
156 int err;
157
158 /* Need to create a kthread, thus must support schedule */
9d03ebc7 159 if (bpf_map_is_offloaded(map)) {
15c14a3d
BV
160 return bpf_map_offload_update_elem(map, key, value, flags);
161 } else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
15c14a3d
BV
162 map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
163 return map->ops->map_update_elem(map, key, value, flags);
13b79d3f
LB
164 } else if (map->map_type == BPF_MAP_TYPE_SOCKHASH ||
165 map->map_type == BPF_MAP_TYPE_SOCKMAP) {
166 return sock_map_update_elem_sys(map, key, value, flags);
15c14a3d 167 } else if (IS_FD_PROG_ARRAY(map)) {
3af43ba4 168 return bpf_fd_array_map_update_elem(map, map_file, key, value,
15c14a3d
BV
169 flags);
170 }
171
b6e5dae1 172 bpf_disable_instrumentation();
15c14a3d
BV
173 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
174 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
175 err = bpf_percpu_hash_update(map, key, value, flags);
176 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
177 err = bpf_percpu_array_update(map, key, value, flags);
178 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
179 err = bpf_percpu_cgroup_storage_update(map, key, value,
180 flags);
181 } else if (IS_FD_ARRAY(map)) {
182 rcu_read_lock();
3af43ba4 183 err = bpf_fd_array_map_update_elem(map, map_file, key, value,
15c14a3d
BV
184 flags);
185 rcu_read_unlock();
186 } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
187 rcu_read_lock();
3af43ba4 188 err = bpf_fd_htab_map_update_elem(map, map_file, key, value,
15c14a3d
BV
189 flags);
190 rcu_read_unlock();
191 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
192 /* rcu_read_lock() is not needed */
193 err = bpf_fd_reuseport_array_update_elem(map, key, value,
194 flags);
195 } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
9330986c
JK
196 map->map_type == BPF_MAP_TYPE_STACK ||
197 map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
15c14a3d
BV
198 err = map->ops->map_push_elem(map, value, flags);
199 } else {
200 rcu_read_lock();
201 err = map->ops->map_update_elem(map, key, value, flags);
202 rcu_read_unlock();
203 }
b6e5dae1 204 bpf_enable_instrumentation();
15c14a3d
BV
205 maybe_wait_bpf_programs(map);
206
207 return err;
208}
209
210static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
211 __u64 flags)
212{
213 void *ptr;
214 int err;
215
9d03ebc7 216 if (bpf_map_is_offloaded(map))
cb4d03ab 217 return bpf_map_offload_lookup_elem(map, key, value);
15c14a3d 218
b6e5dae1 219 bpf_disable_instrumentation();
15c14a3d
BV
220 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
221 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
222 err = bpf_percpu_hash_copy(map, key, value);
223 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
224 err = bpf_percpu_array_copy(map, key, value);
225 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
226 err = bpf_percpu_cgroup_storage_copy(map, key, value);
227 } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
228 err = bpf_stackmap_copy(map, key, value);
229 } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
230 err = bpf_fd_array_map_lookup_elem(map, key, value);
231 } else if (IS_FD_HASH(map)) {
232 err = bpf_fd_htab_map_lookup_elem(map, key, value);
233 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
234 err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
235 } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
9330986c
JK
236 map->map_type == BPF_MAP_TYPE_STACK ||
237 map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
15c14a3d
BV
238 err = map->ops->map_peek_elem(map, value);
239 } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
240 /* struct_ops map requires directly updating "value" */
241 err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
242 } else {
243 rcu_read_lock();
244 if (map->ops->map_lookup_elem_sys_only)
245 ptr = map->ops->map_lookup_elem_sys_only(map, key);
246 else
247 ptr = map->ops->map_lookup_elem(map, key);
248 if (IS_ERR(ptr)) {
249 err = PTR_ERR(ptr);
250 } else if (!ptr) {
251 err = -ENOENT;
252 } else {
253 err = 0;
254 if (flags & BPF_F_LOCK)
255 /* lock 'ptr' and copy everything but lock */
256 copy_map_value_locked(map, value, ptr, true);
257 else
258 copy_map_value(map, value, ptr);
68134668
AS
259 /* mask lock and timer, since value wasn't zero inited */
260 check_and_init_map_value(map, value);
15c14a3d
BV
261 }
262 rcu_read_unlock();
263 }
264
b6e5dae1 265 bpf_enable_instrumentation();
15c14a3d
BV
266 maybe_wait_bpf_programs(map);
267
268 return err;
269}
270
d5299b67
RG
271/* Please, do not use this function outside from the map creation path
272 * (e.g. in map update path) without taking care of setting the active
273 * memory cgroup (see at bpf_map_kmalloc_node() for example).
274 */
196e8ca7 275static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
d407bd25 276{
f01a7dbe
MP
277 /* We really just want to fail instead of triggering OOM killer
278 * under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
279 * which is used for lower order allocation requests.
280 *
281 * It has been observed that higher order allocation requests done by
282 * vmalloc with __GFP_NORETRY being set might fail due to not trying
283 * to reclaim memory from the page cache, thus we set
284 * __GFP_RETRY_MAYFAIL to avoid such situations.
d407bd25 285 */
f01a7dbe 286
ee53cbfb 287 gfp_t gfp = bpf_memcg_flags(__GFP_NOWARN | __GFP_ZERO);
041de93f
CH
288 unsigned int flags = 0;
289 unsigned long align = 1;
d407bd25
DB
290 void *area;
291
196e8ca7
DB
292 if (size >= SIZE_MAX)
293 return NULL;
294
fc970227 295 /* kmalloc()'ed memory can't be mmap()'ed */
041de93f
CH
296 if (mmapable) {
297 BUG_ON(!PAGE_ALIGNED(size));
298 align = SHMLBA;
299 flags = VM_USERMAP;
300 } else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
301 area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY,
f01a7dbe 302 numa_node);
d407bd25
DB
303 if (area != NULL)
304 return area;
305 }
041de93f
CH
306
307 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
308 gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL,
309 flags, numa_node, __builtin_return_address(0));
d407bd25
DB
310}
311
196e8ca7 312void *bpf_map_area_alloc(u64 size, int numa_node)
fc970227
AN
313{
314 return __bpf_map_area_alloc(size, numa_node, false);
315}
316
196e8ca7 317void *bpf_map_area_mmapable_alloc(u64 size, int numa_node)
fc970227
AN
318{
319 return __bpf_map_area_alloc(size, numa_node, true);
320}
321
d407bd25
DB
322void bpf_map_area_free(void *area)
323{
324 kvfree(area);
325}
326
be70bcd5
DB
327static u32 bpf_map_flags_retain_permanent(u32 flags)
328{
329 /* Some map creation flags are not tied to the map object but
330 * rather to the map fd instead, so they have no meaning upon
331 * map object inspection since multiple file descriptors with
332 * different (access) properties can exist here. Thus, given
333 * this has zero meaning for the map itself, lets clear these
334 * from here.
335 */
336 return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY);
337}
338
bd475643
JK
339void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
340{
341 map->map_type = attr->map_type;
342 map->key_size = attr->key_size;
343 map->value_size = attr->value_size;
344 map->max_entries = attr->max_entries;
be70bcd5 345 map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags);
bd475643 346 map->numa_node = bpf_map_attr_numa_node(attr);
9330986c 347 map->map_extra = attr->map_extra;
bd475643
JK
348}
349
f3f1c054
MKL
350static int bpf_map_alloc_id(struct bpf_map *map)
351{
352 int id;
353
b76354cd 354 idr_preload(GFP_KERNEL);
f3f1c054
MKL
355 spin_lock_bh(&map_idr_lock);
356 id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
357 if (id > 0)
358 map->id = id;
359 spin_unlock_bh(&map_idr_lock);
b76354cd 360 idr_preload_end();
f3f1c054
MKL
361
362 if (WARN_ON_ONCE(!id))
363 return -ENOSPC;
364
365 return id > 0 ? 0 : id;
366}
367
158e5e9e 368void bpf_map_free_id(struct bpf_map *map)
f3f1c054 369{
930651a7
ED
370 unsigned long flags;
371
a3884572
JK
372 /* Offloaded maps are removed from the IDR store when their device
373 * disappears - even if someone holds an fd to them they are unusable,
374 * the memory is gone, all ops will fail; they are simply waiting for
375 * refcnt to drop to be freed.
376 */
377 if (!map->id)
378 return;
379
158e5e9e 380 spin_lock_irqsave(&map_idr_lock, flags);
bd5f5f4e 381
f3f1c054 382 idr_remove(&map_idr, map->id);
a3884572 383 map->id = 0;
bd5f5f4e 384
158e5e9e 385 spin_unlock_irqrestore(&map_idr_lock, flags);
f3f1c054
MKL
386}
387
48edc1f7
RG
388#ifdef CONFIG_MEMCG_KMEM
389static void bpf_map_save_memcg(struct bpf_map *map)
390{
4201d9ab
RG
391 /* Currently if a map is created by a process belonging to the root
392 * memory cgroup, get_obj_cgroup_from_current() will return NULL.
393 * So we have to check map->objcg for being NULL each time it's
394 * being used.
395 */
ee53cbfb
YS
396 if (memcg_bpf_enabled())
397 map->objcg = get_obj_cgroup_from_current();
48edc1f7
RG
398}
399
400static void bpf_map_release_memcg(struct bpf_map *map)
401{
4201d9ab
RG
402 if (map->objcg)
403 obj_cgroup_put(map->objcg);
404}
405
406static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map)
407{
408 if (map->objcg)
409 return get_mem_cgroup_from_objcg(map->objcg);
410
411 return root_mem_cgroup;
48edc1f7
RG
412}
413
414void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
415 int node)
416{
4201d9ab 417 struct mem_cgroup *memcg, *old_memcg;
48edc1f7
RG
418 void *ptr;
419
4201d9ab
RG
420 memcg = bpf_map_get_memcg(map);
421 old_memcg = set_active_memcg(memcg);
48edc1f7
RG
422 ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
423 set_active_memcg(old_memcg);
4201d9ab 424 mem_cgroup_put(memcg);
48edc1f7
RG
425
426 return ptr;
427}
428
429void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
430{
4201d9ab 431 struct mem_cgroup *memcg, *old_memcg;
48edc1f7
RG
432 void *ptr;
433
4201d9ab
RG
434 memcg = bpf_map_get_memcg(map);
435 old_memcg = set_active_memcg(memcg);
48edc1f7
RG
436 ptr = kzalloc(size, flags | __GFP_ACCOUNT);
437 set_active_memcg(old_memcg);
4201d9ab 438 mem_cgroup_put(memcg);
48edc1f7
RG
439
440 return ptr;
441}
442
ddef81b5
YS
443void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
444 gfp_t flags)
445{
446 struct mem_cgroup *memcg, *old_memcg;
447 void *ptr;
448
449 memcg = bpf_map_get_memcg(map);
450 old_memcg = set_active_memcg(memcg);
451 ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT);
452 set_active_memcg(old_memcg);
453 mem_cgroup_put(memcg);
454
455 return ptr;
456}
457
48edc1f7
RG
458void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
459 size_t align, gfp_t flags)
460{
4201d9ab 461 struct mem_cgroup *memcg, *old_memcg;
48edc1f7
RG
462 void __percpu *ptr;
463
4201d9ab
RG
464 memcg = bpf_map_get_memcg(map);
465 old_memcg = set_active_memcg(memcg);
48edc1f7
RG
466 ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT);
467 set_active_memcg(old_memcg);
4201d9ab 468 mem_cgroup_put(memcg);
48edc1f7
RG
469
470 return ptr;
471}
472
473#else
474static void bpf_map_save_memcg(struct bpf_map *map)
475{
476}
477
478static void bpf_map_release_memcg(struct bpf_map *map)
479{
480}
481#endif
482
aa3496ac 483static int btf_field_cmp(const void *a, const void *b)
61df10c7 484{
aa3496ac 485 const struct btf_field *f1 = a, *f2 = b;
61df10c7 486
aa3496ac 487 if (f1->offset < f2->offset)
61df10c7 488 return -1;
aa3496ac 489 else if (f1->offset > f2->offset)
61df10c7
KKD
490 return 1;
491 return 0;
492}
493
aa3496ac 494struct btf_field *btf_record_find(const struct btf_record *rec, u32 offset,
74843b57 495 u32 field_mask)
61df10c7 496{
aa3496ac 497 struct btf_field *field;
61df10c7 498
74843b57 499 if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & field_mask))
aa3496ac
KKD
500 return NULL;
501 field = bsearch(&offset, rec->fields, rec->cnt, sizeof(rec->fields[0]), btf_field_cmp);
74843b57 502 if (!field || !(field->type & field_mask))
61df10c7 503 return NULL;
aa3496ac 504 return field;
61df10c7
KKD
505}
506
aa3496ac 507void btf_record_free(struct btf_record *rec)
61df10c7 508{
61df10c7
KKD
509 int i;
510
aa3496ac 511 if (IS_ERR_OR_NULL(rec))
61df10c7 512 return;
aa3496ac
KKD
513 for (i = 0; i < rec->cnt; i++) {
514 switch (rec->fields[i].type) {
515 case BPF_KPTR_UNREF:
516 case BPF_KPTR_REF:
517 if (rec->fields[i].kptr.module)
518 module_put(rec->fields[i].kptr.module);
519 btf_put(rec->fields[i].kptr.btf);
520 break;
f0c5941f 521 case BPF_LIST_HEAD:
8ffa5cc1 522 case BPF_LIST_NODE:
9c395c1b
DM
523 case BPF_RB_ROOT:
524 case BPF_RB_NODE:
525 case BPF_SPIN_LOCK:
526 case BPF_TIMER:
d54730b5 527 case BPF_REFCOUNT:
9c395c1b 528 /* Nothing to release */
f0c5941f 529 break;
aa3496ac
KKD
530 default:
531 WARN_ON_ONCE(1);
532 continue;
533 }
14a324f6 534 }
aa3496ac
KKD
535 kfree(rec);
536}
537
538void bpf_map_free_record(struct bpf_map *map)
539{
540 btf_record_free(map->record);
541 map->record = NULL;
61df10c7
KKD
542}
543
aa3496ac 544struct btf_record *btf_record_dup(const struct btf_record *rec)
61df10c7 545{
aa3496ac
KKD
546 const struct btf_field *fields;
547 struct btf_record *new_rec;
548 int ret, size, i;
61df10c7 549
aa3496ac
KKD
550 if (IS_ERR_OR_NULL(rec))
551 return NULL;
552 size = offsetof(struct btf_record, fields[rec->cnt]);
553 new_rec = kmemdup(rec, size, GFP_KERNEL | __GFP_NOWARN);
554 if (!new_rec)
61df10c7 555 return ERR_PTR(-ENOMEM);
aa3496ac
KKD
556 /* Do a deep copy of the btf_record */
557 fields = rec->fields;
558 new_rec->cnt = 0;
559 for (i = 0; i < rec->cnt; i++) {
560 switch (fields[i].type) {
561 case BPF_KPTR_UNREF:
562 case BPF_KPTR_REF:
563 btf_get(fields[i].kptr.btf);
564 if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) {
565 ret = -ENXIO;
566 goto free;
14a324f6 567 }
aa3496ac 568 break;
f0c5941f 569 case BPF_LIST_HEAD:
8ffa5cc1 570 case BPF_LIST_NODE:
9c395c1b
DM
571 case BPF_RB_ROOT:
572 case BPF_RB_NODE:
573 case BPF_SPIN_LOCK:
574 case BPF_TIMER:
d54730b5 575 case BPF_REFCOUNT:
9c395c1b 576 /* Nothing to acquire */
f0c5941f 577 break;
aa3496ac
KKD
578 default:
579 ret = -EFAULT;
580 WARN_ON_ONCE(1);
581 goto free;
14a324f6 582 }
aa3496ac 583 new_rec->cnt++;
14a324f6 584 }
aa3496ac
KKD
585 return new_rec;
586free:
587 btf_record_free(new_rec);
588 return ERR_PTR(ret);
61df10c7
KKD
589}
590
aa3496ac 591bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b)
61df10c7 592{
aa3496ac 593 bool a_has_fields = !IS_ERR_OR_NULL(rec_a), b_has_fields = !IS_ERR_OR_NULL(rec_b);
61df10c7
KKD
594 int size;
595
aa3496ac 596 if (!a_has_fields && !b_has_fields)
61df10c7 597 return true;
aa3496ac 598 if (a_has_fields != b_has_fields)
61df10c7 599 return false;
aa3496ac 600 if (rec_a->cnt != rec_b->cnt)
61df10c7 601 return false;
aa3496ac 602 size = offsetof(struct btf_record, fields[rec_a->cnt]);
c22dfdd2
KKD
603 /* btf_parse_fields uses kzalloc to allocate a btf_record, so unused
604 * members are zeroed out. So memcmp is safe to do without worrying
605 * about padding/unused fields.
606 *
607 * While spin_lock, timer, and kptr have no relation to map BTF,
608 * list_head metadata is specific to map BTF, the btf and value_rec
609 * members in particular. btf is the map BTF, while value_rec points to
610 * btf_record in that map BTF.
611 *
612 * So while by default, we don't rely on the map BTF (which the records
613 * were parsed from) matching for both records, which is not backwards
614 * compatible, in case list_head is part of it, we implicitly rely on
615 * that by way of depending on memcmp succeeding for it.
616 */
aa3496ac 617 return !memcmp(rec_a, rec_b, size);
61df10c7
KKD
618}
619
db559117
KKD
620void bpf_obj_free_timer(const struct btf_record *rec, void *obj)
621{
622 if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TIMER)))
623 return;
624 bpf_timer_cancel_and_free(obj + rec->timer_off);
625}
626
9e36a204
DM
627extern void __bpf_obj_drop_impl(void *p, const struct btf_record *rec);
628
aa3496ac 629void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
14a324f6 630{
aa3496ac 631 const struct btf_field *fields;
14a324f6
KKD
632 int i;
633
aa3496ac
KKD
634 if (IS_ERR_OR_NULL(rec))
635 return;
636 fields = rec->fields;
637 for (i = 0; i < rec->cnt; i++) {
c8e18754 638 struct btf_struct_meta *pointee_struct_meta;
aa3496ac
KKD
639 const struct btf_field *field = &fields[i];
640 void *field_ptr = obj + field->offset;
c8e18754 641 void *xchgd_field;
aa3496ac
KKD
642
643 switch (fields[i].type) {
db559117
KKD
644 case BPF_SPIN_LOCK:
645 break;
646 case BPF_TIMER:
647 bpf_timer_cancel_and_free(field_ptr);
648 break;
aa3496ac
KKD
649 case BPF_KPTR_UNREF:
650 WRITE_ONCE(*(u64 *)field_ptr, 0);
651 break;
652 case BPF_KPTR_REF:
c8e18754 653 xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0);
1431d0b5
DV
654 if (!xchgd_field)
655 break;
656
c8e18754
DM
657 if (!btf_is_kernel(field->kptr.btf)) {
658 pointee_struct_meta = btf_find_struct_meta(field->kptr.btf,
659 field->kptr.btf_id);
660 WARN_ON_ONCE(!pointee_struct_meta);
9e36a204
DM
661 migrate_disable();
662 __bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ?
663 pointee_struct_meta->record :
664 NULL);
665 migrate_enable();
c8e18754
DM
666 } else {
667 field->kptr.dtor(xchgd_field);
668 }
aa3496ac 669 break;
f0c5941f
KKD
670 case BPF_LIST_HEAD:
671 if (WARN_ON_ONCE(rec->spin_lock_off < 0))
672 continue;
673 bpf_list_head_free(field, field_ptr, obj + rec->spin_lock_off);
674 break;
9c395c1b
DM
675 case BPF_RB_ROOT:
676 if (WARN_ON_ONCE(rec->spin_lock_off < 0))
677 continue;
678 bpf_rb_root_free(field, field_ptr, obj + rec->spin_lock_off);
679 break;
8ffa5cc1 680 case BPF_LIST_NODE:
9c395c1b 681 case BPF_RB_NODE:
d54730b5 682 case BPF_REFCOUNT:
8ffa5cc1 683 break;
aa3496ac
KKD
684 default:
685 WARN_ON_ONCE(1);
14a324f6
KKD
686 continue;
687 }
14a324f6
KKD
688 }
689}
690
99c55f7d
AS
691/* called from workqueue */
692static void bpf_map_free_deferred(struct work_struct *work)
693{
694 struct bpf_map *map = container_of(work, struct bpf_map, work);
d7f5ef65 695 struct btf_record *rec = map->record;
99c55f7d 696
afdb09c7 697 security_bpf_map_free(map);
48edc1f7 698 bpf_map_release_memcg(map);
d7f5ef65 699 /* implementation dependent freeing */
99c55f7d 700 map->ops->map_free(map);
cd2a8079 701 /* Delay freeing of btf_record for maps, as map_free
d7f5ef65
KKD
702 * callback usually needs access to them. It is better to do it here
703 * than require each callback to do the free itself manually.
704 *
705 * Note that the btf_record stashed in map->inner_map_meta->record was
706 * already freed using the map_free callback for map in map case which
707 * eventually calls bpf_map_free_meta, since inner_map_meta is only a
708 * template bpf_map struct used during verification.
709 */
d7f5ef65 710 btf_record_free(rec);
99c55f7d
AS
711}
712
c9da161c
DB
713static void bpf_map_put_uref(struct bpf_map *map)
714{
1e0bd5a0 715 if (atomic64_dec_and_test(&map->usercnt)) {
ba6b8de4
JF
716 if (map->ops->map_release_uref)
717 map->ops->map_release_uref(map);
c9da161c
DB
718 }
719}
720
99c55f7d 721/* decrement map refcnt and schedule it for freeing via workqueue
158e5e9e 722 * (underlying map implementation ops->map_free() might sleep)
99c55f7d 723 */
158e5e9e 724void bpf_map_put(struct bpf_map *map)
99c55f7d 725{
1e0bd5a0 726 if (atomic64_dec_and_test(&map->refcnt)) {
34ad5580 727 /* bpf_map_free_id() must be called first */
158e5e9e 728 bpf_map_free_id(map);
78958fca 729 btf_put(map->btf);
99c55f7d 730 INIT_WORK(&map->work, bpf_map_free_deferred);
8d5a8011
AS
731 /* Avoid spawning kworkers, since they all might contend
732 * for the same mutex like slab_mutex.
733 */
734 queue_work(system_unbound_wq, &map->work);
99c55f7d
AS
735 }
736}
630a4d38 737EXPORT_SYMBOL_GPL(bpf_map_put);
bd5f5f4e 738
c9da161c 739void bpf_map_put_with_uref(struct bpf_map *map)
99c55f7d 740{
c9da161c 741 bpf_map_put_uref(map);
99c55f7d 742 bpf_map_put(map);
c9da161c
DB
743}
744
745static int bpf_map_release(struct inode *inode, struct file *filp)
746{
61d1b6a4
DB
747 struct bpf_map *map = filp->private_data;
748
749 if (map->ops->map_release)
750 map->ops->map_release(map, filp);
751
752 bpf_map_put_with_uref(map);
99c55f7d
AS
753 return 0;
754}
755
87df15de
DB
756static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
757{
758 fmode_t mode = f.file->f_mode;
759
760 /* Our file permissions may have been overridden by global
761 * map permissions facing syscall side.
762 */
763 if (READ_ONCE(map->frozen))
764 mode &= ~FMODE_CAN_WRITE;
765 return mode;
766}
767
f99bf205 768#ifdef CONFIG_PROC_FS
90a5527d
YS
769/* Show the memory usage of a bpf map */
770static u64 bpf_map_memory_usage(const struct bpf_map *map)
80ee81e0 771{
6b4a6ea2 772 return map->ops->map_mem_usage(map);
80ee81e0
RG
773}
774
f99bf205
DB
775static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
776{
f45d5b6c 777 struct bpf_map *map = filp->private_data;
2beee5f5 778 u32 type = 0, jited = 0;
21116b70 779
f45d5b6c
THJ
780 if (map_type_contains_progs(map)) {
781 spin_lock(&map->owner.lock);
782 type = map->owner.type;
783 jited = map->owner.jited;
784 spin_unlock(&map->owner.lock);
21116b70 785 }
f99bf205
DB
786
787 seq_printf(m,
788 "map_type:\t%u\n"
789 "key_size:\t%u\n"
790 "value_size:\t%u\n"
322cea2f 791 "max_entries:\t%u\n"
21116b70 792 "map_flags:\t%#x\n"
9330986c 793 "map_extra:\t%#llx\n"
90a5527d 794 "memlock:\t%llu\n"
87df15de
DB
795 "map_id:\t%u\n"
796 "frozen:\t%u\n",
f99bf205
DB
797 map->map_type,
798 map->key_size,
799 map->value_size,
322cea2f 800 map->max_entries,
21116b70 801 map->map_flags,
9330986c 802 (unsigned long long)map->map_extra,
90a5527d 803 bpf_map_memory_usage(map),
87df15de
DB
804 map->id,
805 READ_ONCE(map->frozen));
2beee5f5
DB
806 if (type) {
807 seq_printf(m, "owner_prog_type:\t%u\n", type);
808 seq_printf(m, "owner_jited:\t%u\n", jited);
9780c0ab 809 }
f99bf205
DB
810}
811#endif
812
6e71b04a
CF
813static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz,
814 loff_t *ppos)
815{
816 /* We need this handler such that alloc_file() enables
817 * f_mode with FMODE_CAN_READ.
818 */
819 return -EINVAL;
820}
821
822static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
823 size_t siz, loff_t *ppos)
824{
825 /* We need this handler such that alloc_file() enables
826 * f_mode with FMODE_CAN_WRITE.
827 */
828 return -EINVAL;
829}
830
fc970227
AN
831/* called for any extra memory-mapped regions (except initial) */
832static void bpf_map_mmap_open(struct vm_area_struct *vma)
833{
834 struct bpf_map *map = vma->vm_file->private_data;
835
353050be
DB
836 if (vma->vm_flags & VM_MAYWRITE)
837 bpf_map_write_active_inc(map);
fc970227
AN
838}
839
840/* called for all unmapped memory region (including initial) */
841static void bpf_map_mmap_close(struct vm_area_struct *vma)
842{
843 struct bpf_map *map = vma->vm_file->private_data;
844
353050be
DB
845 if (vma->vm_flags & VM_MAYWRITE)
846 bpf_map_write_active_dec(map);
fc970227
AN
847}
848
849static const struct vm_operations_struct bpf_map_default_vmops = {
850 .open = bpf_map_mmap_open,
851 .close = bpf_map_mmap_close,
852};
853
854static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
855{
856 struct bpf_map *map = filp->private_data;
857 int err;
858
db559117 859 if (!map->ops->map_mmap || !IS_ERR_OR_NULL(map->record))
fc970227
AN
860 return -ENOTSUPP;
861
862 if (!(vma->vm_flags & VM_SHARED))
863 return -EINVAL;
864
865 mutex_lock(&map->freeze_mutex);
866
dfeb376d
AN
867 if (vma->vm_flags & VM_WRITE) {
868 if (map->frozen) {
869 err = -EPERM;
870 goto out;
871 }
872 /* map is meant to be read-only, so do not allow mapping as
873 * writable, because it's possible to leak a writable page
874 * reference and allows user-space to still modify it after
875 * freezing, while verifier will assume contents do not change
876 */
877 if (map->map_flags & BPF_F_RDONLY_PROG) {
878 err = -EACCES;
879 goto out;
880 }
fc970227
AN
881 }
882
883 /* set default open/close callbacks */
884 vma->vm_ops = &bpf_map_default_vmops;
885 vma->vm_private_data = map;
1c71222e 886 vm_flags_clear(vma, VM_MAYEXEC);
1f6cb19b
AN
887 if (!(vma->vm_flags & VM_WRITE))
888 /* disallow re-mapping with PROT_WRITE */
1c71222e 889 vm_flags_clear(vma, VM_MAYWRITE);
fc970227
AN
890
891 err = map->ops->map_mmap(map, vma);
892 if (err)
893 goto out;
894
1f6cb19b 895 if (vma->vm_flags & VM_MAYWRITE)
353050be 896 bpf_map_write_active_inc(map);
fc970227
AN
897out:
898 mutex_unlock(&map->freeze_mutex);
899 return err;
900}
901
457f4436
AN
902static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts)
903{
904 struct bpf_map *map = filp->private_data;
905
906 if (map->ops->map_poll)
907 return map->ops->map_poll(map, filp, pts);
908
909 return EPOLLERR;
910}
911
f66e448c 912const struct file_operations bpf_map_fops = {
f99bf205
DB
913#ifdef CONFIG_PROC_FS
914 .show_fdinfo = bpf_map_show_fdinfo,
915#endif
916 .release = bpf_map_release,
6e71b04a
CF
917 .read = bpf_dummy_read,
918 .write = bpf_dummy_write,
fc970227 919 .mmap = bpf_map_mmap,
457f4436 920 .poll = bpf_map_poll,
99c55f7d
AS
921};
922
6e71b04a 923int bpf_map_new_fd(struct bpf_map *map, int flags)
aa79781b 924{
afdb09c7
CF
925 int ret;
926
927 ret = security_bpf_map(map, OPEN_FMODE(flags));
928 if (ret < 0)
929 return ret;
930
aa79781b 931 return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
6e71b04a
CF
932 flags | O_CLOEXEC);
933}
934
935int bpf_get_file_flag(int flags)
936{
937 if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
938 return -EINVAL;
939 if (flags & BPF_F_RDONLY)
940 return O_RDONLY;
941 if (flags & BPF_F_WRONLY)
942 return O_WRONLY;
943 return O_RDWR;
aa79781b
DB
944}
945
99c55f7d
AS
946/* helper macro to check that unused fields 'union bpf_attr' are zero */
947#define CHECK_ATTR(CMD) \
948 memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
949 sizeof(attr->CMD##_LAST_FIELD), 0, \
950 sizeof(*attr) - \
951 offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
952 sizeof(attr->CMD##_LAST_FIELD)) != NULL
953
8e7ae251
MKL
954/* dst and src must have at least "size" number of bytes.
955 * Return strlen on success and < 0 on error.
cb4d2b3f 956 */
8e7ae251 957int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
cb4d2b3f 958{
8e7ae251
MKL
959 const char *end = src + size;
960 const char *orig_src = src;
cb4d2b3f 961
8e7ae251 962 memset(dst, 0, size);
3e0ddc4f 963 /* Copy all isalnum(), '_' and '.' chars. */
cb4d2b3f 964 while (src < end && *src) {
3e0ddc4f
DB
965 if (!isalnum(*src) &&
966 *src != '_' && *src != '.')
cb4d2b3f
MKL
967 return -EINVAL;
968 *dst++ = *src++;
969 }
970
8e7ae251 971 /* No '\0' found in "size" number of bytes */
cb4d2b3f
MKL
972 if (src == end)
973 return -EINVAL;
974
8e7ae251 975 return src - orig_src;
cb4d2b3f
MKL
976}
977
e8d2bec0 978int map_check_no_btf(const struct bpf_map *map,
1b2b234b 979 const struct btf *btf,
e8d2bec0
DB
980 const struct btf_type *key_type,
981 const struct btf_type *value_type)
982{
983 return -ENOTSUPP;
984}
985
d83525ca 986static int map_check_btf(struct bpf_map *map, const struct btf *btf,
e8d2bec0
DB
987 u32 btf_key_id, u32 btf_value_id)
988{
989 const struct btf_type *key_type, *value_type;
990 u32 key_size, value_size;
991 int ret = 0;
992
2824ecb7
DB
993 /* Some maps allow key to be unspecified. */
994 if (btf_key_id) {
995 key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
996 if (!key_type || key_size != map->key_size)
997 return -EINVAL;
998 } else {
999 key_type = btf_type_by_id(btf, 0);
1000 if (!map->ops->map_check_btf)
1001 return -EINVAL;
1002 }
e8d2bec0
DB
1003
1004 value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
1005 if (!value_type || value_size != map->value_size)
1006 return -EINVAL;
1007
f0c5941f 1008 map->record = btf_parse_fields(btf, value_type,
9c395c1b 1009 BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
d54730b5 1010 BPF_RB_ROOT | BPF_REFCOUNT,
db559117 1011 map->value_size);
aa3496ac
KKD
1012 if (!IS_ERR_OR_NULL(map->record)) {
1013 int i;
1014
61df10c7
KKD
1015 if (!bpf_capable()) {
1016 ret = -EPERM;
1017 goto free_map_tab;
1018 }
1019 if (map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) {
1020 ret = -EACCES;
1021 goto free_map_tab;
1022 }
aa3496ac
KKD
1023 for (i = 0; i < sizeof(map->record->field_mask) * 8; i++) {
1024 switch (map->record->field_mask & (1 << i)) {
1025 case 0:
1026 continue;
db559117
KKD
1027 case BPF_SPIN_LOCK:
1028 if (map->map_type != BPF_MAP_TYPE_HASH &&
1029 map->map_type != BPF_MAP_TYPE_ARRAY &&
1030 map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
1031 map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
1032 map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
1033 map->map_type != BPF_MAP_TYPE_TASK_STORAGE &&
1034 map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) {
1035 ret = -EOPNOTSUPP;
1036 goto free_map_tab;
1037 }
1038 break;
1039 case BPF_TIMER:
1040 if (map->map_type != BPF_MAP_TYPE_HASH &&
1041 map->map_type != BPF_MAP_TYPE_LRU_HASH &&
1042 map->map_type != BPF_MAP_TYPE_ARRAY) {
c237bfa5 1043 ret = -EOPNOTSUPP;
db559117
KKD
1044 goto free_map_tab;
1045 }
1046 break;
aa3496ac
KKD
1047 case BPF_KPTR_UNREF:
1048 case BPF_KPTR_REF:
d54730b5 1049 case BPF_REFCOUNT:
aa3496ac 1050 if (map->map_type != BPF_MAP_TYPE_HASH &&
65334e64 1051 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
aa3496ac 1052 map->map_type != BPF_MAP_TYPE_LRU_HASH &&
65334e64 1053 map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH &&
aa3496ac 1054 map->map_type != BPF_MAP_TYPE_ARRAY &&
9db44fdd
KKD
1055 map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
1056 map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
1057 map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
1058 map->map_type != BPF_MAP_TYPE_TASK_STORAGE &&
1059 map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) {
f0c5941f
KKD
1060 ret = -EOPNOTSUPP;
1061 goto free_map_tab;
1062 }
1063 break;
1064 case BPF_LIST_HEAD:
9c395c1b 1065 case BPF_RB_ROOT:
f0c5941f
KKD
1066 if (map->map_type != BPF_MAP_TYPE_HASH &&
1067 map->map_type != BPF_MAP_TYPE_LRU_HASH &&
1068 map->map_type != BPF_MAP_TYPE_ARRAY) {
aa3496ac
KKD
1069 ret = -EOPNOTSUPP;
1070 goto free_map_tab;
1071 }
1072 break;
1073 default:
1074 /* Fail if map_type checks are missing for a field type */
1075 ret = -EOPNOTSUPP;
1076 goto free_map_tab;
1077 }
61df10c7
KKD
1078 }
1079 }
1080
865ce09a
KKD
1081 ret = btf_check_and_fixup_fields(btf, map->record);
1082 if (ret < 0)
1083 goto free_map_tab;
1084
61df10c7 1085 if (map->ops->map_check_btf) {
1b2b234b 1086 ret = map->ops->map_check_btf(map, btf, key_type, value_type);
61df10c7
KKD
1087 if (ret < 0)
1088 goto free_map_tab;
1089 }
e8d2bec0 1090
61df10c7
KKD
1091 return ret;
1092free_map_tab:
aa3496ac 1093 bpf_map_free_record(map);
e8d2bec0
DB
1094 return ret;
1095}
1096
9330986c 1097#define BPF_MAP_CREATE_LAST_FIELD map_extra
99c55f7d
AS
1098/* called via syscall */
1099static int map_create(union bpf_attr *attr)
1100{
22db4122 1101 const struct bpf_map_ops *ops;
96eabe7a 1102 int numa_node = bpf_map_attr_numa_node(attr);
22db4122 1103 u32 map_type = attr->map_type;
99c55f7d 1104 struct bpf_map *map;
6e71b04a 1105 int f_flags;
99c55f7d
AS
1106 int err;
1107
1108 err = CHECK_ATTR(BPF_MAP_CREATE);
1109 if (err)
1110 return -EINVAL;
1111
85d33df3
MKL
1112 if (attr->btf_vmlinux_value_type_id) {
1113 if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS ||
1114 attr->btf_key_type_id || attr->btf_value_type_id)
1115 return -EINVAL;
1116 } else if (attr->btf_key_type_id && !attr->btf_value_type_id) {
1117 return -EINVAL;
1118 }
1119
9330986c
JK
1120 if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER &&
1121 attr->map_extra != 0)
1122 return -EINVAL;
1123
6e71b04a
CF
1124 f_flags = bpf_get_file_flag(attr->map_flags);
1125 if (f_flags < 0)
1126 return f_flags;
1127
96eabe7a 1128 if (numa_node != NUMA_NO_NODE &&
96e5ae4e
ED
1129 ((unsigned int)numa_node >= nr_node_ids ||
1130 !node_online(numa_node)))
96eabe7a
MKL
1131 return -EINVAL;
1132
99c55f7d 1133 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
22db4122
AN
1134 map_type = attr->map_type;
1135 if (map_type >= ARRAY_SIZE(bpf_map_types))
1136 return -EINVAL;
1137 map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types));
1138 ops = bpf_map_types[map_type];
1139 if (!ops)
1140 return -EINVAL;
1141
1142 if (ops->map_alloc_check) {
1143 err = ops->map_alloc_check(attr);
1144 if (err)
1145 return err;
1146 }
1147 if (attr->map_ifindex)
1148 ops = &bpf_map_offload_ops;
1149 if (!ops->map_mem_usage)
1150 return -EINVAL;
1151
1d28635a
AN
1152 /* Intent here is for unprivileged_bpf_disabled to block BPF map
1153 * creation for unprivileged users; other actions depend
1154 * on fd availability and access to bpffs, so are dependent on
1155 * object creation success. Even with unprivileged BPF disabled,
1156 * capability checks are still carried out.
1157 */
1158 if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
1159 return -EPERM;
1160
6c3eba1c
AN
1161 /* check privileged map type permissions */
1162 switch (map_type) {
1163 case BPF_MAP_TYPE_ARRAY:
1164 case BPF_MAP_TYPE_PERCPU_ARRAY:
1165 case BPF_MAP_TYPE_PROG_ARRAY:
1166 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
1167 case BPF_MAP_TYPE_CGROUP_ARRAY:
1168 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
1169 case BPF_MAP_TYPE_HASH:
1170 case BPF_MAP_TYPE_PERCPU_HASH:
1171 case BPF_MAP_TYPE_HASH_OF_MAPS:
1172 case BPF_MAP_TYPE_RINGBUF:
1173 case BPF_MAP_TYPE_USER_RINGBUF:
1174 case BPF_MAP_TYPE_CGROUP_STORAGE:
1175 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
1176 /* unprivileged */
1177 break;
1178 case BPF_MAP_TYPE_SK_STORAGE:
1179 case BPF_MAP_TYPE_INODE_STORAGE:
1180 case BPF_MAP_TYPE_TASK_STORAGE:
1181 case BPF_MAP_TYPE_CGRP_STORAGE:
1182 case BPF_MAP_TYPE_BLOOM_FILTER:
1183 case BPF_MAP_TYPE_LPM_TRIE:
1184 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
1185 case BPF_MAP_TYPE_STACK_TRACE:
1186 case BPF_MAP_TYPE_QUEUE:
1187 case BPF_MAP_TYPE_STACK:
1188 case BPF_MAP_TYPE_LRU_HASH:
1189 case BPF_MAP_TYPE_LRU_PERCPU_HASH:
1190 case BPF_MAP_TYPE_STRUCT_OPS:
1191 case BPF_MAP_TYPE_CPUMAP:
1192 if (!bpf_capable())
1193 return -EPERM;
1194 break;
1195 case BPF_MAP_TYPE_SOCKMAP:
1196 case BPF_MAP_TYPE_SOCKHASH:
1197 case BPF_MAP_TYPE_DEVMAP:
1198 case BPF_MAP_TYPE_DEVMAP_HASH:
1199 case BPF_MAP_TYPE_XSKMAP:
1200 if (!capable(CAP_NET_ADMIN))
1201 return -EPERM;
1202 break;
1203 default:
1204 WARN(1, "unsupported map type %d", map_type);
1205 return -EPERM;
1206 }
1207
22db4122 1208 map = ops->map_alloc(attr);
99c55f7d
AS
1209 if (IS_ERR(map))
1210 return PTR_ERR(map);
22db4122
AN
1211 map->ops = ops;
1212 map->map_type = map_type;
99c55f7d 1213
8e7ae251
MKL
1214 err = bpf_obj_name_cpy(map->name, attr->map_name,
1215 sizeof(attr->map_name));
1216 if (err < 0)
b936ca64 1217 goto free_map;
ad5b177b 1218
1e0bd5a0
AN
1219 atomic64_set(&map->refcnt, 1);
1220 atomic64_set(&map->usercnt, 1);
fc970227 1221 mutex_init(&map->freeze_mutex);
f45d5b6c 1222 spin_lock_init(&map->owner.lock);
99c55f7d 1223
85d33df3
MKL
1224 if (attr->btf_key_type_id || attr->btf_value_type_id ||
1225 /* Even the map's value is a kernel's struct,
1226 * the bpf_prog.o must have BTF to begin with
1227 * to figure out the corresponding kernel's
1228 * counter part. Thus, attr->btf_fd has
1229 * to be valid also.
1230 */
1231 attr->btf_vmlinux_value_type_id) {
a26ca7c9
MKL
1232 struct btf *btf;
1233
a26ca7c9
MKL
1234 btf = btf_get_by_fd(attr->btf_fd);
1235 if (IS_ERR(btf)) {
1236 err = PTR_ERR(btf);
b936ca64 1237 goto free_map;
a26ca7c9 1238 }
350a5c4d
AS
1239 if (btf_is_kernel(btf)) {
1240 btf_put(btf);
1241 err = -EACCES;
1242 goto free_map;
1243 }
85d33df3 1244 map->btf = btf;
a26ca7c9 1245
85d33df3
MKL
1246 if (attr->btf_value_type_id) {
1247 err = map_check_btf(map, btf, attr->btf_key_type_id,
1248 attr->btf_value_type_id);
1249 if (err)
1250 goto free_map;
a26ca7c9
MKL
1251 }
1252
9b2cf328
MKL
1253 map->btf_key_type_id = attr->btf_key_type_id;
1254 map->btf_value_type_id = attr->btf_value_type_id;
85d33df3
MKL
1255 map->btf_vmlinux_value_type_id =
1256 attr->btf_vmlinux_value_type_id;
a26ca7c9
MKL
1257 }
1258
4d7d7f69
KKD
1259 err = security_bpf_map_alloc(map);
1260 if (err)
cd2a8079 1261 goto free_map;
4d7d7f69 1262
f3f1c054
MKL
1263 err = bpf_map_alloc_id(map);
1264 if (err)
b936ca64 1265 goto free_map_sec;
f3f1c054 1266
48edc1f7
RG
1267 bpf_map_save_memcg(map);
1268
6e71b04a 1269 err = bpf_map_new_fd(map, f_flags);
bd5f5f4e
MKL
1270 if (err < 0) {
1271 /* failed to allocate fd.
352d20d6 1272 * bpf_map_put_with_uref() is needed because the above
bd5f5f4e
MKL
1273 * bpf_map_alloc_id() has published the map
1274 * to the userspace and the userspace may
1275 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
1276 */
352d20d6 1277 bpf_map_put_with_uref(map);
bd5f5f4e
MKL
1278 return err;
1279 }
99c55f7d
AS
1280
1281 return err;
1282
afdb09c7
CF
1283free_map_sec:
1284 security_bpf_map_free(map);
b936ca64 1285free_map:
a26ca7c9 1286 btf_put(map->btf);
99c55f7d
AS
1287 map->ops->map_free(map);
1288 return err;
1289}
1290
db20fd2b
AS
1291/* if error is returned, fd is released.
1292 * On success caller should complete fd access with matching fdput()
1293 */
c2101297 1294struct bpf_map *__bpf_map_get(struct fd f)
db20fd2b 1295{
db20fd2b
AS
1296 if (!f.file)
1297 return ERR_PTR(-EBADF);
db20fd2b
AS
1298 if (f.file->f_op != &bpf_map_fops) {
1299 fdput(f);
1300 return ERR_PTR(-EINVAL);
1301 }
1302
c2101297
DB
1303 return f.file->private_data;
1304}
1305
1e0bd5a0 1306void bpf_map_inc(struct bpf_map *map)
c9da161c 1307{
1e0bd5a0 1308 atomic64_inc(&map->refcnt);
c9da161c 1309}
630a4d38 1310EXPORT_SYMBOL_GPL(bpf_map_inc);
c9da161c 1311
1e0bd5a0
AN
1312void bpf_map_inc_with_uref(struct bpf_map *map)
1313{
1314 atomic64_inc(&map->refcnt);
1315 atomic64_inc(&map->usercnt);
1316}
1317EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref);
1318
1ed4d924
MKL
1319struct bpf_map *bpf_map_get(u32 ufd)
1320{
1321 struct fd f = fdget(ufd);
1322 struct bpf_map *map;
1323
1324 map = __bpf_map_get(f);
1325 if (IS_ERR(map))
1326 return map;
1327
1328 bpf_map_inc(map);
1329 fdput(f);
1330
1331 return map;
1332}
b1d18a75 1333EXPORT_SYMBOL(bpf_map_get);
1ed4d924 1334
c9da161c 1335struct bpf_map *bpf_map_get_with_uref(u32 ufd)
c2101297
DB
1336{
1337 struct fd f = fdget(ufd);
1338 struct bpf_map *map;
1339
1340 map = __bpf_map_get(f);
1341 if (IS_ERR(map))
1342 return map;
1343
1e0bd5a0 1344 bpf_map_inc_with_uref(map);
c2101297 1345 fdput(f);
db20fd2b
AS
1346
1347 return map;
1348}
1349
b671c206
KFL
1350/* map_idr_lock should have been held or the map should have been
1351 * protected by rcu read lock.
1352 */
1353struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
bd5f5f4e
MKL
1354{
1355 int refold;
1356
1e0bd5a0 1357 refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0);
bd5f5f4e
MKL
1358 if (!refold)
1359 return ERR_PTR(-ENOENT);
bd5f5f4e 1360 if (uref)
1e0bd5a0 1361 atomic64_inc(&map->usercnt);
bd5f5f4e
MKL
1362
1363 return map;
1364}
1365
1e0bd5a0 1366struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
b0e4701c
SF
1367{
1368 spin_lock_bh(&map_idr_lock);
1e0bd5a0 1369 map = __bpf_map_inc_not_zero(map, false);
b0e4701c
SF
1370 spin_unlock_bh(&map_idr_lock);
1371
1372 return map;
1373}
1374EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);
1375
b8cdc051
AS
1376int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
1377{
1378 return -ENOTSUPP;
1379}
1380
c9d29f46
MV
1381static void *__bpf_copy_key(void __user *ukey, u64 key_size)
1382{
1383 if (key_size)
44779a4b 1384 return vmemdup_user(ukey, key_size);
c9d29f46
MV
1385
1386 if (ukey)
1387 return ERR_PTR(-EINVAL);
1388
1389 return NULL;
1390}
1391
af2ac3e1
AS
1392static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size)
1393{
1394 if (key_size)
44779a4b 1395 return kvmemdup_bpfptr(ukey, key_size);
af2ac3e1
AS
1396
1397 if (!bpfptr_is_null(ukey))
1398 return ERR_PTR(-EINVAL);
1399
1400 return NULL;
1401}
1402
db20fd2b 1403/* last field in 'union bpf_attr' used by this command */
96049f3a 1404#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
db20fd2b
AS
1405
1406static int map_lookup_elem(union bpf_attr *attr)
1407{
535e7b4b
MS
1408 void __user *ukey = u64_to_user_ptr(attr->key);
1409 void __user *uvalue = u64_to_user_ptr(attr->value);
db20fd2b 1410 int ufd = attr->map_fd;
db20fd2b 1411 struct bpf_map *map;
15c14a3d 1412 void *key, *value;
15a07b33 1413 u32 value_size;
592867bf 1414 struct fd f;
db20fd2b
AS
1415 int err;
1416
1417 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
1418 return -EINVAL;
1419
96049f3a
AS
1420 if (attr->flags & ~BPF_F_LOCK)
1421 return -EINVAL;
1422
592867bf 1423 f = fdget(ufd);
c2101297 1424 map = __bpf_map_get(f);
db20fd2b
AS
1425 if (IS_ERR(map))
1426 return PTR_ERR(map);
87df15de 1427 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
6e71b04a
CF
1428 err = -EPERM;
1429 goto err_put;
1430 }
1431
96049f3a 1432 if ((attr->flags & BPF_F_LOCK) &&
db559117 1433 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
96049f3a
AS
1434 err = -EINVAL;
1435 goto err_put;
1436 }
1437
c9d29f46 1438 key = __bpf_copy_key(ukey, map->key_size);
e4448ed8
AV
1439 if (IS_ERR(key)) {
1440 err = PTR_ERR(key);
db20fd2b 1441 goto err_put;
e4448ed8 1442 }
db20fd2b 1443
15c14a3d 1444 value_size = bpf_map_value_size(map);
15a07b33 1445
8ebe667c 1446 err = -ENOMEM;
f0dce1d9 1447 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
db20fd2b 1448 if (!value)
8ebe667c
AS
1449 goto free_key;
1450
9330986c
JK
1451 if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
1452 if (copy_from_user(value, uvalue, value_size))
1453 err = -EFAULT;
1454 else
1455 err = bpf_map_copy_value(map, key, value, attr->flags);
1456 goto free_value;
1457 }
1458
15c14a3d 1459 err = bpf_map_copy_value(map, key, value, attr->flags);
15a07b33 1460 if (err)
8ebe667c 1461 goto free_value;
db20fd2b
AS
1462
1463 err = -EFAULT;
15a07b33 1464 if (copy_to_user(uvalue, value, value_size) != 0)
8ebe667c 1465 goto free_value;
db20fd2b
AS
1466
1467 err = 0;
1468
8ebe667c 1469free_value:
f0dce1d9 1470 kvfree(value);
db20fd2b 1471free_key:
44779a4b 1472 kvfree(key);
db20fd2b
AS
1473err_put:
1474 fdput(f);
1475 return err;
1476}
1477
1ae80cf3 1478
3274f520 1479#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
db20fd2b 1480
af2ac3e1 1481static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
db20fd2b 1482{
af2ac3e1
AS
1483 bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel);
1484 bpfptr_t uvalue = make_bpfptr(attr->value, uattr.is_kernel);
db20fd2b 1485 int ufd = attr->map_fd;
db20fd2b
AS
1486 struct bpf_map *map;
1487 void *key, *value;
15a07b33 1488 u32 value_size;
592867bf 1489 struct fd f;
db20fd2b
AS
1490 int err;
1491
1492 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
1493 return -EINVAL;
1494
592867bf 1495 f = fdget(ufd);
c2101297 1496 map = __bpf_map_get(f);
db20fd2b
AS
1497 if (IS_ERR(map))
1498 return PTR_ERR(map);
353050be 1499 bpf_map_write_active_inc(map);
87df15de 1500 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
6e71b04a
CF
1501 err = -EPERM;
1502 goto err_put;
1503 }
1504
96049f3a 1505 if ((attr->flags & BPF_F_LOCK) &&
db559117 1506 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
96049f3a
AS
1507 err = -EINVAL;
1508 goto err_put;
1509 }
1510
af2ac3e1 1511 key = ___bpf_copy_key(ukey, map->key_size);
e4448ed8
AV
1512 if (IS_ERR(key)) {
1513 err = PTR_ERR(key);
db20fd2b 1514 goto err_put;
e4448ed8 1515 }
db20fd2b 1516
f0dce1d9 1517 value_size = bpf_map_value_size(map);
a02c118e
WY
1518 value = kvmemdup_bpfptr(uvalue, value_size);
1519 if (IS_ERR(value)) {
1520 err = PTR_ERR(value);
db20fd2b 1521 goto free_key;
a02c118e 1522 }
db20fd2b 1523
3af43ba4 1524 err = bpf_map_update_value(map, f.file, key, value, attr->flags);
6710e112 1525
f0dce1d9 1526 kvfree(value);
db20fd2b 1527free_key:
44779a4b 1528 kvfree(key);
db20fd2b 1529err_put:
353050be 1530 bpf_map_write_active_dec(map);
db20fd2b
AS
1531 fdput(f);
1532 return err;
1533}
1534
1535#define BPF_MAP_DELETE_ELEM_LAST_FIELD key
1536
b88df697 1537static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr)
db20fd2b 1538{
b88df697 1539 bpfptr_t ukey = make_bpfptr(attr->key, uattr.is_kernel);
db20fd2b 1540 int ufd = attr->map_fd;
db20fd2b 1541 struct bpf_map *map;
592867bf 1542 struct fd f;
db20fd2b
AS
1543 void *key;
1544 int err;
1545
1546 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
1547 return -EINVAL;
1548
592867bf 1549 f = fdget(ufd);
c2101297 1550 map = __bpf_map_get(f);
db20fd2b
AS
1551 if (IS_ERR(map))
1552 return PTR_ERR(map);
353050be 1553 bpf_map_write_active_inc(map);
87df15de 1554 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
6e71b04a
CF
1555 err = -EPERM;
1556 goto err_put;
1557 }
1558
b88df697 1559 key = ___bpf_copy_key(ukey, map->key_size);
e4448ed8
AV
1560 if (IS_ERR(key)) {
1561 err = PTR_ERR(key);
db20fd2b 1562 goto err_put;
e4448ed8 1563 }
db20fd2b 1564
9d03ebc7 1565 if (bpf_map_is_offloaded(map)) {
a3884572
JK
1566 err = bpf_map_offload_delete_elem(map, key);
1567 goto out;
85d33df3
MKL
1568 } else if (IS_FD_PROG_ARRAY(map) ||
1569 map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
1570 /* These maps require sleepable context */
da765a2f
DB
1571 err = map->ops->map_delete_elem(map, key);
1572 goto out;
a3884572
JK
1573 }
1574
b6e5dae1 1575 bpf_disable_instrumentation();
db20fd2b
AS
1576 rcu_read_lock();
1577 err = map->ops->map_delete_elem(map, key);
1578 rcu_read_unlock();
b6e5dae1 1579 bpf_enable_instrumentation();
1ae80cf3 1580 maybe_wait_bpf_programs(map);
a3884572 1581out:
44779a4b 1582 kvfree(key);
db20fd2b 1583err_put:
353050be 1584 bpf_map_write_active_dec(map);
db20fd2b
AS
1585 fdput(f);
1586 return err;
1587}
1588
1589/* last field in 'union bpf_attr' used by this command */
1590#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
1591
1592static int map_get_next_key(union bpf_attr *attr)
1593{
535e7b4b
MS
1594 void __user *ukey = u64_to_user_ptr(attr->key);
1595 void __user *unext_key = u64_to_user_ptr(attr->next_key);
db20fd2b 1596 int ufd = attr->map_fd;
db20fd2b
AS
1597 struct bpf_map *map;
1598 void *key, *next_key;
592867bf 1599 struct fd f;
db20fd2b
AS
1600 int err;
1601
1602 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
1603 return -EINVAL;
1604
592867bf 1605 f = fdget(ufd);
c2101297 1606 map = __bpf_map_get(f);
db20fd2b
AS
1607 if (IS_ERR(map))
1608 return PTR_ERR(map);
87df15de 1609 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
6e71b04a
CF
1610 err = -EPERM;
1611 goto err_put;
1612 }
1613
8fe45924 1614 if (ukey) {
c9d29f46 1615 key = __bpf_copy_key(ukey, map->key_size);
e4448ed8
AV
1616 if (IS_ERR(key)) {
1617 err = PTR_ERR(key);
8fe45924 1618 goto err_put;
e4448ed8 1619 }
8fe45924
TQ
1620 } else {
1621 key = NULL;
1622 }
db20fd2b
AS
1623
1624 err = -ENOMEM;
44779a4b 1625 next_key = kvmalloc(map->key_size, GFP_USER);
db20fd2b
AS
1626 if (!next_key)
1627 goto free_key;
1628
9d03ebc7 1629 if (bpf_map_is_offloaded(map)) {
a3884572
JK
1630 err = bpf_map_offload_get_next_key(map, key, next_key);
1631 goto out;
1632 }
1633
db20fd2b
AS
1634 rcu_read_lock();
1635 err = map->ops->map_get_next_key(map, key, next_key);
1636 rcu_read_unlock();
a3884572 1637out:
db20fd2b
AS
1638 if (err)
1639 goto free_next_key;
1640
1641 err = -EFAULT;
1642 if (copy_to_user(unext_key, next_key, map->key_size) != 0)
1643 goto free_next_key;
1644
1645 err = 0;
1646
1647free_next_key:
44779a4b 1648 kvfree(next_key);
db20fd2b 1649free_key:
44779a4b 1650 kvfree(key);
db20fd2b
AS
1651err_put:
1652 fdput(f);
1653 return err;
1654}
1655
aa2e93b8
BV
1656int generic_map_delete_batch(struct bpf_map *map,
1657 const union bpf_attr *attr,
1658 union bpf_attr __user *uattr)
1659{
1660 void __user *keys = u64_to_user_ptr(attr->batch.keys);
1661 u32 cp, max_count;
1662 int err = 0;
1663 void *key;
1664
1665 if (attr->batch.elem_flags & ~BPF_F_LOCK)
1666 return -EINVAL;
1667
1668 if ((attr->batch.elem_flags & BPF_F_LOCK) &&
db559117 1669 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
aa2e93b8
BV
1670 return -EINVAL;
1671 }
1672
1673 max_count = attr->batch.count;
1674 if (!max_count)
1675 return 0;
1676
44779a4b 1677 key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
2e3a94aa
BV
1678 if (!key)
1679 return -ENOMEM;
1680
aa2e93b8 1681 for (cp = 0; cp < max_count; cp++) {
2e3a94aa
BV
1682 err = -EFAULT;
1683 if (copy_from_user(key, keys + cp * map->key_size,
1684 map->key_size))
aa2e93b8 1685 break;
aa2e93b8 1686
9d03ebc7 1687 if (bpf_map_is_offloaded(map)) {
aa2e93b8
BV
1688 err = bpf_map_offload_delete_elem(map, key);
1689 break;
1690 }
1691
b6e5dae1 1692 bpf_disable_instrumentation();
aa2e93b8
BV
1693 rcu_read_lock();
1694 err = map->ops->map_delete_elem(map, key);
1695 rcu_read_unlock();
b6e5dae1 1696 bpf_enable_instrumentation();
aa2e93b8
BV
1697 if (err)
1698 break;
75134f16 1699 cond_resched();
aa2e93b8
BV
1700 }
1701 if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
1702 err = -EFAULT;
2e3a94aa 1703
44779a4b 1704 kvfree(key);
9087c6ff
ED
1705
1706 maybe_wait_bpf_programs(map);
aa2e93b8
BV
1707 return err;
1708}
1709
3af43ba4 1710int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
aa2e93b8
BV
1711 const union bpf_attr *attr,
1712 union bpf_attr __user *uattr)
1713{
1714 void __user *values = u64_to_user_ptr(attr->batch.values);
1715 void __user *keys = u64_to_user_ptr(attr->batch.keys);
1716 u32 value_size, cp, max_count;
aa2e93b8 1717 void *key, *value;
aa2e93b8
BV
1718 int err = 0;
1719
aa2e93b8
BV
1720 if (attr->batch.elem_flags & ~BPF_F_LOCK)
1721 return -EINVAL;
1722
1723 if ((attr->batch.elem_flags & BPF_F_LOCK) &&
db559117 1724 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
aa2e93b8
BV
1725 return -EINVAL;
1726 }
1727
1728 value_size = bpf_map_value_size(map);
1729
1730 max_count = attr->batch.count;
1731 if (!max_count)
1732 return 0;
1733
44779a4b 1734 key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
2e3a94aa
BV
1735 if (!key)
1736 return -ENOMEM;
1737
f0dce1d9 1738 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
2e3a94aa 1739 if (!value) {
44779a4b 1740 kvfree(key);
aa2e93b8 1741 return -ENOMEM;
2e3a94aa 1742 }
aa2e93b8
BV
1743
1744 for (cp = 0; cp < max_count; cp++) {
aa2e93b8 1745 err = -EFAULT;
2e3a94aa
BV
1746 if (copy_from_user(key, keys + cp * map->key_size,
1747 map->key_size) ||
1748 copy_from_user(value, values + cp * value_size, value_size))
aa2e93b8
BV
1749 break;
1750
3af43ba4 1751 err = bpf_map_update_value(map, map_file, key, value,
aa2e93b8
BV
1752 attr->batch.elem_flags);
1753
1754 if (err)
1755 break;
75134f16 1756 cond_resched();
aa2e93b8
BV
1757 }
1758
1759 if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
1760 err = -EFAULT;
1761
f0dce1d9 1762 kvfree(value);
44779a4b 1763 kvfree(key);
aa2e93b8
BV
1764 return err;
1765}
1766
cb4d03ab
BV
1767#define MAP_LOOKUP_RETRIES 3
1768
1769int generic_map_lookup_batch(struct bpf_map *map,
1770 const union bpf_attr *attr,
1771 union bpf_attr __user *uattr)
1772{
1773 void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch);
1774 void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
1775 void __user *values = u64_to_user_ptr(attr->batch.values);
1776 void __user *keys = u64_to_user_ptr(attr->batch.keys);
1777 void *buf, *buf_prevkey, *prev_key, *key, *value;
1778 int err, retry = MAP_LOOKUP_RETRIES;
1779 u32 value_size, cp, max_count;
cb4d03ab
BV
1780
1781 if (attr->batch.elem_flags & ~BPF_F_LOCK)
1782 return -EINVAL;
1783
1784 if ((attr->batch.elem_flags & BPF_F_LOCK) &&
db559117 1785 !btf_record_has_field(map->record, BPF_SPIN_LOCK))
cb4d03ab
BV
1786 return -EINVAL;
1787
1788 value_size = bpf_map_value_size(map);
1789
1790 max_count = attr->batch.count;
1791 if (!max_count)
1792 return 0;
1793
1794 if (put_user(0, &uattr->batch.count))
1795 return -EFAULT;
1796
44779a4b 1797 buf_prevkey = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
cb4d03ab
BV
1798 if (!buf_prevkey)
1799 return -ENOMEM;
1800
f0dce1d9 1801 buf = kvmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
cb4d03ab 1802 if (!buf) {
44779a4b 1803 kvfree(buf_prevkey);
cb4d03ab
BV
1804 return -ENOMEM;
1805 }
1806
1807 err = -EFAULT;
cb4d03ab
BV
1808 prev_key = NULL;
1809 if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size))
1810 goto free_buf;
1811 key = buf;
1812 value = key + map->key_size;
1813 if (ubatch)
1814 prev_key = buf_prevkey;
1815
1816 for (cp = 0; cp < max_count;) {
1817 rcu_read_lock();
1818 err = map->ops->map_get_next_key(map, prev_key, key);
1819 rcu_read_unlock();
1820 if (err)
1821 break;
1822 err = bpf_map_copy_value(map, key, value,
1823 attr->batch.elem_flags);
1824
1825 if (err == -ENOENT) {
1826 if (retry) {
1827 retry--;
1828 continue;
1829 }
1830 err = -EINTR;
1831 break;
1832 }
1833
1834 if (err)
1835 goto free_buf;
1836
1837 if (copy_to_user(keys + cp * map->key_size, key,
1838 map->key_size)) {
1839 err = -EFAULT;
1840 goto free_buf;
1841 }
1842 if (copy_to_user(values + cp * value_size, value, value_size)) {
1843 err = -EFAULT;
1844 goto free_buf;
1845 }
1846
1847 if (!prev_key)
1848 prev_key = buf_prevkey;
1849
1850 swap(prev_key, key);
1851 retry = MAP_LOOKUP_RETRIES;
1852 cp++;
75134f16 1853 cond_resched();
cb4d03ab
BV
1854 }
1855
1856 if (err == -EFAULT)
1857 goto free_buf;
1858
1859 if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) ||
1860 (cp && copy_to_user(uobatch, prev_key, map->key_size))))
1861 err = -EFAULT;
1862
1863free_buf:
44779a4b 1864 kvfree(buf_prevkey);
f0dce1d9 1865 kvfree(buf);
cb4d03ab
BV
1866 return err;
1867}
1868
3e87f192 1869#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags
bd513cd0
MV
1870
1871static int map_lookup_and_delete_elem(union bpf_attr *attr)
1872{
1873 void __user *ukey = u64_to_user_ptr(attr->key);
1874 void __user *uvalue = u64_to_user_ptr(attr->value);
1875 int ufd = attr->map_fd;
1876 struct bpf_map *map;
540fefc0 1877 void *key, *value;
bd513cd0
MV
1878 u32 value_size;
1879 struct fd f;
1880 int err;
1881
1882 if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
1883 return -EINVAL;
1884
3e87f192
DS
1885 if (attr->flags & ~BPF_F_LOCK)
1886 return -EINVAL;
1887
bd513cd0
MV
1888 f = fdget(ufd);
1889 map = __bpf_map_get(f);
1890 if (IS_ERR(map))
1891 return PTR_ERR(map);
353050be 1892 bpf_map_write_active_inc(map);
1ea0f912
AP
1893 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) ||
1894 !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
bd513cd0
MV
1895 err = -EPERM;
1896 goto err_put;
1897 }
1898
3e87f192
DS
1899 if (attr->flags &&
1900 (map->map_type == BPF_MAP_TYPE_QUEUE ||
1901 map->map_type == BPF_MAP_TYPE_STACK)) {
1902 err = -EINVAL;
1903 goto err_put;
1904 }
1905
1906 if ((attr->flags & BPF_F_LOCK) &&
db559117 1907 !btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
3e87f192
DS
1908 err = -EINVAL;
1909 goto err_put;
1910 }
1911
bd513cd0
MV
1912 key = __bpf_copy_key(ukey, map->key_size);
1913 if (IS_ERR(key)) {
1914 err = PTR_ERR(key);
1915 goto err_put;
1916 }
1917
3e87f192 1918 value_size = bpf_map_value_size(map);
bd513cd0
MV
1919
1920 err = -ENOMEM;
f0dce1d9 1921 value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
bd513cd0
MV
1922 if (!value)
1923 goto free_key;
1924
3e87f192 1925 err = -ENOTSUPP;
bd513cd0
MV
1926 if (map->map_type == BPF_MAP_TYPE_QUEUE ||
1927 map->map_type == BPF_MAP_TYPE_STACK) {
1928 err = map->ops->map_pop_elem(map, value);
3e87f192
DS
1929 } else if (map->map_type == BPF_MAP_TYPE_HASH ||
1930 map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
1931 map->map_type == BPF_MAP_TYPE_LRU_HASH ||
1932 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
9d03ebc7 1933 if (!bpf_map_is_offloaded(map)) {
3e87f192
DS
1934 bpf_disable_instrumentation();
1935 rcu_read_lock();
1936 err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags);
1937 rcu_read_unlock();
1938 bpf_enable_instrumentation();
1939 }
bd513cd0
MV
1940 }
1941
1942 if (err)
1943 goto free_value;
1944
7f645462
WY
1945 if (copy_to_user(uvalue, value, value_size) != 0) {
1946 err = -EFAULT;
bd513cd0 1947 goto free_value;
7f645462 1948 }
bd513cd0
MV
1949
1950 err = 0;
1951
1952free_value:
f0dce1d9 1953 kvfree(value);
bd513cd0 1954free_key:
44779a4b 1955 kvfree(key);
bd513cd0 1956err_put:
353050be 1957 bpf_map_write_active_dec(map);
bd513cd0
MV
1958 fdput(f);
1959 return err;
1960}
1961
87df15de
DB
1962#define BPF_MAP_FREEZE_LAST_FIELD map_fd
1963
1964static int map_freeze(const union bpf_attr *attr)
1965{
1966 int err = 0, ufd = attr->map_fd;
1967 struct bpf_map *map;
1968 struct fd f;
1969
1970 if (CHECK_ATTR(BPF_MAP_FREEZE))
1971 return -EINVAL;
1972
1973 f = fdget(ufd);
1974 map = __bpf_map_get(f);
1975 if (IS_ERR(map))
1976 return PTR_ERR(map);
fc970227 1977
db559117 1978 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || !IS_ERR_OR_NULL(map->record)) {
849b4d94
MKL
1979 fdput(f);
1980 return -ENOTSUPP;
1981 }
1982
c4c84f6f 1983 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
4266f41f
DB
1984 fdput(f);
1985 return -EPERM;
c4c84f6f
AN
1986 }
1987
fc970227 1988 mutex_lock(&map->freeze_mutex);
353050be 1989 if (bpf_map_write_active(map)) {
fc970227
AN
1990 err = -EBUSY;
1991 goto err_put;
1992 }
87df15de
DB
1993 if (READ_ONCE(map->frozen)) {
1994 err = -EBUSY;
1995 goto err_put;
1996 }
87df15de
DB
1997
1998 WRITE_ONCE(map->frozen, true);
1999err_put:
fc970227 2000 mutex_unlock(&map->freeze_mutex);
87df15de
DB
2001 fdput(f);
2002 return err;
2003}
2004
7de16e3a 2005static const struct bpf_prog_ops * const bpf_prog_types[] = {
91cc1a99 2006#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
7de16e3a
JK
2007 [_id] = & _name ## _prog_ops,
2008#define BPF_MAP_TYPE(_id, _ops)
f2e10bff 2009#define BPF_LINK_TYPE(_id, _name)
7de16e3a
JK
2010#include <linux/bpf_types.h>
2011#undef BPF_PROG_TYPE
2012#undef BPF_MAP_TYPE
f2e10bff 2013#undef BPF_LINK_TYPE
7de16e3a
JK
2014};
2015
09756af4
AS
2016static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
2017{
d0f1a451
DB
2018 const struct bpf_prog_ops *ops;
2019
2020 if (type >= ARRAY_SIZE(bpf_prog_types))
2021 return -EINVAL;
2022 type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types));
2023 ops = bpf_prog_types[type];
2024 if (!ops)
be9370a7 2025 return -EINVAL;
09756af4 2026
9d03ebc7 2027 if (!bpf_prog_is_offloaded(prog->aux))
d0f1a451 2028 prog->aux->ops = ops;
ab3f0063
JK
2029 else
2030 prog->aux->ops = &bpf_offload_prog_ops;
be9370a7
JB
2031 prog->type = type;
2032 return 0;
09756af4
AS
2033}
2034
bae141f5
DB
2035enum bpf_audit {
2036 BPF_AUDIT_LOAD,
2037 BPF_AUDIT_UNLOAD,
2038 BPF_AUDIT_MAX,
2039};
2040
2041static const char * const bpf_audit_str[BPF_AUDIT_MAX] = {
2042 [BPF_AUDIT_LOAD] = "LOAD",
2043 [BPF_AUDIT_UNLOAD] = "UNLOAD",
2044};
2045
2046static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op)
2047{
2048 struct audit_context *ctx = NULL;
2049 struct audit_buffer *ab;
2050
2051 if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX))
2052 return;
2053 if (audit_enabled == AUDIT_OFF)
2054 return;
ef01f4e2 2055 if (!in_irq() && !irqs_disabled())
bae141f5
DB
2056 ctx = audit_context();
2057 ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF);
2058 if (unlikely(!ab))
2059 return;
2060 audit_log_format(ab, "prog-id=%u op=%s",
2061 prog->aux->id, bpf_audit_str[op]);
2062 audit_log_end(ab);
2063}
2064
dc4bb0e2
MKL
2065static int bpf_prog_alloc_id(struct bpf_prog *prog)
2066{
2067 int id;
2068
b76354cd 2069 idr_preload(GFP_KERNEL);
dc4bb0e2
MKL
2070 spin_lock_bh(&prog_idr_lock);
2071 id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
2072 if (id > 0)
2073 prog->aux->id = id;
2074 spin_unlock_bh(&prog_idr_lock);
b76354cd 2075 idr_preload_end();
dc4bb0e2
MKL
2076
2077 /* id is in [1, INT_MAX) */
2078 if (WARN_ON_ONCE(!id))
2079 return -ENOSPC;
2080
2081 return id > 0 ? 0 : id;
2082}
2083
e7895f01 2084void bpf_prog_free_id(struct bpf_prog *prog)
dc4bb0e2 2085{
d809e134
AS
2086 unsigned long flags;
2087
ad8ad79f
JK
2088 /* cBPF to eBPF migrations are currently not in the idr store.
2089 * Offloaded programs are removed from the store when their device
2090 * disappears - even if someone grabs an fd to them they are unusable,
2091 * simply waiting for refcnt to drop to be freed.
2092 */
dc4bb0e2
MKL
2093 if (!prog->aux->id)
2094 return;
2095
e7895f01 2096 spin_lock_irqsave(&prog_idr_lock, flags);
dc4bb0e2 2097 idr_remove(&prog_idr, prog->aux->id);
ad8ad79f 2098 prog->aux->id = 0;
e7895f01 2099 spin_unlock_irqrestore(&prog_idr_lock, flags);
dc4bb0e2
MKL
2100}
2101
1aacde3d 2102static void __bpf_prog_put_rcu(struct rcu_head *rcu)
abf2e7d6
AS
2103{
2104 struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
2105
3b4d9eb2 2106 kvfree(aux->func_info);
8c1b6e69 2107 kfree(aux->func_info_aux);
3ac1f01b 2108 free_uid(aux->user);
afdb09c7 2109 security_bpf_prog_free(aux);
abf2e7d6
AS
2110 bpf_prog_free(aux->prog);
2111}
2112
cd7455f1
DB
2113static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
2114{
2115 bpf_prog_kallsyms_del_all(prog);
2116 btf_put(prog->aux->btf);
31bf1dbc 2117 module_put(prog->aux->mod);
e16301fb
MKL
2118 kvfree(prog->aux->jited_linfo);
2119 kvfree(prog->aux->linfo);
e6ac2450 2120 kfree(prog->aux->kfunc_tab);
22dc4a0f
AN
2121 if (prog->aux->attach_btf)
2122 btf_put(prog->aux->attach_btf);
cd7455f1 2123
1e6c62a8
AS
2124 if (deferred) {
2125 if (prog->aux->sleepable)
2126 call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu);
2127 else
2128 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
2129 } else {
cd7455f1 2130 __bpf_prog_put_rcu(&prog->aux->rcu);
1e6c62a8 2131 }
cd7455f1
DB
2132}
2133
d809e134
AS
2134static void bpf_prog_put_deferred(struct work_struct *work)
2135{
2136 struct bpf_prog_aux *aux;
2137 struct bpf_prog *prog;
2138
2139 aux = container_of(work, struct bpf_prog_aux, work);
2140 prog = aux->prog;
2141 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
2142 bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
e7895f01 2143 bpf_prog_free_id(prog);
d809e134
AS
2144 __bpf_prog_put_noref(prog, true);
2145}
2146
e7895f01 2147static void __bpf_prog_put(struct bpf_prog *prog)
09756af4 2148{
d809e134
AS
2149 struct bpf_prog_aux *aux = prog->aux;
2150
2151 if (atomic64_dec_and_test(&aux->refcnt)) {
d809e134
AS
2152 if (in_irq() || irqs_disabled()) {
2153 INIT_WORK(&aux->work, bpf_prog_put_deferred);
2154 schedule_work(&aux->work);
2155 } else {
2156 bpf_prog_put_deferred(&aux->work);
2157 }
a67edbf4 2158 }
09756af4 2159}
b16d9aa4
MKL
2160
2161void bpf_prog_put(struct bpf_prog *prog)
2162{
e7895f01 2163 __bpf_prog_put(prog);
b16d9aa4 2164}
e2e9b654 2165EXPORT_SYMBOL_GPL(bpf_prog_put);
09756af4
AS
2166
2167static int bpf_prog_release(struct inode *inode, struct file *filp)
2168{
2169 struct bpf_prog *prog = filp->private_data;
2170
1aacde3d 2171 bpf_prog_put(prog);
09756af4
AS
2172 return 0;
2173}
2174
61a0abae
ED
2175struct bpf_prog_kstats {
2176 u64 nsecs;
2177 u64 cnt;
2178 u64 misses;
2179};
2180
05b24ff9
JO
2181void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog)
2182{
2183 struct bpf_prog_stats *stats;
2184 unsigned int flags;
2185
2186 stats = this_cpu_ptr(prog->stats);
2187 flags = u64_stats_update_begin_irqsave(&stats->syncp);
2188 u64_stats_inc(&stats->misses);
2189 u64_stats_update_end_irqrestore(&stats->syncp, flags);
2190}
2191
492ecee8 2192static void bpf_prog_get_stats(const struct bpf_prog *prog,
61a0abae 2193 struct bpf_prog_kstats *stats)
492ecee8 2194{
9ed9e9ba 2195 u64 nsecs = 0, cnt = 0, misses = 0;
492ecee8
AS
2196 int cpu;
2197
2198 for_each_possible_cpu(cpu) {
2199 const struct bpf_prog_stats *st;
2200 unsigned int start;
9ed9e9ba 2201 u64 tnsecs, tcnt, tmisses;
492ecee8 2202
700d4796 2203 st = per_cpu_ptr(prog->stats, cpu);
492ecee8 2204 do {
97c4090b 2205 start = u64_stats_fetch_begin(&st->syncp);
61a0abae
ED
2206 tnsecs = u64_stats_read(&st->nsecs);
2207 tcnt = u64_stats_read(&st->cnt);
2208 tmisses = u64_stats_read(&st->misses);
97c4090b 2209 } while (u64_stats_fetch_retry(&st->syncp, start));
492ecee8
AS
2210 nsecs += tnsecs;
2211 cnt += tcnt;
9ed9e9ba 2212 misses += tmisses;
492ecee8
AS
2213 }
2214 stats->nsecs = nsecs;
2215 stats->cnt = cnt;
9ed9e9ba 2216 stats->misses = misses;
492ecee8
AS
2217}
2218
7bd509e3
DB
2219#ifdef CONFIG_PROC_FS
2220static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
2221{
2222 const struct bpf_prog *prog = filp->private_data;
f1f7714e 2223 char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
61a0abae 2224 struct bpf_prog_kstats stats;
7bd509e3 2225
492ecee8 2226 bpf_prog_get_stats(prog, &stats);
f1f7714e 2227 bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
7bd509e3
DB
2228 seq_printf(m,
2229 "prog_type:\t%u\n"
2230 "prog_jited:\t%u\n"
f1f7714e 2231 "prog_tag:\t%s\n"
4316b409 2232 "memlock:\t%llu\n"
492ecee8
AS
2233 "prog_id:\t%u\n"
2234 "run_time_ns:\t%llu\n"
9ed9e9ba 2235 "run_cnt:\t%llu\n"
aba64c7d
DM
2236 "recursion_misses:\t%llu\n"
2237 "verified_insns:\t%u\n",
7bd509e3
DB
2238 prog->type,
2239 prog->jited,
f1f7714e 2240 prog_tag,
4316b409 2241 prog->pages * 1ULL << PAGE_SHIFT,
492ecee8
AS
2242 prog->aux->id,
2243 stats.nsecs,
9ed9e9ba 2244 stats.cnt,
aba64c7d
DM
2245 stats.misses,
2246 prog->aux->verified_insns);
7bd509e3
DB
2247}
2248#endif
2249
f66e448c 2250const struct file_operations bpf_prog_fops = {
7bd509e3
DB
2251#ifdef CONFIG_PROC_FS
2252 .show_fdinfo = bpf_prog_show_fdinfo,
2253#endif
2254 .release = bpf_prog_release,
6e71b04a
CF
2255 .read = bpf_dummy_read,
2256 .write = bpf_dummy_write,
09756af4
AS
2257};
2258
b2197755 2259int bpf_prog_new_fd(struct bpf_prog *prog)
aa79781b 2260{
afdb09c7
CF
2261 int ret;
2262
2263 ret = security_bpf_prog(prog);
2264 if (ret < 0)
2265 return ret;
2266
aa79781b
DB
2267 return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
2268 O_RDWR | O_CLOEXEC);
2269}
2270
113214be 2271static struct bpf_prog *____bpf_prog_get(struct fd f)
09756af4 2272{
09756af4
AS
2273 if (!f.file)
2274 return ERR_PTR(-EBADF);
09756af4
AS
2275 if (f.file->f_op != &bpf_prog_fops) {
2276 fdput(f);
2277 return ERR_PTR(-EINVAL);
2278 }
2279
c2101297 2280 return f.file->private_data;
09756af4
AS
2281}
2282
85192dbf 2283void bpf_prog_add(struct bpf_prog *prog, int i)
92117d84 2284{
85192dbf 2285 atomic64_add(i, &prog->aux->refcnt);
92117d84 2286}
59d3656d
BB
2287EXPORT_SYMBOL_GPL(bpf_prog_add);
2288
c540594f
DB
2289void bpf_prog_sub(struct bpf_prog *prog, int i)
2290{
2291 /* Only to be used for undoing previous bpf_prog_add() in some
2292 * error path. We still know that another entity in our call
2293 * path holds a reference to the program, thus atomic_sub() can
2294 * be safely used in such cases!
2295 */
85192dbf 2296 WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0);
c540594f
DB
2297}
2298EXPORT_SYMBOL_GPL(bpf_prog_sub);
2299
85192dbf 2300void bpf_prog_inc(struct bpf_prog *prog)
59d3656d 2301{
85192dbf 2302 atomic64_inc(&prog->aux->refcnt);
59d3656d 2303}
97bc402d 2304EXPORT_SYMBOL_GPL(bpf_prog_inc);
92117d84 2305
b16d9aa4 2306/* prog_idr_lock should have been held */
a6f6df69 2307struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
b16d9aa4
MKL
2308{
2309 int refold;
2310
85192dbf 2311 refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0);
b16d9aa4
MKL
2312
2313 if (!refold)
2314 return ERR_PTR(-ENOENT);
2315
2316 return prog;
2317}
a6f6df69 2318EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);
b16d9aa4 2319
040ee692 2320bool bpf_prog_get_ok(struct bpf_prog *prog,
288b3de5 2321 enum bpf_prog_type *attach_type, bool attach_drv)
248f346f 2322{
288b3de5
JK
2323 /* not an attachment, just a refcount inc, always allow */
2324 if (!attach_type)
2325 return true;
248f346f
JK
2326
2327 if (prog->type != *attach_type)
2328 return false;
9d03ebc7 2329 if (bpf_prog_is_offloaded(prog->aux) && !attach_drv)
248f346f
JK
2330 return false;
2331
2332 return true;
2333}
2334
2335static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
288b3de5 2336 bool attach_drv)
09756af4
AS
2337{
2338 struct fd f = fdget(ufd);
2339 struct bpf_prog *prog;
2340
113214be 2341 prog = ____bpf_prog_get(f);
09756af4
AS
2342 if (IS_ERR(prog))
2343 return prog;
288b3de5 2344 if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) {
113214be
DB
2345 prog = ERR_PTR(-EINVAL);
2346 goto out;
2347 }
09756af4 2348
85192dbf 2349 bpf_prog_inc(prog);
113214be 2350out:
09756af4
AS
2351 fdput(f);
2352 return prog;
2353}
113214be
DB
2354
2355struct bpf_prog *bpf_prog_get(u32 ufd)
2356{
288b3de5 2357 return __bpf_prog_get(ufd, NULL, false);
113214be
DB
2358}
2359
248f346f 2360struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
288b3de5 2361 bool attach_drv)
248f346f 2362{
4d220ed0 2363 return __bpf_prog_get(ufd, &type, attach_drv);
248f346f 2364}
6c8dfe21 2365EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
248f346f 2366
aac3fc32
AI
2367/* Initially all BPF programs could be loaded w/o specifying
2368 * expected_attach_type. Later for some of them specifying expected_attach_type
2369 * at load time became required so that program could be validated properly.
2370 * Programs of types that are allowed to be loaded both w/ and w/o (for
2371 * backward compatibility) expected_attach_type, should have the default attach
2372 * type assigned to expected_attach_type for the latter case, so that it can be
2373 * validated later at attach time.
2374 *
2375 * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if
2376 * prog type requires it but has some attach types that have to be backward
2377 * compatible.
2378 */
2379static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
2380{
2381 switch (attr->prog_type) {
2382 case BPF_PROG_TYPE_CGROUP_SOCK:
2383 /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't
2384 * exist so checking for non-zero is the way to go here.
2385 */
2386 if (!attr->expected_attach_type)
2387 attr->expected_attach_type =
2388 BPF_CGROUP_INET_SOCK_CREATE;
2389 break;
d5e4ddae
KI
2390 case BPF_PROG_TYPE_SK_REUSEPORT:
2391 if (!attr->expected_attach_type)
2392 attr->expected_attach_type =
2393 BPF_SK_REUSEPORT_SELECT;
2394 break;
aac3fc32
AI
2395 }
2396}
2397
5e43f899 2398static int
ccfe29eb
AS
2399bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
2400 enum bpf_attach_type expected_attach_type,
290248a5
AN
2401 struct btf *attach_btf, u32 btf_id,
2402 struct bpf_prog *dst_prog)
5e43f899 2403{
27ae7997 2404 if (btf_id) {
c108e3c1
AS
2405 if (btf_id > BTF_MAX_TYPE)
2406 return -EINVAL;
27ae7997 2407
290248a5
AN
2408 if (!attach_btf && !dst_prog)
2409 return -EINVAL;
2410
27ae7997
MKL
2411 switch (prog_type) {
2412 case BPF_PROG_TYPE_TRACING:
9e4e01df 2413 case BPF_PROG_TYPE_LSM:
27ae7997 2414 case BPF_PROG_TYPE_STRUCT_OPS:
be8704ff 2415 case BPF_PROG_TYPE_EXT:
27ae7997
MKL
2416 break;
2417 default:
c108e3c1 2418 return -EINVAL;
27ae7997 2419 }
c108e3c1
AS
2420 }
2421
290248a5
AN
2422 if (attach_btf && (!btf_id || dst_prog))
2423 return -EINVAL;
2424
2425 if (dst_prog && prog_type != BPF_PROG_TYPE_TRACING &&
be8704ff 2426 prog_type != BPF_PROG_TYPE_EXT)
27ae7997
MKL
2427 return -EINVAL;
2428
4fbac77d 2429 switch (prog_type) {
aac3fc32
AI
2430 case BPF_PROG_TYPE_CGROUP_SOCK:
2431 switch (expected_attach_type) {
2432 case BPF_CGROUP_INET_SOCK_CREATE:
f5836749 2433 case BPF_CGROUP_INET_SOCK_RELEASE:
aac3fc32
AI
2434 case BPF_CGROUP_INET4_POST_BIND:
2435 case BPF_CGROUP_INET6_POST_BIND:
2436 return 0;
2437 default:
2438 return -EINVAL;
2439 }
4fbac77d
AI
2440 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
2441 switch (expected_attach_type) {
2442 case BPF_CGROUP_INET4_BIND:
2443 case BPF_CGROUP_INET6_BIND:
d74bad4e
AI
2444 case BPF_CGROUP_INET4_CONNECT:
2445 case BPF_CGROUP_INET6_CONNECT:
1b66d253
DB
2446 case BPF_CGROUP_INET4_GETPEERNAME:
2447 case BPF_CGROUP_INET6_GETPEERNAME:
2448 case BPF_CGROUP_INET4_GETSOCKNAME:
2449 case BPF_CGROUP_INET6_GETSOCKNAME:
1cedee13
AI
2450 case BPF_CGROUP_UDP4_SENDMSG:
2451 case BPF_CGROUP_UDP6_SENDMSG:
983695fa
DB
2452 case BPF_CGROUP_UDP4_RECVMSG:
2453 case BPF_CGROUP_UDP6_RECVMSG:
4fbac77d
AI
2454 return 0;
2455 default:
2456 return -EINVAL;
2457 }
5cf1e914 2458 case BPF_PROG_TYPE_CGROUP_SKB:
2459 switch (expected_attach_type) {
2460 case BPF_CGROUP_INET_INGRESS:
2461 case BPF_CGROUP_INET_EGRESS:
2462 return 0;
2463 default:
2464 return -EINVAL;
2465 }
0d01da6a
SF
2466 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2467 switch (expected_attach_type) {
2468 case BPF_CGROUP_SETSOCKOPT:
2469 case BPF_CGROUP_GETSOCKOPT:
2470 return 0;
2471 default:
2472 return -EINVAL;
2473 }
e9ddbb77
JS
2474 case BPF_PROG_TYPE_SK_LOOKUP:
2475 if (expected_attach_type == BPF_SK_LOOKUP)
2476 return 0;
2477 return -EINVAL;
d5e4ddae
KI
2478 case BPF_PROG_TYPE_SK_REUSEPORT:
2479 switch (expected_attach_type) {
2480 case BPF_SK_REUSEPORT_SELECT:
2481 case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE:
2482 return 0;
2483 default:
2484 return -EINVAL;
2485 }
132328e8
FW
2486 case BPF_PROG_TYPE_NETFILTER:
2487 if (expected_attach_type == BPF_NETFILTER)
2488 return 0;
2489 return -EINVAL;
79a7f8bd 2490 case BPF_PROG_TYPE_SYSCALL:
be8704ff
AS
2491 case BPF_PROG_TYPE_EXT:
2492 if (expected_attach_type)
2493 return -EINVAL;
df561f66 2494 fallthrough;
4fbac77d
AI
2495 default:
2496 return 0;
2497 }
5e43f899
AI
2498}
2499
2c78ee89
AS
2500static bool is_net_admin_prog_type(enum bpf_prog_type prog_type)
2501{
2502 switch (prog_type) {
2503 case BPF_PROG_TYPE_SCHED_CLS:
2504 case BPF_PROG_TYPE_SCHED_ACT:
2505 case BPF_PROG_TYPE_XDP:
2506 case BPF_PROG_TYPE_LWT_IN:
2507 case BPF_PROG_TYPE_LWT_OUT:
2508 case BPF_PROG_TYPE_LWT_XMIT:
2509 case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2510 case BPF_PROG_TYPE_SK_SKB:
2511 case BPF_PROG_TYPE_SK_MSG:
2c78ee89
AS
2512 case BPF_PROG_TYPE_FLOW_DISSECTOR:
2513 case BPF_PROG_TYPE_CGROUP_DEVICE:
2514 case BPF_PROG_TYPE_CGROUP_SOCK:
2515 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
2516 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2517 case BPF_PROG_TYPE_CGROUP_SYSCTL:
2518 case BPF_PROG_TYPE_SOCK_OPS:
2519 case BPF_PROG_TYPE_EXT: /* extends any prog */
84601d6e 2520 case BPF_PROG_TYPE_NETFILTER:
2c78ee89
AS
2521 return true;
2522 case BPF_PROG_TYPE_CGROUP_SKB:
2523 /* always unpriv */
2524 case BPF_PROG_TYPE_SK_REUSEPORT:
2525 /* equivalent to SOCKET_FILTER. need CAP_BPF only */
2526 default:
2527 return false;
2528 }
2529}
2530
2531static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
2532{
2533 switch (prog_type) {
2534 case BPF_PROG_TYPE_KPROBE:
2535 case BPF_PROG_TYPE_TRACEPOINT:
2536 case BPF_PROG_TYPE_PERF_EVENT:
2537 case BPF_PROG_TYPE_RAW_TRACEPOINT:
2538 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
2539 case BPF_PROG_TYPE_TRACING:
2540 case BPF_PROG_TYPE_LSM:
2541 case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */
2542 case BPF_PROG_TYPE_EXT: /* extends any prog */
2543 return true;
2544 default:
2545 return false;
2546 }
2547}
2548
09756af4 2549/* last field in 'union bpf_attr' used by this command */
47a71c1f 2550#define BPF_PROG_LOAD_LAST_FIELD log_true_size
09756af4 2551
47a71c1f 2552static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
09756af4
AS
2553{
2554 enum bpf_prog_type type = attr->prog_type;
290248a5
AN
2555 struct bpf_prog *prog, *dst_prog = NULL;
2556 struct btf *attach_btf = NULL;
09756af4
AS
2557 int err;
2558 char license[128];
09756af4
AS
2559
2560 if (CHECK_ATTR(BPF_PROG_LOAD))
2561 return -EINVAL;
2562
c240eff6
JW
2563 if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
2564 BPF_F_ANY_ALIGNMENT |
10d274e8 2565 BPF_F_TEST_STATE_FREQ |
1e6c62a8 2566 BPF_F_SLEEPABLE |
c2f2cdbe 2567 BPF_F_TEST_RND_HI32 |
2b3486bc
SF
2568 BPF_F_XDP_HAS_FRAGS |
2569 BPF_F_XDP_DEV_BOUND_ONLY))
e07b98d9
DM
2570 return -EINVAL;
2571
e9ee9efc
DM
2572 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
2573 (attr->prog_flags & BPF_F_ANY_ALIGNMENT) &&
2c78ee89 2574 !bpf_capable())
e9ee9efc
DM
2575 return -EPERM;
2576
1d28635a
AN
2577 /* Intent here is for unprivileged_bpf_disabled to block BPF program
2578 * creation for unprivileged users; other actions depend
2579 * on fd availability and access to bpffs, so are dependent on
2580 * object creation success. Even with unprivileged BPF disabled,
2581 * capability checks are still carried out for these
2582 * and other operations.
2583 */
2584 if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
2585 return -EPERM;
09756af4 2586
c04c0d2b 2587 if (attr->insn_cnt == 0 ||
2c78ee89 2588 attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
ef0915ca 2589 return -E2BIG;
80b7d819
CF
2590 if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
2591 type != BPF_PROG_TYPE_CGROUP_SKB &&
2c78ee89
AS
2592 !bpf_capable())
2593 return -EPERM;
2594
b338cb92 2595 if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN))
2c78ee89
AS
2596 return -EPERM;
2597 if (is_perfmon_prog_type(type) && !perfmon_capable())
1be7f75d
AS
2598 return -EPERM;
2599
290248a5
AN
2600 /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog
2601 * or btf, we need to check which one it is
2602 */
2603 if (attr->attach_prog_fd) {
2604 dst_prog = bpf_prog_get(attr->attach_prog_fd);
2605 if (IS_ERR(dst_prog)) {
2606 dst_prog = NULL;
2607 attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd);
2608 if (IS_ERR(attach_btf))
2609 return -EINVAL;
2610 if (!btf_is_kernel(attach_btf)) {
8bdd8e27
AN
2611 /* attaching through specifying bpf_prog's BTF
2612 * objects directly might be supported eventually
2613 */
290248a5 2614 btf_put(attach_btf);
8bdd8e27 2615 return -ENOTSUPP;
290248a5
AN
2616 }
2617 }
2618 } else if (attr->attach_btf_id) {
2619 /* fall back to vmlinux BTF, if BTF type ID is specified */
2620 attach_btf = bpf_get_btf_vmlinux();
2621 if (IS_ERR(attach_btf))
2622 return PTR_ERR(attach_btf);
2623 if (!attach_btf)
2624 return -EINVAL;
2625 btf_get(attach_btf);
2626 }
2627
aac3fc32 2628 bpf_prog_load_fixup_attach_type(attr);
ccfe29eb 2629 if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
290248a5
AN
2630 attach_btf, attr->attach_btf_id,
2631 dst_prog)) {
2632 if (dst_prog)
2633 bpf_prog_put(dst_prog);
2634 if (attach_btf)
2635 btf_put(attach_btf);
5e43f899 2636 return -EINVAL;
290248a5 2637 }
5e43f899 2638
09756af4
AS
2639 /* plain bpf_prog allocation */
2640 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
290248a5
AN
2641 if (!prog) {
2642 if (dst_prog)
2643 bpf_prog_put(dst_prog);
2644 if (attach_btf)
2645 btf_put(attach_btf);
09756af4 2646 return -ENOMEM;
290248a5 2647 }
09756af4 2648
5e43f899 2649 prog->expected_attach_type = attr->expected_attach_type;
290248a5 2650 prog->aux->attach_btf = attach_btf;
ccfe29eb 2651 prog->aux->attach_btf_id = attr->attach_btf_id;
290248a5 2652 prog->aux->dst_prog = dst_prog;
2b3486bc 2653 prog->aux->dev_bound = !!attr->prog_ifindex;
1e6c62a8 2654 prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
c2f2cdbe 2655 prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS;
9a18eedb 2656
afdb09c7 2657 err = security_bpf_prog_alloc(prog->aux);
aaac3ba9 2658 if (err)
3ac1f01b 2659 goto free_prog;
afdb09c7 2660
3ac1f01b 2661 prog->aux->user = get_current_user();
09756af4
AS
2662 prog->len = attr->insn_cnt;
2663
2664 err = -EFAULT;
af2ac3e1
AS
2665 if (copy_from_bpfptr(prog->insns,
2666 make_bpfptr(attr->insns, uattr.is_kernel),
2667 bpf_prog_insn_size(prog)) != 0)
3ac1f01b 2668 goto free_prog_sec;
7f6719f7
AN
2669 /* copy eBPF program license from user space */
2670 if (strncpy_from_bpfptr(license,
2671 make_bpfptr(attr->license, uattr.is_kernel),
2672 sizeof(license) - 1) < 0)
2673 goto free_prog_sec;
2674 license[sizeof(license) - 1] = 0;
2675
2676 /* eBPF programs must be GPL compatible to use GPL-ed functions */
2677 prog->gpl_compatible = license_is_gpl_compatible(license) ? 1 : 0;
09756af4
AS
2678
2679 prog->orig_prog = NULL;
a91263d5 2680 prog->jited = 0;
09756af4 2681
85192dbf 2682 atomic64_set(&prog->aux->refcnt, 1);
09756af4 2683
9a18eedb 2684 if (bpf_prog_is_dev_bound(prog->aux)) {
2b3486bc 2685 err = bpf_prog_dev_bound_init(prog, attr);
ab3f0063 2686 if (err)
3ac1f01b 2687 goto free_prog_sec;
ab3f0063
JK
2688 }
2689
fd7c211d
THJ
2690 if (type == BPF_PROG_TYPE_EXT && dst_prog &&
2691 bpf_prog_is_dev_bound(dst_prog->aux)) {
2692 err = bpf_prog_dev_bound_inherit(prog, dst_prog);
ab3f0063 2693 if (err)
3ac1f01b 2694 goto free_prog_sec;
ab3f0063
JK
2695 }
2696
09756af4
AS
2697 /* find program type: socket_filter vs tracing_filter */
2698 err = find_prog_type(type, prog);
2699 if (err < 0)
3ac1f01b 2700 goto free_prog_sec;
09756af4 2701
9285ec4c 2702 prog->aux->load_time = ktime_get_boottime_ns();
8e7ae251
MKL
2703 err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name,
2704 sizeof(attr->prog_name));
2705 if (err < 0)
3ac1f01b 2706 goto free_prog_sec;
cb4d2b3f 2707
09756af4 2708 /* run eBPF verifier */
47a71c1f 2709 err = bpf_check(&prog, attr, uattr, uattr_size);
09756af4
AS
2710 if (err < 0)
2711 goto free_used_maps;
2712
9facc336 2713 prog = bpf_prog_select_runtime(prog, &err);
04fd61ab
AS
2714 if (err < 0)
2715 goto free_used_maps;
09756af4 2716
dc4bb0e2
MKL
2717 err = bpf_prog_alloc_id(prog);
2718 if (err)
2719 goto free_used_maps;
2720
c751798a
DB
2721 /* Upon success of bpf_prog_alloc_id(), the BPF prog is
2722 * effectively publicly exposed. However, retrieving via
2723 * bpf_prog_get_fd_by_id() will take another reference,
2724 * therefore it cannot be gone underneath us.
2725 *
2726 * Only for the time /after/ successful bpf_prog_new_fd()
2727 * and before returning to userspace, we might just hold
2728 * one reference and any parallel close on that fd could
2729 * rip everything out. Hence, below notifications must
2730 * happen before bpf_prog_new_fd().
2731 *
2732 * Also, any failure handling from this point onwards must
2733 * be using bpf_prog_put() given the program is exposed.
2734 */
74451e66 2735 bpf_prog_kallsyms_add(prog);
6ee52e2a 2736 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
bae141f5 2737 bpf_audit_prog(prog, BPF_AUDIT_LOAD);
c751798a
DB
2738
2739 err = bpf_prog_new_fd(prog);
2740 if (err < 0)
2741 bpf_prog_put(prog);
09756af4
AS
2742 return err;
2743
2744free_used_maps:
cd7455f1
DB
2745 /* In case we have subprogs, we need to wait for a grace
2746 * period before we can tear down JIT memory since symbols
2747 * are already exposed under kallsyms.
2748 */
2749 __bpf_prog_put_noref(prog, prog->aux->func_cnt);
2750 return err;
afdb09c7 2751free_prog_sec:
3ac1f01b 2752 free_uid(prog->aux->user);
afdb09c7 2753 security_bpf_prog_free(prog->aux);
3ac1f01b 2754free_prog:
22dc4a0f
AN
2755 if (prog->aux->attach_btf)
2756 btf_put(prog->aux->attach_btf);
09756af4
AS
2757 bpf_prog_free(prog);
2758 return err;
2759}
2760
cb8edce2 2761#define BPF_OBJ_LAST_FIELD path_fd
b2197755
DB
2762
2763static int bpf_obj_pin(const union bpf_attr *attr)
2764{
cb8edce2
AN
2765 int path_fd;
2766
2767 if (CHECK_ATTR(BPF_OBJ) || attr->file_flags & ~BPF_F_PATH_FD)
2768 return -EINVAL;
2769
2770 /* path_fd has to be accompanied by BPF_F_PATH_FD flag */
2771 if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd)
b2197755
DB
2772 return -EINVAL;
2773
cb8edce2
AN
2774 path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD;
2775 return bpf_obj_pin_user(attr->bpf_fd, path_fd,
2776 u64_to_user_ptr(attr->pathname));
b2197755
DB
2777}
2778
2779static int bpf_obj_get(const union bpf_attr *attr)
2780{
cb8edce2
AN
2781 int path_fd;
2782
6e71b04a 2783 if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 ||
cb8edce2
AN
2784 attr->file_flags & ~(BPF_OBJ_FLAG_MASK | BPF_F_PATH_FD))
2785 return -EINVAL;
2786
2787 /* path_fd has to be accompanied by BPF_F_PATH_FD flag */
2788 if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd)
b2197755
DB
2789 return -EINVAL;
2790
cb8edce2
AN
2791 path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD;
2792 return bpf_obj_get_user(path_fd, u64_to_user_ptr(attr->pathname),
6e71b04a 2793 attr->file_flags);
b2197755
DB
2794}
2795
f2e10bff 2796void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
a3b80e10 2797 const struct bpf_link_ops *ops, struct bpf_prog *prog)
fec56f58 2798{
70ed506c 2799 atomic64_set(&link->refcnt, 1);
f2e10bff 2800 link->type = type;
a3b80e10 2801 link->id = 0;
70ed506c
AN
2802 link->ops = ops;
2803 link->prog = prog;
2804}
2805
a3b80e10
AN
2806static void bpf_link_free_id(int id)
2807{
2808 if (!id)
2809 return;
2810
2811 spin_lock_bh(&link_idr_lock);
2812 idr_remove(&link_idr, id);
2813 spin_unlock_bh(&link_idr_lock);
2814}
2815
98868668
AN
2816/* Clean up bpf_link and corresponding anon_inode file and FD. After
2817 * anon_inode is created, bpf_link can't be just kfree()'d due to deferred
a3b80e10
AN
2818 * anon_inode's release() call. This helper marksbpf_link as
2819 * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt
2820 * is not decremented, it's the responsibility of a calling code that failed
2821 * to complete bpf_link initialization.
98868668 2822 */
a3b80e10 2823void bpf_link_cleanup(struct bpf_link_primer *primer)
babf3164 2824{
a3b80e10
AN
2825 primer->link->prog = NULL;
2826 bpf_link_free_id(primer->id);
2827 fput(primer->file);
2828 put_unused_fd(primer->fd);
babf3164
AN
2829}
2830
70ed506c
AN
2831void bpf_link_inc(struct bpf_link *link)
2832{
2833 atomic64_inc(&link->refcnt);
2834}
2835
2836/* bpf_link_free is guaranteed to be called from process context */
2837static void bpf_link_free(struct bpf_link *link)
2838{
a3b80e10 2839 bpf_link_free_id(link->id);
babf3164
AN
2840 if (link->prog) {
2841 /* detach BPF program, clean up used resources */
2842 link->ops->release(link);
2843 bpf_prog_put(link->prog);
2844 }
2845 /* free bpf_link and its containing memory */
2846 link->ops->dealloc(link);
70ed506c
AN
2847}
2848
2849static void bpf_link_put_deferred(struct work_struct *work)
2850{
2851 struct bpf_link *link = container_of(work, struct bpf_link, work);
2852
2853 bpf_link_free(link);
2854}
2855
ab5d47bd
SAS
2856/* bpf_link_put might be called from atomic context. It needs to be called
2857 * from sleepable context in order to acquire sleeping locks during the process.
70ed506c
AN
2858 */
2859void bpf_link_put(struct bpf_link *link)
2860{
2861 if (!atomic64_dec_and_test(&link->refcnt))
2862 return;
2863
ab5d47bd
SAS
2864 INIT_WORK(&link->work, bpf_link_put_deferred);
2865 schedule_work(&link->work);
70ed506c 2866}
cb80ddc6 2867EXPORT_SYMBOL(bpf_link_put);
70ed506c 2868
ab5d47bd
SAS
2869static void bpf_link_put_direct(struct bpf_link *link)
2870{
2871 if (!atomic64_dec_and_test(&link->refcnt))
2872 return;
2873 bpf_link_free(link);
2874}
2875
70ed506c
AN
2876static int bpf_link_release(struct inode *inode, struct file *filp)
2877{
2878 struct bpf_link *link = filp->private_data;
2879
ab5d47bd 2880 bpf_link_put_direct(link);
fec56f58
AS
2881 return 0;
2882}
2883
70ed506c 2884#ifdef CONFIG_PROC_FS
f2e10bff
AN
2885#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
2886#define BPF_MAP_TYPE(_id, _ops)
2887#define BPF_LINK_TYPE(_id, _name) [_id] = #_name,
2888static const char *bpf_link_type_strs[] = {
2889 [BPF_LINK_TYPE_UNSPEC] = "<invalid>",
2890#include <linux/bpf_types.h>
2891};
2892#undef BPF_PROG_TYPE
2893#undef BPF_MAP_TYPE
2894#undef BPF_LINK_TYPE
70ed506c
AN
2895
2896static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
2897{
2898 const struct bpf_link *link = filp->private_data;
2899 const struct bpf_prog *prog = link->prog;
2900 char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
70ed506c 2901
70ed506c
AN
2902 seq_printf(m,
2903 "link_type:\t%s\n"
68b04864 2904 "link_id:\t%u\n",
f2e10bff 2905 bpf_link_type_strs[link->type],
68b04864
KFL
2906 link->id);
2907 if (prog) {
2908 bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
2909 seq_printf(m,
2910 "prog_tag:\t%s\n"
2911 "prog_id:\t%u\n",
2912 prog_tag,
2913 prog->aux->id);
2914 }
f2e10bff
AN
2915 if (link->ops->show_fdinfo)
2916 link->ops->show_fdinfo(link, m);
70ed506c
AN
2917}
2918#endif
2919
6f302bfb 2920static const struct file_operations bpf_link_fops = {
70ed506c
AN
2921#ifdef CONFIG_PROC_FS
2922 .show_fdinfo = bpf_link_show_fdinfo,
2923#endif
2924 .release = bpf_link_release,
fec56f58
AS
2925 .read = bpf_dummy_read,
2926 .write = bpf_dummy_write,
2927};
2928
a3b80e10 2929static int bpf_link_alloc_id(struct bpf_link *link)
70ed506c 2930{
a3b80e10 2931 int id;
70ed506c 2932
a3b80e10
AN
2933 idr_preload(GFP_KERNEL);
2934 spin_lock_bh(&link_idr_lock);
2935 id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC);
2936 spin_unlock_bh(&link_idr_lock);
2937 idr_preload_end();
70ed506c 2938
a3b80e10
AN
2939 return id;
2940}
2941
2942/* Prepare bpf_link to be exposed to user-space by allocating anon_inode file,
2943 * reserving unused FD and allocating ID from link_idr. This is to be paired
2944 * with bpf_link_settle() to install FD and ID and expose bpf_link to
2945 * user-space, if bpf_link is successfully attached. If not, bpf_link and
2946 * pre-allocated resources are to be freed with bpf_cleanup() call. All the
2947 * transient state is passed around in struct bpf_link_primer.
2948 * This is preferred way to create and initialize bpf_link, especially when
c561d110 2949 * there are complicated and expensive operations in between creating bpf_link
a3b80e10
AN
2950 * itself and attaching it to BPF hook. By using bpf_link_prime() and
2951 * bpf_link_settle() kernel code using bpf_link doesn't have to perform
2952 * expensive (and potentially failing) roll back operations in a rare case
2953 * that file, FD, or ID can't be allocated.
babf3164 2954 */
a3b80e10 2955int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer)
babf3164
AN
2956{
2957 struct file *file;
a3b80e10 2958 int fd, id;
babf3164
AN
2959
2960 fd = get_unused_fd_flags(O_CLOEXEC);
2961 if (fd < 0)
a3b80e10 2962 return fd;
babf3164 2963
babf3164 2964
a3b80e10
AN
2965 id = bpf_link_alloc_id(link);
2966 if (id < 0) {
2967 put_unused_fd(fd);
a3b80e10
AN
2968 return id;
2969 }
babf3164
AN
2970
2971 file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
2972 if (IS_ERR(file)) {
138c6767 2973 bpf_link_free_id(id);
babf3164 2974 put_unused_fd(fd);
138c6767 2975 return PTR_ERR(file);
babf3164
AN
2976 }
2977
a3b80e10
AN
2978 primer->link = link;
2979 primer->file = file;
2980 primer->fd = fd;
2981 primer->id = id;
2982 return 0;
2983}
2984
2985int bpf_link_settle(struct bpf_link_primer *primer)
2986{
2987 /* make bpf_link fetchable by ID */
2988 spin_lock_bh(&link_idr_lock);
2989 primer->link->id = primer->id;
2990 spin_unlock_bh(&link_idr_lock);
2991 /* make bpf_link fetchable by FD */
2992 fd_install(primer->fd, primer->file);
2993 /* pass through installed FD */
2994 return primer->fd;
2995}
2996
2997int bpf_link_new_fd(struct bpf_link *link)
2998{
2999 return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
babf3164
AN
3000}
3001
70ed506c
AN
3002struct bpf_link *bpf_link_get_from_fd(u32 ufd)
3003{
3004 struct fd f = fdget(ufd);
3005 struct bpf_link *link;
3006
3007 if (!f.file)
3008 return ERR_PTR(-EBADF);
3009 if (f.file->f_op != &bpf_link_fops) {
3010 fdput(f);
3011 return ERR_PTR(-EINVAL);
3012 }
3013
3014 link = f.file->private_data;
3015 bpf_link_inc(link);
3016 fdput(f);
3017
3018 return link;
3019}
cb80ddc6 3020EXPORT_SYMBOL(bpf_link_get_from_fd);
70ed506c 3021
70ed506c 3022static void bpf_tracing_link_release(struct bpf_link *link)
babf3164 3023{
3aac1ead 3024 struct bpf_tracing_link *tr_link =
f7e0beaf 3025 container_of(link, struct bpf_tracing_link, link.link);
3aac1ead 3026
f7e0beaf 3027 WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
3aac1ead
THJ
3028 tr_link->trampoline));
3029
3030 bpf_trampoline_put(tr_link->trampoline);
3031
3032 /* tgt_prog is NULL if target is a kernel function */
3033 if (tr_link->tgt_prog)
3034 bpf_prog_put(tr_link->tgt_prog);
babf3164
AN
3035}
3036
3037static void bpf_tracing_link_dealloc(struct bpf_link *link)
70ed506c
AN
3038{
3039 struct bpf_tracing_link *tr_link =
f7e0beaf 3040 container_of(link, struct bpf_tracing_link, link.link);
70ed506c 3041
70ed506c
AN
3042 kfree(tr_link);
3043}
3044
f2e10bff
AN
3045static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link,
3046 struct seq_file *seq)
3047{
3048 struct bpf_tracing_link *tr_link =
f7e0beaf 3049 container_of(link, struct bpf_tracing_link, link.link);
e859e429 3050 u32 target_btf_id, target_obj_id;
f2e10bff 3051
e859e429
YS
3052 bpf_trampoline_unpack_key(tr_link->trampoline->key,
3053 &target_obj_id, &target_btf_id);
f2e10bff 3054 seq_printf(seq,
e859e429
YS
3055 "attach_type:\t%d\n"
3056 "target_obj_id:\t%u\n"
3057 "target_btf_id:\t%u\n",
3058 tr_link->attach_type,
3059 target_obj_id,
3060 target_btf_id);
f2e10bff
AN
3061}
3062
3063static int bpf_tracing_link_fill_link_info(const struct bpf_link *link,
3064 struct bpf_link_info *info)
3065{
3066 struct bpf_tracing_link *tr_link =
f7e0beaf 3067 container_of(link, struct bpf_tracing_link, link.link);
f2e10bff
AN
3068
3069 info->tracing.attach_type = tr_link->attach_type;
441e8c66
THJ
3070 bpf_trampoline_unpack_key(tr_link->trampoline->key,
3071 &info->tracing.target_obj_id,
3072 &info->tracing.target_btf_id);
f2e10bff
AN
3073
3074 return 0;
3075}
3076
70ed506c
AN
3077static const struct bpf_link_ops bpf_tracing_link_lops = {
3078 .release = bpf_tracing_link_release,
babf3164 3079 .dealloc = bpf_tracing_link_dealloc,
f2e10bff
AN
3080 .show_fdinfo = bpf_tracing_link_show_fdinfo,
3081 .fill_link_info = bpf_tracing_link_fill_link_info,
70ed506c
AN
3082};
3083
4a1e7c0c
THJ
3084static int bpf_tracing_prog_attach(struct bpf_prog *prog,
3085 int tgt_prog_fd,
2fcc8241
KFL
3086 u32 btf_id,
3087 u64 bpf_cookie)
fec56f58 3088{
a3b80e10 3089 struct bpf_link_primer link_primer;
3aac1ead 3090 struct bpf_prog *tgt_prog = NULL;
4a1e7c0c 3091 struct bpf_trampoline *tr = NULL;
70ed506c 3092 struct bpf_tracing_link *link;
4a1e7c0c 3093 u64 key = 0;
a3b80e10 3094 int err;
fec56f58 3095
9e4e01df
KS
3096 switch (prog->type) {
3097 case BPF_PROG_TYPE_TRACING:
3098 if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
3099 prog->expected_attach_type != BPF_TRACE_FEXIT &&
3100 prog->expected_attach_type != BPF_MODIFY_RETURN) {
3101 err = -EINVAL;
3102 goto out_put_prog;
3103 }
3104 break;
3105 case BPF_PROG_TYPE_EXT:
3106 if (prog->expected_attach_type != 0) {
3107 err = -EINVAL;
3108 goto out_put_prog;
3109 }
3110 break;
3111 case BPF_PROG_TYPE_LSM:
3112 if (prog->expected_attach_type != BPF_LSM_MAC) {
3113 err = -EINVAL;
3114 goto out_put_prog;
3115 }
3116 break;
3117 default:
fec56f58
AS
3118 err = -EINVAL;
3119 goto out_put_prog;
3120 }
3121
4a1e7c0c
THJ
3122 if (!!tgt_prog_fd != !!btf_id) {
3123 err = -EINVAL;
3124 goto out_put_prog;
3125 }
3126
3127 if (tgt_prog_fd) {
3128 /* For now we only allow new targets for BPF_PROG_TYPE_EXT */
3129 if (prog->type != BPF_PROG_TYPE_EXT) {
3130 err = -EINVAL;
3131 goto out_put_prog;
3132 }
3133
3134 tgt_prog = bpf_prog_get(tgt_prog_fd);
3135 if (IS_ERR(tgt_prog)) {
3136 err = PTR_ERR(tgt_prog);
3137 tgt_prog = NULL;
3138 goto out_put_prog;
3139 }
3140
22dc4a0f 3141 key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id);
4a1e7c0c
THJ
3142 }
3143
70ed506c
AN
3144 link = kzalloc(sizeof(*link), GFP_USER);
3145 if (!link) {
3146 err = -ENOMEM;
3147 goto out_put_prog;
3148 }
f7e0beaf 3149 bpf_link_init(&link->link.link, BPF_LINK_TYPE_TRACING,
f2e10bff
AN
3150 &bpf_tracing_link_lops, prog);
3151 link->attach_type = prog->expected_attach_type;
2fcc8241 3152 link->link.cookie = bpf_cookie;
70ed506c 3153
3aac1ead
THJ
3154 mutex_lock(&prog->aux->dst_mutex);
3155
4a1e7c0c
THJ
3156 /* There are a few possible cases here:
3157 *
3158 * - if prog->aux->dst_trampoline is set, the program was just loaded
3159 * and not yet attached to anything, so we can use the values stored
3160 * in prog->aux
3161 *
3162 * - if prog->aux->dst_trampoline is NULL, the program has already been
3163 * attached to a target and its initial target was cleared (below)
3164 *
3165 * - if tgt_prog != NULL, the caller specified tgt_prog_fd +
3166 * target_btf_id using the link_create API.
3167 *
3168 * - if tgt_prog == NULL when this function was called using the old
f3a95075
JO
3169 * raw_tracepoint_open API, and we need a target from prog->aux
3170 *
3171 * - if prog->aux->dst_trampoline and tgt_prog is NULL, the program
3172 * was detached and is going for re-attachment.
4a1e7c0c
THJ
3173 */
3174 if (!prog->aux->dst_trampoline && !tgt_prog) {
f3a95075
JO
3175 /*
3176 * Allow re-attach for TRACING and LSM programs. If it's
3177 * currently linked, bpf_trampoline_link_prog will fail.
3178 * EXT programs need to specify tgt_prog_fd, so they
3179 * re-attach in separate code path.
3180 */
3181 if (prog->type != BPF_PROG_TYPE_TRACING &&
3182 prog->type != BPF_PROG_TYPE_LSM) {
3183 err = -EINVAL;
3184 goto out_unlock;
3185 }
3186 btf_id = prog->aux->attach_btf_id;
3187 key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, btf_id);
babf3164 3188 }
4a1e7c0c
THJ
3189
3190 if (!prog->aux->dst_trampoline ||
3191 (key && key != prog->aux->dst_trampoline->key)) {
3192 /* If there is no saved target, or the specified target is
3193 * different from the destination specified at load time, we
3194 * need a new trampoline and a check for compatibility
3195 */
3196 struct bpf_attach_target_info tgt_info = {};
3197
3198 err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id,
3199 &tgt_info);
3200 if (err)
3201 goto out_unlock;
3202
31bf1dbc
VM
3203 if (tgt_info.tgt_mod) {
3204 module_put(prog->aux->mod);
3205 prog->aux->mod = tgt_info.tgt_mod;
3206 }
3207
4a1e7c0c
THJ
3208 tr = bpf_trampoline_get(key, &tgt_info);
3209 if (!tr) {
3210 err = -ENOMEM;
3211 goto out_unlock;
3212 }
3213 } else {
3214 /* The caller didn't specify a target, or the target was the
3215 * same as the destination supplied during program load. This
3216 * means we can reuse the trampoline and reference from program
3217 * load time, and there is no need to allocate a new one. This
3218 * can only happen once for any program, as the saved values in
3219 * prog->aux are cleared below.
3220 */
3221 tr = prog->aux->dst_trampoline;
3222 tgt_prog = prog->aux->dst_prog;
3223 }
3aac1ead 3224
f7e0beaf 3225 err = bpf_link_prime(&link->link.link, &link_primer);
3aac1ead
THJ
3226 if (err)
3227 goto out_unlock;
fec56f58 3228
f7e0beaf 3229 err = bpf_trampoline_link_prog(&link->link, tr);
babf3164 3230 if (err) {
a3b80e10 3231 bpf_link_cleanup(&link_primer);
3aac1ead
THJ
3232 link = NULL;
3233 goto out_unlock;
fec56f58 3234 }
babf3164 3235
3aac1ead
THJ
3236 link->tgt_prog = tgt_prog;
3237 link->trampoline = tr;
3238
4a1e7c0c
THJ
3239 /* Always clear the trampoline and target prog from prog->aux to make
3240 * sure the original attach destination is not kept alive after a
3241 * program is (re-)attached to another target.
3242 */
3243 if (prog->aux->dst_prog &&
3244 (tgt_prog_fd || tr != prog->aux->dst_trampoline))
3245 /* got extra prog ref from syscall, or attaching to different prog */
3246 bpf_prog_put(prog->aux->dst_prog);
3247 if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline)
3248 /* we allocated a new trampoline, so free the old one */
3249 bpf_trampoline_put(prog->aux->dst_trampoline);
3250
3aac1ead
THJ
3251 prog->aux->dst_prog = NULL;
3252 prog->aux->dst_trampoline = NULL;
3253 mutex_unlock(&prog->aux->dst_mutex);
3254
a3b80e10 3255 return bpf_link_settle(&link_primer);
3aac1ead 3256out_unlock:
4a1e7c0c
THJ
3257 if (tr && tr != prog->aux->dst_trampoline)
3258 bpf_trampoline_put(tr);
3aac1ead
THJ
3259 mutex_unlock(&prog->aux->dst_mutex);
3260 kfree(link);
fec56f58 3261out_put_prog:
4a1e7c0c
THJ
3262 if (tgt_prog_fd && tgt_prog)
3263 bpf_prog_put(tgt_prog);
fec56f58
AS
3264 return err;
3265}
3266
70ed506c
AN
3267struct bpf_raw_tp_link {
3268 struct bpf_link link;
c4f6699d 3269 struct bpf_raw_event_map *btp;
c4f6699d
AS
3270};
3271
70ed506c 3272static void bpf_raw_tp_link_release(struct bpf_link *link)
c4f6699d 3273{
70ed506c
AN
3274 struct bpf_raw_tp_link *raw_tp =
3275 container_of(link, struct bpf_raw_tp_link, link);
c4f6699d 3276
70ed506c 3277 bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
a38d1107 3278 bpf_put_raw_tracepoint(raw_tp->btp);
babf3164
AN
3279}
3280
3281static void bpf_raw_tp_link_dealloc(struct bpf_link *link)
3282{
3283 struct bpf_raw_tp_link *raw_tp =
3284 container_of(link, struct bpf_raw_tp_link, link);
3285
c4f6699d 3286 kfree(raw_tp);
c4f6699d
AS
3287}
3288
f2e10bff
AN
3289static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link,
3290 struct seq_file *seq)
3291{
3292 struct bpf_raw_tp_link *raw_tp_link =
3293 container_of(link, struct bpf_raw_tp_link, link);
3294
3295 seq_printf(seq,
3296 "tp_name:\t%s\n",
3297 raw_tp_link->btp->tp->name);
3298}
3299
57d48537
YS
3300static int bpf_copy_to_user(char __user *ubuf, const char *buf, u32 ulen,
3301 u32 len)
3302{
3303 if (ulen >= len + 1) {
3304 if (copy_to_user(ubuf, buf, len + 1))
3305 return -EFAULT;
3306 } else {
3307 char zero = '\0';
3308
3309 if (copy_to_user(ubuf, buf, ulen - 1))
3310 return -EFAULT;
3311 if (put_user(zero, ubuf + ulen - 1))
3312 return -EFAULT;
3313 return -ENOSPC;
3314 }
3315
3316 return 0;
3317}
3318
f2e10bff
AN
3319static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
3320 struct bpf_link_info *info)
3321{
3322 struct bpf_raw_tp_link *raw_tp_link =
3323 container_of(link, struct bpf_raw_tp_link, link);
3324 char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name);
3325 const char *tp_name = raw_tp_link->btp->tp->name;
3326 u32 ulen = info->raw_tracepoint.tp_name_len;
3327 size_t tp_len = strlen(tp_name);
3328
b474959d 3329 if (!ulen ^ !ubuf)
f2e10bff
AN
3330 return -EINVAL;
3331
3332 info->raw_tracepoint.tp_name_len = tp_len + 1;
3333
3334 if (!ubuf)
3335 return 0;
3336
57d48537 3337 return bpf_copy_to_user(ubuf, tp_name, ulen, tp_len);
f2e10bff
AN
3338}
3339
a3b80e10 3340static const struct bpf_link_ops bpf_raw_tp_link_lops = {
70ed506c 3341 .release = bpf_raw_tp_link_release,
babf3164 3342 .dealloc = bpf_raw_tp_link_dealloc,
f2e10bff
AN
3343 .show_fdinfo = bpf_raw_tp_link_show_fdinfo,
3344 .fill_link_info = bpf_raw_tp_link_fill_link_info,
c4f6699d
AS
3345};
3346
b89fbfbb
AN
3347#ifdef CONFIG_PERF_EVENTS
3348struct bpf_perf_link {
3349 struct bpf_link link;
3350 struct file *perf_file;
3351};
3352
3353static void bpf_perf_link_release(struct bpf_link *link)
3354{
3355 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link);
3356 struct perf_event *event = perf_link->perf_file->private_data;
3357
3358 perf_event_free_bpf_prog(event);
3359 fput(perf_link->perf_file);
3360}
3361
3362static void bpf_perf_link_dealloc(struct bpf_link *link)
3363{
3364 struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link);
3365
3366 kfree(perf_link);
3367}
3368
1b715e1b
YS
3369static int bpf_perf_link_fill_common(const struct perf_event *event,
3370 char __user *uname, u32 ulen,
3371 u64 *probe_offset, u64 *probe_addr,
3372 u32 *fd_type)
3373{
3374 const char *buf;
3375 u32 prog_id;
3376 size_t len;
3377 int err;
3378
3379 if (!ulen ^ !uname)
3380 return -EINVAL;
1b715e1b
YS
3381
3382 err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf,
3383 probe_offset, probe_addr);
3384 if (err)
3385 return err;
0aa35162
YS
3386 if (!uname)
3387 return 0;
1b715e1b
YS
3388 if (buf) {
3389 len = strlen(buf);
3390 err = bpf_copy_to_user(uname, buf, ulen, len);
3391 if (err)
3392 return err;
3393 } else {
3394 char zero = '\0';
3395
3396 if (put_user(zero, uname))
3397 return -EFAULT;
3398 }
3399 return 0;
3400}
3401
3402#ifdef CONFIG_KPROBE_EVENTS
3403static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
3404 struct bpf_link_info *info)
3405{
3406 char __user *uname;
3407 u64 addr, offset;
3408 u32 ulen, type;
3409 int err;
3410
3411 uname = u64_to_user_ptr(info->perf_event.kprobe.func_name);
3412 ulen = info->perf_event.kprobe.name_len;
3413 err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr,
3414 &type);
3415 if (err)
3416 return err;
3417 if (type == BPF_FD_TYPE_KRETPROBE)
3418 info->perf_event.type = BPF_PERF_EVENT_KRETPROBE;
3419 else
3420 info->perf_event.type = BPF_PERF_EVENT_KPROBE;
3421
3422 info->perf_event.kprobe.offset = offset;
3423 if (!kallsyms_show_value(current_cred()))
3424 addr = 0;
3425 info->perf_event.kprobe.addr = addr;
3426 return 0;
3427}
3428#endif
3429
3430#ifdef CONFIG_UPROBE_EVENTS
3431static int bpf_perf_link_fill_uprobe(const struct perf_event *event,
3432 struct bpf_link_info *info)
3433{
3434 char __user *uname;
3435 u64 addr, offset;
3436 u32 ulen, type;
3437 int err;
3438
3439 uname = u64_to_user_ptr(info->perf_event.uprobe.file_name);
3440 ulen = info->perf_event.uprobe.name_len;
3441 err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr,
3442 &type);
3443 if (err)
3444 return err;
3445
3446 if (type == BPF_FD_TYPE_URETPROBE)
3447 info->perf_event.type = BPF_PERF_EVENT_URETPROBE;
3448 else
3449 info->perf_event.type = BPF_PERF_EVENT_UPROBE;
3450 info->perf_event.uprobe.offset = offset;
3451 return 0;
3452}
3453#endif
3454
3455static int bpf_perf_link_fill_probe(const struct perf_event *event,
3456 struct bpf_link_info *info)
3457{
3458#ifdef CONFIG_KPROBE_EVENTS
3459 if (event->tp_event->flags & TRACE_EVENT_FL_KPROBE)
3460 return bpf_perf_link_fill_kprobe(event, info);
3461#endif
3462#ifdef CONFIG_UPROBE_EVENTS
3463 if (event->tp_event->flags & TRACE_EVENT_FL_UPROBE)
3464 return bpf_perf_link_fill_uprobe(event, info);
3465#endif
3466 return -EOPNOTSUPP;
3467}
3468
3469static int bpf_perf_link_fill_tracepoint(const struct perf_event *event,
3470 struct bpf_link_info *info)
3471{
3472 char __user *uname;
3473 u32 ulen;
3474
3475 uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name);
3476 ulen = info->perf_event.tracepoint.name_len;
3477 info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT;
3478 return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL);
3479}
3480
3481static int bpf_perf_link_fill_perf_event(const struct perf_event *event,
3482 struct bpf_link_info *info)
3483{
3484 info->perf_event.event.type = event->attr.type;
3485 info->perf_event.event.config = event->attr.config;
3486 info->perf_event.type = BPF_PERF_EVENT_EVENT;
3487 return 0;
3488}
3489
3490static int bpf_perf_link_fill_link_info(const struct bpf_link *link,
3491 struct bpf_link_info *info)
3492{
3493 struct bpf_perf_link *perf_link;
3494 const struct perf_event *event;
3495
3496 perf_link = container_of(link, struct bpf_perf_link, link);
3497 event = perf_get_event(perf_link->perf_file);
3498 if (IS_ERR(event))
3499 return PTR_ERR(event);
3500
3501 switch (event->prog->type) {
3502 case BPF_PROG_TYPE_PERF_EVENT:
3503 return bpf_perf_link_fill_perf_event(event, info);
3504 case BPF_PROG_TYPE_TRACEPOINT:
3505 return bpf_perf_link_fill_tracepoint(event, info);
3506 case BPF_PROG_TYPE_KPROBE:
3507 return bpf_perf_link_fill_probe(event, info);
3508 default:
3509 return -EOPNOTSUPP;
3510 }
3511}
3512
b89fbfbb
AN
3513static const struct bpf_link_ops bpf_perf_link_lops = {
3514 .release = bpf_perf_link_release,
3515 .dealloc = bpf_perf_link_dealloc,
1b715e1b 3516 .fill_link_info = bpf_perf_link_fill_link_info,
b89fbfbb
AN
3517};
3518
3519static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
3520{
3521 struct bpf_link_primer link_primer;
3522 struct bpf_perf_link *link;
3523 struct perf_event *event;
3524 struct file *perf_file;
3525 int err;
3526
3527 if (attr->link_create.flags)
3528 return -EINVAL;
3529
3530 perf_file = perf_event_get(attr->link_create.target_fd);
3531 if (IS_ERR(perf_file))
3532 return PTR_ERR(perf_file);
3533
3534 link = kzalloc(sizeof(*link), GFP_USER);
3535 if (!link) {
3536 err = -ENOMEM;
3537 goto out_put_file;
3538 }
3539 bpf_link_init(&link->link, BPF_LINK_TYPE_PERF_EVENT, &bpf_perf_link_lops, prog);
3540 link->perf_file = perf_file;
3541
3542 err = bpf_link_prime(&link->link, &link_primer);
3543 if (err) {
3544 kfree(link);
3545 goto out_put_file;
3546 }
3547
3548 event = perf_file->private_data;
82e6b1ee 3549 err = perf_event_set_bpf_prog(event, prog, attr->link_create.perf_event.bpf_cookie);
b89fbfbb
AN
3550 if (err) {
3551 bpf_link_cleanup(&link_primer);
3552 goto out_put_file;
3553 }
3554 /* perf_event_set_bpf_prog() doesn't take its own refcnt on prog */
3555 bpf_prog_inc(prog);
3556
3557 return bpf_link_settle(&link_primer);
3558
3559out_put_file:
3560 fput(perf_file);
3561 return err;
3562}
0dcac272
JO
3563#else
3564static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
3565{
3566 return -EOPNOTSUPP;
3567}
b89fbfbb
AN
3568#endif /* CONFIG_PERF_EVENTS */
3569
df86ca0d
AN
3570static int bpf_raw_tp_link_attach(struct bpf_prog *prog,
3571 const char __user *user_tp_name)
c4f6699d 3572{
a3b80e10 3573 struct bpf_link_primer link_primer;
babf3164 3574 struct bpf_raw_tp_link *link;
c4f6699d 3575 struct bpf_raw_event_map *btp;
ac4414b5
AS
3576 const char *tp_name;
3577 char buf[128];
a3b80e10 3578 int err;
c4f6699d 3579
9e4e01df
KS
3580 switch (prog->type) {
3581 case BPF_PROG_TYPE_TRACING:
3582 case BPF_PROG_TYPE_EXT:
3583 case BPF_PROG_TYPE_LSM:
df86ca0d 3584 if (user_tp_name)
fec56f58
AS
3585 /* The attach point for this category of programs
3586 * should be specified via btf_id during program load.
ac4414b5 3587 */
df86ca0d 3588 return -EINVAL;
9e4e01df
KS
3589 if (prog->type == BPF_PROG_TYPE_TRACING &&
3590 prog->expected_attach_type == BPF_TRACE_RAW_TP) {
fec56f58 3591 tp_name = prog->aux->attach_func_name;
9e4e01df
KS
3592 break;
3593 }
2fcc8241 3594 return bpf_tracing_prog_attach(prog, 0, 0, 0);
9e4e01df
KS
3595 case BPF_PROG_TYPE_RAW_TRACEPOINT:
3596 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
df86ca0d
AN
3597 if (strncpy_from_user(buf, user_tp_name, sizeof(buf) - 1) < 0)
3598 return -EFAULT;
ac4414b5
AS
3599 buf[sizeof(buf) - 1] = 0;
3600 tp_name = buf;
9e4e01df
KS
3601 break;
3602 default:
df86ca0d 3603 return -EINVAL;
ac4414b5 3604 }
c4f6699d 3605
a38d1107 3606 btp = bpf_get_raw_tracepoint(tp_name);
df86ca0d
AN
3607 if (!btp)
3608 return -ENOENT;
c4f6699d 3609
babf3164
AN
3610 link = kzalloc(sizeof(*link), GFP_USER);
3611 if (!link) {
a38d1107
MM
3612 err = -ENOMEM;
3613 goto out_put_btp;
3614 }
f2e10bff
AN
3615 bpf_link_init(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT,
3616 &bpf_raw_tp_link_lops, prog);
babf3164 3617 link->btp = btp;
c4f6699d 3618
a3b80e10
AN
3619 err = bpf_link_prime(&link->link, &link_primer);
3620 if (err) {
babf3164 3621 kfree(link);
babf3164
AN
3622 goto out_put_btp;
3623 }
c4f6699d 3624
babf3164
AN
3625 err = bpf_probe_register(link->btp, prog);
3626 if (err) {
a3b80e10 3627 bpf_link_cleanup(&link_primer);
babf3164 3628 goto out_put_btp;
c4f6699d 3629 }
babf3164 3630
a3b80e10 3631 return bpf_link_settle(&link_primer);
c4f6699d 3632
a38d1107
MM
3633out_put_btp:
3634 bpf_put_raw_tracepoint(btp);
c4f6699d
AS
3635 return err;
3636}
3637
df86ca0d
AN
3638#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
3639
3640static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
3641{
3642 struct bpf_prog *prog;
3643 int fd;
3644
3645 if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
3646 return -EINVAL;
3647
3648 prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
3649 if (IS_ERR(prog))
3650 return PTR_ERR(prog);
3651
3652 fd = bpf_raw_tp_link_attach(prog, u64_to_user_ptr(attr->raw_tracepoint.name));
3653 if (fd < 0)
3654 bpf_prog_put(prog);
3655 return fd;
3656}
3657
33491588
AR
3658static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
3659 enum bpf_attach_type attach_type)
3660{
3661 switch (prog->type) {
3662 case BPF_PROG_TYPE_CGROUP_SOCK:
3663 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
0d01da6a 3664 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
e9ddbb77 3665 case BPF_PROG_TYPE_SK_LOOKUP:
33491588 3666 return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
5cf1e914 3667 case BPF_PROG_TYPE_CGROUP_SKB:
2c78ee89
AS
3668 if (!capable(CAP_NET_ADMIN))
3669 /* cg-skb progs can be loaded by unpriv user.
3670 * check permissions at attach time.
3671 */
3672 return -EPERM;
5cf1e914 3673 return prog->enforce_expected_attach_type &&
3674 prog->expected_attach_type != attach_type ?
3675 -EINVAL : 0;
db8eae6b
JO
3676 case BPF_PROG_TYPE_KPROBE:
3677 if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI &&
3678 attach_type != BPF_TRACE_KPROBE_MULTI)
3679 return -EINVAL;
3680 return 0;
33491588
AR
3681 default:
3682 return 0;
3683 }
3684}
3685
e28784e3
AN
3686static enum bpf_prog_type
3687attach_type_to_prog_type(enum bpf_attach_type attach_type)
f4324551 3688{
e28784e3 3689 switch (attach_type) {
f4324551
DM
3690 case BPF_CGROUP_INET_INGRESS:
3691 case BPF_CGROUP_INET_EGRESS:
e28784e3 3692 return BPF_PROG_TYPE_CGROUP_SKB;
61023658 3693 case BPF_CGROUP_INET_SOCK_CREATE:
f5836749 3694 case BPF_CGROUP_INET_SOCK_RELEASE:
aac3fc32
AI
3695 case BPF_CGROUP_INET4_POST_BIND:
3696 case BPF_CGROUP_INET6_POST_BIND:
e28784e3 3697 return BPF_PROG_TYPE_CGROUP_SOCK;
4fbac77d
AI
3698 case BPF_CGROUP_INET4_BIND:
3699 case BPF_CGROUP_INET6_BIND:
d74bad4e
AI
3700 case BPF_CGROUP_INET4_CONNECT:
3701 case BPF_CGROUP_INET6_CONNECT:
1b66d253
DB
3702 case BPF_CGROUP_INET4_GETPEERNAME:
3703 case BPF_CGROUP_INET6_GETPEERNAME:
3704 case BPF_CGROUP_INET4_GETSOCKNAME:
3705 case BPF_CGROUP_INET6_GETSOCKNAME:
1cedee13
AI
3706 case BPF_CGROUP_UDP4_SENDMSG:
3707 case BPF_CGROUP_UDP6_SENDMSG:
983695fa
DB
3708 case BPF_CGROUP_UDP4_RECVMSG:
3709 case BPF_CGROUP_UDP6_RECVMSG:
e28784e3 3710 return BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
40304b2a 3711 case BPF_CGROUP_SOCK_OPS:
e28784e3 3712 return BPF_PROG_TYPE_SOCK_OPS;
ebc614f6 3713 case BPF_CGROUP_DEVICE:
e28784e3 3714 return BPF_PROG_TYPE_CGROUP_DEVICE;
4f738adb 3715 case BPF_SK_MSG_VERDICT:
e28784e3 3716 return BPF_PROG_TYPE_SK_MSG;
464bc0fd
JF
3717 case BPF_SK_SKB_STREAM_PARSER:
3718 case BPF_SK_SKB_STREAM_VERDICT:
a7ba4558 3719 case BPF_SK_SKB_VERDICT:
e28784e3 3720 return BPF_PROG_TYPE_SK_SKB;
f4364dcf 3721 case BPF_LIRC_MODE2:
e28784e3 3722 return BPF_PROG_TYPE_LIRC_MODE2;
d58e468b 3723 case BPF_FLOW_DISSECTOR:
e28784e3 3724 return BPF_PROG_TYPE_FLOW_DISSECTOR;
7b146ceb 3725 case BPF_CGROUP_SYSCTL:
e28784e3 3726 return BPF_PROG_TYPE_CGROUP_SYSCTL;
0d01da6a
SF
3727 case BPF_CGROUP_GETSOCKOPT:
3728 case BPF_CGROUP_SETSOCKOPT:
e28784e3 3729 return BPF_PROG_TYPE_CGROUP_SOCKOPT;
de4e05ca 3730 case BPF_TRACE_ITER:
df86ca0d
AN
3731 case BPF_TRACE_RAW_TP:
3732 case BPF_TRACE_FENTRY:
3733 case BPF_TRACE_FEXIT:
3734 case BPF_MODIFY_RETURN:
de4e05ca 3735 return BPF_PROG_TYPE_TRACING;
df86ca0d
AN
3736 case BPF_LSM_MAC:
3737 return BPF_PROG_TYPE_LSM;
e9ddbb77
JS
3738 case BPF_SK_LOOKUP:
3739 return BPF_PROG_TYPE_SK_LOOKUP;
aa8d3a71
AN
3740 case BPF_XDP:
3741 return BPF_PROG_TYPE_XDP;
69fd337a
SF
3742 case BPF_LSM_CGROUP:
3743 return BPF_PROG_TYPE_LSM;
e420bed0
DB
3744 case BPF_TCX_INGRESS:
3745 case BPF_TCX_EGRESS:
3746 return BPF_PROG_TYPE_SCHED_CLS;
f4324551 3747 default:
e28784e3 3748 return BPF_PROG_TYPE_UNSPEC;
f4324551 3749 }
e28784e3
AN
3750}
3751
e420bed0
DB
3752#define BPF_PROG_ATTACH_LAST_FIELD expected_revision
3753
3754#define BPF_F_ATTACH_MASK_BASE \
3755 (BPF_F_ALLOW_OVERRIDE | \
3756 BPF_F_ALLOW_MULTI | \
3757 BPF_F_REPLACE)
e28784e3 3758
e420bed0
DB
3759#define BPF_F_ATTACH_MASK_MPROG \
3760 (BPF_F_REPLACE | \
3761 BPF_F_BEFORE | \
3762 BPF_F_AFTER | \
3763 BPF_F_ID | \
3764 BPF_F_LINK)
e28784e3
AN
3765
3766static int bpf_prog_attach(const union bpf_attr *attr)
3767{
3768 enum bpf_prog_type ptype;
3769 struct bpf_prog *prog;
e420bed0 3770 u32 mask;
e28784e3
AN
3771 int ret;
3772
e28784e3
AN
3773 if (CHECK_ATTR(BPF_PROG_ATTACH))
3774 return -EINVAL;
3775
e28784e3
AN
3776 ptype = attach_type_to_prog_type(attr->attach_type);
3777 if (ptype == BPF_PROG_TYPE_UNSPEC)
3778 return -EINVAL;
e420bed0
DB
3779 mask = bpf_mprog_supported(ptype) ?
3780 BPF_F_ATTACH_MASK_MPROG : BPF_F_ATTACH_MASK_BASE;
3781 if (attr->attach_flags & ~mask)
3782 return -EINVAL;
f4324551 3783
b2cd1257
DA
3784 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
3785 if (IS_ERR(prog))
3786 return PTR_ERR(prog);
3787
5e43f899
AI
3788 if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) {
3789 bpf_prog_put(prog);
3790 return -EINVAL;
3791 }
3792
fdb5c453
SY
3793 switch (ptype) {
3794 case BPF_PROG_TYPE_SK_SKB:
3795 case BPF_PROG_TYPE_SK_MSG:
604326b4 3796 ret = sock_map_get_from_fd(attr, prog);
fdb5c453
SY
3797 break;
3798 case BPF_PROG_TYPE_LIRC_MODE2:
3799 ret = lirc_prog_attach(attr, prog);
3800 break;
d58e468b 3801 case BPF_PROG_TYPE_FLOW_DISSECTOR:
a3fd7cee 3802 ret = netns_bpf_prog_attach(attr, prog);
d58e468b 3803 break;
e28784e3
AN
3804 case BPF_PROG_TYPE_CGROUP_DEVICE:
3805 case BPF_PROG_TYPE_CGROUP_SKB:
3806 case BPF_PROG_TYPE_CGROUP_SOCK:
3807 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
3808 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3809 case BPF_PROG_TYPE_CGROUP_SYSCTL:
3810 case BPF_PROG_TYPE_SOCK_OPS:
69fd337a
SF
3811 case BPF_PROG_TYPE_LSM:
3812 if (ptype == BPF_PROG_TYPE_LSM &&
3813 prog->expected_attach_type != BPF_LSM_CGROUP)
e89f3edf
ML
3814 ret = -EINVAL;
3815 else
3816 ret = cgroup_bpf_prog_attach(attr, ptype, prog);
e28784e3 3817 break;
e420bed0
DB
3818 case BPF_PROG_TYPE_SCHED_CLS:
3819 ret = tcx_prog_attach(attr, prog);
3820 break;
e28784e3
AN
3821 default:
3822 ret = -EINVAL;
b2cd1257
DA
3823 }
3824
7f677633
AS
3825 if (ret)
3826 bpf_prog_put(prog);
7f677633 3827 return ret;
f4324551
DM
3828}
3829
e420bed0 3830#define BPF_PROG_DETACH_LAST_FIELD expected_revision
f4324551
DM
3831
3832static int bpf_prog_detach(const union bpf_attr *attr)
3833{
e420bed0 3834 struct bpf_prog *prog = NULL;
324bda9e 3835 enum bpf_prog_type ptype;
e420bed0 3836 int ret;
f4324551 3837
f4324551
DM
3838 if (CHECK_ATTR(BPF_PROG_DETACH))
3839 return -EINVAL;
3840
e28784e3 3841 ptype = attach_type_to_prog_type(attr->attach_type);
e420bed0
DB
3842 if (bpf_mprog_supported(ptype)) {
3843 if (ptype == BPF_PROG_TYPE_UNSPEC)
3844 return -EINVAL;
3845 if (attr->attach_flags & ~BPF_F_ATTACH_MASK_MPROG)
3846 return -EINVAL;
3847 if (attr->attach_bpf_fd) {
3848 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
3849 if (IS_ERR(prog))
3850 return PTR_ERR(prog);
3851 }
3852 }
e28784e3
AN
3853
3854 switch (ptype) {
3855 case BPF_PROG_TYPE_SK_MSG:
3856 case BPF_PROG_TYPE_SK_SKB:
e420bed0
DB
3857 ret = sock_map_prog_detach(attr, ptype);
3858 break;
e28784e3 3859 case BPF_PROG_TYPE_LIRC_MODE2:
e420bed0
DB
3860 ret = lirc_prog_detach(attr);
3861 break;
e28784e3 3862 case BPF_PROG_TYPE_FLOW_DISSECTOR:
e420bed0
DB
3863 ret = netns_bpf_prog_detach(attr, ptype);
3864 break;
e28784e3
AN
3865 case BPF_PROG_TYPE_CGROUP_DEVICE:
3866 case BPF_PROG_TYPE_CGROUP_SKB:
3867 case BPF_PROG_TYPE_CGROUP_SOCK:
3868 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
3869 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3870 case BPF_PROG_TYPE_CGROUP_SYSCTL:
3871 case BPF_PROG_TYPE_SOCK_OPS:
69fd337a 3872 case BPF_PROG_TYPE_LSM:
e420bed0
DB
3873 ret = cgroup_bpf_prog_detach(attr, ptype);
3874 break;
3875 case BPF_PROG_TYPE_SCHED_CLS:
3876 ret = tcx_prog_detach(attr, prog);
3877 break;
f4324551 3878 default:
e420bed0 3879 ret = -EINVAL;
f4324551 3880 }
e420bed0
DB
3881
3882 if (prog)
3883 bpf_prog_put(prog);
3884 return ret;
f4324551 3885}
40304b2a 3886
e420bed0 3887#define BPF_PROG_QUERY_LAST_FIELD query.link_attach_flags
468e2f64
AS
3888
3889static int bpf_prog_query(const union bpf_attr *attr,
3890 union bpf_attr __user *uattr)
3891{
468e2f64
AS
3892 if (!capable(CAP_NET_ADMIN))
3893 return -EPERM;
3894 if (CHECK_ATTR(BPF_PROG_QUERY))
3895 return -EINVAL;
3896 if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE)
3897 return -EINVAL;
3898
3899 switch (attr->query.attach_type) {
3900 case BPF_CGROUP_INET_INGRESS:
3901 case BPF_CGROUP_INET_EGRESS:
3902 case BPF_CGROUP_INET_SOCK_CREATE:
f5836749 3903 case BPF_CGROUP_INET_SOCK_RELEASE:
4fbac77d
AI
3904 case BPF_CGROUP_INET4_BIND:
3905 case BPF_CGROUP_INET6_BIND:
aac3fc32
AI
3906 case BPF_CGROUP_INET4_POST_BIND:
3907 case BPF_CGROUP_INET6_POST_BIND:
d74bad4e
AI
3908 case BPF_CGROUP_INET4_CONNECT:
3909 case BPF_CGROUP_INET6_CONNECT:
1b66d253
DB
3910 case BPF_CGROUP_INET4_GETPEERNAME:
3911 case BPF_CGROUP_INET6_GETPEERNAME:
3912 case BPF_CGROUP_INET4_GETSOCKNAME:
3913 case BPF_CGROUP_INET6_GETSOCKNAME:
1cedee13
AI
3914 case BPF_CGROUP_UDP4_SENDMSG:
3915 case BPF_CGROUP_UDP6_SENDMSG:
983695fa
DB
3916 case BPF_CGROUP_UDP4_RECVMSG:
3917 case BPF_CGROUP_UDP6_RECVMSG:
468e2f64 3918 case BPF_CGROUP_SOCK_OPS:
ebc614f6 3919 case BPF_CGROUP_DEVICE:
7b146ceb 3920 case BPF_CGROUP_SYSCTL:
0d01da6a
SF
3921 case BPF_CGROUP_GETSOCKOPT:
3922 case BPF_CGROUP_SETSOCKOPT:
b79c9fc9 3923 case BPF_LSM_CGROUP:
e28784e3 3924 return cgroup_bpf_prog_query(attr, uattr);
f4364dcf
SY
3925 case BPF_LIRC_MODE2:
3926 return lirc_prog_query(attr, uattr);
118c8e9a 3927 case BPF_FLOW_DISSECTOR:
e9ddbb77 3928 case BPF_SK_LOOKUP:
a3fd7cee 3929 return netns_bpf_prog_query(attr, uattr);
748cd572
DZ
3930 case BPF_SK_SKB_STREAM_PARSER:
3931 case BPF_SK_SKB_STREAM_VERDICT:
3932 case BPF_SK_MSG_VERDICT:
3933 case BPF_SK_SKB_VERDICT:
3934 return sock_map_bpf_prog_query(attr, uattr);
e420bed0
DB
3935 case BPF_TCX_INGRESS:
3936 case BPF_TCX_EGRESS:
3937 return tcx_prog_query(attr, uattr);
468e2f64
AS
3938 default:
3939 return -EINVAL;
3940 }
468e2f64 3941}
f4324551 3942
b530e9e1 3943#define BPF_PROG_TEST_RUN_LAST_FIELD test.batch_size
1cf1cae9
AS
3944
3945static int bpf_prog_test_run(const union bpf_attr *attr,
3946 union bpf_attr __user *uattr)
3947{
3948 struct bpf_prog *prog;
3949 int ret = -ENOTSUPP;
3950
3951 if (CHECK_ATTR(BPF_PROG_TEST_RUN))
3952 return -EINVAL;
3953
b0b9395d
SF
3954 if ((attr->test.ctx_size_in && !attr->test.ctx_in) ||
3955 (!attr->test.ctx_size_in && attr->test.ctx_in))
3956 return -EINVAL;
3957
3958 if ((attr->test.ctx_size_out && !attr->test.ctx_out) ||
3959 (!attr->test.ctx_size_out && attr->test.ctx_out))
3960 return -EINVAL;
3961
1cf1cae9
AS
3962 prog = bpf_prog_get(attr->test.prog_fd);
3963 if (IS_ERR(prog))
3964 return PTR_ERR(prog);
3965
3966 if (prog->aux->ops->test_run)
3967 ret = prog->aux->ops->test_run(prog, attr, uattr);
3968
3969 bpf_prog_put(prog);
3970 return ret;
3971}
3972
34ad5580
MKL
3973#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id
3974
3975static int bpf_obj_get_next_id(const union bpf_attr *attr,
3976 union bpf_attr __user *uattr,
3977 struct idr *idr,
3978 spinlock_t *lock)
3979{
3980 u32 next_id = attr->start_id;
3981 int err = 0;
3982
3983 if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX)
3984 return -EINVAL;
3985
3986 if (!capable(CAP_SYS_ADMIN))
3987 return -EPERM;
3988
3989 next_id++;
3990 spin_lock_bh(lock);
3991 if (!idr_get_next(idr, &next_id))
3992 err = -ENOENT;
3993 spin_unlock_bh(lock);
3994
3995 if (!err)
3996 err = put_user(next_id, &uattr->next_id);
3997
3998 return err;
3999}
4000
6086d29d
YS
4001struct bpf_map *bpf_map_get_curr_or_next(u32 *id)
4002{
4003 struct bpf_map *map;
4004
4005 spin_lock_bh(&map_idr_lock);
4006again:
4007 map = idr_get_next(&map_idr, id);
4008 if (map) {
4009 map = __bpf_map_inc_not_zero(map, false);
4010 if (IS_ERR(map)) {
4011 (*id)++;
4012 goto again;
4013 }
4014 }
4015 spin_unlock_bh(&map_idr_lock);
4016
4017 return map;
4018}
4019
a228a64f
AS
4020struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id)
4021{
4022 struct bpf_prog *prog;
4023
4024 spin_lock_bh(&prog_idr_lock);
4025again:
4026 prog = idr_get_next(&prog_idr, id);
4027 if (prog) {
4028 prog = bpf_prog_inc_not_zero(prog);
4029 if (IS_ERR(prog)) {
4030 (*id)++;
4031 goto again;
4032 }
4033 }
4034 spin_unlock_bh(&prog_idr_lock);
4035
4036 return prog;
4037}
4038
b16d9aa4
MKL
4039#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
4040
7e6897f9 4041struct bpf_prog *bpf_prog_by_id(u32 id)
b16d9aa4
MKL
4042{
4043 struct bpf_prog *prog;
b16d9aa4 4044
7e6897f9
BT
4045 if (!id)
4046 return ERR_PTR(-ENOENT);
b16d9aa4
MKL
4047
4048 spin_lock_bh(&prog_idr_lock);
4049 prog = idr_find(&prog_idr, id);
4050 if (prog)
4051 prog = bpf_prog_inc_not_zero(prog);
4052 else
4053 prog = ERR_PTR(-ENOENT);
4054 spin_unlock_bh(&prog_idr_lock);
7e6897f9
BT
4055 return prog;
4056}
4057
4058static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
4059{
4060 struct bpf_prog *prog;
4061 u32 id = attr->prog_id;
4062 int fd;
4063
4064 if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
4065 return -EINVAL;
4066
4067 if (!capable(CAP_SYS_ADMIN))
4068 return -EPERM;
b16d9aa4 4069
7e6897f9 4070 prog = bpf_prog_by_id(id);
b16d9aa4
MKL
4071 if (IS_ERR(prog))
4072 return PTR_ERR(prog);
4073
4074 fd = bpf_prog_new_fd(prog);
4075 if (fd < 0)
4076 bpf_prog_put(prog);
4077
4078 return fd;
4079}
4080
6e71b04a 4081#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags
bd5f5f4e
MKL
4082
4083static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
4084{
4085 struct bpf_map *map;
4086 u32 id = attr->map_id;
6e71b04a 4087 int f_flags;
bd5f5f4e
MKL
4088 int fd;
4089
6e71b04a
CF
4090 if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) ||
4091 attr->open_flags & ~BPF_OBJ_FLAG_MASK)
bd5f5f4e
MKL
4092 return -EINVAL;
4093
4094 if (!capable(CAP_SYS_ADMIN))
4095 return -EPERM;
4096
6e71b04a
CF
4097 f_flags = bpf_get_file_flag(attr->open_flags);
4098 if (f_flags < 0)
4099 return f_flags;
4100
bd5f5f4e
MKL
4101 spin_lock_bh(&map_idr_lock);
4102 map = idr_find(&map_idr, id);
4103 if (map)
b0e4701c 4104 map = __bpf_map_inc_not_zero(map, true);
bd5f5f4e
MKL
4105 else
4106 map = ERR_PTR(-ENOENT);
4107 spin_unlock_bh(&map_idr_lock);
4108
4109 if (IS_ERR(map))
4110 return PTR_ERR(map);
4111
6e71b04a 4112 fd = bpf_map_new_fd(map, f_flags);
bd5f5f4e 4113 if (fd < 0)
781e6282 4114 bpf_map_put_with_uref(map);
bd5f5f4e
MKL
4115
4116 return fd;
4117}
4118
7105e828 4119static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
d8eca5bb
DB
4120 unsigned long addr, u32 *off,
4121 u32 *type)
7105e828 4122{
d8eca5bb 4123 const struct bpf_map *map;
7105e828
DB
4124 int i;
4125
984fe94f 4126 mutex_lock(&prog->aux->used_maps_mutex);
d8eca5bb
DB
4127 for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
4128 map = prog->aux->used_maps[i];
4129 if (map == (void *)addr) {
4130 *type = BPF_PSEUDO_MAP_FD;
984fe94f 4131 goto out;
d8eca5bb
DB
4132 }
4133 if (!map->ops->map_direct_value_meta)
4134 continue;
4135 if (!map->ops->map_direct_value_meta(map, addr, off)) {
4136 *type = BPF_PSEUDO_MAP_VALUE;
984fe94f 4137 goto out;
d8eca5bb
DB
4138 }
4139 }
984fe94f 4140 map = NULL;
d8eca5bb 4141
984fe94f
YZ
4142out:
4143 mutex_unlock(&prog->aux->used_maps_mutex);
4144 return map;
7105e828
DB
4145}
4146
63960260
KC
4147static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
4148 const struct cred *f_cred)
7105e828
DB
4149{
4150 const struct bpf_map *map;
4151 struct bpf_insn *insns;
d8eca5bb 4152 u32 off, type;
7105e828 4153 u64 imm;
29fcb05b 4154 u8 code;
7105e828
DB
4155 int i;
4156
4157 insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog),
4158 GFP_USER);
4159 if (!insns)
4160 return insns;
4161
4162 for (i = 0; i < prog->len; i++) {
29fcb05b
AN
4163 code = insns[i].code;
4164
4165 if (code == (BPF_JMP | BPF_TAIL_CALL)) {
7105e828
DB
4166 insns[i].code = BPF_JMP | BPF_CALL;
4167 insns[i].imm = BPF_FUNC_tail_call;
4168 /* fall-through */
4169 }
29fcb05b
AN
4170 if (code == (BPF_JMP | BPF_CALL) ||
4171 code == (BPF_JMP | BPF_CALL_ARGS)) {
4172 if (code == (BPF_JMP | BPF_CALL_ARGS))
7105e828 4173 insns[i].code = BPF_JMP | BPF_CALL;
63960260 4174 if (!bpf_dump_raw_ok(f_cred))
7105e828
DB
4175 insns[i].imm = 0;
4176 continue;
4177 }
29fcb05b
AN
4178 if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) {
4179 insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM;
4180 continue;
4181 }
7105e828 4182
29fcb05b 4183 if (code != (BPF_LD | BPF_IMM | BPF_DW))
7105e828
DB
4184 continue;
4185
4186 imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
d8eca5bb 4187 map = bpf_map_from_imm(prog, imm, &off, &type);
7105e828 4188 if (map) {
d8eca5bb 4189 insns[i].src_reg = type;
7105e828 4190 insns[i].imm = map->id;
d8eca5bb 4191 insns[i + 1].imm = off;
7105e828
DB
4192 continue;
4193 }
7105e828
DB
4194 }
4195
4196 return insns;
4197}
4198
c454a46b
MKL
4199static int set_info_rec_size(struct bpf_prog_info *info)
4200{
4201 /*
4202 * Ensure info.*_rec_size is the same as kernel expected size
4203 *
4204 * or
4205 *
4206 * Only allow zero *_rec_size if both _rec_size and _cnt are
4207 * zero. In this case, the kernel will set the expected
4208 * _rec_size back to the info.
4209 */
4210
11d8b82d 4211 if ((info->nr_func_info || info->func_info_rec_size) &&
c454a46b
MKL
4212 info->func_info_rec_size != sizeof(struct bpf_func_info))
4213 return -EINVAL;
4214
11d8b82d 4215 if ((info->nr_line_info || info->line_info_rec_size) &&
c454a46b
MKL
4216 info->line_info_rec_size != sizeof(struct bpf_line_info))
4217 return -EINVAL;
4218
11d8b82d 4219 if ((info->nr_jited_line_info || info->jited_line_info_rec_size) &&
c454a46b
MKL
4220 info->jited_line_info_rec_size != sizeof(__u64))
4221 return -EINVAL;
4222
4223 info->func_info_rec_size = sizeof(struct bpf_func_info);
4224 info->line_info_rec_size = sizeof(struct bpf_line_info);
4225 info->jited_line_info_rec_size = sizeof(__u64);
4226
4227 return 0;
4228}
4229
63960260
KC
4230static int bpf_prog_get_info_by_fd(struct file *file,
4231 struct bpf_prog *prog,
1e270976
MKL
4232 const union bpf_attr *attr,
4233 union bpf_attr __user *uattr)
4234{
4235 struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
6644aabb 4236 struct btf *attach_btf = bpf_prog_get_target_btf(prog);
5c6f2588 4237 struct bpf_prog_info info;
1e270976 4238 u32 info_len = attr->info.info_len;
61a0abae 4239 struct bpf_prog_kstats stats;
1e270976
MKL
4240 char __user *uinsns;
4241 u32 ulen;
4242 int err;
4243
af2ac3e1 4244 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len);
1e270976
MKL
4245 if (err)
4246 return err;
4247 info_len = min_t(u32, sizeof(info), info_len);
4248
5c6f2588 4249 memset(&info, 0, sizeof(info));
1e270976 4250 if (copy_from_user(&info, uinfo, info_len))
89b09689 4251 return -EFAULT;
1e270976
MKL
4252
4253 info.type = prog->type;
4254 info.id = prog->aux->id;
cb4d2b3f
MKL
4255 info.load_time = prog->aux->load_time;
4256 info.created_by_uid = from_kuid_munged(current_user_ns(),
4257 prog->aux->user->uid);
b85fab0e 4258 info.gpl_compatible = prog->gpl_compatible;
1e270976
MKL
4259
4260 memcpy(info.tag, prog->tag, sizeof(prog->tag));
cb4d2b3f
MKL
4261 memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));
4262
984fe94f 4263 mutex_lock(&prog->aux->used_maps_mutex);
cb4d2b3f
MKL
4264 ulen = info.nr_map_ids;
4265 info.nr_map_ids = prog->aux->used_map_cnt;
4266 ulen = min_t(u32, info.nr_map_ids, ulen);
4267 if (ulen) {
721e08da 4268 u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
cb4d2b3f
MKL
4269 u32 i;
4270
4271 for (i = 0; i < ulen; i++)
4272 if (put_user(prog->aux->used_maps[i]->id,
984fe94f
YZ
4273 &user_map_ids[i])) {
4274 mutex_unlock(&prog->aux->used_maps_mutex);
cb4d2b3f 4275 return -EFAULT;
984fe94f 4276 }
cb4d2b3f 4277 }
984fe94f 4278 mutex_unlock(&prog->aux->used_maps_mutex);
1e270976 4279
c454a46b
MKL
4280 err = set_info_rec_size(&info);
4281 if (err)
4282 return err;
7337224f 4283
5f8f8b93
AS
4284 bpf_prog_get_stats(prog, &stats);
4285 info.run_time_ns = stats.nsecs;
4286 info.run_cnt = stats.cnt;
9ed9e9ba 4287 info.recursion_misses = stats.misses;
5f8f8b93 4288
aba64c7d
DM
4289 info.verified_insns = prog->aux->verified_insns;
4290
2c78ee89 4291 if (!bpf_capable()) {
1e270976
MKL
4292 info.jited_prog_len = 0;
4293 info.xlated_prog_len = 0;
dbecd738 4294 info.nr_jited_ksyms = 0;
28c2fae7 4295 info.nr_jited_func_lens = 0;
11d8b82d
YS
4296 info.nr_func_info = 0;
4297 info.nr_line_info = 0;
4298 info.nr_jited_line_info = 0;
1e270976
MKL
4299 goto done;
4300 }
4301
1e270976 4302 ulen = info.xlated_prog_len;
9975a54b 4303 info.xlated_prog_len = bpf_prog_insn_size(prog);
1e270976 4304 if (info.xlated_prog_len && ulen) {
7105e828
DB
4305 struct bpf_insn *insns_sanitized;
4306 bool fault;
4307
63960260 4308 if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) {
7105e828
DB
4309 info.xlated_prog_insns = 0;
4310 goto done;
4311 }
63960260 4312 insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred);
7105e828
DB
4313 if (!insns_sanitized)
4314 return -ENOMEM;
1e270976
MKL
4315 uinsns = u64_to_user_ptr(info.xlated_prog_insns);
4316 ulen = min_t(u32, info.xlated_prog_len, ulen);
7105e828
DB
4317 fault = copy_to_user(uinsns, insns_sanitized, ulen);
4318 kfree(insns_sanitized);
4319 if (fault)
1e270976
MKL
4320 return -EFAULT;
4321 }
4322
9d03ebc7 4323 if (bpf_prog_is_offloaded(prog->aux)) {
675fc275
JK
4324 err = bpf_prog_offload_info_fill(&info, prog);
4325 if (err)
4326 return err;
fcfb126d
JW
4327 goto done;
4328 }
4329
4330 /* NOTE: the following code is supposed to be skipped for offload.
4331 * bpf_prog_offload_info_fill() is the place to fill similar fields
4332 * for offload.
4333 */
4334 ulen = info.jited_prog_len;
4d56a76e
SD
4335 if (prog->aux->func_cnt) {
4336 u32 i;
4337
4338 info.jited_prog_len = 0;
4339 for (i = 0; i < prog->aux->func_cnt; i++)
4340 info.jited_prog_len += prog->aux->func[i]->jited_len;
4341 } else {
4342 info.jited_prog_len = prog->jited_len;
4343 }
4344
fcfb126d 4345 if (info.jited_prog_len && ulen) {
63960260 4346 if (bpf_dump_raw_ok(file->f_cred)) {
fcfb126d
JW
4347 uinsns = u64_to_user_ptr(info.jited_prog_insns);
4348 ulen = min_t(u32, info.jited_prog_len, ulen);
4d56a76e
SD
4349
4350 /* for multi-function programs, copy the JITed
4351 * instructions for all the functions
4352 */
4353 if (prog->aux->func_cnt) {
4354 u32 len, free, i;
4355 u8 *img;
4356
4357 free = ulen;
4358 for (i = 0; i < prog->aux->func_cnt; i++) {
4359 len = prog->aux->func[i]->jited_len;
4360 len = min_t(u32, len, free);
4361 img = (u8 *) prog->aux->func[i]->bpf_func;
4362 if (copy_to_user(uinsns, img, len))
4363 return -EFAULT;
4364 uinsns += len;
4365 free -= len;
4366 if (!free)
4367 break;
4368 }
4369 } else {
4370 if (copy_to_user(uinsns, prog->bpf_func, ulen))
4371 return -EFAULT;
4372 }
fcfb126d
JW
4373 } else {
4374 info.jited_prog_insns = 0;
4375 }
675fc275
JK
4376 }
4377
dbecd738 4378 ulen = info.nr_jited_ksyms;
ff1889fc 4379 info.nr_jited_ksyms = prog->aux->func_cnt ? : 1;
7a5725dd 4380 if (ulen) {
63960260 4381 if (bpf_dump_raw_ok(file->f_cred)) {
ff1889fc 4382 unsigned long ksym_addr;
dbecd738 4383 u64 __user *user_ksyms;
dbecd738
SD
4384 u32 i;
4385
4386 /* copy the address of the kernel symbol
4387 * corresponding to each function
4388 */
4389 ulen = min_t(u32, info.nr_jited_ksyms, ulen);
4390 user_ksyms = u64_to_user_ptr(info.jited_ksyms);
ff1889fc
SL
4391 if (prog->aux->func_cnt) {
4392 for (i = 0; i < ulen; i++) {
4393 ksym_addr = (unsigned long)
4394 prog->aux->func[i]->bpf_func;
4395 if (put_user((u64) ksym_addr,
4396 &user_ksyms[i]))
4397 return -EFAULT;
4398 }
4399 } else {
4400 ksym_addr = (unsigned long) prog->bpf_func;
4401 if (put_user((u64) ksym_addr, &user_ksyms[0]))
dbecd738
SD
4402 return -EFAULT;
4403 }
4404 } else {
4405 info.jited_ksyms = 0;
4406 }
4407 }
4408
815581c1 4409 ulen = info.nr_jited_func_lens;
ff1889fc 4410 info.nr_jited_func_lens = prog->aux->func_cnt ? : 1;
7a5725dd 4411 if (ulen) {
63960260 4412 if (bpf_dump_raw_ok(file->f_cred)) {
815581c1
SD
4413 u32 __user *user_lens;
4414 u32 func_len, i;
4415
4416 /* copy the JITed image lengths for each function */
4417 ulen = min_t(u32, info.nr_jited_func_lens, ulen);
4418 user_lens = u64_to_user_ptr(info.jited_func_lens);
ff1889fc
SL
4419 if (prog->aux->func_cnt) {
4420 for (i = 0; i < ulen; i++) {
4421 func_len =
4422 prog->aux->func[i]->jited_len;
4423 if (put_user(func_len, &user_lens[i]))
4424 return -EFAULT;
4425 }
4426 } else {
4427 func_len = prog->jited_len;
4428 if (put_user(func_len, &user_lens[0]))
815581c1
SD
4429 return -EFAULT;
4430 }
4431 } else {
4432 info.jited_func_lens = 0;
4433 }
4434 }
4435
7337224f 4436 if (prog->aux->btf)
22dc4a0f 4437 info.btf_id = btf_obj_id(prog->aux->btf);
b79c9fc9 4438 info.attach_btf_id = prog->aux->attach_btf_id;
6644aabb
SF
4439 if (attach_btf)
4440 info.attach_btf_obj_id = btf_obj_id(attach_btf);
838e9690 4441
11d8b82d
YS
4442 ulen = info.nr_func_info;
4443 info.nr_func_info = prog->aux->func_info_cnt;
4444 if (info.nr_func_info && ulen) {
9e794163 4445 char __user *user_finfo;
7337224f 4446
9e794163
MKL
4447 user_finfo = u64_to_user_ptr(info.func_info);
4448 ulen = min_t(u32, info.nr_func_info, ulen);
4449 if (copy_to_user(user_finfo, prog->aux->func_info,
4450 info.func_info_rec_size * ulen))
4451 return -EFAULT;
838e9690
YS
4452 }
4453
11d8b82d
YS
4454 ulen = info.nr_line_info;
4455 info.nr_line_info = prog->aux->nr_linfo;
4456 if (info.nr_line_info && ulen) {
9e794163 4457 __u8 __user *user_linfo;
c454a46b 4458
9e794163
MKL
4459 user_linfo = u64_to_user_ptr(info.line_info);
4460 ulen = min_t(u32, info.nr_line_info, ulen);
4461 if (copy_to_user(user_linfo, prog->aux->linfo,
4462 info.line_info_rec_size * ulen))
4463 return -EFAULT;
c454a46b
MKL
4464 }
4465
11d8b82d 4466 ulen = info.nr_jited_line_info;
c454a46b 4467 if (prog->aux->jited_linfo)
11d8b82d 4468 info.nr_jited_line_info = prog->aux->nr_linfo;
c454a46b 4469 else
11d8b82d
YS
4470 info.nr_jited_line_info = 0;
4471 if (info.nr_jited_line_info && ulen) {
63960260 4472 if (bpf_dump_raw_ok(file->f_cred)) {
2cd00852 4473 unsigned long line_addr;
c454a46b
MKL
4474 __u64 __user *user_linfo;
4475 u32 i;
4476
4477 user_linfo = u64_to_user_ptr(info.jited_line_info);
11d8b82d 4478 ulen = min_t(u32, info.nr_jited_line_info, ulen);
c454a46b 4479 for (i = 0; i < ulen; i++) {
2cd00852
PL
4480 line_addr = (unsigned long)prog->aux->jited_linfo[i];
4481 if (put_user((__u64)line_addr, &user_linfo[i]))
c454a46b
MKL
4482 return -EFAULT;
4483 }
4484 } else {
4485 info.jited_line_info = 0;
4486 }
4487 }
4488
c872bdb3
SL
4489 ulen = info.nr_prog_tags;
4490 info.nr_prog_tags = prog->aux->func_cnt ? : 1;
4491 if (ulen) {
4492 __u8 __user (*user_prog_tags)[BPF_TAG_SIZE];
4493 u32 i;
4494
4495 user_prog_tags = u64_to_user_ptr(info.prog_tags);
4496 ulen = min_t(u32, info.nr_prog_tags, ulen);
4497 if (prog->aux->func_cnt) {
4498 for (i = 0; i < ulen; i++) {
4499 if (copy_to_user(user_prog_tags[i],
4500 prog->aux->func[i]->tag,
4501 BPF_TAG_SIZE))
4502 return -EFAULT;
4503 }
4504 } else {
4505 if (copy_to_user(user_prog_tags[0],
4506 prog->tag, BPF_TAG_SIZE))
4507 return -EFAULT;
4508 }
4509 }
4510
1e270976
MKL
4511done:
4512 if (copy_to_user(uinfo, &info, info_len) ||
4513 put_user(info_len, &uattr->info.info_len))
4514 return -EFAULT;
4515
4516 return 0;
4517}
4518
63960260
KC
4519static int bpf_map_get_info_by_fd(struct file *file,
4520 struct bpf_map *map,
1e270976
MKL
4521 const union bpf_attr *attr,
4522 union bpf_attr __user *uattr)
4523{
4524 struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
5c6f2588 4525 struct bpf_map_info info;
1e270976
MKL
4526 u32 info_len = attr->info.info_len;
4527 int err;
4528
af2ac3e1 4529 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len);
1e270976
MKL
4530 if (err)
4531 return err;
4532 info_len = min_t(u32, sizeof(info), info_len);
4533
5c6f2588 4534 memset(&info, 0, sizeof(info));
1e270976
MKL
4535 info.type = map->map_type;
4536 info.id = map->id;
4537 info.key_size = map->key_size;
4538 info.value_size = map->value_size;
4539 info.max_entries = map->max_entries;
4540 info.map_flags = map->map_flags;
9330986c 4541 info.map_extra = map->map_extra;
ad5b177b 4542 memcpy(info.name, map->name, sizeof(map->name));
1e270976 4543
78958fca 4544 if (map->btf) {
22dc4a0f 4545 info.btf_id = btf_obj_id(map->btf);
9b2cf328
MKL
4546 info.btf_key_type_id = map->btf_key_type_id;
4547 info.btf_value_type_id = map->btf_value_type_id;
78958fca 4548 }
85d33df3 4549 info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
78958fca 4550
9d03ebc7 4551 if (bpf_map_is_offloaded(map)) {
52775b33
JK
4552 err = bpf_map_offload_info_fill(&info, map);
4553 if (err)
4554 return err;
4555 }
4556
1e270976
MKL
4557 if (copy_to_user(uinfo, &info, info_len) ||
4558 put_user(info_len, &uattr->info.info_len))
4559 return -EFAULT;
4560
4561 return 0;
4562}
4563
63960260
KC
4564static int bpf_btf_get_info_by_fd(struct file *file,
4565 struct btf *btf,
62dab84c
MKL
4566 const union bpf_attr *attr,
4567 union bpf_attr __user *uattr)
4568{
4569 struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info);
4570 u32 info_len = attr->info.info_len;
4571 int err;
4572
af2ac3e1 4573 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len);
62dab84c
MKL
4574 if (err)
4575 return err;
4576
4577 return btf_get_info_by_fd(btf, attr, uattr);
4578}
4579
63960260
KC
4580static int bpf_link_get_info_by_fd(struct file *file,
4581 struct bpf_link *link,
f2e10bff
AN
4582 const union bpf_attr *attr,
4583 union bpf_attr __user *uattr)
4584{
4585 struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info);
4586 struct bpf_link_info info;
4587 u32 info_len = attr->info.info_len;
4588 int err;
4589
af2ac3e1 4590 err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(info), info_len);
f2e10bff
AN
4591 if (err)
4592 return err;
4593 info_len = min_t(u32, sizeof(info), info_len);
4594
4595 memset(&info, 0, sizeof(info));
4596 if (copy_from_user(&info, uinfo, info_len))
4597 return -EFAULT;
4598
4599 info.type = link->type;
4600 info.id = link->id;
68b04864
KFL
4601 if (link->prog)
4602 info.prog_id = link->prog->aux->id;
f2e10bff
AN
4603
4604 if (link->ops->fill_link_info) {
4605 err = link->ops->fill_link_info(link, &info);
4606 if (err)
4607 return err;
4608 }
4609
4610 if (copy_to_user(uinfo, &info, info_len) ||
4611 put_user(info_len, &uattr->info.info_len))
4612 return -EFAULT;
4613
4614 return 0;
4615}
4616
4617
1e270976
MKL
4618#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info
4619
4620static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
4621 union bpf_attr __user *uattr)
4622{
4623 int ufd = attr->info.bpf_fd;
4624 struct fd f;
4625 int err;
4626
4627 if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
4628 return -EINVAL;
4629
4630 f = fdget(ufd);
4631 if (!f.file)
4632 return -EBADFD;
4633
4634 if (f.file->f_op == &bpf_prog_fops)
63960260 4635 err = bpf_prog_get_info_by_fd(f.file, f.file->private_data, attr,
1e270976
MKL
4636 uattr);
4637 else if (f.file->f_op == &bpf_map_fops)
63960260 4638 err = bpf_map_get_info_by_fd(f.file, f.file->private_data, attr,
1e270976 4639 uattr);
60197cfb 4640 else if (f.file->f_op == &btf_fops)
63960260 4641 err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr);
f2e10bff 4642 else if (f.file->f_op == &bpf_link_fops)
63960260 4643 err = bpf_link_get_info_by_fd(f.file, f.file->private_data,
f2e10bff 4644 attr, uattr);
1e270976
MKL
4645 else
4646 err = -EINVAL;
4647
4648 fdput(f);
4649 return err;
4650}
4651
47a71c1f 4652#define BPF_BTF_LOAD_LAST_FIELD btf_log_true_size
f56a653c 4653
47a71c1f 4654static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
f56a653c
MKL
4655{
4656 if (CHECK_ATTR(BPF_BTF_LOAD))
4657 return -EINVAL;
4658
2c78ee89 4659 if (!bpf_capable())
f56a653c
MKL
4660 return -EPERM;
4661
47a71c1f 4662 return btf_new_fd(attr, uattr, uattr_size);
f56a653c
MKL
4663}
4664
78958fca
MKL
4665#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id
4666
4667static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
4668{
4669 if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID))
4670 return -EINVAL;
4671
4672 if (!capable(CAP_SYS_ADMIN))
4673 return -EPERM;
4674
4675 return btf_get_fd_by_id(attr->btf_id);
4676}
4677
41bdc4b4
YS
4678static int bpf_task_fd_query_copy(const union bpf_attr *attr,
4679 union bpf_attr __user *uattr,
4680 u32 prog_id, u32 fd_type,
4681 const char *buf, u64 probe_offset,
4682 u64 probe_addr)
4683{
4684 char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
4685 u32 len = buf ? strlen(buf) : 0, input_len;
4686 int err = 0;
4687
4688 if (put_user(len, &uattr->task_fd_query.buf_len))
4689 return -EFAULT;
4690 input_len = attr->task_fd_query.buf_len;
4691 if (input_len && ubuf) {
4692 if (!len) {
4693 /* nothing to copy, just make ubuf NULL terminated */
4694 char zero = '\0';
4695
4696 if (put_user(zero, ubuf))
4697 return -EFAULT;
4698 } else if (input_len >= len + 1) {
4699 /* ubuf can hold the string with NULL terminator */
4700 if (copy_to_user(ubuf, buf, len + 1))
4701 return -EFAULT;
4702 } else {
4703 /* ubuf cannot hold the string with NULL terminator,
4704 * do a partial copy with NULL terminator.
4705 */
4706 char zero = '\0';
4707
4708 err = -ENOSPC;
4709 if (copy_to_user(ubuf, buf, input_len - 1))
4710 return -EFAULT;
4711 if (put_user(zero, ubuf + input_len - 1))
4712 return -EFAULT;
4713 }
4714 }
4715
4716 if (put_user(prog_id, &uattr->task_fd_query.prog_id) ||
4717 put_user(fd_type, &uattr->task_fd_query.fd_type) ||
4718 put_user(probe_offset, &uattr->task_fd_query.probe_offset) ||
4719 put_user(probe_addr, &uattr->task_fd_query.probe_addr))
4720 return -EFAULT;
4721
4722 return err;
4723}
4724
4725#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr
4726
4727static int bpf_task_fd_query(const union bpf_attr *attr,
4728 union bpf_attr __user *uattr)
4729{
4730 pid_t pid = attr->task_fd_query.pid;
4731 u32 fd = attr->task_fd_query.fd;
4732 const struct perf_event *event;
41bdc4b4
YS
4733 struct task_struct *task;
4734 struct file *file;
4735 int err;
4736
4737 if (CHECK_ATTR(BPF_TASK_FD_QUERY))
4738 return -EINVAL;
4739
4740 if (!capable(CAP_SYS_ADMIN))
4741 return -EPERM;
4742
4743 if (attr->task_fd_query.flags != 0)
4744 return -EINVAL;
4745
83c10cc3 4746 rcu_read_lock();
41bdc4b4 4747 task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
83c10cc3 4748 rcu_read_unlock();
41bdc4b4
YS
4749 if (!task)
4750 return -ENOENT;
4751
41bdc4b4 4752 err = 0;
b48845af
EB
4753 file = fget_task(task, fd);
4754 put_task_struct(task);
41bdc4b4 4755 if (!file)
b48845af 4756 return -EBADF;
41bdc4b4 4757
70ed506c
AN
4758 if (file->f_op == &bpf_link_fops) {
4759 struct bpf_link *link = file->private_data;
41bdc4b4 4760
a3b80e10 4761 if (link->ops == &bpf_raw_tp_link_lops) {
70ed506c
AN
4762 struct bpf_raw_tp_link *raw_tp =
4763 container_of(link, struct bpf_raw_tp_link, link);
4764 struct bpf_raw_event_map *btp = raw_tp->btp;
4765
4766 err = bpf_task_fd_query_copy(attr, uattr,
4767 raw_tp->link.prog->aux->id,
4768 BPF_FD_TYPE_RAW_TRACEPOINT,
4769 btp->tp->name, 0, 0);
4770 goto put_file;
4771 }
4772 goto out_not_supp;
41bdc4b4
YS
4773 }
4774
4775 event = perf_get_event(file);
4776 if (!IS_ERR(event)) {
4777 u64 probe_offset, probe_addr;
4778 u32 prog_id, fd_type;
4779 const char *buf;
4780
4781 err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
4782 &buf, &probe_offset,
4783 &probe_addr);
4784 if (!err)
4785 err = bpf_task_fd_query_copy(attr, uattr, prog_id,
4786 fd_type, buf,
4787 probe_offset,
4788 probe_addr);
4789 goto put_file;
4790 }
4791
70ed506c 4792out_not_supp:
41bdc4b4
YS
4793 err = -ENOTSUPP;
4794put_file:
4795 fput(file);
41bdc4b4
YS
4796 return err;
4797}
4798
cb4d03ab
BV
4799#define BPF_MAP_BATCH_LAST_FIELD batch.flags
4800
3af43ba4 4801#define BPF_DO_BATCH(fn, ...) \
cb4d03ab
BV
4802 do { \
4803 if (!fn) { \
4804 err = -ENOTSUPP; \
4805 goto err_put; \
4806 } \
3af43ba4 4807 err = fn(__VA_ARGS__); \
cb4d03ab
BV
4808 } while (0)
4809
4810static int bpf_map_do_batch(const union bpf_attr *attr,
4811 union bpf_attr __user *uattr,
4812 int cmd)
4813{
353050be
DB
4814 bool has_read = cmd == BPF_MAP_LOOKUP_BATCH ||
4815 cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH;
4816 bool has_write = cmd != BPF_MAP_LOOKUP_BATCH;
cb4d03ab
BV
4817 struct bpf_map *map;
4818 int err, ufd;
4819 struct fd f;
4820
4821 if (CHECK_ATTR(BPF_MAP_BATCH))
4822 return -EINVAL;
4823
4824 ufd = attr->batch.map_fd;
4825 f = fdget(ufd);
4826 map = __bpf_map_get(f);
4827 if (IS_ERR(map))
4828 return PTR_ERR(map);
353050be
DB
4829 if (has_write)
4830 bpf_map_write_active_inc(map);
4831 if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
cb4d03ab
BV
4832 err = -EPERM;
4833 goto err_put;
4834 }
353050be 4835 if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
cb4d03ab
BV
4836 err = -EPERM;
4837 goto err_put;
4838 }
4839
4840 if (cmd == BPF_MAP_LOOKUP_BATCH)
3af43ba4 4841 BPF_DO_BATCH(map->ops->map_lookup_batch, map, attr, uattr);
05799638 4842 else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH)
3af43ba4 4843 BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch, map, attr, uattr);
aa2e93b8 4844 else if (cmd == BPF_MAP_UPDATE_BATCH)
3af43ba4 4845 BPF_DO_BATCH(map->ops->map_update_batch, map, f.file, attr, uattr);
aa2e93b8 4846 else
3af43ba4 4847 BPF_DO_BATCH(map->ops->map_delete_batch, map, attr, uattr);
cb4d03ab 4848err_put:
353050be
DB
4849 if (has_write)
4850 bpf_map_write_active_dec(map);
cb4d03ab
BV
4851 fdput(f);
4852 return err;
4853}
4854
ca74823c 4855#define BPF_LINK_CREATE_LAST_FIELD link_create.kprobe_multi.cookies
af2ac3e1 4856static int link_create(union bpf_attr *attr, bpfptr_t uattr)
af6eea57
AN
4857{
4858 enum bpf_prog_type ptype;
4859 struct bpf_prog *prog;
4860 int ret;
4861
af6eea57
AN
4862 if (CHECK_ATTR(BPF_LINK_CREATE))
4863 return -EINVAL;
4864
68b04864
KFL
4865 if (attr->link_create.attach_type == BPF_STRUCT_OPS)
4866 return bpf_struct_ops_link_create(attr);
4867
4a1e7c0c 4868 prog = bpf_prog_get(attr->link_create.prog_fd);
af6eea57
AN
4869 if (IS_ERR(prog))
4870 return PTR_ERR(prog);
4871
4872 ret = bpf_prog_attach_check_attach_type(prog,
4873 attr->link_create.attach_type);
4874 if (ret)
4a1e7c0c
THJ
4875 goto out;
4876
b89fbfbb
AN
4877 switch (prog->type) {
4878 case BPF_PROG_TYPE_EXT:
132328e8 4879 break;
84601d6e 4880 case BPF_PROG_TYPE_NETFILTER:
132328e8
FW
4881 if (attr->link_create.attach_type != BPF_NETFILTER) {
4882 ret = -EINVAL;
4883 goto out;
4884 }
df86ca0d 4885 break;
b89fbfbb 4886 case BPF_PROG_TYPE_PERF_EVENT:
b89fbfbb
AN
4887 case BPF_PROG_TYPE_TRACEPOINT:
4888 if (attr->link_create.attach_type != BPF_PERF_EVENT) {
4889 ret = -EINVAL;
4890 goto out;
4891 }
b89fbfbb 4892 break;
0dcac272
JO
4893 case BPF_PROG_TYPE_KPROBE:
4894 if (attr->link_create.attach_type != BPF_PERF_EVENT &&
4895 attr->link_create.attach_type != BPF_TRACE_KPROBE_MULTI) {
4896 ret = -EINVAL;
4897 goto out;
4898 }
0dcac272 4899 break;
e420bed0
DB
4900 case BPF_PROG_TYPE_SCHED_CLS:
4901 if (attr->link_create.attach_type != BPF_TCX_INGRESS &&
4902 attr->link_create.attach_type != BPF_TCX_EGRESS) {
4903 ret = -EINVAL;
4904 goto out;
4905 }
4906 break;
b89fbfbb
AN
4907 default:
4908 ptype = attach_type_to_prog_type(attr->link_create.attach_type);
4909 if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
4910 ret = -EINVAL;
4911 goto out;
4912 }
4913 break;
4a1e7c0c 4914 }
af6eea57 4915
df86ca0d 4916 switch (prog->type) {
af6eea57
AN
4917 case BPF_PROG_TYPE_CGROUP_SKB:
4918 case BPF_PROG_TYPE_CGROUP_SOCK:
4919 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
4920 case BPF_PROG_TYPE_SOCK_OPS:
4921 case BPF_PROG_TYPE_CGROUP_DEVICE:
4922 case BPF_PROG_TYPE_CGROUP_SYSCTL:
4923 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
4924 ret = cgroup_bpf_link_attach(attr, prog);
4925 break;
df86ca0d
AN
4926 case BPF_PROG_TYPE_EXT:
4927 ret = bpf_tracing_prog_attach(prog,
4928 attr->link_create.target_fd,
2fcc8241
KFL
4929 attr->link_create.target_btf_id,
4930 attr->link_create.tracing.cookie);
df86ca0d
AN
4931 break;
4932 case BPF_PROG_TYPE_LSM:
de4e05ca 4933 case BPF_PROG_TYPE_TRACING:
df86ca0d
AN
4934 if (attr->link_create.attach_type != prog->expected_attach_type) {
4935 ret = -EINVAL;
4936 goto out;
4937 }
4938 if (prog->expected_attach_type == BPF_TRACE_RAW_TP)
4939 ret = bpf_raw_tp_link_attach(prog, NULL);
4940 else if (prog->expected_attach_type == BPF_TRACE_ITER)
4941 ret = bpf_iter_link_attach(attr, uattr, prog);
69fd337a
SF
4942 else if (prog->expected_attach_type == BPF_LSM_CGROUP)
4943 ret = cgroup_bpf_link_attach(attr, prog);
df86ca0d
AN
4944 else
4945 ret = bpf_tracing_prog_attach(prog,
4946 attr->link_create.target_fd,
2fcc8241
KFL
4947 attr->link_create.target_btf_id,
4948 attr->link_create.tracing.cookie);
de4e05ca 4949 break;
7f045a49 4950 case BPF_PROG_TYPE_FLOW_DISSECTOR:
e9ddbb77 4951 case BPF_PROG_TYPE_SK_LOOKUP:
7f045a49
JS
4952 ret = netns_bpf_link_create(attr, prog);
4953 break;
310ad797 4954#ifdef CONFIG_NET
aa8d3a71
AN
4955 case BPF_PROG_TYPE_XDP:
4956 ret = bpf_xdp_link_attach(attr, prog);
84601d6e 4957 break;
e420bed0
DB
4958 case BPF_PROG_TYPE_SCHED_CLS:
4959 ret = tcx_link_attach(attr, prog);
4960 break;
84601d6e
FW
4961 case BPF_PROG_TYPE_NETFILTER:
4962 ret = bpf_nf_link_attach(attr, prog);
aa8d3a71 4963 break;
b89fbfbb 4964#endif
b89fbfbb
AN
4965 case BPF_PROG_TYPE_PERF_EVENT:
4966 case BPF_PROG_TYPE_TRACEPOINT:
b89fbfbb
AN
4967 ret = bpf_perf_link_attach(attr, prog);
4968 break;
0dcac272
JO
4969 case BPF_PROG_TYPE_KPROBE:
4970 if (attr->link_create.attach_type == BPF_PERF_EVENT)
4971 ret = bpf_perf_link_attach(attr, prog);
4972 else
4973 ret = bpf_kprobe_multi_link_attach(attr, prog);
4974 break;
af6eea57
AN
4975 default:
4976 ret = -EINVAL;
4977 }
4978
4a1e7c0c 4979out:
af6eea57
AN
4980 if (ret < 0)
4981 bpf_prog_put(prog);
4982 return ret;
4983}
4984
aef56f2e
KFL
4985static int link_update_map(struct bpf_link *link, union bpf_attr *attr)
4986{
4987 struct bpf_map *new_map, *old_map = NULL;
4988 int ret;
4989
4990 new_map = bpf_map_get(attr->link_update.new_map_fd);
4991 if (IS_ERR(new_map))
55fbae05 4992 return PTR_ERR(new_map);
aef56f2e
KFL
4993
4994 if (attr->link_update.flags & BPF_F_REPLACE) {
4995 old_map = bpf_map_get(attr->link_update.old_map_fd);
4996 if (IS_ERR(old_map)) {
55fbae05 4997 ret = PTR_ERR(old_map);
aef56f2e
KFL
4998 goto out_put;
4999 }
5000 } else if (attr->link_update.old_map_fd) {
5001 ret = -EINVAL;
5002 goto out_put;
5003 }
5004
5005 ret = link->ops->update_map(link, new_map, old_map);
5006
5007 if (old_map)
5008 bpf_map_put(old_map);
5009out_put:
5010 bpf_map_put(new_map);
5011 return ret;
5012}
5013
0c991ebc
AN
5014#define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd
5015
5016static int link_update(union bpf_attr *attr)
5017{
5018 struct bpf_prog *old_prog = NULL, *new_prog;
5019 struct bpf_link *link;
5020 u32 flags;
5021 int ret;
5022
0c991ebc
AN
5023 if (CHECK_ATTR(BPF_LINK_UPDATE))
5024 return -EINVAL;
5025
5026 flags = attr->link_update.flags;
5027 if (flags & ~BPF_F_REPLACE)
5028 return -EINVAL;
5029
5030 link = bpf_link_get_from_fd(attr->link_update.link_fd);
5031 if (IS_ERR(link))
5032 return PTR_ERR(link);
5033
aef56f2e
KFL
5034 if (link->ops->update_map) {
5035 ret = link_update_map(link, attr);
5036 goto out_put_link;
5037 }
5038
0c991ebc 5039 new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
4adb7a4a
AN
5040 if (IS_ERR(new_prog)) {
5041 ret = PTR_ERR(new_prog);
5042 goto out_put_link;
5043 }
0c991ebc
AN
5044
5045 if (flags & BPF_F_REPLACE) {
5046 old_prog = bpf_prog_get(attr->link_update.old_prog_fd);
5047 if (IS_ERR(old_prog)) {
5048 ret = PTR_ERR(old_prog);
5049 old_prog = NULL;
5050 goto out_put_progs;
5051 }
4adb7a4a
AN
5052 } else if (attr->link_update.old_prog_fd) {
5053 ret = -EINVAL;
5054 goto out_put_progs;
0c991ebc
AN
5055 }
5056
f9d04127
AN
5057 if (link->ops->update_prog)
5058 ret = link->ops->update_prog(link, new_prog, old_prog);
5059 else
fe537393 5060 ret = -EINVAL;
0c991ebc
AN
5061
5062out_put_progs:
5063 if (old_prog)
5064 bpf_prog_put(old_prog);
5065 if (ret)
5066 bpf_prog_put(new_prog);
4adb7a4a 5067out_put_link:
ab5d47bd 5068 bpf_link_put_direct(link);
0c991ebc
AN
5069 return ret;
5070}
5071
73b11c2a
AN
5072#define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd
5073
5074static int link_detach(union bpf_attr *attr)
5075{
5076 struct bpf_link *link;
5077 int ret;
5078
5079 if (CHECK_ATTR(BPF_LINK_DETACH))
5080 return -EINVAL;
5081
5082 link = bpf_link_get_from_fd(attr->link_detach.link_fd);
5083 if (IS_ERR(link))
5084 return PTR_ERR(link);
5085
5086 if (link->ops->detach)
5087 ret = link->ops->detach(link);
5088 else
5089 ret = -EOPNOTSUPP;
5090
ab5d47bd 5091 bpf_link_put_direct(link);
73b11c2a
AN
5092 return ret;
5093}
5094
005142b8 5095static struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
2d602c8c 5096{
005142b8 5097 return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT);
2d602c8c
AN
5098}
5099
005142b8 5100struct bpf_link *bpf_link_by_id(u32 id)
2d602c8c
AN
5101{
5102 struct bpf_link *link;
2d602c8c 5103
005142b8
AS
5104 if (!id)
5105 return ERR_PTR(-ENOENT);
2d602c8c
AN
5106
5107 spin_lock_bh(&link_idr_lock);
2d602c8c 5108 /* before link is "settled", ID is 0, pretend it doesn't exist yet */
005142b8 5109 link = idr_find(&link_idr, id);
2d602c8c
AN
5110 if (link) {
5111 if (link->id)
005142b8 5112 link = bpf_link_inc_not_zero(link);
2d602c8c 5113 else
005142b8 5114 link = ERR_PTR(-EAGAIN);
2d602c8c 5115 } else {
005142b8 5116 link = ERR_PTR(-ENOENT);
2d602c8c
AN
5117 }
5118 spin_unlock_bh(&link_idr_lock);
005142b8
AS
5119 return link;
5120}
2d602c8c 5121
9f883612
DD
5122struct bpf_link *bpf_link_get_curr_or_next(u32 *id)
5123{
5124 struct bpf_link *link;
5125
5126 spin_lock_bh(&link_idr_lock);
5127again:
5128 link = idr_get_next(&link_idr, id);
5129 if (link) {
5130 link = bpf_link_inc_not_zero(link);
5131 if (IS_ERR(link)) {
5132 (*id)++;
5133 goto again;
5134 }
5135 }
5136 spin_unlock_bh(&link_idr_lock);
5137
5138 return link;
5139}
5140
005142b8
AS
5141#define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id
5142
5143static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
5144{
5145 struct bpf_link *link;
5146 u32 id = attr->link_id;
5147 int fd;
5148
5149 if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID))
5150 return -EINVAL;
5151
5152 if (!capable(CAP_SYS_ADMIN))
5153 return -EPERM;
5154
5155 link = bpf_link_by_id(id);
5156 if (IS_ERR(link))
5157 return PTR_ERR(link);
2d602c8c
AN
5158
5159 fd = bpf_link_new_fd(link);
5160 if (fd < 0)
ab5d47bd 5161 bpf_link_put_direct(link);
2d602c8c
AN
5162
5163 return fd;
5164}
5165
d46edd67
SL
5166DEFINE_MUTEX(bpf_stats_enabled_mutex);
5167
5168static int bpf_stats_release(struct inode *inode, struct file *file)
5169{
5170 mutex_lock(&bpf_stats_enabled_mutex);
5171 static_key_slow_dec(&bpf_stats_enabled_key.key);
5172 mutex_unlock(&bpf_stats_enabled_mutex);
5173 return 0;
5174}
5175
5176static const struct file_operations bpf_stats_fops = {
5177 .release = bpf_stats_release,
5178};
5179
5180static int bpf_enable_runtime_stats(void)
5181{
5182 int fd;
5183
5184 mutex_lock(&bpf_stats_enabled_mutex);
5185
5186 /* Set a very high limit to avoid overflow */
5187 if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) {
5188 mutex_unlock(&bpf_stats_enabled_mutex);
5189 return -EBUSY;
5190 }
5191
5192 fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC);
5193 if (fd >= 0)
5194 static_key_slow_inc(&bpf_stats_enabled_key.key);
5195
5196 mutex_unlock(&bpf_stats_enabled_mutex);
5197 return fd;
5198}
5199
5200#define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type
5201
5202static int bpf_enable_stats(union bpf_attr *attr)
5203{
5204
5205 if (CHECK_ATTR(BPF_ENABLE_STATS))
5206 return -EINVAL;
5207
5208 if (!capable(CAP_SYS_ADMIN))
5209 return -EPERM;
5210
5211 switch (attr->enable_stats.type) {
5212 case BPF_STATS_RUN_TIME:
5213 return bpf_enable_runtime_stats();
5214 default:
5215 break;
5216 }
5217 return -EINVAL;
5218}
5219
ac51d99b
YS
5220#define BPF_ITER_CREATE_LAST_FIELD iter_create.flags
5221
5222static int bpf_iter_create(union bpf_attr *attr)
5223{
5224 struct bpf_link *link;
5225 int err;
5226
5227 if (CHECK_ATTR(BPF_ITER_CREATE))
5228 return -EINVAL;
5229
5230 if (attr->iter_create.flags)
5231 return -EINVAL;
5232
5233 link = bpf_link_get_from_fd(attr->iter_create.link_fd);
5234 if (IS_ERR(link))
5235 return PTR_ERR(link);
5236
5237 err = bpf_iter_new_fd(link);
ab5d47bd 5238 bpf_link_put_direct(link);
ac51d99b
YS
5239
5240 return err;
5241}
5242
ef15314a
YZ
5243#define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags
5244
5245static int bpf_prog_bind_map(union bpf_attr *attr)
5246{
5247 struct bpf_prog *prog;
5248 struct bpf_map *map;
5249 struct bpf_map **used_maps_old, **used_maps_new;
5250 int i, ret = 0;
5251
5252 if (CHECK_ATTR(BPF_PROG_BIND_MAP))
5253 return -EINVAL;
5254
5255 if (attr->prog_bind_map.flags)
5256 return -EINVAL;
5257
5258 prog = bpf_prog_get(attr->prog_bind_map.prog_fd);
5259 if (IS_ERR(prog))
5260 return PTR_ERR(prog);
5261
5262 map = bpf_map_get(attr->prog_bind_map.map_fd);
5263 if (IS_ERR(map)) {
5264 ret = PTR_ERR(map);
5265 goto out_prog_put;
5266 }
5267
5268 mutex_lock(&prog->aux->used_maps_mutex);
5269
5270 used_maps_old = prog->aux->used_maps;
5271
5272 for (i = 0; i < prog->aux->used_map_cnt; i++)
1028ae40
SF
5273 if (used_maps_old[i] == map) {
5274 bpf_map_put(map);
ef15314a 5275 goto out_unlock;
1028ae40 5276 }
ef15314a
YZ
5277
5278 used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1,
5279 sizeof(used_maps_new[0]),
5280 GFP_KERNEL);
5281 if (!used_maps_new) {
5282 ret = -ENOMEM;
5283 goto out_unlock;
5284 }
5285
5286 memcpy(used_maps_new, used_maps_old,
5287 sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
5288 used_maps_new[prog->aux->used_map_cnt] = map;
5289
5290 prog->aux->used_map_cnt++;
5291 prog->aux->used_maps = used_maps_new;
5292
5293 kfree(used_maps_old);
5294
5295out_unlock:
5296 mutex_unlock(&prog->aux->used_maps_mutex);
5297
5298 if (ret)
5299 bpf_map_put(map);
5300out_prog_put:
5301 bpf_prog_put(prog);
5302 return ret;
5303}
5304
af2ac3e1 5305static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
99c55f7d 5306{
8096f229 5307 union bpf_attr attr;
99c55f7d
AS
5308 int err;
5309
dcab51f1 5310 err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
1e270976
MKL
5311 if (err)
5312 return err;
5313 size = min_t(u32, size, sizeof(attr));
99c55f7d
AS
5314
5315 /* copy attributes from user space, may be less than sizeof(bpf_attr) */
8096f229 5316 memset(&attr, 0, sizeof(attr));
af2ac3e1 5317 if (copy_from_bpfptr(&attr, uattr, size) != 0)
99c55f7d
AS
5318 return -EFAULT;
5319
afdb09c7
CF
5320 err = security_bpf(cmd, &attr, size);
5321 if (err < 0)
5322 return err;
5323
99c55f7d
AS
5324 switch (cmd) {
5325 case BPF_MAP_CREATE:
5326 err = map_create(&attr);
5327 break;
db20fd2b
AS
5328 case BPF_MAP_LOOKUP_ELEM:
5329 err = map_lookup_elem(&attr);
5330 break;
5331 case BPF_MAP_UPDATE_ELEM:
af2ac3e1 5332 err = map_update_elem(&attr, uattr);
db20fd2b
AS
5333 break;
5334 case BPF_MAP_DELETE_ELEM:
b88df697 5335 err = map_delete_elem(&attr, uattr);
db20fd2b
AS
5336 break;
5337 case BPF_MAP_GET_NEXT_KEY:
5338 err = map_get_next_key(&attr);
5339 break;
87df15de
DB
5340 case BPF_MAP_FREEZE:
5341 err = map_freeze(&attr);
5342 break;
09756af4 5343 case BPF_PROG_LOAD:
47a71c1f 5344 err = bpf_prog_load(&attr, uattr, size);
09756af4 5345 break;
b2197755
DB
5346 case BPF_OBJ_PIN:
5347 err = bpf_obj_pin(&attr);
5348 break;
5349 case BPF_OBJ_GET:
5350 err = bpf_obj_get(&attr);
5351 break;
f4324551
DM
5352 case BPF_PROG_ATTACH:
5353 err = bpf_prog_attach(&attr);
5354 break;
5355 case BPF_PROG_DETACH:
5356 err = bpf_prog_detach(&attr);
5357 break;
468e2f64 5358 case BPF_PROG_QUERY:
af2ac3e1 5359 err = bpf_prog_query(&attr, uattr.user);
468e2f64 5360 break;
1cf1cae9 5361 case BPF_PROG_TEST_RUN:
af2ac3e1 5362 err = bpf_prog_test_run(&attr, uattr.user);
1cf1cae9 5363 break;
34ad5580 5364 case BPF_PROG_GET_NEXT_ID:
af2ac3e1 5365 err = bpf_obj_get_next_id(&attr, uattr.user,
34ad5580
MKL
5366 &prog_idr, &prog_idr_lock);
5367 break;
5368 case BPF_MAP_GET_NEXT_ID:
af2ac3e1 5369 err = bpf_obj_get_next_id(&attr, uattr.user,
34ad5580
MKL
5370 &map_idr, &map_idr_lock);
5371 break;
1b9ed84e 5372 case BPF_BTF_GET_NEXT_ID:
af2ac3e1 5373 err = bpf_obj_get_next_id(&attr, uattr.user,
1b9ed84e
QM
5374 &btf_idr, &btf_idr_lock);
5375 break;
b16d9aa4
MKL
5376 case BPF_PROG_GET_FD_BY_ID:
5377 err = bpf_prog_get_fd_by_id(&attr);
5378 break;
bd5f5f4e
MKL
5379 case BPF_MAP_GET_FD_BY_ID:
5380 err = bpf_map_get_fd_by_id(&attr);
5381 break;
1e270976 5382 case BPF_OBJ_GET_INFO_BY_FD:
af2ac3e1 5383 err = bpf_obj_get_info_by_fd(&attr, uattr.user);
1e270976 5384 break;
c4f6699d
AS
5385 case BPF_RAW_TRACEPOINT_OPEN:
5386 err = bpf_raw_tracepoint_open(&attr);
5387 break;
f56a653c 5388 case BPF_BTF_LOAD:
47a71c1f 5389 err = bpf_btf_load(&attr, uattr, size);
f56a653c 5390 break;
78958fca
MKL
5391 case BPF_BTF_GET_FD_BY_ID:
5392 err = bpf_btf_get_fd_by_id(&attr);
5393 break;
41bdc4b4 5394 case BPF_TASK_FD_QUERY:
af2ac3e1 5395 err = bpf_task_fd_query(&attr, uattr.user);
41bdc4b4 5396 break;
bd513cd0
MV
5397 case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
5398 err = map_lookup_and_delete_elem(&attr);
5399 break;
cb4d03ab 5400 case BPF_MAP_LOOKUP_BATCH:
af2ac3e1 5401 err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_LOOKUP_BATCH);
cb4d03ab 5402 break;
05799638 5403 case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
af2ac3e1 5404 err = bpf_map_do_batch(&attr, uattr.user,
05799638
YS
5405 BPF_MAP_LOOKUP_AND_DELETE_BATCH);
5406 break;
aa2e93b8 5407 case BPF_MAP_UPDATE_BATCH:
af2ac3e1 5408 err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_UPDATE_BATCH);
aa2e93b8
BV
5409 break;
5410 case BPF_MAP_DELETE_BATCH:
af2ac3e1 5411 err = bpf_map_do_batch(&attr, uattr.user, BPF_MAP_DELETE_BATCH);
aa2e93b8 5412 break;
af6eea57 5413 case BPF_LINK_CREATE:
af2ac3e1 5414 err = link_create(&attr, uattr);
af6eea57 5415 break;
0c991ebc
AN
5416 case BPF_LINK_UPDATE:
5417 err = link_update(&attr);
5418 break;
2d602c8c
AN
5419 case BPF_LINK_GET_FD_BY_ID:
5420 err = bpf_link_get_fd_by_id(&attr);
5421 break;
5422 case BPF_LINK_GET_NEXT_ID:
af2ac3e1 5423 err = bpf_obj_get_next_id(&attr, uattr.user,
2d602c8c
AN
5424 &link_idr, &link_idr_lock);
5425 break;
d46edd67
SL
5426 case BPF_ENABLE_STATS:
5427 err = bpf_enable_stats(&attr);
5428 break;
ac51d99b
YS
5429 case BPF_ITER_CREATE:
5430 err = bpf_iter_create(&attr);
5431 break;
73b11c2a
AN
5432 case BPF_LINK_DETACH:
5433 err = link_detach(&attr);
5434 break;
ef15314a
YZ
5435 case BPF_PROG_BIND_MAP:
5436 err = bpf_prog_bind_map(&attr);
5437 break;
99c55f7d
AS
5438 default:
5439 err = -EINVAL;
5440 break;
5441 }
5442
5443 return err;
5444}
79a7f8bd 5445
af2ac3e1
AS
5446SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
5447{
5448 return __sys_bpf(cmd, USER_BPFPTR(uattr), size);
5449}
5450
79a7f8bd
AS
5451static bool syscall_prog_is_valid_access(int off, int size,
5452 enum bpf_access_type type,
5453 const struct bpf_prog *prog,
5454 struct bpf_insn_access_aux *info)
5455{
5456 if (off < 0 || off >= U16_MAX)
5457 return false;
5458 if (off % size != 0)
5459 return false;
5460 return true;
5461}
5462
b1d18a75 5463BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
79a7f8bd 5464{
af2ac3e1
AS
5465 switch (cmd) {
5466 case BPF_MAP_CREATE:
b88df697 5467 case BPF_MAP_DELETE_ELEM:
af2ac3e1
AS
5468 case BPF_MAP_UPDATE_ELEM:
5469 case BPF_MAP_FREEZE:
b88df697 5470 case BPF_MAP_GET_FD_BY_ID:
af2ac3e1 5471 case BPF_PROG_LOAD:
c571bd75 5472 case BPF_BTF_LOAD:
b1d18a75
AS
5473 case BPF_LINK_CREATE:
5474 case BPF_RAW_TRACEPOINT_OPEN:
af2ac3e1 5475 break;
86f44fce
AS
5476 default:
5477 return -EINVAL;
5478 }
5479 return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size);
5480}
5481
4e4588f1
AS
5482
5483/* To shut up -Wmissing-prototypes.
5484 * This function is used by the kernel light skeleton
5485 * to load bpf programs when modules are loaded or during kernel boot.
5486 * See tools/lib/bpf/skel_internal.h
5487 */
5488int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);
5489
86f44fce
AS
5490int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
5491{
5492 struct bpf_prog * __maybe_unused prog;
5493 struct bpf_tramp_run_ctx __maybe_unused run_ctx;
5494
5495 switch (cmd) {
b1d18a75
AS
5496#ifdef CONFIG_BPF_JIT /* __bpf_prog_enter_sleepable used by trampoline and JIT */
5497 case BPF_PROG_TEST_RUN:
5498 if (attr->test.data_in || attr->test.data_out ||
5499 attr->test.ctx_out || attr->test.duration ||
5500 attr->test.repeat || attr->test.flags)
5501 return -EINVAL;
5502
5503 prog = bpf_prog_get_type(attr->test.prog_fd, BPF_PROG_TYPE_SYSCALL);
5504 if (IS_ERR(prog))
5505 return PTR_ERR(prog);
5506
5507 if (attr->test.ctx_size_in < prog->aux->max_ctx_offset ||
5508 attr->test.ctx_size_in > U16_MAX) {
5509 bpf_prog_put(prog);
5510 return -EINVAL;
5511 }
5512
e384c7b7
KFL
5513 run_ctx.bpf_cookie = 0;
5514 run_ctx.saved_run_ctx = NULL;
271de525 5515 if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) {
b1d18a75
AS
5516 /* recursion detected */
5517 bpf_prog_put(prog);
5518 return -EBUSY;
5519 }
5520 attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
271de525
MKL
5521 __bpf_prog_exit_sleepable_recur(prog, 0 /* bpf_prog_run does runtime stats */,
5522 &run_ctx);
b1d18a75
AS
5523 bpf_prog_put(prog);
5524 return 0;
5525#endif
af2ac3e1 5526 default:
86f44fce 5527 return ____bpf_sys_bpf(cmd, attr, size);
af2ac3e1 5528 }
79a7f8bd 5529}
86f44fce 5530EXPORT_SYMBOL(kern_sys_bpf);
79a7f8bd 5531
3a2daa72 5532static const struct bpf_func_proto bpf_sys_bpf_proto = {
79a7f8bd
AS
5533 .func = bpf_sys_bpf,
5534 .gpl_only = false,
5535 .ret_type = RET_INTEGER,
5536 .arg1_type = ARG_ANYTHING,
216e3cd2 5537 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
79a7f8bd
AS
5538 .arg3_type = ARG_CONST_SIZE,
5539};
5540
5541const struct bpf_func_proto * __weak
5542tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
5543{
5544 return bpf_base_func_proto(func_id);
5545}
5546
3abea089
AS
5547BPF_CALL_1(bpf_sys_close, u32, fd)
5548{
5549 /* When bpf program calls this helper there should not be
5550 * an fdget() without matching completed fdput().
5551 * This helper is allowed in the following callchain only:
5552 * sys_bpf->prog_test_run->bpf_prog->bpf_sys_close
5553 */
5554 return close_fd(fd);
5555}
5556
3a2daa72 5557static const struct bpf_func_proto bpf_sys_close_proto = {
3abea089
AS
5558 .func = bpf_sys_close,
5559 .gpl_only = false,
5560 .ret_type = RET_INTEGER,
5561 .arg1_type = ARG_ANYTHING,
5562};
5563
d6aef08a
KKD
5564BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res)
5565{
5566 if (flags)
5567 return -EINVAL;
5568
5569 if (name_sz <= 1 || name[name_sz - 1])
5570 return -EINVAL;
5571
5572 if (!bpf_dump_raw_ok(current_cred()))
5573 return -EPERM;
5574
5575 *res = kallsyms_lookup_name(name);
5576 return *res ? 0 : -ENOENT;
5577}
5578
dc368e1c 5579static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
d6aef08a
KKD
5580 .func = bpf_kallsyms_lookup_name,
5581 .gpl_only = false,
5582 .ret_type = RET_INTEGER,
5583 .arg1_type = ARG_PTR_TO_MEM,
d4efb170 5584 .arg2_type = ARG_CONST_SIZE_OR_ZERO,
d6aef08a
KKD
5585 .arg3_type = ARG_ANYTHING,
5586 .arg4_type = ARG_PTR_TO_LONG,
5587};
5588
79a7f8bd
AS
5589static const struct bpf_func_proto *
5590syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
5591{
5592 switch (func_id) {
5593 case BPF_FUNC_sys_bpf:
14b20b78 5594 return !perfmon_capable() ? NULL : &bpf_sys_bpf_proto;
3d78417b
AS
5595 case BPF_FUNC_btf_find_by_name_kind:
5596 return &bpf_btf_find_by_name_kind_proto;
3abea089
AS
5597 case BPF_FUNC_sys_close:
5598 return &bpf_sys_close_proto;
d6aef08a
KKD
5599 case BPF_FUNC_kallsyms_lookup_name:
5600 return &bpf_kallsyms_lookup_name_proto;
79a7f8bd
AS
5601 default:
5602 return tracing_prog_func_proto(func_id, prog);
5603 }
5604}
5605
5606const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
5607 .get_func_proto = syscall_prog_func_proto,
5608 .is_valid_access = syscall_prog_is_valid_access,
5609};
5610
5611const struct bpf_prog_ops bpf_syscall_prog_ops = {
5612 .test_run = bpf_prog_test_run_syscall,
5613};
2900005e
YZ
5614
5615#ifdef CONFIG_SYSCTL
5616static int bpf_stats_handler(struct ctl_table *table, int write,
5617 void *buffer, size_t *lenp, loff_t *ppos)
5618{
5619 struct static_key *key = (struct static_key *)table->data;
5620 static int saved_val;
5621 int val, ret;
5622 struct ctl_table tmp = {
5623 .data = &val,
5624 .maxlen = sizeof(val),
5625 .mode = table->mode,
5626 .extra1 = SYSCTL_ZERO,
5627 .extra2 = SYSCTL_ONE,
5628 };
5629
5630 if (write && !capable(CAP_SYS_ADMIN))
5631 return -EPERM;
5632
5633 mutex_lock(&bpf_stats_enabled_mutex);
5634 val = saved_val;
5635 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
5636 if (write && !ret && val != saved_val) {
5637 if (val)
5638 static_key_slow_inc(key);
5639 else
5640 static_key_slow_dec(key);
5641 saved_val = val;
5642 }
5643 mutex_unlock(&bpf_stats_enabled_mutex);
5644 return ret;
5645}
5646
5647void __weak unpriv_ebpf_notify(int new_state)
5648{
5649}
5650
5651static int bpf_unpriv_handler(struct ctl_table *table, int write,
5652 void *buffer, size_t *lenp, loff_t *ppos)
5653{
5654 int ret, unpriv_enable = *(int *)table->data;
5655 bool locked_state = unpriv_enable == 1;
5656 struct ctl_table tmp = *table;
5657
5658 if (write && !capable(CAP_SYS_ADMIN))
5659 return -EPERM;
5660
5661 tmp.data = &unpriv_enable;
5662 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
5663 if (write && !ret) {
5664 if (locked_state && unpriv_enable != 1)
5665 return -EPERM;
5666 *(int *)table->data = unpriv_enable;
5667 }
5668
fedf9920
KFL
5669 if (write)
5670 unpriv_ebpf_notify(unpriv_enable);
2900005e
YZ
5671
5672 return ret;
5673}
5674
5675static struct ctl_table bpf_syscall_table[] = {
5676 {
5677 .procname = "unprivileged_bpf_disabled",
5678 .data = &sysctl_unprivileged_bpf_disabled,
5679 .maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
5680 .mode = 0644,
5681 .proc_handler = bpf_unpriv_handler,
5682 .extra1 = SYSCTL_ZERO,
5683 .extra2 = SYSCTL_TWO,
5684 },
5685 {
5686 .procname = "bpf_stats_enabled",
5687 .data = &bpf_stats_enabled_key.key,
2900005e
YZ
5688 .mode = 0644,
5689 .proc_handler = bpf_stats_handler,
5690 },
5691 { }
5692};
5693
5694static int __init bpf_syscall_sysctl_init(void)
5695{
5696 register_sysctl_init("kernel", bpf_syscall_table);
5697 return 0;
5698}
5699late_initcall(bpf_syscall_sysctl_init);
5700#endif /* CONFIG_SYSCTL */