1// SPDX-License-Identifier: GPL-2.0-only
2/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3 * Copyright (c) 2016 Facebook
4 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
5 */
6#include <uapi/linux/btf.h>
7#include <linux/bpf-cgroup.h>
8#include <linux/kernel.h>
9#include <linux/types.h>
10#include <linux/slab.h>
11#include <linux/bpf.h>
12#include <linux/btf.h>
13#include <linux/bpf_verifier.h>
14#include <linux/filter.h>
15#include <net/netlink.h>
16#include <linux/file.h>
17#include <linux/vmalloc.h>
18#include <linux/stringify.h>
19#include <linux/bsearch.h>
20#include <linux/sort.h>
21#include <linux/perf_event.h>
22#include <linux/ctype.h>
23#include <linux/error-injection.h>
24#include <linux/bpf_lsm.h>
25#include <linux/btf_ids.h>
26#include <linux/poison.h>
27
28#include "disasm.h"
29
30static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
31#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
32 [_id] = & _name ## _verifier_ops,
33#define BPF_MAP_TYPE(_id, _ops)
34#define BPF_LINK_TYPE(_id, _name)
35#include <linux/bpf_types.h>
36#undef BPF_PROG_TYPE
37#undef BPF_MAP_TYPE
38#undef BPF_LINK_TYPE
39};
40
41/* bpf_check() is a static code analyzer that walks eBPF program
42 * instruction by instruction and updates register/stack state.
43 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
44 *
45 * The first pass is depth-first-search to check that the program is a DAG.
46 * It rejects the following programs:
47 * - larger than BPF_MAXINSNS insns
48 * - if loop is present (detected via back-edge)
49 * - unreachable insns exist (shouldn't be a forest. program = one function)
50 * - out of bounds or malformed jumps
51 * The second pass is all possible path descent from the 1st insn.
52 * Since it's analyzing all paths through the program, the length of the
53 * analysis is limited to 64k insn, which may be hit even if total number of
 54 * insn is less than 4K, but there are too many branches that change stack/regs.
55 * Number of 'branches to be analyzed' is limited to 1k
56 *
57 * On entry to each instruction, each register has a type, and the instruction
58 * changes the types of the registers depending on instruction semantics.
59 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
60 * copied to R1.
61 *
62 * All registers are 64-bit.
63 * R0 - return register
64 * R1-R5 argument passing registers
65 * R6-R9 callee saved registers
66 * R10 - frame pointer read-only
67 *
68 * At the start of BPF program the register R1 contains a pointer to bpf_context
69 * and has type PTR_TO_CTX.
70 *
 71 * Verifier tracks arithmetic operations on pointers, e.g. in the case of:
72 * BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
73 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
74 * 1st insn copies R10 (which has FRAME_PTR) type into R1
75 * and 2nd arithmetic instruction is pattern matched to recognize
76 * that it wants to construct a pointer to some element within stack.
77 * So after 2nd insn, the register R1 has type PTR_TO_STACK
78 * (and -20 constant is saved for further stack bounds checking).
79 * Meaning that this reg is a pointer to stack plus known immediate constant.
80 *
81 * Most of the time the registers have SCALAR_VALUE type, which
82 * means the register has some value, but it's not a valid pointer.
83 * (like pointer plus pointer becomes SCALAR_VALUE type)
84 *
85 * When verifier sees load or store instructions the type of base register
86 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
87 * four pointer types recognized by check_mem_access() function.
88 *
89 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
90 * and the range of [ptr, ptr + map's value_size) is accessible.
91 *
92 * registers used to pass values to function calls are checked against
93 * function argument constraints.
94 *
 95 * ARG_PTR_TO_MAP_KEY is one such argument constraint.
96 * It means that the register type passed to this function must be
97 * PTR_TO_STACK and it will be used inside the function as
98 * 'pointer to map element key'
99 *
100 * For example the argument constraints for bpf_map_lookup_elem():
101 * .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
102 * .arg1_type = ARG_CONST_MAP_PTR,
103 * .arg2_type = ARG_PTR_TO_MAP_KEY,
104 *
105 * ret_type says that this function returns 'pointer to map elem value or null'
 106 * the function expects the 1st argument to be a const pointer to 'struct bpf_map'
 107 * and the 2nd argument to be a pointer to the stack, which will be used inside
 108 * the helper function as a pointer to the map element key.
109 *
110 * On the kernel side the helper function looks like:
111 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
112 * {
113 * struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
114 * void *key = (void *) (unsigned long) r2;
115 * void *value;
116 *
117 * here kernel can access 'key' and 'map' pointers safely, knowing that
118 * [key, key + map->key_size) bytes are valid and were initialized on
119 * the stack of eBPF program.
120 * }
121 *
122 * Corresponding eBPF program may look like:
123 * BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // after this insn R2 type is FRAME_PTR
124 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
125 * BPF_LD_MAP_FD(BPF_REG_1, map_fd), // after this insn R1 type is CONST_PTR_TO_MAP
126 * BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
127 * here verifier looks at prototype of map_lookup_elem() and sees:
128 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
129 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
130 *
131 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
132 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
133 * and were initialized prior to this call.
134 * If it's ok, then verifier allows this BPF_CALL insn and looks at
135 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
136 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
137 * returns either pointer to map value or NULL.
138 *
139 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
140 * insn, the register holding that pointer in the true branch changes state to
141 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
142 * branch. See check_cond_jmp_op().
143 *
144 * After the call R0 is set to return type of the function and registers R1-R5
145 * are set to NOT_INIT to indicate that they are no longer readable.
146 *
147 * The following reference types represent a potential reference to a kernel
148 * resource which, after first being allocated, must be checked and freed by
149 * the BPF program:
150 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
151 *
152 * When the verifier sees a helper call return a reference type, it allocates a
153 * pointer id for the reference and stores it in the current function state.
154 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
155 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
156 * passes through a NULL-check conditional. For the branch wherein the state is
157 * changed to CONST_IMM, the verifier releases the reference.
158 *
159 * For each helper function that allocates a reference, such as
160 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
161 * bpf_sk_release(). When a reference type passes into the release function,
162 * the verifier also releases the reference. If any unchecked or unreleased
163 * reference remains at the end of the program, the verifier rejects it.
164 */
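
/* Illustrative sketch (not part of the verifier proper; the bpf_sk_lookup_tcp()
 * arguments in R1-R5 are omitted for brevity): a program that acquires and
 * releases a socket reference may look like:
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_lookup_tcp),
 *                                          // R0 is PTR_TO_SOCKET_OR_NULL with a ref_obj_id
 *   BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), // NULL branch: verifier drops the reference
 *   BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),   // non-NULL branch: R0/R1 are PTR_TO_SOCKET
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_release),
 *                                          // reference released before exit
 *   BPF_EXIT_INSN(),
 * Exiting while the reference is still held would be rejected as an
 * unreleased reference.
 */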
165
166/* verifier_state + insn_idx are pushed to stack when branch is encountered */
167struct bpf_verifier_stack_elem {
 168 /* verifier state is 'st'
169 * before processing instruction 'insn_idx'
170 * and after processing instruction 'prev_insn_idx'
171 */
172 struct bpf_verifier_state st;
173 int insn_idx;
174 int prev_insn_idx;
175 struct bpf_verifier_stack_elem *next;
176 /* length of verifier log at the time this state was pushed on stack */
177 u32 log_pos;
178};
179
180#define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192
181#define BPF_COMPLEXITY_LIMIT_STATES 64
182
183#define BPF_MAP_KEY_POISON (1ULL << 63)
184#define BPF_MAP_KEY_SEEN (1ULL << 62)
185
186#define BPF_MAP_PTR_UNPRIV 1UL
187#define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \
188 POISON_POINTER_DELTA))
189#define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
190
191static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
192static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
193static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
194static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
195static int ref_set_non_owning(struct bpf_verifier_env *env,
196 struct bpf_reg_state *reg);
197
198static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
199{
200 return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
201}
202
203static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
204{
205 return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
206}
207
208static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
209 const struct bpf_map *map, bool unpriv)
210{
211 BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
212 unpriv |= bpf_map_ptr_unpriv(aux);
213 aux->map_ptr_state = (unsigned long)map |
214 (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
215}
216
217static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
218{
219 return aux->map_key_state & BPF_MAP_KEY_POISON;
220}
221
222static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
223{
224 return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
225}
226
227static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
228{
229 return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
230}
231
232static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
233{
234 bool poisoned = bpf_map_key_poisoned(aux);
235
236 aux->map_key_state = state | BPF_MAP_KEY_SEEN |
237 (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
238}
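
/* Worked example (illustration, not from the original source): the first time
 * a lookup with the constant key 5 is seen for an instruction,
 * bpf_map_key_store(aux, 5) records (5 | BPF_MAP_KEY_SEEN) and
 * bpf_map_key_immediate() returns 5. Once the state has ever been stored with
 * BPF_MAP_KEY_POISON set, the poison bit is carried over by every later
 * bpf_map_key_store() call, so the immediate can no longer be trusted.
 */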
239
240static bool bpf_pseudo_call(const struct bpf_insn *insn)
241{
242 return insn->code == (BPF_JMP | BPF_CALL) &&
243 insn->src_reg == BPF_PSEUDO_CALL;
244}
245
246static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
247{
248 return insn->code == (BPF_JMP | BPF_CALL) &&
249 insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
250}
251
252struct bpf_call_arg_meta {
253 struct bpf_map *map_ptr;
254 bool raw_mode;
255 bool pkt_access;
256 u8 release_regno;
257 int regno;
258 int access_size;
259 int mem_size;
260 u64 msize_max_value;
261 int ref_obj_id;
262 int dynptr_id;
263 int map_uid;
264 int func_id;
265 struct btf *btf;
266 u32 btf_id;
267 struct btf *ret_btf;
268 u32 ret_btf_id;
269 u32 subprogno;
270 struct btf_field *kptr_field;
271};
272
273struct btf *btf_vmlinux;
274
275static DEFINE_MUTEX(bpf_verifier_lock);
276
277static const struct bpf_line_info *
278find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
279{
280 const struct bpf_line_info *linfo;
281 const struct bpf_prog *prog;
282 u32 i, nr_linfo;
283
284 prog = env->prog;
285 nr_linfo = prog->aux->nr_linfo;
286
287 if (!nr_linfo || insn_off >= prog->len)
288 return NULL;
289
290 linfo = prog->aux->linfo;
291 for (i = 1; i < nr_linfo; i++)
292 if (insn_off < linfo[i].insn_off)
293 break;
294
295 return &linfo[i - 1];
296}
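
/* Worked example (illustrative): with line_info entries at insn_off 0, 4 and
 * 10, find_linfo(env, 7) stops when linfo[2].insn_off (10) exceeds 7 and
 * returns the entry at insn_off 4, i.e. the last line info at or before the
 * requested instruction offset.
 */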
297
298void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
299 va_list args)
300{
301 unsigned int n;
302
303 n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
304
305 WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
306 "verifier log line truncated - local buffer too short\n");
307
308 if (log->level == BPF_LOG_KERNEL) {
309 bool newline = n > 0 && log->kbuf[n - 1] == '\n';
310
311 pr_err("BPF: %s%s", log->kbuf, newline ? "" : "\n");
312 return;
313 }
314
315 n = min(log->len_total - log->len_used - 1, n);
316 log->kbuf[n] = '\0';
317 if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
318 log->len_used += n;
319 else
320 log->ubuf = NULL;
321}
322
323static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
324{
325 char zero = 0;
326
327 if (!bpf_verifier_log_needed(log))
328 return;
329
330 log->len_used = new_pos;
331 if (put_user(zero, log->ubuf + new_pos))
332 log->ubuf = NULL;
333}
334
335/* log_level controls verbosity level of eBPF verifier.
336 * bpf_verifier_log_write() is used to dump the verification trace to the log,
337 * so the user can figure out what's wrong with the program
338 */
339__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
340 const char *fmt, ...)
341{
342 va_list args;
343
344 if (!bpf_verifier_log_needed(&env->log))
345 return;
346
347 va_start(args, fmt);
348 bpf_verifier_vlog(&env->log, fmt, args);
349 va_end(args);
350}
351EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
352
353__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
354{
355 struct bpf_verifier_env *env = private_data;
356 va_list args;
357
358 if (!bpf_verifier_log_needed(&env->log))
359 return;
360
361 va_start(args, fmt);
362 bpf_verifier_vlog(&env->log, fmt, args);
363 va_end(args);
364}
365
366__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
367 const char *fmt, ...)
368{
369 va_list args;
370
371 if (!bpf_verifier_log_needed(log))
372 return;
373
374 va_start(args, fmt);
375 bpf_verifier_vlog(log, fmt, args);
376 va_end(args);
377}
378EXPORT_SYMBOL_GPL(bpf_log);
379
380static const char *ltrim(const char *s)
381{
382 while (isspace(*s))
383 s++;
384
385 return s;
386}
387
388__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
389 u32 insn_off,
390 const char *prefix_fmt, ...)
391{
392 const struct bpf_line_info *linfo;
393
394 if (!bpf_verifier_log_needed(&env->log))
395 return;
396
397 linfo = find_linfo(env, insn_off);
398 if (!linfo || linfo == env->prev_linfo)
399 return;
400
401 if (prefix_fmt) {
402 va_list args;
403
404 va_start(args, prefix_fmt);
405 bpf_verifier_vlog(&env->log, prefix_fmt, args);
406 va_end(args);
407 }
408
409 verbose(env, "%s\n",
410 ltrim(btf_name_by_offset(env->prog->aux->btf,
411 linfo->line_off)));
412
413 env->prev_linfo = linfo;
414}
415
416static void verbose_invalid_scalar(struct bpf_verifier_env *env,
417 struct bpf_reg_state *reg,
418 struct tnum *range, const char *ctx,
419 const char *reg_name)
420{
421 char tn_buf[48];
422
423 verbose(env, "At %s the register %s ", ctx, reg_name);
424 if (!tnum_is_unknown(reg->var_off)) {
425 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
426 verbose(env, "has value %s", tn_buf);
427 } else {
428 verbose(env, "has unknown scalar value");
429 }
430 tnum_strn(tn_buf, sizeof(tn_buf), *range);
431 verbose(env, " should have been in %s\n", tn_buf);
432}
433
434static bool type_is_pkt_pointer(enum bpf_reg_type type)
435{
436 type = base_type(type);
437 return type == PTR_TO_PACKET ||
438 type == PTR_TO_PACKET_META;
439}
440
441static bool type_is_sk_pointer(enum bpf_reg_type type)
442{
443 return type == PTR_TO_SOCKET ||
444 type == PTR_TO_SOCK_COMMON ||
445 type == PTR_TO_TCP_SOCK ||
446 type == PTR_TO_XDP_SOCK;
447}
448
449static bool reg_type_not_null(enum bpf_reg_type type)
450{
451 return type == PTR_TO_SOCKET ||
452 type == PTR_TO_TCP_SOCK ||
453 type == PTR_TO_MAP_VALUE ||
454 type == PTR_TO_MAP_KEY ||
455 type == PTR_TO_SOCK_COMMON;
456}
457
458static bool type_is_ptr_alloc_obj(u32 type)
459{
460 return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC;
461}
462
463static bool type_is_non_owning_ref(u32 type)
464{
465 return type_is_ptr_alloc_obj(type) && type_flag(type) & NON_OWN_REF;
466}
467
468static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
469{
470 struct btf_record *rec = NULL;
471 struct btf_struct_meta *meta;
472
473 if (reg->type == PTR_TO_MAP_VALUE) {
474 rec = reg->map_ptr->record;
475 } else if (type_is_ptr_alloc_obj(reg->type)) {
476 meta = btf_find_struct_meta(reg->btf, reg->btf_id);
477 if (meta)
478 rec = meta->record;
479 }
480 return rec;
481}
482
483static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
484{
485 return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK);
486}
487
488static bool type_is_rdonly_mem(u32 type)
489{
490 return type & MEM_RDONLY;
491}
492
493static bool type_may_be_null(u32 type)
494{
495 return type & PTR_MAYBE_NULL;
496}
497
498static bool is_acquire_function(enum bpf_func_id func_id,
499 const struct bpf_map *map)
500{
501 enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
502
503 if (func_id == BPF_FUNC_sk_lookup_tcp ||
504 func_id == BPF_FUNC_sk_lookup_udp ||
505 func_id == BPF_FUNC_skc_lookup_tcp ||
506 func_id == BPF_FUNC_ringbuf_reserve ||
507 func_id == BPF_FUNC_kptr_xchg)
508 return true;
509
510 if (func_id == BPF_FUNC_map_lookup_elem &&
511 (map_type == BPF_MAP_TYPE_SOCKMAP ||
512 map_type == BPF_MAP_TYPE_SOCKHASH))
513 return true;
514
515 return false;
516}
517
518static bool is_ptr_cast_function(enum bpf_func_id func_id)
519{
520 return func_id == BPF_FUNC_tcp_sock ||
521 func_id == BPF_FUNC_sk_fullsock ||
522 func_id == BPF_FUNC_skc_to_tcp_sock ||
523 func_id == BPF_FUNC_skc_to_tcp6_sock ||
524 func_id == BPF_FUNC_skc_to_udp6_sock ||
525 func_id == BPF_FUNC_skc_to_mptcp_sock ||
526 func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
527 func_id == BPF_FUNC_skc_to_tcp_request_sock;
528}
529
530static bool is_dynptr_ref_function(enum bpf_func_id func_id)
531{
532 return func_id == BPF_FUNC_dynptr_data;
533}
534
535static bool is_callback_calling_function(enum bpf_func_id func_id)
536{
537 return func_id == BPF_FUNC_for_each_map_elem ||
538 func_id == BPF_FUNC_timer_set_callback ||
539 func_id == BPF_FUNC_find_vma ||
540 func_id == BPF_FUNC_loop ||
541 func_id == BPF_FUNC_user_ringbuf_drain;
542}
543
544static bool is_storage_get_function(enum bpf_func_id func_id)
545{
546 return func_id == BPF_FUNC_sk_storage_get ||
547 func_id == BPF_FUNC_inode_storage_get ||
548 func_id == BPF_FUNC_task_storage_get ||
549 func_id == BPF_FUNC_cgrp_storage_get;
550}
551
552static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
553 const struct bpf_map *map)
554{
555 int ref_obj_uses = 0;
556
557 if (is_ptr_cast_function(func_id))
558 ref_obj_uses++;
559 if (is_acquire_function(func_id, map))
560 ref_obj_uses++;
561 if (is_dynptr_ref_function(func_id))
562 ref_obj_uses++;
563
564 return ref_obj_uses > 1;
565}
566
567static bool is_cmpxchg_insn(const struct bpf_insn *insn)
568{
569 return BPF_CLASS(insn->code) == BPF_STX &&
570 BPF_MODE(insn->code) == BPF_ATOMIC &&
571 insn->imm == BPF_CMPXCHG;
572}
573
574/* string representation of 'enum bpf_reg_type'
575 *
 576 * Note that reg_type_str() cannot appear more than once in a single verbose()
577 * statement.
578 */
579static const char *reg_type_str(struct bpf_verifier_env *env,
580 enum bpf_reg_type type)
581{
582 char postfix[16] = {0}, prefix[64] = {0};
583 static const char * const str[] = {
584 [NOT_INIT] = "?",
585 [SCALAR_VALUE] = "scalar",
586 [PTR_TO_CTX] = "ctx",
587 [CONST_PTR_TO_MAP] = "map_ptr",
588 [PTR_TO_MAP_VALUE] = "map_value",
589 [PTR_TO_STACK] = "fp",
590 [PTR_TO_PACKET] = "pkt",
591 [PTR_TO_PACKET_META] = "pkt_meta",
592 [PTR_TO_PACKET_END] = "pkt_end",
593 [PTR_TO_FLOW_KEYS] = "flow_keys",
594 [PTR_TO_SOCKET] = "sock",
595 [PTR_TO_SOCK_COMMON] = "sock_common",
596 [PTR_TO_TCP_SOCK] = "tcp_sock",
597 [PTR_TO_TP_BUFFER] = "tp_buffer",
598 [PTR_TO_XDP_SOCK] = "xdp_sock",
599 [PTR_TO_BTF_ID] = "ptr_",
600 [PTR_TO_MEM] = "mem",
601 [PTR_TO_BUF] = "buf",
602 [PTR_TO_FUNC] = "func",
603 [PTR_TO_MAP_KEY] = "map_key",
604 [CONST_PTR_TO_DYNPTR] = "dynptr_ptr",
605 };
606
607 if (type & PTR_MAYBE_NULL) {
608 if (base_type(type) == PTR_TO_BTF_ID)
609 strncpy(postfix, "or_null_", 16);
610 else
611 strncpy(postfix, "_or_null", 16);
612 }
613
614 snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s",
615 type & MEM_RDONLY ? "rdonly_" : "",
616 type & MEM_RINGBUF ? "ringbuf_" : "",
617 type & MEM_USER ? "user_" : "",
618 type & MEM_PERCPU ? "percpu_" : "",
619 type & MEM_RCU ? "rcu_" : "",
620 type & PTR_UNTRUSTED ? "untrusted_" : "",
621 type & PTR_TRUSTED ? "trusted_" : ""
622 );
623
624 snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
625 prefix, str[base_type(type)], postfix);
626 return env->type_str_buf;
627}
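
/* Example outputs (illustrative): PTR_TO_MAP_VALUE | PTR_MAYBE_NULL renders
 * as "map_value_or_null", PTR_TO_BUF | MEM_RDONLY as "rdonly_buf", and
 * PTR_TO_BTF_ID | PTR_TRUSTED | PTR_MAYBE_NULL as "trusted_ptr_or_null_"
 * (PTR_TO_BTF_ID uses the "or_null_" spelling so that the struct name can be
 * appended by the caller).
 */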
628
629static char slot_type_char[] = {
630 [STACK_INVALID] = '?',
631 [STACK_SPILL] = 'r',
632 [STACK_MISC] = 'm',
633 [STACK_ZERO] = '0',
634 [STACK_DYNPTR] = 'd',
635};
636
637static void print_liveness(struct bpf_verifier_env *env,
638 enum bpf_reg_liveness live)
639{
640 if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
641 verbose(env, "_");
642 if (live & REG_LIVE_READ)
643 verbose(env, "r");
644 if (live & REG_LIVE_WRITTEN)
645 verbose(env, "w");
646 if (live & REG_LIVE_DONE)
647 verbose(env, "D");
648}
649
650static int __get_spi(s32 off)
651{
652 return (-off - 1) / BPF_REG_SIZE;
653}
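
/* Worked example (illustrative): stack offsets are negative and each slot is
 * BPF_REG_SIZE (8) bytes wide, so offsets -1..-8 map to slot index 0,
 * offsets -9..-16 map to slot index 1, and so on.
 */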
654
655static struct bpf_func_state *func(struct bpf_verifier_env *env,
656 const struct bpf_reg_state *reg)
657{
658 struct bpf_verifier_state *cur = env->cur_state;
659
660 return cur->frame[reg->frameno];
661}
662
663static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
664{
665 int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
666
667 /* We need to check that slots between [spi - nr_slots + 1, spi] are
 668 * within [0, allocated_slots).
669 *
670 * Please note that the spi grows downwards. For example, a dynptr
671 * takes the size of two stack slots; the first slot will be at
672 * spi and the second slot will be at spi - 1.
673 */
674 return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
675}
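
/* Worked example (illustrative): with allocated_stack = 32 (4 slots) and a
 * dynptr spanning BPF_DYNPTR_NR_SLOTS = 2 slots, spi = 3 is valid (slots 3
 * and 2 are used), while spi = 0 is not, since the second slot would land at
 * spi - 1 = -1, outside the allocated range.
 */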
676
677static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
678{
679 int off, spi;
680
681 if (!tnum_is_const(reg->var_off)) {
682 verbose(env, "dynptr has to be at a constant offset\n");
683 return -EINVAL;
684 }
685
686 off = reg->off + reg->var_off.value;
687 if (off % BPF_REG_SIZE) {
688 verbose(env, "cannot pass in dynptr at an offset=%d\n", off);
689 return -EINVAL;
690 }
691
692 spi = __get_spi(off);
693 if (spi < 1) {
694 verbose(env, "cannot pass in dynptr at an offset=%d\n", off);
695 return -EINVAL;
696 }
697
698 if (!is_spi_bounds_valid(func(env, reg), spi, BPF_DYNPTR_NR_SLOTS))
699 return -ERANGE;
700 return spi;
701}
702
 703static const char *kernel_type_name(const struct btf *btf, u32 id)
704{
705 return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
706}
707
708static const char *dynptr_type_str(enum bpf_dynptr_type type)
709{
710 switch (type) {
711 case BPF_DYNPTR_TYPE_LOCAL:
712 return "local";
713 case BPF_DYNPTR_TYPE_RINGBUF:
714 return "ringbuf";
715 case BPF_DYNPTR_TYPE_SKB:
716 return "skb";
717 case BPF_DYNPTR_TYPE_XDP:
718 return "xdp";
719 case BPF_DYNPTR_TYPE_INVALID:
720 return "<invalid>";
721 default:
722 WARN_ONCE(1, "unknown dynptr type %d\n", type);
723 return "<unknown>";
724 }
725}
726
727static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
728{
729 env->scratched_regs |= 1U << regno;
730}
731
732static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
733{
734 env->scratched_stack_slots |= 1ULL << spi;
735}
736
737static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
738{
739 return (env->scratched_regs >> regno) & 1;
740}
741
742static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno)
743{
744 return (env->scratched_stack_slots >> regno) & 1;
745}
746
747static bool verifier_state_scratched(const struct bpf_verifier_env *env)
748{
749 return env->scratched_regs || env->scratched_stack_slots;
750}
751
752static void mark_verifier_state_clean(struct bpf_verifier_env *env)
753{
754 env->scratched_regs = 0U;
755 env->scratched_stack_slots = 0ULL;
756}
757
758/* Used for printing the entire verifier state. */
759static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
760{
761 env->scratched_regs = ~0U;
762 env->scratched_stack_slots = ~0ULL;
763}
764
765static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
766{
767 switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
768 case DYNPTR_TYPE_LOCAL:
769 return BPF_DYNPTR_TYPE_LOCAL;
770 case DYNPTR_TYPE_RINGBUF:
771 return BPF_DYNPTR_TYPE_RINGBUF;
772 case DYNPTR_TYPE_SKB:
773 return BPF_DYNPTR_TYPE_SKB;
774 case DYNPTR_TYPE_XDP:
775 return BPF_DYNPTR_TYPE_XDP;
776 default:
777 return BPF_DYNPTR_TYPE_INVALID;
778 }
779}
780
781static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
782{
783 switch (type) {
784 case BPF_DYNPTR_TYPE_LOCAL:
785 return DYNPTR_TYPE_LOCAL;
786 case BPF_DYNPTR_TYPE_RINGBUF:
787 return DYNPTR_TYPE_RINGBUF;
788 case BPF_DYNPTR_TYPE_SKB:
789 return DYNPTR_TYPE_SKB;
790 case BPF_DYNPTR_TYPE_XDP:
791 return DYNPTR_TYPE_XDP;
792 default:
793 return 0;
794 }
795}
796
797static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
798{
799 return type == BPF_DYNPTR_TYPE_RINGBUF;
800}
801
802static void __mark_dynptr_reg(struct bpf_reg_state *reg,
803 enum bpf_dynptr_type type,
804 bool first_slot, int dynptr_id);
805
806static void __mark_reg_not_init(const struct bpf_verifier_env *env,
807 struct bpf_reg_state *reg);
808
809static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
810 struct bpf_reg_state *sreg1,
811 struct bpf_reg_state *sreg2,
812 enum bpf_dynptr_type type)
813{
814 int id = ++env->id_gen;
815
816 __mark_dynptr_reg(sreg1, type, true, id);
817 __mark_dynptr_reg(sreg2, type, false, id);
818}
819
820static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
821 struct bpf_reg_state *reg,
822 enum bpf_dynptr_type type)
823{
824 __mark_dynptr_reg(reg, type, true, ++env->id_gen);
825}
826
827static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
828 struct bpf_func_state *state, int spi);
829
830static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
831 enum bpf_arg_type arg_type, int insn_idx)
832{
833 struct bpf_func_state *state = func(env, reg);
834 enum bpf_dynptr_type type;
835 int spi, i, id, err;
836
837 spi = dynptr_get_spi(env, reg);
838 if (spi < 0)
839 return spi;
840
841 /* We cannot assume both spi and spi - 1 belong to the same dynptr,
 842 * hence we need to call destroy_if_dynptr_stack_slot on both, so that
 843 * for the following example layout:
844 * [d1][d1][d2][d2]
845 * spi 3 2 1 0
 846 * marking spi = 2 leads to destruction of both d1 and d2. In case they
 847 * do belong to the same dynptr, the second call won't see slot_type as
 848 * STACK_DYNPTR and will simply skip destruction.
849 */
850 err = destroy_if_dynptr_stack_slot(env, state, spi);
851 if (err)
852 return err;
853 err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
854 if (err)
855 return err;
856
857 for (i = 0; i < BPF_REG_SIZE; i++) {
858 state->stack[spi].slot_type[i] = STACK_DYNPTR;
859 state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
860 }
861
862 type = arg_to_dynptr_type(arg_type);
863 if (type == BPF_DYNPTR_TYPE_INVALID)
864 return -EINVAL;
865
866 mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
867 &state->stack[spi - 1].spilled_ptr, type);
868
869 if (dynptr_type_refcounted(type)) {
870 /* The id is used to track proper releasing */
871 id = acquire_reference_state(env, insn_idx);
872 if (id < 0)
873 return id;
874
875 state->stack[spi].spilled_ptr.ref_obj_id = id;
876 state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
877 }
878
879 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
880 state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
881
882 return 0;
883}
884
885static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
886{
887 struct bpf_func_state *state = func(env, reg);
888 int spi, i;
889
890 spi = dynptr_get_spi(env, reg);
891 if (spi < 0)
892 return spi;
893
894 for (i = 0; i < BPF_REG_SIZE; i++) {
895 state->stack[spi].slot_type[i] = STACK_INVALID;
896 state->stack[spi - 1].slot_type[i] = STACK_INVALID;
897 }
898
899 /* Invalidate any slices associated with this dynptr */
900 if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type))
901 WARN_ON_ONCE(release_reference(env, state->stack[spi].spilled_ptr.ref_obj_id));
902
903 __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
904 __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
905
906 /* Why do we need to set REG_LIVE_WRITTEN for STACK_INVALID slot?
907 *
908 * While we don't allow reading STACK_INVALID, it is still possible to
909 * do <8 byte writes marking some but not all slots as STACK_MISC. Then,
910 * helpers or insns can do partial read of that part without failing,
911 * but check_stack_range_initialized, check_stack_read_var_off, and
912 * check_stack_read_fixed_off will do mark_reg_read for all 8-bytes of
913 * the slot conservatively. Hence we need to prevent those liveness
914 * marking walks.
915 *
916 * This was not a problem before because STACK_INVALID is only set by
917 * default (where the default reg state has its reg->parent as NULL), or
918 * in clean_live_states after REG_LIVE_DONE (at which point
919 * mark_reg_read won't walk reg->parent chain), but not randomly during
920 * verifier state exploration (like we did above). Hence, for our case
921 * parentage chain will still be live (i.e. reg->parent may be
922 * non-NULL), while earlier reg->parent was NULL, so we need
923 * REG_LIVE_WRITTEN to screen off read marker propagation when it is
 924 * done later by reads or by mark_dynptr_read, so that registers in the
 925 * verifier state are not marked unnecessarily.
926 */
927 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
928 state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
929
930 return 0;
931}
932
933static void __mark_reg_unknown(const struct bpf_verifier_env *env,
934 struct bpf_reg_state *reg);
935
936static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
937{
938 if (!env->allow_ptr_leaks)
939 __mark_reg_not_init(env, reg);
940 else
941 __mark_reg_unknown(env, reg);
942}
943
944static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
945 struct bpf_func_state *state, int spi)
946{
947 struct bpf_func_state *fstate;
948 struct bpf_reg_state *dreg;
949 int i, dynptr_id;
950
951 /* We always ensure that STACK_DYNPTR is never set partially,
952 * hence just checking for slot_type[0] is enough. This is
953 * different for STACK_SPILL, where it may be only set for
954 * 1 byte, so code has to use is_spilled_reg.
955 */
956 if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
957 return 0;
958
959 /* Reposition spi to first slot */
960 if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
961 spi = spi + 1;
962
963 if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
964 verbose(env, "cannot overwrite referenced dynptr\n");
965 return -EINVAL;
966 }
967
968 mark_stack_slot_scratched(env, spi);
969 mark_stack_slot_scratched(env, spi - 1);
970
971 /* Writing partially to one dynptr stack slot destroys both. */
972 for (i = 0; i < BPF_REG_SIZE; i++) {
973 state->stack[spi].slot_type[i] = STACK_INVALID;
974 state->stack[spi - 1].slot_type[i] = STACK_INVALID;
975 }
976
977 dynptr_id = state->stack[spi].spilled_ptr.id;
978 /* Invalidate any slices associated with this dynptr */
979 bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({
980 /* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */
981 if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
982 continue;
983 if (dreg->dynptr_id == dynptr_id)
984 mark_reg_invalid(env, dreg);
985 }));
986
987 /* Do not release reference state, we are destroying dynptr on stack,
988 * not using some helper to release it. Just reset register.
989 */
990 __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
991 __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
992
993 /* Same reason as unmark_stack_slots_dynptr above */
994 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
995 state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
996
997 return 0;
998}
999
1000static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1001{
1002 int spi;
1003
1004 if (reg->type == CONST_PTR_TO_DYNPTR)
1005 return false;
1006
1007 spi = dynptr_get_spi(env, reg);
1008
1009 /* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
1010 * error because this just means the stack state hasn't been updated yet.
1011 * We will do check_mem_access to check and update stack bounds later.
1012 */
1013 if (spi < 0 && spi != -ERANGE)
1014 return false;
1015
1016 /* We don't need to check if the stack slots are marked by previous
1017 * dynptr initializations because we allow overwriting existing unreferenced
1018 * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
1019 * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
1020 * touching are completely destructed before we reinitialize them for a new
1021 * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
1022 * instead of delaying it until the end where the user will get "Unreleased
1023 * reference" error.
1024 */
1025 return true;
1026}
1027
1028static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1029{
1030 struct bpf_func_state *state = func(env, reg);
1031 int i, spi;
1032
1033 /* This already represents first slot of initialized bpf_dynptr.
1034 *
1035 * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
1036 * check_func_arg_reg_off's logic, so we don't need to check its
1037 * offset and alignment.
1038 */
1039 if (reg->type == CONST_PTR_TO_DYNPTR)
1040 return true;
1041
1042 spi = dynptr_get_spi(env, reg);
1043 if (spi < 0)
1044 return false;
1045 if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
1046 return false;
1047
1048 for (i = 0; i < BPF_REG_SIZE; i++) {
1049 if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
1050 state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
1051 return false;
1052 }
1053
1054 return true;
1055}
1056
1057static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1058 enum bpf_arg_type arg_type)
1059{
1060 struct bpf_func_state *state = func(env, reg);
1061 enum bpf_dynptr_type dynptr_type;
1062 int spi;
1063
1064 /* ARG_PTR_TO_DYNPTR takes any type of dynptr */
1065 if (arg_type == ARG_PTR_TO_DYNPTR)
1066 return true;
1067
1068 dynptr_type = arg_to_dynptr_type(arg_type);
1069 if (reg->type == CONST_PTR_TO_DYNPTR) {
1070 return reg->dynptr.type == dynptr_type;
1071 } else {
1072 spi = dynptr_get_spi(env, reg);
1073 if (spi < 0)
1074 return false;
1075 return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
1076 }
1077}
1078
1079/* The reg state of a pointer or a bounded scalar was saved when
1080 * it was spilled to the stack.
1081 */
1082static bool is_spilled_reg(const struct bpf_stack_state *stack)
1083{
1084 return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
1085}
1086
1087static void scrub_spilled_slot(u8 *stype)
1088{
1089 if (*stype != STACK_INVALID)
1090 *stype = STACK_MISC;
1091}
1092
1093static void print_verifier_state(struct bpf_verifier_env *env,
1094 const struct bpf_func_state *state,
1095 bool print_all)
1096{
1097 const struct bpf_reg_state *reg;
1098 enum bpf_reg_type t;
1099 int i;
1100
1101 if (state->frameno)
1102 verbose(env, " frame%d:", state->frameno);
1103 for (i = 0; i < MAX_BPF_REG; i++) {
1104 reg = &state->regs[i];
1105 t = reg->type;
1106 if (t == NOT_INIT)
1107 continue;
1108 if (!print_all && !reg_scratched(env, i))
1109 continue;
1110 verbose(env, " R%d", i);
1111 print_liveness(env, reg->live);
1112 verbose(env, "=");
1113 if (t == SCALAR_VALUE && reg->precise)
1114 verbose(env, "P");
1115 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
1116 tnum_is_const(reg->var_off)) {
1117 /* reg->off should be 0 for SCALAR_VALUE */
1118 verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
1119 verbose(env, "%lld", reg->var_off.value + reg->off);
1120 } else {
1121 const char *sep = "";
1122
1123 verbose(env, "%s", reg_type_str(env, t));
1124 if (base_type(t) == PTR_TO_BTF_ID)
1125 verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
1126 verbose(env, "(");
1127/*
 1128 * _a stands for append; it was shortened to avoid multiline statements below.
1129 * This macro is used to output a comma separated list of attributes.
1130 */
1131#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; })
1132
1133 if (reg->id)
1134 verbose_a("id=%d", reg->id);
1135 if (reg->ref_obj_id)
1136 verbose_a("ref_obj_id=%d", reg->ref_obj_id);
1137 if (type_is_non_owning_ref(reg->type))
1138 verbose_a("%s", "non_own_ref");
1139 if (t != SCALAR_VALUE)
1140 verbose_a("off=%d", reg->off);
1141 if (type_is_pkt_pointer(t))
1142 verbose_a("r=%d", reg->range);
1143 else if (base_type(t) == CONST_PTR_TO_MAP ||
1144 base_type(t) == PTR_TO_MAP_KEY ||
1145 base_type(t) == PTR_TO_MAP_VALUE)
1146 verbose_a("ks=%d,vs=%d",
1147 reg->map_ptr->key_size,
1148 reg->map_ptr->value_size);
1149 if (tnum_is_const(reg->var_off)) {
1150 /* Typically an immediate SCALAR_VALUE, but
1151 * could be a pointer whose offset is too big
1152 * for reg->off
1153 */
1154 verbose_a("imm=%llx", reg->var_off.value);
1155 } else {
1156 if (reg->smin_value != reg->umin_value &&
1157 reg->smin_value != S64_MIN)
1158 verbose_a("smin=%lld", (long long)reg->smin_value);
1159 if (reg->smax_value != reg->umax_value &&
1160 reg->smax_value != S64_MAX)
1161 verbose_a("smax=%lld", (long long)reg->smax_value);
1162 if (reg->umin_value != 0)
1163 verbose_a("umin=%llu", (unsigned long long)reg->umin_value);
1164 if (reg->umax_value != U64_MAX)
1165 verbose_a("umax=%llu", (unsigned long long)reg->umax_value);
1166 if (!tnum_is_unknown(reg->var_off)) {
1167 char tn_buf[48];
1168
1169 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1170 verbose_a("var_off=%s", tn_buf);
1171 }
1172 if (reg->s32_min_value != reg->smin_value &&
1173 reg->s32_min_value != S32_MIN)
1174 verbose_a("s32_min=%d", (int)(reg->s32_min_value));
1175 if (reg->s32_max_value != reg->smax_value &&
1176 reg->s32_max_value != S32_MAX)
1177 verbose_a("s32_max=%d", (int)(reg->s32_max_value));
1178 if (reg->u32_min_value != reg->umin_value &&
1179 reg->u32_min_value != U32_MIN)
1180 verbose_a("u32_min=%d", (int)(reg->u32_min_value));
1181 if (reg->u32_max_value != reg->umax_value &&
1182 reg->u32_max_value != U32_MAX)
1183 verbose_a("u32_max=%d", (int)(reg->u32_max_value));
1184 }
1185#undef verbose_a
1186
1187 verbose(env, ")");
1188 }
1189 }
1190 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
1191 char types_buf[BPF_REG_SIZE + 1];
1192 bool valid = false;
1193 int j;
1194
1195 for (j = 0; j < BPF_REG_SIZE; j++) {
1196 if (state->stack[i].slot_type[j] != STACK_INVALID)
1197 valid = true;
1198 types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
1199 }
1200 types_buf[BPF_REG_SIZE] = 0;
1201 if (!valid)
1202 continue;
1203 if (!print_all && !stack_slot_scratched(env, i))
1204 continue;
1205 switch (state->stack[i].slot_type[BPF_REG_SIZE - 1]) {
1206 case STACK_SPILL:
1207 reg = &state->stack[i].spilled_ptr;
1208 t = reg->type;
1209
1210 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
1211 print_liveness(env, reg->live);
1212 verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
1213 if (t == SCALAR_VALUE && reg->precise)
1214 verbose(env, "P");
1215 if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
1216 verbose(env, "%lld", reg->var_off.value + reg->off);
1217 break;
1218 case STACK_DYNPTR:
1219 i += BPF_DYNPTR_NR_SLOTS - 1;
1220 reg = &state->stack[i].spilled_ptr;
1221
1222 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
1223 print_liveness(env, reg->live);
1224 verbose(env, "=dynptr_%s", dynptr_type_str(reg->dynptr.type));
1225 if (reg->ref_obj_id)
1226 verbose(env, "(ref_id=%d)", reg->ref_obj_id);
1227 break;
1228 case STACK_MISC:
1229 case STACK_ZERO:
1230 default:
1231 reg = &state->stack[i].spilled_ptr;
1232
1233 for (j = 0; j < BPF_REG_SIZE; j++)
1234 types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
1235 types_buf[BPF_REG_SIZE] = 0;
1236
1237 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
1238 print_liveness(env, reg->live);
1239 verbose(env, "=%s", types_buf);
1240 break;
1241 }
1242 }
1243 if (state->acquired_refs && state->refs[0].id) {
1244 verbose(env, " refs=%d", state->refs[0].id);
1245 for (i = 1; i < state->acquired_refs; i++)
1246 if (state->refs[i].id)
1247 verbose(env, ",%d", state->refs[i].id);
1248 }
1249 if (state->in_callback_fn)
1250 verbose(env, " cb");
1251 if (state->in_async_callback_fn)
1252 verbose(env, " async_cb");
1253 verbose(env, "\n");
1254 mark_verifier_state_clean(env);
1255}
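
/* Illustrative sample of the format produced above (the exact attributes
 * depend on the tracked state); a line in the verifier log may look roughly
 * like:
 *   R1_w=map_value(off=0,ks=4,vs=8,imm=0) R10=fp0 fp-8_w=mmmmmmmm
 * where "_w" is the REG_LIVE_WRITTEN liveness marker and "mmmmmmmm" shows
 * eight STACK_MISC slot bytes.
 */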
1256
1257static inline u32 vlog_alignment(u32 pos)
1258{
1259 return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT),
1260 BPF_LOG_MIN_ALIGNMENT) - pos - 1;
1261}
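
/* Worked example (assuming BPF_LOG_MIN_ALIGNMENT is 8 and BPF_LOG_ALIGNMENT
 * is 40, as defined in bpf_verifier.h): for pos = 10, max(10 + 4, 40) = 40
 * and round_up(40, 8) = 40, so vlog_alignment() returns 40 - 10 - 1 = 29,
 * the padding used below to line up the ';' state annotation.
 */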
1262
1263static void print_insn_state(struct bpf_verifier_env *env,
1264 const struct bpf_func_state *state)
1265{
1266 if (env->prev_log_len && env->prev_log_len == env->log.len_used) {
1267 /* remove new line character */
1268 bpf_vlog_reset(&env->log, env->prev_log_len - 1);
1269 verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_len), ' ');
1270 } else {
1271 verbose(env, "%d:", env->insn_idx);
1272 }
1273 print_verifier_state(env, state, false);
1274}
1275
1276/* copy array src of length n * size bytes to dst. dst is reallocated if it's too
1277 * small to hold src. This is different from krealloc since we don't want to preserve
1278 * the contents of dst.
1279 *
1280 * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
1281 * not be allocated.
1282 */
1283static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
1284{
1285 size_t alloc_bytes;
1286 void *orig = dst;
1287 size_t bytes;
1288
1289 if (ZERO_OR_NULL_PTR(src))
1290 goto out;
1291
1292 if (unlikely(check_mul_overflow(n, size, &bytes)))
1293 return NULL;
1294
1295 alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
1296 dst = krealloc(orig, alloc_bytes, flags);
1297 if (!dst) {
1298 kfree(orig);
1299 return NULL;
1300 }
1301
1302 memcpy(dst, src, bytes);
1303out:
1304 return dst ? dst : ZERO_SIZE_PTR;
1305}
1306
1307/* resize an array from old_n items to new_n items. the array is reallocated if it's too
1308 * small to hold new_n items. new items are zeroed out if the array grows.
1309 *
1310 * Contrary to krealloc_array, does not free arr if new_n is zero.
1311 */
1312static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
1313{
1314 size_t alloc_size;
1315 void *new_arr;
1316
1317 if (!new_n || old_n == new_n)
1318 goto out;
1319
1320 alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
1321 new_arr = krealloc(arr, alloc_size, GFP_KERNEL);
1322 if (!new_arr) {
1323 kfree(arr);
1324 return NULL;
1325 }
1326 arr = new_arr;
1327
1328 if (new_n > old_n)
1329 memset(arr + old_n * size, 0, (new_n - old_n) * size);
1330
1331out:
1332 return arr ? arr : ZERO_SIZE_PTR;
1333}
1334
1335static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1336{
1337 dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
1338 sizeof(struct bpf_reference_state), GFP_KERNEL);
1339 if (!dst->refs)
1340 return -ENOMEM;
1341
1342 dst->acquired_refs = src->acquired_refs;
1343 return 0;
1344}
1345
1346static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1347{
1348 size_t n = src->allocated_stack / BPF_REG_SIZE;
1349
1350 dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
1351 GFP_KERNEL);
1352 if (!dst->stack)
1353 return -ENOMEM;
1354
1355 dst->allocated_stack = src->allocated_stack;
1356 return 0;
1357}
1358
1359static int resize_reference_state(struct bpf_func_state *state, size_t n)
1360{
1361 state->refs = realloc_array(state->refs, state->acquired_refs, n,
1362 sizeof(struct bpf_reference_state));
1363 if (!state->refs)
1364 return -ENOMEM;
1365
1366 state->acquired_refs = n;
1367 return 0;
1368}
1369
1370static int grow_stack_state(struct bpf_func_state *state, int size)
1371{
1372 size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
1373
1374 if (old_n >= n)
1375 return 0;
1376
1377 state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
1378 if (!state->stack)
1379 return -ENOMEM;
1380
1381 state->allocated_stack = size;
1382 return 0;
1383}
1384
1385/* Acquire a pointer id from the env and update the state->refs to include
1386 * this new pointer reference.
1387 * On success, returns a valid pointer id to associate with the register
1388 * On failure, returns a negative errno.
1389 */
1390static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
1391{
1392 struct bpf_func_state *state = cur_func(env);
1393 int new_ofs = state->acquired_refs;
1394 int id, err;
1395
1396 err = resize_reference_state(state, state->acquired_refs + 1);
1397 if (err)
1398 return err;
1399 id = ++env->id_gen;
1400 state->refs[new_ofs].id = id;
1401 state->refs[new_ofs].insn_idx = insn_idx;
1402 state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : 0;
1403
1404 return id;
1405}
1406
1407/* release function corresponding to acquire_reference_state(). Idempotent. */
1408static int release_reference_state(struct bpf_func_state *state, int ptr_id)
1409{
1410 int i, last_idx;
1411
1412 last_idx = state->acquired_refs - 1;
1413 for (i = 0; i < state->acquired_refs; i++) {
1414 if (state->refs[i].id == ptr_id) {
1415 /* Cannot release caller references in callbacks */
1416 if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
1417 return -EINVAL;
1418 if (last_idx && i != last_idx)
1419 memcpy(&state->refs[i], &state->refs[last_idx],
1420 sizeof(*state->refs));
1421 memset(&state->refs[last_idx], 0, sizeof(*state->refs));
1422 state->acquired_refs--;
1423 return 0;
1424 }
1425 }
1426 return -EINVAL;
1427}
1428
1429static void free_func_state(struct bpf_func_state *state)
1430{
1431 if (!state)
1432 return;
1433 kfree(state->refs);
1434 kfree(state->stack);
1435 kfree(state);
1436}
1437
1438static void clear_jmp_history(struct bpf_verifier_state *state)
1439{
1440 kfree(state->jmp_history);
1441 state->jmp_history = NULL;
1442 state->jmp_history_cnt = 0;
1443}
1444
1445static void free_verifier_state(struct bpf_verifier_state *state,
1446 bool free_self)
1447{
1448 int i;
1449
1450 for (i = 0; i <= state->curframe; i++) {
1451 free_func_state(state->frame[i]);
1452 state->frame[i] = NULL;
1453 }
1454 clear_jmp_history(state);
1455 if (free_self)
1456 kfree(state);
1457}
1458
1459/* copy verifier state from src to dst growing dst stack space
1460 * when necessary to accommodate larger src stack
1461 */
1462static int copy_func_state(struct bpf_func_state *dst,
1463 const struct bpf_func_state *src)
1464{
1465 int err;
1466
1467 memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
1468 err = copy_reference_state(dst, src);
1469 if (err)
1470 return err;
1471 return copy_stack_state(dst, src);
1472}
1473
1474static int copy_verifier_state(struct bpf_verifier_state *dst_state,
1475 const struct bpf_verifier_state *src)
1476{
1477 struct bpf_func_state *dst;
1478 int i, err;
1479
1480 dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
1481 src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
1482 GFP_USER);
1483 if (!dst_state->jmp_history)
1484 return -ENOMEM;
1485 dst_state->jmp_history_cnt = src->jmp_history_cnt;
1486
 1487 /* if dst has more stack frames than src, free them */
1488 for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
1489 free_func_state(dst_state->frame[i]);
1490 dst_state->frame[i] = NULL;
1491 }
1492 dst_state->speculative = src->speculative;
1493 dst_state->active_rcu_lock = src->active_rcu_lock;
1494 dst_state->curframe = src->curframe;
1495 dst_state->active_lock.ptr = src->active_lock.ptr;
1496 dst_state->active_lock.id = src->active_lock.id;
1497 dst_state->branches = src->branches;
1498 dst_state->parent = src->parent;
1499 dst_state->first_insn_idx = src->first_insn_idx;
1500 dst_state->last_insn_idx = src->last_insn_idx;
1501 for (i = 0; i <= src->curframe; i++) {
1502 dst = dst_state->frame[i];
1503 if (!dst) {
1504 dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1505 if (!dst)
1506 return -ENOMEM;
1507 dst_state->frame[i] = dst;
1508 }
1509 err = copy_func_state(dst, src->frame[i]);
1510 if (err)
1511 return err;
1512 }
1513 return 0;
1514}
1515
1516static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
1517{
1518 while (st) {
1519 u32 br = --st->branches;
1520
1521 /* WARN_ON(br > 1) technically makes sense here,
1522 * but see comment in push_stack(), hence:
1523 */
1524 WARN_ONCE((int)br < 0,
1525 "BUG update_branch_counts:branches_to_explore=%d\n",
1526 br);
1527 if (br)
1528 break;
1529 st = st->parent;
1530 }
1531}
1532
1533static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
1534 int *insn_idx, bool pop_log)
1535{
1536 struct bpf_verifier_state *cur = env->cur_state;
1537 struct bpf_verifier_stack_elem *elem, *head = env->head;
1538 int err;
1539
1540 if (env->head == NULL)
1541 return -ENOENT;
1542
1543 if (cur) {
1544 err = copy_verifier_state(cur, &head->st);
1545 if (err)
1546 return err;
1547 }
1548 if (pop_log)
1549 bpf_vlog_reset(&env->log, head->log_pos);
1550 if (insn_idx)
1551 *insn_idx = head->insn_idx;
1552 if (prev_insn_idx)
1553 *prev_insn_idx = head->prev_insn_idx;
1554 elem = head->next;
1555 free_verifier_state(&head->st, false);
1556 kfree(head);
1557 env->head = elem;
1558 env->stack_size--;
1559 return 0;
1560}
1561
1562static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
1563 int insn_idx, int prev_insn_idx,
1564 bool speculative)
1565{
1566 struct bpf_verifier_state *cur = env->cur_state;
1567 struct bpf_verifier_stack_elem *elem;
1568 int err;
1569
1570 elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1571 if (!elem)
1572 goto err;
1573
1574 elem->insn_idx = insn_idx;
1575 elem->prev_insn_idx = prev_insn_idx;
1576 elem->next = env->head;
1577 elem->log_pos = env->log.len_used;
1578 env->head = elem;
1579 env->stack_size++;
1580 err = copy_verifier_state(&elem->st, cur);
1581 if (err)
1582 goto err;
1583 elem->st.speculative |= speculative;
1584 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1585 verbose(env, "The sequence of %d jumps is too complex.\n",
1586 env->stack_size);
1587 goto err;
1588 }
1589 if (elem->st.parent) {
1590 ++elem->st.parent->branches;
1591 /* WARN_ON(branches > 2) technically makes sense here,
1592 * but
1593 * 1. speculative states will bump 'branches' for non-branch
1594 * instructions
1595 * 2. is_state_visited() heuristics may decide not to create
1596 * a new state for a sequence of branches and all such current
1597 * and cloned states will be pointing to a single parent state
1598 * which might have large 'branches' count.
1599 */
1600 }
1601 return &elem->st;
1602err:
1603 free_verifier_state(env->cur_state, true);
1604 env->cur_state = NULL;
1605 /* pop all elements and return */
1606 while (!pop_stack(env, NULL, NULL, false));
1607 return NULL;
1608}
1609
1610#define CALLER_SAVED_REGS 6
1611static const int caller_saved[CALLER_SAVED_REGS] = {
1612 BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
1613};
1614
1615/* This helper doesn't clear reg->id */
1616static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1617{
1618 reg->var_off = tnum_const(imm);
1619 reg->smin_value = (s64)imm;
1620 reg->smax_value = (s64)imm;
1621 reg->umin_value = imm;
1622 reg->umax_value = imm;
1623
1624 reg->s32_min_value = (s32)imm;
1625 reg->s32_max_value = (s32)imm;
1626 reg->u32_min_value = (u32)imm;
1627 reg->u32_max_value = (u32)imm;
1628}
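
/* Worked example (illustrative): ___mark_reg_known(reg, -1ULL) turns the
 * register into a known constant: smin/smax become -1, umin/umax become
 * U64_MAX, and the 32-bit bounds collapse to -1 and U32_MAX respectively,
 * while reg->id is intentionally left untouched (see the comment above).
 */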
1629
1630/* Mark the unknown part of a register (variable offset or scalar value) as
1631 * known to have the value @imm.
1632 */
1633static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1634{
1635 /* Clear off and union(map_ptr, range) */
1636 memset(((u8 *)reg) + sizeof(reg->type), 0,
1637 offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1638 reg->id = 0;
1639 reg->ref_obj_id = 0;
1640 ___mark_reg_known(reg, imm);
1641}
1642
1643static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1644{
1645 reg->var_off = tnum_const_subreg(reg->var_off, imm);
1646 reg->s32_min_value = (s32)imm;
1647 reg->s32_max_value = (s32)imm;
1648 reg->u32_min_value = (u32)imm;
1649 reg->u32_max_value = (u32)imm;
1650}
1651
1652/* Mark the 'variable offset' part of a register as zero. This should be
1653 * used only on registers holding a pointer type.
1654 */
1655static void __mark_reg_known_zero(struct bpf_reg_state *reg)
1656{
1657 __mark_reg_known(reg, 0);
1658}
1659
1660static void __mark_reg_const_zero(struct bpf_reg_state *reg)
1661{
1662 __mark_reg_known(reg, 0);
1663 reg->type = SCALAR_VALUE;
1664}
1665
1666static void mark_reg_known_zero(struct bpf_verifier_env *env,
1667 struct bpf_reg_state *regs, u32 regno)
1668{
1669 if (WARN_ON(regno >= MAX_BPF_REG)) {
1670 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
1671 /* Something bad happened, let's kill all regs */
1672 for (regno = 0; regno < MAX_BPF_REG; regno++)
1673 __mark_reg_not_init(env, regs + regno);
1674 return;
1675 }
1676 __mark_reg_known_zero(regs + regno);
1677}
1678
1679static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
1680 bool first_slot, int dynptr_id)
1681{
1682 /* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
1683 * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
1684 * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
1685 */
1686 __mark_reg_known_zero(reg);
1687 reg->type = CONST_PTR_TO_DYNPTR;
1688 /* Give each dynptr a unique id to uniquely associate slices to it. */
1689 reg->id = dynptr_id;
1690 reg->dynptr.type = type;
1691 reg->dynptr.first_slot = first_slot;
1692}
1693
1694static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
1695{
1696 if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
1697 const struct bpf_map *map = reg->map_ptr;
1698
1699 if (map->inner_map_meta) {
1700 reg->type = CONST_PTR_TO_MAP;
1701 reg->map_ptr = map->inner_map_meta;
1702 /* transfer reg's id which is unique for every map_lookup_elem
1703 * as UID of the inner map.
1704 */
1705 if (btf_record_has_field(map->inner_map_meta->record, BPF_TIMER))
1706 reg->map_uid = reg->id;
1707 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
1708 reg->type = PTR_TO_XDP_SOCK;
1709 } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
1710 map->map_type == BPF_MAP_TYPE_SOCKHASH) {
1711 reg->type = PTR_TO_SOCKET;
1712 } else {
1713 reg->type = PTR_TO_MAP_VALUE;
1714 }
1715 return;
1716 }
1717
1718 reg->type &= ~PTR_MAYBE_NULL;
1719}
1720
1721static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
1722 struct btf_field_graph_root *ds_head)
1723{
1724 __mark_reg_known_zero(&regs[regno]);
1725 regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
1726 regs[regno].btf = ds_head->btf;
1727 regs[regno].btf_id = ds_head->value_btf_id;
1728 regs[regno].off = ds_head->node_offset;
1729}
1730
1731static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1732{
1733 return type_is_pkt_pointer(reg->type);
1734}
1735
1736static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1737{
1738 return reg_is_pkt_pointer(reg) ||
1739 reg->type == PTR_TO_PACKET_END;
1740}
1741
1742static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
1743{
1744 return base_type(reg->type) == PTR_TO_MEM &&
1745 (reg->type & DYNPTR_TYPE_SKB || reg->type & DYNPTR_TYPE_XDP);
1746}
1747
1748/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1749static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1750 enum bpf_reg_type which)
1751{
1752 /* The register can already have a range from prior markings.
1753 * This is fine as long as it hasn't been advanced from its
1754 * origin.
1755 */
1756 return reg->type == which &&
1757 reg->id == 0 &&
1758 reg->off == 0 &&
1759 tnum_equals_const(reg->var_off, 0);
1760}
1761
1762/* Reset the min/max bounds of a register */
1763static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1764{
1765 reg->smin_value = S64_MIN;
1766 reg->smax_value = S64_MAX;
1767 reg->umin_value = 0;
1768 reg->umax_value = U64_MAX;
1769
1770 reg->s32_min_value = S32_MIN;
1771 reg->s32_max_value = S32_MAX;
1772 reg->u32_min_value = 0;
1773 reg->u32_max_value = U32_MAX;
1774}
1775
1776static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1777{
1778 reg->smin_value = S64_MIN;
1779 reg->smax_value = S64_MAX;
1780 reg->umin_value = 0;
1781 reg->umax_value = U64_MAX;
1782}
1783
1784static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1785{
1786 reg->s32_min_value = S32_MIN;
1787 reg->s32_max_value = S32_MAX;
1788 reg->u32_min_value = 0;
1789 reg->u32_max_value = U32_MAX;
1790}
1791
1792static void __update_reg32_bounds(struct bpf_reg_state *reg)
1793{
1794 struct tnum var32_off = tnum_subreg(reg->var_off);
1795
1796 /* min signed is max(sign bit) | min(other bits) */
1797 reg->s32_min_value = max_t(s32, reg->s32_min_value,
1798 var32_off.value | (var32_off.mask & S32_MIN));
1799 /* max signed is min(sign bit) | max(other bits) */
1800 reg->s32_max_value = min_t(s32, reg->s32_max_value,
1801 var32_off.value | (var32_off.mask & S32_MAX));
1802 reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1803 reg->u32_max_value = min(reg->u32_max_value,
1804 (u32)(var32_off.value | var32_off.mask));
1805}
1806
1807static void __update_reg64_bounds(struct bpf_reg_state *reg)
1808{
1809 /* min signed is max(sign bit) | min(other bits) */
1810 reg->smin_value = max_t(s64, reg->smin_value,
1811 reg->var_off.value | (reg->var_off.mask & S64_MIN));
1812 /* max signed is min(sign bit) | max(other bits) */
1813 reg->smax_value = min_t(s64, reg->smax_value,
1814 reg->var_off.value | (reg->var_off.mask & S64_MAX));
1815 reg->umin_value = max(reg->umin_value, reg->var_off.value);
1816 reg->umax_value = min(reg->umax_value,
1817 reg->var_off.value | reg->var_off.mask);
1818}
1819
1820static void __update_reg_bounds(struct bpf_reg_state *reg)
1821{
1822 __update_reg32_bounds(reg);
1823 __update_reg64_bounds(reg);
1824}
1825
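/* Worked example with hypothetical values: say the register was previously
 * unbounded and its var_off is (0x0; 0x3), i.e. only the low two bits are
 * unknown. __update_reg64_bounds() then computes
 *   smin = max(S64_MIN, 0x0 | (0x3 & S64_MIN)) = 0
 *   smax = min(S64_MAX, 0x0 | (0x3 & S64_MAX)) = 3
 *   umin = max(0,       0x0)                   = 0
 *   umax = min(U64_MAX, 0x0 | 0x3)             = 3
 * so the register is now known to lie in [0, 3] both signed and unsigned.
 */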
1826/* Uses signed min/max values to inform unsigned, and vice-versa */
1827static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
1828{
1829 /* Learn sign from signed bounds.
1830 * If we cannot cross the sign boundary, then signed and unsigned bounds
1831 * are the same, so combine. This works even in the negative case, e.g.
1832 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1833 */
1834 if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
1835 reg->s32_min_value = reg->u32_min_value =
1836 max_t(u32, reg->s32_min_value, reg->u32_min_value);
1837 reg->s32_max_value = reg->u32_max_value =
1838 min_t(u32, reg->s32_max_value, reg->u32_max_value);
1839 return;
1840 }
1841 /* Learn sign from unsigned bounds. Signed bounds cross the sign
1842 * boundary, so we must be careful.
1843 */
1844 if ((s32)reg->u32_max_value >= 0) {
1845 /* Positive. We can't learn anything from the smin, but smax
1846 * is positive, hence safe.
1847 */
1848 reg->s32_min_value = reg->u32_min_value;
1849 reg->s32_max_value = reg->u32_max_value =
1850 min_t(u32, reg->s32_max_value, reg->u32_max_value);
1851 } else if ((s32)reg->u32_min_value < 0) {
1852 /* Negative. We can't learn anything from the smax, but smin
1853 * is negative, hence safe.
1854 */
1855 reg->s32_min_value = reg->u32_min_value =
1856 max_t(u32, reg->s32_min_value, reg->u32_min_value);
1857 reg->s32_max_value = reg->u32_max_value;
1858 }
1859}
1860
1861static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
1862{
1863 /* Learn sign from signed bounds.
1864 * If we cannot cross the sign boundary, then signed and unsigned bounds
1865 * are the same, so combine. This works even in the negative case, e.g.
1866 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1867 */
1868 if (reg->smin_value >= 0 || reg->smax_value < 0) {
1869 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1870 reg->umin_value);
1871 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1872 reg->umax_value);
1873 return;
1874 }
1875 /* Learn sign from unsigned bounds. Signed bounds cross the sign
1876 * boundary, so we must be careful.
1877 */
1878 if ((s64)reg->umax_value >= 0) {
1879 /* Positive. We can't learn anything from the smin, but smax
1880 * is positive, hence safe.
1881 */
1882 reg->smin_value = reg->umin_value;
1883 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1884 reg->umax_value);
1885 } else if ((s64)reg->umin_value < 0) {
1886 /* Negative. We can't learn anything from the smax, but smin
1887 * is negative, hence safe.
1888 */
1889 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1890 reg->umin_value);
1891 reg->smax_value = reg->umax_value;
1892 }
1893}
1894
1895static void __reg_deduce_bounds(struct bpf_reg_state *reg)
1896{
1897 __reg32_deduce_bounds(reg);
1898 __reg64_deduce_bounds(reg);
1899}
1900
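/* A small hypothetical example of the deduction above: if the unsigned
 * bounds are known to be [0, 10] while the signed bounds are still
 * [S64_MIN, S64_MAX], the signed bounds cross the sign boundary, but
 * (s64)umax_value is 10, which is >= 0, so every value in the unsigned
 * range is non-negative when interpreted as signed.
 * __reg64_deduce_bounds() therefore tightens the signed bounds to
 * [0, 10] as well.
 */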
1901/* Attempts to improve var_off based on unsigned min/max information */
1902static void __reg_bound_offset(struct bpf_reg_state *reg)
1903{
1904 struct tnum var64_off = tnum_intersect(reg->var_off,
1905 tnum_range(reg->umin_value,
1906 reg->umax_value));
1907 struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
1908 tnum_range(reg->u32_min_value,
1909 reg->u32_max_value));
1910
1911 reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
1912}
1913
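/* For illustration (hypothetical values): with umin = 0, umax = 0x7f and a
 * fully unknown var_off, tnum_range(0, 0x7f) is the tnum (0x0; 0x7f), so
 * the intersection teaches us that all bits above bit 6 are known zero
 * while the low seven bits remain unknown.
 */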
1914static void reg_bounds_sync(struct bpf_reg_state *reg)
1915{
1916 /* We might have learned new bounds from the var_off. */
1917 __update_reg_bounds(reg);
1918 /* We might have learned something about the sign bit. */
1919 __reg_deduce_bounds(reg);
1920 /* We might have learned some bits from the bounds. */
1921 __reg_bound_offset(reg);
1922 /* Intersecting with the old var_off might have improved our bounds
1923 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1924 * then new var_off is (0; 0x7f...fc) which improves our umax.
1925 */
1926 __update_reg_bounds(reg);
1927}
1928
1929static bool __reg32_bound_s64(s32 a)
1930{
1931 return a >= 0 && a <= S32_MAX;
1932}
1933
1934static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
1935{
1936 reg->umin_value = reg->u32_min_value;
1937 reg->umax_value = reg->u32_max_value;
1938
1939 /* Attempt to pull 32-bit signed bounds into 64-bit bounds, but they
1940 * must be non-negative; otherwise set worst-case bounds and refine
1941 * later from the tnum.
1942 */
1943 if (__reg32_bound_s64(reg->s32_min_value) &&
1944 __reg32_bound_s64(reg->s32_max_value)) {
1945 reg->smin_value = reg->s32_min_value;
1946 reg->smax_value = reg->s32_max_value;
1947 } else {
1948 reg->smin_value = 0;
1949 reg->smax_value = U32_MAX;
1950 }
1951}
1952
1953static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
1954{
1955 /* special case when the 64-bit register has its upper 32 bits
1956 * zeroed. Typically happens after a zext or <<32, >>32 sequence,
1957 * allowing us to use the 32-bit bounds directly.
1958 */
1959 if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
1960 __reg_assign_32_into_64(reg);
1961 } else {
1962 /* Otherwise the best we can do is push lower 32bit known and
1963 * unknown bits into register (var_off set from jmp logic)
1964 * then learn as much as possible from the 64-bit tnum
1965 * known and unknown bits. The previous smin/smax bounds are
1966 * invalid here because of jmp32 compare so mark them unknown
1967 * so they do not impact tnum bounds calculation.
1968 */
1969 __mark_reg64_unbounded(reg);
1970 }
1971 reg_bounds_sync(reg);
1972}
1973
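/* Concrete (hypothetical) case for the helper above: after a 32-bit mov the
 * upper half of var_off is known zero, so if the subreg bounds were
 * u32/s32 in [5, 100], __reg_assign_32_into_64() copies them straight into
 * the 64-bit bounds, giving [5, 100] there as well. If the upper 32 bits
 * are not known zero, the 64-bit bounds are reset instead and only refined
 * from the tnum by reg_bounds_sync().
 */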
1974static bool __reg64_bound_s32(s64 a)
1975{
1976 return a >= S32_MIN && a <= S32_MAX;
1977}
1978
1979static bool __reg64_bound_u32(u64 a)
1980{
1981 return a >= U32_MIN && a <= U32_MAX;
1982}
1983
1984static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
1985{
1986 __mark_reg32_unbounded(reg);
1987 if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
1988 reg->s32_min_value = (s32)reg->smin_value;
1989 reg->s32_max_value = (s32)reg->smax_value;
1990 }
1991 if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
1992 reg->u32_min_value = (u32)reg->umin_value;
1993 reg->u32_max_value = (u32)reg->umax_value;
1994 }
1995 reg_bounds_sync(reg);
1996}
1997
1998/* Mark a register as having a completely unknown (scalar) value. */
1999static void __mark_reg_unknown(const struct bpf_verifier_env *env,
2000 struct bpf_reg_state *reg)
2001{
2002 /*
2003 * Clear type, off, and union(map_ptr, range) and
2004 * padding between 'type' and union
2005 */
2006 memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
2007 reg->type = SCALAR_VALUE;
2008 reg->id = 0;
2009 reg->ref_obj_id = 0;
2010 reg->var_off = tnum_unknown;
2011 reg->frameno = 0;
2012 reg->precise = !env->bpf_capable;
2013 __mark_reg_unbounded(reg);
2014}
2015
2016static void mark_reg_unknown(struct bpf_verifier_env *env,
2017 struct bpf_reg_state *regs, u32 regno)
2018{
2019 if (WARN_ON(regno >= MAX_BPF_REG)) {
2020 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
2021 /* Something bad happened, let's kill all regs except FP */
2022 for (regno = 0; regno < BPF_REG_FP; regno++)
2023 __mark_reg_not_init(env, regs + regno);
2024 return;
2025 }
2026 __mark_reg_unknown(env, regs + regno);
2027}
2028
2029static void __mark_reg_not_init(const struct bpf_verifier_env *env,
2030 struct bpf_reg_state *reg)
2031{
2032 __mark_reg_unknown(env, reg);
2033 reg->type = NOT_INIT;
2034}
2035
2036static void mark_reg_not_init(struct bpf_verifier_env *env,
2037 struct bpf_reg_state *regs, u32 regno)
2038{
2039 if (WARN_ON(regno >= MAX_BPF_REG)) {
2040 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
2041 /* Something bad happened, let's kill all regs except FP */
2042 for (regno = 0; regno < BPF_REG_FP; regno++)
2043 __mark_reg_not_init(env, regs + regno);
2044 return;
2045 }
2046 __mark_reg_not_init(env, regs + regno);
2047}
2048
2049static void mark_btf_ld_reg(struct bpf_verifier_env *env,
2050 struct bpf_reg_state *regs, u32 regno,
2051 enum bpf_reg_type reg_type,
2052 struct btf *btf, u32 btf_id,
2053 enum bpf_type_flag flag)
2054{
2055 if (reg_type == SCALAR_VALUE) {
2056 mark_reg_unknown(env, regs, regno);
2057 return;
2058 }
2059 mark_reg_known_zero(env, regs, regno);
2060 regs[regno].type = PTR_TO_BTF_ID | flag;
2061 regs[regno].btf = btf;
2062 regs[regno].btf_id = btf_id;
2063}
2064
2065#define DEF_NOT_SUBREG (0)
2066static void init_reg_state(struct bpf_verifier_env *env,
2067 struct bpf_func_state *state)
2068{
2069 struct bpf_reg_state *regs = state->regs;
2070 int i;
2071
2072 for (i = 0; i < MAX_BPF_REG; i++) {
2073 mark_reg_not_init(env, regs, i);
2074 regs[i].live = REG_LIVE_NONE;
2075 regs[i].parent = NULL;
2076 regs[i].subreg_def = DEF_NOT_SUBREG;
2077 }
2078
2079 /* frame pointer */
2080 regs[BPF_REG_FP].type = PTR_TO_STACK;
2081 mark_reg_known_zero(env, regs, BPF_REG_FP);
2082 regs[BPF_REG_FP].frameno = state->frameno;
2083}
2084
2085#define BPF_MAIN_FUNC (-1)
2086static void init_func_state(struct bpf_verifier_env *env,
2087 struct bpf_func_state *state,
2088 int callsite, int frameno, int subprogno)
2089{
2090 state->callsite = callsite;
2091 state->frameno = frameno;
2092 state->subprogno = subprogno;
2093 state->callback_ret_range = tnum_range(0, 0);
2094 init_reg_state(env, state);
2095 mark_verifier_state_scratched(env);
2096}
2097
2098/* Similar to push_stack(), but for async callbacks */
2099static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
2100 int insn_idx, int prev_insn_idx,
2101 int subprog)
2102{
2103 struct bpf_verifier_stack_elem *elem;
2104 struct bpf_func_state *frame;
2105
2106 elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
2107 if (!elem)
2108 goto err;
2109
2110 elem->insn_idx = insn_idx;
2111 elem->prev_insn_idx = prev_insn_idx;
2112 elem->next = env->head;
2113 elem->log_pos = env->log.len_used;
2114 env->head = elem;
2115 env->stack_size++;
2116 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
2117 verbose(env,
2118 "The sequence of %d jumps is too complex for async cb.\n",
2119 env->stack_size);
2120 goto err;
2121 }
2122 /* Unlike push_stack() do not copy_verifier_state().
2123 * The caller state doesn't matter.
2124 * This is an async callback. It starts with a fresh stack.
2125 * Initialize it similarly to do_check_common().
2126 */
2127 elem->st.branches = 1;
2128 frame = kzalloc(sizeof(*frame), GFP_KERNEL);
2129 if (!frame)
2130 goto err;
2131 init_func_state(env, frame,
2132 BPF_MAIN_FUNC /* callsite */,
2133 0 /* frameno within this callchain */,
2134 subprog /* subprog number within this prog */);
2135 elem->st.frame[0] = frame;
2136 return &elem->st;
2137err:
2138 free_verifier_state(env->cur_state, true);
2139 env->cur_state = NULL;
2140 /* pop all elements and return */
2141 while (!pop_stack(env, NULL, NULL, false));
2142 return NULL;
2143}
2144
2145
2146enum reg_arg_type {
2147 SRC_OP, /* register is used as source operand */
2148 DST_OP, /* register is used as destination operand */
2149 DST_OP_NO_MARK /* same as above, check only, don't mark */
2150};
2151
2152static int cmp_subprogs(const void *a, const void *b)
2153{
2154 return ((struct bpf_subprog_info *)a)->start -
2155 ((struct bpf_subprog_info *)b)->start;
2156}
2157
2158static int find_subprog(struct bpf_verifier_env *env, int off)
2159{
2160 struct bpf_subprog_info *p;
2161
2162 p = bsearch(&off, env->subprog_info, env->subprog_cnt,
2163 sizeof(env->subprog_info[0]), cmp_subprogs);
2164 if (!p)
2165 return -ENOENT;
2166 return p - env->subprog_info;
2167
2168}
2169
2170static int add_subprog(struct bpf_verifier_env *env, int off)
2171{
2172 int insn_cnt = env->prog->len;
2173 int ret;
2174
2175 if (off >= insn_cnt || off < 0) {
2176 verbose(env, "call to invalid destination\n");
2177 return -EINVAL;
2178 }
2179 ret = find_subprog(env, off);
2180 if (ret >= 0)
2181 return ret;
2182 if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
2183 verbose(env, "too many subprograms\n");
2184 return -E2BIG;
2185 }
2186 /* determine subprog starts. The end is one before the next starts */
2187 env->subprog_info[env->subprog_cnt++].start = off;
2188 sort(env->subprog_info, env->subprog_cnt,
2189 sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
2190 return env->subprog_cnt - 1;
2191}
2192
2193#define MAX_KFUNC_DESCS 256
2194#define MAX_KFUNC_BTFS 256
2195
2196struct bpf_kfunc_desc {
2197 struct btf_func_model func_model;
2198 u32 func_id;
2199 s32 imm;
2200 u16 offset;
2201};
2202
2203struct bpf_kfunc_btf {
2204 struct btf *btf;
2205 struct module *module;
2206 u16 offset;
2207};
2208
2209struct bpf_kfunc_desc_tab {
2210 struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
2211 u32 nr_descs;
2212};
2213
2214struct bpf_kfunc_btf_tab {
2215 struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
2216 u32 nr_descs;
2217};
2218
2219static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
2220{
2221 const struct bpf_kfunc_desc *d0 = a;
2222 const struct bpf_kfunc_desc *d1 = b;
2223
2224 /* func_id is not greater than BTF_MAX_TYPE */
2225 return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
2226}
2227
2228static int kfunc_btf_cmp_by_off(const void *a, const void *b)
2229{
2230 const struct bpf_kfunc_btf *d0 = a;
2231 const struct bpf_kfunc_btf *d1 = b;
2232
2233 return d0->offset - d1->offset;
2234}
2235
2236static const struct bpf_kfunc_desc *
2237find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
2238{
2239 struct bpf_kfunc_desc desc = {
2240 .func_id = func_id,
2241 .offset = offset,
2242 };
2243 struct bpf_kfunc_desc_tab *tab;
2244
2245 tab = prog->aux->kfunc_tab;
2246 return bsearch(&desc, tab->descs, tab->nr_descs,
2247 sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
2248}
2249
2250static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
2251 s16 offset)
2252{
2253 struct bpf_kfunc_btf kf_btf = { .offset = offset };
2254 struct bpf_kfunc_btf_tab *tab;
2255 struct bpf_kfunc_btf *b;
2256 struct module *mod;
2257 struct btf *btf;
2258 int btf_fd;
2259
2260 tab = env->prog->aux->kfunc_btf_tab;
2261 b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
2262 sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
2263 if (!b) {
2264 if (tab->nr_descs == MAX_KFUNC_BTFS) {
2265 verbose(env, "too many different module BTFs\n");
2266 return ERR_PTR(-E2BIG);
2267 }
2268
2269 if (bpfptr_is_null(env->fd_array)) {
2270 verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
2271 return ERR_PTR(-EPROTO);
2272 }
2273
2274 if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
2275 offset * sizeof(btf_fd),
2276 sizeof(btf_fd)))
2277 return ERR_PTR(-EFAULT);
2278
2279 btf = btf_get_by_fd(btf_fd);
2280 if (IS_ERR(btf)) {
2281 verbose(env, "invalid module BTF fd specified\n");
2282 return btf;
2283 }
2284
2285 if (!btf_is_module(btf)) {
2286 verbose(env, "BTF fd for kfunc is not a module BTF\n");
2287 btf_put(btf);
2288 return ERR_PTR(-EINVAL);
2289 }
2290
2291 mod = btf_try_get_module(btf);
2292 if (!mod) {
2293 btf_put(btf);
2294 return ERR_PTR(-ENXIO);
2295 }
2296
2297 b = &tab->descs[tab->nr_descs++];
2298 b->btf = btf;
2299 b->module = mod;
2300 b->offset = offset;
2301
2302 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2303 kfunc_btf_cmp_by_off, NULL);
2304 }
2305 return b->btf;
2306}
2307
2308void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
2309{
2310 if (!tab)
2311 return;
2312
2313 while (tab->nr_descs--) {
2314 module_put(tab->descs[tab->nr_descs].module);
2315 btf_put(tab->descs[tab->nr_descs].btf);
2316 }
2317 kfree(tab);
2318}
2319
2320static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
2321{
2322 if (offset) {
2323 if (offset < 0) {
2324 /* In the future, this can be allowed to increase limit
2325 * of fd index into fd_array, interpreted as u16.
2326 */
2327 verbose(env, "negative offset disallowed for kernel module function call\n");
2328 return ERR_PTR(-EINVAL);
2329 }
2330
2331 return __find_kfunc_desc_btf(env, offset);
2332 }
2333 return btf_vmlinux ?: ERR_PTR(-ENOENT);
2334}
2335
2336static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
2337{
2338 const struct btf_type *func, *func_proto;
2339 struct bpf_kfunc_btf_tab *btf_tab;
2340 struct bpf_kfunc_desc_tab *tab;
2341 struct bpf_prog_aux *prog_aux;
2342 struct bpf_kfunc_desc *desc;
2343 const char *func_name;
2344 struct btf *desc_btf;
2345 unsigned long call_imm;
2346 unsigned long addr;
2347 int err;
2348
2349 prog_aux = env->prog->aux;
2350 tab = prog_aux->kfunc_tab;
2351 btf_tab = prog_aux->kfunc_btf_tab;
2352 if (!tab) {
2353 if (!btf_vmlinux) {
2354 verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
2355 return -ENOTSUPP;
2356 }
2357
2358 if (!env->prog->jit_requested) {
2359 verbose(env, "JIT is required for calling kernel function\n");
2360 return -ENOTSUPP;
2361 }
2362
2363 if (!bpf_jit_supports_kfunc_call()) {
2364 verbose(env, "JIT does not support calling kernel function\n");
2365 return -ENOTSUPP;
2366 }
2367
2368 if (!env->prog->gpl_compatible) {
2369 verbose(env, "cannot call kernel function from non-GPL compatible program\n");
2370 return -EINVAL;
2371 }
2372
2373 tab = kzalloc(sizeof(*tab), GFP_KERNEL);
2374 if (!tab)
2375 return -ENOMEM;
2376 prog_aux->kfunc_tab = tab;
2377 }
2378
2379 /* func_id == 0 is always invalid, but instead of returning an error, be
2380 * conservative and wait until the code elimination pass before returning
2381 * an error, so that invalid calls that get pruned out are still allowed in
2382 * BPF programs loaded from userspace. It is also required that offset be
2383 * untouched for such calls.
2384 */
2385 if (!func_id && !offset)
2386 return 0;
2387
2388 if (!btf_tab && offset) {
2389 btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
2390 if (!btf_tab)
2391 return -ENOMEM;
2392 prog_aux->kfunc_btf_tab = btf_tab;
2393 }
2394
2395 desc_btf = find_kfunc_desc_btf(env, offset);
2396 if (IS_ERR(desc_btf)) {
2397 verbose(env, "failed to find BTF for kernel function\n");
2398 return PTR_ERR(desc_btf);
2399 }
2400
2401 if (find_kfunc_desc(env->prog, func_id, offset))
2402 return 0;
2403
2404 if (tab->nr_descs == MAX_KFUNC_DESCS) {
2405 verbose(env, "too many different kernel function calls\n");
2406 return -E2BIG;
2407 }
2408
2409 func = btf_type_by_id(desc_btf, func_id);
2410 if (!func || !btf_type_is_func(func)) {
2411 verbose(env, "kernel btf_id %u is not a function\n",
2412 func_id);
2413 return -EINVAL;
2414 }
2415 func_proto = btf_type_by_id(desc_btf, func->type);
2416 if (!func_proto || !btf_type_is_func_proto(func_proto)) {
2417 verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
2418 func_id);
2419 return -EINVAL;
2420 }
2421
2422 func_name = btf_name_by_offset(desc_btf, func->name_off);
2423 addr = kallsyms_lookup_name(func_name);
2424 if (!addr) {
2425 verbose(env, "cannot find address for kernel function %s\n",
2426 func_name);
2427 return -EINVAL;
2428 }
2429
2430 call_imm = BPF_CALL_IMM(addr);
2431 /* Check whether or not the relative offset overflows desc->imm */
2432 if ((unsigned long)(s32)call_imm != call_imm) {
2433 verbose(env, "address of kernel function %s is out of range\n",
2434 func_name);
2435 return -EINVAL;
2436 }
2437
2438 if (bpf_dev_bound_kfunc_id(func_id)) {
2439 err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
2440 if (err)
2441 return err;
2442 }
2443
2444 desc = &tab->descs[tab->nr_descs++];
2445 desc->func_id = func_id;
2446 desc->imm = call_imm;
2447 desc->offset = offset;
2448 err = btf_distill_func_proto(&env->log, desc_btf,
2449 func_proto, func_name,
2450 &desc->func_model);
2451 if (!err)
2452 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2453 kfunc_desc_cmp_by_id_off, NULL);
2454 return err;
2455}
2456
2457static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
2458{
2459 const struct bpf_kfunc_desc *d0 = a;
2460 const struct bpf_kfunc_desc *d1 = b;
2461
2462 if (d0->imm > d1->imm)
2463 return 1;
2464 else if (d0->imm < d1->imm)
2465 return -1;
2466 return 0;
2467}
2468
2469static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
2470{
2471 struct bpf_kfunc_desc_tab *tab;
2472
2473 tab = prog->aux->kfunc_tab;
2474 if (!tab)
2475 return;
2476
2477 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2478 kfunc_desc_cmp_by_imm, NULL);
2479}
2480
2481bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
2482{
2483 return !!prog->aux->kfunc_tab;
2484}
2485
2486const struct btf_func_model *
2487bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
2488 const struct bpf_insn *insn)
2489{
2490 const struct bpf_kfunc_desc desc = {
2491 .imm = insn->imm,
2492 };
2493 const struct bpf_kfunc_desc *res;
2494 struct bpf_kfunc_desc_tab *tab;
2495
2496 tab = prog->aux->kfunc_tab;
2497 res = bsearch(&desc, tab->descs, tab->nr_descs,
2498 sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm);
2499
2500 return res ? &res->func_model : NULL;
2501}
2502
2503static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
2504{
2505 struct bpf_subprog_info *subprog = env->subprog_info;
2506 struct bpf_insn *insn = env->prog->insnsi;
2507 int i, ret, insn_cnt = env->prog->len;
2508
2509 /* Add entry function. */
2510 ret = add_subprog(env, 0);
2511 if (ret)
2512 return ret;
2513
2514 for (i = 0; i < insn_cnt; i++, insn++) {
2515 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
2516 !bpf_pseudo_kfunc_call(insn))
2517 continue;
2518
2519 if (!env->bpf_capable) {
2520 verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
2521 return -EPERM;
2522 }
2523
2524 if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
2525 ret = add_subprog(env, i + insn->imm + 1);
2526 else
2527 ret = add_kfunc_call(env, insn->imm, insn->off);
2528
2529 if (ret < 0)
2530 return ret;
2531 }
2532
2533 /* Add a fake 'exit' subprog to simplify subprog iteration logic.
2534 * 'subprog_cnt' is not increased.
2535 */
2536 subprog[env->subprog_cnt].start = insn_cnt;
2537
2538 if (env->log.level & BPF_LOG_LEVEL2)
2539 for (i = 0; i < env->subprog_cnt; i++)
2540 verbose(env, "func#%d @%d\n", i, subprog[i].start);
2541
2542 return 0;
2543}
2544
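/* For example (hypothetical program): a prog of 20 insns whose insn 3 is a
 * bpf-to-bpf call targeting insn 10 ends up with subprog_info starts
 * {0, 10, 20}, where the last entry is the fake 'exit' subprog added above.
 * subprog_cnt stays 2 and subprog i spans [start[i], start[i + 1]).
 */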
2545static int check_subprogs(struct bpf_verifier_env *env)
2546{
2547 int i, subprog_start, subprog_end, off, cur_subprog = 0;
2548 struct bpf_subprog_info *subprog = env->subprog_info;
2549 struct bpf_insn *insn = env->prog->insnsi;
2550 int insn_cnt = env->prog->len;
2551
2552 /* now check that all jumps are within the same subprog */
2553 subprog_start = subprog[cur_subprog].start;
2554 subprog_end = subprog[cur_subprog + 1].start;
2555 for (i = 0; i < insn_cnt; i++) {
2556 u8 code = insn[i].code;
2557
2558 if (code == (BPF_JMP | BPF_CALL) &&
2559 insn[i].src_reg == 0 &&
2560 insn[i].imm == BPF_FUNC_tail_call)
2561 subprog[cur_subprog].has_tail_call = true;
2562 if (BPF_CLASS(code) == BPF_LD &&
2563 (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
2564 subprog[cur_subprog].has_ld_abs = true;
2565 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
2566 goto next;
2567 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
2568 goto next;
2569 off = i + insn[i].off + 1;
2570 if (off < subprog_start || off >= subprog_end) {
2571 verbose(env, "jump out of range from insn %d to %d\n", i, off);
2572 return -EINVAL;
2573 }
2574next:
2575 if (i == subprog_end - 1) {
2576 /* to avoid fall-through from one subprog into another
2577 * the last insn of the subprog should be either exit
2578 * or unconditional jump back
2579 */
2580 if (code != (BPF_JMP | BPF_EXIT) &&
2581 code != (BPF_JMP | BPF_JA)) {
2582 verbose(env, "last insn is not an exit or jmp\n");
2583 return -EINVAL;
2584 }
2585 subprog_start = subprog_end;
2586 cur_subprog++;
2587 if (cur_subprog < env->subprog_cnt)
2588 subprog_end = subprog[cur_subprog + 1].start;
2589 }
2590 }
2591 return 0;
2592}
2593
2594/* Parentage chain of this register (or stack slot) should take care of all
2595 * issues like callee-saved registers, stack slot allocation time, etc.
2596 */
2597static int mark_reg_read(struct bpf_verifier_env *env,
2598 const struct bpf_reg_state *state,
2599 struct bpf_reg_state *parent, u8 flag)
2600{
2601 bool writes = parent == state->parent; /* Observe write marks */
2602 int cnt = 0;
2603
2604 while (parent) {
2605 /* if read wasn't screened by an earlier write ... */
2606 if (writes && state->live & REG_LIVE_WRITTEN)
2607 break;
2608 if (parent->live & REG_LIVE_DONE) {
2609 verbose(env, "verifier BUG type %s var_off %lld off %d\n",
2610 reg_type_str(env, parent->type),
2611 parent->var_off.value, parent->off);
2612 return -EFAULT;
2613 }
2614 /* The first condition is more likely to be true than the
2615 * second, so check it first.
2616 */
2617 if ((parent->live & REG_LIVE_READ) == flag ||
2618 parent->live & REG_LIVE_READ64)
2619 /* The parentage chain never changes and
2620 * this parent was already marked as LIVE_READ.
2621 * There is no need to keep walking the chain again and
2622 * keep re-marking all parents as LIVE_READ.
2623 * This case happens when the same register is read
2624 * multiple times without writes into it in-between.
2625 * Also, if parent has the stronger REG_LIVE_READ64 set,
2626 * then no need to set the weak REG_LIVE_READ32.
2627 */
2628 break;
2629 /* ... then we depend on parent's value */
2630 parent->live |= flag;
2631 /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
2632 if (flag == REG_LIVE_READ64)
2633 parent->live &= ~REG_LIVE_READ32;
2634 state = parent;
2635 parent = state->parent;
2636 writes = true;
2637 cnt++;
2638 }
2639
2640 if (env->longest_mark_read_walk < cnt)
2641 env->longest_mark_read_walk = cnt;
2642 return 0;
2643}
2644
2645static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
2646{
2647 struct bpf_func_state *state = func(env, reg);
2648 int spi, ret;
2649
2650 /* For CONST_PTR_TO_DYNPTR, it must have already been done by
2651 * check_reg_arg in check_helper_call and mark_btf_func_reg_size in
2652 * check_kfunc_call.
2653 */
2654 if (reg->type == CONST_PTR_TO_DYNPTR)
2655 return 0;
2656 spi = dynptr_get_spi(env, reg);
2657 if (spi < 0)
2658 return spi;
2659 /* Caller ensures dynptr is valid and initialized, which means spi is in
2660 * bounds and spi is the first dynptr slot. Simply mark stack slot as
2661 * read.
2662 */
2663 ret = mark_reg_read(env, &state->stack[spi].spilled_ptr,
2664 state->stack[spi].spilled_ptr.parent, REG_LIVE_READ64);
2665 if (ret)
2666 return ret;
2667 return mark_reg_read(env, &state->stack[spi - 1].spilled_ptr,
2668 state->stack[spi - 1].spilled_ptr.parent, REG_LIVE_READ64);
2669}
2670
2671/* This function is only meant to be used by the following 32-bit optimization
2672 * code. It returns TRUE if the source or destination register operates
2673 * on 64 bits, otherwise FALSE.
2674 */
2675static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
2676 u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
2677{
2678 u8 code, class, op;
2679
2680 code = insn->code;
2681 class = BPF_CLASS(code);
2682 op = BPF_OP(code);
2683 if (class == BPF_JMP) {
2684 /* BPF_EXIT for "main" will reach here. Return TRUE
2685 * conservatively.
2686 */
2687 if (op == BPF_EXIT)
2688 return true;
2689 if (op == BPF_CALL) {
2690 /* BPF to BPF call will reach here because caller-saved
2691 * clobbers are marked with DST_OP_NO_MARK, for which we
2692 * don't care about the register def because they are
2693 * already marked as NOT_INIT anyway.
2694 */
2695 if (insn->src_reg == BPF_PSEUDO_CALL)
2696 return false;
2697 /* Helper call will reach here because of arg type
2698 * check, conservatively return TRUE.
2699 */
2700 if (t == SRC_OP)
2701 return true;
2702
2703 return false;
2704 }
2705 }
2706
2707 if (class == BPF_ALU64 || class == BPF_JMP ||
2708 /* BPF_END always uses BPF_ALU class. */
2709 (class == BPF_ALU && op == BPF_END && insn->imm == 64))
2710 return true;
2711
2712 if (class == BPF_ALU || class == BPF_JMP32)
2713 return false;
2714
2715 if (class == BPF_LDX) {
2716 if (t != SRC_OP)
2717 return BPF_SIZE(code) == BPF_DW;
2718 /* LDX source must be ptr. */
2719 return true;
2720 }
2721
2722 if (class == BPF_STX) {
2723 /* BPF_STX (including atomic variants) has multiple source
2724 * operands, one of which is a ptr. Check whether the caller is
2725 * asking about it.
2726 */
2727 if (t == SRC_OP && reg->type != SCALAR_VALUE)
2728 return true;
2729 return BPF_SIZE(code) == BPF_DW;
2730 }
2731
2732 if (class == BPF_LD) {
2733 u8 mode = BPF_MODE(code);
2734
2735 /* LD_IMM64 */
2736 if (mode == BPF_IMM)
2737 return true;
2738
2739 /* Both LD_IND and LD_ABS return 32-bit data. */
2740 if (t != SRC_OP)
2741 return false;
2742
2743 /* Implicit ctx ptr. */
2744 if (regno == BPF_REG_6)
2745 return true;
2746
2747 /* Explicit source could be any width. */
2748 return true;
2749 }
2750
2751 if (class == BPF_ST)
2752 /* The only source register for BPF_ST is a ptr. */
2753 return true;
2754
2755 /* Conservatively return true at default. */
2756 return true;
2757}
2758
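/* A few concrete cases of the classification above (hypothetical insns):
 * a BPF_ALU64 BPF_ADD is a 64-bit op; the same add in class BPF_ALU only
 * defines the low 32 bits; an LDX destination is 64-bit only for BPF_DW
 * loads, while its pointer source is always treated as 64-bit.
 */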
2759/* Return the regno defined by the insn, or -1. */
2760static int insn_def_regno(const struct bpf_insn *insn)
2761{
2762 switch (BPF_CLASS(insn->code)) {
2763 case BPF_JMP:
2764 case BPF_JMP32:
2765 case BPF_ST:
2766 return -1;
2767 case BPF_STX:
2768 if (BPF_MODE(insn->code) == BPF_ATOMIC &&
2769 (insn->imm & BPF_FETCH)) {
2770 if (insn->imm == BPF_CMPXCHG)
2771 return BPF_REG_0;
2772 else
2773 return insn->src_reg;
2774 } else {
2775 return -1;
2776 }
2777 default:
2778 return insn->dst_reg;
2779 }
2780}
2781
2782/* Return TRUE if INSN has defined any 32-bit value explicitly. */
2783static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
2784{
2785 int dst_reg = insn_def_regno(insn);
2786
2787 if (dst_reg == -1)
2788 return false;
2789
2790 return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
2791}
2792
2793static void mark_insn_zext(struct bpf_verifier_env *env,
2794 struct bpf_reg_state *reg)
2795{
2796 s32 def_idx = reg->subreg_def;
2797
2798 if (def_idx == DEF_NOT_SUBREG)
2799 return;
2800
2801 env->insn_aux_data[def_idx - 1].zext_dst = true;
2802 /* The dst will be zero extended, so won't be sub-register anymore. */
2803 reg->subreg_def = DEF_NOT_SUBREG;
2804}
2805
2806static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
2807 enum reg_arg_type t)
2808{
2809 struct bpf_verifier_state *vstate = env->cur_state;
2810 struct bpf_func_state *state = vstate->frame[vstate->curframe];
2811 struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
2812 struct bpf_reg_state *reg, *regs = state->regs;
2813 bool rw64;
2814
2815 if (regno >= MAX_BPF_REG) {
2816 verbose(env, "R%d is invalid\n", regno);
2817 return -EINVAL;
2818 }
2819
2820 mark_reg_scratched(env, regno);
2821
2822 reg = &regs[regno];
2823 rw64 = is_reg64(env, insn, regno, reg, t);
2824 if (t == SRC_OP) {
2825 /* check whether register used as source operand can be read */
2826 if (reg->type == NOT_INIT) {
2827 verbose(env, "R%d !read_ok\n", regno);
2828 return -EACCES;
2829 }
2830 /* We don't need to worry about FP liveness because it's read-only */
2831 if (regno == BPF_REG_FP)
2832 return 0;
2833
2834 if (rw64)
2835 mark_insn_zext(env, reg);
2836
2837 return mark_reg_read(env, reg, reg->parent,
2838 rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
2839 } else {
2840 /* check whether register used as dest operand can be written to */
2841 if (regno == BPF_REG_FP) {
2842 verbose(env, "frame pointer is read only\n");
2843 return -EACCES;
2844 }
2845 reg->live |= REG_LIVE_WRITTEN;
2846 reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
2847 if (t == DST_OP)
2848 mark_reg_unknown(env, regs, regno);
2849 }
2850 return 0;
2851}
2852
2853static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
2854{
2855 env->insn_aux_data[idx].jmp_point = true;
2856}
2857
2858static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
2859{
2860 return env->insn_aux_data[insn_idx].jmp_point;
2861}
2862
2863/* for any branch, call, exit record the history of jmps in the given state */
2864static int push_jmp_history(struct bpf_verifier_env *env,
2865 struct bpf_verifier_state *cur)
2866{
2867 u32 cnt = cur->jmp_history_cnt;
2868 struct bpf_idx_pair *p;
2869 size_t alloc_size;
2870
2871 if (!is_jmp_point(env, env->insn_idx))
2872 return 0;
2873
2874 cnt++;
2875 alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
2876 p = krealloc(cur->jmp_history, alloc_size, GFP_USER);
2877 if (!p)
2878 return -ENOMEM;
2879 p[cnt - 1].idx = env->insn_idx;
2880 p[cnt - 1].prev_idx = env->prev_insn_idx;
2881 cur->jmp_history = p;
2882 cur->jmp_history_cnt = cnt;
2883 return 0;
2884}
2885
2886/* Backtrack one insn at a time. If idx is not at the top of the recorded
2887 * history then the previous instruction came from straight-line execution.
2888 */
2889static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
2890 u32 *history)
2891{
2892 u32 cnt = *history;
2893
2894 if (cnt && st->jmp_history[cnt - 1].idx == i) {
2895 i = st->jmp_history[cnt - 1].prev_idx;
2896 (*history)--;
2897 } else {
2898 i--;
2899 }
2900 return i;
2901}
2902
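/* Backtracking walk on a hypothetical history: with
 * jmp_history = [{idx: 7, prev_idx: 3}, {idx: 12, prev_idx: 9}] and
 * cnt == 2, stepping back from insn 12 returns 9 (a recorded jump) and
 * decrements cnt; from any other index i it simply returns i - 1,
 * i.e. straight-line execution.
 */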
2903static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
2904{
2905 const struct btf_type *func;
2906 struct btf *desc_btf;
2907
2908 if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
2909 return NULL;
2910
2911 desc_btf = find_kfunc_desc_btf(data, insn->off);
2912 if (IS_ERR(desc_btf))
2913 return "<error>";
2914
2915 func = btf_type_by_id(desc_btf, insn->imm);
2916 return btf_name_by_offset(desc_btf, func->name_off);
2917}
2918
2919/* For given verifier state backtrack_insn() is called from the last insn to
2920 * the first insn. Its purpose is to compute a bitmask of registers and
2921 * stack slots that needs precision in the parent verifier state.
2922 */
2923static int backtrack_insn(struct bpf_verifier_env *env, int idx,
2924 u32 *reg_mask, u64 *stack_mask)
2925{
2926 const struct bpf_insn_cbs cbs = {
2927 .cb_call = disasm_kfunc_name,
2928 .cb_print = verbose,
2929 .private_data = env,
2930 };
2931 struct bpf_insn *insn = env->prog->insnsi + idx;
2932 u8 class = BPF_CLASS(insn->code);
2933 u8 opcode = BPF_OP(insn->code);
2934 u8 mode = BPF_MODE(insn->code);
2935 u32 dreg = 1u << insn->dst_reg;
2936 u32 sreg = 1u << insn->src_reg;
2937 u32 spi;
2938
2939 if (insn->code == 0)
2940 return 0;
2941 if (env->log.level & BPF_LOG_LEVEL2) {
2942 verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
2943 verbose(env, "%d: ", idx);
2944 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
2945 }
2946
2947 if (class == BPF_ALU || class == BPF_ALU64) {
2948 if (!(*reg_mask & dreg))
2949 return 0;
2950 if (opcode == BPF_MOV) {
2951 if (BPF_SRC(insn->code) == BPF_X) {
2952 /* dreg = sreg
2953 * dreg needs precision after this insn
2954 * sreg needs precision before this insn
2955 */
2956 *reg_mask &= ~dreg;
2957 *reg_mask |= sreg;
2958 } else {
2959 /* dreg = K
2960 * dreg needs precision after this insn.
2961 * Corresponding register is already marked
2962 * as precise=true in this verifier state.
2963 * No further markings in parent are necessary
2964 */
2965 *reg_mask &= ~dreg;
2966 }
2967 } else {
2968 if (BPF_SRC(insn->code) == BPF_X) {
2969 /* dreg += sreg
2970 * both dreg and sreg need precision
2971 * before this insn
2972 */
2973 *reg_mask |= sreg;
2974 } /* else dreg += K
2975 * dreg still needs precision before this insn
2976 */
2977 }
2978 } else if (class == BPF_LDX) {
2979 if (!(*reg_mask & dreg))
2980 return 0;
2981 *reg_mask &= ~dreg;
2982
2983 /* scalars can only be spilled into stack w/o losing precision.
2984 * Load from any other memory can be zero extended.
2985 * The desire to keep that precision is already indicated
2986 * by 'precise' mark in corresponding register of this state.
2987 * No further tracking necessary.
2988 */
2989 if (insn->src_reg != BPF_REG_FP)
2990 return 0;
2991
2992 /* dreg = *(u64 *)[fp - off] was a fill from the stack.
2993 * that [fp - off] slot contains scalar that needs to be
2994 * tracked with precision
2995 */
2996 spi = (-insn->off - 1) / BPF_REG_SIZE;
2997 if (spi >= 64) {
2998 verbose(env, "BUG spi %d\n", spi);
2999 WARN_ONCE(1, "verifier backtracking bug");
3000 return -EFAULT;
3001 }
3002 *stack_mask |= 1ull << spi;
3003 } else if (class == BPF_STX || class == BPF_ST) {
3004 if (*reg_mask & dreg)
3005 /* stx & st shouldn't be using _scalar_ dst_reg
3006 * to access memory. It means backtracking
3007 * encountered a case of pointer subtraction.
3008 */
3009 return -ENOTSUPP;
3010 /* scalars can only be spilled into stack */
3011 if (insn->dst_reg != BPF_REG_FP)
3012 return 0;
3013 spi = (-insn->off - 1) / BPF_REG_SIZE;
3014 if (spi >= 64) {
3015 verbose(env, "BUG spi %d\n", spi);
3016 WARN_ONCE(1, "verifier backtracking bug");
3017 return -EFAULT;
3018 }
3019 if (!(*stack_mask & (1ull << spi)))
3020 return 0;
3021 *stack_mask &= ~(1ull << spi);
3022 if (class == BPF_STX)
3023 *reg_mask |= sreg;
3024 } else if (class == BPF_JMP || class == BPF_JMP32) {
3025 if (opcode == BPF_CALL) {
3026 if (insn->src_reg == BPF_PSEUDO_CALL)
3027 return -ENOTSUPP;
3028 /* BPF helpers that invoke callback subprogs are
3029 * equivalent to BPF_PSEUDO_CALL above
3030 */
3031 if (insn->src_reg == 0 && is_callback_calling_function(insn->imm))
3032 return -ENOTSUPP;
3033 /* kfunc with imm==0 is invalid and fixup_kfunc_call will
3034 * catch this error later. Make backtracking conservative
3035 * with ENOTSUPP.
3036 */
3037 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
3038 return -ENOTSUPP;
3039 /* regular helper call sets R0 */
3040 *reg_mask &= ~1;
3041 if (*reg_mask & 0x3f) {
3042 /* if backtracking was looking for registers R1-R5
3043 * they should have been found already.
3044 */
3045 verbose(env, "BUG regs %x\n", *reg_mask);
3046 WARN_ONCE(1, "verifier backtracking bug");
3047 return -EFAULT;
3048 }
3049 } else if (opcode == BPF_EXIT) {
3050 return -ENOTSUPP;
3051 }
3052 } else if (class == BPF_LD) {
3053 if (!(*reg_mask & dreg))
3054 return 0;
3055 *reg_mask &= ~dreg;
3056 /* It's ld_imm64 or ld_abs or ld_ind.
3057 * For ld_imm64 no further tracking of precision
3058 * into parent is necessary
3059 */
3060 if (mode == BPF_IND || mode == BPF_ABS)
3061 /* to be analyzed */
3062 return -ENOTSUPP;
3063 }
3064 return 0;
3065}
3066
3067/* the scalar precision tracking algorithm:
3068 * . at the start all registers have precise=false.
3069 * . scalar ranges are tracked as normal through alu and jmp insns.
3070 * . once precise value of the scalar register is used in:
3071 * . ptr + scalar alu
3072 * . if (scalar cond K|scalar)
3073 * . helper_call(.., scalar, ...) where ARG_CONST is expected
3074 * backtrack through the verifier states and mark all registers and
3075 * stack slots with spilled constants that these scalar registers
3076 * should be precise.
3077 * . during state pruning two registers (or spilled stack slots)
3078 * are equivalent if both are not precise.
3079 *
3080 * Note the verifier cannot simply walk register parentage chain,
3081 * since many different registers and stack slots could have been
3082 * used to compute single precise scalar.
3083 *
3084 * The approach of starting with precise=true for all registers and then
3085 * backtrack to mark a register as not precise when the verifier detects
3086 * that program doesn't care about specific value (e.g., when helper
3087 * takes register as ARG_ANYTHING parameter) is not safe.
3088 *
3089 * It's ok to walk single parentage chain of the verifier states.
3090 * It's possible that this backtracking will go all the way till 1st insn.
3091 * All other branches will be explored for needing precision later.
3092 *
3093 * The backtracking needs to deal with cases like:
3094 * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
3095 * r9 -= r8
3096 * r5 = r9
3097 * if r5 > 0x79f goto pc+7
3098 * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
3099 * r5 += 1
3100 * ...
3101 * call bpf_perf_event_output#25
3102 * where .arg5_type = ARG_CONST_SIZE_OR_ZERO
3103 *
3104 * and this case:
3105 * r6 = 1
3106 * call foo // uses callee's r6 inside to compute r0
3107 * r0 += r6
3108 * if r0 == 0 goto
3109 *
3110 * to track above reg_mask/stack_mask needs to be independent for each frame.
3111 *
3112 * Also if parent's curframe > frame where backtracking started,
3113 * the verifier needs to mark registers in both frames, otherwise callees
3114 * may incorrectly prune callers. This is similar to
3115 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
3116 *
3117 * For now backtracking falls back into conservative marking.
3118 */
3119static void mark_all_scalars_precise(struct bpf_verifier_env *env,
3120 struct bpf_verifier_state *st)
3121{
3122 struct bpf_func_state *func;
3123 struct bpf_reg_state *reg;
3124 int i, j;
3125
3126 /* big hammer: mark all scalars precise in this path.
3127 * pop_stack may still get !precise scalars.
3128 * We also skip current state and go straight to first parent state,
3129 * because precision markings in current non-checkpointed state are
3130 * not needed. See why in the comment in __mark_chain_precision below.
3131 */
3132 for (st = st->parent; st; st = st->parent) {
3133 for (i = 0; i <= st->curframe; i++) {
3134 func = st->frame[i];
3135 for (j = 0; j < BPF_REG_FP; j++) {
3136 reg = &func->regs[j];
3137 if (reg->type != SCALAR_VALUE)
3138 continue;
3139 reg->precise = true;
3140 }
3141 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
3142 if (!is_spilled_reg(&func->stack[j]))
3143 continue;
3144 reg = &func->stack[j].spilled_ptr;
3145 if (reg->type != SCALAR_VALUE)
3146 continue;
3147 reg->precise = true;
3148 }
3149 }
3150 }
3151}
3152
3153static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
3154{
3155 struct bpf_func_state *func;
3156 struct bpf_reg_state *reg;
3157 int i, j;
3158
3159 for (i = 0; i <= st->curframe; i++) {
3160 func = st->frame[i];
3161 for (j = 0; j < BPF_REG_FP; j++) {
3162 reg = &func->regs[j];
3163 if (reg->type != SCALAR_VALUE)
3164 continue;
3165 reg->precise = false;
3166 }
3167 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
3168 if (!is_spilled_reg(&func->stack[j]))
3169 continue;
3170 reg = &func->stack[j].spilled_ptr;
3171 if (reg->type != SCALAR_VALUE)
3172 continue;
3173 reg->precise = false;
3174 }
3175 }
3176}
3177
3178/*
3179 * __mark_chain_precision() backtracks BPF program instruction sequence and
3180 * chain of verifier states making sure that register *regno* (if regno >= 0)
3181 * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
3182 * SCALARS, as well as any other registers and slots that contribute to
3183 * a tracked state of given registers/stack slots, depending on specific BPF
3184 * assembly instructions (see backtrack_insn() for exact instruction handling
3185 * logic). This backtracking relies on recorded jmp_history and is able to
3186 * traverse entire chain of parent states. This process ends only when all the
3187 * necessary registers/slots and their transitive dependencies are marked as
3188 * precise.
3189 *
3190 * One important and subtle aspect is that precise marks *do not matter* in
3191 * the currently verified state (current state). It is important to understand
3192 * why this is the case.
3193 *
3194 * First, note that current state is the state that is not yet "checkpointed",
3195 * i.e., it is not yet put into env->explored_states, and it has no children
3196 * states as well. It's ephemeral, and can end up either a) being discarded if
3197 * compatible explored state is found at some point or BPF_EXIT instruction is
3198 * reached or b) checkpointed and put into env->explored_states, branching out
3199 * into one or more children states.
3200 *
3201 * In the former case, precise markings in current state are completely
3202 * ignored by state comparison code (see regsafe() for details). Only
3203 * checkpointed ("old") state precise markings are important, and if old
3204 * state's register/slot is precise, regsafe() assumes current state's
3205 * register/slot as precise and checks value ranges exactly and precisely. If
3206 * states turn out to be compatible, current state's necessary precise
3207 * markings and any required parent states' precise markings are enforced
3208 * after the fact with propagate_precision() logic. But it's
3209 * important to realize that in this case, even after marking current state
3210 * registers/slots as precise, we immediately discard current state. So what
3211 * actually matters is any of the precise markings propagated into current
3212 * state's parent states, which are always checkpointed (due to b) case above).
3213 * As such, for scenario a) it doesn't matter if current state has precise
3214 * markings set or not.
3215 *
3216 * Now, for the scenario b), checkpointing and forking into child(ren)
3217 * state(s). Note that before current state gets to checkpointing step, any
3218 * processed instruction always assumes precise SCALAR register/slot
3219 * knowledge: if precise value or range is useful to prune jump branch, BPF
3220 * verifier takes this opportunity enthusiastically. Similarly, when
3221 * register's value is used to calculate offset or memory address, exact
3222 * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
3223 * what we mentioned above about state comparison ignoring precise markings
3224 * during state comparison, BPF verifier ignores and also assumes precise
3225 * markings *at will* during instruction verification process. But as verifier
3226 * assumes precision, it also propagates any precision dependencies across
3227 * parent states, which are not yet finalized, so can be further restricted
3228 * based on new knowledge gained from restrictions enforced by their children
3229 * states. This is so that once those parent states are finalized, i.e., when
3230 * they have no more active children state, state comparison logic in
3231 * is_state_visited() would enforce strict and precise SCALAR ranges, if
3232 * required for correctness.
3233 *
3234 * To build a bit more intuition, note also that once a state is checkpointed,
3235 * the path we took to get to that state is not important. This is crucial
3236 * property for state pruning. When state is checkpointed and finalized at
3237 * some instruction index, it can be correctly and safely used to "short
3238 * circuit" any *compatible* state that reaches exactly the same instruction
3239 * index. I.e., if we jumped to that instruction from a completely different
3240 * code path than original finalized state was derived from, it doesn't
3241 * matter, current state can be discarded because from that instruction
3242 * forward having a compatible state will ensure we will safely reach the
3243 * exit. States describe preconditions for further exploration, but completely
3244 * forget the history of how we got here.
3245 *
3246 * This also means that even if we needed precise SCALAR range to get to
3247 * finalized state, but from that point forward *that same* SCALAR register is
3248 * never used in a precise context (i.e., its precise value is not needed for
3249 * correctness), it's correct and safe to mark such register as "imprecise"
3250 * (i.e., precise marking set to false). This is what we rely on when we do
3251 * not set precise marking in current state. If no child state requires
3252 * precision for any given SCALAR register, it's safe to dictate that it can
3253 * be imprecise. If any child state does require this register to be precise,
3254 * we'll mark it precise later retroactively during precise markings
3255 * propagation from child state to parent states.
3256 *
3257 * Skipping precise marking setting in current state is a mild version of
3258 * relying on the above observation. But we can utilize this property even
3259 * more aggressively by proactively forgetting any precise marking in the
3260 * current state (which we inherited from the parent state), right before we
3261 * checkpoint it and branch off into new child state. This is done by
3262 * mark_all_scalars_imprecise() to hopefully get more permissive and generic
3263 * finalized states which help in short circuiting more future states.
3264 */
3265static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int regno,
3266 int spi)
3267{
3268 struct bpf_verifier_state *st = env->cur_state;
3269 int first_idx = st->first_insn_idx;
3270 int last_idx = env->insn_idx;
3271 struct bpf_func_state *func;
3272 struct bpf_reg_state *reg;
3273 u32 reg_mask = regno >= 0 ? 1u << regno : 0;
3274 u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
3275 bool skip_first = true;
3276 bool new_marks = false;
3277 int i, err;
3278
3279 if (!env->bpf_capable)
3280 return 0;
3281
3282 /* Do sanity checks against current state of register and/or stack
3283 * slot, but don't set precise flag in current state, as precision
3284 * tracking in the current state is unnecessary.
3285 */
3286 func = st->frame[frame];
3287 if (regno >= 0) {
3288 reg = &func->regs[regno];
3289 if (reg->type != SCALAR_VALUE) {
3290 WARN_ONCE(1, "backtracing misuse");
3291 return -EFAULT;
3292 }
3293 new_marks = true;
3294 }
3295
3296 while (spi >= 0) {
3297 if (!is_spilled_reg(&func->stack[spi])) {
3298 stack_mask = 0;
3299 break;
3300 }
3301 reg = &func->stack[spi].spilled_ptr;
3302 if (reg->type != SCALAR_VALUE) {
3303 stack_mask = 0;
3304 break;
3305 }
3306 new_marks = true;
3307 break;
3308 }
3309
3310 if (!new_marks)
3311 return 0;
3312 if (!reg_mask && !stack_mask)
3313 return 0;
3314
3315 for (;;) {
3316 DECLARE_BITMAP(mask, 64);
3317 u32 history = st->jmp_history_cnt;
3318
3319 if (env->log.level & BPF_LOG_LEVEL2)
3320 verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
3321
3322 if (last_idx < 0) {
3323 /* we are at the entry into subprog, which
3324 * is expected for global funcs, but only if
3325 * requested precise registers are R1-R5
3326 * (which are global func's input arguments)
3327 */
3328 if (st->curframe == 0 &&
3329 st->frame[0]->subprogno > 0 &&
3330 st->frame[0]->callsite == BPF_MAIN_FUNC &&
3331 stack_mask == 0 && (reg_mask & ~0x3e) == 0) {
3332 bitmap_from_u64(mask, reg_mask);
3333 for_each_set_bit(i, mask, 32) {
3334 reg = &st->frame[0]->regs[i];
3335 if (reg->type != SCALAR_VALUE) {
3336 reg_mask &= ~(1u << i);
3337 continue;
3338 }
3339 reg->precise = true;
3340 }
3341 return 0;
3342 }
3343
3344 verbose(env, "BUG backtracing func entry subprog %d reg_mask %x stack_mask %llx\n",
3345 st->frame[0]->subprogno, reg_mask, stack_mask);
3346 WARN_ONCE(1, "verifier backtracking bug");
3347 return -EFAULT;
3348 }
3349
3350 for (i = last_idx;;) {
3351 if (skip_first) {
3352 err = 0;
3353 skip_first = false;
3354 } else {
3355 err = backtrack_insn(env, i, &reg_mask, &stack_mask);
3356 }
3357 if (err == -ENOTSUPP) {
3358 mark_all_scalars_precise(env, st);
3359 return 0;
3360 } else if (err) {
3361 return err;
3362 }
3363 if (!reg_mask && !stack_mask)
3364 /* Found assignment(s) into tracked register in this state.
3365 * Since this state is already marked, just return.
3366 * Nothing to be tracked further in the parent state.
3367 */
3368 return 0;
3369 if (i == first_idx)
3370 break;
3371 i = get_prev_insn_idx(st, i, &history);
3372 if (i >= env->prog->len) {
3373 /* This can happen if backtracking reached insn 0
3374 * and there are still reg_mask or stack_mask
3375 * to backtrack.
3376 * It means the backtracking missed the spot where
3377 * particular register was initialized with a constant.
3378 */
3379 verbose(env, "BUG backtracking idx %d\n", i);
3380 WARN_ONCE(1, "verifier backtracking bug");
3381 return -EFAULT;
3382 }
3383 }
3384 st = st->parent;
3385 if (!st)
3386 break;
3387
3388 new_marks = false;
3389 func = st->frame[frame];
3390 bitmap_from_u64(mask, reg_mask);
3391 for_each_set_bit(i, mask, 32) {
3392 reg = &func->regs[i];
3393 if (reg->type != SCALAR_VALUE) {
3394 reg_mask &= ~(1u << i);
3395 continue;
3396 }
3397 if (!reg->precise)
3398 new_marks = true;
3399 reg->precise = true;
3400 }
3401
3402 bitmap_from_u64(mask, stack_mask);
3403 for_each_set_bit(i, mask, 64) {
3404 if (i >= func->allocated_stack / BPF_REG_SIZE) {
3405 /* the sequence of instructions:
3406 * 2: (bf) r3 = r10
3407 * 3: (7b) *(u64 *)(r3 -8) = r0
3408 * 4: (79) r4 = *(u64 *)(r10 -8)
3409 * doesn't contain jmps. It's backtracked
3410 * as a single block.
3411 * During backtracking insn 3 is not recognized as
3412 * stack access, so at the end of backtracking
3413 * stack slot fp-8 is still marked in stack_mask.
3414 * However the parent state may not have accessed
3415 * fp-8 and it's "unallocated" stack space.
3416 * In such case fallback to conservative.
3417 */
3418 mark_all_scalars_precise(env, st);
3419 return 0;
3420 }
3421
3422 if (!is_spilled_reg(&func->stack[i])) {
3423 stack_mask &= ~(1ull << i);
3424 continue;
3425 }
3426 reg = &func->stack[i].spilled_ptr;
3427 if (reg->type != SCALAR_VALUE) {
3428 stack_mask &= ~(1ull << i);
3429 continue;
3430 }
3431 if (!reg->precise)
3432 new_marks = true;
3433 reg->precise = true;
3434 }
3435 if (env->log.level & BPF_LOG_LEVEL2) {
3436 verbose(env, "parent %s regs=%x stack=%llx marks:",
3437 new_marks ? "didn't have" : "already had",
3438 reg_mask, stack_mask);
3439 print_verifier_state(env, func, true);
3440 }
3441
3442 if (!reg_mask && !stack_mask)
3443 break;
3444 if (!new_marks)
3445 break;
3446
3447 last_idx = st->last_insn_idx;
3448 first_idx = st->first_insn_idx;
3449 }
3450 return 0;
3451}
3452
3453int mark_chain_precision(struct bpf_verifier_env *env, int regno)
3454{
3455 return __mark_chain_precision(env, env->cur_state->curframe, regno, -1);
3456}
3457
3458static int mark_chain_precision_frame(struct bpf_verifier_env *env, int frame, int regno)
3459{
3460 return __mark_chain_precision(env, frame, regno, -1);
3461}
3462
3463static int mark_chain_precision_stack_frame(struct bpf_verifier_env *env, int frame, int spi)
3464{
3465 return __mark_chain_precision(env, frame, -1, spi);
3466}
3467
3468static bool is_spillable_regtype(enum bpf_reg_type type)
3469{
3470 switch (base_type(type)) {
3471 case PTR_TO_MAP_VALUE:
3472 case PTR_TO_STACK:
3473 case PTR_TO_CTX:
3474 case PTR_TO_PACKET:
3475 case PTR_TO_PACKET_META:
3476 case PTR_TO_PACKET_END:
3477 case PTR_TO_FLOW_KEYS:
3478 case CONST_PTR_TO_MAP:
3479 case PTR_TO_SOCKET:
3480 case PTR_TO_SOCK_COMMON:
3481 case PTR_TO_TCP_SOCK:
3482 case PTR_TO_XDP_SOCK:
3483 case PTR_TO_BTF_ID:
3484 case PTR_TO_BUF:
3485 case PTR_TO_MEM:
3486 case PTR_TO_FUNC:
3487 case PTR_TO_MAP_KEY:
3488 return true;
3489 default:
3490 return false;
3491 }
3492}
3493
3494/* Does this register contain a constant zero? */
3495static bool register_is_null(struct bpf_reg_state *reg)
3496{
3497 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
3498}
3499
3500static bool register_is_const(struct bpf_reg_state *reg)
3501{
3502 return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
3503}
3504
3505static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
3506{
3507 return tnum_is_unknown(reg->var_off) &&
3508 reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
3509 reg->umin_value == 0 && reg->umax_value == U64_MAX &&
3510 reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
3511 reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
3512}
3513
3514static bool register_is_bounded(struct bpf_reg_state *reg)
3515{
3516 return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
3517}
3518
3519static bool __is_pointer_value(bool allow_ptr_leaks,
3520 const struct bpf_reg_state *reg)
3521{
3522 if (allow_ptr_leaks)
3523 return false;
3524
3525 return reg->type != SCALAR_VALUE;
3526}
3527
3528/* Copy src state preserving dst->parent and dst->live fields */
3529static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
3530{
3531 struct bpf_reg_state *parent = dst->parent;
3532 enum bpf_reg_liveness live = dst->live;
3533
3534 *dst = *src;
3535 dst->parent = parent;
3536 dst->live = live;
3537}
3538
3539static void save_register_state(struct bpf_func_state *state,
3540 int spi, struct bpf_reg_state *reg,
3541 int size)
3542{
3543 int i;
3544
3545 copy_register_state(&state->stack[spi].spilled_ptr, reg);
3546 if (size == BPF_REG_SIZE)
3547 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
3548
3549 for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
3550 state->stack[spi].slot_type[i - 1] = STACK_SPILL;
3551
3552 /* size < 8 bytes spill */
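	/* e.g. for a 4-byte spill, four slot_type bytes were marked
	 * STACK_SPILL above; the remaining four are scrubbed here so a
	 * later fill can tell how many bytes were actually spilled.
	 */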
3553 for (; i; i--)
3554 scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
3555}
3556
3557static bool is_bpf_st_mem(struct bpf_insn *insn)
3558{
3559 return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
3560}
3561
3562 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers;
3563 * stack boundary and alignment are checked in check_mem_access()
3564 */
3565static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
3566 /* stack frame we're writing to */
3567 struct bpf_func_state *state,
3568 int off, int size, int value_regno,
3569 int insn_idx)
3570{
3571 struct bpf_func_state *cur; /* state of the current function */
3572 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
3573 struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
3574 struct bpf_reg_state *reg = NULL;
3575 u32 dst_reg = insn->dst_reg;
3576
3577 err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
3578 if (err)
3579 return err;
3580 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
3581 * so it's aligned access and [off, off + size) are within stack limits
3582 */
3583 if (!env->allow_ptr_leaks &&
3584 state->stack[spi].slot_type[0] == STACK_SPILL &&
3585 size != BPF_REG_SIZE) {
3586 verbose(env, "attempt to corrupt spilled pointer on stack\n");
3587 return -EACCES;
3588 }
3589
3590 cur = env->cur_state->frame[env->cur_state->curframe];
3591 if (value_regno >= 0)
3592 reg = &cur->regs[value_regno];
3593 if (!env->bypass_spec_v4) {
3594 bool sanitize = reg && is_spillable_regtype(reg->type);
3595
3596 for (i = 0; i < size; i++) {
3597 u8 type = state->stack[spi].slot_type[i];
3598
3599 if (type != STACK_MISC && type != STACK_ZERO) {
3600 sanitize = true;
3601 break;
3602 }
3603 }
3604
3605 if (sanitize)
3606 env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
3607 }
3608
3609 err = destroy_if_dynptr_stack_slot(env, state, spi);
3610 if (err)
3611 return err;
3612
3613 mark_stack_slot_scratched(env, spi);
3614 if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
3615 !register_is_null(reg) && env->bpf_capable) {
3616 if (dst_reg != BPF_REG_FP) {
3617 /* The backtracking logic can only recognize explicit
3618 			 * stack slot addresses like [fp - 8]. Other spills of a
3619 			 * scalar via a different register have to be conservative.
3620 * Backtrack from here and mark all registers as precise
3621 * that contributed into 'reg' being a constant.
3622 */
3623 err = mark_chain_precision(env, value_regno);
3624 if (err)
3625 return err;
3626 }
3627 save_register_state(state, spi, reg, size);
3628 } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
3629 insn->imm != 0 && env->bpf_capable) {
3630 struct bpf_reg_state fake_reg = {};
3631
3632 __mark_reg_known(&fake_reg, (u32)insn->imm);
3633 fake_reg.type = SCALAR_VALUE;
3634 save_register_state(state, spi, &fake_reg, size);
3635 } else if (reg && is_spillable_regtype(reg->type)) {
3636 /* register containing pointer is being spilled into stack */
3637 if (size != BPF_REG_SIZE) {
3638 verbose_linfo(env, insn_idx, "; ");
3639 verbose(env, "invalid size of register spill\n");
3640 return -EACCES;
3641 }
3642 if (state != cur && reg->type == PTR_TO_STACK) {
3643 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
3644 return -EINVAL;
3645 }
3646 save_register_state(state, spi, reg, size);
3647 } else {
3648 u8 type = STACK_MISC;
3649
3650 /* regular write of data into stack destroys any spilled ptr */
3651 state->stack[spi].spilled_ptr.type = NOT_INIT;
3652 /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
3653 if (is_spilled_reg(&state->stack[spi]))
3654 for (i = 0; i < BPF_REG_SIZE; i++)
3655 scrub_spilled_slot(&state->stack[spi].slot_type[i]);
3656
3657 /* only mark the slot as written if all 8 bytes were written
3658 * otherwise read propagation may incorrectly stop too soon
3659 * when stack slots are partially written.
3660 * This heuristic means that read propagation will be
3661 * conservative, since it will add reg_live_read marks
3662 		 * to stack slots all the way to the first state when a
3663 		 * program writes+reads less than 8 bytes.
3664 */
3665 if (size == BPF_REG_SIZE)
3666 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
3667
3668 /* when we zero initialize stack slots mark them as such */
3669 if ((reg && register_is_null(reg)) ||
3670 (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
3671 /* backtracking doesn't work for STACK_ZERO yet. */
3672 err = mark_chain_precision(env, value_regno);
3673 if (err)
3674 return err;
3675 type = STACK_ZERO;
3676 }
3677
3678 /* Mark slots affected by this stack write. */
3679 for (i = 0; i < size; i++)
3680 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
3681 type;
3682 }
3683 return 0;
3684}
3685
3686/* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
3687 * known to contain a variable offset.
3688 * This function checks whether the write is permitted and conservatively
3689 * tracks the effects of the write, considering that each stack slot in the
3690 * dynamic range is potentially written to.
3691 *
3692 * 'off' includes 'regno->off'.
3693 * 'value_regno' can be -1, meaning that an unknown value is being written to
3694 * the stack.
3695 *
3696 * Spilled pointers in range are not marked as written because we don't know
3697 * what's going to be actually written. This means that read propagation for
3698 * future reads cannot be terminated by this write.
3699 *
3700 * For privileged programs, uninitialized stack slots are considered
3701 * initialized by this write (even though we don't know exactly what offsets
3702 * are going to be written to). The idea is that we don't want the verifier to
3703 * reject future reads that access slots written to through variable offsets.
3704 */
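/* Example of a variable-offset stack write (illustrative only), with r1
 * being a bounded scalar in [-16, -8]:
 *   r2 = r10
 *   r2 += r1
 *   *(u64 *)(r2 +0) = r3
 * Any byte in [fp-16, fp) may be touched, so every stack slot in that
 * range is conservatively treated as potentially written.
 */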
3705static int check_stack_write_var_off(struct bpf_verifier_env *env,
3706 /* func where register points to */
3707 struct bpf_func_state *state,
3708 int ptr_regno, int off, int size,
3709 int value_regno, int insn_idx)
3710{
3711 struct bpf_func_state *cur; /* state of the current function */
3712 int min_off, max_off;
3713 int i, err;
3714 struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
3715 struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
3716 bool writing_zero = false;
3717 /* set if the fact that we're writing a zero is used to let any
3718 * stack slots remain STACK_ZERO
3719 */
3720 bool zero_used = false;
3721
3722 cur = env->cur_state->frame[env->cur_state->curframe];
3723 ptr_reg = &cur->regs[ptr_regno];
3724 min_off = ptr_reg->smin_value + off;
3725 max_off = ptr_reg->smax_value + off + size;
3726 if (value_regno >= 0)
3727 value_reg = &cur->regs[value_regno];
3728 if ((value_reg && register_is_null(value_reg)) ||
3729 (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
3730 writing_zero = true;
3731
3732 err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
3733 if (err)
3734 return err;
3735
3736 for (i = min_off; i < max_off; i++) {
3737 int spi;
3738
3739 spi = __get_spi(i);
3740 err = destroy_if_dynptr_stack_slot(env, state, spi);
3741 if (err)
3742 return err;
3743 }
3744
3745 /* Variable offset writes destroy any spilled pointers in range. */
3746 for (i = min_off; i < max_off; i++) {
3747 u8 new_type, *stype;
3748 int slot, spi;
3749
3750 slot = -i - 1;
3751 spi = slot / BPF_REG_SIZE;
3752 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
3753 mark_stack_slot_scratched(env, spi);
3754
3755 if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
3756 			/* Reject the write if the range we may write to has not
3757 * been initialized beforehand. If we didn't reject
3758 * here, the ptr status would be erased below (even
3759 * though not all slots are actually overwritten),
3760 * possibly opening the door to leaks.
3761 *
3762 * We do however catch STACK_INVALID case below, and
3763 * only allow reading possibly uninitialized memory
3764 * later for CAP_PERFMON, as the write may not happen to
3765 * that slot.
3766 */
3767 verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
3768 insn_idx, i);
3769 return -EINVAL;
3770 }
3771
3772 /* Erase all spilled pointers. */
3773 state->stack[spi].spilled_ptr.type = NOT_INIT;
3774
3775 /* Update the slot type. */
3776 new_type = STACK_MISC;
3777 if (writing_zero && *stype == STACK_ZERO) {
3778 new_type = STACK_ZERO;
3779 zero_used = true;
3780 }
3781 /* If the slot is STACK_INVALID, we check whether it's OK to
3782 * pretend that it will be initialized by this write. The slot
3783 * might not actually be written to, and so if we mark it as
3784 * initialized future reads might leak uninitialized memory.
3785 * For privileged programs, we will accept such reads to slots
3786 		 * that may or may not be written because, if we reject
3787 * them, the error would be too confusing.
3788 */
3789 if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
3790 verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
3791 insn_idx, i);
3792 return -EINVAL;
3793 }
3794 *stype = new_type;
3795 }
3796 if (zero_used) {
3797 /* backtracking doesn't work for STACK_ZERO yet. */
3798 err = mark_chain_precision(env, value_regno);
3799 if (err)
3800 return err;
3801 }
3802 return 0;
3803}
3804
3805/* When register 'dst_regno' is assigned some values from stack[min_off,
3806 * max_off), we set the register's type according to the types of the
3807 * respective stack slots. If all the stack values are known to be zeros, then
3808 * so is the destination reg. Otherwise, the register is considered to be
3809 * SCALAR. This function does not deal with register filling; the caller must
3810 * ensure that all spilled registers in the stack range have been marked as
3811 * read.
3812 */
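/* e.g. reading 4 bytes that are all marked STACK_ZERO yields a known
 * zero in 'dst_regno' (stack reads are zero-extended); if any byte is
 * STACK_MISC, the destination becomes an unknown SCALAR_VALUE.
 */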
3813static void mark_reg_stack_read(struct bpf_verifier_env *env,
3814 /* func where src register points to */
3815 struct bpf_func_state *ptr_state,
3816 int min_off, int max_off, int dst_regno)
3817{
3818 struct bpf_verifier_state *vstate = env->cur_state;
3819 struct bpf_func_state *state = vstate->frame[vstate->curframe];
3820 int i, slot, spi;
3821 u8 *stype;
3822 int zeros = 0;
3823
3824 for (i = min_off; i < max_off; i++) {
3825 slot = -i - 1;
3826 spi = slot / BPF_REG_SIZE;
3827 stype = ptr_state->stack[spi].slot_type;
3828 if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
3829 break;
3830 zeros++;
3831 }
3832 if (zeros == max_off - min_off) {
3833 /* any access_size read into register is zero extended,
3834 * so the whole register == const_zero
3835 */
3836 __mark_reg_const_zero(&state->regs[dst_regno]);
3837 /* backtracking doesn't support STACK_ZERO yet,
3838 * so mark it precise here, so that later
3839 * backtracking can stop here.
3840 * Backtracking may not need this if this register
3841 * doesn't participate in pointer adjustment.
3842 * Forward propagation of precise flag is not
3843 * necessary either. This mark is only to stop
3844 * backtracking. Any register that contributed
3845 * to const 0 was marked precise before spill.
3846 */
3847 state->regs[dst_regno].precise = true;
3848 } else {
3849 /* have read misc data from the stack */
3850 mark_reg_unknown(env, state->regs, dst_regno);
3851 }
3852 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3853}
3854
3855/* Read the stack at 'off' and put the results into the register indicated by
3856 * 'dst_regno'. It handles reg filling if the addressed stack slot is a
3857 * spilled reg.
3858 *
3859 * 'dst_regno' can be -1, meaning that the read value is not going to a
3860 * register.
3861 *
3862 * The access is assumed to be within the current stack bounds.
3863 */
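/* e.g. an 8-byte read of a slot holding a spilled pointer restores that
 * pointer into 'dst_regno'; a narrower read of such a slot is only
 * allowed when the spilled value is a scalar.
 */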
3864static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
3865 /* func where src register points to */
3866 struct bpf_func_state *reg_state,
3867 int off, int size, int dst_regno)
3868{
3869 struct bpf_verifier_state *vstate = env->cur_state;
3870 struct bpf_func_state *state = vstate->frame[vstate->curframe];
3871 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
3872 struct bpf_reg_state *reg;
3873 u8 *stype, type;
3874
3875 stype = reg_state->stack[spi].slot_type;
3876 reg = &reg_state->stack[spi].spilled_ptr;
3877
3878 if (is_spilled_reg(&reg_state->stack[spi])) {
3879 u8 spill_size = 1;
3880
3881 for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
3882 spill_size++;
3883
3884 if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
3885 if (reg->type != SCALAR_VALUE) {
3886 verbose_linfo(env, env->insn_idx, "; ");
3887 verbose(env, "invalid size of register fill\n");
3888 return -EACCES;
3889 }
3890
3891 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3892 if (dst_regno < 0)
3893 return 0;
3894
3895 if (!(off % BPF_REG_SIZE) && size == spill_size) {
3896 /* The earlier check_reg_arg() has decided the
3897 * subreg_def for this insn. Save it first.
3898 */
3899 s32 subreg_def = state->regs[dst_regno].subreg_def;
3900
3901 copy_register_state(&state->regs[dst_regno], reg);
3902 state->regs[dst_regno].subreg_def = subreg_def;
3903 } else {
3904 for (i = 0; i < size; i++) {
3905 type = stype[(slot - i) % BPF_REG_SIZE];
3906 if (type == STACK_SPILL)
3907 continue;
3908 if (type == STACK_MISC)
3909 continue;
3910 if (type == STACK_INVALID && env->allow_uninit_stack)
3911 continue;
3912 verbose(env, "invalid read from stack off %d+%d size %d\n",
3913 off, i, size);
3914 return -EACCES;
3915 }
3916 mark_reg_unknown(env, state->regs, dst_regno);
3917 }
3918 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3919 return 0;
3920 }
3921
3922 if (dst_regno >= 0) {
3923 /* restore register state from stack */
3924 copy_register_state(&state->regs[dst_regno], reg);
3925 /* mark reg as written since spilled pointer state likely
3926 * has its liveness marks cleared by is_state_visited()
3927 * which resets stack/reg liveness for state transitions
3928 */
3929 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3930 } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
3931 /* If dst_regno==-1, the caller is asking us whether
3932 * it is acceptable to use this value as a SCALAR_VALUE
3933 * (e.g. for XADD).
3934 * We must not allow unprivileged callers to do that
3935 * with spilled pointers.
3936 */
3937 verbose(env, "leaking pointer from stack off %d\n",
3938 off);
3939 return -EACCES;
3940 }
3941 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3942 } else {
3943 for (i = 0; i < size; i++) {
3944 type = stype[(slot - i) % BPF_REG_SIZE];
3945 if (type == STACK_MISC)
3946 continue;
3947 if (type == STACK_ZERO)
3948 continue;
3949 if (type == STACK_INVALID && env->allow_uninit_stack)
3950 continue;
3951 verbose(env, "invalid read from stack off %d+%d size %d\n",
3952 off, i, size);
3953 return -EACCES;
3954 }
3955 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3956 if (dst_regno >= 0)
3957 mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
3958 }
3959 return 0;
3960}
3961
3962enum bpf_access_src {
3963 ACCESS_DIRECT = 1, /* the access is performed by an instruction */
3964 ACCESS_HELPER = 2, /* the access is performed by a helper */
3965};
3966
3967static int check_stack_range_initialized(struct bpf_verifier_env *env,
3968 int regno, int off, int access_size,
3969 bool zero_size_allowed,
3970 enum bpf_access_src type,
3971 struct bpf_call_arg_meta *meta);
3972
3973static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
3974{
3975 return cur_regs(env) + regno;
3976}
3977
3978/* Read the stack at 'ptr_regno + off' and put the result into the register
3979 * 'dst_regno'.
3980 * 'off' includes the pointer register's fixed offset (i.e. 'ptr_regno.off'),
3981 * but not its variable offset.
3982 * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
3983 *
3984 * As opposed to check_stack_read_fixed_off, this function doesn't deal with
3985 * filling registers (i.e. reads of spilled register cannot be detected when
3986 * the offset is not fixed). We conservatively mark 'dst_regno' as containing
3987 * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
3988 * offset; for a fixed offset check_stack_read_fixed_off should be used
3989 * instead.
3990 */
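/* e.g. an 8-byte read through fp+r1 with r1 bounded to [-16, -8] must
 * pass check_stack_range_initialized() for the whole [fp-16, fp) range
 * and then marks 'dst_regno' via mark_reg_stack_read() over that range.
 */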
3991static int check_stack_read_var_off(struct bpf_verifier_env *env,
3992 int ptr_regno, int off, int size, int dst_regno)
3993{
3994 /* The state of the source register. */
3995 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3996 struct bpf_func_state *ptr_state = func(env, reg);
3997 int err;
3998 int min_off, max_off;
3999
4000 /* Note that we pass a NULL meta, so raw access will not be permitted.
4001 */
4002 err = check_stack_range_initialized(env, ptr_regno, off, size,
4003 false, ACCESS_DIRECT, NULL);
4004 if (err)
4005 return err;
4006
4007 min_off = reg->smin_value + off;
4008 max_off = reg->smax_value + off;
4009 mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
4010 return 0;
4011}
4012
4013/* check_stack_read dispatches to check_stack_read_fixed_off or
4014 * check_stack_read_var_off.
4015 *
4016 * The caller must ensure that the offset falls within the allocated stack
4017 * bounds.
4018 *
4019 * 'dst_regno' is a register which will receive the value from the stack. It
4020 * can be -1, meaning that the read value is not going to a register.
4021 */
4022static int check_stack_read(struct bpf_verifier_env *env,
4023 int ptr_regno, int off, int size,
4024 int dst_regno)
4025{
4026 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
4027 struct bpf_func_state *state = func(env, reg);
4028 int err;
4029 /* Some accesses are only permitted with a static offset. */
4030 bool var_off = !tnum_is_const(reg->var_off);
4031
4032 /* The offset is required to be static when reads don't go to a
4033 * register, in order to not leak pointers (see
4034 * check_stack_read_fixed_off).
4035 */
4036 if (dst_regno < 0 && var_off) {
4037 char tn_buf[48];
4038
4039 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4040 verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
4041 tn_buf, off, size);
4042 return -EACCES;
4043 }
4044 /* Variable offset is prohibited for unprivileged mode for simplicity
4045 * since it requires corresponding support in Spectre masking for stack
4046 * ALU. See also retrieve_ptr_limit().
4047 */
4048 if (!env->bypass_spec_v1 && var_off) {
4049 char tn_buf[48];
4050
4051 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4052 verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
4053 ptr_regno, tn_buf);
4054 return -EACCES;
4055 }
4056
4057 if (!var_off) {
4058 off += reg->var_off.value;
4059 err = check_stack_read_fixed_off(env, state, off, size,
4060 dst_regno);
4061 } else {
4062 /* Variable offset stack reads need more conservative handling
4063 * than fixed offset ones. Note that dst_regno >= 0 on this
4064 * branch.
4065 */
4066 err = check_stack_read_var_off(env, ptr_regno, off, size,
4067 dst_regno);
4068 }
4069 return err;
4070}
4071
4072
4073/* check_stack_write dispatches to check_stack_write_fixed_off or
4074 * check_stack_write_var_off.
4075 *
4076 * 'ptr_regno' is the register used as a pointer into the stack.
4077 * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
4078 * 'value_regno' is the register whose value we're writing to the stack. It can
4079 * be -1, meaning that we're not writing from a register.
4080 *
4081 * The caller must ensure that the offset falls within the maximum stack size.
4082 */
4083static int check_stack_write(struct bpf_verifier_env *env,
4084 int ptr_regno, int off, int size,
4085 int value_regno, int insn_idx)
4086{
4087 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
4088 struct bpf_func_state *state = func(env, reg);
4089 int err;
4090
4091 if (tnum_is_const(reg->var_off)) {
4092 off += reg->var_off.value;
4093 err = check_stack_write_fixed_off(env, state, off, size,
4094 value_regno, insn_idx);
4095 } else {
4096 		/* Variable offset stack writes need more conservative handling
4097 * than fixed offset ones.
4098 */
4099 err = check_stack_write_var_off(env, state,
4100 ptr_regno, off, size,
4101 value_regno, insn_idx);
4102 }
4103 return err;
4104}
4105
4106static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
4107 int off, int size, enum bpf_access_type type)
4108{
4109 struct bpf_reg_state *regs = cur_regs(env);
4110 struct bpf_map *map = regs[regno].map_ptr;
4111 u32 cap = bpf_map_flags_to_cap(map);
4112
4113 if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
4114 verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
4115 map->value_size, off, size);
4116 return -EACCES;
4117 }
4118
4119 if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
4120 verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
4121 map->value_size, off, size);
4122 return -EACCES;
4123 }
4124
4125 return 0;
4126}
4127
4128/* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
4129static int __check_mem_access(struct bpf_verifier_env *env, int regno,
4130 int off, int size, u32 mem_size,
4131 bool zero_size_allowed)
4132{
4133 bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
4134 struct bpf_reg_state *reg;
4135
4136 if (off >= 0 && size_ok && (u64)off + size <= mem_size)
4137 return 0;
4138
4139 reg = &cur_regs(env)[regno];
4140 switch (reg->type) {
4141 case PTR_TO_MAP_KEY:
4142 verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
4143 mem_size, off, size);
4144 break;
4145 case PTR_TO_MAP_VALUE:
4146 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
4147 mem_size, off, size);
4148 break;
4149 case PTR_TO_PACKET:
4150 case PTR_TO_PACKET_META:
4151 case PTR_TO_PACKET_END:
4152 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
4153 off, size, regno, reg->id, off, mem_size);
4154 break;
4155 case PTR_TO_MEM:
4156 default:
4157 verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
4158 mem_size, off, size);
4159 }
4160
4161 return -EACCES;
4162}
4163
4164/* check read/write into a memory region with possible variable offset */
4165static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
4166 int off, int size, u32 mem_size,
4167 bool zero_size_allowed)
4168{
4169 struct bpf_verifier_state *vstate = env->cur_state;
4170 struct bpf_func_state *state = vstate->frame[vstate->curframe];
4171 struct bpf_reg_state *reg = &state->regs[regno];
4172 int err;
4173
4174 /* We may have adjusted the register pointing to memory region, so we
4175 * need to try adding each of min_value and max_value to off
4176 * to make sure our theoretical access will be safe.
4177 *
4178 * The minimum value is only important with signed
4179 * comparisons where we can't assume the floor of a
4180 * value is 0. If we are using signed variables for our
4181 	 * indexes we need to make sure that whatever we use
4182 * will have a set floor within our range.
4183 */
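	/* e.g. for a memory region pointer with var_off in [0, 64], off=4
	 * and a 4-byte access, both 4+4 and 64+4+4 must stay within
	 * mem_size.
	 */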
4184 if (reg->smin_value < 0 &&
4185 (reg->smin_value == S64_MIN ||
4186 (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
4187 reg->smin_value + off < 0)) {
4188 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
4189 regno);
4190 return -EACCES;
4191 }
4192 err = __check_mem_access(env, regno, reg->smin_value + off, size,
4193 mem_size, zero_size_allowed);
4194 if (err) {
4195 verbose(env, "R%d min value is outside of the allowed memory range\n",
4196 regno);
4197 return err;
4198 }
4199
4200 /* If we haven't set a max value then we need to bail since we can't be
4201 * sure we won't do bad things.
4202 * If reg->umax_value + off could overflow, treat that as unbounded too.
4203 */
4204 if (reg->umax_value >= BPF_MAX_VAR_OFF) {
4205 verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
4206 regno);
4207 return -EACCES;
4208 }
4209 err = __check_mem_access(env, regno, reg->umax_value + off, size,
4210 mem_size, zero_size_allowed);
4211 if (err) {
4212 verbose(env, "R%d max value is outside of the allowed memory range\n",
4213 regno);
4214 return err;
4215 }
4216
4217 return 0;
4218}
4219
4220static int __check_ptr_off_reg(struct bpf_verifier_env *env,
4221 const struct bpf_reg_state *reg, int regno,
4222 bool fixed_off_ok)
4223{
4224 /* Access to this pointer-typed register or passing it to a helper
4225 * is only allowed in its original, unmodified form.
4226 */
4227
4228 if (reg->off < 0) {
4229 verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
4230 reg_type_str(env, reg->type), regno, reg->off);
4231 return -EACCES;
4232 }
4233
4234 if (!fixed_off_ok && reg->off) {
4235 verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
4236 reg_type_str(env, reg->type), regno, reg->off);
4237 return -EACCES;
4238 }
4239
4240 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4241 char tn_buf[48];
4242
4243 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4244 verbose(env, "variable %s access var_off=%s disallowed\n",
4245 reg_type_str(env, reg->type), tn_buf);
4246 return -EACCES;
4247 }
4248
4249 return 0;
4250}
4251
4252int check_ptr_off_reg(struct bpf_verifier_env *env,
4253 const struct bpf_reg_state *reg, int regno)
4254{
4255 return __check_ptr_off_reg(env, reg, regno, false);
4256}
4257
4258static int map_kptr_match_type(struct bpf_verifier_env *env,
4259 struct btf_field *kptr_field,
4260 struct bpf_reg_state *reg, u32 regno)
4261{
4262 const char *targ_name = kernel_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
4263 int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;
4264 const char *reg_name = "";
4265
4266 /* Only unreferenced case accepts untrusted pointers */
4267 if (kptr_field->type == BPF_KPTR_UNREF)
4268 perm_flags |= PTR_UNTRUSTED;
4269
4270 if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
4271 goto bad_type;
4272
4273 if (!btf_is_kernel(reg->btf)) {
4274 verbose(env, "R%d must point to kernel BTF\n", regno);
4275 return -EINVAL;
4276 }
4277 /* We need to verify reg->type and reg->btf, before accessing reg->btf */
4278 reg_name = kernel_type_name(reg->btf, reg->btf_id);
4279
4280 /* For ref_ptr case, release function check should ensure we get one
4281 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
4282 * normal store of unreferenced kptr, we must ensure var_off is zero.
4283 * Since ref_ptr cannot be accessed directly by BPF insns, checks for
4284 * reg->off and reg->ref_obj_id are not needed here.
4285 */
4286 if (__check_ptr_off_reg(env, reg, regno, true))
4287 return -EACCES;
4288
4289 /* A full type match is needed, as BTF can be vmlinux or module BTF, and
4290 * we also need to take into account the reg->off.
4291 *
4292 * We want to support cases like:
4293 *
4294 * struct foo {
4295 * struct bar br;
4296 * struct baz bz;
4297 * };
4298 *
4299 * struct foo *v;
4300 * v = func(); // PTR_TO_BTF_ID
4301 * val->foo = v; // reg->off is zero, btf and btf_id match type
4302 * val->bar = &v->br; // reg->off is still zero, but we need to retry with
4303 * // first member type of struct after comparison fails
4304 * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
4305 * // to match type
4306 *
4307 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
4308 * is zero. We must also ensure that btf_struct_ids_match does not walk
4309 * the struct to match type against first member of struct, i.e. reject
4310 * second case from above. Hence, when type is BPF_KPTR_REF, we set
4311 * strict mode to true for type match.
4312 */
4313 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
4314 kptr_field->kptr.btf, kptr_field->kptr.btf_id,
4315 kptr_field->type == BPF_KPTR_REF))
4316 goto bad_type;
4317 return 0;
4318bad_type:
4319 verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
4320 reg_type_str(env, reg->type), reg_name);
4321 verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
4322 if (kptr_field->type == BPF_KPTR_UNREF)
4323 verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
4324 targ_name);
4325 else
4326 verbose(env, "\n");
4327 return -EINVAL;
4328}
4329
4330/* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
4331  * can dereference RCU protected pointers and the result is PTR_TRUSTED.
4332 */
4333static bool in_rcu_cs(struct bpf_verifier_env *env)
4334{
4335 return env->cur_state->active_rcu_lock || !env->prog->aux->sleepable;
4336}
4337
4338/* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */
4339BTF_SET_START(rcu_protected_types)
4340BTF_ID(struct, prog_test_ref_kfunc)
4341BTF_ID(struct, cgroup)
4342BTF_SET_END(rcu_protected_types)
4343
4344static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
4345{
4346 if (!btf_is_kernel(btf))
4347 return false;
4348 return btf_id_set_contains(&rcu_protected_types, btf_id);
4349}
4350
4351static bool rcu_safe_kptr(const struct btf_field *field)
4352{
4353 const struct btf_field_kptr *kptr = &field->kptr;
4354
4355 return field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id);
4356}
4357
4358static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
4359 int value_regno, int insn_idx,
4360 struct btf_field *kptr_field)
4361{
4362 struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
4363 int class = BPF_CLASS(insn->code);
4364 struct bpf_reg_state *val_reg;
4365
4366 /* Things we already checked for in check_map_access and caller:
4367 * - Reject cases where variable offset may touch kptr
4368 * - size of access (must be BPF_DW)
4369 * - tnum_is_const(reg->var_off)
4370 * - kptr_field->offset == off + reg->var_off.value
4371 */
4372 /* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
4373 if (BPF_MODE(insn->code) != BPF_MEM) {
4374 verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
4375 return -EACCES;
4376 }
4377
4378 /* We only allow loading referenced kptr, since it will be marked as
4379 * untrusted, similar to unreferenced kptr.
4380 */
4381 if (class != BPF_LDX && kptr_field->type == BPF_KPTR_REF) {
4382 verbose(env, "store to referenced kptr disallowed\n");
4383 return -EACCES;
4384 }
4385
4386 if (class == BPF_LDX) {
4387 val_reg = reg_state(env, value_regno);
4388 /* We can simply mark the value_regno receiving the pointer
4389 * value from map as PTR_TO_BTF_ID, with the correct type.
4390 */
4391 mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
4392 kptr_field->kptr.btf_id,
4393 rcu_safe_kptr(kptr_field) && in_rcu_cs(env) ?
4394 PTR_MAYBE_NULL | MEM_RCU :
4395 PTR_MAYBE_NULL | PTR_UNTRUSTED);
4396 /* For mark_ptr_or_null_reg */
4397 val_reg->id = ++env->id_gen;
4398 } else if (class == BPF_STX) {
4399 val_reg = reg_state(env, value_regno);
4400 if (!register_is_null(val_reg) &&
4401 map_kptr_match_type(env, kptr_field, val_reg, value_regno))
4402 return -EACCES;
4403 } else if (class == BPF_ST) {
4404 if (insn->imm) {
4405 verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
4406 kptr_field->offset);
4407 return -EACCES;
4408 }
4409 } else {
4410 verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
4411 return -EACCES;
4412 }
4413 return 0;
4414}
4415
4416/* check read/write into a map element with possible variable offset */
4417static int check_map_access(struct bpf_verifier_env *env, u32 regno,
4418 int off, int size, bool zero_size_allowed,
4419 enum bpf_access_src src)
4420{
4421 struct bpf_verifier_state *vstate = env->cur_state;
4422 struct bpf_func_state *state = vstate->frame[vstate->curframe];
4423 struct bpf_reg_state *reg = &state->regs[regno];
4424 struct bpf_map *map = reg->map_ptr;
4425 struct btf_record *rec;
4426 int err, i;
4427
4428 err = check_mem_region_access(env, regno, off, size, map->value_size,
4429 zero_size_allowed);
4430 if (err)
4431 return err;
4432
4433 if (IS_ERR_OR_NULL(map->record))
4434 return 0;
4435 rec = map->record;
4436 for (i = 0; i < rec->cnt; i++) {
4437 struct btf_field *field = &rec->fields[i];
4438 u32 p = field->offset;
4439
4440 /* If any part of a field can be touched by load/store, reject
4441 * this program. To check that [x1, x2) overlaps with [y1, y2),
4442 * it is sufficient to check x1 < y2 && y1 < x2.
4443 */
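		/* e.g. a constant 8-byte access at value+8 (x1=8, x2=16)
		 * overlaps a kptr field at offset 12 (y1=12, y2=20), since
		 * 8 < 20 && 12 < 16.
		 */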
4444 if (reg->smin_value + off < p + btf_field_type_size(field->type) &&
4445 p < reg->umax_value + off + size) {
4446 switch (field->type) {
4447 case BPF_KPTR_UNREF:
4448 case BPF_KPTR_REF:
4449 if (src != ACCESS_DIRECT) {
4450 verbose(env, "kptr cannot be accessed indirectly by helper\n");
4451 return -EACCES;
4452 }
4453 if (!tnum_is_const(reg->var_off)) {
4454 verbose(env, "kptr access cannot have variable offset\n");
4455 return -EACCES;
4456 }
4457 if (p != off + reg->var_off.value) {
4458 verbose(env, "kptr access misaligned expected=%u off=%llu\n",
4459 p, off + reg->var_off.value);
4460 return -EACCES;
4461 }
4462 if (size != bpf_size_to_bytes(BPF_DW)) {
4463 verbose(env, "kptr access size must be BPF_DW\n");
4464 return -EACCES;
4465 }
4466 break;
4467 default:
4468 verbose(env, "%s cannot be accessed directly by load/store\n",
4469 btf_field_type_name(field->type));
4470 return -EACCES;
4471 }
4472 }
4473 }
4474 return 0;
4475}
4476
4477#define MAX_PACKET_OFF 0xffff
4478
4479static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
4480 const struct bpf_call_arg_meta *meta,
4481 enum bpf_access_type t)
4482{
4483 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
4484
4485 switch (prog_type) {
4486 	/* Program types with only direct read access go here! */
4487 case BPF_PROG_TYPE_LWT_IN:
4488 case BPF_PROG_TYPE_LWT_OUT:
4489 case BPF_PROG_TYPE_LWT_SEG6LOCAL:
4490 case BPF_PROG_TYPE_SK_REUSEPORT:
4491 case BPF_PROG_TYPE_FLOW_DISSECTOR:
4492 case BPF_PROG_TYPE_CGROUP_SKB:
4493 if (t == BPF_WRITE)
4494 return false;
4495 fallthrough;
4496
4497 /* Program types with direct read + write access go here! */
4498 case BPF_PROG_TYPE_SCHED_CLS:
4499 case BPF_PROG_TYPE_SCHED_ACT:
4500 case BPF_PROG_TYPE_XDP:
4501 case BPF_PROG_TYPE_LWT_XMIT:
4502 case BPF_PROG_TYPE_SK_SKB:
4503 case BPF_PROG_TYPE_SK_MSG:
4504 if (meta)
4505 return meta->pkt_access;
4506
4507 env->seen_direct_write = true;
4508 return true;
4509
4510 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
4511 if (t == BPF_WRITE)
4512 env->seen_direct_write = true;
4513
4514 return true;
4515
4516 default:
4517 return false;
4518 }
4519}
4520
4521static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
4522 int size, bool zero_size_allowed)
4523{
4524 struct bpf_reg_state *regs = cur_regs(env);
4525 struct bpf_reg_state *reg = &regs[regno];
4526 int err;
4527
4528 /* We may have added a variable offset to the packet pointer; but any
4529 * reg->range we have comes after that. We are only checking the fixed
4530 * offset.
4531 */
4532
4533 /* We don't allow negative numbers, because we aren't tracking enough
4534 * detail to prove they're safe.
4535 */
4536 if (reg->smin_value < 0) {
4537 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
4538 regno);
4539 return -EACCES;
4540 }
4541
4542 err = reg->range < 0 ? -EINVAL :
4543 __check_mem_access(env, regno, off, size, reg->range,
4544 zero_size_allowed);
4545 if (err) {
4546 verbose(env, "R%d offset is outside of the packet\n", regno);
4547 return err;
4548 }
4549
4550 /* __check_mem_access has made sure "off + size - 1" is within u16.
4551 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
4552 	 * otherwise find_good_pkt_pointers would have refused to set range info,
4553 	 * in which case __check_mem_access would have rejected this pkt access.
4554 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
4555 */
4556 env->prog->aux->max_pkt_offset =
4557 max_t(u32, env->prog->aux->max_pkt_offset,
4558 off + reg->umax_value + size - 1);
4559
4560 return err;
4561}
4562
4563/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
4564static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
4565 enum bpf_access_type t, enum bpf_reg_type *reg_type,
4566 struct btf **btf, u32 *btf_id)
4567{
4568 struct bpf_insn_access_aux info = {
4569 .reg_type = *reg_type,
4570 .log = &env->log,
4571 };
4572
4573 if (env->ops->is_valid_access &&
4574 env->ops->is_valid_access(off, size, t, env->prog, &info)) {
4575 /* A non zero info.ctx_field_size indicates that this field is a
4576 * candidate for later verifier transformation to load the whole
4577 * field and then apply a mask when accessed with a narrower
4578 * access than actual ctx access size. A zero info.ctx_field_size
4579 		 * will only allow whole field access and reject any other
4580 * type of narrower access.
4581 */
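		/* e.g. a 1-byte read of a 4-byte ctx field may later be
		 * converted into a 4-byte load plus shift/mask.
		 */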
4582 *reg_type = info.reg_type;
4583
4584 if (base_type(*reg_type) == PTR_TO_BTF_ID) {
4585 *btf = info.btf;
4586 *btf_id = info.btf_id;
4587 } else {
4588 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
4589 }
4590 /* remember the offset of last byte accessed in ctx */
4591 if (env->prog->aux->max_ctx_offset < off + size)
4592 env->prog->aux->max_ctx_offset = off + size;
4593 return 0;
4594 }
4595
4596 verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
4597 return -EACCES;
4598}
4599
4600static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
4601 int size)
4602{
4603 if (size < 0 || off < 0 ||
4604 (u64)off + size > sizeof(struct bpf_flow_keys)) {
4605 verbose(env, "invalid access to flow keys off=%d size=%d\n",
4606 off, size);
4607 return -EACCES;
4608 }
4609 return 0;
4610}
4611
4612static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
4613 u32 regno, int off, int size,
4614 enum bpf_access_type t)
4615{
4616 struct bpf_reg_state *regs = cur_regs(env);
4617 struct bpf_reg_state *reg = &regs[regno];
4618 struct bpf_insn_access_aux info = {};
4619 bool valid;
4620
4621 if (reg->smin_value < 0) {
4622 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
4623 regno);
4624 return -EACCES;
4625 }
4626
4627 switch (reg->type) {
4628 case PTR_TO_SOCK_COMMON:
4629 valid = bpf_sock_common_is_valid_access(off, size, t, &info);
4630 break;
4631 case PTR_TO_SOCKET:
4632 valid = bpf_sock_is_valid_access(off, size, t, &info);
4633 break;
4634 case PTR_TO_TCP_SOCK:
4635 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
4636 break;
4637 case PTR_TO_XDP_SOCK:
4638 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
4639 break;
4640 default:
4641 valid = false;
4642 }
4643 
4645 if (valid) {
4646 env->insn_aux_data[insn_idx].ctx_field_size =
4647 info.ctx_field_size;
4648 return 0;
4649 }
4650
4651 verbose(env, "R%d invalid %s access off=%d size=%d\n",
4652 regno, reg_type_str(env, reg->type), off, size);
4653
4654 return -EACCES;
4655}
4656
4657static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
4658{
4659 return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
4660}
4661
4662static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
4663{
4664 const struct bpf_reg_state *reg = reg_state(env, regno);
4665
4666 return reg->type == PTR_TO_CTX;
4667}
4668
4669static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
4670{
4671 const struct bpf_reg_state *reg = reg_state(env, regno);
4672
4673 return type_is_sk_pointer(reg->type);
4674}
4675
4676static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
4677{
4678 const struct bpf_reg_state *reg = reg_state(env, regno);
4679
4680 return type_is_pkt_pointer(reg->type);
4681}
4682
4683static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
4684{
4685 const struct bpf_reg_state *reg = reg_state(env, regno);
4686
4687 	/* Separate from is_ctx_reg() since we still want to allow BPF_ST here. */
4688 return reg->type == PTR_TO_FLOW_KEYS;
4689}
4690
4691static bool is_trusted_reg(const struct bpf_reg_state *reg)
4692{
4693 /* A referenced register is always trusted. */
4694 if (reg->ref_obj_id)
4695 return true;
4696
4697 /* If a register is not referenced, it is trusted if it has the
4698 * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
4699 * other type modifiers may be safe, but we elect to take an opt-in
4700 * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
4701 * not.
4702 *
4703 * Eventually, we should make PTR_TRUSTED the single source of truth
4704 * for whether a register is trusted.
4705 */
4706 return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
4707 !bpf_type_has_unsafe_modifiers(reg->type);
4708}
4709
4710static bool is_rcu_reg(const struct bpf_reg_state *reg)
4711{
4712 return reg->type & MEM_RCU;
4713}
4714
4715static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
4716 const struct bpf_reg_state *reg,
4717 int off, int size, bool strict)
4718{
4719 struct tnum reg_off;
4720 int ip_align;
4721
4722 /* Byte size accesses are always allowed. */
4723 if (!strict || size == 1)
4724 return 0;
4725
4726 /* For platforms that do not have a Kconfig enabling
4727 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
4728 * NET_IP_ALIGN is universally set to '2'. And on platforms
4729 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
4730 * to this code only in strict mode where we want to emulate
4731 * the NET_IP_ALIGN==2 checking. Therefore use an
4732 * unconditional IP align value of '2'.
4733 */
4734 ip_align = 2;
4735
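	/* e.g. with a 14-byte Ethernet header, a 4-byte load at pkt+14
	 * (reg->off == 0, constant var_off) checks (2 + 14) % 4 == 0 and
	 * is therefore treated as aligned.
	 */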
4736 reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
4737 if (!tnum_is_aligned(reg_off, size)) {
4738 char tn_buf[48];
4739
4740 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4741 verbose(env,
4742 "misaligned packet access off %d+%s+%d+%d size %d\n",
4743 ip_align, tn_buf, reg->off, off, size);
4744 return -EACCES;
4745 }
4746
4747 return 0;
4748}
4749
4750static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
4751 const struct bpf_reg_state *reg,
4752 const char *pointer_desc,
4753 int off, int size, bool strict)
4754{
4755 struct tnum reg_off;
4756
4757 /* Byte size accesses are always allowed. */
4758 if (!strict || size == 1)
4759 return 0;
4760
4761 reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
4762 if (!tnum_is_aligned(reg_off, size)) {
4763 char tn_buf[48];
4764
4765 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4766 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
4767 pointer_desc, tn_buf, reg->off, off, size);
4768 return -EACCES;
4769 }
4770
4771 return 0;
4772}
4773
4774static int check_ptr_alignment(struct bpf_verifier_env *env,
4775 const struct bpf_reg_state *reg, int off,
4776 int size, bool strict_alignment_once)
4777{
4778 bool strict = env->strict_alignment || strict_alignment_once;
4779 const char *pointer_desc = "";
4780
4781 switch (reg->type) {
4782 case PTR_TO_PACKET:
4783 case PTR_TO_PACKET_META:
4784 /* Special case, because of NET_IP_ALIGN. Given metadata sits
4785 * right in front, treat it the very same way.
4786 */
4787 return check_pkt_ptr_alignment(env, reg, off, size, strict);
4788 case PTR_TO_FLOW_KEYS:
4789 pointer_desc = "flow keys ";
4790 break;
4791 case PTR_TO_MAP_KEY:
4792 pointer_desc = "key ";
4793 break;
4794 case PTR_TO_MAP_VALUE:
4795 pointer_desc = "value ";
4796 break;
4797 case PTR_TO_CTX:
4798 pointer_desc = "context ";
4799 break;
4800 case PTR_TO_STACK:
4801 pointer_desc = "stack ";
4802 /* The stack spill tracking logic in check_stack_write_fixed_off()
4803 * and check_stack_read_fixed_off() relies on stack accesses being
4804 * aligned.
4805 */
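		/* e.g. a 4-byte access at fp-6 is rejected because -6 is
		 * not a multiple of 4.
		 */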
4806 strict = true;
4807 break;
4808 case PTR_TO_SOCKET:
4809 pointer_desc = "sock ";
4810 break;
4811 case PTR_TO_SOCK_COMMON:
4812 pointer_desc = "sock_common ";
4813 break;
4814 case PTR_TO_TCP_SOCK:
4815 pointer_desc = "tcp_sock ";
4816 break;
4817 case PTR_TO_XDP_SOCK:
4818 pointer_desc = "xdp_sock ";
4819 break;
4820 default:
4821 break;
4822 }
4823 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
4824 strict);
4825}
4826
4827static int update_stack_depth(struct bpf_verifier_env *env,
4828 const struct bpf_func_state *func,
4829 int off)
4830{
4831 u16 stack = env->subprog_info[func->subprogno].stack_depth;
4832
4833 if (stack >= -off)
4834 return 0;
4835
4836 /* update known max for given subprogram */
4837 env->subprog_info[func->subprogno].stack_depth = -off;
4838 return 0;
4839}
4840
4841/* starting from main bpf function walk all instructions of the function
4842 * and recursively walk all callees that given function can call.
4843 * Ignore jump and exit insns.
4844 * Since recursion is prevented by check_cfg() this algorithm
4845 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
4846 */
4847static int check_max_stack_depth(struct bpf_verifier_env *env)
4848{
4849 int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
4850 struct bpf_subprog_info *subprog = env->subprog_info;
4851 struct bpf_insn *insn = env->prog->insnsi;
4852 bool tail_call_reachable = false;
4853 int ret_insn[MAX_CALL_FRAMES];
4854 int ret_prog[MAX_CALL_FRAMES];
4855 int j;
4856
4857process_func:
4858 /* protect against potential stack overflow that might happen when
4859 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
4860 	 * depth in such a case down to 256 so that the worst case scenario
4861 	 * would result in 8k stack size (32, which is the tailcall limit, * 256 =
4862 * 8k).
4863 *
4864 * To get the idea what might happen, see an example:
4865 * func1 -> sub rsp, 128
4866 * subfunc1 -> sub rsp, 256
4867 * tailcall1 -> add rsp, 256
4868 * func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
4869 * subfunc2 -> sub rsp, 64
4870 * subfunc22 -> sub rsp, 128
4871 * tailcall2 -> add rsp, 128
4872 * func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
4873 *
4874 * tailcall will unwind the current stack frame but it will not get rid
4875 * of caller's stack as shown on the example above.
4876 */
4877 if (idx && subprog[idx].has_tail_call && depth >= 256) {
4878 verbose(env,
4879 "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
4880 depth);
4881 return -EACCES;
4882 }
4883 /* round up to 32-bytes, since this is granularity
4884 * of interpreter stack size
4885 */
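	/* e.g. a subprog using anywhere from 0 to 32 bytes of stack
	 * counts as a full 32 bytes of depth here.
	 */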
4886 depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
4887 if (depth > MAX_BPF_STACK) {
4888 verbose(env, "combined stack size of %d calls is %d. Too large\n",
4889 frame + 1, depth);
4890 return -EACCES;
4891 }
4892continue_func:
4893 subprog_end = subprog[idx + 1].start;
4894 for (; i < subprog_end; i++) {
4895 int next_insn;
4896
4897 if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
4898 continue;
4899 /* remember insn and function to return to */
4900 ret_insn[frame] = i + 1;
4901 ret_prog[frame] = idx;
4902
4903 /* find the callee */
4904 next_insn = i + insn[i].imm + 1;
4905 idx = find_subprog(env, next_insn);
4906 if (idx < 0) {
4907 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
4908 next_insn);
4909 return -EFAULT;
4910 }
4911 if (subprog[idx].is_async_cb) {
4912 if (subprog[idx].has_tail_call) {
4913 verbose(env, "verifier bug. subprog has tail_call and async cb\n");
4914 return -EFAULT;
4915 }
4916 /* async callbacks don't increase bpf prog stack size */
4917 continue;
4918 }
4919 i = next_insn;
4920
4921 if (subprog[idx].has_tail_call)
4922 tail_call_reachable = true;
4923
4924 frame++;
4925 if (frame >= MAX_CALL_FRAMES) {
4926 verbose(env, "the call stack of %d frames is too deep !\n",
4927 frame);
4928 return -E2BIG;
4929 }
4930 goto process_func;
4931 }
4932 /* if tail call got detected across bpf2bpf calls then mark each of the
4933 * currently present subprog frames as tail call reachable subprogs;
4934 * this info will be utilized by JIT so that we will be preserving the
4935 * tail call counter throughout bpf2bpf calls combined with tailcalls
4936 */
4937 if (tail_call_reachable)
4938 for (j = 0; j < frame; j++)
4939 subprog[ret_prog[j]].tail_call_reachable = true;
4940 if (subprog[0].tail_call_reachable)
4941 env->prog->aux->tail_call_reachable = true;
4942
4943 /* end of for() loop means the last insn of the 'subprog'
4944 * was reached. Doesn't matter whether it was JA or EXIT
4945 */
4946 if (frame == 0)
4947 return 0;
4948 depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
4949 frame--;
4950 i = ret_insn[frame];
4951 idx = ret_prog[frame];
4952 goto continue_func;
4953}
4954
4955#ifndef CONFIG_BPF_JIT_ALWAYS_ON
4956static int get_callee_stack_depth(struct bpf_verifier_env *env,
4957 const struct bpf_insn *insn, int idx)
4958{
4959 int start = idx + insn->imm + 1, subprog;
4960
4961 subprog = find_subprog(env, start);
4962 if (subprog < 0) {
4963 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
4964 start);
4965 return -EFAULT;
4966 }
4967 return env->subprog_info[subprog].stack_depth;
4968}
4969#endif
4970
4971static int __check_buffer_access(struct bpf_verifier_env *env,
4972 const char *buf_info,
4973 const struct bpf_reg_state *reg,
4974 int regno, int off, int size)
4975{
4976 if (off < 0) {
4977 verbose(env,
4978 "R%d invalid %s buffer access: off=%d, size=%d\n",
4979 regno, buf_info, off, size);
4980 return -EACCES;
4981 }
4982 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4983 char tn_buf[48];
4984
4985 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4986 verbose(env,
4987 "R%d invalid variable buffer offset: off=%d, var_off=%s\n",
4988 regno, off, tn_buf);
4989 return -EACCES;
4990 }
4991
4992 return 0;
4993}
4994
4995static int check_tp_buffer_access(struct bpf_verifier_env *env,
4996 const struct bpf_reg_state *reg,
4997 int regno, int off, int size)
4998{
4999 int err;
5000
5001 err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
5002 if (err)
5003 return err;
5004
5005 if (off + size > env->prog->aux->max_tp_access)
5006 env->prog->aux->max_tp_access = off + size;
5007
5008 return 0;
5009}
5010
5011static int check_buffer_access(struct bpf_verifier_env *env,
5012 const struct bpf_reg_state *reg,
5013 int regno, int off, int size,
5014 bool zero_size_allowed,
5015 u32 *max_access)
5016{
5017 const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
5018 int err;
5019
5020 err = __check_buffer_access(env, buf_info, reg, regno, off, size);
5021 if (err)
5022 return err;
5023
5024 if (off + size > *max_access)
5025 *max_access = off + size;
5026
5027 return 0;
5028}
5029
5030 /* BPF architecture zero extends alu32 ops into 64-bit registers */
5031static void zext_32_to_64(struct bpf_reg_state *reg)
5032{
5033 reg->var_off = tnum_subreg(reg->var_off);
5034 __reg_assign_32_into_64(reg);
5035}
5036
5037/* truncate register to smaller size (in bytes)
5038 * must be called with size < BPF_REG_SIZE
5039 */
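/* For example, coercing a scalar known to be in [0x105, 0x1f0] to size 1
 * keeps the shared high bits and yields [0x05, 0xf0], while coercing
 * [0, 0x1ff] cannot keep them, so the bounds widen to [0, 0xff].
 */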
5040static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
5041{
5042 u64 mask;
5043
5044 /* clear high bits in bit representation */
5045 reg->var_off = tnum_cast(reg->var_off, size);
5046
5047 /* fix arithmetic bounds */
5048 mask = ((u64)1 << (size * 8)) - 1;
5049 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
5050 reg->umin_value &= mask;
5051 reg->umax_value &= mask;
5052 } else {
5053 reg->umin_value = 0;
5054 reg->umax_value = mask;
5055 }
5056 reg->smin_value = reg->umin_value;
5057 reg->smax_value = reg->umax_value;
5058
5059 	/* If size is smaller than a 32-bit register, the 32-bit register
5060 	 * values are also truncated, so we push the 64-bit bounds into
5061 	 * the 32-bit bounds. The bounds were already truncated to < 32 bits above.
5062 */
5063 if (size >= 4)
5064 return;
5065 __reg_combine_64_into_32(reg);
5066}
5067
5068static bool bpf_map_is_rdonly(const struct bpf_map *map)
5069{
5070 	/* A map is considered read-only if the following conditions are true:
5071 *
5072 * 1) BPF program side cannot change any of the map content. The
5073 	 *    BPF_F_RDONLY_PROG flag was set at map creation time and
5074 	 *    stays set throughout the map's lifetime.
5075 * 2) The map value(s) have been initialized from user space by a
5076 * loader and then "frozen", such that no new map update/delete
5077 * operations from syscall side are possible for the rest of
5078 * the map's lifetime from that point onwards.
5079 * 3) Any parallel/pending map update/delete operations from syscall
5080 * side have been completed. Only after that point, it's safe to
5081 * assume that map value(s) are immutable.
5082 */
5083 return (map->map_flags & BPF_F_RDONLY_PROG) &&
5084 READ_ONCE(map->frozen) &&
5085 !bpf_map_write_active(map);
5086}
5087
5088static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
5089{
5090 void *ptr;
5091 u64 addr;
5092 int err;
5093
5094 err = map->ops->map_direct_value_addr(map, &addr, off);
5095 if (err)
5096 return err;
5097 ptr = (void *)(long)addr + off;
5098
5099 switch (size) {
5100 case sizeof(u8):
5101 *val = (u64)*(u8 *)ptr;
5102 break;
5103 case sizeof(u16):
5104 *val = (u64)*(u16 *)ptr;
5105 break;
5106 case sizeof(u32):
5107 *val = (u64)*(u32 *)ptr;
5108 break;
5109 case sizeof(u64):
5110 *val = *(u64 *)ptr;
5111 break;
5112 default:
5113 return -EINVAL;
5114 }
5115 return 0;
5116}
5117
5118#define BTF_TYPE_SAFE_RCU(__type) __PASTE(__type, __safe_rcu)
5119#define BTF_TYPE_SAFE_TRUSTED(__type) __PASTE(__type, __safe_trusted)
5120
5121/*
5122  * Allow-list a few fields as RCU trusted or fully trusted.
5123  * This logic doesn't allow mixed tagging and will be removed once GCC supports
5124 * btf_type_tag.
5125 */
5126
5127/* RCU trusted: these fields are trusted in RCU CS and never NULL */
5128BTF_TYPE_SAFE_RCU(struct task_struct) {
5129 const cpumask_t *cpus_ptr;
5130 struct css_set __rcu *cgroups;
5131 struct task_struct __rcu *real_parent;
5132 struct task_struct *group_leader;
5133};
5134
5135BTF_TYPE_SAFE_RCU(struct css_set) {
5136 struct cgroup *dfl_cgrp;
5137};
5138
5139/* full trusted: these fields are trusted even outside of RCU CS and never NULL */
5140BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
5141 __bpf_md_ptr(struct seq_file *, seq);
5142};
5143
5144BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
5145 __bpf_md_ptr(struct bpf_iter_meta *, meta);
5146 __bpf_md_ptr(struct task_struct *, task);
5147};
5148
5149BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
5150 struct file *file;
5151};
5152
5153BTF_TYPE_SAFE_TRUSTED(struct file) {
5154 struct inode *f_inode;
5155};
5156
5157BTF_TYPE_SAFE_TRUSTED(struct dentry) {
5158 /* no negative dentry-s in places where bpf can see it */
5159 struct inode *d_inode;
5160};
5161
5162BTF_TYPE_SAFE_TRUSTED(struct socket) {
5163 struct sock *sk;
5164};
5165
5166static bool type_is_rcu(struct bpf_verifier_env *env,
5167 struct bpf_reg_state *reg,
5168 int off)
5169{
5170 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
5171 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
5172
5173 return btf_nested_type_is_trusted(&env->log, reg, off, "__safe_rcu");
5174}
5175
5176static bool type_is_trusted(struct bpf_verifier_env *env,
5177 struct bpf_reg_state *reg,
5178 int off)
5179{
5180 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
5181 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
5182 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
5183 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
5184 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry));
5185 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket));
5186
5187 return btf_nested_type_is_trusted(&env->log, reg, off, "__safe_trusted");
5188}
5189
5190static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
5191 struct bpf_reg_state *regs,
5192 int regno, int off, int size,
5193 enum bpf_access_type atype,
5194 int value_regno)
5195{
5196 struct bpf_reg_state *reg = regs + regno;
5197 const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
5198 const char *tname = btf_name_by_offset(reg->btf, t->name_off);
5199 enum bpf_type_flag flag = 0;
5200 u32 btf_id;
5201 int ret;
5202
5203 if (!env->allow_ptr_leaks) {
5204 verbose(env,
5205 "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
5206 tname);
5207 return -EPERM;
5208 }
5209 if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) {
5210 verbose(env,
5211 "Cannot access kernel 'struct %s' from non-GPL compatible program\n",
5212 tname);
5213 return -EINVAL;
5214 }
5215 if (off < 0) {
5216 verbose(env,
5217 "R%d is ptr_%s invalid negative access: off=%d\n",
5218 regno, tname, off);
5219 return -EACCES;
5220 }
5221 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
5222 char tn_buf[48];
5223
5224 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5225 verbose(env,
5226 "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
5227 regno, tname, off, tn_buf);
5228 return -EACCES;
5229 }
5230
5231 if (reg->type & MEM_USER) {
5232 verbose(env,
5233 "R%d is ptr_%s access user memory: off=%d\n",
5234 regno, tname, off);
5235 return -EACCES;
5236 }
5237
5238 if (reg->type & MEM_PERCPU) {
5239 verbose(env,
5240 "R%d is ptr_%s access percpu memory: off=%d\n",
5241 regno, tname, off);
5242 return -EACCES;
5243 }
5244
5245 if (env->ops->btf_struct_access && !type_is_alloc(reg->type)) {
5246 if (!btf_is_kernel(reg->btf)) {
5247 verbose(env, "verifier internal error: reg->btf must be kernel btf\n");
5248 return -EFAULT;
5249 }
5250 ret = env->ops->btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag);
5251 } else {
5252 /* Writes are permitted with default btf_struct_access for
5253 * program allocated objects (which always have ref_obj_id > 0),
5254 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
5255 */
5256 if (atype != BPF_READ && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
5257 verbose(env, "only read is supported\n");
5258 return -EACCES;
5259 }
5260
5261 if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
5262 !reg->ref_obj_id) {
5263 verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
5264 return -EFAULT;
5265 }
5266
5267 ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag);
5268 }
5269
5270 if (ret < 0)
5271 return ret;
5272
5273 if (ret != PTR_TO_BTF_ID) {
5274 /* just mark; */
5275
5276 } else if (type_flag(reg->type) & PTR_UNTRUSTED) {
5277 /* If this is an untrusted pointer, all pointers formed by walking it
5278 * also inherit the untrusted flag.
5279 */
5280 flag = PTR_UNTRUSTED;
5281
5282 } else if (is_trusted_reg(reg) || is_rcu_reg(reg)) {
5283 /* By default any pointer obtained from walking a trusted pointer is no
5284 * longer trusted, unless the field being accessed has explicitly been
5285 * marked as inheriting its parent's state of trust (either full or RCU).
5286 * For example:
5287 * 'cgroups' pointer is untrusted if task->cgroups dereference
5288 * happened in a sleepable program outside of bpf_rcu_read_lock()
5289 * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
5290 * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
5291 *
5292 * A regular RCU-protected pointer with __rcu tag can also be deemed
5293 * trusted if we are in an RCU CS. Such pointer can be NULL.
5294 */
5295 if (type_is_trusted(env, reg, off)) {
5296 flag |= PTR_TRUSTED;
5297 } else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
5298 if (type_is_rcu(env, reg, off)) {
5299 /* ignore __rcu tag and mark it MEM_RCU */
5300 flag |= MEM_RCU;
5301 } else if (flag & MEM_RCU) {
5302 /* __rcu tagged pointers can be NULL */
5303 flag |= PTR_MAYBE_NULL;
5304 } else if (flag & (MEM_PERCPU | MEM_USER)) {
5305 /* keep as-is */
5306 } else {
5307 /* walking unknown pointers yields untrusted pointer */
5308 flag = PTR_UNTRUSTED;
5309 }
5310 } else {
5311 /*
5312			 * If we are not in an RCU CS, or the MEM_RCU pointer can be
5313			 * NULL, aggressively mark the pointer as untrusted; otherwise
5314			 * such pointers would be plain PTR_TO_BTF_ID without flags
5315			 * and would be allowed to be passed into helpers for
5316			 * compat reasons.
5317 */
5318 flag = PTR_UNTRUSTED;
5319 }
5320 } else {
5321 /* Old compat. Deprecated */
5322 flag &= ~PTR_TRUSTED;
5323 }
5324
5325 if (atype == BPF_READ && value_regno >= 0)
5326 mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
5327
5328 return 0;
5329}
5330
5331static int check_ptr_to_map_access(struct bpf_verifier_env *env,
5332 struct bpf_reg_state *regs,
5333 int regno, int off, int size,
5334 enum bpf_access_type atype,
5335 int value_regno)
5336{
5337 struct bpf_reg_state *reg = regs + regno;
5338 struct bpf_map *map = reg->map_ptr;
5339 struct bpf_reg_state map_reg;
5340 enum bpf_type_flag flag = 0;
5341 const struct btf_type *t;
5342 const char *tname;
5343 u32 btf_id;
5344 int ret;
5345
5346 if (!btf_vmlinux) {
5347 verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
5348 return -ENOTSUPP;
5349 }
5350
5351 if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
5352 verbose(env, "map_ptr access not supported for map type %d\n",
5353 map->map_type);
5354 return -ENOTSUPP;
5355 }
5356
5357 t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
5358 tname = btf_name_by_offset(btf_vmlinux, t->name_off);
5359
5360 if (!env->allow_ptr_leaks) {
5361 verbose(env,
5362 "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
5363 tname);
5364 return -EPERM;
5365 }
5366
5367 if (off < 0) {
5368 verbose(env, "R%d is %s invalid negative access: off=%d\n",
5369 regno, tname, off);
5370 return -EACCES;
5371 }
5372
5373 if (atype != BPF_READ) {
5374 verbose(env, "only read from %s is supported\n", tname);
5375 return -EACCES;
5376 }
5377
5378 /* Simulate access to a PTR_TO_BTF_ID */
5379 memset(&map_reg, 0, sizeof(map_reg));
5380 mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID, btf_vmlinux, *map->ops->map_btf_id, 0);
5381 ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag);
5382 if (ret < 0)
5383 return ret;
5384
5385 if (value_regno >= 0)
5386 mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
5387
5388 return 0;
5389}
5390
5391/* Check that the stack access at the given offset is within bounds. The
5392 * maximum valid offset is -1.
5393 *
5394 * The minimum valid offset is -MAX_BPF_STACK for writes, and
5395 * -state->allocated_stack for reads.
5396 */
5397static int check_stack_slot_within_bounds(int off,
5398 struct bpf_func_state *state,
5399 enum bpf_access_type t)
5400{
5401 int min_valid_off;
5402
5403 if (t == BPF_WRITE)
5404 min_valid_off = -MAX_BPF_STACK;
5405 else
5406 min_valid_off = -state->allocated_stack;
5407
5408 if (off < min_valid_off || off > -1)
5409 return -EACCES;
5410 return 0;
5411}
5412
5413/* Check that the stack access at 'regno + off' falls within the maximum stack
5414 * bounds.
5415 *
5416 * 'off' includes 'regno->off', but not its dynamic part (if any).
5417 */
5418static int check_stack_access_within_bounds(
5419 struct bpf_verifier_env *env,
5420 int regno, int off, int access_size,
5421 enum bpf_access_src src, enum bpf_access_type type)
5422{
5423 struct bpf_reg_state *regs = cur_regs(env);
5424 struct bpf_reg_state *reg = regs + regno;
5425 struct bpf_func_state *state = func(env, reg);
5426 int min_off, max_off;
5427 int err;
5428 char *err_extra;
5429
5430 if (src == ACCESS_HELPER)
5431 /* We don't know if helpers are reading or writing (or both). */
5432 err_extra = " indirect access to";
5433 else if (type == BPF_READ)
5434 err_extra = " read from";
5435 else
5436 err_extra = " write to";
5437
5438 if (tnum_is_const(reg->var_off)) {
5439 min_off = reg->var_off.value + off;
5440 if (access_size > 0)
5441 max_off = min_off + access_size - 1;
5442 else
5443 max_off = min_off;
5444 } else {
5445 if (reg->smax_value >= BPF_MAX_VAR_OFF ||
5446 reg->smin_value <= -BPF_MAX_VAR_OFF) {
5447 verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
5448 err_extra, regno);
5449 return -EACCES;
5450 }
5451 min_off = reg->smin_value + off;
5452 if (access_size > 0)
5453 max_off = reg->smax_value + off + access_size - 1;
5454 else
5455 max_off = min_off;
5456 }
5457
5458 err = check_stack_slot_within_bounds(min_off, state, type);
5459 if (!err)
5460 err = check_stack_slot_within_bounds(max_off, state, type);
5461
5462 if (err) {
5463 if (tnum_is_const(reg->var_off)) {
5464 verbose(env, "invalid%s stack R%d off=%d size=%d\n",
5465 err_extra, regno, off, access_size);
5466 } else {
5467 char tn_buf[48];
5468
5469 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5470 verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
5471 err_extra, regno, tn_buf, access_size);
5472 }
5473 }
5474 return err;
5475}
5476
5477/* check whether memory at (regno + off) is accessible for t = (read | write)
5478 * if t==write, value_regno is a register whose value is stored into memory
5479 * if t==read, value_regno is a register which will receive the value from memory
5480 * if t==write && value_regno==-1, some unknown value is stored into memory
5481 * if t==read && value_regno==-1, don't care what we read from memory
5482 */
5483static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
5484 int off, int bpf_size, enum bpf_access_type t,
5485 int value_regno, bool strict_alignment_once)
5486{
5487 struct bpf_reg_state *regs = cur_regs(env);
5488 struct bpf_reg_state *reg = regs + regno;
5489 struct bpf_func_state *state;
5490 int size, err = 0;
5491
5492 size = bpf_size_to_bytes(bpf_size);
5493 if (size < 0)
5494 return size;
5495
5496 /* alignment checks will add in reg->off themselves */
5497 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
5498 if (err)
5499 return err;
5500
5501 /* for access checks, reg->off is just part of off */
5502 off += reg->off;
5503
5504 if (reg->type == PTR_TO_MAP_KEY) {
5505 if (t == BPF_WRITE) {
5506 verbose(env, "write to change key R%d not allowed\n", regno);
5507 return -EACCES;
5508 }
5509
5510 err = check_mem_region_access(env, regno, off, size,
5511 reg->map_ptr->key_size, false);
5512 if (err)
5513 return err;
5514 if (value_regno >= 0)
5515 mark_reg_unknown(env, regs, value_regno);
5516 } else if (reg->type == PTR_TO_MAP_VALUE) {
5517 struct btf_field *kptr_field = NULL;
5518
5519 if (t == BPF_WRITE && value_regno >= 0 &&
5520 is_pointer_value(env, value_regno)) {
5521 verbose(env, "R%d leaks addr into map\n", value_regno);
5522 return -EACCES;
5523 }
5524 err = check_map_access_type(env, regno, off, size, t);
5525 if (err)
5526 return err;
5527 err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT);
5528 if (err)
5529 return err;
5530 if (tnum_is_const(reg->var_off))
5531 kptr_field = btf_record_find(reg->map_ptr->record,
5532 off + reg->var_off.value, BPF_KPTR);
5533 if (kptr_field) {
5534 err = check_map_kptr_access(env, regno, value_regno, insn_idx, kptr_field);
5535 } else if (t == BPF_READ && value_regno >= 0) {
5536 struct bpf_map *map = reg->map_ptr;
5537
5538 /* if map is read-only, track its contents as scalars */
5539 if (tnum_is_const(reg->var_off) &&
5540 bpf_map_is_rdonly(map) &&
5541 map->ops->map_direct_value_addr) {
5542 int map_off = off + reg->var_off.value;
5543 u64 val = 0;
5544
5545 err = bpf_map_direct_read(map, map_off, size,
5546 &val);
5547 if (err)
5548 return err;
5549
5550 regs[value_regno].type = SCALAR_VALUE;
5551 __mark_reg_known(&regs[value_regno], val);
5552 } else {
5553 mark_reg_unknown(env, regs, value_regno);
5554 }
5555 }
5556 } else if (base_type(reg->type) == PTR_TO_MEM) {
5557 bool rdonly_mem = type_is_rdonly_mem(reg->type);
5558
5559 if (type_may_be_null(reg->type)) {
5560 verbose(env, "R%d invalid mem access '%s'\n", regno,
5561 reg_type_str(env, reg->type));
5562 return -EACCES;
5563 }
5564
5565 if (t == BPF_WRITE && rdonly_mem) {
5566 verbose(env, "R%d cannot write into %s\n",
5567 regno, reg_type_str(env, reg->type));
5568 return -EACCES;
5569 }
5570
5571 if (t == BPF_WRITE && value_regno >= 0 &&
5572 is_pointer_value(env, value_regno)) {
5573 verbose(env, "R%d leaks addr into mem\n", value_regno);
5574 return -EACCES;
5575 }
5576
5577 err = check_mem_region_access(env, regno, off, size,
5578 reg->mem_size, false);
5579 if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
5580 mark_reg_unknown(env, regs, value_regno);
5581 } else if (reg->type == PTR_TO_CTX) {
5582 enum bpf_reg_type reg_type = SCALAR_VALUE;
5583 struct btf *btf = NULL;
5584 u32 btf_id = 0;
5585
5586 if (t == BPF_WRITE && value_regno >= 0 &&
5587 is_pointer_value(env, value_regno)) {
5588 verbose(env, "R%d leaks addr into ctx\n", value_regno);
5589 return -EACCES;
5590 }
5591
5592 err = check_ptr_off_reg(env, reg, regno);
5593 if (err < 0)
5594 return err;
5595
5596 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
5597 &btf_id);
5598 if (err)
5599 verbose_linfo(env, insn_idx, "; ");
5600 if (!err && t == BPF_READ && value_regno >= 0) {
5601 /* ctx access returns either a scalar, or a
5602 * PTR_TO_PACKET[_META,_END]. In the latter
5603 * case, we know the offset is zero.
5604 */
5605 if (reg_type == SCALAR_VALUE) {
5606 mark_reg_unknown(env, regs, value_regno);
5607 } else {
5608 mark_reg_known_zero(env, regs,
5609 value_regno);
5610 if (type_may_be_null(reg_type))
5611 regs[value_regno].id = ++env->id_gen;
5612				/* A load of a ctx field could have an actual
5613				 * load size different from the one encoded in
5614				 * the insn. When the dst is a PTR, it is for
5615				 * sure not a sub-register.
5616 */
5617 regs[value_regno].subreg_def = DEF_NOT_SUBREG;
5618 if (base_type(reg_type) == PTR_TO_BTF_ID) {
5619 regs[value_regno].btf = btf;
5620 regs[value_regno].btf_id = btf_id;
5621 }
5622 }
5623 regs[value_regno].type = reg_type;
5624 }
5625
5626 } else if (reg->type == PTR_TO_STACK) {
5627 /* Basic bounds checks. */
5628 err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
5629 if (err)
5630 return err;
5631
5632 state = func(env, reg);
5633 err = update_stack_depth(env, state, off);
5634 if (err)
5635 return err;
5636
5637 if (t == BPF_READ)
5638 err = check_stack_read(env, regno, off, size,
5639 value_regno);
5640 else
5641 err = check_stack_write(env, regno, off, size,
5642 value_regno, insn_idx);
5643 } else if (reg_is_pkt_pointer(reg)) {
5644 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
5645 verbose(env, "cannot write into packet\n");
5646 return -EACCES;
5647 }
5648 if (t == BPF_WRITE && value_regno >= 0 &&
5649 is_pointer_value(env, value_regno)) {
5650 verbose(env, "R%d leaks addr into packet\n",
5651 value_regno);
5652 return -EACCES;
5653 }
5654 err = check_packet_access(env, regno, off, size, false);
5655 if (!err && t == BPF_READ && value_regno >= 0)
5656 mark_reg_unknown(env, regs, value_regno);
5657 } else if (reg->type == PTR_TO_FLOW_KEYS) {
5658 if (t == BPF_WRITE && value_regno >= 0 &&
5659 is_pointer_value(env, value_regno)) {
5660 verbose(env, "R%d leaks addr into flow keys\n",
5661 value_regno);
5662 return -EACCES;
5663 }
5664
5665 err = check_flow_keys_access(env, off, size);
5666 if (!err && t == BPF_READ && value_regno >= 0)
5667 mark_reg_unknown(env, regs, value_regno);
5668 } else if (type_is_sk_pointer(reg->type)) {
5669 if (t == BPF_WRITE) {
5670 verbose(env, "R%d cannot write into %s\n",
5671 regno, reg_type_str(env, reg->type));
5672 return -EACCES;
5673 }
5674 err = check_sock_access(env, insn_idx, regno, off, size, t);
5675 if (!err && value_regno >= 0)
5676 mark_reg_unknown(env, regs, value_regno);
5677 } else if (reg->type == PTR_TO_TP_BUFFER) {
5678 err = check_tp_buffer_access(env, reg, regno, off, size);
5679 if (!err && t == BPF_READ && value_regno >= 0)
5680 mark_reg_unknown(env, regs, value_regno);
5681 } else if (base_type(reg->type) == PTR_TO_BTF_ID &&
5682 !type_may_be_null(reg->type)) {
5683 err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
5684 value_regno);
5685 } else if (reg->type == CONST_PTR_TO_MAP) {
5686 err = check_ptr_to_map_access(env, regs, regno, off, size, t,
5687 value_regno);
5688 } else if (base_type(reg->type) == PTR_TO_BUF) {
5689 bool rdonly_mem = type_is_rdonly_mem(reg->type);
5690 u32 *max_access;
5691
5692 if (rdonly_mem) {
5693 if (t == BPF_WRITE) {
5694 verbose(env, "R%d cannot write into %s\n",
5695 regno, reg_type_str(env, reg->type));
5696 return -EACCES;
5697 }
5698 max_access = &env->prog->aux->max_rdonly_access;
5699 } else {
5700 max_access = &env->prog->aux->max_rdwr_access;
5701 }
5702
5703 err = check_buffer_access(env, reg, regno, off, size, false,
5704 max_access);
5705
5706 if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
5707 mark_reg_unknown(env, regs, value_regno);
5708 } else {
5709 verbose(env, "R%d invalid mem access '%s'\n", regno,
5710 reg_type_str(env, reg->type));
5711 return -EACCES;
5712 }
5713
5714 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
5715 regs[value_regno].type == SCALAR_VALUE) {
5716 /* b/h/w load zero-extends, mark upper bits as known 0 */
5717 coerce_reg_to_size(&regs[value_regno], size);
5718 }
5719 return err;
5720}
5721
5722static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
5723{
5724 int load_reg;
5725 int err;
5726
5727 switch (insn->imm) {
5728 case BPF_ADD:
5729 case BPF_ADD | BPF_FETCH:
5730 case BPF_AND:
5731 case BPF_AND | BPF_FETCH:
5732 case BPF_OR:
5733 case BPF_OR | BPF_FETCH:
5734 case BPF_XOR:
5735 case BPF_XOR | BPF_FETCH:
5736 case BPF_XCHG:
5737 case BPF_CMPXCHG:
5738 break;
5739 default:
5740 verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
5741 return -EINVAL;
5742 }
5743
5744 if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
5745 verbose(env, "invalid atomic operand size\n");
5746 return -EINVAL;
5747 }
5748
5749 /* check src1 operand */
5750 err = check_reg_arg(env, insn->src_reg, SRC_OP);
5751 if (err)
5752 return err;
5753
5754 /* check src2 operand */
5755 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
5756 if (err)
5757 return err;
5758
5759 if (insn->imm == BPF_CMPXCHG) {
5760 /* Check comparison of R0 with memory location */
5761 const u32 aux_reg = BPF_REG_0;
5762
5763 err = check_reg_arg(env, aux_reg, SRC_OP);
5764 if (err)
5765 return err;
5766
5767 if (is_pointer_value(env, aux_reg)) {
5768 verbose(env, "R%d leaks addr into mem\n", aux_reg);
5769 return -EACCES;
5770 }
5771 }
5772
5773 if (is_pointer_value(env, insn->src_reg)) {
5774 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
5775 return -EACCES;
5776 }
5777
5778 if (is_ctx_reg(env, insn->dst_reg) ||
5779 is_pkt_reg(env, insn->dst_reg) ||
5780 is_flow_key_reg(env, insn->dst_reg) ||
5781 is_sk_reg(env, insn->dst_reg)) {
5782 verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
5783 insn->dst_reg,
5784 reg_type_str(env, reg_state(env, insn->dst_reg)->type));
5785 return -EACCES;
5786 }
5787
5788 if (insn->imm & BPF_FETCH) {
5789 if (insn->imm == BPF_CMPXCHG)
5790 load_reg = BPF_REG_0;
5791 else
5792 load_reg = insn->src_reg;
5793
5794 /* check and record load of old value */
5795 err = check_reg_arg(env, load_reg, DST_OP);
5796 if (err)
5797 return err;
5798 } else {
5799 /* This instruction accesses a memory location but doesn't
5800 * actually load it into a register.
5801 */
5802 load_reg = -1;
5803 }
5804
5805 /* Check whether we can read the memory, with second call for fetch
5806 * case to simulate the register fill.
5807 */
5808 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
5809 BPF_SIZE(insn->code), BPF_READ, -1, true);
5810 if (!err && load_reg >= 0)
5811 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
5812 BPF_SIZE(insn->code), BPF_READ, load_reg,
5813 true);
5814 if (err)
5815 return err;
5816
5817 /* Check whether we can write into the same memory. */
5818 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
5819 BPF_SIZE(insn->code), BPF_WRITE, -1, true);
5820 if (err)
5821 return err;
5822
5823 return 0;
5824}
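
/* Illustrative sketch of source patterns that end up in check_atomic() above
 * (BPF program side; clang lowers the __sync_* builtins to BPF_ATOMIC insns,
 * and BPF_CMPXCHG needs -mcpu=v3 or newer; the 'counters' map and 'key' are
 * placeholders):
 *
 *	__u64 *cnt = bpf_map_lookup_elem(&counters, &key);
 *
 *	if (cnt) {
 *		__sync_fetch_and_add(cnt, 1);	// BPF_ADD (| BPF_FETCH if result used)
 *		__sync_val_compare_and_swap(cnt, 0, 1);	// BPF_CMPXCHG, old value in R0
 *	}
 */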
5825
5826/* When register 'regno' is used to read the stack (either directly or through
5827 * a helper function) make sure that it's within stack boundary and, depending
5828 * on the access type, that all elements of the stack are initialized.
5829 *
5830 * 'off' includes 'regno->off', but not its dynamic part (if any).
5831 *
5832 * All registers that have been spilled on the stack in the slots within the
5833 * read offsets are marked as read.
5834 */
5835static int check_stack_range_initialized(
5836 struct bpf_verifier_env *env, int regno, int off,
5837 int access_size, bool zero_size_allowed,
5838 enum bpf_access_src type, struct bpf_call_arg_meta *meta)
5839{
5840 struct bpf_reg_state *reg = reg_state(env, regno);
5841 struct bpf_func_state *state = func(env, reg);
5842 int err, min_off, max_off, i, j, slot, spi;
5843 char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
5844 enum bpf_access_type bounds_check_type;
5845 /* Some accesses can write anything into the stack, others are
5846 * read-only.
5847 */
5848 bool clobber = false;
5849
5850 if (access_size == 0 && !zero_size_allowed) {
5851 verbose(env, "invalid zero-sized read\n");
5852 return -EACCES;
5853 }
5854
5855 if (type == ACCESS_HELPER) {
5856 /* The bounds checks for writes are more permissive than for
5857 * reads. However, if raw_mode is not set, we'll do extra
5858 * checks below.
5859 */
5860 bounds_check_type = BPF_WRITE;
5861 clobber = true;
5862 } else {
5863 bounds_check_type = BPF_READ;
5864 }
5865 err = check_stack_access_within_bounds(env, regno, off, access_size,
5866 type, bounds_check_type);
5867 if (err)
5868 return err;
5869
5870
5871 if (tnum_is_const(reg->var_off)) {
5872 min_off = max_off = reg->var_off.value + off;
5873 } else {
5874 /* Variable offset is prohibited for unprivileged mode for
5875 * simplicity since it requires corresponding support in
5876 * Spectre masking for stack ALU.
5877 * See also retrieve_ptr_limit().
5878 */
5879 if (!env->bypass_spec_v1) {
5880 char tn_buf[48];
5881
5882 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5883 verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
5884 regno, err_extra, tn_buf);
5885 return -EACCES;
5886 }
5887		/* Only an initialized buffer on the stack is allowed to be
5888		 * accessed with a variable offset. With an uninitialized buffer
5889		 * it's hard to guarantee that the whole memory is marked as
5890		 * initialized on helper return, since the specific bounds are
5891		 * unknown, which may cause leaking of uninitialized stack memory.
5892 */
5893 if (meta && meta->raw_mode)
5894 meta = NULL;
5895
5896 min_off = reg->smin_value + off;
5897 max_off = reg->smax_value + off;
5898 }
5899
5900 if (meta && meta->raw_mode) {
5901 /* Ensure we won't be overwriting dynptrs when simulating byte
5902 * by byte access in check_helper_call using meta.access_size.
5903 * This would be a problem if we have a helper in the future
5904 * which takes:
5905 *
5906 * helper(uninit_mem, len, dynptr)
5907 *
5908		 * Now, uninit_mem may overlap with the dynptr pointer. Hence, it
5909		 * may end up writing to the dynptr itself when touching memory from
5910		 * arg 1. This can be relaxed on a case-by-case basis for known
5911		 * safe cases, but reject by default due to the possibility of
5912		 * aliasing.
5913 */
5914 for (i = min_off; i < max_off + access_size; i++) {
5915 int stack_off = -i - 1;
5916
5917 spi = __get_spi(i);
5918 /* raw_mode may write past allocated_stack */
5919 if (state->allocated_stack <= stack_off)
5920 continue;
5921 if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
5922 verbose(env, "potential write to dynptr at off=%d disallowed\n", i);
5923 return -EACCES;
5924 }
5925 }
5926 meta->access_size = access_size;
5927 meta->regno = regno;
5928 return 0;
5929 }
5930
5931 for (i = min_off; i < max_off + access_size; i++) {
5932 u8 *stype;
5933
5934 slot = -i - 1;
5935 spi = slot / BPF_REG_SIZE;
5936 if (state->allocated_stack <= slot)
5937 goto err;
5938 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
5939 if (*stype == STACK_MISC)
5940 goto mark;
5941 if ((*stype == STACK_ZERO) ||
5942 (*stype == STACK_INVALID && env->allow_uninit_stack)) {
5943 if (clobber) {
5944 /* helper can write anything into the stack */
5945 *stype = STACK_MISC;
5946 }
5947 goto mark;
5948 }
5949
5950 if (is_spilled_reg(&state->stack[spi]) &&
5951 (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
5952 env->allow_ptr_leaks)) {
5953 if (clobber) {
5954 __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
5955 for (j = 0; j < BPF_REG_SIZE; j++)
5956 scrub_spilled_slot(&state->stack[spi].slot_type[j]);
5957 }
5958 goto mark;
5959 }
5960
5961err:
5962 if (tnum_is_const(reg->var_off)) {
5963 verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
5964 err_extra, regno, min_off, i - min_off, access_size);
5965 } else {
5966 char tn_buf[48];
5967
5968 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5969 verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
5970 err_extra, regno, tn_buf, i - min_off, access_size);
5971 }
5972 return -EACCES;
5973mark:
5974 /* reading any byte out of 8-byte 'spill_slot' will cause
5975 * the whole slot to be marked as 'read'
5976 */
5977 mark_reg_read(env, &state->stack[spi].spilled_ptr,
5978 state->stack[spi].spilled_ptr.parent,
5979 REG_LIVE_READ64);
5980		/* We do not set REG_LIVE_WRITTEN for the stack slot, as we cannot
5981		 * be sure whether the stack slot is written to or not. Hence,
5982		 * we must still conservatively propagate reads upwards even if
5983		 * the helper may write to the entire memory range.
5984 */
5985 }
5986 return update_stack_depth(env, state, min_off);
5987}
5988
5989static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
5990 int access_size, bool zero_size_allowed,
5991 struct bpf_call_arg_meta *meta)
5992{
5993 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5994 u32 *max_access;
5995
5996 switch (base_type(reg->type)) {
5997 case PTR_TO_PACKET:
5998 case PTR_TO_PACKET_META:
5999 return check_packet_access(env, regno, reg->off, access_size,
6000 zero_size_allowed);
6001 case PTR_TO_MAP_KEY:
6002 if (meta && meta->raw_mode) {
6003 verbose(env, "R%d cannot write into %s\n", regno,
6004 reg_type_str(env, reg->type));
6005 return -EACCES;
6006 }
6007 return check_mem_region_access(env, regno, reg->off, access_size,
6008 reg->map_ptr->key_size, false);
6009 case PTR_TO_MAP_VALUE:
6010 if (check_map_access_type(env, regno, reg->off, access_size,
6011 meta && meta->raw_mode ? BPF_WRITE :
6012 BPF_READ))
6013 return -EACCES;
6014 return check_map_access(env, regno, reg->off, access_size,
6015 zero_size_allowed, ACCESS_HELPER);
6016 case PTR_TO_MEM:
6017 if (type_is_rdonly_mem(reg->type)) {
6018 if (meta && meta->raw_mode) {
6019 verbose(env, "R%d cannot write into %s\n", regno,
6020 reg_type_str(env, reg->type));
6021 return -EACCES;
6022 }
6023 }
6024 return check_mem_region_access(env, regno, reg->off,
6025 access_size, reg->mem_size,
6026 zero_size_allowed);
6027 case PTR_TO_BUF:
6028 if (type_is_rdonly_mem(reg->type)) {
6029 if (meta && meta->raw_mode) {
6030 verbose(env, "R%d cannot write into %s\n", regno,
6031 reg_type_str(env, reg->type));
6032 return -EACCES;
6033 }
6034
6035 max_access = &env->prog->aux->max_rdonly_access;
6036 } else {
6037 max_access = &env->prog->aux->max_rdwr_access;
6038 }
6039 return check_buffer_access(env, reg, regno, reg->off,
6040 access_size, zero_size_allowed,
6041 max_access);
6042 case PTR_TO_STACK:
6043 return check_stack_range_initialized(
6044 env,
6045 regno, reg->off, access_size,
6046 zero_size_allowed, ACCESS_HELPER, meta);
6047 case PTR_TO_CTX:
6048		/* in case the function doesn't know how to access the context
6049		 * (because we are in a program of type SYSCALL, for example), we
6050		 * cannot statically check its size.
6051		 * Dynamically check it now.
6052 */
6053 if (!env->ops->convert_ctx_access) {
6054 enum bpf_access_type atype = meta && meta->raw_mode ? BPF_WRITE : BPF_READ;
6055 int offset = access_size - 1;
6056
6057 /* Allow zero-byte read from PTR_TO_CTX */
6058 if (access_size == 0)
6059 return zero_size_allowed ? 0 : -EACCES;
6060
6061 return check_mem_access(env, env->insn_idx, regno, offset, BPF_B,
6062 atype, -1, false);
6063 }
6064
6065 fallthrough;
6066 default: /* scalar_value or invalid ptr */
6067 /* Allow zero-byte read from NULL, regardless of pointer type */
6068 if (zero_size_allowed && access_size == 0 &&
6069 register_is_null(reg))
6070 return 0;
6071
6072 verbose(env, "R%d type=%s ", regno,
6073 reg_type_str(env, reg->type));
6074 verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
6075 return -EACCES;
6076 }
6077}
6078
6079static int check_mem_size_reg(struct bpf_verifier_env *env,
6080 struct bpf_reg_state *reg, u32 regno,
6081 bool zero_size_allowed,
6082 struct bpf_call_arg_meta *meta)
6083{
6084 int err;
6085
6086 /* This is used to refine r0 return value bounds for helpers
6087 * that enforce this value as an upper bound on return values.
6088 * See do_refine_retval_range() for helpers that can refine
6089	 * the return value. The C type of the helper's size argument is u32,
6090	 * so we pull the register bound from umax_value; if it may be
6091	 * negative, the verifier errors out. Only upper bounds can be learned
6092	 * because the retval is an int type and negative retvals are allowed.
6093 */
6094 meta->msize_max_value = reg->umax_value;
6095
6096 /* The register is SCALAR_VALUE; the access check
6097 * happens using its boundaries.
6098 */
6099 if (!tnum_is_const(reg->var_off))
6100 /* For unprivileged variable accesses, disable raw
6101 * mode so that the program is required to
6102 * initialize all the memory that the helper could
6103 * just partially fill up.
6104 */
6105 meta = NULL;
6106
6107 if (reg->smin_value < 0) {
6108 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
6109 regno);
6110 return -EACCES;
6111 }
6112
6113 if (reg->umin_value == 0) {
6114 err = check_helper_mem_access(env, regno - 1, 0,
6115 zero_size_allowed,
6116 meta);
6117 if (err)
6118 return err;
6119 }
6120
6121 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
6122 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
6123 regno);
6124 return -EACCES;
6125 }
6126 err = check_helper_mem_access(env, regno - 1,
6127 reg->umax_value,
6128 zero_size_allowed, meta);
6129 if (!err)
6130 err = mark_chain_precision(env, regno);
6131 return err;
6132}
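
/* Illustrative sketch of the bounding that the messages above ask for
 * (BPF program side; get_len() and src are placeholders):
 *
 *	char buf[64];
 *	__u32 len = get_len();		// unknown scalar, umax too large
 *
 *	len &= sizeof(buf) - 1;		// or: if (len > sizeof(buf)) return 0;
 *	bpf_probe_read_kernel(buf, len, src);	// size arg now has a usable umax
 */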
6133
6134int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
6135 u32 regno, u32 mem_size)
6136{
6137 bool may_be_null = type_may_be_null(reg->type);
6138 struct bpf_reg_state saved_reg;
6139 struct bpf_call_arg_meta meta;
6140 int err;
6141
6142 if (register_is_null(reg))
6143 return 0;
6144
6145 memset(&meta, 0, sizeof(meta));
6146	/* Assuming that the register contains a value, check if the memory
6147 * access is safe. Temporarily save and restore the register's state as
6148 * the conversion shouldn't be visible to a caller.
6149 */
6150 if (may_be_null) {
6151 saved_reg = *reg;
6152 mark_ptr_not_null_reg(reg);
6153 }
6154
6155 err = check_helper_mem_access(env, regno, mem_size, true, &meta);
6156 /* Check access for BPF_WRITE */
6157 meta.raw_mode = true;
6158 err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta);
6159
6160 if (may_be_null)
6161 *reg = saved_reg;
6162
6163 return err;
6164}
6165
6166static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
6167 u32 regno)
6168{
6169 struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
6170 bool may_be_null = type_may_be_null(mem_reg->type);
6171 struct bpf_reg_state saved_reg;
6172 struct bpf_call_arg_meta meta;
6173 int err;
6174
6175 WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
6176
6177 memset(&meta, 0, sizeof(meta));
6178
6179 if (may_be_null) {
6180 saved_reg = *mem_reg;
6181 mark_ptr_not_null_reg(mem_reg);
6182 }
6183
6184 err = check_mem_size_reg(env, reg, regno, true, &meta);
6185 /* Check access for BPF_WRITE */
6186 meta.raw_mode = true;
6187 err = err ?: check_mem_size_reg(env, reg, regno, true, &meta);
6188
6189 if (may_be_null)
6190 *mem_reg = saved_reg;
6191 return err;
6192}
6193
6194/* Implementation details:
6195 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
6196 * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL.
6197 * Two bpf_map_lookups (even with the same key) will have different reg->id.
6198 * Two separate bpf_obj_new will also have different reg->id.
6199 * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier
6200 * clears reg->id after value_or_null->value transition, since the verifier only
6201 * cares about the range of access to valid map value pointer and doesn't care
6202 * about actual address of the map element.
6203 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
6204 * reg->id > 0 after value_or_null->value transition. By doing so
6205 * two bpf_map_lookups will be considered two different pointers that
6206 * point to different bpf_spin_locks. Likewise for pointers to allocated objects
6207 * returned from bpf_obj_new.
6208 * The verifier allows taking only one bpf_spin_lock at a time to avoid
6209 * deadlocks.
6210 * Since only one bpf_spin_lock is allowed the checks are simpler than
6211 * reg_is_refcounted() logic. The verifier needs to remember only
6212 * one spin_lock instead of array of acquired_refs.
6213 * cur_state->active_lock remembers which map value element or allocated
6214 * object got locked and clears it after bpf_spin_unlock.
6215 */
6216static int process_spin_lock(struct bpf_verifier_env *env, int regno,
6217 bool is_lock)
6218{
6219 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6220 struct bpf_verifier_state *cur = env->cur_state;
6221 bool is_const = tnum_is_const(reg->var_off);
6222 u64 val = reg->var_off.value;
6223 struct bpf_map *map = NULL;
6224 struct btf *btf = NULL;
6225 struct btf_record *rec;
6226
6227 if (!is_const) {
6228 verbose(env,
6229 "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
6230 regno);
6231 return -EINVAL;
6232 }
6233 if (reg->type == PTR_TO_MAP_VALUE) {
6234 map = reg->map_ptr;
6235 if (!map->btf) {
6236 verbose(env,
6237 "map '%s' has to have BTF in order to use bpf_spin_lock\n",
6238 map->name);
6239 return -EINVAL;
6240 }
6241 } else {
6242 btf = reg->btf;
6243 }
6244
6245 rec = reg_btf_record(reg);
6246 if (!btf_record_has_field(rec, BPF_SPIN_LOCK)) {
6247 verbose(env, "%s '%s' has no valid bpf_spin_lock\n", map ? "map" : "local",
6248 map ? map->name : "kptr");
6249 return -EINVAL;
6250 }
6251 if (rec->spin_lock_off != val + reg->off) {
6252 verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock' that is at %d\n",
6253 val + reg->off, rec->spin_lock_off);
6254 return -EINVAL;
6255 }
6256 if (is_lock) {
6257 if (cur->active_lock.ptr) {
6258 verbose(env,
6259 "Locking two bpf_spin_locks are not allowed\n");
6260 return -EINVAL;
6261 }
6262 if (map)
6263 cur->active_lock.ptr = map;
6264 else
6265 cur->active_lock.ptr = btf;
6266 cur->active_lock.id = reg->id;
6267 } else {
6268 void *ptr;
6269
6270 if (map)
6271 ptr = map;
6272 else
6273 ptr = btf;
6274
6275 if (!cur->active_lock.ptr) {
6276 verbose(env, "bpf_spin_unlock without taking a lock\n");
6277 return -EINVAL;
6278 }
6279 if (cur->active_lock.ptr != ptr ||
6280 cur->active_lock.id != reg->id) {
6281 verbose(env, "bpf_spin_unlock of different lock\n");
6282 return -EINVAL;
6283 }
6284
6285 invalidate_non_owning_refs(env);
6286
6287 cur->active_lock.ptr = NULL;
6288 cur->active_lock.id = 0;
6289 }
6290 return 0;
6291}
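
/* Illustrative sketch of the locking discipline checked by process_spin_lock()
 * above (BPF program side; map and field names are examples only):
 *
 *	struct val {
 *		struct bpf_spin_lock lock;
 *		int counter;
 *	};
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_HASH);
 *		__uint(max_entries, 1);
 *		__type(key, int);
 *		__type(value, struct val);
 *	} counters SEC(".maps");
 *
 *	...
 *	struct val *v = bpf_map_lookup_elem(&counters, &key);
 *
 *	if (v) {
 *		bpf_spin_lock(&v->lock);	// only one lock may be held
 *		v->counter++;
 *		bpf_spin_unlock(&v->lock);	// must be the same lock (same reg->id)
 *	}
 *
 * Taking a second bpf_spin_lock before unlocking, or unlocking a lock obtained
 * from a different lookup, is rejected by the checks above.
 */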
6292
6293static int process_timer_func(struct bpf_verifier_env *env, int regno,
6294 struct bpf_call_arg_meta *meta)
6295{
6296 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6297 bool is_const = tnum_is_const(reg->var_off);
6298 struct bpf_map *map = reg->map_ptr;
6299 u64 val = reg->var_off.value;
6300
6301 if (!is_const) {
6302 verbose(env,
6303 "R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
6304 regno);
6305 return -EINVAL;
6306 }
6307 if (!map->btf) {
6308 verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
6309 map->name);
6310 return -EINVAL;
6311 }
6312 if (!btf_record_has_field(map->record, BPF_TIMER)) {
6313 verbose(env, "map '%s' has no valid bpf_timer\n", map->name);
6314 return -EINVAL;
6315 }
6316 if (map->record->timer_off != val + reg->off) {
6317 verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
6318 val + reg->off, map->record->timer_off);
6319 return -EINVAL;
6320 }
6321 if (meta->map_ptr) {
6322 verbose(env, "verifier bug. Two map pointers in a timer helper\n");
6323 return -EFAULT;
6324 }
6325 meta->map_uid = reg->map_uid;
6326 meta->map_ptr = map;
6327 return 0;
6328}
6329
6330static int process_kptr_func(struct bpf_verifier_env *env, int regno,
6331 struct bpf_call_arg_meta *meta)
6332{
6333 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6334 struct bpf_map *map_ptr = reg->map_ptr;
6335 struct btf_field *kptr_field;
6336 u32 kptr_off;
6337
6338 if (!tnum_is_const(reg->var_off)) {
6339 verbose(env,
6340 "R%d doesn't have constant offset. kptr has to be at the constant offset\n",
6341 regno);
6342 return -EINVAL;
6343 }
6344 if (!map_ptr->btf) {
6345 verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
6346 map_ptr->name);
6347 return -EINVAL;
6348 }
6349 if (!btf_record_has_field(map_ptr->record, BPF_KPTR)) {
6350 verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
6351 return -EINVAL;
6352 }
6353
6354 meta->map_ptr = map_ptr;
6355 kptr_off = reg->off + reg->var_off.value;
6356 kptr_field = btf_record_find(map_ptr->record, kptr_off, BPF_KPTR);
6357 if (!kptr_field) {
6358 verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
6359 return -EACCES;
6360 }
6361 if (kptr_field->type != BPF_KPTR_REF) {
6362 verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
6363 return -EACCES;
6364 }
6365 meta->kptr_field = kptr_field;
6366 return 0;
6367}
6368
6369/* There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
6370 * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
6371 *
6372 * In both cases we deal with the first 8 bytes, but need to mark the next 8
6373 * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
6374 * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
6375 *
6376 * Mutability of bpf_dynptr is at two levels, one is at the level of struct
6377 * bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct
6378 * bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can
6379 * mutate the view of the dynptr and also possibly destroy it. In the latter
6380 * case, it cannot mutate the bpf_dynptr itself but it can still mutate the
6381 * memory that dynptr points to.
6382 *
6383 * The verifier will keep track both levels of mutation (bpf_dynptr's in
6384 * reg->type and the memory's in reg->dynptr.type), but there is no support for
6385 * readonly dynptr view yet, hence only the first case is tracked and checked.
6386 *
6387 * This is consistent with how C applies the const modifier to a struct object,
6388 * where the pointer itself inside bpf_dynptr becomes const but not what it
6389 * points to.
6390 *
6391 * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument
6392 * type, and declare it as 'const struct bpf_dynptr *' in their prototype.
6393 */
6394static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx,
6395 enum bpf_arg_type arg_type)
6396{
6397 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6398 int err;
6399
6400 /* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
6401 * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
6402 */
6403 if ((arg_type & (MEM_UNINIT | MEM_RDONLY)) == (MEM_UNINIT | MEM_RDONLY)) {
6404 verbose(env, "verifier internal error: misconfigured dynptr helper type flags\n");
6405 return -EFAULT;
6406 }
6407
6408 /* MEM_UNINIT - Points to memory that is an appropriate candidate for
6409 * constructing a mutable bpf_dynptr object.
6410 *
6411 * Currently, this is only possible with PTR_TO_STACK
6412 * pointing to a region of at least 16 bytes which doesn't
6413 * contain an existing bpf_dynptr.
6414 *
6415	 * MEM_RDONLY - Points to an initialized bpf_dynptr that will not be
6416 * mutated or destroyed. However, the memory it points to
6417 * may be mutated.
6418 *
6419	 * None       - Points to an initialized dynptr that can be mutated and
6420 * destroyed, including mutation of the memory it points
6421 * to.
6422 */
6423 if (arg_type & MEM_UNINIT) {
6424 int i;
6425
6426 if (!is_dynptr_reg_valid_uninit(env, reg)) {
6427 verbose(env, "Dynptr has to be an uninitialized dynptr\n");
6428 return -EINVAL;
6429 }
6430
6431		/* we write one BPF_DW-sized chunk (8 bytes) at a time */
6432 for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
6433 err = check_mem_access(env, insn_idx, regno,
6434 i, BPF_DW, BPF_WRITE, -1, false);
6435 if (err)
6436 return err;
6437 }
6438
6439 err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx);
6440 } else /* MEM_RDONLY and None case from above */ {
6441 /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
6442 if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
6443 verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
6444 return -EINVAL;
6445 }
6446
6447 if (!is_dynptr_reg_valid_init(env, reg)) {
6448 verbose(env,
6449 "Expected an initialized dynptr as arg #%d\n",
6450 regno);
6451 return -EINVAL;
6452 }
6453
6454 /* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */
6455 if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) {
6456 verbose(env,
6457 "Expected a dynptr of type %s as arg #%d\n",
6458 dynptr_type_str(arg_to_dynptr_type(arg_type)), regno);
6459 return -EINVAL;
6460 }
6461
6462 err = mark_dynptr_read(env, reg);
6463 }
6464 return err;
6465}
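
/* Illustrative sketch of the two mutability levels handled by
 * process_dynptr_func() above (BPF program side; the ring buffer map and
 * sizes are examples only):
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_RINGBUF);
 *		__uint(max_entries, 4096);
 *	} rb SEC(".maps");
 *
 *	...
 *	struct bpf_dynptr ptr;	// 16 bytes of stack -> STACK_DYNPTR slots
 *	__u64 *data;
 *
 *	// MEM_UNINIT argument: constructs the dynptr in place
 *	if (bpf_ringbuf_reserve_dynptr(&rb, sizeof(*data), 0, &ptr)) {
 *		bpf_ringbuf_discard_dynptr(&ptr, 0);
 *		return 0;
 *	}
 *	// 'const struct bpf_dynptr *' argument: the dynptr itself is not
 *	// mutated, but the memory it points to may be written
 *	data = bpf_dynptr_data(&ptr, 0, sizeof(*data));
 *	if (data)
 *		*data = bpf_ktime_get_ns();
 *	bpf_ringbuf_submit_dynptr(&ptr, 0);
 */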
6466
6467static bool arg_type_is_mem_size(enum bpf_arg_type type)
6468{
6469 return type == ARG_CONST_SIZE ||
6470 type == ARG_CONST_SIZE_OR_ZERO;
6471}
6472
6473static bool arg_type_is_release(enum bpf_arg_type type)
6474{
6475 return type & OBJ_RELEASE;
6476}
6477
6478static bool arg_type_is_dynptr(enum bpf_arg_type type)
6479{
6480 return base_type(type) == ARG_PTR_TO_DYNPTR;
6481}
6482
6483static int int_ptr_type_to_size(enum bpf_arg_type type)
6484{
6485 if (type == ARG_PTR_TO_INT)
6486 return sizeof(u32);
6487 else if (type == ARG_PTR_TO_LONG)
6488 return sizeof(u64);
6489
6490 return -EINVAL;
6491}
6492
6493static int resolve_map_arg_type(struct bpf_verifier_env *env,
6494 const struct bpf_call_arg_meta *meta,
6495 enum bpf_arg_type *arg_type)
6496{
6497 if (!meta->map_ptr) {
6498 /* kernel subsystem misconfigured verifier */
6499 verbose(env, "invalid map_ptr to access map->type\n");
6500 return -EACCES;
6501 }
6502
6503 switch (meta->map_ptr->map_type) {
6504 case BPF_MAP_TYPE_SOCKMAP:
6505 case BPF_MAP_TYPE_SOCKHASH:
6506 if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
6507 *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
6508 } else {
6509 verbose(env, "invalid arg_type for sockmap/sockhash\n");
6510 return -EINVAL;
6511 }
6512 break;
6513 case BPF_MAP_TYPE_BLOOM_FILTER:
6514 if (meta->func_id == BPF_FUNC_map_peek_elem)
6515 *arg_type = ARG_PTR_TO_MAP_VALUE;
6516 break;
6517 default:
6518 break;
6519 }
6520 return 0;
6521}
6522
6523struct bpf_reg_types {
6524 const enum bpf_reg_type types[10];
6525 u32 *btf_id;
6526};
6527
6528static const struct bpf_reg_types sock_types = {
6529 .types = {
6530 PTR_TO_SOCK_COMMON,
6531 PTR_TO_SOCKET,
6532 PTR_TO_TCP_SOCK,
6533 PTR_TO_XDP_SOCK,
6534 },
6535};
6536
6537#ifdef CONFIG_NET
6538static const struct bpf_reg_types btf_id_sock_common_types = {
6539 .types = {
6540 PTR_TO_SOCK_COMMON,
6541 PTR_TO_SOCKET,
6542 PTR_TO_TCP_SOCK,
6543 PTR_TO_XDP_SOCK,
6544 PTR_TO_BTF_ID,
6545 PTR_TO_BTF_ID | PTR_TRUSTED,
6546 },
6547 .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
6548};
6549#endif
6550
6551static const struct bpf_reg_types mem_types = {
6552 .types = {
6553 PTR_TO_STACK,
6554 PTR_TO_PACKET,
6555 PTR_TO_PACKET_META,
6556 PTR_TO_MAP_KEY,
6557 PTR_TO_MAP_VALUE,
6558 PTR_TO_MEM,
6559 PTR_TO_MEM | MEM_RINGBUF,
6560 PTR_TO_BUF,
6561 },
6562};
6563
6564static const struct bpf_reg_types int_ptr_types = {
6565 .types = {
6566 PTR_TO_STACK,
6567 PTR_TO_PACKET,
6568 PTR_TO_PACKET_META,
6569 PTR_TO_MAP_KEY,
6570 PTR_TO_MAP_VALUE,
6571 },
6572};
6573
6574static const struct bpf_reg_types spin_lock_types = {
6575 .types = {
6576 PTR_TO_MAP_VALUE,
6577 PTR_TO_BTF_ID | MEM_ALLOC,
6578 }
6579};
6580
6581static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
6582static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
6583static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
6584static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } };
6585static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
6586static const struct bpf_reg_types btf_ptr_types = {
6587 .types = {
6588 PTR_TO_BTF_ID,
6589 PTR_TO_BTF_ID | PTR_TRUSTED,
6590 PTR_TO_BTF_ID | MEM_RCU,
6591 },
6592};
6593static const struct bpf_reg_types percpu_btf_ptr_types = {
6594 .types = {
6595 PTR_TO_BTF_ID | MEM_PERCPU,
6596 PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
6597 }
6598};
6599static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
6600static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
6601static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
6602static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
6603static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
6604static const struct bpf_reg_types dynptr_types = {
6605 .types = {
6606 PTR_TO_STACK,
6607 CONST_PTR_TO_DYNPTR,
6608 }
6609};
6610
6611static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
6612 [ARG_PTR_TO_MAP_KEY] = &mem_types,
6613 [ARG_PTR_TO_MAP_VALUE] = &mem_types,
6614 [ARG_CONST_SIZE] = &scalar_types,
6615 [ARG_CONST_SIZE_OR_ZERO] = &scalar_types,
6616 [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types,
6617 [ARG_CONST_MAP_PTR] = &const_map_ptr_types,
6618 [ARG_PTR_TO_CTX] = &context_types,
6619 [ARG_PTR_TO_SOCK_COMMON] = &sock_types,
6620#ifdef CONFIG_NET
6621 [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
6622#endif
6623 [ARG_PTR_TO_SOCKET] = &fullsock_types,
6624 [ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
6625 [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
6626 [ARG_PTR_TO_MEM] = &mem_types,
6627 [ARG_PTR_TO_RINGBUF_MEM] = &ringbuf_mem_types,
6628 [ARG_PTR_TO_INT] = &int_ptr_types,
6629 [ARG_PTR_TO_LONG] = &int_ptr_types,
6630 [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
6631 [ARG_PTR_TO_FUNC] = &func_ptr_types,
6632 [ARG_PTR_TO_STACK] = &stack_ptr_types,
6633 [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
6634 [ARG_PTR_TO_TIMER] = &timer_types,
6635 [ARG_PTR_TO_KPTR] = &kptr_types,
6636 [ARG_PTR_TO_DYNPTR] = &dynptr_types,
6637};
6638
6639static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
6640 enum bpf_arg_type arg_type,
6641 const u32 *arg_btf_id,
6642 struct bpf_call_arg_meta *meta)
6643{
6644 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6645 enum bpf_reg_type expected, type = reg->type;
6646 const struct bpf_reg_types *compatible;
6647 int i, j;
6648
6649 compatible = compatible_reg_types[base_type(arg_type)];
6650 if (!compatible) {
6651 verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
6652 return -EFAULT;
6653 }
6654
6655 /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
6656 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
6657 *
6658 * Same for MAYBE_NULL:
6659 *
6660 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
6661	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + MAYBE_NULL
6662 *
6663 * Therefore we fold these flags depending on the arg_type before comparison.
6664 */
6665 if (arg_type & MEM_RDONLY)
6666 type &= ~MEM_RDONLY;
6667 if (arg_type & PTR_MAYBE_NULL)
6668 type &= ~PTR_MAYBE_NULL;
6669
6670 for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
6671 expected = compatible->types[i];
6672 if (expected == NOT_INIT)
6673 break;
6674
6675 if (type == expected)
6676 goto found;
6677 }
6678
6679 verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
6680 for (j = 0; j + 1 < i; j++)
6681 verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
6682 verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
6683 return -EACCES;
6684
6685found:
6686 if (base_type(reg->type) != PTR_TO_BTF_ID)
6687 return 0;
6688
6689 switch ((int)reg->type) {
6690 case PTR_TO_BTF_ID:
6691 case PTR_TO_BTF_ID | PTR_TRUSTED:
6692 case PTR_TO_BTF_ID | MEM_RCU:
6693 {
6694 /* For bpf_sk_release, it needs to match against first member
6695 * 'struct sock_common', hence make an exception for it. This
6696 * allows bpf_sk_release to work for multiple socket types.
6697 */
6698 bool strict_type_match = arg_type_is_release(arg_type) &&
6699 meta->func_id != BPF_FUNC_sk_release;
6700
6701 if (!arg_btf_id) {
6702 if (!compatible->btf_id) {
6703 verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
6704 return -EFAULT;
6705 }
6706 arg_btf_id = compatible->btf_id;
6707 }
6708
6709 if (meta->func_id == BPF_FUNC_kptr_xchg) {
6710 if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
6711 return -EACCES;
6712 } else {
6713 if (arg_btf_id == BPF_PTR_POISON) {
6714 verbose(env, "verifier internal error:");
6715 verbose(env, "R%d has non-overwritten BPF_PTR_POISON type\n",
6716 regno);
6717 return -EACCES;
6718 }
6719
6720 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
6721 btf_vmlinux, *arg_btf_id,
6722 strict_type_match)) {
6723 verbose(env, "R%d is of type %s but %s is expected\n",
6724 regno, kernel_type_name(reg->btf, reg->btf_id),
6725 kernel_type_name(btf_vmlinux, *arg_btf_id));
6726 return -EACCES;
6727 }
6728 }
6729 break;
6730 }
6731 case PTR_TO_BTF_ID | MEM_ALLOC:
6732 if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock) {
6733 verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
6734 return -EFAULT;
6735 }
6736 /* Handled by helper specific checks */
6737 break;
6738 case PTR_TO_BTF_ID | MEM_PERCPU:
6739 case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED:
6740 /* Handled by helper specific checks */
6741 break;
6742 default:
6743 verbose(env, "verifier internal error: invalid PTR_TO_BTF_ID register for type match\n");
6744 return -EFAULT;
6745 }
6746 return 0;
6747}
6748
6749static struct btf_field *
6750reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
6751{
6752 struct btf_field *field;
6753 struct btf_record *rec;
6754
6755 rec = reg_btf_record(reg);
6756 if (!rec)
6757 return NULL;
6758
6759 field = btf_record_find(rec, off, fields);
6760 if (!field)
6761 return NULL;
6762
6763 return field;
6764}
6765
6766int check_func_arg_reg_off(struct bpf_verifier_env *env,
6767 const struct bpf_reg_state *reg, int regno,
6768 enum bpf_arg_type arg_type)
6769{
6770 u32 type = reg->type;
6771
6772 /* When referenced register is passed to release function, its fixed
6773 * offset must be 0.
6774 *
6775 * We will check arg_type_is_release reg has ref_obj_id when storing
6776	 * For arg_type_is_release we will check that the reg has a ref_obj_id
6777	 * when storing meta->release_regno.
6778 if (arg_type_is_release(arg_type)) {
6779 /* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
6780 * may not directly point to the object being released, but to
6781		 * a dynptr pointing to such an object, which might be at some
6782		 * offset on the stack. In that case, we simply fall back to the
6783		 * default handling.
6784 */
6785 if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
6786 return 0;
6787
6788 if ((type_is_ptr_alloc_obj(type) || type_is_non_owning_ref(type)) && reg->off) {
6789 if (reg_find_field_offset(reg, reg->off, BPF_GRAPH_NODE_OR_ROOT))
6790 return __check_ptr_off_reg(env, reg, regno, true);
6791
6792 verbose(env, "R%d must have zero offset when passed to release func\n",
6793 regno);
6794 verbose(env, "No graph node or root found at R%d type:%s off:%d\n", regno,
6795 kernel_type_name(reg->btf, reg->btf_id), reg->off);
6796 return -EINVAL;
6797 }
6798
6799 /* Doing check_ptr_off_reg check for the offset will catch this
6800 * because fixed_off_ok is false, but checking here allows us
6801 * to give the user a better error message.
6802 */
6803 if (reg->off) {
6804 verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
6805 regno);
6806 return -EINVAL;
6807 }
6808 return __check_ptr_off_reg(env, reg, regno, false);
6809 }
6810
6811 switch (type) {
6812 /* Pointer types where both fixed and variable offset is explicitly allowed: */
6813 case PTR_TO_STACK:
6814 case PTR_TO_PACKET:
6815 case PTR_TO_PACKET_META:
6816 case PTR_TO_MAP_KEY:
6817 case PTR_TO_MAP_VALUE:
6818 case PTR_TO_MEM:
6819 case PTR_TO_MEM | MEM_RDONLY:
6820 case PTR_TO_MEM | MEM_RINGBUF:
6821 case PTR_TO_BUF:
6822 case PTR_TO_BUF | MEM_RDONLY:
6823 case SCALAR_VALUE:
6824 return 0;
6825 /* All the rest must be rejected, except PTR_TO_BTF_ID which allows
6826 * fixed offset.
6827 */
6828 case PTR_TO_BTF_ID:
6829 case PTR_TO_BTF_ID | MEM_ALLOC:
6830 case PTR_TO_BTF_ID | PTR_TRUSTED:
6831 case PTR_TO_BTF_ID | MEM_RCU:
6832 case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
6833 /* When referenced PTR_TO_BTF_ID is passed to release function,
6834 * its fixed offset must be 0. In the other cases, fixed offset
6835 * can be non-zero. This was already checked above. So pass
6836 * fixed_off_ok as true to allow fixed offset for all other
6837 * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
6838 * still need to do checks instead of returning.
6839 */
6840 return __check_ptr_off_reg(env, reg, regno, true);
6841 default:
6842 return __check_ptr_off_reg(env, reg, regno, false);
6843 }
6844}
6845
6846static struct bpf_reg_state *get_dynptr_arg_reg(struct bpf_verifier_env *env,
6847 const struct bpf_func_proto *fn,
6848 struct bpf_reg_state *regs)
6849{
6850 struct bpf_reg_state *state = NULL;
6851 int i;
6852
6853 for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
6854 if (arg_type_is_dynptr(fn->arg_type[i])) {
6855 if (state) {
6856 verbose(env, "verifier internal error: multiple dynptr args\n");
6857 return NULL;
6858 }
6859 state = &regs[BPF_REG_1 + i];
6860 }
6861
6862 if (!state)
6863 verbose(env, "verifier internal error: no dynptr arg found\n");
6864
6865 return state;
6866}
6867
6868static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
6869{
6870 struct bpf_func_state *state = func(env, reg);
6871 int spi;
6872
6873 if (reg->type == CONST_PTR_TO_DYNPTR)
6874 return reg->id;
6875 spi = dynptr_get_spi(env, reg);
6876 if (spi < 0)
6877 return spi;
6878 return state->stack[spi].spilled_ptr.id;
6879}
6880
6881static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
6882{
6883 struct bpf_func_state *state = func(env, reg);
6884 int spi;
6885
6886 if (reg->type == CONST_PTR_TO_DYNPTR)
6887 return reg->ref_obj_id;
6888 spi = dynptr_get_spi(env, reg);
6889 if (spi < 0)
6890 return spi;
6891 return state->stack[spi].spilled_ptr.ref_obj_id;
6892}
6893
6894static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
6895 struct bpf_reg_state *reg)
6896{
6897 struct bpf_func_state *state = func(env, reg);
6898 int spi;
6899
6900 if (reg->type == CONST_PTR_TO_DYNPTR)
6901 return reg->dynptr.type;
6902
6903 spi = __get_spi(reg->off);
6904 if (spi < 0) {
6905 verbose(env, "verifier internal error: invalid spi when querying dynptr type\n");
6906 return BPF_DYNPTR_TYPE_INVALID;
6907 }
6908
6909 return state->stack[spi].spilled_ptr.dynptr.type;
6910}
6911
6912static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
6913 struct bpf_call_arg_meta *meta,
6914 const struct bpf_func_proto *fn,
6915 int insn_idx)
6916{
6917 u32 regno = BPF_REG_1 + arg;
6918 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6919 enum bpf_arg_type arg_type = fn->arg_type[arg];
6920 enum bpf_reg_type type = reg->type;
6921 u32 *arg_btf_id = NULL;
6922 int err = 0;
6923
6924 if (arg_type == ARG_DONTCARE)
6925 return 0;
6926
6927 err = check_reg_arg(env, regno, SRC_OP);
6928 if (err)
6929 return err;
6930
6931 if (arg_type == ARG_ANYTHING) {
6932 if (is_pointer_value(env, regno)) {
6933 verbose(env, "R%d leaks addr into helper function\n",
6934 regno);
6935 return -EACCES;
6936 }
6937 return 0;
6938 }
6939
6940 if (type_is_pkt_pointer(type) &&
6941 !may_access_direct_pkt_data(env, meta, BPF_READ)) {
6942 verbose(env, "helper access to the packet is not allowed\n");
6943 return -EACCES;
6944 }
6945
6946 if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
6947 err = resolve_map_arg_type(env, meta, &arg_type);
6948 if (err)
6949 return err;
6950 }
6951
6952 if (register_is_null(reg) && type_may_be_null(arg_type))
6953 /* A NULL register has a SCALAR_VALUE type, so skip
6954 * type checking.
6955 */
6956 goto skip_type_check;
6957
6958 /* arg_btf_id and arg_size are in a union. */
6959 if (base_type(arg_type) == ARG_PTR_TO_BTF_ID ||
6960 base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK)
6961 arg_btf_id = fn->arg_btf_id[arg];
6962
6963 err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
6964 if (err)
6965 return err;
6966
6967 err = check_func_arg_reg_off(env, reg, regno, arg_type);
6968 if (err)
6969 return err;
6970
6971skip_type_check:
6972 if (arg_type_is_release(arg_type)) {
6973 if (arg_type_is_dynptr(arg_type)) {
6974 struct bpf_func_state *state = func(env, reg);
6975 int spi;
6976
6977 /* Only dynptr created on stack can be released, thus
6978 * the get_spi and stack state checks for spilled_ptr
6979 * should only be done before process_dynptr_func for
6980 * PTR_TO_STACK.
6981 */
6982 if (reg->type == PTR_TO_STACK) {
6983 spi = dynptr_get_spi(env, reg);
6984 if (spi < 0 || !state->stack[spi].spilled_ptr.ref_obj_id) {
6985 verbose(env, "arg %d is an unacquired reference\n", regno);
6986 return -EINVAL;
6987 }
6988 } else {
6989 verbose(env, "cannot release unowned const bpf_dynptr\n");
6990 return -EINVAL;
6991 }
6992 } else if (!reg->ref_obj_id && !register_is_null(reg)) {
6993 verbose(env, "R%d must be referenced when passed to release function\n",
6994 regno);
6995 return -EINVAL;
6996 }
6997 if (meta->release_regno) {
6998 verbose(env, "verifier internal error: more than one release argument\n");
6999 return -EFAULT;
7000 }
7001 meta->release_regno = regno;
7002 }
7003
7004 if (reg->ref_obj_id) {
7005 if (meta->ref_obj_id) {
7006 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
7007 regno, reg->ref_obj_id,
7008 meta->ref_obj_id);
7009 return -EFAULT;
7010 }
7011 meta->ref_obj_id = reg->ref_obj_id;
7012 }
7013
7014 switch (base_type(arg_type)) {
7015 case ARG_CONST_MAP_PTR:
7016 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
7017 if (meta->map_ptr) {
7018 /* Use map_uid (which is unique id of inner map) to reject:
7019 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
7020 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
7021 * if (inner_map1 && inner_map2) {
7022 * timer = bpf_map_lookup_elem(inner_map1);
7023 * if (timer)
7024 * // mismatch would have been allowed
7025 * bpf_timer_init(timer, inner_map2);
7026 * }
7027 *
7028 * Comparing map_ptr is enough to distinguish normal and outer maps.
7029 */
7030 if (meta->map_ptr != reg->map_ptr ||
7031 meta->map_uid != reg->map_uid) {
7032 verbose(env,
7033 "timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
7034 meta->map_uid, reg->map_uid);
7035 return -EINVAL;
7036 }
7037 }
7038 meta->map_ptr = reg->map_ptr;
7039 meta->map_uid = reg->map_uid;
7040 break;
7041 case ARG_PTR_TO_MAP_KEY:
7042 /* bpf_map_xxx(..., map_ptr, ..., key) call:
7043 * check that [key, key + map->key_size) are within
7044 * stack limits and initialized
7045 */
7046 if (!meta->map_ptr) {
7047 /* in the function declaration, map_ptr must come before
7048 * map_key, so that it's verified and known before
7049 * we have to check map_key here. Otherwise it means
7050 * that the kernel subsystem misconfigured the verifier
7051 */
7052 verbose(env, "invalid map_ptr to access map->key\n");
7053 return -EACCES;
7054 }
7055 err = check_helper_mem_access(env, regno,
7056 meta->map_ptr->key_size, false,
7057 NULL);
7058 break;
7059 case ARG_PTR_TO_MAP_VALUE:
7060 if (type_may_be_null(arg_type) && register_is_null(reg))
7061 return 0;
7062
7063 /* bpf_map_xxx(..., map_ptr, ..., value) call:
7064 * check [value, value + map->value_size) validity
7065 */
7066 if (!meta->map_ptr) {
7067 /* kernel subsystem misconfigured verifier */
7068 verbose(env, "invalid map_ptr to access map->value\n");
7069 return -EACCES;
7070 }
7071 meta->raw_mode = arg_type & MEM_UNINIT;
7072 err = check_helper_mem_access(env, regno,
7073 meta->map_ptr->value_size, false,
7074 meta);
7075 break;
7076 case ARG_PTR_TO_PERCPU_BTF_ID:
7077 if (!reg->btf_id) {
7078 verbose(env, "Helper has invalid btf_id in R%d\n", regno);
7079 return -EACCES;
7080 }
7081 meta->ret_btf = reg->btf;
7082 meta->ret_btf_id = reg->btf_id;
7083 break;
7084 case ARG_PTR_TO_SPIN_LOCK:
7085 if (in_rbtree_lock_required_cb(env)) {
7086 verbose(env, "can't spin_{lock,unlock} in rbtree cb\n");
7087 return -EACCES;
7088 }
7089 if (meta->func_id == BPF_FUNC_spin_lock) {
7090 err = process_spin_lock(env, regno, true);
7091 if (err)
7092 return err;
7093 } else if (meta->func_id == BPF_FUNC_spin_unlock) {
7094 err = process_spin_lock(env, regno, false);
7095 if (err)
7096 return err;
7097 } else {
7098 verbose(env, "verifier internal error\n");
7099 return -EFAULT;
7100 }
7101 break;
7102 case ARG_PTR_TO_TIMER:
7103 err = process_timer_func(env, regno, meta);
7104 if (err)
7105 return err;
7106 break;
7107 case ARG_PTR_TO_FUNC:
7108 meta->subprogno = reg->subprogno;
7109 break;
7110 case ARG_PTR_TO_MEM:
7111 /* The access to this pointer is only checked when we hit the
7112 * next is_mem_size argument below.
7113 */
7114 meta->raw_mode = arg_type & MEM_UNINIT;
7115 if (arg_type & MEM_FIXED_SIZE) {
7116 err = check_helper_mem_access(env, regno,
7117 fn->arg_size[arg], false,
7118 meta);
7119 }
7120 break;
7121 case ARG_CONST_SIZE:
7122 err = check_mem_size_reg(env, reg, regno, false, meta);
7123 break;
7124 case ARG_CONST_SIZE_OR_ZERO:
7125 err = check_mem_size_reg(env, reg, regno, true, meta);
7126 break;
7127 case ARG_PTR_TO_DYNPTR:
7128 err = process_dynptr_func(env, regno, insn_idx, arg_type);
7129 if (err)
7130 return err;
7131 break;
7132 case ARG_CONST_ALLOC_SIZE_OR_ZERO:
7133 if (!tnum_is_const(reg->var_off)) {
7134 verbose(env, "R%d is not a known constant'\n",
7135 regno);
7136 return -EACCES;
7137 }
7138 meta->mem_size = reg->var_off.value;
7139 err = mark_chain_precision(env, regno);
7140 if (err)
7141 return err;
7142 break;
7143 case ARG_PTR_TO_INT:
7144 case ARG_PTR_TO_LONG:
7145 {
7146 int size = int_ptr_type_to_size(arg_type);
7147
7148 err = check_helper_mem_access(env, regno, size, false, meta);
7149 if (err)
7150 return err;
7151 err = check_ptr_alignment(env, reg, 0, size, true);
7152 break;
7153 }
7154 case ARG_PTR_TO_CONST_STR:
7155 {
7156 struct bpf_map *map = reg->map_ptr;
7157 int map_off;
7158 u64 map_addr;
7159 char *str_ptr;
7160
7161 if (!bpf_map_is_rdonly(map)) {
7162 verbose(env, "R%d does not point to a readonly map'\n", regno);
7163 return -EACCES;
7164 }
7165
7166 if (!tnum_is_const(reg->var_off)) {
7167 verbose(env, "R%d is not a constant address'\n", regno);
7168 return -EACCES;
7169 }
7170
7171 if (!map->ops->map_direct_value_addr) {
7172 verbose(env, "no direct value access support for this map type\n");
7173 return -EACCES;
7174 }
7175
7176 err = check_map_access(env, regno, reg->off,
7177 map->value_size - reg->off, false,
7178 ACCESS_HELPER);
7179 if (err)
7180 return err;
7181
7182 map_off = reg->off + reg->var_off.value;
7183 err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
7184 if (err) {
7185 verbose(env, "direct value access on string failed\n");
7186 return err;
7187 }
7188
7189 str_ptr = (char *)(long)(map_addr);
7190 if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
7191 verbose(env, "string is not zero-terminated\n");
7192 return -EINVAL;
7193 }
7194 break;
7195 }
7196 case ARG_PTR_TO_KPTR:
7197 err = process_kptr_func(env, regno, meta);
7198 if (err)
7199 return err;
7200 break;
7201 }
7202
7203 return err;
7204}
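
/* A minimal BPF-side sketch of the argument shape check_func_arg() validates
 * for the map helpers above: R1 is ARG_CONST_MAP_PTR, R2 an initialized
 * stack pointer covering key_size bytes (ARG_PTR_TO_MAP_KEY), R3/R4 the
 * value and flags for an update. Map and program names are illustrative;
 * libbpf conventions (<bpf/bpf_helpers.h>, SEC()) are assumed.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 128);
	__type(key, __u32);
	__type(value, __u64);
} counts SEC(".maps");

SEC("tc")
int arg_check_example(struct __sk_buff *skb)
{
	__u32 key = skb->protocol;	/* key lives on the stack: PTR_TO_STACK */
	__u64 one = 1, *val;

	val = bpf_map_lookup_elem(&counts, &key);
	if (val)
		__sync_fetch_and_add(val, 1);
	else
		bpf_map_update_elem(&counts, &key, &one, BPF_ANY);
	return 0;
}

char _license[] SEC("license") = "GPL";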
7205
7206static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
7207{
7208 enum bpf_attach_type eatype = env->prog->expected_attach_type;
7209 enum bpf_prog_type type = resolve_prog_type(env->prog);
7210
7211 if (func_id != BPF_FUNC_map_update_elem)
7212 return false;
7213
7214 /* It's not possible to get access to a locked struct sock in these
7215 * contexts, so updating is safe.
7216 */
7217 switch (type) {
7218 case BPF_PROG_TYPE_TRACING:
7219 if (eatype == BPF_TRACE_ITER)
7220 return true;
7221 break;
7222 case BPF_PROG_TYPE_SOCKET_FILTER:
7223 case BPF_PROG_TYPE_SCHED_CLS:
7224 case BPF_PROG_TYPE_SCHED_ACT:
7225 case BPF_PROG_TYPE_XDP:
7226 case BPF_PROG_TYPE_SK_REUSEPORT:
7227 case BPF_PROG_TYPE_FLOW_DISSECTOR:
7228 case BPF_PROG_TYPE_SK_LOOKUP:
7229 return true;
7230 default:
7231 break;
7232 }
7233
7234 verbose(env, "cannot update sockmap in this context\n");
7235 return false;
7236}
7237
7238static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
7239{
7240 return env->prog->jit_requested &&
7241 bpf_jit_supports_subprog_tailcalls();
7242}
7243
7244static int check_map_func_compatibility(struct bpf_verifier_env *env,
7245 struct bpf_map *map, int func_id)
7246{
7247 if (!map)
7248 return 0;
7249
7250 /* We need a two way check, first is from map perspective ... */
7251 switch (map->map_type) {
7252 case BPF_MAP_TYPE_PROG_ARRAY:
7253 if (func_id != BPF_FUNC_tail_call)
7254 goto error;
7255 break;
7256 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
7257 if (func_id != BPF_FUNC_perf_event_read &&
7258 func_id != BPF_FUNC_perf_event_output &&
7259 func_id != BPF_FUNC_skb_output &&
7260 func_id != BPF_FUNC_perf_event_read_value &&
7261 func_id != BPF_FUNC_xdp_output)
7262 goto error;
7263 break;
7264 case BPF_MAP_TYPE_RINGBUF:
7265 if (func_id != BPF_FUNC_ringbuf_output &&
7266 func_id != BPF_FUNC_ringbuf_reserve &&
7267 func_id != BPF_FUNC_ringbuf_query &&
7268 func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
7269 func_id != BPF_FUNC_ringbuf_submit_dynptr &&
7270 func_id != BPF_FUNC_ringbuf_discard_dynptr)
7271 goto error;
7272 break;
7273 case BPF_MAP_TYPE_USER_RINGBUF:
7274 if (func_id != BPF_FUNC_user_ringbuf_drain)
7275 goto error;
7276 break;
7277 case BPF_MAP_TYPE_STACK_TRACE:
7278 if (func_id != BPF_FUNC_get_stackid)
7279 goto error;
7280 break;
7281 case BPF_MAP_TYPE_CGROUP_ARRAY:
7282 if (func_id != BPF_FUNC_skb_under_cgroup &&
7283 func_id != BPF_FUNC_current_task_under_cgroup)
7284 goto error;
7285 break;
7286 case BPF_MAP_TYPE_CGROUP_STORAGE:
7287 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
7288 if (func_id != BPF_FUNC_get_local_storage)
7289 goto error;
7290 break;
7291 case BPF_MAP_TYPE_DEVMAP:
7292 case BPF_MAP_TYPE_DEVMAP_HASH:
7293 if (func_id != BPF_FUNC_redirect_map &&
7294 func_id != BPF_FUNC_map_lookup_elem)
7295 goto error;
7296 break;
7297 /* Restrict bpf side of cpumap and xskmap, open when use-cases
7298 * appear.
7299 */
7300 case BPF_MAP_TYPE_CPUMAP:
7301 if (func_id != BPF_FUNC_redirect_map)
7302 goto error;
7303 break;
7304 case BPF_MAP_TYPE_XSKMAP:
7305 if (func_id != BPF_FUNC_redirect_map &&
7306 func_id != BPF_FUNC_map_lookup_elem)
7307 goto error;
7308 break;
7309 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
7310 case BPF_MAP_TYPE_HASH_OF_MAPS:
7311 if (func_id != BPF_FUNC_map_lookup_elem)
7312 goto error;
7313 break;
7314 case BPF_MAP_TYPE_SOCKMAP:
7315 if (func_id != BPF_FUNC_sk_redirect_map &&
7316 func_id != BPF_FUNC_sock_map_update &&
7317 func_id != BPF_FUNC_map_delete_elem &&
7318 func_id != BPF_FUNC_msg_redirect_map &&
7319 func_id != BPF_FUNC_sk_select_reuseport &&
7320 func_id != BPF_FUNC_map_lookup_elem &&
7321 !may_update_sockmap(env, func_id))
7322 goto error;
7323 break;
7324 case BPF_MAP_TYPE_SOCKHASH:
7325 if (func_id != BPF_FUNC_sk_redirect_hash &&
7326 func_id != BPF_FUNC_sock_hash_update &&
7327 func_id != BPF_FUNC_map_delete_elem &&
7328 func_id != BPF_FUNC_msg_redirect_hash &&
7329 func_id != BPF_FUNC_sk_select_reuseport &&
7330 func_id != BPF_FUNC_map_lookup_elem &&
7331 !may_update_sockmap(env, func_id))
7332 goto error;
7333 break;
7334 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
7335 if (func_id != BPF_FUNC_sk_select_reuseport)
7336 goto error;
7337 break;
7338 case BPF_MAP_TYPE_QUEUE:
7339 case BPF_MAP_TYPE_STACK:
7340 if (func_id != BPF_FUNC_map_peek_elem &&
7341 func_id != BPF_FUNC_map_pop_elem &&
7342 func_id != BPF_FUNC_map_push_elem)
7343 goto error;
7344 break;
7345 case BPF_MAP_TYPE_SK_STORAGE:
7346 if (func_id != BPF_FUNC_sk_storage_get &&
7347 func_id != BPF_FUNC_sk_storage_delete &&
7348 func_id != BPF_FUNC_kptr_xchg)
7349 goto error;
7350 break;
7351 case BPF_MAP_TYPE_INODE_STORAGE:
7352 if (func_id != BPF_FUNC_inode_storage_get &&
7353 func_id != BPF_FUNC_inode_storage_delete &&
7354 func_id != BPF_FUNC_kptr_xchg)
7355 goto error;
7356 break;
7357 case BPF_MAP_TYPE_TASK_STORAGE:
7358 if (func_id != BPF_FUNC_task_storage_get &&
7359 func_id != BPF_FUNC_task_storage_delete &&
7360 func_id != BPF_FUNC_kptr_xchg)
7361 goto error;
7362 break;
7363 case BPF_MAP_TYPE_CGRP_STORAGE:
7364 if (func_id != BPF_FUNC_cgrp_storage_get &&
7365 func_id != BPF_FUNC_cgrp_storage_delete &&
7366 func_id != BPF_FUNC_kptr_xchg)
7367 goto error;
7368 break;
7369 case BPF_MAP_TYPE_BLOOM_FILTER:
7370 if (func_id != BPF_FUNC_map_peek_elem &&
7371 func_id != BPF_FUNC_map_push_elem)
7372 goto error;
7373 break;
7374 default:
7375 break;
7376 }
7377
7378 /* ... and second from the function itself. */
7379 switch (func_id) {
7380 case BPF_FUNC_tail_call:
7381 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
7382 goto error;
7383 if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
7384 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
7385 return -EINVAL;
7386 }
7387 break;
7388 case BPF_FUNC_perf_event_read:
7389 case BPF_FUNC_perf_event_output:
7390 case BPF_FUNC_perf_event_read_value:
7391 case BPF_FUNC_skb_output:
7392 case BPF_FUNC_xdp_output:
7393 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
7394 goto error;
7395 break;
7396 case BPF_FUNC_ringbuf_output:
7397 case BPF_FUNC_ringbuf_reserve:
7398 case BPF_FUNC_ringbuf_query:
7399 case BPF_FUNC_ringbuf_reserve_dynptr:
7400 case BPF_FUNC_ringbuf_submit_dynptr:
7401 case BPF_FUNC_ringbuf_discard_dynptr:
7402 if (map->map_type != BPF_MAP_TYPE_RINGBUF)
7403 goto error;
7404 break;
7405 case BPF_FUNC_user_ringbuf_drain:
7406 if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
7407 goto error;
7408 break;
7409 case BPF_FUNC_get_stackid:
7410 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
7411 goto error;
7412 break;
7413 case BPF_FUNC_current_task_under_cgroup:
7414 case BPF_FUNC_skb_under_cgroup:
7415 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
7416 goto error;
7417 break;
7418 case BPF_FUNC_redirect_map:
7419 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
7420 map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
7421 map->map_type != BPF_MAP_TYPE_CPUMAP &&
7422 map->map_type != BPF_MAP_TYPE_XSKMAP)
7423 goto error;
7424 break;
7425 case BPF_FUNC_sk_redirect_map:
7426 case BPF_FUNC_msg_redirect_map:
7427 case BPF_FUNC_sock_map_update:
7428 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
7429 goto error;
7430 break;
7431 case BPF_FUNC_sk_redirect_hash:
7432 case BPF_FUNC_msg_redirect_hash:
7433 case BPF_FUNC_sock_hash_update:
7434 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
7435 goto error;
7436 break;
7437 case BPF_FUNC_get_local_storage:
7438 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
7439 map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
7440 goto error;
7441 break;
7442 case BPF_FUNC_sk_select_reuseport:
7443 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
7444 map->map_type != BPF_MAP_TYPE_SOCKMAP &&
7445 map->map_type != BPF_MAP_TYPE_SOCKHASH)
7446 goto error;
7447 break;
7448 case BPF_FUNC_map_pop_elem:
7449 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
7450 map->map_type != BPF_MAP_TYPE_STACK)
7451 goto error;
7452 break;
7453 case BPF_FUNC_map_peek_elem:
7454 case BPF_FUNC_map_push_elem:
7455 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
7456 map->map_type != BPF_MAP_TYPE_STACK &&
7457 map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
7458 goto error;
7459 break;
7460 case BPF_FUNC_map_lookup_percpu_elem:
7461 if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
7462 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
7463 map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
7464 goto error;
7465 break;
7466 case BPF_FUNC_sk_storage_get:
7467 case BPF_FUNC_sk_storage_delete:
7468 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
7469 goto error;
7470 break;
7471 case BPF_FUNC_inode_storage_get:
7472 case BPF_FUNC_inode_storage_delete:
7473 if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
7474 goto error;
7475 break;
7476 case BPF_FUNC_task_storage_get:
7477 case BPF_FUNC_task_storage_delete:
7478 if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
7479 goto error;
7480 break;
7481 case BPF_FUNC_cgrp_storage_get:
7482 case BPF_FUNC_cgrp_storage_delete:
7483 if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE)
7484 goto error;
7485 break;
7486 default:
7487 break;
7488 }
7489
7490 return 0;
7491error:
7492 verbose(env, "cannot pass map_type %d into func %s#%d\n",
7493 map->map_type, func_id_name(func_id), func_id);
7494 return -EINVAL;
7495}
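
/* A BPF-side sketch of a map/helper pairing that passes
 * check_map_func_compatibility() in both directions: bpf_perf_event_output()
 * requires a BPF_MAP_TYPE_PERF_EVENT_ARRAY, and that map type only accepts
 * the perf_event helpers. Names are illustrative; libbpf headers assumed.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} events SEC(".maps");

struct event {
	__u32 pid;
};

SEC("tracepoint/syscalls/sys_enter_execve")
int map_compat_example(void *ctx)
{
	struct event e = {
		.pid = bpf_get_current_pid_tgid() >> 32,
	};

	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &e, sizeof(e));
	return 0;
}

char _license[] SEC("license") = "GPL";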
7496
7497static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
7498{
7499 int count = 0;
7500
7501 if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
7502 count++;
7503 if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
7504 count++;
7505 if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
7506 count++;
7507 if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
7508 count++;
7509 if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
7510 count++;
7511
7512 /* We only support one arg being in raw mode at the moment,
7513 * which is sufficient for the helper functions we have
7514 * right now.
7515 */
7516 return count <= 1;
7517}
7518
7519static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
7520{
7521 bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
7522 bool has_size = fn->arg_size[arg] != 0;
7523 bool is_next_size = false;
7524
7525 if (arg + 1 < ARRAY_SIZE(fn->arg_type))
7526 is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
7527
7528 if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
7529 return is_next_size;
7530
7531 return has_size == is_next_size || is_next_size == is_fixed;
7532}
7533
7534static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
7535{
7536 /* bpf_xxx(..., buf, len) call will access 'len'
7537 * bytes from memory 'buf'. Both arg types need
7538 * to be paired, so make sure there's no buggy
7539 * helper function specification.
7540 */
7541 if (arg_type_is_mem_size(fn->arg1_type) ||
7542 check_args_pair_invalid(fn, 0) ||
7543 check_args_pair_invalid(fn, 1) ||
7544 check_args_pair_invalid(fn, 2) ||
7545 check_args_pair_invalid(fn, 3) ||
7546 check_args_pair_invalid(fn, 4))
7547 return false;
7548
7549 return true;
7550}
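
/* A hypothetical helper definition sketching what check_arg_pair_ok()
 * expects: an ARG_PTR_TO_MEM-style buffer argument immediately followed by
 * its ARG_CONST_SIZE[_OR_ZERO] length. The helper name and body are made up;
 * only the .arg*_type pairing matters. (BPF_CALL_2 comes from
 * <linux/filter.h>, which a helper's home file already includes.)
 */
BPF_CALL_2(bpf_example_fill, void *, buf, u32, size)
{
	memset(buf, 0, size);
	return 0;
}

static const struct bpf_func_proto bpf_example_fill_proto = {
	.func		= bpf_example_fill,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,	/* buffer ...           */
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,	/* ... paired with size */
};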
7551
7552static bool check_btf_id_ok(const struct bpf_func_proto *fn)
7553{
7554 int i;
7555
7556 for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
7557 if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
7558 return !!fn->arg_btf_id[i];
7559 if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
7560 return fn->arg_btf_id[i] == BPF_PTR_POISON;
7561 if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
7562 /* arg_btf_id and arg_size are in a union. */
7563 (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
7564 !(fn->arg_type[i] & MEM_FIXED_SIZE)))
7565 return false;
7566 }
7567
7568 return true;
7569}
7570
7571static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
7572{
7573 return check_raw_mode_ok(fn) &&
7574 check_arg_pair_ok(fn) &&
7575 check_btf_id_ok(fn) ? 0 : -EINVAL;
7576}
7577
7578 /* Packet data might have moved; any old PTR_TO_PACKET[_META,_END]
7579 * are now invalid, so turn them into unknown SCALAR_VALUE.
7580 *
7581 * This also applies to dynptr slices belonging to skb and xdp dynptrs,
7582 * since these slices point to packet data.
7583 */
7584static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
7585{
7586 struct bpf_func_state *state;
7587 struct bpf_reg_state *reg;
7588
7589 bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
7590 if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg))
7591 mark_reg_invalid(env, reg);
7592 }));
7593}
7594
7595enum {
7596 AT_PKT_END = -1,
7597 BEYOND_PKT_END = -2,
7598};
7599
7600static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
7601{
7602 struct bpf_func_state *state = vstate->frame[vstate->curframe];
7603 struct bpf_reg_state *reg = &state->regs[regn];
7604
7605 if (reg->type != PTR_TO_PACKET)
7606 /* PTR_TO_PACKET_META is not supported yet */
7607 return;
7608
7609 /* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
7610 * How far beyond pkt_end it goes is unknown.
7611 * if (!range_open) it's the case of pkt >= pkt_end
7612 * if (range_open) it's the case of pkt > pkt_end
7613 * hence this pointer is at least 1 byte bigger than pkt_end
7614 */
7615 if (range_open)
7616 reg->range = BEYOND_PKT_END;
7617 else
7618 reg->range = AT_PKT_END;
7619}
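
/* An illustrative BPF-side pattern producing the ranges mark_pkt_end()
 * records: on the taken branch of "ptr > data_end" the pointer is at or
 * beyond pkt_end, on the fall-through branch it is provably inside the
 * packet. Program name is illustrative; libbpf headers assumed.
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int pkt_end_example(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct ethhdr *eth = data;

	if ((void *)(eth + 1) > data_end)	/* 'pkt > pkt_end' case */
		return XDP_DROP;
	/* here [eth, eth + 1) is known to be within the packet */
	return eth->h_proto ? XDP_PASS : XDP_DROP;
}

char _license[] SEC("license") = "GPL";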
7620
7621/* The pointer with the specified id has released its reference to kernel
7622 * resources. Identify all copies of the same pointer and clear the reference.
7623 */
7624static int release_reference(struct bpf_verifier_env *env,
7625 int ref_obj_id)
7626{
7627 struct bpf_func_state *state;
7628 struct bpf_reg_state *reg;
7629 int err;
7630
7631 err = release_reference_state(cur_func(env), ref_obj_id);
7632 if (err)
7633 return err;
7634
7635 bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
7636 if (reg->ref_obj_id == ref_obj_id)
7637 mark_reg_invalid(env, reg);
7638 }));
7639
7640 return 0;
7641}
7642
7643static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
7644{
7645 struct bpf_func_state *unused;
7646 struct bpf_reg_state *reg;
7647
7648 bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
7649 if (type_is_non_owning_ref(reg->type))
7650 mark_reg_invalid(env, reg);
7651 }));
7652}
7653
7654static void clear_caller_saved_regs(struct bpf_verifier_env *env,
7655 struct bpf_reg_state *regs)
7656{
7657 int i;
7658
7659 /* after the call registers r0 - r5 were scratched */
7660 for (i = 0; i < CALLER_SAVED_REGS; i++) {
7661 mark_reg_not_init(env, regs, caller_saved[i]);
7662 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
7663 }
7664}
7665
7666typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
7667 struct bpf_func_state *caller,
7668 struct bpf_func_state *callee,
7669 int insn_idx);
7670
7671static int set_callee_state(struct bpf_verifier_env *env,
7672 struct bpf_func_state *caller,
7673 struct bpf_func_state *callee, int insn_idx);
7674
7675static bool is_callback_calling_kfunc(u32 btf_id);
7676
7677static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
7678 int *insn_idx, int subprog,
7679 set_callee_state_fn set_callee_state_cb)
7680{
7681 struct bpf_verifier_state *state = env->cur_state;
7682 struct bpf_func_info_aux *func_info_aux;
7683 struct bpf_func_state *caller, *callee;
7684 int err;
7685 bool is_global = false;
7686
7687 if (state->curframe + 1 >= MAX_CALL_FRAMES) {
7688 verbose(env, "the call stack of %d frames is too deep\n",
7689 state->curframe + 2);
7690 return -E2BIG;
7691 }
7692
7693 caller = state->frame[state->curframe];
7694 if (state->frame[state->curframe + 1]) {
7695 verbose(env, "verifier bug. Frame %d already allocated\n",
7696 state->curframe + 1);
7697 return -EFAULT;
7698 }
7699
7700 func_info_aux = env->prog->aux->func_info_aux;
7701 if (func_info_aux)
7702 is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
7703 err = btf_check_subprog_call(env, subprog, caller->regs);
7704 if (err == -EFAULT)
7705 return err;
7706 if (is_global) {
7707 if (err) {
7708 verbose(env, "Caller passes invalid args into func#%d\n",
7709 subprog);
7710 return err;
7711 } else {
7712 if (env->log.level & BPF_LOG_LEVEL)
7713 verbose(env,
7714 "Func#%d is global and valid. Skipping.\n",
7715 subprog);
7716 clear_caller_saved_regs(env, caller->regs);
7717
7718 /* All global functions return a 64-bit SCALAR_VALUE */
7719 mark_reg_unknown(env, caller->regs, BPF_REG_0);
7720 caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
7721
7722 /* continue with next insn after call */
7723 return 0;
7724 }
7725 }
7726
7727 /* set_callee_state is used for direct subprog calls, but we are
7728 * interested in validating only BPF helpers that can call subprogs as
7729 * callbacks
7730 */
7731 if (set_callee_state_cb != set_callee_state) {
7732 if (bpf_pseudo_kfunc_call(insn) &&
7733 !is_callback_calling_kfunc(insn->imm)) {
7734 verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
7735 func_id_name(insn->imm), insn->imm);
7736 return -EFAULT;
7737 } else if (!bpf_pseudo_kfunc_call(insn) &&
7738 !is_callback_calling_function(insn->imm)) { /* helper */
7739 verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n",
7740 func_id_name(insn->imm), insn->imm);
7741 return -EFAULT;
7742 }
7743 }
7744
7745 if (insn->code == (BPF_JMP | BPF_CALL) &&
7746 insn->src_reg == 0 &&
7747 insn->imm == BPF_FUNC_timer_set_callback) {
7748 struct bpf_verifier_state *async_cb;
7749
7750 /* there is no real recursion here. timer callbacks are async */
7751 env->subprog_info[subprog].is_async_cb = true;
7752 async_cb = push_async_cb(env, env->subprog_info[subprog].start,
7753 *insn_idx, subprog);
7754 if (!async_cb)
7755 return -EFAULT;
7756 callee = async_cb->frame[0];
7757 callee->async_entry_cnt = caller->async_entry_cnt + 1;
7758
7759 /* Convert bpf_timer_set_callback() args into timer callback args */
7760 err = set_callee_state_cb(env, caller, callee, *insn_idx);
7761 if (err)
7762 return err;
7763
7764 clear_caller_saved_regs(env, caller->regs);
7765 mark_reg_unknown(env, caller->regs, BPF_REG_0);
7766 caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
7767 /* continue with next insn after call */
7768 return 0;
7769 }
7770
7771 callee = kzalloc(sizeof(*callee), GFP_KERNEL);
7772 if (!callee)
7773 return -ENOMEM;
7774 state->frame[state->curframe + 1] = callee;
7775
7776 /* callee cannot access r0, r6 - r9 for reading and has to write
7777 * into its own stack before reading from it.
7778 * callee can read/write into caller's stack
7779 */
7780 init_func_state(env, callee,
7781 /* remember the callsite, it will be used by bpf_exit */
7782 *insn_idx /* callsite */,
7783 state->curframe + 1 /* frameno within this callchain */,
7784 subprog /* subprog number within this prog */);
7785
7786 /* Transfer references to the callee */
7787 err = copy_reference_state(callee, caller);
7788 if (err)
7789 goto err_out;
7790
7791 err = set_callee_state_cb(env, caller, callee, *insn_idx);
7792 if (err)
7793 goto err_out;
7794
7795 clear_caller_saved_regs(env, caller->regs);
7796
7797 /* only increment it after check_reg_arg() finished */
7798 state->curframe++;
7799
7800 /* and go analyze first insn of the callee */
7801 *insn_idx = env->subprog_info[subprog].start - 1;
7802
7803 if (env->log.level & BPF_LOG_LEVEL) {
7804 verbose(env, "caller:\n");
7805 print_verifier_state(env, caller, true);
7806 verbose(env, "callee:\n");
7807 print_verifier_state(env, callee, true);
7808 }
7809 return 0;
7810
7811err_out:
7812 free_func_state(callee);
7813 state->frame[state->curframe + 1] = NULL;
7814 return err;
7815}
7816
7817int map_set_for_each_callback_args(struct bpf_verifier_env *env,
7818 struct bpf_func_state *caller,
7819 struct bpf_func_state *callee)
7820{
7821 /* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
7822 * void *callback_ctx, u64 flags);
7823 * callback_fn(struct bpf_map *map, void *key, void *value,
7824 * void *callback_ctx);
7825 */
7826 callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
7827
7828 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
7829 __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
7830 callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
7831
7832 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
7833 __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
7834 callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
7835
7836 /* pointer to stack or null */
7837 callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
7838
7839 /* unused */
7840 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
7841 return 0;
7842}
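
/* A minimal BPF-side sketch matching the callee register layout set up
 * above: the bpf_for_each_map_elem() callback receives
 * (map, key, value, callback_ctx). Map, callback and program names are
 * illustrative; libbpf headers assumed.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 16);
	__type(key, __u32);
	__type(value, __u64);
} vals SEC(".maps");

static __u64 sum_cb(void *map, __u32 *key, __u64 *value, void *ctx)
{
	*(__u64 *)ctx += *value;
	return 0;	/* 0 = continue iterating, 1 = stop */
}

SEC("tc")
int for_each_example(struct __sk_buff *skb)
{
	__u64 sum = 0;

	bpf_for_each_map_elem(&vals, sum_cb, &sum, 0);
	return sum != 0;
}

char _license[] SEC("license") = "GPL";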
7843
7844static int set_callee_state(struct bpf_verifier_env *env,
7845 struct bpf_func_state *caller,
7846 struct bpf_func_state *callee, int insn_idx)
7847{
7848 int i;
7849
7850 /* copy r1 - r5 args that callee can access. The copy includes parent
7851 * pointers, which connects us up to the liveness chain
7852 */
7853 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
7854 callee->regs[i] = caller->regs[i];
7855 return 0;
7856}
7857
7858static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
7859 int *insn_idx)
7860{
7861 int subprog, target_insn;
7862
7863 target_insn = *insn_idx + insn->imm + 1;
7864 subprog = find_subprog(env, target_insn);
7865 if (subprog < 0) {
7866 verbose(env, "verifier bug. No program starts at insn %d\n",
7867 target_insn);
7868 return -EFAULT;
7869 }
7870
7871 return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
7872}
7873
7874static int set_map_elem_callback_state(struct bpf_verifier_env *env,
7875 struct bpf_func_state *caller,
7876 struct bpf_func_state *callee,
7877 int insn_idx)
7878{
7879 struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
7880 struct bpf_map *map;
7881 int err;
7882
7883 if (bpf_map_ptr_poisoned(insn_aux)) {
7884 verbose(env, "tail_call abusing map_ptr\n");
7885 return -EINVAL;
7886 }
7887
7888 map = BPF_MAP_PTR(insn_aux->map_ptr_state);
7889 if (!map->ops->map_set_for_each_callback_args ||
7890 !map->ops->map_for_each_callback) {
7891 verbose(env, "callback function not allowed for map\n");
7892 return -ENOTSUPP;
7893 }
7894
7895 err = map->ops->map_set_for_each_callback_args(env, caller, callee);
7896 if (err)
7897 return err;
7898
7899 callee->in_callback_fn = true;
7900 callee->callback_ret_range = tnum_range(0, 1);
7901 return 0;
7902}
7903
7904static int set_loop_callback_state(struct bpf_verifier_env *env,
7905 struct bpf_func_state *caller,
7906 struct bpf_func_state *callee,
7907 int insn_idx)
7908{
7909 /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
7910 * u64 flags);
7911 * callback_fn(u32 index, void *callback_ctx);
7912 */
7913 callee->regs[BPF_REG_1].type = SCALAR_VALUE;
7914 callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
7915
7916 /* unused */
7917 __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
7918 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
7919 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
7920
7921 callee->in_callback_fn = true;
7922 callee->callback_ret_range = tnum_range(0, 1);
7923 return 0;
7924}
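
/* A minimal BPF-side sketch of the bpf_loop() contract modeled above:
 * callback_fn(index, callback_ctx), returning 0 to continue or 1 to break.
 * Names are illustrative; libbpf headers assumed.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

static long square_sum_cb(__u32 index, void *ctx)
{
	*(__u64 *)ctx += (__u64)index * index;
	return 0;	/* 0 = next iteration, 1 = break out of the loop */
}

SEC("tc")
int loop_example(struct __sk_buff *skb)
{
	__u64 sum = 0;

	bpf_loop(8, square_sum_cb, &sum, 0);	/* flags must currently be 0 */
	return sum != 0;
}

char _license[] SEC("license") = "GPL";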
7925
7926static int set_timer_callback_state(struct bpf_verifier_env *env,
7927 struct bpf_func_state *caller,
7928 struct bpf_func_state *callee,
7929 int insn_idx)
7930{
7931 struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
7932
7933 /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
7934 * callback_fn(struct bpf_map *map, void *key, void *value);
7935 */
7936 callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
7937 __mark_reg_known_zero(&callee->regs[BPF_REG_1]);
7938 callee->regs[BPF_REG_1].map_ptr = map_ptr;
7939
7940 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
7941 __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
7942 callee->regs[BPF_REG_2].map_ptr = map_ptr;
7943
7944 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
7945 __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
7946 callee->regs[BPF_REG_3].map_ptr = map_ptr;
7947
7948 /* unused */
7949 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
7950 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
7951 callee->in_async_callback_fn = true;
7952 callee->callback_ret_range = tnum_range(0, 1);
7953 return 0;
7954}
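
/* A minimal BPF-side sketch of the shape modeled above: the timer callback
 * receives (map, key, value), i.e. the map element embedding the
 * struct bpf_timer. Map/program names are illustrative; 0 selects
 * CLOCK_REALTIME; libbpf headers assumed.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct elem {
	struct bpf_timer timer;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, int);
	__type(value, struct elem);
} timer_map SEC(".maps");

static int timer_cb(void *map, int *key, struct elem *val)
{
	return 0;
}

SEC("tc")
int timer_example(struct __sk_buff *skb)
{
	int key = 0;
	struct elem *val = bpf_map_lookup_elem(&timer_map, &key);

	if (!val)
		return 0;
	bpf_timer_init(&val->timer, &timer_map, 0 /* CLOCK_REALTIME */);
	bpf_timer_set_callback(&val->timer, timer_cb);
	bpf_timer_start(&val->timer, 1000000 /* 1ms */, 0);
	return 0;
}

char _license[] SEC("license") = "GPL";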
7955
7956static int set_find_vma_callback_state(struct bpf_verifier_env *env,
7957 struct bpf_func_state *caller,
7958 struct bpf_func_state *callee,
7959 int insn_idx)
7960{
7961 /* bpf_find_vma(struct task_struct *task, u64 addr,
7962 * void *callback_fn, void *callback_ctx, u64 flags)
7963 * (callback_fn)(struct task_struct *task,
7964 * struct vm_area_struct *vma, void *callback_ctx);
7965 */
7966 callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
7967
7968 callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
7969 __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
7970 callee->regs[BPF_REG_2].btf = btf_vmlinux;
7971 callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
7972
7973 /* pointer to stack or null */
7974 callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
7975
7976 /* unused */
7977 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
7978 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
7979 callee->in_callback_fn = true;
7980 callee->callback_ret_range = tnum_range(0, 1);
7981 return 0;
7982}
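
/* A minimal BPF-side sketch of the bpf_find_vma() callback contract set up
 * above: callback_fn(task, vma, callback_ctx). The tracepoint, the looked-up
 * address and all names are illustrative; the struct forward declarations
 * stand in for vmlinux.h.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct task_struct;
struct vm_area_struct;

static long vma_cb(struct task_struct *task, struct vm_area_struct *vma,
		   void *ctx)
{
	(*(long *)ctx)++;	/* count callback invocations */
	return 0;
}

SEC("tracepoint/syscalls/sys_enter_getpid")
int find_vma_example(void *ctx)
{
	struct task_struct *task = bpf_get_current_task_btf();
	long hits = 0;

	bpf_find_vma(task, 0 /* illustrative address */, vma_cb, &hits, 0);
	return 0;
}

char _license[] SEC("license") = "GPL";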
7983
7984static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
7985 struct bpf_func_state *caller,
7986 struct bpf_func_state *callee,
7987 int insn_idx)
7988{
7989 /* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn,
7990 * void *callback_ctx, u64 flags);
7991 * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
7992 */
7993 __mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
7994 mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
7995 callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
7996
7997 /* unused */
7998 __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
7999 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
8000 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
8001
8002 callee->in_callback_fn = true;
8003 callee->callback_ret_range = tnum_range(0, 1);
8004 return 0;
8005}
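
/* A minimal BPF-side sketch of the bpf_user_ringbuf_drain() callback set up
 * above: the callback gets a read-only local dynptr over one user-submitted
 * sample plus callback_ctx. Map/program names, sizes and the fentry attach
 * point are illustrative; helper availability depends on program type.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_USER_RINGBUF);
	__uint(max_entries, 4096);
} user_rb SEC(".maps");

struct sample {
	__u64 val;
};

static long drain_cb(struct bpf_dynptr *dynptr, void *ctx)
{
	struct sample s;

	if (bpf_dynptr_read(&s, sizeof(s), dynptr, 0, 0))
		return 1;	/* stop draining on a short/invalid sample */
	*(__u64 *)ctx += s.val;
	return 0;		/* keep draining */
}

SEC("fentry/bpf_fentry_test1")
int drain_example(void *ctx)
{
	__u64 total = 0;

	bpf_user_ringbuf_drain(&user_rb, drain_cb, &total, 0);
	return 0;
}

char _license[] SEC("license") = "GPL";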
8006
8007static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
8008 struct bpf_func_state *caller,
8009 struct bpf_func_state *callee,
8010 int insn_idx)
8011{
8012 /* void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
8013 * bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
8014 *
8015 * 'struct bpf_rb_node *node' arg to bpf_rbtree_add is the same PTR_TO_BTF_ID w/ offset
8016 * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
8017 * by this point, so look at 'root'
8018 */
8019 struct btf_field *field;
8020
8021 field = reg_find_field_offset(&caller->regs[BPF_REG_1], caller->regs[BPF_REG_1].off,
8022 BPF_RB_ROOT);
8023 if (!field || !field->graph_root.value_btf_id)
8024 return -EFAULT;
8025
8026 mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root);
8027 ref_set_non_owning(env, &callee->regs[BPF_REG_1]);
8028 mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root);
8029 ref_set_non_owning(env, &callee->regs[BPF_REG_2]);
8030
8031 __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
8032 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
8033 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
8034 callee->in_callback_fn = true;
8035 callee->callback_ret_range = tnum_range(0, 1);
8036 return 0;
8037}
8038
8039static bool is_rbtree_lock_required_kfunc(u32 btf_id);
8040
8041/* Are we currently verifying the callback for a rbtree helper that must
8042 * be called with lock held? If so, no need to complain about unreleased
8043 * lock
8044 */
8045static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
8046{
8047 struct bpf_verifier_state *state = env->cur_state;
8048 struct bpf_insn *insn = env->prog->insnsi;
8049 struct bpf_func_state *callee;
8050 int kfunc_btf_id;
8051
8052 if (!state->curframe)
8053 return false;
8054
8055 callee = state->frame[state->curframe];
8056
8057 if (!callee->in_callback_fn)
8058 return false;
8059
8060 kfunc_btf_id = insn[callee->callsite].imm;
8061 return is_rbtree_lock_required_kfunc(kfunc_btf_id);
8062}
8063
8064static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
8065{
8066 struct bpf_verifier_state *state = env->cur_state;
8067 struct bpf_func_state *caller, *callee;
8068 struct bpf_reg_state *r0;
8069 int err;
8070
8071 callee = state->frame[state->curframe];
8072 r0 = &callee->regs[BPF_REG_0];
8073 if (r0->type == PTR_TO_STACK) {
8074 /* technically it's ok to return caller's stack pointer
8075 * (or caller's caller's pointer) back to the caller,
8076 * since these pointers are valid. Only current stack
8077 * pointer will be invalid as soon as function exits,
8078 * but let's be conservative
8079 */
8080 verbose(env, "cannot return stack pointer to the caller\n");
8081 return -EINVAL;
8082 }
8083
8084 caller = state->frame[state->curframe - 1];
8085 if (callee->in_callback_fn) {
8086 /* enforce R0 return value range [0, 1]. */
8087 struct tnum range = callee->callback_ret_range;
8088
8089 if (r0->type != SCALAR_VALUE) {
8090 verbose(env, "R0 not a scalar value\n");
8091 return -EACCES;
8092 }
8093 if (!tnum_in(range, r0->var_off)) {
8094 verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
8095 return -EINVAL;
8096 }
8097 } else {
8098 /* return to the caller whatever r0 had in the callee */
8099 caller->regs[BPF_REG_0] = *r0;
8100 }
8101
8102 /* callback_fn frame should have released its own additions to parent's
8103 * reference state at this point, or check_reference_leak would
8104 * complain, hence it must be the same as the caller. There is no need
8105 * to copy it back.
8106 */
8107 if (!callee->in_callback_fn) {
8108 /* Transfer references to the caller */
8109 err = copy_reference_state(caller, callee);
8110 if (err)
8111 return err;
8112 }
8113
8114 *insn_idx = callee->callsite + 1;
8115 if (env->log.level & BPF_LOG_LEVEL) {
8116 verbose(env, "returning from callee:\n");
8117 print_verifier_state(env, callee, true);
8118 verbose(env, "to caller at %d:\n", *insn_idx);
8119 print_verifier_state(env, caller, true);
8120 }
8121 /* clear everything in the callee */
8122 free_func_state(callee);
8123 state->frame[state->curframe--] = NULL;
8124 return 0;
8125}
8126
8127static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
8128 int func_id,
8129 struct bpf_call_arg_meta *meta)
8130{
8131 struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
8132
8133 if (ret_type != RET_INTEGER ||
8134 (func_id != BPF_FUNC_get_stack &&
8135 func_id != BPF_FUNC_get_task_stack &&
8136 func_id != BPF_FUNC_probe_read_str &&
8137 func_id != BPF_FUNC_probe_read_kernel_str &&
8138 func_id != BPF_FUNC_probe_read_user_str))
8139 return;
8140
8141 ret_reg->smax_value = meta->msize_max_value;
8142 ret_reg->s32_max_value = meta->msize_max_value;
8143 ret_reg->smin_value = -MAX_ERRNO;
8144 ret_reg->s32_min_value = -MAX_ERRNO;
8145 reg_bounds_sync(ret_reg);
8146}
8147
8148static int
8149record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
8150 int func_id, int insn_idx)
8151{
8152 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
8153 struct bpf_map *map = meta->map_ptr;
8154
8155 if (func_id != BPF_FUNC_tail_call &&
8156 func_id != BPF_FUNC_map_lookup_elem &&
8157 func_id != BPF_FUNC_map_update_elem &&
8158 func_id != BPF_FUNC_map_delete_elem &&
8159 func_id != BPF_FUNC_map_push_elem &&
8160 func_id != BPF_FUNC_map_pop_elem &&
8161 func_id != BPF_FUNC_map_peek_elem &&
8162 func_id != BPF_FUNC_for_each_map_elem &&
8163 func_id != BPF_FUNC_redirect_map &&
8164 func_id != BPF_FUNC_map_lookup_percpu_elem)
8165 return 0;
8166
8167 if (map == NULL) {
8168 verbose(env, "kernel subsystem misconfigured verifier\n");
8169 return -EINVAL;
8170 }
8171
8172 /* In the case of a read-only map, some additional restrictions
8173 * need to be applied in order to prevent altering the
8174 * state of the map from the program side.
8175 */
8176 if ((map->map_flags & BPF_F_RDONLY_PROG) &&
8177 (func_id == BPF_FUNC_map_delete_elem ||
8178 func_id == BPF_FUNC_map_update_elem ||
8179 func_id == BPF_FUNC_map_push_elem ||
8180 func_id == BPF_FUNC_map_pop_elem)) {
8181 verbose(env, "write into map forbidden\n");
8182 return -EACCES;
8183 }
8184
8185 if (!BPF_MAP_PTR(aux->map_ptr_state))
8186 bpf_map_ptr_store(aux, meta->map_ptr,
8187 !meta->map_ptr->bypass_spec_v1);
8188 else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
8189 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
8190 !meta->map_ptr->bypass_spec_v1);
8191 return 0;
8192}
8193
8194static int
8195record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
8196 int func_id, int insn_idx)
8197{
8198 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
8199 struct bpf_reg_state *regs = cur_regs(env), *reg;
8200 struct bpf_map *map = meta->map_ptr;
8201 u64 val, max;
8202 int err;
8203
8204 if (func_id != BPF_FUNC_tail_call)
8205 return 0;
8206 if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
8207 verbose(env, "kernel subsystem misconfigured verifier\n");
8208 return -EINVAL;
8209 }
8210
8211 reg = &regs[BPF_REG_3];
8212 val = reg->var_off.value;
8213 max = map->max_entries;
8214
8215 if (!(register_is_const(reg) && val < max)) {
8216 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
8217 return 0;
8218 }
8219
8220 err = mark_chain_precision(env, BPF_REG_3);
8221 if (err)
8222 return err;
8223 if (bpf_map_key_unseen(aux))
8224 bpf_map_key_store(aux, val);
8225 else if (!bpf_map_key_poisoned(aux) &&
8226 bpf_map_key_immediate(aux) != val)
8227 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
8228 return 0;
8229}
8230
8231static int check_reference_leak(struct bpf_verifier_env *env)
8232{
8233 struct bpf_func_state *state = cur_func(env);
8234 bool refs_lingering = false;
8235 int i;
8236
8237 if (state->frameno && !state->in_callback_fn)
8238 return 0;
8239
8240 for (i = 0; i < state->acquired_refs; i++) {
8241 if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
8242 continue;
8243 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
8244 state->refs[i].id, state->refs[i].insn_idx);
8245 refs_lingering = true;
8246 }
8247 return refs_lingering ? -EINVAL : 0;
8248}
8249
8250static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
8251 struct bpf_reg_state *regs)
8252{
8253 struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
8254 struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
8255 struct bpf_map *fmt_map = fmt_reg->map_ptr;
8256 struct bpf_bprintf_data data = {};
8257 int err, fmt_map_off, num_args;
8258 u64 fmt_addr;
8259 char *fmt;
8260
8261 /* data must be an array of u64 */
8262 if (data_len_reg->var_off.value % 8)
8263 return -EINVAL;
8264 num_args = data_len_reg->var_off.value / 8;
8265
8266 /* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
8267 * and map_direct_value_addr is set.
8268 */
8269 fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
8270 err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
8271 fmt_map_off);
8272 if (err) {
8273 verbose(env, "verifier bug\n");
8274 return -EFAULT;
8275 }
8276 fmt = (char *)(long)fmt_addr + fmt_map_off;
8277
8278 /* We are also guaranteed that fmt+fmt_map_off is NUL-terminated, so we
8279 * can focus on validating the format specifiers.
8280 */
8281 err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
8282 if (err < 0)
8283 verbose(env, "Invalid format string\n");
8284
8285 return err;
8286}
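
/* A minimal BPF-side sketch of a bpf_snprintf() call satisfying the checks
 * above: the format string is a constant, NUL-terminated string in a
 * read-only map (a 'const' global lands in .rodata) and the data array is
 * u64-sized. Names and the tracepoint are illustrative.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

const char fmt[] = "pid %d entered openat";

SEC("tracepoint/syscalls/sys_enter_openat")
int snprintf_example(void *ctx)
{
	char out[64];
	__u64 args[1];

	args[0] = bpf_get_current_pid_tgid() >> 32;
	/* data_len must be a multiple of 8, as enforced above */
	bpf_snprintf(out, sizeof(out), fmt, args, sizeof(args));
	return 0;
}

char _license[] SEC("license") = "GPL";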
8287
8288static int check_get_func_ip(struct bpf_verifier_env *env)
8289{
8290 enum bpf_prog_type type = resolve_prog_type(env->prog);
8291 int func_id = BPF_FUNC_get_func_ip;
8292
8293 if (type == BPF_PROG_TYPE_TRACING) {
8294 if (!bpf_prog_has_trampoline(env->prog)) {
8295 verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
8296 func_id_name(func_id), func_id);
8297 return -ENOTSUPP;
8298 }
8299 return 0;
8300 } else if (type == BPF_PROG_TYPE_KPROBE) {
8301 return 0;
8302 }
8303
8304 verbose(env, "func %s#%d not supported for program type %d\n",
8305 func_id_name(func_id), func_id, type);
8306 return -ENOTSUPP;
8307}
8308
8309static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
8310{
8311 return &env->insn_aux_data[env->insn_idx];
8312}
8313
8314static bool loop_flag_is_zero(struct bpf_verifier_env *env)
8315{
8316 struct bpf_reg_state *regs = cur_regs(env);
8317 struct bpf_reg_state *reg = &regs[BPF_REG_4];
8318 bool reg_is_null = register_is_null(reg);
8319
8320 if (reg_is_null)
8321 mark_chain_precision(env, BPF_REG_4);
8322
8323 return reg_is_null;
8324}
8325
8326static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
8327{
8328 struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
8329
8330 if (!state->initialized) {
8331 state->initialized = 1;
8332 state->fit_for_inline = loop_flag_is_zero(env);
8333 state->callback_subprogno = subprogno;
8334 return;
8335 }
8336
8337 if (!state->fit_for_inline)
8338 return;
8339
8340 state->fit_for_inline = (loop_flag_is_zero(env) &&
8341 state->callback_subprogno == subprogno);
8342}
8343
8344static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
8345 int *insn_idx_p)
8346{
8347 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
8348 const struct bpf_func_proto *fn = NULL;
8349 enum bpf_return_type ret_type;
8350 enum bpf_type_flag ret_flag;
8351 struct bpf_reg_state *regs;
8352 struct bpf_call_arg_meta meta;
8353 int insn_idx = *insn_idx_p;
8354 bool changes_data;
8355 int i, err, func_id;
8356
8357 /* find function prototype */
8358 func_id = insn->imm;
8359 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
8360 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
8361 func_id);
8362 return -EINVAL;
8363 }
8364
8365 if (env->ops->get_func_proto)
8366 fn = env->ops->get_func_proto(func_id, env->prog);
8367 if (!fn) {
8368 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
8369 func_id);
8370 return -EINVAL;
8371 }
8372
8373 /* eBPF programs must be GPL compatible to use GPL-ed functions */
8374 if (!env->prog->gpl_compatible && fn->gpl_only) {
8375 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
8376 return -EINVAL;
8377 }
8378
8379 if (fn->allowed && !fn->allowed(env->prog)) {
8380 verbose(env, "helper call is not allowed in probe\n");
8381 return -EINVAL;
8382 }
8383
8384 if (!env->prog->aux->sleepable && fn->might_sleep) {
8385 verbose(env, "helper call might sleep in a non-sleepable prog\n");
8386 return -EINVAL;
8387 }
8388
8389 /* With LD_ABS/IND some JITs save/restore skb from r1. */
8390 changes_data = bpf_helper_changes_pkt_data(fn->func);
8391 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
8392 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
8393 func_id_name(func_id), func_id);
8394 return -EINVAL;
8395 }
8396
8397 memset(&meta, 0, sizeof(meta));
8398 meta.pkt_access = fn->pkt_access;
8399
8400 err = check_func_proto(fn, func_id);
8401 if (err) {
8402 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
8403 func_id_name(func_id), func_id);
8404 return err;
8405 }
8406
8407 if (env->cur_state->active_rcu_lock) {
8408 if (fn->might_sleep) {
8409 verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n",
8410 func_id_name(func_id), func_id);
8411 return -EINVAL;
8412 }
8413
8414 if (env->prog->aux->sleepable && is_storage_get_function(func_id))
8415 env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
8416 }
8417
8418 meta.func_id = func_id;
8419 /* check args */
8420 for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
8421 err = check_func_arg(env, i, &meta, fn, insn_idx);
8422 if (err)
8423 return err;
8424 }
8425
8426 err = record_func_map(env, &meta, func_id, insn_idx);
8427 if (err)
8428 return err;
8429
8430 err = record_func_key(env, &meta, func_id, insn_idx);
8431 if (err)
8432 return err;
8433
8434 /* Mark slots with STACK_MISC in case of raw mode; the stack offset
8435 * is inferred from register state.
8436 */
8437 for (i = 0; i < meta.access_size; i++) {
8438 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
8439 BPF_WRITE, -1, false);
8440 if (err)
8441 return err;
8442 }
8443
8444 regs = cur_regs(env);
8445
8446 if (meta.release_regno) {
8447 err = -EINVAL;
8448 /* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
8449 * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
8450 * is safe to do directly.
8451 */
8452 if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
8453 if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) {
8454 verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be released\n");
8455 return -EFAULT;
8456 }
8457 err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
8458 } else if (meta.ref_obj_id) {
8459 err = release_reference(env, meta.ref_obj_id);
8460 } else if (register_is_null(&regs[meta.release_regno])) {
8461 /* meta.ref_obj_id can only be 0 if the register that is meant to be
8462 * released is NULL, and that register must be > R0.
8463 */
8464 err = 0;
8465 }
8466 if (err) {
8467 verbose(env, "func %s#%d reference has not been acquired before\n",
8468 func_id_name(func_id), func_id);
8469 return err;
8470 }
8471 }
8472
8473 switch (func_id) {
8474 case BPF_FUNC_tail_call:
8475 err = check_reference_leak(env);
8476 if (err) {
8477 verbose(env, "tail_call would lead to reference leak\n");
8478 return err;
8479 }
8480 break;
8481 case BPF_FUNC_get_local_storage:
8482 /* check that the flags argument in get_local_storage(map, flags) is 0;
8483 * this is required because get_local_storage() can't return an error.
8484 */
8485 if (!register_is_null(&regs[BPF_REG_2])) {
8486 verbose(env, "get_local_storage() doesn't support non-zero flags\n");
8487 return -EINVAL;
8488 }
8489 break;
8490 case BPF_FUNC_for_each_map_elem:
8491 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
8492 set_map_elem_callback_state);
8493 break;
8494 case BPF_FUNC_timer_set_callback:
8495 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
8496 set_timer_callback_state);
8497 break;
8498 case BPF_FUNC_find_vma:
8499 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
8500 set_find_vma_callback_state);
8501 break;
8502 case BPF_FUNC_snprintf:
8503 err = check_bpf_snprintf_call(env, regs);
8504 break;
8505 case BPF_FUNC_loop:
8506 update_loop_inline_state(env, meta.subprogno);
8507 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
8508 set_loop_callback_state);
8509 break;
8510 case BPF_FUNC_dynptr_from_mem:
8511 if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
8512 verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
8513 reg_type_str(env, regs[BPF_REG_1].type));
8514 return -EACCES;
8515 }
8516 break;
8517 case BPF_FUNC_set_retval:
8518 if (prog_type == BPF_PROG_TYPE_LSM &&
8519 env->prog->expected_attach_type == BPF_LSM_CGROUP) {
8520 if (!env->prog->aux->attach_func_proto->type) {
8521 /* Make sure programs that attach to void
8522 * hooks don't try to modify return value.
8523 */
8524 verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
8525 return -EINVAL;
8526 }
8527 }
8528 break;
8529 case BPF_FUNC_dynptr_data:
8530 {
8531 struct bpf_reg_state *reg;
8532 int id, ref_obj_id;
8533
8534 reg = get_dynptr_arg_reg(env, fn, regs);
8535 if (!reg)
8536 return -EFAULT;
8537
8538
8539 if (meta.dynptr_id) {
8540 verbose(env, "verifier internal error: meta.dynptr_id already set\n");
8541 return -EFAULT;
8542 }
8543 if (meta.ref_obj_id) {
8544 verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
8545 return -EFAULT;
8546 }
8547
8548 id = dynptr_id(env, reg);
8549 if (id < 0) {
8550 verbose(env, "verifier internal error: failed to obtain dynptr id\n");
8551 return id;
8552 }
8553
8554 ref_obj_id = dynptr_ref_obj_id(env, reg);
8555 if (ref_obj_id < 0) {
8556 verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n");
8557 return ref_obj_id;
8558 }
8559
8560 meta.dynptr_id = id;
8561 meta.ref_obj_id = ref_obj_id;
8562
8563 break;
8564 }
8565 case BPF_FUNC_dynptr_write:
8566 {
8567 enum bpf_dynptr_type dynptr_type;
8568 struct bpf_reg_state *reg;
8569
8570 reg = get_dynptr_arg_reg(env, fn, regs);
8571 if (!reg)
8572 return -EFAULT;
8573
8574 dynptr_type = dynptr_get_type(env, reg);
8575 if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
8576 return -EFAULT;
8577
8578 if (dynptr_type == BPF_DYNPTR_TYPE_SKB)
8579 /* this will trigger clear_all_pkt_pointers(), which will
8580 * invalidate all dynptr slices associated with the skb
8581 */
8582 changes_data = true;
8583
8584 break;
8585 }
8586 case BPF_FUNC_user_ringbuf_drain:
8587 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
8588 set_user_ringbuf_callback_state);
8589 break;
8590 }
8591
8592 if (err)
8593 return err;
8594
8595 /* reset caller saved regs */
8596 for (i = 0; i < CALLER_SAVED_REGS; i++) {
8597 mark_reg_not_init(env, regs, caller_saved[i]);
8598 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
8599 }
8600
8601 /* helper call returns 64-bit value. */
8602 regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
8603
8604 /* update return register (already marked as written above) */
8605 ret_type = fn->ret_type;
8606 ret_flag = type_flag(ret_type);
8607
8608 switch (base_type(ret_type)) {
8609 case RET_INTEGER:
8610 /* sets type to SCALAR_VALUE */
8611 mark_reg_unknown(env, regs, BPF_REG_0);
8612 break;
8613 case RET_VOID:
8614 regs[BPF_REG_0].type = NOT_INIT;
8615 break;
8616 case RET_PTR_TO_MAP_VALUE:
8617 /* There is no offset yet applied, variable or fixed */
8618 mark_reg_known_zero(env, regs, BPF_REG_0);
8619 /* remember map_ptr, so that check_map_access()
8620 * can check 'value_size' boundary of memory access
8621 * to map element returned from bpf_map_lookup_elem()
8622 */
8623 if (meta.map_ptr == NULL) {
8624 verbose(env,
8625 "kernel subsystem misconfigured verifier\n");
8626 return -EINVAL;
8627 }
8628 regs[BPF_REG_0].map_ptr = meta.map_ptr;
8629 regs[BPF_REG_0].map_uid = meta.map_uid;
8630 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
8631 if (!type_may_be_null(ret_type) &&
8632 btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK)) {
8633 regs[BPF_REG_0].id = ++env->id_gen;
8634 }
8635 break;
8636 case RET_PTR_TO_SOCKET:
8637 mark_reg_known_zero(env, regs, BPF_REG_0);
8638 regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
8639 break;
8640 case RET_PTR_TO_SOCK_COMMON:
8641 mark_reg_known_zero(env, regs, BPF_REG_0);
8642 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
8643 break;
8644 case RET_PTR_TO_TCP_SOCK:
8645 mark_reg_known_zero(env, regs, BPF_REG_0);
8646 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
8647 break;
8648 case RET_PTR_TO_MEM:
8649 mark_reg_known_zero(env, regs, BPF_REG_0);
8650 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
8651 regs[BPF_REG_0].mem_size = meta.mem_size;
8652 break;
8653 case RET_PTR_TO_MEM_OR_BTF_ID:
8654 {
8655 const struct btf_type *t;
8656
8657 mark_reg_known_zero(env, regs, BPF_REG_0);
8658 t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
8659 if (!btf_type_is_struct(t)) {
8660 u32 tsize;
8661 const struct btf_type *ret;
8662 const char *tname;
8663
8664 /* resolve the type size of ksym. */
8665 ret = btf_resolve_size(meta.ret_btf, t, &tsize);
8666 if (IS_ERR(ret)) {
8667 tname = btf_name_by_offset(meta.ret_btf, t->name_off);
8668 verbose(env, "unable to resolve the size of type '%s': %ld\n",
8669 tname, PTR_ERR(ret));
8670 return -EINVAL;
8671 }
8672 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
8673 regs[BPF_REG_0].mem_size = tsize;
8674 } else {
8675 /* MEM_RDONLY may be carried from ret_flag, but it
8676 * doesn't apply to PTR_TO_BTF_ID. Clear it here, otherwise
8677 * it will confuse the check of PTR_TO_BTF_ID in
8678 * check_mem_access().
8679 */
8680 ret_flag &= ~MEM_RDONLY;
8681
8682 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
8683 regs[BPF_REG_0].btf = meta.ret_btf;
8684 regs[BPF_REG_0].btf_id = meta.ret_btf_id;
8685 }
8686 break;
8687 }
8688 case RET_PTR_TO_BTF_ID:
8689 {
8690 struct btf *ret_btf;
8691 int ret_btf_id;
8692
8693 mark_reg_known_zero(env, regs, BPF_REG_0);
8694 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
8695 if (func_id == BPF_FUNC_kptr_xchg) {
8696 ret_btf = meta.kptr_field->kptr.btf;
8697 ret_btf_id = meta.kptr_field->kptr.btf_id;
8698 } else {
8699 if (fn->ret_btf_id == BPF_PTR_POISON) {
8700 verbose(env, "verifier internal error:");
8701 verbose(env, "func %s has non-overwritten BPF_PTR_POISON return type\n",
8702 func_id_name(func_id));
8703 return -EINVAL;
8704 }
8705 ret_btf = btf_vmlinux;
8706 ret_btf_id = *fn->ret_btf_id;
8707 }
8708 if (ret_btf_id == 0) {
8709 verbose(env, "invalid return type %u of func %s#%d\n",
8710 base_type(ret_type), func_id_name(func_id),
8711 func_id);
8712 return -EINVAL;
8713 }
8714 regs[BPF_REG_0].btf = ret_btf;
8715 regs[BPF_REG_0].btf_id = ret_btf_id;
8716 break;
8717 }
8718 default:
8719 verbose(env, "unknown return type %u of func %s#%d\n",
8720 base_type(ret_type), func_id_name(func_id), func_id);
8721 return -EINVAL;
8722 }
8723
8724 if (type_may_be_null(regs[BPF_REG_0].type))
8725 regs[BPF_REG_0].id = ++env->id_gen;
8726
8727 if (helper_multiple_ref_obj_use(func_id, meta.map_ptr)) {
8728 verbose(env, "verifier internal error: func %s#%d sets ref_obj_id more than once\n",
8729 func_id_name(func_id), func_id);
8730 return -EFAULT;
8731 }
8732
8733 if (is_dynptr_ref_function(func_id))
8734 regs[BPF_REG_0].dynptr_id = meta.dynptr_id;
8735
8736 if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
8737 /* For release_reference() */
8738 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
8739 } else if (is_acquire_function(func_id, meta.map_ptr)) {
8740 int id = acquire_reference_state(env, insn_idx);
8741
8742 if (id < 0)
8743 return id;
8744 /* For mark_ptr_or_null_reg() */
8745 regs[BPF_REG_0].id = id;
8746 /* For release_reference() */
8747 regs[BPF_REG_0].ref_obj_id = id;
8748 }
8749
8750 do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
8751
8752 err = check_map_func_compatibility(env, meta.map_ptr, func_id);
8753 if (err)
8754 return err;
8755
8756 if ((func_id == BPF_FUNC_get_stack ||
8757 func_id == BPF_FUNC_get_task_stack) &&
8758 !env->prog->has_callchain_buf) {
8759 const char *err_str;
8760
8761#ifdef CONFIG_PERF_EVENTS
8762 err = get_callchain_buffers(sysctl_perf_event_max_stack);
8763 err_str = "cannot get callchain buffer for func %s#%d\n";
8764#else
8765 err = -ENOTSUPP;
8766 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
8767#endif
8768 if (err) {
8769 verbose(env, err_str, func_id_name(func_id), func_id);
8770 return err;
8771 }
8772
8773 env->prog->has_callchain_buf = true;
8774 }
8775
8776 if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
8777 env->prog->call_get_stack = true;
8778
8779 if (func_id == BPF_FUNC_get_func_ip) {
8780 if (check_get_func_ip(env))
8781 return -ENOTSUPP;
8782 env->prog->call_get_func_ip = true;
8783 }
8784
8785 if (changes_data)
8786 clear_all_pkt_pointers(env);
8787 return 0;
8788}
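/* Illustrative sketch (hypothetical map and types, not part of this file) of
 * what the return-type handling above means from the BPF program side: a
 * helper such as bpf_map_lookup_elem() returns RET_PTR_TO_MAP_VALUE_OR_NULL,
 * so the usual pattern is
 *
 *	struct val *v = bpf_map_lookup_elem(&example_map, &key);
 *
 *	if (!v)			// R0 may be NULL on this path
 *		return 0;
 *	v->counter++;		// R0 is now PTR_TO_MAP_VALUE, deref allowed
 *
 * and because R1-R5 were reset above, any value cached in them before the
 * call has to be reloaded; only R6-R9 survive the helper call.
 */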
8789
8790/* mark_btf_func_reg_size() is used when the reg size is determined by
8791 * the BTF func_proto's return value and argument sizes.
8792 */
8793static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
8794 size_t reg_size)
8795{
8796 struct bpf_reg_state *reg = &cur_regs(env)[regno];
8797
8798 if (regno == BPF_REG_0) {
8799 /* Function return value */
8800 reg->live |= REG_LIVE_WRITTEN;
8801 reg->subreg_def = reg_size == sizeof(u64) ?
8802 DEF_NOT_SUBREG : env->insn_idx + 1;
8803 } else {
8804 /* Function argument */
8805 if (reg_size == sizeof(u64)) {
8806 mark_insn_zext(env, reg);
8807 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
8808 } else {
8809 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
8810 }
8811 }
8812}
8813
8814struct bpf_kfunc_call_arg_meta {
8815 /* In parameters */
8816 struct btf *btf;
8817 u32 func_id;
8818 u32 kfunc_flags;
8819 const struct btf_type *func_proto;
8820 const char *func_name;
8821 /* Out parameters */
8822 u32 ref_obj_id;
8823 u8 release_regno;
8824 bool r0_rdonly;
8825 u32 ret_btf_id;
8826 u64 r0_size;
8827 u32 subprogno;
8828 struct {
8829 u64 value;
8830 bool found;
8831 } arg_constant;
8832 struct {
8833 struct btf *btf;
8834 u32 btf_id;
8835 } arg_obj_drop;
8836 struct {
8837 struct btf_field *field;
8838 } arg_list_head;
8839 struct {
8840 struct btf_field *field;
8841 } arg_rbtree_root;
8842 struct {
8843 enum bpf_dynptr_type type;
8844 u32 id;
8845 } initialized_dynptr;
8846 u64 mem_size;
8847};
8848
8849static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
8850{
8851 return meta->kfunc_flags & KF_ACQUIRE;
8852}
8853
8854static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
8855{
8856 return meta->kfunc_flags & KF_RET_NULL;
8857}
8858
8859static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
8860{
8861 return meta->kfunc_flags & KF_RELEASE;
8862}
8863
8864static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta)
8865{
8866 return meta->kfunc_flags & KF_TRUSTED_ARGS;
8867}
8868
8869static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
8870{
8871 return meta->kfunc_flags & KF_SLEEPABLE;
8872}
8873
8874static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
8875{
8876 return meta->kfunc_flags & KF_DESTRUCTIVE;
8877}
8878
8879static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
8880{
8881 return meta->kfunc_flags & KF_RCU;
8882}
8883
8884static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg)
8885{
8886 return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET);
8887}
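/* The kfunc_flags tested by the helpers above come from the kfunc's
 * registration, not from this file. A minimal sketch (hypothetical kfunc
 * names) of how such flags are attached:
 *
 *	BTF_SET8_START(example_kfunc_ids)
 *	BTF_ID_FLAGS(func, bpf_example_acquire, KF_ACQUIRE | KF_RET_NULL)
 *	BTF_ID_FLAGS(func, bpf_example_release, KF_RELEASE)
 *	BTF_SET8_END(example_kfunc_ids)
 *
 *	static const struct btf_kfunc_id_set example_kfunc_set = {
 *		.owner = THIS_MODULE,
 *		.set   = &example_kfunc_ids,
 *	};
 *
 * registered with register_btf_kfunc_id_set(); btf_kfunc_id_set_contains()
 * later hands a pointer to those flags to check_kfunc_call() below.
 */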
8888
8889static bool __kfunc_param_match_suffix(const struct btf *btf,
8890 const struct btf_param *arg,
8891 const char *suffix)
8892{
8893 int suffix_len = strlen(suffix), len;
8894 const char *param_name;
8895
8896 /* In the future, this can be ported to use BTF tagging */
8897 param_name = btf_name_by_offset(btf, arg->name_off);
8898 if (str_is_empty(param_name))
8899 return false;
8900 len = strlen(param_name);
8901 if (len < suffix_len)
8902 return false;
8903 param_name += len - suffix_len;
8904 return !strncmp(param_name, suffix, suffix_len);
8905}
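/* A sketch of the parameter-name convention matched above (hypothetical
 * kfunc, not part of this file):
 *
 *	__bpf_kfunc int bpf_example_copy(void *dst, u32 dst__sz, u64 type_id__k);
 *
 * "dst__sz" marks the scalar as the size of the buffer in the preceding
 * argument and "type_id__k" requires a known constant; the "__szk", "__ign",
 * "__alloc" and "__uninit" helpers below use the same suffix scheme.
 */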
8906
8907static bool is_kfunc_arg_mem_size(const struct btf *btf,
8908 const struct btf_param *arg,
8909 const struct bpf_reg_state *reg)
8910{
8911 const struct btf_type *t;
8912
8913 t = btf_type_skip_modifiers(btf, arg->type, NULL);
8914 if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
8915 return false;
8916
8917 return __kfunc_param_match_suffix(btf, arg, "__sz");
8918}
8919
8920static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
8921 const struct btf_param *arg,
8922 const struct bpf_reg_state *reg)
8923{
8924 const struct btf_type *t;
8925
8926 t = btf_type_skip_modifiers(btf, arg->type, NULL);
8927 if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
8928 return false;
8929
8930 return __kfunc_param_match_suffix(btf, arg, "__szk");
8931}
8932
8933static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
8934{
8935 return __kfunc_param_match_suffix(btf, arg, "__k");
8936}
8937
8938static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
8939{
8940 return __kfunc_param_match_suffix(btf, arg, "__ign");
8941}
8942
8943static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
8944{
8945 return __kfunc_param_match_suffix(btf, arg, "__alloc");
8946}
8947
8948static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
8949{
8950 return __kfunc_param_match_suffix(btf, arg, "__uninit");
8951}
8952
8953static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
8954 const struct btf_param *arg,
8955 const char *name)
8956{
8957 int len, target_len = strlen(name);
8958 const char *param_name;
8959
8960 param_name = btf_name_by_offset(btf, arg->name_off);
8961 if (str_is_empty(param_name))
8962 return false;
8963 len = strlen(param_name);
8964 if (len != target_len)
8965 return false;
8966 if (strcmp(param_name, name))
8967 return false;
8968
8969 return true;
8970}
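/* Unlike the suffix helpers above, this one matches the full parameter name.
 * It is used below for the "rdonly_buf_size"/"rdwr_buf_size" convention: a
 * hypothetical kfunc
 *
 *	__bpf_kfunc void *bpf_example_get_buf(void *ctx, u32 rdwr_buf_size);
 *
 * returns a buffer whose size (and read-only flag) the verifier records in
 * meta->r0_size / meta->r0_rdonly from that constant argument.
 */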
8971
8972enum {
8973 KF_ARG_DYNPTR_ID,
8974 KF_ARG_LIST_HEAD_ID,
8975 KF_ARG_LIST_NODE_ID,
8976 KF_ARG_RB_ROOT_ID,
8977 KF_ARG_RB_NODE_ID,
8978};
8979
8980BTF_ID_LIST(kf_arg_btf_ids)
8981BTF_ID(struct, bpf_dynptr_kern)
8982BTF_ID(struct, bpf_list_head)
8983BTF_ID(struct, bpf_list_node)
8984BTF_ID(struct, bpf_rb_root)
8985BTF_ID(struct, bpf_rb_node)
8986
8987static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
8988 const struct btf_param *arg, int type)
8989{
8990 const struct btf_type *t;
8991 u32 res_id;
8992
8993 t = btf_type_skip_modifiers(btf, arg->type, NULL);
8994 if (!t)
8995 return false;
8996 if (!btf_type_is_ptr(t))
8997 return false;
8998 t = btf_type_skip_modifiers(btf, t->type, &res_id);
8999 if (!t)
9000 return false;
9001 return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]);
9002}
9003
9004static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg)
9005{
9006 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID);
9007}
9008
9009static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg)
9010{
9011 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID);
9012}
9013
9014static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg)
9015{
9016 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
9017}
9018
9019static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg)
9020{
9021 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID);
9022}
9023
9024static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg)
9025{
9026 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
9027}
9028
9029static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
9030 const struct btf_param *arg)
9031{
9032 const struct btf_type *t;
9033
9034 t = btf_type_resolve_func_ptr(btf, arg->type, NULL);
9035 if (!t)
9036 return false;
9037
9038 return true;
9039}
9040
9041/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
9042static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
9043 const struct btf *btf,
9044 const struct btf_type *t, int rec)
9045{
9046 const struct btf_type *member_type;
9047 const struct btf_member *member;
9048 u32 i;
9049
9050 if (!btf_type_is_struct(t))
9051 return false;
9052
9053 for_each_member(i, t, member) {
9054 const struct btf_array *array;
9055
9056 member_type = btf_type_skip_modifiers(btf, member->type, NULL);
9057 if (btf_type_is_struct(member_type)) {
9058 if (rec >= 3) {
9059 verbose(env, "max struct nesting depth exceeded\n");
9060 return false;
9061 }
9062 if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1))
9063 return false;
9064 continue;
9065 }
9066 if (btf_type_is_array(member_type)) {
9067 array = btf_array(member_type);
9068 if (!array->nelems)
9069 return false;
9070 member_type = btf_type_skip_modifiers(btf, array->type, NULL);
9071 if (!btf_type_is_scalar(member_type))
9072 return false;
9073 continue;
9074 }
9075 if (!btf_type_is_scalar(member_type))
9076 return false;
9077 }
9078 return true;
9079}
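/* For illustration (hypothetical types, not part of this file), the check
 * above accepts
 *
 *	struct a { int x; long y[4]; struct { u16 z; } inner; };
 *
 * but rejects a struct containing a pointer member, an array of non-scalars,
 * or one that exceeds the allowed four levels of struct nesting.
 */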
9080
9081
9082static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
9083#ifdef CONFIG_NET
9084 [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
9085 [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
9086 [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
9087#endif
9088};
9089
9090enum kfunc_ptr_arg_type {
9091 KF_ARG_PTR_TO_CTX,
9092 KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */
9093 KF_ARG_PTR_TO_KPTR, /* PTR_TO_KPTR but type specific */
9094 KF_ARG_PTR_TO_DYNPTR,
9095 KF_ARG_PTR_TO_LIST_HEAD,
9096 KF_ARG_PTR_TO_LIST_NODE,
9097 KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */
9098 KF_ARG_PTR_TO_MEM,
9099 KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it */
9100 KF_ARG_PTR_TO_CALLBACK,
9101 KF_ARG_PTR_TO_RB_ROOT,
9102 KF_ARG_PTR_TO_RB_NODE,
9103};
9104
9105enum special_kfunc_type {
9106 KF_bpf_obj_new_impl,
9107 KF_bpf_obj_drop_impl,
9108 KF_bpf_list_push_front,
9109 KF_bpf_list_push_back,
9110 KF_bpf_list_pop_front,
9111 KF_bpf_list_pop_back,
9112 KF_bpf_cast_to_kern_ctx,
9113 KF_bpf_rdonly_cast,
9114 KF_bpf_rcu_read_lock,
9115 KF_bpf_rcu_read_unlock,
9116 KF_bpf_rbtree_remove,
9117 KF_bpf_rbtree_add,
9118 KF_bpf_rbtree_first,
9119 KF_bpf_dynptr_from_skb,
9120 KF_bpf_dynptr_from_xdp,
9121 KF_bpf_dynptr_slice,
9122 KF_bpf_dynptr_slice_rdwr,
9123};
9124
9125BTF_SET_START(special_kfunc_set)
9126BTF_ID(func, bpf_obj_new_impl)
9127BTF_ID(func, bpf_obj_drop_impl)
9128BTF_ID(func, bpf_list_push_front)
9129BTF_ID(func, bpf_list_push_back)
9130BTF_ID(func, bpf_list_pop_front)
9131BTF_ID(func, bpf_list_pop_back)
9132BTF_ID(func, bpf_cast_to_kern_ctx)
9133BTF_ID(func, bpf_rdonly_cast)
9134BTF_ID(func, bpf_rbtree_remove)
9135BTF_ID(func, bpf_rbtree_add)
9136BTF_ID(func, bpf_rbtree_first)
9137BTF_ID(func, bpf_dynptr_from_skb)
9138BTF_ID(func, bpf_dynptr_from_xdp)
9139BTF_ID(func, bpf_dynptr_slice)
9140BTF_ID(func, bpf_dynptr_slice_rdwr)
9141BTF_SET_END(special_kfunc_set)
9142
9143BTF_ID_LIST(special_kfunc_list)
9144BTF_ID(func, bpf_obj_new_impl)
9145BTF_ID(func, bpf_obj_drop_impl)
9146BTF_ID(func, bpf_list_push_front)
9147BTF_ID(func, bpf_list_push_back)
9148BTF_ID(func, bpf_list_pop_front)
9149BTF_ID(func, bpf_list_pop_back)
9150BTF_ID(func, bpf_cast_to_kern_ctx)
9151BTF_ID(func, bpf_rdonly_cast)
9152BTF_ID(func, bpf_rcu_read_lock)
9153BTF_ID(func, bpf_rcu_read_unlock)
9154BTF_ID(func, bpf_rbtree_remove)
9155BTF_ID(func, bpf_rbtree_add)
9156BTF_ID(func, bpf_rbtree_first)
9157BTF_ID(func, bpf_dynptr_from_skb)
9158BTF_ID(func, bpf_dynptr_from_xdp)
9159BTF_ID(func, bpf_dynptr_slice)
9160BTF_ID(func, bpf_dynptr_slice_rdwr)
9161
9162static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
9163{
9164 return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
9165}
9166
9167static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
9168{
9169 return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
9170}
9171
9172static enum kfunc_ptr_arg_type
9173get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
9174 struct bpf_kfunc_call_arg_meta *meta,
9175 const struct btf_type *t, const struct btf_type *ref_t,
9176 const char *ref_tname, const struct btf_param *args,
9177 int argno, int nargs)
9178{
9179 u32 regno = argno + 1;
9180 struct bpf_reg_state *regs = cur_regs(env);
9181 struct bpf_reg_state *reg = &regs[regno];
9182 bool arg_mem_size = false;
9183
9184 if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
9185 return KF_ARG_PTR_TO_CTX;
9186
9187 /* In this function, we verify the kfunc's BTF as per the argument type,
9188 * leaving the rest of the verification with respect to the register
9189 * type to our caller. When a set of conditions hold in the BTF type of
9190 * arguments, we resolve it to a known kfunc_ptr_arg_type.
9191 */
9192 if (btf_get_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
9193 return KF_ARG_PTR_TO_CTX;
9194
9195 if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
9196 return KF_ARG_PTR_TO_ALLOC_BTF_ID;
9197
9198 if (is_kfunc_arg_kptr_get(meta, argno)) {
9199 if (!btf_type_is_ptr(ref_t)) {
9200 verbose(env, "arg#0 BTF type must be a double pointer for kptr_get kfunc\n");
9201 return -EINVAL;
9202 }
9203 ref_t = btf_type_by_id(meta->btf, ref_t->type);
9204 ref_tname = btf_name_by_offset(meta->btf, ref_t->name_off);
9205 if (!btf_type_is_struct(ref_t)) {
9206 verbose(env, "kernel function %s args#0 pointer type %s %s is not supported\n",
9207 meta->func_name, btf_type_str(ref_t), ref_tname);
9208 return -EINVAL;
9209 }
9210 return KF_ARG_PTR_TO_KPTR;
9211 }
9212
9213 if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
9214 return KF_ARG_PTR_TO_DYNPTR;
9215
9216 if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
9217 return KF_ARG_PTR_TO_LIST_HEAD;
9218
9219 if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
9220 return KF_ARG_PTR_TO_LIST_NODE;
9221
9222 if (is_kfunc_arg_rbtree_root(meta->btf, &args[argno]))
9223 return KF_ARG_PTR_TO_RB_ROOT;
9224
9225 if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno]))
9226 return KF_ARG_PTR_TO_RB_NODE;
9227
9228 if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
9229 if (!btf_type_is_struct(ref_t)) {
9230 verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
9231 meta->func_name, argno, btf_type_str(ref_t), ref_tname);
9232 return -EINVAL;
9233 }
9234 return KF_ARG_PTR_TO_BTF_ID;
9235 }
9236
9237 if (is_kfunc_arg_callback(env, meta->btf, &args[argno]))
9238 return KF_ARG_PTR_TO_CALLBACK;
9239
9240
9241 if (argno + 1 < nargs &&
9242 (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]) ||
9243 is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1])))
9244 arg_mem_size = true;
9245
9246 /* This is the catch-all argument type for the register types supported by
9247 * check_helper_mem_access. However, we only allow it when the argument type is
9248 * a pointer to a scalar, or to a struct composed (recursively) of scalars. When
9249 * arg_mem_size is true, the pointer can also be void *.
9250 */
9251 if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
9252 (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
9253 verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
9254 argno, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
9255 return -EINVAL;
9256 }
9257 return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
9258}
9259
9260static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
9261 struct bpf_reg_state *reg,
9262 const struct btf_type *ref_t,
9263 const char *ref_tname, u32 ref_id,
9264 struct bpf_kfunc_call_arg_meta *meta,
9265 int argno)
9266{
9267 const struct btf_type *reg_ref_t;
9268 bool strict_type_match = false;
9269 const struct btf *reg_btf;
9270 const char *reg_ref_tname;
9271 u32 reg_ref_id;
9272
9273 if (base_type(reg->type) == PTR_TO_BTF_ID) {
9274 reg_btf = reg->btf;
9275 reg_ref_id = reg->btf_id;
9276 } else {
9277 reg_btf = btf_vmlinux;
9278 reg_ref_id = *reg2btf_ids[base_type(reg->type)];
9279 }
9280
9281 /* Enforce strict type matching for calls to kfuncs that are acquiring
9282 * or releasing a reference, or are no-cast aliases. We do _not_
9283 * enforce strict matching for plain KF_TRUSTED_ARGS kfuncs by default,
9284 * as we want to enable BPF programs to pass types that are bitwise
9285 * equivalent without forcing them to explicitly cast with something
9286 * like bpf_cast_to_kern_ctx().
9287 *
9288 * For example, say we had a type like the following:
9289 *
9290 * struct bpf_cpumask {
9291 * cpumask_t cpumask;
9292 * refcount_t usage;
9293 * };
9294 *
9295 * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
9296 * to a struct cpumask, so it would be safe to pass a struct
9297 * bpf_cpumask * to a kfunc expecting a struct cpumask *.
9298 *
9299 * The philosophy here is similar to how we allow scalars of different
9300 * types to be passed to kfuncs as long as the size is the same. The
9301 * only difference here is that we're simply allowing
9302 * btf_struct_ids_match() to walk the struct at the 0th offset, and
9303 * resolve types.
9304 */
9305 if (is_kfunc_acquire(meta) ||
9306 (is_kfunc_release(meta) && reg->ref_obj_id) ||
9307 btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
9308 strict_type_match = true;
9309
9310 WARN_ON_ONCE(is_kfunc_trusted_args(meta) && reg->off);
9311
9312 reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
9313 reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
9314 if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) {
9315 verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
9316 meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1,
9317 btf_type_str(reg_ref_t), reg_ref_tname);
9318 return -EINVAL;
9319 }
9320 return 0;
9321}
9322
9323static int process_kf_arg_ptr_to_kptr(struct bpf_verifier_env *env,
9324 struct bpf_reg_state *reg,
9325 const struct btf_type *ref_t,
9326 const char *ref_tname,
9327 struct bpf_kfunc_call_arg_meta *meta,
9328 int argno)
9329{
9330 struct btf_field *kptr_field;
9331
9332 /* check_func_arg_reg_off allows var_off for
9333 * PTR_TO_MAP_VALUE, but we need a fixed offset to find
9334 * the kptr field.
9335 */
9336 if (!tnum_is_const(reg->var_off)) {
9337 verbose(env, "arg#0 must have constant offset\n");
9338 return -EINVAL;
9339 }
9340
9341 kptr_field = btf_record_find(reg->map_ptr->record, reg->off + reg->var_off.value, BPF_KPTR);
9342 if (!kptr_field || kptr_field->type != BPF_KPTR_REF) {
9343 verbose(env, "arg#0 no referenced kptr at map value offset=%llu\n",
9344 reg->off + reg->var_off.value);
9345 return -EINVAL;
9346 }
9347
9348 if (!btf_struct_ids_match(&env->log, meta->btf, ref_t->type, 0, kptr_field->kptr.btf,
9349 kptr_field->kptr.btf_id, true)) {
9350 verbose(env, "kernel function %s args#%d expected pointer to %s %s\n",
9351 meta->func_name, argno, btf_type_str(ref_t), ref_tname);
9352 return -EINVAL;
9353 }
9354 return 0;
9355}
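/* A minimal sketch of the shape this expects (hypothetical types; the __kptr
 * annotation is the BPF-program-side declaration, not part of this file):
 *
 *	struct map_value {
 *		struct example_obj __kptr *obj;
 *	};
 *
 * A kptr_get style kfunc is passed &v->obj, i.e. a double pointer into the
 * map value at a constant offset, and btf_record_find() above locates the
 * BPF_KPTR_REF field at that offset so the pointee type can be matched.
 */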
9356
9357static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
9358{
9359 struct bpf_verifier_state *state = env->cur_state;
9360
9361 if (!state->active_lock.ptr) {
9362 verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n");
9363 return -EFAULT;
9364 }
9365
9366 if (type_flag(reg->type) & NON_OWN_REF) {
9367 verbose(env, "verifier internal error: NON_OWN_REF already set\n");
9368 return -EFAULT;
9369 }
9370
9371 reg->type |= NON_OWN_REF;
9372 return 0;
9373}
9374
9375static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id)
9376{
9377 struct bpf_func_state *state, *unused;
9378 struct bpf_reg_state *reg;
9379 int i;
9380
9381 state = cur_func(env);
9382
9383 if (!ref_obj_id) {
9384 verbose(env, "verifier internal error: ref_obj_id is zero for "
9385 "owning -> non-owning conversion\n");
9386 return -EFAULT;
9387 }
9388
9389 for (i = 0; i < state->acquired_refs; i++) {
9390 if (state->refs[i].id != ref_obj_id)
9391 continue;
9392
9393 /* Clear ref_obj_id here so release_reference doesn't clobber
9394 * the whole reg
9395 */
9396 bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
9397 if (reg->ref_obj_id == ref_obj_id) {
9398 reg->ref_obj_id = 0;
9399 ref_set_non_owning(env, reg);
9400 }
9401 }));
9402 return 0;
9403 }
9404
9405 verbose(env, "verifier internal error: ref state missing for ref_obj_id\n");
9406 return -EFAULT;
9407}
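/* Sketch of the effect (hypothetical program, not part of this file):
 *
 *	struct node_data *n = bpf_obj_new(typeof(*n));	// owning reference
 *	...
 *	bpf_spin_lock(&lock);
 *	bpf_list_push_front(&head, &n->node);
 *
 * Ownership moves into the collection, so every verifier-state copy of 'n'
 * loses its ref_obj_id and gains NON_OWN_REF: it may still be read while the
 * lock is held, but it can no longer be pushed again or passed to a release
 * kfunc.
 */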
9408
9409/* Implementation details:
9410 *
9411 * Each register points to some region of memory, which we define as an
9412 * allocation. Each allocation may embed a bpf_spin_lock which protects any
9413 * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same
9414 * allocation. The lock and the data it protects are colocated in the same
9415 * memory region.
9416 *
9417 * Hence, every time a register holds a pointer value pointing to such
9418 * allocation, the verifier preserves a unique reg->id for it.
9419 *
9420 * The verifier remembers the lock 'ptr' and the lock 'id' whenever
9421 * bpf_spin_lock is called.
9422 *
9423 * To enable this, lock state in the verifier captures two values:
9424 * active_lock.ptr = Register's type specific pointer
9425 * active_lock.id = A unique ID for each register pointer value
9426 *
9427 * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
9428 * supported register types.
9429 *
9430 * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
9431 * allocated objects is the reg->btf pointer.
9432 *
9433 * The active_lock.id is non-unique for maps supporting direct_value_addr, as we
9434 * can establish the provenance of the map value statically for each distinct
9435 * lookup into such maps. They always contain a single map value, hence unique
9436 * IDs for each pseudo load would pessimize the algorithm and reject valid programs.
9437 *
9438 * So, in case of global variables, they use array maps with max_entries = 1,
9439 * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point
9440 * into the same map value as max_entries is 1, as described above).
9441 *
9442 * In case of inner map lookups, the inner map pointer has same map_ptr as the
9443 * outer map pointer (in verifier context), but each lookup into an inner map
9444 * assigns a fresh reg->id to the lookup, so while lookups into distinct inner
9445 * maps from the same outer map share the same map_ptr as active_lock.ptr, they
9446 * will get different reg->id assigned to each lookup, hence different
9447 * active_lock.id.
9448 *
9449 * In case of allocated objects, active_lock.ptr is the reg->btf, and the
9450 * reg->id is a unique ID preserved after the NULL pointer check on the pointer
9451 * returned from bpf_obj_new. Each allocation receives a new reg->id.
9452 */
9453static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
9454{
9455 void *ptr;
9456 u32 id;
9457
9458 switch ((int)reg->type) {
9459 case PTR_TO_MAP_VALUE:
9460 ptr = reg->map_ptr;
9461 break;
9462 case PTR_TO_BTF_ID | MEM_ALLOC:
9463 ptr = reg->btf;
9464 break;
9465 default:
9466 verbose(env, "verifier internal error: unknown reg type for lock check\n");
9467 return -EFAULT;
9468 }
9469 id = reg->id;
9470
9471 if (!env->cur_state->active_lock.ptr)
9472 return -EINVAL;
9473 if (env->cur_state->active_lock.ptr != ptr ||
9474 env->cur_state->active_lock.id != id) {
9475 verbose(env, "held lock and object are not in the same allocation\n");
9476 return -EINVAL;
9477 }
9478 return 0;
9479}
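/* Putting the comment above into BPF-program terms (hypothetical types, not
 * part of this file): the lock and the object it protects must sit in the
 * same allocation, e.g. the same map value
 *
 *	struct elem {
 *		struct bpf_spin_lock lock;
 *		struct bpf_list_head head __contains(node_data, node);
 *	};
 *
 * and the program must hold bpf_spin_lock(&e->lock) of that same map value
 * while operating on e->head; a lock taken on a different map value or
 * allocation fails the ptr/id comparison above.
 */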
9480
9481static bool is_bpf_list_api_kfunc(u32 btf_id)
9482{
9483 return btf_id == special_kfunc_list[KF_bpf_list_push_front] ||
9484 btf_id == special_kfunc_list[KF_bpf_list_push_back] ||
9485 btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
9486 btf_id == special_kfunc_list[KF_bpf_list_pop_back];
9487}
9488
9489static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
9490{
9491 return btf_id == special_kfunc_list[KF_bpf_rbtree_add] ||
9492 btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
9493 btf_id == special_kfunc_list[KF_bpf_rbtree_first];
9494}
9495
9496static bool is_bpf_graph_api_kfunc(u32 btf_id)
9497{
9498 return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id);
9499}
9500
9501static bool is_callback_calling_kfunc(u32 btf_id)
9502{
9503 return btf_id == special_kfunc_list[KF_bpf_rbtree_add];
9504}
9505
9506static bool is_rbtree_lock_required_kfunc(u32 btf_id)
9507{
9508 return is_bpf_rbtree_api_kfunc(btf_id);
9509}
9510
9511static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
9512 enum btf_field_type head_field_type,
9513 u32 kfunc_btf_id)
9514{
9515 bool ret;
9516
9517 switch (head_field_type) {
9518 case BPF_LIST_HEAD:
9519 ret = is_bpf_list_api_kfunc(kfunc_btf_id);
9520 break;
9521 case BPF_RB_ROOT:
9522 ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id);
9523 break;
9524 default:
9525 verbose(env, "verifier internal error: unexpected graph root argument type %s\n",
9526 btf_field_type_name(head_field_type));
9527 return false;
9528 }
9529
9530 if (!ret)
9531 verbose(env, "verifier internal error: %s head arg for unknown kfunc\n",
9532 btf_field_type_name(head_field_type));
9533 return ret;
9534}
9535
9536static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
9537 enum btf_field_type node_field_type,
9538 u32 kfunc_btf_id)
9539{
9540 bool ret;
9541
9542 switch (node_field_type) {
9543 case BPF_LIST_NODE:
9544 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front] ||
9545 kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back]);
9546 break;
9547 case BPF_RB_NODE:
9548 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
9549 kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add]);
9550 break;
9551 default:
9552 verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
9553 btf_field_type_name(node_field_type));
9554 return false;
9555 }
9556
9557 if (!ret)
9558 verbose(env, "verifier internal error: %s node arg for unknown kfunc\n",
9559 btf_field_type_name(node_field_type));
9560 return ret;
9561}
9562
9563static int
9564__process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
9565 struct bpf_reg_state *reg, u32 regno,
9566 struct bpf_kfunc_call_arg_meta *meta,
9567 enum btf_field_type head_field_type,
9568 struct btf_field **head_field)
9569{
9570 const char *head_type_name;
9571 struct btf_field *field;
9572 struct btf_record *rec;
9573 u32 head_off;
9574
9575 if (meta->btf != btf_vmlinux) {
9576 verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
9577 return -EFAULT;
9578 }
9579
9580 if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id))
9581 return -EFAULT;
9582
9583 head_type_name = btf_field_type_name(head_field_type);
9584 if (!tnum_is_const(reg->var_off)) {
9585 verbose(env,
9586 "R%d doesn't have constant offset. %s has to be at the constant offset\n",
9587 regno, head_type_name);
9588 return -EINVAL;
9589 }
9590
9591 rec = reg_btf_record(reg);
9592 head_off = reg->off + reg->var_off.value;
9593 field = btf_record_find(rec, head_off, head_field_type);
9594 if (!field) {
9595 verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
9596 return -EINVAL;
9597 }
9598
9599 /* All functions require the graph root (bpf_list_head or bpf_rb_root) to be protected by a bpf_spin_lock */
9600 if (check_reg_allocation_locked(env, reg)) {
9601 verbose(env, "bpf_spin_lock at off=%d must be held for %s\n",
9602 rec->spin_lock_off, head_type_name);
9603 return -EINVAL;
9604 }
9605
9606 if (*head_field) {
9607 verbose(env, "verifier internal error: repeating %s arg\n", head_type_name);
9608 return -EFAULT;
9609 }
9610 *head_field = field;
9611 return 0;
9612}
9613
9614static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
9615 struct bpf_reg_state *reg, u32 regno,
9616 struct bpf_kfunc_call_arg_meta *meta)
9617{
9618 return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_LIST_HEAD,
9619 &meta->arg_list_head.field);
9620}
9621
9622static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
9623 struct bpf_reg_state *reg, u32 regno,
9624 struct bpf_kfunc_call_arg_meta *meta)
9625{
9626 return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_RB_ROOT,
9627 &meta->arg_rbtree_root.field);
9628}
9629
9630static int
9631__process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
9632 struct bpf_reg_state *reg, u32 regno,
9633 struct bpf_kfunc_call_arg_meta *meta,
9634 enum btf_field_type head_field_type,
9635 enum btf_field_type node_field_type,
9636 struct btf_field **node_field)
9637{
9638 const char *node_type_name;
9639 const struct btf_type *et, *t;
9640 struct btf_field *field;
9641 u32 node_off;
9642
9643 if (meta->btf != btf_vmlinux) {
9644 verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
9645 return -EFAULT;
9646 }
9647
9648 if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id))
9649 return -EFAULT;
9650
9651 node_type_name = btf_field_type_name(node_field_type);
9652 if (!tnum_is_const(reg->var_off)) {
9653 verbose(env,
9654 "R%d doesn't have constant offset. %s has to be at the constant offset\n",
9655 regno, node_type_name);
9656 return -EINVAL;
9657 }
9658
9659 node_off = reg->off + reg->var_off.value;
9660 field = reg_find_field_offset(reg, node_off, node_field_type);
9661 if (!field || field->offset != node_off) {
9662 verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
9663 return -EINVAL;
9664 }
9665
9666 field = *node_field;
9667
9668 et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
9669 t = btf_type_by_id(reg->btf, reg->btf_id);
9670 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf,
9671 field->graph_root.value_btf_id, true)) {
9672 verbose(env, "operation on %s expects arg#1 %s at offset=%d "
9673 "in struct %s, but arg is at offset=%d in struct %s\n",
9674 btf_field_type_name(head_field_type),
9675 btf_field_type_name(node_field_type),
9676 field->graph_root.node_offset,
9677 btf_name_by_offset(field->graph_root.btf, et->name_off),
9678 node_off, btf_name_by_offset(reg->btf, t->name_off));
9679 return -EINVAL;
9680 }
9681
9682 if (node_off != field->graph_root.node_offset) {
9683 verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
9684 node_off, btf_field_type_name(node_field_type),
9685 field->graph_root.node_offset,
9686 btf_name_by_offset(field->graph_root.btf, et->name_off));
9687 return -EINVAL;
9688 }
9689
9690 return 0;
9691}
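/* Illustrative sketch of the offset check above (hypothetical types, not
 * part of this file): with
 *
 *	struct node_data {
 *		long key;
 *		struct bpf_rb_node node;
 *	};
 *
 * as the value type of a bpf_rb_root, bpf_rbtree_add(&root, &n->node, less)
 * must pass the embedded bpf_rb_node at exactly the offset recorded in the
 * root's BTF; passing &n->key, or a node embedded in an unrelated struct, is
 * rejected with the messages above.
 */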
9692
9693static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
9694 struct bpf_reg_state *reg, u32 regno,
9695 struct bpf_kfunc_call_arg_meta *meta)
9696{
9697 return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
9698 BPF_LIST_HEAD, BPF_LIST_NODE,
9699 &meta->arg_list_head.field);
9700}
9701
9702static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
9703 struct bpf_reg_state *reg, u32 regno,
9704 struct bpf_kfunc_call_arg_meta *meta)
9705{
9706 return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
9707 BPF_RB_ROOT, BPF_RB_NODE,
9708 &meta->arg_rbtree_root.field);
9709}
9710
9711static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
9712 int insn_idx)
9713{
9714 const char *func_name = meta->func_name, *ref_tname;
9715 const struct btf *btf = meta->btf;
9716 const struct btf_param *args;
9717 u32 i, nargs;
9718 int ret;
9719
9720 args = (const struct btf_param *)(meta->func_proto + 1);
9721 nargs = btf_type_vlen(meta->func_proto);
9722 if (nargs > MAX_BPF_FUNC_REG_ARGS) {
9723 verbose(env, "Function %s has %d > %d args\n", func_name, nargs,
9724 MAX_BPF_FUNC_REG_ARGS);
9725 return -EINVAL;
9726 }
9727
9728 /* Check that BTF function arguments match actual types that the
9729 * verifier sees.
9730 */
9731 for (i = 0; i < nargs; i++) {
9732 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[i + 1];
9733 const struct btf_type *t, *ref_t, *resolve_ret;
9734 enum bpf_arg_type arg_type = ARG_DONTCARE;
9735 u32 regno = i + 1, ref_id, type_size;
9736 bool is_ret_buf_sz = false;
9737 int kf_arg_type;
9738
9739 t = btf_type_skip_modifiers(btf, args[i].type, NULL);
9740
9741 if (is_kfunc_arg_ignore(btf, &args[i]))
9742 continue;
9743
9744 if (btf_type_is_scalar(t)) {
9745 if (reg->type != SCALAR_VALUE) {
9746 verbose(env, "R%d is not a scalar\n", regno);
9747 return -EINVAL;
9748 }
9749
9750 if (is_kfunc_arg_constant(meta->btf, &args[i])) {
9751 if (meta->arg_constant.found) {
9752 verbose(env, "verifier internal error: only one constant argument permitted\n");
9753 return -EFAULT;
9754 }
9755 if (!tnum_is_const(reg->var_off)) {
9756 verbose(env, "R%d must be a known constant\n", regno);
9757 return -EINVAL;
9758 }
9759 ret = mark_chain_precision(env, regno);
9760 if (ret < 0)
9761 return ret;
9762 meta->arg_constant.found = true;
9763 meta->arg_constant.value = reg->var_off.value;
9764 } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) {
9765 meta->r0_rdonly = true;
9766 is_ret_buf_sz = true;
9767 } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) {
9768 is_ret_buf_sz = true;
9769 }
9770
9771 if (is_ret_buf_sz) {
9772 if (meta->r0_size) {
9773 verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
9774 return -EINVAL;
9775 }
9776
9777 if (!tnum_is_const(reg->var_off)) {
9778 verbose(env, "R%d is not a const\n", regno);
9779 return -EINVAL;
9780 }
9781
9782 meta->r0_size = reg->var_off.value;
9783 ret = mark_chain_precision(env, regno);
9784 if (ret)
9785 return ret;
9786 }
9787 continue;
9788 }
9789
9790 if (!btf_type_is_ptr(t)) {
9791 verbose(env, "Unrecognized arg#%d type %s\n", i, btf_type_str(t));
9792 return -EINVAL;
9793 }
9794
9795 if ((is_kfunc_trusted_args(meta) || is_kfunc_rcu(meta)) &&
9796 (register_is_null(reg) || type_may_be_null(reg->type))) {
9797 verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
9798 return -EACCES;
9799 }
9800
9801 if (reg->ref_obj_id) {
9802 if (is_kfunc_release(meta) && meta->ref_obj_id) {
9803 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
9804 regno, reg->ref_obj_id,
9805 meta->ref_obj_id);
9806 return -EFAULT;
9807 }
9808 meta->ref_obj_id = reg->ref_obj_id;
9809 if (is_kfunc_release(meta))
9810 meta->release_regno = regno;
9811 }
9812
9813 ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
9814 ref_tname = btf_name_by_offset(btf, ref_t->name_off);
9815
9816 kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, i, nargs);
9817 if (kf_arg_type < 0)
9818 return kf_arg_type;
9819
9820 switch (kf_arg_type) {
9821 case KF_ARG_PTR_TO_ALLOC_BTF_ID:
9822 case KF_ARG_PTR_TO_BTF_ID:
9823 if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta))
9824 break;
9825
9826 if (!is_trusted_reg(reg)) {
9827 if (!is_kfunc_rcu(meta)) {
9828 verbose(env, "R%d must be referenced or trusted\n", regno);
9829 return -EINVAL;
9830 }
9831 if (!is_rcu_reg(reg)) {
9832 verbose(env, "R%d must be a rcu pointer\n", regno);
9833 return -EINVAL;
9834 }
9835 }
9836
9837 fallthrough;
9838 case KF_ARG_PTR_TO_CTX:
9839 /* Trusted arguments have the same offset checks as release arguments */
9840 arg_type |= OBJ_RELEASE;
9841 break;
9842 case KF_ARG_PTR_TO_KPTR:
9843 case KF_ARG_PTR_TO_DYNPTR:
9844 case KF_ARG_PTR_TO_LIST_HEAD:
9845 case KF_ARG_PTR_TO_LIST_NODE:
9846 case KF_ARG_PTR_TO_RB_ROOT:
9847 case KF_ARG_PTR_TO_RB_NODE:
9848 case KF_ARG_PTR_TO_MEM:
9849 case KF_ARG_PTR_TO_MEM_SIZE:
9850 case KF_ARG_PTR_TO_CALLBACK:
9851 /* Trusted by default */
9852 break;
9853 default:
9854 WARN_ON_ONCE(1);
9855 return -EFAULT;
9856 }
9857
9858 if (is_kfunc_release(meta) && reg->ref_obj_id)
9859 arg_type |= OBJ_RELEASE;
9860 ret = check_func_arg_reg_off(env, reg, regno, arg_type);
9861 if (ret < 0)
9862 return ret;
9863
9864 switch (kf_arg_type) {
9865 case KF_ARG_PTR_TO_CTX:
9866 if (reg->type != PTR_TO_CTX) {
9867 verbose(env, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t));
9868 return -EINVAL;
9869 }
9870
9871 if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
9872 ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog));
9873 if (ret < 0)
9874 return -EINVAL;
9875 meta->ret_btf_id = ret;
9876 }
9877 break;
9878 case KF_ARG_PTR_TO_ALLOC_BTF_ID:
9879 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
9880 verbose(env, "arg#%d expected pointer to allocated object\n", i);
9881 return -EINVAL;
9882 }
9883 if (!reg->ref_obj_id) {
9884 verbose(env, "allocated object must be referenced\n");
9885 return -EINVAL;
9886 }
9887 if (meta->btf == btf_vmlinux &&
9888 meta->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
9889 meta->arg_obj_drop.btf = reg->btf;
9890 meta->arg_obj_drop.btf_id = reg->btf_id;
9891 }
9892 break;
9893 case KF_ARG_PTR_TO_KPTR:
9894 if (reg->type != PTR_TO_MAP_VALUE) {
9895 verbose(env, "arg#0 expected pointer to map value\n");
9896 return -EINVAL;
9897 }
9898 ret = process_kf_arg_ptr_to_kptr(env, reg, ref_t, ref_tname, meta, i);
9899 if (ret < 0)
9900 return ret;
9901 break;
9902 case KF_ARG_PTR_TO_DYNPTR:
9903 {
9904 enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
9905
9906 if (reg->type != PTR_TO_STACK &&
9907 reg->type != CONST_PTR_TO_DYNPTR) {
9908 verbose(env, "arg#%d expected pointer to stack or dynptr_ptr\n", i);
9909 return -EINVAL;
9910 }
9911
9912 if (reg->type == CONST_PTR_TO_DYNPTR)
9913 dynptr_arg_type |= MEM_RDONLY;
9914
9915 if (is_kfunc_arg_uninit(btf, &args[i]))
9916 dynptr_arg_type |= MEM_UNINIT;
9917
9918 if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb])
9919 dynptr_arg_type |= DYNPTR_TYPE_SKB;
9920 else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp])
9921 dynptr_arg_type |= DYNPTR_TYPE_XDP;
9922
9923 ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type);
9924 if (ret < 0)
9925 return ret;
9926
9927 if (!(dynptr_arg_type & MEM_UNINIT)) {
9928 int id = dynptr_id(env, reg);
9929
9930 if (id < 0) {
9931 verbose(env, "verifier internal error: failed to obtain dynptr id\n");
9932 return id;
9933 }
9934 meta->initialized_dynptr.id = id;
9935 meta->initialized_dynptr.type = dynptr_get_type(env, reg);
9936 }
9937
9938 break;
9939 }
9940 case KF_ARG_PTR_TO_LIST_HEAD:
9941 if (reg->type != PTR_TO_MAP_VALUE &&
9942 reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
9943 verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
9944 return -EINVAL;
9945 }
9946 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
9947 verbose(env, "allocated object must be referenced\n");
9948 return -EINVAL;
9949 }
9950 ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta);
9951 if (ret < 0)
9952 return ret;
9953 break;
9954 case KF_ARG_PTR_TO_RB_ROOT:
9955 if (reg->type != PTR_TO_MAP_VALUE &&
9956 reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
9957 verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
9958 return -EINVAL;
9959 }
9960 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
9961 verbose(env, "allocated object must be referenced\n");
9962 return -EINVAL;
9963 }
9964 ret = process_kf_arg_ptr_to_rbtree_root(env, reg, regno, meta);
9965 if (ret < 0)
9966 return ret;
9967 break;
9968 case KF_ARG_PTR_TO_LIST_NODE:
9969 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
9970 verbose(env, "arg#%d expected pointer to allocated object\n", i);
9971 return -EINVAL;
9972 }
9973 if (!reg->ref_obj_id) {
9974 verbose(env, "allocated object must be referenced\n");
9975 return -EINVAL;
9976 }
9977 ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta);
9978 if (ret < 0)
9979 return ret;
9980 break;
9981 case KF_ARG_PTR_TO_RB_NODE:
9982 if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
9983 if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
9984 verbose(env, "rbtree_remove node input must be non-owning ref\n");
9985 return -EINVAL;
9986 }
9987 if (in_rbtree_lock_required_cb(env)) {
9988 verbose(env, "rbtree_remove not allowed in rbtree cb\n");
9989 return -EINVAL;
9990 }
9991 } else {
9992 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
9993 verbose(env, "arg#%d expected pointer to allocated object\n", i);
9994 return -EINVAL;
9995 }
9996 if (!reg->ref_obj_id) {
9997 verbose(env, "allocated object must be referenced\n");
9998 return -EINVAL;
9999 }
10000 }
10001
10002 ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta);
10003 if (ret < 0)
10004 return ret;
10005 break;
10006 case KF_ARG_PTR_TO_BTF_ID:
10007 /* Only base_type is checked, further checks are done here */
10008 if ((base_type(reg->type) != PTR_TO_BTF_ID ||
10009 (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
10010 !reg2btf_ids[base_type(reg->type)]) {
10011 verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type));
10012 verbose(env, "expected %s or socket\n",
10013 reg_type_str(env, base_type(reg->type) |
10014 (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
10015 return -EINVAL;
10016 }
10017 ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i);
10018 if (ret < 0)
10019 return ret;
10020 break;
10021 case KF_ARG_PTR_TO_MEM:
10022 resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
10023 if (IS_ERR(resolve_ret)) {
10024 verbose(env, "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
10025 i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret));
10026 return -EINVAL;
10027 }
10028 ret = check_mem_reg(env, reg, regno, type_size);
10029 if (ret < 0)
10030 return ret;
10031 break;
10032 case KF_ARG_PTR_TO_MEM_SIZE:
10033 {
10034 struct bpf_reg_state *size_reg = &regs[regno + 1];
10035 const struct btf_param *size_arg = &args[i + 1];
10036
10037 ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
10038 if (ret < 0) {
10039 verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
10040 return ret;
10041 }
10042
10043 if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) {
10044 if (meta->arg_constant.found) {
10045 verbose(env, "verifier internal error: only one constant argument permitted\n");
10046 return -EFAULT;
10047 }
10048 if (!tnum_is_const(size_reg->var_off)) {
10049 verbose(env, "R%d must be a known constant\n", regno + 1);
10050 return -EINVAL;
10051 }
10052 meta->arg_constant.found = true;
10053 meta->arg_constant.value = size_reg->var_off.value;
10054 }
10055
10056 /* Skip next '__sz' or '__szk' argument */
10057 i++;
10058 break;
10059 }
10060 case KF_ARG_PTR_TO_CALLBACK:
10061 meta->subprogno = reg->subprogno;
10062 break;
10063 }
10064 }
10065
10066 if (is_kfunc_release(meta) && !meta->release_regno) {
10067 verbose(env, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
10068 func_name);
10069 return -EINVAL;
10070 }
10071
10072 return 0;
10073}
10074
10075static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
10076 int *insn_idx_p)
10077{
10078 const struct btf_type *t, *func, *func_proto, *ptr_type;
10079 u32 i, nargs, func_id, ptr_type_id, release_ref_obj_id;
10080 struct bpf_reg_state *regs = cur_regs(env);
10081 const char *func_name, *ptr_type_name;
10082 bool sleepable, rcu_lock, rcu_unlock;
10083 struct bpf_kfunc_call_arg_meta meta;
10084 int err, insn_idx = *insn_idx_p;
10085 const struct btf_param *args;
10086 const struct btf_type *ret_t;
10087 struct btf *desc_btf;
10088 u32 *kfunc_flags;
10089
10090 /* skip for now, but return error when we find this in fixup_kfunc_call */
10091 if (!insn->imm)
10092 return 0;
10093
10094 desc_btf = find_kfunc_desc_btf(env, insn->off);
10095 if (IS_ERR(desc_btf))
10096 return PTR_ERR(desc_btf);
10097
10098 func_id = insn->imm;
10099 func = btf_type_by_id(desc_btf, func_id);
10100 func_name = btf_name_by_offset(desc_btf, func->name_off);
10101 func_proto = btf_type_by_id(desc_btf, func->type);
10102
10103 kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id);
10104 if (!kfunc_flags) {
10105 verbose(env, "calling kernel function %s is not allowed\n",
10106 func_name);
10107 return -EACCES;
10108 }
10109
10110 /* Prepare kfunc call metadata */
10111 memset(&meta, 0, sizeof(meta));
10112 meta.btf = desc_btf;
10113 meta.func_id = func_id;
10114 meta.kfunc_flags = *kfunc_flags;
10115 meta.func_proto = func_proto;
10116 meta.func_name = func_name;
10117
10118 if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
10119 verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
10120 return -EACCES;
10121 }
10122
10123 sleepable = is_kfunc_sleepable(&meta);
10124 if (sleepable && !env->prog->aux->sleepable) {
10125 verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
10126 return -EACCES;
10127 }
10128
10129 rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
10130 rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
10131
10132 if (env->cur_state->active_rcu_lock) {
10133 struct bpf_func_state *state;
10134 struct bpf_reg_state *reg;
10135
10136 if (rcu_lock) {
10137 verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
10138 return -EINVAL;
10139 } else if (rcu_unlock) {
10140 bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
10141 if (reg->type & MEM_RCU) {
10142 reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
10143 reg->type |= PTR_UNTRUSTED;
10144 }
10145 }));
10146 env->cur_state->active_rcu_lock = false;
10147 } else if (sleepable) {
10148 verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
10149 return -EACCES;
10150 }
10151 } else if (rcu_lock) {
10152 env->cur_state->active_rcu_lock = true;
10153 } else if (rcu_unlock) {
10154 verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
10155 return -EINVAL;
10156 }
10157
10158 /* Check the arguments */
10159 err = check_kfunc_args(env, &meta, insn_idx);
10160 if (err < 0)
10161 return err;
10162 /* In case of a release function, we get the register number of the
10163 * refcounted PTR_TO_BTF_ID in bpf_kfunc_call_arg_meta; do the release now.
10164 */
10165 if (meta.release_regno) {
10166 err = release_reference(env, regs[meta.release_regno].ref_obj_id);
10167 if (err) {
10168 verbose(env, "kfunc %s#%d reference has not been acquired before\n",
10169 func_name, func_id);
10170 return err;
10171 }
10172 }
10173
10174 if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front] ||
10175 meta.func_id == special_kfunc_list[KF_bpf_list_push_back] ||
10176 meta.func_id == special_kfunc_list[KF_bpf_rbtree_add]) {
10177 release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
10178 err = ref_convert_owning_non_owning(env, release_ref_obj_id);
10179 if (err) {
10180 verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
10181 func_name, func_id);
10182 return err;
10183 }
10184
10185 err = release_reference(env, release_ref_obj_id);
10186 if (err) {
10187 verbose(env, "kfunc %s#%d reference has not been acquired before\n",
10188 func_name, func_id);
10189 return err;
10190 }
10191 }
10192
10193 if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add]) {
10194 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
10195 set_rbtree_add_callback_state);
10196 if (err) {
10197 verbose(env, "kfunc %s#%d failed callback verification\n",
10198 func_name, func_id);
10199 return err;
10200 }
10201 }
10202
10203 for (i = 0; i < CALLER_SAVED_REGS; i++)
10204 mark_reg_not_init(env, regs, caller_saved[i]);
10205
10206 /* Check return type */
10207 t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
10208
10209 if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
10210 /* Only exception is bpf_obj_new_impl */
10211 if (meta.btf != btf_vmlinux || meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl]) {
10212 verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
10213 return -EINVAL;
10214 }
10215 }
10216
10217 if (btf_type_is_scalar(t)) {
10218 mark_reg_unknown(env, regs, BPF_REG_0);
10219 mark_btf_func_reg_size(env, BPF_REG_0, t->size);
10220 } else if (btf_type_is_ptr(t)) {
10221 ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
10222
10223 if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
10224 if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
10225 struct btf *ret_btf;
10226 u32 ret_btf_id;
10227
10228 if (unlikely(!bpf_global_ma_set))
10229 return -ENOMEM;
10230
10231 if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
10232 verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
10233 return -EINVAL;
10234 }
10235
10236 ret_btf = env->prog->aux->btf;
10237 ret_btf_id = meta.arg_constant.value;
10238
10239 /* This may be NULL due to user not supplying a BTF */
10240 if (!ret_btf) {
10241 verbose(env, "bpf_obj_new requires prog BTF\n");
10242 return -EINVAL;
10243 }
10244
10245 ret_t = btf_type_by_id(ret_btf, ret_btf_id);
10246 if (!ret_t || !__btf_type_is_struct(ret_t)) {
10247 verbose(env, "bpf_obj_new type ID argument must be of a struct\n");
10248 return -EINVAL;
10249 }
10250
10251 mark_reg_known_zero(env, regs, BPF_REG_0);
10252 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
10253 regs[BPF_REG_0].btf = ret_btf;
10254 regs[BPF_REG_0].btf_id = ret_btf_id;
10255
10256 env->insn_aux_data[insn_idx].obj_new_size = ret_t->size;
10257 env->insn_aux_data[insn_idx].kptr_struct_meta =
10258 btf_find_struct_meta(ret_btf, ret_btf_id);
10259 } else if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
10260 env->insn_aux_data[insn_idx].kptr_struct_meta =
10261 btf_find_struct_meta(meta.arg_obj_drop.btf,
10262 meta.arg_obj_drop.btf_id);
10263 } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] ||
10264 meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
10265 struct btf_field *field = meta.arg_list_head.field;
10266
10267 mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
10268 } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
10269 meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
10270 struct btf_field *field = meta.arg_rbtree_root.field;
10271
10272 mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
10273 } else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
10274 mark_reg_known_zero(env, regs, BPF_REG_0);
10275 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
10276 regs[BPF_REG_0].btf = desc_btf;
10277 regs[BPF_REG_0].btf_id = meta.ret_btf_id;
10278 } else if (meta.func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
10279 ret_t = btf_type_by_id(desc_btf, meta.arg_constant.value);
10280 if (!ret_t || !btf_type_is_struct(ret_t)) {
10281 verbose(env,
10282 "kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
10283 return -EINVAL;
10284 }
10285
10286 mark_reg_known_zero(env, regs, BPF_REG_0);
10287 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
10288 regs[BPF_REG_0].btf = desc_btf;
10289 regs[BPF_REG_0].btf_id = meta.arg_constant.value;
10290 } else if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
10291 meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
10292 enum bpf_type_flag type_flag = get_dynptr_type_flag(meta.initialized_dynptr.type);
10293
10294 mark_reg_known_zero(env, regs, BPF_REG_0);
10295
10296 if (!meta.arg_constant.found) {
10297 verbose(env, "verifier internal error: bpf_dynptr_slice(_rdwr) no constant size\n");
10298 return -EFAULT;
10299 }
10300
10301 regs[BPF_REG_0].mem_size = meta.arg_constant.value;
10302
10303 /* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
10304 regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
10305
10306 if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
10307 regs[BPF_REG_0].type |= MEM_RDONLY;
10308 } else {
10309 /* this will set env->seen_direct_write to true */
10310 if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
10311 verbose(env, "the prog does not allow writes to packet data\n");
10312 return -EINVAL;
10313 }
10314 }
10315
10316 if (!meta.initialized_dynptr.id) {
10317 verbose(env, "verifier internal error: no dynptr id\n");
10318 return -EFAULT;
10319 }
10320 regs[BPF_REG_0].dynptr_id = meta.initialized_dynptr.id;
10321
10322 /* we don't need to set BPF_REG_0's ref obj id
10323 * because packet slices are not refcounted (see
10324 * dynptr_type_refcounted)
10325 */
10326 } else {
10327 verbose(env, "kernel function %s unhandled dynamic return type\n",
10328 meta.func_name);
10329 return -EFAULT;
10330 }
10331 } else if (!__btf_type_is_struct(ptr_type)) {
10332 if (!meta.r0_size) {
10333 ptr_type_name = btf_name_by_offset(desc_btf,
10334 ptr_type->name_off);
10335 verbose(env,
10336 "kernel function %s returns pointer type %s %s is not supported\n",
10337 func_name,
10338 btf_type_str(ptr_type),
10339 ptr_type_name);
10340 return -EINVAL;
10341 }
10342
10343 mark_reg_known_zero(env, regs, BPF_REG_0);
10344 regs[BPF_REG_0].type = PTR_TO_MEM;
10345 regs[BPF_REG_0].mem_size = meta.r0_size;
10346
10347 if (meta.r0_rdonly)
10348 regs[BPF_REG_0].type |= MEM_RDONLY;
10349
10350 /* Ensures we don't access the memory after a release_reference() */
10351 if (meta.ref_obj_id)
10352 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
10353 } else {
10354 mark_reg_known_zero(env, regs, BPF_REG_0);
10355 regs[BPF_REG_0].btf = desc_btf;
10356 regs[BPF_REG_0].type = PTR_TO_BTF_ID;
10357 regs[BPF_REG_0].btf_id = ptr_type_id;
10358 }
10359
10360 if (is_kfunc_ret_null(&meta)) {
10361 regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
10362 /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
10363 regs[BPF_REG_0].id = ++env->id_gen;
10364 }
10365 mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
10366 if (is_kfunc_acquire(&meta)) {
10367 int id = acquire_reference_state(env, insn_idx);
10368
10369 if (id < 0)
10370 return id;
10371 if (is_kfunc_ret_null(&meta))
10372 regs[BPF_REG_0].id = id;
10373 regs[BPF_REG_0].ref_obj_id = id;
10374 } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
10375 ref_set_non_owning(env, &regs[BPF_REG_0]);
10376 }
10377
10378 if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove])
10379 invalidate_non_owning_refs(env);
10380
10381 if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
10382 regs[BPF_REG_0].id = ++env->id_gen;
10383 } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
10384
10385 nargs = btf_type_vlen(func_proto);
10386 args = (const struct btf_param *)(func_proto + 1);
10387 for (i = 0; i < nargs; i++) {
10388 u32 regno = i + 1;
10389
10390 t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
10391 if (btf_type_is_ptr(t))
10392 mark_btf_func_reg_size(env, regno, sizeof(void *));
10393 else
10394 /* scalar. ensured by btf_check_kfunc_arg_match() */
10395 mark_btf_func_reg_size(env, regno, t->size);
10396 }
10397
10398 return 0;
10399}
10400
10401static bool signed_add_overflows(s64 a, s64 b)
10402{
10403 /* Do the add in u64, where overflow is well-defined */
10404 s64 res = (s64)((u64)a + (u64)b);
10405
10406 if (b < 0)
10407 return res > a;
10408 return res < a;
10409}
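/* Illustrative example (values only, not used by the verifier): with
 * a = S64_MAX and b = 1 the u64 addition wraps to S64_MIN, so res < a and
 * signed_add_overflows() reports an overflow, whereas a = -1, b = 1 gives
 * res = 0 > a and no overflow. The *_sub variants below mirror this logic.
 */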
10410
10411static bool signed_add32_overflows(s32 a, s32 b)
10412{
10413 /* Do the add in u32, where overflow is well-defined */
10414 s32 res = (s32)((u32)a + (u32)b);
10415
10416 if (b < 0)
10417 return res > a;
10418 return res < a;
10419}
10420
10421static bool signed_sub_overflows(s64 a, s64 b)
10422{
10423 /* Do the sub in u64, where overflow is well-defined */
10424 s64 res = (s64)((u64)a - (u64)b);
10425
10426 if (b < 0)
10427 return res < a;
10428 return res > a;
10429}
10430
10431static bool signed_sub32_overflows(s32 a, s32 b)
10432{
10433 /* Do the sub in u32, where overflow is well-defined */
10434 s32 res = (s32)((u32)a - (u32)b);
10435
10436 if (b < 0)
10437 return res < a;
10438 return res > a;
10439}
10440
10441static bool check_reg_sane_offset(struct bpf_verifier_env *env,
10442 const struct bpf_reg_state *reg,
10443 enum bpf_reg_type type)
10444{
10445 bool known = tnum_is_const(reg->var_off);
10446 s64 val = reg->var_off.value;
10447 s64 smin = reg->smin_value;
10448
10449 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
10450 verbose(env, "math between %s pointer and %lld is not allowed\n",
10451 reg_type_str(env, type), val);
10452 return false;
10453 }
10454
10455 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
10456 verbose(env, "%s pointer offset %d is not allowed\n",
10457 reg_type_str(env, type), reg->off);
10458 return false;
10459 }
10460
10461 if (smin == S64_MIN) {
10462 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
10463 reg_type_str(env, type));
10464 return false;
10465 }
10466
10467 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
10468 verbose(env, "value %lld makes %s pointer be out of bounds\n",
10469 smin, reg_type_str(env, type));
10470 return false;
10471 }
10472
10473 return true;
10474}
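/* Illustration of the checks above (BPF_MAX_VAR_OFF and its exact value
 * are defined in bpf_verifier.h; this is only a sketch):
 *
 *   r2 = r0                  // r0 = PTR_TO_MAP_VALUE
 *   r2 += 0x20000000         // known constant >= BPF_MAX_VAR_OFF: rejected
 *   r2 += r3                 // r3 unbounded below (smin == S64_MIN): rejected
 */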
10475
10476enum {
10477 REASON_BOUNDS = -1,
10478 REASON_TYPE = -2,
10479 REASON_PATHS = -3,
10480 REASON_LIMIT = -4,
10481 REASON_STACK = -5,
10482};
10483
10484static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
10485 u32 *alu_limit, bool mask_to_left)
10486{
10487 u32 max = 0, ptr_limit = 0;
10488
10489 switch (ptr_reg->type) {
10490 case PTR_TO_STACK:
10491 /* Offset 0 is out-of-bounds, but acceptable start for the
10492 * left direction, see BPF_REG_FP. Also, unknown scalar
10493 * offset where we would need to deal with min/max bounds is
10494 * currently prohibited for unprivileged.
10495 */
10496 max = MAX_BPF_STACK + mask_to_left;
10497 ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
10498 break;
10499 case PTR_TO_MAP_VALUE:
10500 max = ptr_reg->map_ptr->value_size;
10501 ptr_limit = (mask_to_left ?
10502 ptr_reg->smin_value :
10503 ptr_reg->umax_value) + ptr_reg->off;
10504 break;
10505 default:
10506 return REASON_TYPE;
10507 }
10508
10509 if (ptr_limit >= max)
10510 return REASON_LIMIT;
10511 *alu_limit = ptr_limit;
10512 return 0;
10513}
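/* Worked example (numbers only): a PTR_TO_STACK register at fp-16
 * (off == -16, constant var_off == 0) yields ptr_limit = -(0 + -16) = 16,
 * well below the MAX_BPF_STACK-based cap; a PTR_TO_MAP_VALUE with
 * value_size == 48, off == 8 and mask_to_left == false yields
 * ptr_limit = umax_value + 8, which must stay below 48.
 */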
10514
10515static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
10516 const struct bpf_insn *insn)
10517{
10518 return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
10519}
10520
10521static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
10522 u32 alu_state, u32 alu_limit)
10523{
10524 /* If we arrived here from different branches with different
10525 * state or limits to sanitize, then this won't work.
10526 */
10527 if (aux->alu_state &&
10528 (aux->alu_state != alu_state ||
10529 aux->alu_limit != alu_limit))
10530 return REASON_PATHS;
10531
10532 /* Corresponding fixup done in do_misc_fixups(). */
10533 aux->alu_state = alu_state;
10534 aux->alu_limit = alu_limit;
10535 return 0;
10536}
10537
10538static int sanitize_val_alu(struct bpf_verifier_env *env,
10539 struct bpf_insn *insn)
10540{
10541 struct bpf_insn_aux_data *aux = cur_aux(env);
10542
10543 if (can_skip_alu_sanitation(env, insn))
10544 return 0;
10545
10546 return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
10547}
10548
10549static bool sanitize_needed(u8 opcode)
10550{
10551 return opcode == BPF_ADD || opcode == BPF_SUB;
10552}
10553
10554struct bpf_sanitize_info {
10555 struct bpf_insn_aux_data aux;
10556 bool mask_to_left;
10557};
10558
10559static struct bpf_verifier_state *
10560sanitize_speculative_path(struct bpf_verifier_env *env,
10561 const struct bpf_insn *insn,
10562 u32 next_idx, u32 curr_idx)
10563{
10564 struct bpf_verifier_state *branch;
10565 struct bpf_reg_state *regs;
10566
10567 branch = push_stack(env, next_idx, curr_idx, true);
10568 if (branch && insn) {
10569 regs = branch->frame[branch->curframe]->regs;
10570 if (BPF_SRC(insn->code) == BPF_K) {
10571 mark_reg_unknown(env, regs, insn->dst_reg);
10572 } else if (BPF_SRC(insn->code) == BPF_X) {
10573 mark_reg_unknown(env, regs, insn->dst_reg);
10574 mark_reg_unknown(env, regs, insn->src_reg);
10575 }
10576 }
10577 return branch;
10578}
10579
10580static int sanitize_ptr_alu(struct bpf_verifier_env *env,
10581 struct bpf_insn *insn,
10582 const struct bpf_reg_state *ptr_reg,
10583 const struct bpf_reg_state *off_reg,
10584 struct bpf_reg_state *dst_reg,
10585 struct bpf_sanitize_info *info,
10586 const bool commit_window)
10587{
10588 struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
10589 struct bpf_verifier_state *vstate = env->cur_state;
10590 bool off_is_imm = tnum_is_const(off_reg->var_off);
10591 bool off_is_neg = off_reg->smin_value < 0;
10592 bool ptr_is_dst_reg = ptr_reg == dst_reg;
10593 u8 opcode = BPF_OP(insn->code);
10594 u32 alu_state, alu_limit;
10595 struct bpf_reg_state tmp;
10596 bool ret;
10597 int err;
10598
10599 if (can_skip_alu_sanitation(env, insn))
10600 return 0;
10601
10602 /* We already marked aux for masking from non-speculative
10603 * paths, thus we got here in the first place. We only care
10604 * to explore bad access from here.
10605 */
10606 if (vstate->speculative)
10607 goto do_sim;
10608
10609 if (!commit_window) {
10610 if (!tnum_is_const(off_reg->var_off) &&
10611 (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
10612 return REASON_BOUNDS;
10613
10614 info->mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
10615 (opcode == BPF_SUB && !off_is_neg);
10616 }
10617
10618 err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
10619 if (err < 0)
10620 return err;
10621
10622 if (commit_window) {
10623 /* In commit phase we narrow the masking window based on
10624 * the observed pointer move after the simulated operation.
10625 */
10626 alu_state = info->aux.alu_state;
10627 alu_limit = abs(info->aux.alu_limit - alu_limit);
10628 } else {
10629 alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
10630 alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
10631 alu_state |= ptr_is_dst_reg ?
10632 BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
10633
10634 /* Limit pruning on unknown scalars to enable deep search for
10635 * potential masking differences from other program paths.
10636 */
10637 if (!off_is_imm)
10638 env->explore_alu_limits = true;
10639 }
10640
10641 err = update_alu_sanitation_state(aux, alu_state, alu_limit);
10642 if (err < 0)
10643 return err;
10644do_sim:
10645 /* If we're in commit phase, we're done here given we already
10646 * pushed the truncated dst_reg into the speculative verification
10647 * stack.
10648 *
10649 * Also, when register is a known constant, we rewrite register-based
10650 * operation to immediate-based, and thus do not need masking (and as
10651 * a consequence, do not need to simulate the zero-truncation either).
10652 */
10653 if (commit_window || off_is_imm)
10654 return 0;
10655
10656 /* Simulate and find potential out-of-bounds access under
10657 * speculative execution from truncation as a result of
10658 * masking when off was not within expected range. If off
10659 * sits in dst, then we temporarily need to move ptr there
10660 * to simulate dst (== 0) +/-= ptr. Needed, for example,
10661 * for cases where we use K-based arithmetic in one direction
10662 * and truncated reg-based in the other in order to explore
10663 * bad access.
10664 */
10665 if (!ptr_is_dst_reg) {
10666 tmp = *dst_reg;
10667 copy_register_state(dst_reg, ptr_reg);
10668 }
10669 ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
10670 env->insn_idx);
10671 if (!ptr_is_dst_reg && ret)
10672 *dst_reg = tmp;
10673 return !ret ? REASON_STACK : 0;
10674}
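/* Example of the two calls into sanitize_ptr_alu(): the first (pre-op)
 * pass on a stack pointer at fp-16 might record alu_limit = 16; if the
 * simulated ADD then leaves the pointer at fp-8, the commit-window pass
 * recomputes a limit of 8 and stores abs(16 - 8) = 8, narrowing the
 * masking window to the pointer movement that was actually observed.
 */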
10675
10676static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
10677{
10678 struct bpf_verifier_state *vstate = env->cur_state;
10679
10680 /* If we simulate paths under speculation, we don't update the
10681 * insn as 'seen' such that when we verify unreachable paths in
10682 * the non-speculative domain, sanitize_dead_code() can still
10683 * rewrite/sanitize them.
10684 */
10685 if (!vstate->speculative)
10686 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
10687}
10688
10689static int sanitize_err(struct bpf_verifier_env *env,
10690 const struct bpf_insn *insn, int reason,
10691 const struct bpf_reg_state *off_reg,
10692 const struct bpf_reg_state *dst_reg)
10693{
10694 static const char *err = "pointer arithmetic with it prohibited for !root";
10695 const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
10696 u32 dst = insn->dst_reg, src = insn->src_reg;
10697
10698 switch (reason) {
10699 case REASON_BOUNDS:
10700 verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
10701 off_reg == dst_reg ? dst : src, err);
10702 break;
10703 case REASON_TYPE:
10704 verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
10705 off_reg == dst_reg ? src : dst, err);
10706 break;
10707 case REASON_PATHS:
10708 verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
10709 dst, op, err);
10710 break;
10711 case REASON_LIMIT:
10712 verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
10713 dst, op, err);
10714 break;
10715 case REASON_STACK:
10716 verbose(env, "R%d could not be pushed for speculative verification, %s\n",
10717 dst, err);
10718 break;
10719 default:
10720 verbose(env, "verifier internal error: unknown reason (%d)\n",
10721 reason);
10722 break;
10723 }
10724
10725 return -EACCES;
10726}
10727
10728/* check that stack access falls within stack limits and that 'reg' doesn't
10729 * have a variable offset.
10730 *
10731 * Variable offset is prohibited for unprivileged mode for simplicity since it
10732 * requires corresponding support in Spectre masking for stack ALU. See also
10733 * retrieve_ptr_limit().
10734 *
10736 * 'off' includes 'reg->off'.
10737 */
10738static int check_stack_access_for_ptr_arithmetic(
10739 struct bpf_verifier_env *env,
10740 int regno,
10741 const struct bpf_reg_state *reg,
10742 int off)
10743{
10744 if (!tnum_is_const(reg->var_off)) {
10745 char tn_buf[48];
10746
10747 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
10748 verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
10749 regno, tn_buf, off);
10750 return -EACCES;
10751 }
10752
10753 if (off >= 0 || off < -MAX_BPF_STACK) {
10754 verbose(env, "R%d stack pointer arithmetic goes out of range, "
10755 "prohibited for !root; off=%d\n", regno, off);
10756 return -EACCES;
10757 }
10758
10759 return 0;
10760}
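/* E.g. a constant offset of -8 (fp-8) is accepted here, while +8 or
 * -(MAX_BPF_STACK + 8) is rejected: the resulting offset must stay within
 * [-MAX_BPF_STACK, -1] and be a known constant.
 */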
10761
10762static int sanitize_check_bounds(struct bpf_verifier_env *env,
10763 const struct bpf_insn *insn,
10764 const struct bpf_reg_state *dst_reg)
10765{
10766 u32 dst = insn->dst_reg;
10767
10768 /* For unprivileged we require the resulting offset to be in bounds
10769 * in order to be able to sanitize access later on.
10770 */
10771 if (env->bypass_spec_v1)
10772 return 0;
10773
10774 switch (dst_reg->type) {
10775 case PTR_TO_STACK:
10776 if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
10777 dst_reg->off + dst_reg->var_off.value))
10778 return -EACCES;
10779 break;
10780 case PTR_TO_MAP_VALUE:
10781 if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
10782 verbose(env, "R%d pointer arithmetic of map value goes out of range, "
10783 "prohibited for !root\n", dst);
10784 return -EACCES;
10785 }
10786 break;
10787 default:
10788 break;
10789 }
10790
10791 return 0;
10792}
10793
10794/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
10795 * Caller should also handle BPF_MOV case separately.
10796 * If we return -EACCES, caller may want to try again treating pointer as a
10797 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
10798 */
10799static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
10800 struct bpf_insn *insn,
10801 const struct bpf_reg_state *ptr_reg,
10802 const struct bpf_reg_state *off_reg)
10803{
10804 struct bpf_verifier_state *vstate = env->cur_state;
10805 struct bpf_func_state *state = vstate->frame[vstate->curframe];
10806 struct bpf_reg_state *regs = state->regs, *dst_reg;
10807 bool known = tnum_is_const(off_reg->var_off);
10808 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
10809 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
10810 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
10811 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
10812 struct bpf_sanitize_info info = {};
10813 u8 opcode = BPF_OP(insn->code);
10814 u32 dst = insn->dst_reg;
10815 int ret;
10816
10817 dst_reg = &regs[dst];
10818
10819 if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
10820 smin_val > smax_val || umin_val > umax_val) {
10821 /* Taint dst register if offset had invalid bounds derived from
10822 * e.g. dead branches.
10823 */
10824 __mark_reg_unknown(env, dst_reg);
10825 return 0;
10826 }
10827
10828 if (BPF_CLASS(insn->code) != BPF_ALU64) {
10829 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
10830 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
10831 __mark_reg_unknown(env, dst_reg);
10832 return 0;
10833 }
10834
10835 verbose(env,
10836 "R%d 32-bit pointer arithmetic prohibited\n",
10837 dst);
10838 return -EACCES;
10839 }
10840
10841 if (ptr_reg->type & PTR_MAYBE_NULL) {
10842 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
10843 dst, reg_type_str(env, ptr_reg->type));
10844 return -EACCES;
10845 }
10846
10847 switch (base_type(ptr_reg->type)) {
10848 case CONST_PTR_TO_MAP:
10849 /* smin_val represents the known value */
10850 if (known && smin_val == 0 && opcode == BPF_ADD)
10851 break;
10852 fallthrough;
10853 case PTR_TO_PACKET_END:
10854 case PTR_TO_SOCKET:
10855 case PTR_TO_SOCK_COMMON:
10856 case PTR_TO_TCP_SOCK:
10857 case PTR_TO_XDP_SOCK:
10858 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
10859 dst, reg_type_str(env, ptr_reg->type));
10860 return -EACCES;
10861 default:
10862 break;
10863 }
10864
10865 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
10866 * The id may be overwritten later if we create a new variable offset.
10867 */
10868 dst_reg->type = ptr_reg->type;
10869 dst_reg->id = ptr_reg->id;
10870
10871 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
10872 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
10873 return -EINVAL;
10874
10875 /* pointer types do not carry 32-bit bounds at the moment. */
10876 __mark_reg32_unbounded(dst_reg);
10877
10878 if (sanitize_needed(opcode)) {
10879 ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
10880 &info, false);
10881 if (ret < 0)
10882 return sanitize_err(env, insn, ret, off_reg, dst_reg);
10883 }
10884
10885 switch (opcode) {
10886 case BPF_ADD:
10887 /* We can take a fixed offset as long as it doesn't overflow
10888 * the s32 'off' field
10889 */
10890 if (known && (ptr_reg->off + smin_val ==
10891 (s64)(s32)(ptr_reg->off + smin_val))) {
10892 /* pointer += K. Accumulate it into fixed offset */
10893 dst_reg->smin_value = smin_ptr;
10894 dst_reg->smax_value = smax_ptr;
10895 dst_reg->umin_value = umin_ptr;
10896 dst_reg->umax_value = umax_ptr;
10897 dst_reg->var_off = ptr_reg->var_off;
10898 dst_reg->off = ptr_reg->off + smin_val;
10899 dst_reg->raw = ptr_reg->raw;
10900 break;
10901 }
10902 /* A new variable offset is created. Note that off_reg->off
10903 * == 0, since it's a scalar.
10904 * dst_reg gets the pointer type and since some positive
10905 * integer value was added to the pointer, give it a new 'id'
10906 * if it's a PTR_TO_PACKET.
10907 * This creates a new 'base' pointer, off_reg (variable) gets
10908 * added into the variable offset, and we copy the fixed offset
10909 * from ptr_reg.
10910 */
10911 if (signed_add_overflows(smin_ptr, smin_val) ||
10912 signed_add_overflows(smax_ptr, smax_val)) {
10913 dst_reg->smin_value = S64_MIN;
10914 dst_reg->smax_value = S64_MAX;
10915 } else {
10916 dst_reg->smin_value = smin_ptr + smin_val;
10917 dst_reg->smax_value = smax_ptr + smax_val;
10918 }
10919 if (umin_ptr + umin_val < umin_ptr ||
10920 umax_ptr + umax_val < umax_ptr) {
10921 dst_reg->umin_value = 0;
10922 dst_reg->umax_value = U64_MAX;
10923 } else {
10924 dst_reg->umin_value = umin_ptr + umin_val;
10925 dst_reg->umax_value = umax_ptr + umax_val;
10926 }
10927 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
10928 dst_reg->off = ptr_reg->off;
10929 dst_reg->raw = ptr_reg->raw;
10930 if (reg_is_pkt_pointer(ptr_reg)) {
10931 dst_reg->id = ++env->id_gen;
10932 /* something was added to pkt_ptr, set range to zero */
10933 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
10934 }
10935 break;
10936 case BPF_SUB:
10937 if (dst_reg == off_reg) {
10938 /* scalar -= pointer. Creates an unknown scalar */
10939 verbose(env, "R%d tried to subtract pointer from scalar\n",
10940 dst);
10941 return -EACCES;
10942 }
10943 /* We don't allow subtraction from FP, because (according to
10944 * test_verifier.c test "invalid fp arithmetic") JITs might not
10945 * be able to deal with it.
10946 */
10947 if (ptr_reg->type == PTR_TO_STACK) {
10948 verbose(env, "R%d subtraction from stack pointer prohibited\n",
10949 dst);
10950 return -EACCES;
10951 }
10952 if (known && (ptr_reg->off - smin_val ==
10953 (s64)(s32)(ptr_reg->off - smin_val))) {
10954 /* pointer -= K. Subtract it from fixed offset */
10955 dst_reg->smin_value = smin_ptr;
10956 dst_reg->smax_value = smax_ptr;
10957 dst_reg->umin_value = umin_ptr;
10958 dst_reg->umax_value = umax_ptr;
10959 dst_reg->var_off = ptr_reg->var_off;
10960 dst_reg->id = ptr_reg->id;
10961 dst_reg->off = ptr_reg->off - smin_val;
10962 dst_reg->raw = ptr_reg->raw;
10963 break;
10964 }
10965 /* A new variable offset is created. If the subtrahend is known
10966 * nonnegative, then any reg->range we had before is still good.
10967 */
10968 if (signed_sub_overflows(smin_ptr, smax_val) ||
10969 signed_sub_overflows(smax_ptr, smin_val)) {
10970 /* Overflow possible, we know nothing */
10971 dst_reg->smin_value = S64_MIN;
10972 dst_reg->smax_value = S64_MAX;
10973 } else {
10974 dst_reg->smin_value = smin_ptr - smax_val;
10975 dst_reg->smax_value = smax_ptr - smin_val;
10976 }
10977 if (umin_ptr < umax_val) {
10978 /* Overflow possible, we know nothing */
10979 dst_reg->umin_value = 0;
10980 dst_reg->umax_value = U64_MAX;
10981 } else {
10982 /* Cannot overflow (as long as bounds are consistent) */
10983 dst_reg->umin_value = umin_ptr - umax_val;
10984 dst_reg->umax_value = umax_ptr - umin_val;
10985 }
10986 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
10987 dst_reg->off = ptr_reg->off;
10988 dst_reg->raw = ptr_reg->raw;
10989 if (reg_is_pkt_pointer(ptr_reg)) {
10990 dst_reg->id = ++env->id_gen;
10991 /* if the subtrahend may be negative, the pointer may have advanced; set range to zero */
10992 if (smin_val < 0)
10993 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
10994 }
10995 break;
10996 case BPF_AND:
10997 case BPF_OR:
10998 case BPF_XOR:
10999 /* bitwise ops on pointers are troublesome, prohibit. */
11000 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
11001 dst, bpf_alu_string[opcode >> 4]);
11002 return -EACCES;
11003 default:
11004 /* other operators (e.g. MUL,LSH) produce non-pointer results */
11005 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
11006 dst, bpf_alu_string[opcode >> 4]);
11007 return -EACCES;
11008 }
11009
11010 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
11011 return -EINVAL;
11012 reg_bounds_sync(dst_reg);
11013 if (sanitize_check_bounds(env, insn, dst_reg) < 0)
11014 return -EACCES;
11015 if (sanitize_needed(opcode)) {
11016 ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
11017 &info, true);
11018 if (ret < 0)
11019 return sanitize_err(env, insn, ret, off_reg, dst_reg);
11020 }
11021
11022 return 0;
11023}
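/* Putting the BPF_ADD handling above together (illustrative only):
 *
 *   r1 = r10         // PTR_TO_STACK, off = 0
 *   r1 += -24        // known scalar: folded into the fixed offset,
 *                    // r1 stays PTR_TO_STACK with off = -24
 *   r1 += r3         // unknown scalar: off is kept and r3's bounds and
 *                    // var_off are added into the variable offset
 */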
11024
11025static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
11026 struct bpf_reg_state *src_reg)
11027{
11028 s32 smin_val = src_reg->s32_min_value;
11029 s32 smax_val = src_reg->s32_max_value;
11030 u32 umin_val = src_reg->u32_min_value;
11031 u32 umax_val = src_reg->u32_max_value;
11032
11033 if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
11034 signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
11035 dst_reg->s32_min_value = S32_MIN;
11036 dst_reg->s32_max_value = S32_MAX;
11037 } else {
11038 dst_reg->s32_min_value += smin_val;
11039 dst_reg->s32_max_value += smax_val;
11040 }
11041 if (dst_reg->u32_min_value + umin_val < umin_val ||
11042 dst_reg->u32_max_value + umax_val < umax_val) {
11043 dst_reg->u32_min_value = 0;
11044 dst_reg->u32_max_value = U32_MAX;
11045 } else {
11046 dst_reg->u32_min_value += umin_val;
11047 dst_reg->u32_max_value += umax_val;
11048 }
11049}
11050
11051static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
11052 struct bpf_reg_state *src_reg)
11053{
11054 s64 smin_val = src_reg->smin_value;
11055 s64 smax_val = src_reg->smax_value;
11056 u64 umin_val = src_reg->umin_value;
11057 u64 umax_val = src_reg->umax_value;
11058
11059 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
11060 signed_add_overflows(dst_reg->smax_value, smax_val)) {
11061 dst_reg->smin_value = S64_MIN;
11062 dst_reg->smax_value = S64_MAX;
11063 } else {
11064 dst_reg->smin_value += smin_val;
11065 dst_reg->smax_value += smax_val;
11066 }
11067 if (dst_reg->umin_value + umin_val < umin_val ||
11068 dst_reg->umax_value + umax_val < umax_val) {
11069 dst_reg->umin_value = 0;
11070 dst_reg->umax_value = U64_MAX;
11071 } else {
11072 dst_reg->umin_value += umin_val;
11073 dst_reg->umax_value += umax_val;
11074 }
11075}
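/* For example, dst in [2, 5] plus src in [10, 20] becomes [12, 25];
 * if either signed_add_overflows() check fires (e.g. smax_value == S64_MAX
 * plus 1) the signed bounds are reset to [S64_MIN, S64_MAX], and the
 * unsigned bounds are widened to [0, U64_MAX] on unsigned wrap-around.
 */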
11076
11077static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
11078 struct bpf_reg_state *src_reg)
11079{
11080 s32 smin_val = src_reg->s32_min_value;
11081 s32 smax_val = src_reg->s32_max_value;
11082 u32 umin_val = src_reg->u32_min_value;
11083 u32 umax_val = src_reg->u32_max_value;
11084
11085 if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
11086 signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
11087 /* Overflow possible, we know nothing */
11088 dst_reg->s32_min_value = S32_MIN;
11089 dst_reg->s32_max_value = S32_MAX;
11090 } else {
11091 dst_reg->s32_min_value -= smax_val;
11092 dst_reg->s32_max_value -= smin_val;
11093 }
11094 if (dst_reg->u32_min_value < umax_val) {
11095 /* Overflow possible, we know nothing */
11096 dst_reg->u32_min_value = 0;
11097 dst_reg->u32_max_value = U32_MAX;
11098 } else {
11099 /* Cannot overflow (as long as bounds are consistent) */
11100 dst_reg->u32_min_value -= umax_val;
11101 dst_reg->u32_max_value -= umin_val;
11102 }
11103}
11104
11105static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
11106 struct bpf_reg_state *src_reg)
11107{
11108 s64 smin_val = src_reg->smin_value;
11109 s64 smax_val = src_reg->smax_value;
11110 u64 umin_val = src_reg->umin_value;
11111 u64 umax_val = src_reg->umax_value;
11112
11113 if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
11114 signed_sub_overflows(dst_reg->smax_value, smin_val)) {
11115 /* Overflow possible, we know nothing */
11116 dst_reg->smin_value = S64_MIN;
11117 dst_reg->smax_value = S64_MAX;
11118 } else {
11119 dst_reg->smin_value -= smax_val;
11120 dst_reg->smax_value -= smin_val;
11121 }
11122 if (dst_reg->umin_value < umax_val) {
11123 /* Overflow possible, we know nothing */
11124 dst_reg->umin_value = 0;
11125 dst_reg->umax_value = U64_MAX;
11126 } else {
11127 /* Cannot overflow (as long as bounds are consistent) */
11128 dst_reg->umin_value -= umax_val;
11129 dst_reg->umax_value -= umin_val;
11130 }
11131}
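/* For example, dst in [10, 20] minus src in [1, 4] becomes
 * [10 - 4, 20 - 1] = [6, 19]: the new minimum subtracts the largest
 * possible src value and the new maximum the smallest, with any possible
 * wrap again falling back to the fully unbounded range.
 */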
11132
11133static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
11134 struct bpf_reg_state *src_reg)
11135{
11136 s32 smin_val = src_reg->s32_min_value;
11137 u32 umin_val = src_reg->u32_min_value;
11138 u32 umax_val = src_reg->u32_max_value;
11139
11140 if (smin_val < 0 || dst_reg->s32_min_value < 0) {
11141 /* Ain't nobody got time to multiply that sign */
11142 __mark_reg32_unbounded(dst_reg);
11143 return;
11144 }
11145 /* Both values are positive, so we can work with unsigned and
11146 * copy the result to signed (unless it exceeds S32_MAX).
11147 */
11148 if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
11149 /* Potential overflow, we know nothing */
11150 __mark_reg32_unbounded(dst_reg);
11151 return;
11152 }
11153 dst_reg->u32_min_value *= umin_val;
11154 dst_reg->u32_max_value *= umax_val;
11155 if (dst_reg->u32_max_value > S32_MAX) {
11156 /* Overflow possible, we know nothing */
11157 dst_reg->s32_min_value = S32_MIN;
11158 dst_reg->s32_max_value = S32_MAX;
11159 } else {
11160 dst_reg->s32_min_value = dst_reg->u32_min_value;
11161 dst_reg->s32_max_value = dst_reg->u32_max_value;
11162 }
11163}
11164
11165static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
11166 struct bpf_reg_state *src_reg)
11167{
11168 s64 smin_val = src_reg->smin_value;
11169 u64 umin_val = src_reg->umin_value;
11170 u64 umax_val = src_reg->umax_value;
11171
11172 if (smin_val < 0 || dst_reg->smin_value < 0) {
11173 /* Ain't nobody got time to multiply that sign */
11174 __mark_reg64_unbounded(dst_reg);
11175 return;
11176 }
11177 /* Both values are positive, so we can work with unsigned and
11178 * copy the result to signed (unless it exceeds S64_MAX).
11179 */
11180 if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
11181 /* Potential overflow, we know nothing */
11182 __mark_reg64_unbounded(dst_reg);
11183 return;
11184 }
11185 dst_reg->umin_value *= umin_val;
11186 dst_reg->umax_value *= umax_val;
11187 if (dst_reg->umax_value > S64_MAX) {
11188 /* Overflow possible, we know nothing */
11189 dst_reg->smin_value = S64_MIN;
11190 dst_reg->smax_value = S64_MAX;
11191 } else {
11192 dst_reg->smin_value = dst_reg->umin_value;
11193 dst_reg->smax_value = dst_reg->umax_value;
11194 }
11195}
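/* For example, dst in [2, 10] times src in [3, 4] becomes [6, 40]; once
 * either operand's umax_value exceeds U32_MAX the u64 product could wrap,
 * so the destination is marked fully unbounded rather than guessed at.
 */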
11196
11197static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
11198 struct bpf_reg_state *src_reg)
11199{
11200 bool src_known = tnum_subreg_is_const(src_reg->var_off);
11201 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
11202 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
11203 s32 smin_val = src_reg->s32_min_value;
11204 u32 umax_val = src_reg->u32_max_value;
11205
11206 if (src_known && dst_known) {
11207 __mark_reg32_known(dst_reg, var32_off.value);
11208 return;
11209 }
11210
11211 /* We get our minimum from the var_off, since that's inherently
11212 * bitwise. Our maximum is the minimum of the operands' maxima.
11213 */
11214 dst_reg->u32_min_value = var32_off.value;
11215 dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
11216 if (dst_reg->s32_min_value < 0 || smin_val < 0) {
11217 /* Lose signed bounds when ANDing negative numbers,
11218 * ain't nobody got time for that.
11219 */
11220 dst_reg->s32_min_value = S32_MIN;
11221 dst_reg->s32_max_value = S32_MAX;
11222 } else {
11223 /* ANDing two positives gives a positive, so safe to
11224 * cast result into s32.
11225 */
11226 dst_reg->s32_min_value = dst_reg->u32_min_value;
11227 dst_reg->s32_max_value = dst_reg->u32_max_value;
11228 }
11229}
11230
11231static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
11232 struct bpf_reg_state *src_reg)
11233{
11234 bool src_known = tnum_is_const(src_reg->var_off);
11235 bool dst_known = tnum_is_const(dst_reg->var_off);
11236 s64 smin_val = src_reg->smin_value;
11237 u64 umax_val = src_reg->umax_value;
11238
11239 if (src_known && dst_known) {
11240 __mark_reg_known(dst_reg, dst_reg->var_off.value);
11241 return;
11242 }
11243
11244 /* We get our minimum from the var_off, since that's inherently
11245 * bitwise. Our maximum is the minimum of the operands' maxima.
11246 */
11247 dst_reg->umin_value = dst_reg->var_off.value;
11248 dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
11249 if (dst_reg->smin_value < 0 || smin_val < 0) {
11250 /* Lose signed bounds when ANDing negative numbers,
11251 * ain't nobody got time for that.
11252 */
11253 dst_reg->smin_value = S64_MIN;
11254 dst_reg->smax_value = S64_MAX;
11255 } else {
11256 /* ANDing two positives gives a positive, so safe to
11257 * cast result into s64.
11258 */
11259 dst_reg->smin_value = dst_reg->umin_value;
11260 dst_reg->smax_value = dst_reg->umax_value;
11261 }
11262 /* We may learn something more from the var_off */
11263 __update_reg_bounds(dst_reg);
11264}
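/* For example, ANDing an unknown value with a constant 0xff leaves
 * var_off = (value 0x0, mask 0xff), so umin_value becomes 0 and
 * umax_value becomes min(U64_MAX, 0xff) = 0xff; because the unknown dst
 * may be negative the signed bounds are blown away here and recovered
 * from var_off by __update_reg_bounds().
 */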
11265
11266static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
11267 struct bpf_reg_state *src_reg)
11268{
11269 bool src_known = tnum_subreg_is_const(src_reg->var_off);
11270 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
11271 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
11272 s32 smin_val = src_reg->s32_min_value;
11273 u32 umin_val = src_reg->u32_min_value;
11274
11275 if (src_known && dst_known) {
11276 __mark_reg32_known(dst_reg, var32_off.value);
11277 return;
11278 }
11279
11280 /* We get our maximum from the var_off, and our minimum is the
11281 * maximum of the operands' minima
11282 */
11283 dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
11284 dst_reg->u32_max_value = var32_off.value | var32_off.mask;
11285 if (dst_reg->s32_min_value < 0 || smin_val < 0) {
11286 /* Lose signed bounds when ORing negative numbers,
11287 * ain't nobody got time for that.
11288 */
11289 dst_reg->s32_min_value = S32_MIN;
11290 dst_reg->s32_max_value = S32_MAX;
11291 } else {
11292 /* ORing two positives gives a positive, so safe to
11293 * cast result into s32.
11294 */
11295 dst_reg->s32_min_value = dst_reg->u32_min_value;
11296 dst_reg->s32_max_value = dst_reg->u32_max_value;
11297 }
11298}
11299
11300static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
11301 struct bpf_reg_state *src_reg)
11302{
11303 bool src_known = tnum_is_const(src_reg->var_off);
11304 bool dst_known = tnum_is_const(dst_reg->var_off);
11305 s64 smin_val = src_reg->smin_value;
11306 u64 umin_val = src_reg->umin_value;
11307
11308 if (src_known && dst_known) {
11309 __mark_reg_known(dst_reg, dst_reg->var_off.value);
11310 return;
11311 }
11312
11313 /* We get our maximum from the var_off, and our minimum is the
11314 * maximum of the operands' minima
11315 */
11316 dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
11317 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
11318 if (dst_reg->smin_value < 0 || smin_val < 0) {
11319 /* Lose signed bounds when ORing negative numbers,
11320 * ain't nobody got time for that.
11321 */
11322 dst_reg->smin_value = S64_MIN;
11323 dst_reg->smax_value = S64_MAX;
11324 } else {
11325 /* ORing two positives gives a positive, so safe to
11326 * cast result into s64.
11327 */
11328 dst_reg->smin_value = dst_reg->umin_value;
11329 dst_reg->smax_value = dst_reg->umax_value;
11330 }
11331 /* We may learn something more from the var_off */
11332 __update_reg_bounds(dst_reg);
11333}
11334
11335static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
11336 struct bpf_reg_state *src_reg)
11337{
11338 bool src_known = tnum_subreg_is_const(src_reg->var_off);
11339 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
11340 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
11341 s32 smin_val = src_reg->s32_min_value;
11342
11343 if (src_known && dst_known) {
11344 __mark_reg32_known(dst_reg, var32_off.value);
11345 return;
11346 }
11347
11348 /* We get both minimum and maximum from the var32_off. */
11349 dst_reg->u32_min_value = var32_off.value;
11350 dst_reg->u32_max_value = var32_off.value | var32_off.mask;
11351
11352 if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
11353 /* XORing two positive sign numbers gives a positive,
11354 * so safe to cast u32 result into s32.
11355 */
11356 dst_reg->s32_min_value = dst_reg->u32_min_value;
11357 dst_reg->s32_max_value = dst_reg->u32_max_value;
11358 } else {
11359 dst_reg->s32_min_value = S32_MIN;
11360 dst_reg->s32_max_value = S32_MAX;
11361 }
11362}
11363
11364static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
11365 struct bpf_reg_state *src_reg)
11366{
11367 bool src_known = tnum_is_const(src_reg->var_off);
11368 bool dst_known = tnum_is_const(dst_reg->var_off);
11369 s64 smin_val = src_reg->smin_value;
11370
11371 if (src_known && dst_known) {
11372 /* dst_reg->var_off.value has been updated earlier */
11373 __mark_reg_known(dst_reg, dst_reg->var_off.value);
11374 return;
11375 }
11376
11377 /* We get both minimum and maximum from the var_off. */
11378 dst_reg->umin_value = dst_reg->var_off.value;
11379 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
11380
11381 if (dst_reg->smin_value >= 0 && smin_val >= 0) {
11382 /* XORing two positive sign numbers gives a positive,
11383 * so safe to cast u64 result into s64.
11384 */
11385 dst_reg->smin_value = dst_reg->umin_value;
11386 dst_reg->smax_value = dst_reg->umax_value;
11387 } else {
11388 dst_reg->smin_value = S64_MIN;
11389 dst_reg->smax_value = S64_MAX;
11390 }
11391
11392 __update_reg_bounds(dst_reg);
11393}
11394
11395static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
11396 u64 umin_val, u64 umax_val)
11397{
11398 /* We lose all sign bit information (except what we can pick
11399 * up from var_off)
11400 */
11401 dst_reg->s32_min_value = S32_MIN;
11402 dst_reg->s32_max_value = S32_MAX;
11403 /* If we might shift our top bit out, then we know nothing */
11404 if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
11405 dst_reg->u32_min_value = 0;
11406 dst_reg->u32_max_value = U32_MAX;
11407 } else {
11408 dst_reg->u32_min_value <<= umin_val;
11409 dst_reg->u32_max_value <<= umax_val;
11410 }
11411}
11412
11413static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
11414 struct bpf_reg_state *src_reg)
11415{
11416 u32 umax_val = src_reg->u32_max_value;
11417 u32 umin_val = src_reg->u32_min_value;
11418 /* u32 alu operation will zext upper bits */
11419 struct tnum subreg = tnum_subreg(dst_reg->var_off);
11420
11421 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
11422 dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
11423 /* Not strictly required, but to be careful mark the reg64 bounds as
11424 * unknown so that we are forced to pick them up from the tnum and zext
11425 * later; if some path skips this step we are still safe.
11426 */
11427 __mark_reg64_unbounded(dst_reg);
11428 __update_reg32_bounds(dst_reg);
11429}
11430
11431static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
11432 u64 umin_val, u64 umax_val)
11433{
11434 /* Special case <<32 because it is a common compiler pattern to sign
11435 * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
11436 * positive we know this shift will also be positive so we can track
11437 * bounds correctly. Otherwise we lose all sign bit information except
11438 * what we can pick up from var_off. Perhaps we can generalize this
11439 * later to shifts of any length.
11440 */
11441 if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
11442 dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
11443 else
11444 dst_reg->smax_value = S64_MAX;
11445
11446 if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
11447 dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
11448 else
11449 dst_reg->smin_value = S64_MIN;
11450
11451 /* If we might shift our top bit out, then we know nothing */
11452 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
11453 dst_reg->umin_value = 0;
11454 dst_reg->umax_value = U64_MAX;
11455 } else {
11456 dst_reg->umin_value <<= umin_val;
11457 dst_reg->umax_value <<= umax_val;
11458 }
11459}
11460
11461static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
11462 struct bpf_reg_state *src_reg)
11463{
11464 u64 umax_val = src_reg->umax_value;
11465 u64 umin_val = src_reg->umin_value;
11466
11467 /* scalar64 calc uses 32bit unshifted bounds so must be called first */
11468 __scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
11469 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
11470
11471 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
11472 /* We may learn something more from the var_off */
11473 __update_reg_bounds(dst_reg);
11474}
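/* The <<32 special case in __scalar64_min_max_lsh() covers the common
 *   r1 <<= 32; r1 s>>= 32;
 * sign-extension pattern: with 32-bit bounds of, say, [0, 100] the shift
 * yields 64-bit signed bounds [0 << 32, 100 << 32] instead of dropping
 * all sign information.
 */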
11475
11476static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
11477 struct bpf_reg_state *src_reg)
11478{
11479 struct tnum subreg = tnum_subreg(dst_reg->var_off);
11480 u32 umax_val = src_reg->u32_max_value;
11481 u32 umin_val = src_reg->u32_min_value;
11482
11483 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
11484 * be negative, then either:
11485 * 1) src_reg might be zero, so the sign bit of the result is
11486 * unknown, so we lose our signed bounds
11487 * 2) it's known negative, thus the unsigned bounds capture the
11488 * signed bounds
11489 * 3) the signed bounds cross zero, so they tell us nothing
11490 * about the result
11491 * If the value in dst_reg is known nonnegative, then again the
11492 * unsigned bounds capture the signed bounds.
11493 * Thus, in all cases it suffices to blow away our signed bounds
11494 * and rely on inferring new ones from the unsigned bounds and
11495 * var_off of the result.
11496 */
11497 dst_reg->s32_min_value = S32_MIN;
11498 dst_reg->s32_max_value = S32_MAX;
11499
11500 dst_reg->var_off = tnum_rshift(subreg, umin_val);
11501 dst_reg->u32_min_value >>= umax_val;
11502 dst_reg->u32_max_value >>= umin_val;
11503
11504 __mark_reg64_unbounded(dst_reg);
11505 __update_reg32_bounds(dst_reg);
11506}
11507
11508static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
11509 struct bpf_reg_state *src_reg)
11510{
11511 u64 umax_val = src_reg->umax_value;
11512 u64 umin_val = src_reg->umin_value;
11513
11514 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
11515 * be negative, then either:
11516 * 1) src_reg might be zero, so the sign bit of the result is
11517 * unknown, so we lose our signed bounds
11518 * 2) it's known negative, thus the unsigned bounds capture the
11519 * signed bounds
11520 * 3) the signed bounds cross zero, so they tell us nothing
11521 * about the result
11522 * If the value in dst_reg is known nonnegative, then again the
11523 * unsigned bounds capture the signed bounds.
11524 * Thus, in all cases it suffices to blow away our signed bounds
11525 * and rely on inferring new ones from the unsigned bounds and
11526 * var_off of the result.
11527 */
11528 dst_reg->smin_value = S64_MIN;
11529 dst_reg->smax_value = S64_MAX;
11530 dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
11531 dst_reg->umin_value >>= umax_val;
11532 dst_reg->umax_value >>= umin_val;
11533
11534 /* It's not easy to operate on alu32 bounds here because it depends
11535 * on bits being shifted in. Take easy way out and mark unbounded
11536 * so we can recalculate later from tnum.
11537 */
11538 __mark_reg32_unbounded(dst_reg);
11539 __update_reg_bounds(dst_reg);
11540}
11541
11542static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
11543 struct bpf_reg_state *src_reg)
11544{
11545 u64 umin_val = src_reg->u32_min_value;
11546
11547 /* Upon reaching here, src_known is true and
11548 * umax_val is equal to umin_val.
11549 */
11550 dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
11551 dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
11552
11553 dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
11554
11555 /* blow away the dst_reg umin_value/umax_value and rely on
11556 * dst_reg var_off to refine the result.
11557 */
11558 dst_reg->u32_min_value = 0;
11559 dst_reg->u32_max_value = U32_MAX;
11560
11561 __mark_reg64_unbounded(dst_reg);
11562 __update_reg32_bounds(dst_reg);
11563}
11564
11565static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
11566 struct bpf_reg_state *src_reg)
11567{
11568 u64 umin_val = src_reg->umin_value;
11569
11570 /* Upon reaching here, src_known is true and umax_val is equal
11571 * to umin_val.
11572 */
11573 dst_reg->smin_value >>= umin_val;
11574 dst_reg->smax_value >>= umin_val;
11575
11576 dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
11577
11578 /* blow away the dst_reg umin_value/umax_value and rely on
11579 * dst_reg var_off to refine the result.
11580 */
11581 dst_reg->umin_value = 0;
11582 dst_reg->umax_value = U64_MAX;
11583
11584 /* It's not easy to operate on alu32 bounds here because it depends
11585 * on bits being shifted in from upper 32-bits. Take easy way out
11586 * and mark unbounded so we can recalculate later from tnum.
11587 */
11588 __mark_reg32_unbounded(dst_reg);
11589 __update_reg_bounds(dst_reg);
11590}
11591
11592/* WARNING: This function does calculations on 64-bit values, but the actual
11593 * execution may occur on 32-bit values. Therefore, things like bitshifts
11594 * need extra checks in the 32-bit case.
11595 */
11596static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
11597 struct bpf_insn *insn,
11598 struct bpf_reg_state *dst_reg,
11599 struct bpf_reg_state src_reg)
11600{
11601 struct bpf_reg_state *regs = cur_regs(env);
11602 u8 opcode = BPF_OP(insn->code);
11603 bool src_known;
11604 s64 smin_val, smax_val;
11605 u64 umin_val, umax_val;
11606 s32 s32_min_val, s32_max_val;
11607 u32 u32_min_val, u32_max_val;
11608 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
11609 bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
11610 int ret;
11611
11612 smin_val = src_reg.smin_value;
11613 smax_val = src_reg.smax_value;
11614 umin_val = src_reg.umin_value;
11615 umax_val = src_reg.umax_value;
11616
11617 s32_min_val = src_reg.s32_min_value;
11618 s32_max_val = src_reg.s32_max_value;
11619 u32_min_val = src_reg.u32_min_value;
11620 u32_max_val = src_reg.u32_max_value;
11621
11622 if (alu32) {
11623 src_known = tnum_subreg_is_const(src_reg.var_off);
11624 if ((src_known &&
11625 (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
11626 s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
11627 /* Taint dst register if offset had invalid bounds
11628 * derived from e.g. dead branches.
11629 */
11630 __mark_reg_unknown(env, dst_reg);
11631 return 0;
11632 }
11633 } else {
11634 src_known = tnum_is_const(src_reg.var_off);
11635 if ((src_known &&
11636 (smin_val != smax_val || umin_val != umax_val)) ||
11637 smin_val > smax_val || umin_val > umax_val) {
11638 /* Taint dst register if offset had invalid bounds
11639 * derived from e.g. dead branches.
11640 */
11641 __mark_reg_unknown(env, dst_reg);
11642 return 0;
11643 }
11644 }
11645
11646 if (!src_known &&
11647 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
11648 __mark_reg_unknown(env, dst_reg);
11649 return 0;
11650 }
11651
11652 if (sanitize_needed(opcode)) {
11653 ret = sanitize_val_alu(env, insn);
11654 if (ret < 0)
11655 return sanitize_err(env, insn, ret, NULL, NULL);
11656 }
11657
11658 /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
11659 * There are two classes of instructions. For the first class we track
11660 * both alu32 and alu64 sign/unsigned bounds independently; this provides
11661 * the greatest amount of precision when alu operations are mixed with
11662 * jmp32 operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL,
11663 * BPF_AND, BPF_OR and BPF_XOR. This is possible because these ops have
11664 * fairly easy to understand and calculate behavior in both 32-bit and
11665 * 64-bit alu ops. See the alu32 verifier tests for examples. The second
11666 * class of operations, BPF_LSH, BPF_RSH and BPF_ARSH, however, is not so
11667 * easy with regard to tracking sign/unsigned bounds because the bits may
11668 * cross subreg boundaries in the alu64 case. When this happens we mark
11669 * the reg unbounded in the subreg bound space and use the resulting
11670 * tnum to calculate an approximation of the sign/unsigned bounds.
11671 */
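/* E.g. a BPF_ADD below updates the 32-bit and 64-bit bounds independently
 * and recomputes the tnum once, whereas a BPF_RSH keeps only the bounds
 * matching the instruction's width and rebuilds the other half from the
 * resulting tnum afterwards.
 */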
11672 switch (opcode) {
11673 case BPF_ADD:
11674 scalar32_min_max_add(dst_reg, &src_reg);
11675 scalar_min_max_add(dst_reg, &src_reg);
11676 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
11677 break;
11678 case BPF_SUB:
11679 scalar32_min_max_sub(dst_reg, &src_reg);
11680 scalar_min_max_sub(dst_reg, &src_reg);
11681 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
11682 break;
11683 case BPF_MUL:
11684 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
11685 scalar32_min_max_mul(dst_reg, &src_reg);
11686 scalar_min_max_mul(dst_reg, &src_reg);
11687 break;
11688 case BPF_AND:
11689 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
11690 scalar32_min_max_and(dst_reg, &src_reg);
11691 scalar_min_max_and(dst_reg, &src_reg);
11692 break;
11693 case BPF_OR:
11694 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
11695 scalar32_min_max_or(dst_reg, &src_reg);
11696 scalar_min_max_or(dst_reg, &src_reg);
11697 break;
11698 case BPF_XOR:
11699 dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
11700 scalar32_min_max_xor(dst_reg, &src_reg);
11701 scalar_min_max_xor(dst_reg, &src_reg);
11702 break;
11703 case BPF_LSH:
11704 if (umax_val >= insn_bitness) {
11705 /* Shifts greater than 31 or 63 are undefined.
11706 * This includes shifts by a negative number.
11707 */
11708 mark_reg_unknown(env, regs, insn->dst_reg);
11709 break;
11710 }
11711 if (alu32)
11712 scalar32_min_max_lsh(dst_reg, &src_reg);
11713 else
11714 scalar_min_max_lsh(dst_reg, &src_reg);
11715 break;
11716 case BPF_RSH:
11717 if (umax_val >= insn_bitness) {
11718 /* Shifts greater than 31 or 63 are undefined.
11719 * This includes shifts by a negative number.
11720 */
11721 mark_reg_unknown(env, regs, insn->dst_reg);
11722 break;
11723 }
11724 if (alu32)
11725 scalar32_min_max_rsh(dst_reg, &src_reg);
11726 else
11727 scalar_min_max_rsh(dst_reg, &src_reg);
11728 break;
11729 case BPF_ARSH:
11730 if (umax_val >= insn_bitness) {
11731 /* Shifts greater than 31 or 63 are undefined.
11732 * This includes shifts by a negative number.
11733 */
11734 mark_reg_unknown(env, regs, insn->dst_reg);
11735 break;
11736 }
11737 if (alu32)
11738 scalar32_min_max_arsh(dst_reg, &src_reg);
11739 else
11740 scalar_min_max_arsh(dst_reg, &src_reg);
11741 break;
11742 default:
11743 mark_reg_unknown(env, regs, insn->dst_reg);
11744 break;
11745 }
11746
11747 /* ALU32 ops are zero extended into 64bit register */
11748 if (alu32)
11749 zext_32_to_64(dst_reg);
11750 reg_bounds_sync(dst_reg);
11751 return 0;
11752}
11753
11754/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
11755 * and var_off.
11756 */
11757static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
11758 struct bpf_insn *insn)
11759{
11760 struct bpf_verifier_state *vstate = env->cur_state;
11761 struct bpf_func_state *state = vstate->frame[vstate->curframe];
11762 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
11763 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
11764 u8 opcode = BPF_OP(insn->code);
11765 int err;
11766
11767 dst_reg = &regs[insn->dst_reg];
11768 src_reg = NULL;
11769 if (dst_reg->type != SCALAR_VALUE)
11770 ptr_reg = dst_reg;
11771 else
11772 /* Make sure ID is cleared otherwise dst_reg min/max could be
11773 * incorrectly propagated into other registers by find_equal_scalars()
11774 */
11775 dst_reg->id = 0;
11776 if (BPF_SRC(insn->code) == BPF_X) {
11777 src_reg = &regs[insn->src_reg];
11778 if (src_reg->type != SCALAR_VALUE) {
11779 if (dst_reg->type != SCALAR_VALUE) {
11780 /* Combining two pointers by any ALU op yields
11781 * an arbitrary scalar. Disallow all math except
11782 * pointer subtraction
11783 */
11784 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
11785 mark_reg_unknown(env, regs, insn->dst_reg);
11786 return 0;
11787 }
11788 verbose(env, "R%d pointer %s pointer prohibited\n",
11789 insn->dst_reg,
11790 bpf_alu_string[opcode >> 4]);
11791 return -EACCES;
11792 } else {
11793 /* scalar += pointer
11794 * This is legal, but we have to reverse our
11795 * src/dest handling in computing the range
11796 */
11797 err = mark_chain_precision(env, insn->dst_reg);
11798 if (err)
11799 return err;
11800 return adjust_ptr_min_max_vals(env, insn,
11801 src_reg, dst_reg);
11802 }
11803 } else if (ptr_reg) {
11804 /* pointer += scalar */
11805 err = mark_chain_precision(env, insn->src_reg);
11806 if (err)
11807 return err;
11808 return adjust_ptr_min_max_vals(env, insn,
11809 dst_reg, src_reg);
11810 } else if (dst_reg->precise) {
11811 /* if dst_reg is precise, src_reg should be precise as well */
11812 err = mark_chain_precision(env, insn->src_reg);
11813 if (err)
11814 return err;
11815 }
11816 } else {
11817 /* Pretend the src is a reg with a known value, since we only
11818 * need to be able to read from this state.
11819 */
11820 off_reg.type = SCALAR_VALUE;
11821 __mark_reg_known(&off_reg, insn->imm);
11822 src_reg = &off_reg;
11823 if (ptr_reg) /* pointer += K */
11824 return adjust_ptr_min_max_vals(env, insn,
11825 ptr_reg, src_reg);
11826 }
11827
11828 /* Got here implies adding two SCALAR_VALUEs */
11829 if (WARN_ON_ONCE(ptr_reg)) {
11830 print_verifier_state(env, state, true);
11831 verbose(env, "verifier internal error: unexpected ptr_reg\n");
11832 return -EINVAL;
11833 }
11834 if (WARN_ON(!src_reg)) {
11835 print_verifier_state(env, state, true);
11836 verbose(env, "verifier internal error: no src_reg\n");
11837 return -EINVAL;
11838 }
11839 return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
11840}
11841
11842/* check validity of 32-bit and 64-bit arithmetic operations */
11843static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
11844{
11845 struct bpf_reg_state *regs = cur_regs(env);
11846 u8 opcode = BPF_OP(insn->code);
11847 int err;
11848
11849 if (opcode == BPF_END || opcode == BPF_NEG) {
11850 if (opcode == BPF_NEG) {
11851 if (BPF_SRC(insn->code) != BPF_K ||
11852 insn->src_reg != BPF_REG_0 ||
11853 insn->off != 0 || insn->imm != 0) {
11854 verbose(env, "BPF_NEG uses reserved fields\n");
11855 return -EINVAL;
11856 }
11857 } else {
11858 if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
11859 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
11860 BPF_CLASS(insn->code) == BPF_ALU64) {
11861 verbose(env, "BPF_END uses reserved fields\n");
11862 return -EINVAL;
11863 }
11864 }
11865
11866 /* check src operand */
11867 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
11868 if (err)
11869 return err;
11870
11871 if (is_pointer_value(env, insn->dst_reg)) {
11872 verbose(env, "R%d pointer arithmetic prohibited\n",
11873 insn->dst_reg);
11874 return -EACCES;
11875 }
11876
11877 /* check dest operand */
11878 err = check_reg_arg(env, insn->dst_reg, DST_OP);
11879 if (err)
11880 return err;
11881
11882 } else if (opcode == BPF_MOV) {
11883
11884 if (BPF_SRC(insn->code) == BPF_X) {
11885 if (insn->imm != 0 || insn->off != 0) {
11886 verbose(env, "BPF_MOV uses reserved fields\n");
11887 return -EINVAL;
11888 }
11889
11890 /* check src operand */
11891 err = check_reg_arg(env, insn->src_reg, SRC_OP);
11892 if (err)
11893 return err;
11894 } else {
11895 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
11896 verbose(env, "BPF_MOV uses reserved fields\n");
11897 return -EINVAL;
11898 }
11899 }
11900
11901 /* check dest operand, mark as required later */
11902 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
11903 if (err)
11904 return err;
11905
11906 if (BPF_SRC(insn->code) == BPF_X) {
11907 struct bpf_reg_state *src_reg = regs + insn->src_reg;
11908 struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
11909
11910 if (BPF_CLASS(insn->code) == BPF_ALU64) {
11911 /* case: R1 = R2
11912 * copy register state to dest reg
11913 */
11914 if (src_reg->type == SCALAR_VALUE && !src_reg->id)
11915 /* Assign src and dst registers the same ID
11916 * that will be used by find_equal_scalars()
11917 * to propagate min/max range.
11918 */
11919 src_reg->id = ++env->id_gen;
11920 copy_register_state(dst_reg, src_reg);
11921 dst_reg->live |= REG_LIVE_WRITTEN;
11922 dst_reg->subreg_def = DEF_NOT_SUBREG;
11923 } else {
11924 /* R1 = (u32) R2 */
11925 if (is_pointer_value(env, insn->src_reg)) {
11926 verbose(env,
11927 "R%d partial copy of pointer\n",
11928 insn->src_reg);
11929 return -EACCES;
11930 } else if (src_reg->type == SCALAR_VALUE) {
11931 copy_register_state(dst_reg, src_reg);
11932 /* Make sure ID is cleared otherwise
11933 * dst_reg min/max could be incorrectly
11934 * propagated into src_reg by find_equal_scalars()
11935 */
11936 dst_reg->id = 0;
11937 dst_reg->live |= REG_LIVE_WRITTEN;
11938 dst_reg->subreg_def = env->insn_idx + 1;
11939 } else {
11940 mark_reg_unknown(env, regs,
11941 insn->dst_reg);
11942 }
11943 zext_32_to_64(dst_reg);
11944 reg_bounds_sync(dst_reg);
11945 }
11946 } else {
11947 /* case: R = imm
11948 * remember the value we stored into this reg
11949 */
11950 /* clear any state __mark_reg_known doesn't set */
11951 mark_reg_unknown(env, regs, insn->dst_reg);
11952 regs[insn->dst_reg].type = SCALAR_VALUE;
11953 if (BPF_CLASS(insn->code) == BPF_ALU64) {
11954 __mark_reg_known(regs + insn->dst_reg,
11955 insn->imm);
11956 } else {
11957 __mark_reg_known(regs + insn->dst_reg,
11958 (u32)insn->imm);
11959 }
11960 }
11961
11962 } else if (opcode > BPF_END) {
11963 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
11964 return -EINVAL;
11965
11966 } else { /* all other ALU ops: and, sub, xor, add, ... */
11967
11968 if (BPF_SRC(insn->code) == BPF_X) {
11969 if (insn->imm != 0 || insn->off != 0) {
11970 verbose(env, "BPF_ALU uses reserved fields\n");
11971 return -EINVAL;
11972 }
11973 /* check src1 operand */
11974 err = check_reg_arg(env, insn->src_reg, SRC_OP);
11975 if (err)
11976 return err;
11977 } else {
11978 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
11979 verbose(env, "BPF_ALU uses reserved fields\n");
11980 return -EINVAL;
11981 }
11982 }
11983
11984 /* check src2 operand */
11985 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
11986 if (err)
11987 return err;
11988
11989 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
11990 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
11991 verbose(env, "div by zero\n");
11992 return -EINVAL;
11993 }
11994
11995 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
11996 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
11997 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
11998
11999 if (insn->imm < 0 || insn->imm >= size) {
12000 verbose(env, "invalid shift %d\n", insn->imm);
12001 return -EINVAL;
12002 }
12003 }
12004
12005 /* check dest operand */
12006 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
12007 if (err)
12008 return err;
12009
12010 return adjust_reg_min_max_vals(env, insn);
12011 }
12012
12013 return 0;
12014}
12015
12016static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
12017 struct bpf_reg_state *dst_reg,
12018 enum bpf_reg_type type,
12019 bool range_right_open)
12020{
12021 struct bpf_func_state *state;
12022 struct bpf_reg_state *reg;
12023 int new_range;
12024
12025 if (dst_reg->off < 0 ||
12026 (dst_reg->off == 0 && range_right_open))
12027 /* This doesn't give us any range */
12028 return;
12029
12030 if (dst_reg->umax_value > MAX_PACKET_OFF ||
12031 dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
12032 /* Risk of overflow. For instance, ptr + (1<<63) may be less
12033 * than pkt_end, but that's because it's also less than pkt.
12034 */
12035 return;
12036
12037 new_range = dst_reg->off;
12038 if (range_right_open)
12039 new_range--;
12040
12041 /* Examples for register markings:
12042 *
12043 * pkt_data in dst register:
12044 *
12045 * r2 = r3;
12046 * r2 += 8;
12047 * if (r2 > pkt_end) goto <handle exception>
12048 * <access okay>
12049 *
12050 * r2 = r3;
12051 * r2 += 8;
12052 * if (r2 < pkt_end) goto <access okay>
12053 * <handle exception>
12054 *
12055 * Where:
12056 * r2 == dst_reg, pkt_end == src_reg
12057 * r2=pkt(id=n,off=8,r=0)
12058 * r3=pkt(id=n,off=0,r=0)
12059 *
12060 * pkt_data in src register:
12061 *
12062 * r2 = r3;
12063 * r2 += 8;
12064 * if (pkt_end >= r2) goto <access okay>
12065 * <handle exception>
12066 *
12067 * r2 = r3;
12068 * r2 += 8;
12069 * if (pkt_end <= r2) goto <handle exception>
12070 * <access okay>
12071 *
12072 * Where:
12073 * pkt_end == dst_reg, r2 == src_reg
12074 * r2=pkt(id=n,off=8,r=0)
12075 * r3=pkt(id=n,off=0,r=0)
12076 *
12077 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
12078 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
12079 * and [r3, r3 + 8-1) respectively are safe to access depending on
12080 * the check.
12081 */
12082
12083 /* If our ids match, then we must have the same max_value. And we
12084 * don't care about the other reg's fixed offset, since if it's too big
12085 * the range won't allow anything.
12086 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
12087 */
12088 bpf_for_each_reg_in_vstate(vstate, state, reg, ({
12089 if (reg->type == type && reg->id == dst_reg->id)
12090 /* keep the maximum range already checked */
12091 reg->range = max(reg->range, new_range);
12092 }));
12093}
12094
12095static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
12096{
12097 struct tnum subreg = tnum_subreg(reg->var_off);
12098 s32 sval = (s32)val;
12099
12100 switch (opcode) {
12101 case BPF_JEQ:
12102 if (tnum_is_const(subreg))
12103 return !!tnum_equals_const(subreg, val);
12104 break;
12105 case BPF_JNE:
12106 if (tnum_is_const(subreg))
12107 return !tnum_equals_const(subreg, val);
12108 break;
12109 case BPF_JSET:
12110 if ((~subreg.mask & subreg.value) & val)
12111 return 1;
12112 if (!((subreg.mask | subreg.value) & val))
12113 return 0;
12114 break;
12115 case BPF_JGT:
12116 if (reg->u32_min_value > val)
12117 return 1;
12118 else if (reg->u32_max_value <= val)
12119 return 0;
12120 break;
12121 case BPF_JSGT:
12122 if (reg->s32_min_value > sval)
12123 return 1;
12124 else if (reg->s32_max_value <= sval)
12125 return 0;
12126 break;
12127 case BPF_JLT:
12128 if (reg->u32_max_value < val)
12129 return 1;
12130 else if (reg->u32_min_value >= val)
12131 return 0;
12132 break;
12133 case BPF_JSLT:
12134 if (reg->s32_max_value < sval)
12135 return 1;
12136 else if (reg->s32_min_value >= sval)
12137 return 0;
12138 break;
12139 case BPF_JGE:
12140 if (reg->u32_min_value >= val)
12141 return 1;
12142 else if (reg->u32_max_value < val)
12143 return 0;
12144 break;
12145 case BPF_JSGE:
12146 if (reg->s32_min_value >= sval)
12147 return 1;
12148 else if (reg->s32_max_value < sval)
12149 return 0;
12150 break;
12151 case BPF_JLE:
12152 if (reg->u32_max_value <= val)
12153 return 1;
12154 else if (reg->u32_min_value > val)
12155 return 0;
12156 break;
12157 case BPF_JSLE:
12158 if (reg->s32_max_value <= sval)
12159 return 1;
12160 else if (reg->s32_min_value > sval)
12161 return 0;
12162 break;
12163 }
12164
12165 return -1;
12166}
12167
12168
12169static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
12170{
12171 s64 sval = (s64)val;
12172
12173 switch (opcode) {
12174 case BPF_JEQ:
12175 if (tnum_is_const(reg->var_off))
12176 return !!tnum_equals_const(reg->var_off, val);
12177 break;
12178 case BPF_JNE:
12179 if (tnum_is_const(reg->var_off))
12180 return !tnum_equals_const(reg->var_off, val);
12181 break;
12182 case BPF_JSET:
12183 if ((~reg->var_off.mask & reg->var_off.value) & val)
12184 return 1;
12185 if (!((reg->var_off.mask | reg->var_off.value) & val))
12186 return 0;
12187 break;
12188 case BPF_JGT:
12189 if (reg->umin_value > val)
12190 return 1;
12191 else if (reg->umax_value <= val)
12192 return 0;
12193 break;
12194 case BPF_JSGT:
12195 if (reg->smin_value > sval)
12196 return 1;
12197 else if (reg->smax_value <= sval)
12198 return 0;
12199 break;
12200 case BPF_JLT:
12201 if (reg->umax_value < val)
12202 return 1;
12203 else if (reg->umin_value >= val)
12204 return 0;
12205 break;
12206 case BPF_JSLT:
12207 if (reg->smax_value < sval)
12208 return 1;
12209 else if (reg->smin_value >= sval)
12210 return 0;
12211 break;
12212 case BPF_JGE:
12213 if (reg->umin_value >= val)
12214 return 1;
12215 else if (reg->umax_value < val)
12216 return 0;
12217 break;
12218 case BPF_JSGE:
12219 if (reg->smin_value >= sval)
12220 return 1;
12221 else if (reg->smax_value < sval)
12222 return 0;
12223 break;
12224 case BPF_JLE:
12225 if (reg->umax_value <= val)
12226 return 1;
12227 else if (reg->umin_value > val)
12228 return 0;
12229 break;
12230 case BPF_JSLE:
12231 if (reg->smax_value <= sval)
12232 return 1;
12233 else if (reg->smin_value > sval)
12234 return 0;
12235 break;
12236 }
12237
12238 return -1;
12239}
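/* Worked example (hypothetical bounds) for the two helpers above:
 *
 *   reg has umin_value = 7, umax_value = 100:
 *     BPF_JGT against val = 5  -> umin_value > val, so the branch is
 *                                 always taken (1)
 *   reg has umin_value = 0, umax_value = 10:
 *     BPF_JLT against val = 5  -> neither umax_value < val nor
 *                                 umin_value >= val holds, so the outcome
 *                                 is unknown (-1)
 */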
12240
12241/* compute branch direction of the expression "if (reg opcode val) goto target;"
12242 * and return:
12243 * 1 - branch will be taken and "goto target" will be executed
12244 * 0 - branch will not be taken and fall-through to next insn
12245 * -1 - unknown. Example: "if (reg < 5)" is unknown when the register's
12246 * value range is [0,10]
12247 */
12248static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
12249 bool is_jmp32)
12250{
12251 if (__is_pointer_value(false, reg)) {
12252 if (!reg_type_not_null(reg->type))
12253 return -1;
12254
12255 /* If the pointer is valid, tests against zero will fail, so we
12256 * can use this to determine the branch taken.
12257 */
12258 if (val != 0)
12259 return -1;
12260
12261 switch (opcode) {
12262 case BPF_JEQ:
12263 return 0;
12264 case BPF_JNE:
12265 return 1;
12266 default:
12267 return -1;
12268 }
12269 }
12270
12271 if (is_jmp32)
12272 return is_branch32_taken(reg, val, opcode);
12273 return is_branch64_taken(reg, val, opcode);
12274}
12275
12276static int flip_opcode(u32 opcode)
12277{
12278 /* How can we transform "a <op> b" into "b <op> a"? */
12279 static const u8 opcode_flip[16] = {
12280 /* these stay the same */
12281 [BPF_JEQ >> 4] = BPF_JEQ,
12282 [BPF_JNE >> 4] = BPF_JNE,
12283 [BPF_JSET >> 4] = BPF_JSET,
12284 /* these swap "lesser" and "greater" (L and G in the opcodes) */
12285 [BPF_JGE >> 4] = BPF_JLE,
12286 [BPF_JGT >> 4] = BPF_JLT,
12287 [BPF_JLE >> 4] = BPF_JGE,
12288 [BPF_JLT >> 4] = BPF_JGT,
12289 [BPF_JSGE >> 4] = BPF_JSLE,
12290 [BPF_JSGT >> 4] = BPF_JSLT,
12291 [BPF_JSLE >> 4] = BPF_JSGE,
12292 [BPF_JSLT >> 4] = BPF_JSGT
12293 };
12294 return opcode_flip[opcode >> 4];
12295}
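/* For illustration: "r1 < r2" holds exactly when "r2 > r1" does, so
 * flip_opcode(BPF_JLT) == BPF_JGT; the signed variants flip the same way,
 * while BPF_JEQ, BPF_JNE and BPF_JSET are symmetric and map to themselves.
 */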
12296
12297static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
12298 struct bpf_reg_state *src_reg,
12299 u8 opcode)
12300{
12301 struct bpf_reg_state *pkt;
12302
12303 if (src_reg->type == PTR_TO_PACKET_END) {
12304 pkt = dst_reg;
12305 } else if (dst_reg->type == PTR_TO_PACKET_END) {
12306 pkt = src_reg;
12307 opcode = flip_opcode(opcode);
12308 } else {
12309 return -1;
12310 }
12311
12312 if (pkt->range >= 0)
12313 return -1;
12314
12315 switch (opcode) {
12316 case BPF_JLE:
12317 /* pkt <= pkt_end */
12318 fallthrough;
12319 case BPF_JGT:
12320 /* pkt > pkt_end */
12321 if (pkt->range == BEYOND_PKT_END)
12322 /* pkt has at least one extra byte beyond pkt_end */
12323 return opcode == BPF_JGT;
12324 break;
12325 case BPF_JLT:
12326 /* pkt < pkt_end */
12327 fallthrough;
12328 case BPF_JGE:
12329 /* pkt >= pkt_end */
12330 if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
12331 return opcode == BPF_JGE;
12332 break;
12333 }
12334 return -1;
12335}
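/* Example (hypothetical state): if an earlier "if (pkt > pkt_end)" was
 * taken, mark_pkt_end() recorded BEYOND_PKT_END for pkt, so in that branch
 * a later "pkt > pkt_end" test is known taken (1) and "pkt <= pkt_end" is
 * known not taken (0). AT_PKT_END only guarantees pkt >= pkt_end, which is
 * enough to decide BPF_JGE/BPF_JLT but not the strict BPF_JGT/BPF_JLE
 * forms.
 */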
12336
12337/* Adjusts the register min/max values in the case that the dst_reg is the
12338 * variable register that we are working on, and src_reg is a constant or we're
12339 * simply doing a BPF_K check.
12340 * In JEQ/JNE cases we also adjust the var_off values.
12341 */
12342static void reg_set_min_max(struct bpf_reg_state *true_reg,
12343 struct bpf_reg_state *false_reg,
12344 u64 val, u32 val32,
12345 u8 opcode, bool is_jmp32)
12346{
12347 struct tnum false_32off = tnum_subreg(false_reg->var_off);
12348 struct tnum false_64off = false_reg->var_off;
12349 struct tnum true_32off = tnum_subreg(true_reg->var_off);
12350 struct tnum true_64off = true_reg->var_off;
12351 s64 sval = (s64)val;
12352 s32 sval32 = (s32)val32;
12353
12354 /* If the dst_reg is a pointer, we can't learn anything about its
12355 * variable offset from the compare (unless src_reg were a pointer into
12356 * the same object, but we don't bother with that).
12357 * Since false_reg and true_reg have the same type by construction, we
12358 * only need to check one of them for pointerness.
12359 */
12360 if (__is_pointer_value(false, false_reg))
12361 return;
12362
12363 switch (opcode) {
12364 /* JEQ/JNE comparison doesn't change the register equivalence.
12365 *
12366 * r1 = r2;
12367 * if (r1 == 42) goto label;
12368 * ...
12369 * label: // here both r1 and r2 are known to be 42.
12370 *
12371 * Hence when marking a register as known, preserve its ID.
12372 */
12373 case BPF_JEQ:
12374 if (is_jmp32) {
12375 __mark_reg32_known(true_reg, val32);
12376 true_32off = tnum_subreg(true_reg->var_off);
12377 } else {
12378 ___mark_reg_known(true_reg, val);
12379 true_64off = true_reg->var_off;
12380 }
12381 break;
12382 case BPF_JNE:
12383 if (is_jmp32) {
12384 __mark_reg32_known(false_reg, val32);
12385 false_32off = tnum_subreg(false_reg->var_off);
12386 } else {
12387 ___mark_reg_known(false_reg, val);
12388 false_64off = false_reg->var_off;
12389 }
12390 break;
12391 case BPF_JSET:
12392 if (is_jmp32) {
12393 false_32off = tnum_and(false_32off, tnum_const(~val32));
12394 if (is_power_of_2(val32))
12395 true_32off = tnum_or(true_32off,
12396 tnum_const(val32));
12397 } else {
12398 false_64off = tnum_and(false_64off, tnum_const(~val));
12399 if (is_power_of_2(val))
12400 true_64off = tnum_or(true_64off,
12401 tnum_const(val));
12402 }
12403 break;
12404 case BPF_JGE:
12405 case BPF_JGT:
12406 {
12407 if (is_jmp32) {
12408 u32 false_umax = opcode == BPF_JGT ? val32 : val32 - 1;
12409 u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
12410
12411 false_reg->u32_max_value = min(false_reg->u32_max_value,
12412 false_umax);
12413 true_reg->u32_min_value = max(true_reg->u32_min_value,
12414 true_umin);
12415 } else {
12416 u64 false_umax = opcode == BPF_JGT ? val : val - 1;
12417 u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
12418
12419 false_reg->umax_value = min(false_reg->umax_value, false_umax);
12420 true_reg->umin_value = max(true_reg->umin_value, true_umin);
12421 }
12422 break;
12423 }
12424 case BPF_JSGE:
12425 case BPF_JSGT:
12426 {
12427 if (is_jmp32) {
12428 s32 false_smax = opcode == BPF_JSGT ? sval32 : sval32 - 1;
12429 s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
12430
12431 false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
12432 true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
12433 } else {
12434 s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1;
12435 s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
12436
12437 false_reg->smax_value = min(false_reg->smax_value, false_smax);
12438 true_reg->smin_value = max(true_reg->smin_value, true_smin);
12439 }
12440 break;
12441 }
12442 case BPF_JLE:
12443 case BPF_JLT:
12444 {
12445 if (is_jmp32) {
12446 u32 false_umin = opcode == BPF_JLT ? val32 : val32 + 1;
12447 u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
12448
12449 false_reg->u32_min_value = max(false_reg->u32_min_value,
12450 false_umin);
12451 true_reg->u32_max_value = min(true_reg->u32_max_value,
12452 true_umax);
12453 } else {
12454 u64 false_umin = opcode == BPF_JLT ? val : val + 1;
12455 u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
12456
12457 false_reg->umin_value = max(false_reg->umin_value, false_umin);
12458 true_reg->umax_value = min(true_reg->umax_value, true_umax);
12459 }
12460 break;
12461 }
12462 case BPF_JSLE:
12463 case BPF_JSLT:
12464 {
12465 if (is_jmp32) {
12466 s32 false_smin = opcode == BPF_JSLT ? sval32 : sval32 + 1;
12467 s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
12468
12469 false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
12470 true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
12471 } else {
12472 s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1;
12473 s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
12474
12475 false_reg->smin_value = max(false_reg->smin_value, false_smin);
12476 true_reg->smax_value = min(true_reg->smax_value, true_smax);
12477 }
12478 break;
12479 }
12480 default:
12481 return;
12482 }
12483
12484 if (is_jmp32) {
12485 false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
12486 tnum_subreg(false_32off));
12487 true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
12488 tnum_subreg(true_32off));
12489 __reg_combine_32_into_64(false_reg);
12490 __reg_combine_32_into_64(true_reg);
12491 } else {
12492 false_reg->var_off = false_64off;
12493 true_reg->var_off = true_64off;
12494 __reg_combine_64_into_32(false_reg);
12495 __reg_combine_64_into_32(true_reg);
12496 }
12497}
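/* Worked example (hypothetical bounds) for reg_set_min_max() above: for
 * "if (r1 > 10) goto ..." with r1 a scalar in [0, 100], the taken branch
 * learns umin_value = 11 and the fall-through branch learns
 * umax_value = 10; the signed and 32-bit variants adjust the corresponding
 * bounds in the same way.
 */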
12498
12499/* Same as above, but for the case that dst_reg holds a constant and src_reg is
12500 * the variable reg.
12501 */
12502static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
12503 struct bpf_reg_state *false_reg,
12504 u64 val, u32 val32,
12505 u8 opcode, bool is_jmp32)
12506{
12507 opcode = flip_opcode(opcode);
12508 /* This uses zero as "not present in table"; luckily the zero opcode,
12509 * BPF_JA, can't get here.
12510 */
12511 if (opcode)
12512 reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
12513}
12514
12515/* Regs are known to be equal, so intersect their min/max/var_off */
12516static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
12517 struct bpf_reg_state *dst_reg)
12518{
12519 src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
12520 dst_reg->umin_value);
12521 src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
12522 dst_reg->umax_value);
12523 src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
12524 dst_reg->smin_value);
12525 src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
12526 dst_reg->smax_value);
12527 src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
12528 dst_reg->var_off);
12529 reg_bounds_sync(src_reg);
12530 reg_bounds_sync(dst_reg);
12531}
12532
12533static void reg_combine_min_max(struct bpf_reg_state *true_src,
12534 struct bpf_reg_state *true_dst,
12535 struct bpf_reg_state *false_src,
12536 struct bpf_reg_state *false_dst,
12537 u8 opcode)
12538{
12539 switch (opcode) {
12540 case BPF_JEQ:
12541 __reg_combine_min_max(true_src, true_dst);
12542 break;
12543 case BPF_JNE:
12544 __reg_combine_min_max(false_src, false_dst);
12545 break;
12546 }
12547}
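/* Worked example (hypothetical bounds): with r1 in [0, 10] and r2 in
 * [5, 20], "if (r1 == r2)" lets the branch where they are equal intersect
 * both ranges, so r1 and r2 are each known to be in [5, 10] there; for
 * "!=" the same intersection applies to the fall-through, where the two
 * are known equal.
 */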
12548
12549static void mark_ptr_or_null_reg(struct bpf_func_state *state,
12550 struct bpf_reg_state *reg, u32 id,
12551 bool is_null)
12552{
12553 if (type_may_be_null(reg->type) && reg->id == id &&
12554 (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
12555 /* Old offset (both fixed and variable parts) should have been
12556 * known-zero, because we don't allow pointer arithmetic on
12557 * pointers that might be NULL. If we see this happening, don't
12558 * convert the register.
12559 *
12560 * But in some cases, some helpers that return local kptrs
12561 * advance offset for the returned pointer. In those cases, it
12562 * is fine to expect to see reg->off.
12563 */
12564 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0)))
12565 return;
12566 if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
12567 WARN_ON_ONCE(reg->off))
12568 return;
12569
12570 if (is_null) {
12571 reg->type = SCALAR_VALUE;
12572 /* We don't need id and ref_obj_id from this point
12573 * onwards anymore, so reset them to give state
12574 * pruning a chance to take effect.
12575 */
12576 reg->id = 0;
12577 reg->ref_obj_id = 0;
12578
12579 return;
12580 }
12581
12582 mark_ptr_not_null_reg(reg);
12583
12584 if (!reg_may_point_to_spin_lock(reg)) {
12585 /* For not-NULL ptr, reg->ref_obj_id will be reset
12586 * in release_reference().
12587 *
12588 * reg->id is still used by spin_lock ptr. Other
12589 * than spin_lock ptr type, reg->id can be reset.
12590 */
12591 reg->id = 0;
12592 }
12593 }
12594}
12595
12596/* The logic is similar to find_good_pkt_pointers(); both could eventually
12597 * be folded together at some point.
12598 */
12599static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
12600 bool is_null)
12601{
12602 struct bpf_func_state *state = vstate->frame[vstate->curframe];
12603 struct bpf_reg_state *regs = state->regs, *reg;
12604 u32 ref_obj_id = regs[regno].ref_obj_id;
12605 u32 id = regs[regno].id;
12606
12607 if (ref_obj_id && ref_obj_id == id && is_null)
12608 /* regs[regno] is in the " == NULL" branch.
12609 * No one could have freed the reference state before
12610 * doing the NULL check.
12611 */
12612 WARN_ON_ONCE(release_reference_state(state, id));
12613
12614 bpf_for_each_reg_in_vstate(vstate, state, reg, ({
12615 mark_ptr_or_null_reg(state, reg, id, is_null);
12616 }));
12617}
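/* Typical pattern handled above (hypothetical fragment):
 *
 *   r0 = bpf_map_lookup_elem(...)   r0 is a PTR_TO_MAP_VALUE that may
 *                                   still be NULL
 *   r6 = r0                         the copy shares the same id
 *   if (r0 == 0) goto out
 *   ...                             here both r0 and r6 are known
 *                                   non-NULL map value pointers
 *
 * In the "== NULL" branch both registers are downgraded to scalars
 * instead, and their id/ref_obj_id are cleared to help state pruning.
 */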
12618
12619static bool try_match_pkt_pointers(const struct bpf_insn *insn,
12620 struct bpf_reg_state *dst_reg,
12621 struct bpf_reg_state *src_reg,
12622 struct bpf_verifier_state *this_branch,
12623 struct bpf_verifier_state *other_branch)
12624{
12625 if (BPF_SRC(insn->code) != BPF_X)
12626 return false;
12627
12628 /* Pointers are always 64-bit. */
12629 if (BPF_CLASS(insn->code) == BPF_JMP32)
12630 return false;
12631
12632 switch (BPF_OP(insn->code)) {
12633 case BPF_JGT:
12634 if ((dst_reg->type == PTR_TO_PACKET &&
12635 src_reg->type == PTR_TO_PACKET_END) ||
12636 (dst_reg->type == PTR_TO_PACKET_META &&
12637 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
12638 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
12639 find_good_pkt_pointers(this_branch, dst_reg,
12640 dst_reg->type, false);
12641 mark_pkt_end(other_branch, insn->dst_reg, true);
12642 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
12643 src_reg->type == PTR_TO_PACKET) ||
12644 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
12645 src_reg->type == PTR_TO_PACKET_META)) {
12646 /* pkt_end > pkt_data', pkt_data > pkt_meta' */
12647 find_good_pkt_pointers(other_branch, src_reg,
12648 src_reg->type, true);
12649 mark_pkt_end(this_branch, insn->src_reg, false);
12650 } else {
12651 return false;
12652 }
12653 break;
12654 case BPF_JLT:
12655 if ((dst_reg->type == PTR_TO_PACKET &&
12656 src_reg->type == PTR_TO_PACKET_END) ||
12657 (dst_reg->type == PTR_TO_PACKET_META &&
12658 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
12659 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
12660 find_good_pkt_pointers(other_branch, dst_reg,
12661 dst_reg->type, true);
12662 mark_pkt_end(this_branch, insn->dst_reg, false);
12663 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
12664 src_reg->type == PTR_TO_PACKET) ||
12665 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
12666 src_reg->type == PTR_TO_PACKET_META)) {
12667 /* pkt_end < pkt_data', pkt_data < pkt_meta' */
12668 find_good_pkt_pointers(this_branch, src_reg,
12669 src_reg->type, false);
12670 mark_pkt_end(other_branch, insn->src_reg, true);
12671 } else {
12672 return false;
12673 }
12674 break;
12675 case BPF_JGE:
12676 if ((dst_reg->type == PTR_TO_PACKET &&
12677 src_reg->type == PTR_TO_PACKET_END) ||
12678 (dst_reg->type == PTR_TO_PACKET_META &&
12679 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
12680 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
12681 find_good_pkt_pointers(this_branch, dst_reg,
12682 dst_reg->type, true);
12683 mark_pkt_end(other_branch, insn->dst_reg, false);
12684 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
12685 src_reg->type == PTR_TO_PACKET) ||
12686 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
12687 src_reg->type == PTR_TO_PACKET_META)) {
12688 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
12689 find_good_pkt_pointers(other_branch, src_reg,
12690 src_reg->type, false);
12691 mark_pkt_end(this_branch, insn->src_reg, true);
12692 } else {
12693 return false;
12694 }
12695 break;
12696 case BPF_JLE:
12697 if ((dst_reg->type == PTR_TO_PACKET &&
12698 src_reg->type == PTR_TO_PACKET_END) ||
12699 (dst_reg->type == PTR_TO_PACKET_META &&
12700 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
12701 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
12702 find_good_pkt_pointers(other_branch, dst_reg,
12703 dst_reg->type, false);
12704 mark_pkt_end(this_branch, insn->dst_reg, true);
12705 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
12706 src_reg->type == PTR_TO_PACKET) ||
12707 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
12708 src_reg->type == PTR_TO_PACKET_META)) {
12709 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
12710 find_good_pkt_pointers(this_branch, src_reg,
12711 src_reg->type, true);
12712 mark_pkt_end(other_branch, insn->src_reg, false);
12713 } else {
12714 return false;
12715 }
12716 break;
12717 default:
12718 return false;
12719 }
12720
12721 return true;
12722}
12723
12724static void find_equal_scalars(struct bpf_verifier_state *vstate,
12725 struct bpf_reg_state *known_reg)
12726{
12727 struct bpf_func_state *state;
12728 struct bpf_reg_state *reg;
12729
12730 bpf_for_each_reg_in_vstate(vstate, state, reg, ({
12731 if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
12732 copy_register_state(reg, known_reg);
12733 }));
12734}
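/* Example of the propagation above (hypothetical fragment):
 *
 *   r1 = r2              both scalars now share one id
 *   if (r1 > 10) goto l
 *   ...                  fall-through: r1 has umax_value = 10, and since
 *                        r2 carries the same id, find_equal_scalars()
 *                        copies the narrowed state to r2 as well
 */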
12735
12736static int check_cond_jmp_op(struct bpf_verifier_env *env,
12737 struct bpf_insn *insn, int *insn_idx)
12738{
12739 struct bpf_verifier_state *this_branch = env->cur_state;
12740 struct bpf_verifier_state *other_branch;
12741 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
12742 struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
12743 struct bpf_reg_state *eq_branch_regs;
12744 u8 opcode = BPF_OP(insn->code);
12745 bool is_jmp32;
12746 int pred = -1;
12747 int err;
12748
12749 /* Only conditional jumps are expected to reach here. */
12750 if (opcode == BPF_JA || opcode > BPF_JSLE) {
12751 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
12752 return -EINVAL;
12753 }
12754
12755 if (BPF_SRC(insn->code) == BPF_X) {
12756 if (insn->imm != 0) {
12757 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
12758 return -EINVAL;
12759 }
12760
12761 /* check src1 operand */
12762 err = check_reg_arg(env, insn->src_reg, SRC_OP);
12763 if (err)
12764 return err;
12765
12766 if (is_pointer_value(env, insn->src_reg)) {
12767 verbose(env, "R%d pointer comparison prohibited\n",
12768 insn->src_reg);
12769 return -EACCES;
12770 }
12771 src_reg = &regs[insn->src_reg];
12772 } else {
12773 if (insn->src_reg != BPF_REG_0) {
12774 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
12775 return -EINVAL;
12776 }
12777 }
12778
12779 /* check src2 operand */
12780 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
12781 if (err)
12782 return err;
12783
12784 dst_reg = &regs[insn->dst_reg];
12785 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
12786
12787 if (BPF_SRC(insn->code) == BPF_K) {
12788 pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
12789 } else if (src_reg->type == SCALAR_VALUE &&
12790 is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
12791 pred = is_branch_taken(dst_reg,
12792 tnum_subreg(src_reg->var_off).value,
12793 opcode,
12794 is_jmp32);
12795 } else if (src_reg->type == SCALAR_VALUE &&
12796 !is_jmp32 && tnum_is_const(src_reg->var_off)) {
12797 pred = is_branch_taken(dst_reg,
12798 src_reg->var_off.value,
12799 opcode,
12800 is_jmp32);
12801 } else if (reg_is_pkt_pointer_any(dst_reg) &&
12802 reg_is_pkt_pointer_any(src_reg) &&
12803 !is_jmp32) {
12804 pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode);
12805 }
12806
12807 if (pred >= 0) {
12808 /* If we get here with a dst_reg pointer type it is because
12809 * above is_branch_taken() special cased the 0 comparison.
12810 */
12811 if (!__is_pointer_value(false, dst_reg))
12812 err = mark_chain_precision(env, insn->dst_reg);
12813 if (BPF_SRC(insn->code) == BPF_X && !err &&
12814 !__is_pointer_value(false, src_reg))
12815 err = mark_chain_precision(env, insn->src_reg);
12816 if (err)
12817 return err;
12818 }
12819
12820 if (pred == 1) {
12821 /* Only follow the goto, ignore fall-through. If needed, push
12822 * the fall-through branch for simulation under speculative
12823 * execution.
12824 */
12825 if (!env->bypass_spec_v1 &&
12826 !sanitize_speculative_path(env, insn, *insn_idx + 1,
12827 *insn_idx))
12828 return -EFAULT;
12829 *insn_idx += insn->off;
12830 return 0;
12831 } else if (pred == 0) {
12832 /* Only follow the fall-through branch, since that's where the
12833 * program will go. If needed, push the goto branch for
12834 * simulation under speculative execution.
12835 */
12836 if (!env->bypass_spec_v1 &&
12837 !sanitize_speculative_path(env, insn,
12838 *insn_idx + insn->off + 1,
12839 *insn_idx))
12840 return -EFAULT;
12841 return 0;
12842 }
12843
12844 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
12845 false);
12846 if (!other_branch)
12847 return -EFAULT;
12848 other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
12849
12850 /* detect if we are comparing against a constant value so we can adjust
12851 * our min/max values for our dst register.
12852 * this is only legit if both are scalars (or pointers to the same
12853 * object, I suppose, see the PTR_MAYBE_NULL related if block below),
12854 * because otherwise the different base pointers mean the offsets aren't
12855 * comparable.
12856 */
12857 if (BPF_SRC(insn->code) == BPF_X) {
12858 struct bpf_reg_state *src_reg = &regs[insn->src_reg];
12859
12860 if (dst_reg->type == SCALAR_VALUE &&
12861 src_reg->type == SCALAR_VALUE) {
12862 if (tnum_is_const(src_reg->var_off) ||
12863 (is_jmp32 &&
12864 tnum_is_const(tnum_subreg(src_reg->var_off))))
12865 reg_set_min_max(&other_branch_regs[insn->dst_reg],
12866 dst_reg,
12867 src_reg->var_off.value,
12868 tnum_subreg(src_reg->var_off).value,
12869 opcode, is_jmp32);
12870 else if (tnum_is_const(dst_reg->var_off) ||
12871 (is_jmp32 &&
12872 tnum_is_const(tnum_subreg(dst_reg->var_off))))
12873 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
12874 src_reg,
12875 dst_reg->var_off.value,
12876 tnum_subreg(dst_reg->var_off).value,
12877 opcode, is_jmp32);
12878 else if (!is_jmp32 &&
12879 (opcode == BPF_JEQ || opcode == BPF_JNE))
12880 /* Comparing for equality, we can combine knowledge */
12881 reg_combine_min_max(&other_branch_regs[insn->src_reg],
12882 &other_branch_regs[insn->dst_reg],
12883 src_reg, dst_reg, opcode);
12884 if (src_reg->id &&
12885 !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
12886 find_equal_scalars(this_branch, src_reg);
12887 find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
12888 }
12889
12890 }
12891 } else if (dst_reg->type == SCALAR_VALUE) {
12892 reg_set_min_max(&other_branch_regs[insn->dst_reg],
12893 dst_reg, insn->imm, (u32)insn->imm,
12894 opcode, is_jmp32);
12895 }
12896
12897 if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
12898 !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
12899 find_equal_scalars(this_branch, dst_reg);
12900 find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
12901 }
12902
12903 /* if one pointer register is compared to another pointer
12904 * register check if PTR_MAYBE_NULL could be lifted.
12905 * E.g. register A - maybe null
12906 * register B - not null
12907 * for JNE A, B, ... - A is not null in the false branch;
12908 * for JEQ A, B, ... - A is not null in the true branch.
12909 *
12910 * Since PTR_TO_BTF_ID points to a kernel struct that does
12911 * not need to be null-checked by the BPF program (i.e., it
12912 * could be null even without the PTR_MAYBE_NULL marking),
12913 * only propagate nullness when neither reg is of that type.
12914 */
12915 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
12916 __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) &&
12917 type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) &&
12918 base_type(src_reg->type) != PTR_TO_BTF_ID &&
12919 base_type(dst_reg->type) != PTR_TO_BTF_ID) {
12920 eq_branch_regs = NULL;
12921 switch (opcode) {
12922 case BPF_JEQ:
12923 eq_branch_regs = other_branch_regs;
12924 break;
12925 case BPF_JNE:
12926 eq_branch_regs = regs;
12927 break;
12928 default:
12929 /* do nothing */
12930 break;
12931 }
12932 if (eq_branch_regs) {
12933 if (type_may_be_null(src_reg->type))
12934 mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]);
12935 else
12936 mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]);
12937 }
12938 }
12939
12940 /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
12941 * NOTE: these optimizations below are related with pointer comparison
12942 * which will never be JMP32.
12943 */
12944 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
12945 insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
12946 type_may_be_null(dst_reg->type)) {
12947 /* Mark all identical registers in each branch as either
12948 * safe or unknown depending on the R == 0 or R != 0 conditional.
12949 */
12950 mark_ptr_or_null_regs(this_branch, insn->dst_reg,
12951 opcode == BPF_JNE);
12952 mark_ptr_or_null_regs(other_branch, insn->dst_reg,
12953 opcode == BPF_JEQ);
12954 } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
12955 this_branch, other_branch) &&
12956 is_pointer_value(env, insn->dst_reg)) {
12957 verbose(env, "R%d pointer comparison prohibited\n",
12958 insn->dst_reg);
12959 return -EACCES;
12960 }
12961 if (env->log.level & BPF_LOG_LEVEL)
12962 print_insn_state(env, this_branch->frame[this_branch->curframe]);
12963 return 0;
12964}
12965
12966/* verify BPF_LD_IMM64 instruction */
12967static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
12968{
12969 struct bpf_insn_aux_data *aux = cur_aux(env);
12970 struct bpf_reg_state *regs = cur_regs(env);
12971 struct bpf_reg_state *dst_reg;
12972 struct bpf_map *map;
12973 int err;
12974
12975 if (BPF_SIZE(insn->code) != BPF_DW) {
12976 verbose(env, "invalid BPF_LD_IMM insn\n");
12977 return -EINVAL;
12978 }
12979 if (insn->off != 0) {
12980 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
12981 return -EINVAL;
12982 }
12983
12984 err = check_reg_arg(env, insn->dst_reg, DST_OP);
12985 if (err)
12986 return err;
12987
12988 dst_reg = &regs[insn->dst_reg];
12989 if (insn->src_reg == 0) {
12990 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
12991
12992 dst_reg->type = SCALAR_VALUE;
12993 __mark_reg_known(&regs[insn->dst_reg], imm);
12994 return 0;
12995 }
12996
12997 /* All special src_reg cases are listed below. From this point onwards
12998 * we either succeed and assign a corresponding dst_reg->type after
12999 * zeroing the offset, or fail and reject the program.
13000 */
13001 mark_reg_known_zero(env, regs, insn->dst_reg);
13002
13003 if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
13004 dst_reg->type = aux->btf_var.reg_type;
13005 switch (base_type(dst_reg->type)) {
13006 case PTR_TO_MEM:
13007 dst_reg->mem_size = aux->btf_var.mem_size;
13008 break;
13009 case PTR_TO_BTF_ID:
13010 dst_reg->btf = aux->btf_var.btf;
13011 dst_reg->btf_id = aux->btf_var.btf_id;
13012 break;
13013 default:
13014 verbose(env, "bpf verifier is misconfigured\n");
13015 return -EFAULT;
13016 }
13017 return 0;
13018 }
13019
13020 if (insn->src_reg == BPF_PSEUDO_FUNC) {
13021 struct bpf_prog_aux *aux = env->prog->aux;
13022 u32 subprogno = find_subprog(env,
13023 env->insn_idx + insn->imm + 1);
13024
13025 if (!aux->func_info) {
13026 verbose(env, "missing btf func_info\n");
13027 return -EINVAL;
13028 }
13029 if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
13030 verbose(env, "callback function not static\n");
13031 return -EINVAL;
13032 }
13033
13034 dst_reg->type = PTR_TO_FUNC;
13035 dst_reg->subprogno = subprogno;
13036 return 0;
13037 }
13038
13039 map = env->used_maps[aux->map_index];
13040 dst_reg->map_ptr = map;
13041
13042 if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
13043 insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
13044 dst_reg->type = PTR_TO_MAP_VALUE;
13045 dst_reg->off = aux->map_off;
13046 WARN_ON_ONCE(map->max_entries != 1);
13047 /* We want reg->id to be same (0) as map_value is not distinct */
13048 } else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
13049 insn->src_reg == BPF_PSEUDO_MAP_IDX) {
13050 dst_reg->type = CONST_PTR_TO_MAP;
13051 } else {
13052 verbose(env, "bpf verifier is misconfigured\n");
13053 return -EINVAL;
13054 }
13055
13056 return 0;
13057}
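/* For reference, BPF_LD_IMM64 always occupies two instruction slots: with
 * src_reg == 0 the two 32-bit imm fields are combined into one 64-bit
 * scalar constant, while the BPF_PSEUDO_MAP_* variants (whose fd/index was
 * already resolved to a map earlier) yield CONST_PTR_TO_MAP or
 * PTR_TO_MAP_VALUE as handled above.
 */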
13058
13059static bool may_access_skb(enum bpf_prog_type type)
13060{
13061 switch (type) {
13062 case BPF_PROG_TYPE_SOCKET_FILTER:
13063 case BPF_PROG_TYPE_SCHED_CLS:
13064 case BPF_PROG_TYPE_SCHED_ACT:
13065 return true;
13066 default:
13067 return false;
13068 }
13069}
13070
13071/* verify safety of LD_ABS|LD_IND instructions:
13072 * - they can only appear in the programs where ctx == skb
13073 * - since they are wrappers of function calls, they scratch R1-R5 registers,
13074 * preserve R6-R9, and store return value into R0
13075 *
13076 * Implicit input:
13077 * ctx == skb == R6 == CTX
13078 *
13079 * Explicit input:
13080 * SRC == any register
13081 * IMM == 32-bit immediate
13082 *
13083 * Output:
13084 * R0 - 8/16/32-bit skb data converted to cpu endianness
13085 */
13086static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
13087{
13088 struct bpf_reg_state *regs = cur_regs(env);
13089 static const int ctx_reg = BPF_REG_6;
13090 u8 mode = BPF_MODE(insn->code);
13091 int i, err;
13092
13093 if (!may_access_skb(resolve_prog_type(env->prog))) {
13094 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
13095 return -EINVAL;
13096 }
13097
13098 if (!env->ops->gen_ld_abs) {
13099 verbose(env, "bpf verifier is misconfigured\n");
13100 return -EINVAL;
13101 }
13102
13103 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
13104 BPF_SIZE(insn->code) == BPF_DW ||
13105 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
13106 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
13107 return -EINVAL;
13108 }
13109
13110 /* check whether implicit source operand (register R6) is readable */
13111 err = check_reg_arg(env, ctx_reg, SRC_OP);
13112 if (err)
13113 return err;
13114
13115 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
13116 * gen_ld_abs() may terminate the program at runtime, leading to
13117 * reference leak.
13118 */
13119 err = check_reference_leak(env);
13120 if (err) {
13121 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
13122 return err;
13123 }
13124
13125 if (env->cur_state->active_lock.ptr) {
13126 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
13127 return -EINVAL;
13128 }
13129
13130 if (env->cur_state->active_rcu_lock) {
13131 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_rcu_read_lock-ed region\n");
13132 return -EINVAL;
13133 }
13134
13135 if (regs[ctx_reg].type != PTR_TO_CTX) {
13136 verbose(env,
13137 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
13138 return -EINVAL;
13139 }
13140
13141 if (mode == BPF_IND) {
13142 /* check explicit source operand */
13143 err = check_reg_arg(env, insn->src_reg, SRC_OP);
13144 if (err)
13145 return err;
13146 }
13147
13148 err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
13149 if (err < 0)
13150 return err;
13151
13152 /* reset caller saved regs to unreadable */
13153 for (i = 0; i < CALLER_SAVED_REGS; i++) {
13154 mark_reg_not_init(env, regs, caller_saved[i]);
13155 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
13156 }
13157
13158 /* mark destination R0 register as readable, since it contains
13159 * the value fetched from the packet.
13160 * Already marked as written above.
13161 */
13162 mark_reg_unknown(env, regs, BPF_REG_0);
13163 /* ld_abs load up to 32-bit skb data. */
13164 regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
13165 return 0;
13166}
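/* Hypothetical use in a socket filter, matching the constraints above:
 *
 *   r6 = r1                  the skb context must live in R6
 *   r0 = *(u16 *)skb[12]     BPF_LD_ABS | BPF_H with imm == 12, e.g. the
 *                            EtherType field of an Ethernet frame
 *
 * R0 then holds the 16-bit value in cpu endianness; R1-R5 are scratched
 * by the call-like semantics and R6-R9 are preserved.
 */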
13167
13168static int check_return_code(struct bpf_verifier_env *env)
13169{
13170 struct tnum enforce_attach_type_range = tnum_unknown;
13171 const struct bpf_prog *prog = env->prog;
13172 struct bpf_reg_state *reg;
13173 struct tnum range = tnum_range(0, 1);
13174 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
13175 int err;
13176 struct bpf_func_state *frame = env->cur_state->frame[0];
13177 const bool is_subprog = frame->subprogno;
13178
13179 /* LSM and struct_ops func-ptr's return type could be "void" */
13180 if (!is_subprog) {
13181 switch (prog_type) {
13182 case BPF_PROG_TYPE_LSM:
13183 if (prog->expected_attach_type == BPF_LSM_CGROUP)
13184 /* See below, can be 0 or 0-1 depending on hook. */
13185 break;
13186 fallthrough;
13187 case BPF_PROG_TYPE_STRUCT_OPS:
13188 if (!prog->aux->attach_func_proto->type)
13189 return 0;
13190 break;
13191 default:
13192 break;
13193 }
13194 }
13195
13196 /* The eBPF calling convention is such that R0 is used
13197 * to return the value from the eBPF program.
13198 * Make sure that it's readable at the time of
13199 * bpf_exit, which means that the program wrote
13200 * something into it earlier.
13201 */
13202 err = check_reg_arg(env, BPF_REG_0, SRC_OP);
13203 if (err)
13204 return err;
13205
13206 if (is_pointer_value(env, BPF_REG_0)) {
13207 verbose(env, "R0 leaks addr as return value\n");
13208 return -EACCES;
13209 }
13210
13211 reg = cur_regs(env) + BPF_REG_0;
13212
13213 if (frame->in_async_callback_fn) {
13214 /* enforce return zero from async callbacks like timer */
13215 if (reg->type != SCALAR_VALUE) {
13216 verbose(env, "In async callback the register R0 is not a known value (%s)\n",
13217 reg_type_str(env, reg->type));
13218 return -EINVAL;
13219 }
13220
13221 if (!tnum_in(tnum_const(0), reg->var_off)) {
13222 verbose_invalid_scalar(env, reg, &range, "async callback", "R0");
13223 return -EINVAL;
13224 }
13225 return 0;
13226 }
13227
13228 if (is_subprog) {
13229 if (reg->type != SCALAR_VALUE) {
13230 verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
13231 reg_type_str(env, reg->type));
13232 return -EINVAL;
13233 }
13234 return 0;
13235 }
13236
13237 switch (prog_type) {
13238 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
13239 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
13240 env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
13241 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
13242 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
13243 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
13244 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
13245 range = tnum_range(1, 1);
13246 if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
13247 env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
13248 range = tnum_range(0, 3);
13249 break;
13250 case BPF_PROG_TYPE_CGROUP_SKB:
13251 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
13252 range = tnum_range(0, 3);
13253 enforce_attach_type_range = tnum_range(2, 3);
13254 }
13255 break;
13256 case BPF_PROG_TYPE_CGROUP_SOCK:
13257 case BPF_PROG_TYPE_SOCK_OPS:
13258 case BPF_PROG_TYPE_CGROUP_DEVICE:
13259 case BPF_PROG_TYPE_CGROUP_SYSCTL:
13260 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
13261 break;
13262 case BPF_PROG_TYPE_RAW_TRACEPOINT:
13263 if (!env->prog->aux->attach_btf_id)
13264 return 0;
13265 range = tnum_const(0);
13266 break;
13267 case BPF_PROG_TYPE_TRACING:
13268 switch (env->prog->expected_attach_type) {
13269 case BPF_TRACE_FENTRY:
13270 case BPF_TRACE_FEXIT:
13271 range = tnum_const(0);
13272 break;
13273 case BPF_TRACE_RAW_TP:
13274 case BPF_MODIFY_RETURN:
13275 return 0;
13276 case BPF_TRACE_ITER:
13277 break;
13278 default:
13279 return -ENOTSUPP;
13280 }
13281 break;
13282 case BPF_PROG_TYPE_SK_LOOKUP:
13283 range = tnum_range(SK_DROP, SK_PASS);
13284 break;
13285
13286 case BPF_PROG_TYPE_LSM:
13287 if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
13288 /* Regular BPF_PROG_TYPE_LSM programs can return
13289 * any value.
13290 */
13291 return 0;
13292 }
13293 if (!env->prog->aux->attach_func_proto->type) {
13294 /* Make sure programs that attach to void
13295 * hooks don't try to modify return value.
13296 */
13297 range = tnum_range(1, 1);
13298 }
13299 break;
13300
13301 case BPF_PROG_TYPE_EXT:
13302 /* freplace program can return anything as its return value
13303 * depends on the to-be-replaced kernel func or bpf program.
13304 */
13305 default:
13306 return 0;
13307 }
13308
13309 if (reg->type != SCALAR_VALUE) {
13310 verbose(env, "At program exit the register R0 is not a known value (%s)\n",
13311 reg_type_str(env, reg->type));
13312 return -EINVAL;
13313 }
13314
13315 if (!tnum_in(range, reg->var_off)) {
13316 verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
13317 if (prog->expected_attach_type == BPF_LSM_CGROUP &&
13318 prog_type == BPF_PROG_TYPE_LSM &&
13319 !prog->aux->attach_func_proto->type)
13320 verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
13321 return -EINVAL;
13322 }
13323
13324 if (!tnum_is_unknown(enforce_attach_type_range) &&
13325 tnum_in(enforce_attach_type_range, reg->var_off))
13326 env->prog->enforce_expected_attach_type = 1;
13327 return 0;
13328}
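/* Example of the ranges above: a BPF_PROG_TYPE_CGROUP_SKB program attached
 * to BPF_CGROUP_INET_EGRESS must exit with R0 in [0, 3]; a constant return
 * of 4 would be rejected at "program exit", and a return value known to be
 * 2 or 3 additionally pins the program to that attach type via
 * enforce_expected_attach_type.
 */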
13329
13330/* non-recursive DFS pseudo code
13331 * 1 procedure DFS-iterative(G,v):
13332 * 2 label v as discovered
13333 * 3 let S be a stack
13334 * 4 S.push(v)
13335 * 5 while S is not empty
13336 * 6 t <- S.peek()
13337 * 7 if t is what we're looking for:
13338 * 8 return t
13339 * 9 for all edges e in G.adjacentEdges(t) do
13340 * 10 if edge e is already labelled
13341 * 11 continue with the next edge
13342 * 12 w <- G.adjacentVertex(t,e)
13343 * 13 if vertex w is not discovered and not explored
13344 * 14 label e as tree-edge
13345 * 15 label w as discovered
13346 * 16 S.push(w)
13347 * 17 continue at 5
13348 * 18 else if vertex w is discovered
13349 * 19 label e as back-edge
13350 * 20 else
13351 * 21 // vertex w is explored
13352 * 22 label e as forward- or cross-edge
13353 * 23 label t as explored
13354 * 24 S.pop()
13355 *
13356 * convention:
13357 * 0x10 - discovered
13358 * 0x11 - discovered and fall-through edge labelled
13359 * 0x12 - discovered and fall-through and branch edges labelled
13360 * 0x20 - explored
13361 */
13362
13363enum {
13364 DISCOVERED = 0x10,
13365 EXPLORED = 0x20,
13366 FALLTHROUGH = 1,
13367 BRANCH = 2,
13368};
13369
13370static u32 state_htab_size(struct bpf_verifier_env *env)
13371{
13372 return env->prog->len;
13373}
13374
13375static struct bpf_verifier_state_list **explored_state(
13376 struct bpf_verifier_env *env,
13377 int idx)
13378{
13379 struct bpf_verifier_state *cur = env->cur_state;
13380 struct bpf_func_state *state = cur->frame[cur->curframe];
13381
13382 return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
13383}
13384
13385static void mark_prune_point(struct bpf_verifier_env *env, int idx)
13386{
13387 env->insn_aux_data[idx].prune_point = true;
13388}
13389
13390static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx)
13391{
13392 return env->insn_aux_data[insn_idx].prune_point;
13393}
13394
13395enum {
13396 DONE_EXPLORING = 0,
13397 KEEP_EXPLORING = 1,
13398};
13399
13400/* t, w, e - match pseudo-code above:
13401 * t - index of current instruction
13402 * w - next instruction
13403 * e - edge
13404 */
13405static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
13406 bool loop_ok)
13407{
13408 int *insn_stack = env->cfg.insn_stack;
13409 int *insn_state = env->cfg.insn_state;
13410
13411 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
13412 return DONE_EXPLORING;
13413
13414 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
13415 return DONE_EXPLORING;
13416
13417 if (w < 0 || w >= env->prog->len) {
13418 verbose_linfo(env, t, "%d: ", t);
13419 verbose(env, "jump out of range from insn %d to %d\n", t, w);
13420 return -EINVAL;
13421 }
13422
13423 if (e == BRANCH) {
13424 /* mark branch target for state pruning */
13425 mark_prune_point(env, w);
13426 mark_jmp_point(env, w);
13427 }
13428
13429 if (insn_state[w] == 0) {
13430 /* tree-edge */
13431 insn_state[t] = DISCOVERED | e;
13432 insn_state[w] = DISCOVERED;
13433 if (env->cfg.cur_stack >= env->prog->len)
13434 return -E2BIG;
13435 insn_stack[env->cfg.cur_stack++] = w;
13436 return KEEP_EXPLORING;
13437 } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
13438 if (loop_ok && env->bpf_capable)
13439 return DONE_EXPLORING;
13440 verbose_linfo(env, t, "%d: ", t);
13441 verbose_linfo(env, w, "%d: ", w);
13442 verbose(env, "back-edge from insn %d to %d\n", t, w);
13443 return -EINVAL;
13444 } else if (insn_state[w] == EXPLORED) {
13445 /* forward- or cross-edge */
13446 insn_state[t] = DISCOVERED | e;
13447 } else {
13448 verbose(env, "insn state internal bug\n");
13449 return -EFAULT;
13450 }
13451 return DONE_EXPLORING;
13452}
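/* Example: if insn 5 branches back to insn 2 while insn 2 is still
 * DISCOVERED (i.e. still on the DFS stack), the edge is a back-edge. It is
 * tolerated only when the caller passed loop_ok and the task is
 * bpf_capable; otherwise the program is rejected with
 * "back-edge from insn 5 to 2".
 */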
13453
13454static int visit_func_call_insn(int t, struct bpf_insn *insns,
13455 struct bpf_verifier_env *env,
13456 bool visit_callee)
13457{
13458 int ret;
13459
13460 ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
13461 if (ret)
13462 return ret;
13463
13464 mark_prune_point(env, t + 1);
13465 /* when we exit from subprog, we need to record non-linear history */
13466 mark_jmp_point(env, t + 1);
13467
13468 if (visit_callee) {
13469 mark_prune_point(env, t);
13470 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
13471 /* It's ok to allow recursion from CFG point of
13472 * view. __check_func_call() will do the actual
13473 * check.
13474 */
13475 bpf_pseudo_func(insns + t));
13476 }
13477 return ret;
13478}
13479
13480/* Visits the instruction at index t and returns one of the following:
13481 * < 0 - an error occurred
13482 * DONE_EXPLORING - the instruction was fully explored
13483 * KEEP_EXPLORING - there is still work to be done before it is fully explored
13484 */
13485static int visit_insn(int t, struct bpf_verifier_env *env)
13486{
13487 struct bpf_insn *insns = env->prog->insnsi;
13488 int ret;
13489
13490 if (bpf_pseudo_func(insns + t))
13491 return visit_func_call_insn(t, insns, env, true);
13492
13493 /* All non-branch instructions have a single fall-through edge. */
13494 if (BPF_CLASS(insns[t].code) != BPF_JMP &&
13495 BPF_CLASS(insns[t].code) != BPF_JMP32)
13496 return push_insn(t, t + 1, FALLTHROUGH, env, false);
13497
13498 switch (BPF_OP(insns[t].code)) {
13499 case BPF_EXIT:
13500 return DONE_EXPLORING;
13501
13502 case BPF_CALL:
13503 if (insns[t].imm == BPF_FUNC_timer_set_callback)
13504 /* Mark this call insn as a prune point to trigger
13505 * an is_state_visited() check before the call itself is
13506 * processed by __check_func_call(). Otherwise a new
13507 * async state will be pushed for further exploration.
13508 */
13509 mark_prune_point(env, t);
13510 return visit_func_call_insn(t, insns, env,
13511 insns[t].src_reg == BPF_PSEUDO_CALL);
13512
13513 case BPF_JA:
13514 if (BPF_SRC(insns[t].code) != BPF_K)
13515 return -EINVAL;
13516
13517 /* unconditional jump with single edge */
13518 ret = push_insn(t, t + insns[t].off + 1, FALLTHROUGH, env,
13519 true);
13520 if (ret)
13521 return ret;
13522
13523 mark_prune_point(env, t + insns[t].off + 1);
13524 mark_jmp_point(env, t + insns[t].off + 1);
13525
13526 return ret;
13527
13528 default:
13529 /* conditional jump with two edges */
13530 mark_prune_point(env, t);
13531
13532 ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
13533 if (ret)
13534 return ret;
13535
13536 return push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
13537 }
13538}
13539
13540/* non-recursive depth-first-search to detect loops in BPF program
13541 * loop == back-edge in directed graph
13542 */
13543static int check_cfg(struct bpf_verifier_env *env)
13544{
13545 int insn_cnt = env->prog->len;
13546 int *insn_stack, *insn_state;
13547 int ret = 0;
13548 int i;
13549
13550 insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
13551 if (!insn_state)
13552 return -ENOMEM;
13553
13554 insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
13555 if (!insn_stack) {
13556 kvfree(insn_state);
13557 return -ENOMEM;
13558 }
13559
13560 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
13561 insn_stack[0] = 0; /* 0 is the first instruction */
13562 env->cfg.cur_stack = 1;
13563
13564 while (env->cfg.cur_stack > 0) {
13565 int t = insn_stack[env->cfg.cur_stack - 1];
13566
13567 ret = visit_insn(t, env);
13568 switch (ret) {
13569 case DONE_EXPLORING:
13570 insn_state[t] = EXPLORED;
13571 env->cfg.cur_stack--;
13572 break;
13573 case KEEP_EXPLORING:
13574 break;
13575 default:
13576 if (ret > 0) {
13577 verbose(env, "visit_insn internal bug\n");
13578 ret = -EFAULT;
13579 }
13580 goto err_free;
13581 }
13582 }
13583
13584 if (env->cfg.cur_stack < 0) {
13585 verbose(env, "pop stack internal bug\n");
13586 ret = -EFAULT;
13587 goto err_free;
13588 }
13589
13590 for (i = 0; i < insn_cnt; i++) {
13591 if (insn_state[i] != EXPLORED) {
13592 verbose(env, "unreachable insn %d\n", i);
13593 ret = -EINVAL;
13594 goto err_free;
13595 }
13596 }
13597 ret = 0; /* cfg looks good */
13598
13599err_free:
13600 kvfree(insn_state);
13601 kvfree(insn_stack);
13602 env->cfg.insn_state = env->cfg.insn_stack = NULL;
13603 return ret;
13604}
13605
13606static int check_abnormal_return(struct bpf_verifier_env *env)
13607{
13608 int i;
13609
13610 for (i = 1; i < env->subprog_cnt; i++) {
13611 if (env->subprog_info[i].has_ld_abs) {
13612 verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
13613 return -EINVAL;
13614 }
13615 if (env->subprog_info[i].has_tail_call) {
13616 verbose(env, "tail_call is not allowed in subprogs without BTF\n");
13617 return -EINVAL;
13618 }
13619 }
13620 return 0;
13621}
13622
13623/* The minimum supported BTF func info size */
13624#define MIN_BPF_FUNCINFO_SIZE 8
13625#define MAX_FUNCINFO_REC_SIZE 252
13626
13627static int check_btf_func(struct bpf_verifier_env *env,
13628 const union bpf_attr *attr,
13629 bpfptr_t uattr)
13630{
13631 const struct btf_type *type, *func_proto, *ret_type;
13632 u32 i, nfuncs, urec_size, min_size;
13633 u32 krec_size = sizeof(struct bpf_func_info);
13634 struct bpf_func_info *krecord;
13635 struct bpf_func_info_aux *info_aux = NULL;
13636 struct bpf_prog *prog;
13637 const struct btf *btf;
13638 bpfptr_t urecord;
13639 u32 prev_offset = 0;
13640 bool scalar_return;
13641 int ret = -ENOMEM;
13642
13643 nfuncs = attr->func_info_cnt;
13644 if (!nfuncs) {
13645 if (check_abnormal_return(env))
13646 return -EINVAL;
13647 return 0;
13648 }
13649
13650 if (nfuncs != env->subprog_cnt) {
13651 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
13652 return -EINVAL;
13653 }
13654
13655 urec_size = attr->func_info_rec_size;
13656 if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
13657 urec_size > MAX_FUNCINFO_REC_SIZE ||
13658 urec_size % sizeof(u32)) {
13659 verbose(env, "invalid func info rec size %u\n", urec_size);
13660 return -EINVAL;
13661 }
13662
13663 prog = env->prog;
13664 btf = prog->aux->btf;
13665
13666 urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
13667 min_size = min_t(u32, krec_size, urec_size);
13668
13669 krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
13670 if (!krecord)
13671 return -ENOMEM;
13672 info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
13673 if (!info_aux)
13674 goto err_free;
13675
13676 for (i = 0; i < nfuncs; i++) {
13677 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
13678 if (ret) {
13679 if (ret == -E2BIG) {
13680 verbose(env, "nonzero tailing record in func info");
13681 /* set the size the kernel expects so the loader can
13682 * zero out the rest of the record.
13683 */
13684 if (copy_to_bpfptr_offset(uattr,
13685 offsetof(union bpf_attr, func_info_rec_size),
13686 &min_size, sizeof(min_size)))
13687 ret = -EFAULT;
13688 }
13689 goto err_free;
13690 }
13691
13692 if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
13693 ret = -EFAULT;
13694 goto err_free;
13695 }
13696
13697 /* check insn_off */
13698 ret = -EINVAL;
13699 if (i == 0) {
13700 if (krecord[i].insn_off) {
13701 verbose(env,
13702 "nonzero insn_off %u for the first func info record",
13703 krecord[i].insn_off);
13704 goto err_free;
13705 }
13706 } else if (krecord[i].insn_off <= prev_offset) {
13707 verbose(env,
13708 "same or smaller insn offset (%u) than previous func info record (%u)",
13709 krecord[i].insn_off, prev_offset);
13710 goto err_free;
13711 }
13712
13713 if (env->subprog_info[i].start != krecord[i].insn_off) {
13714 verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
13715 goto err_free;
13716 }
13717
13718 /* check type_id */
13719 type = btf_type_by_id(btf, krecord[i].type_id);
13720 if (!type || !btf_type_is_func(type)) {
13721 verbose(env, "invalid type id %d in func info",
13722 krecord[i].type_id);
13723 goto err_free;
13724 }
13725 info_aux[i].linkage = BTF_INFO_VLEN(type->info);
13726
13727 func_proto = btf_type_by_id(btf, type->type);
13728 if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
13729 /* btf_func_check() already verified it during BTF load */
13730 goto err_free;
13731 ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
13732 scalar_return =
13733 btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
13734 if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
13735 verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
13736 goto err_free;
13737 }
13738 if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
13739 verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
13740 goto err_free;
13741 }
13742
13743 prev_offset = krecord[i].insn_off;
13744 bpfptr_add(&urecord, urec_size);
13745 }
13746
13747 prog->aux->func_info = krecord;
13748 prog->aux->func_info_cnt = nfuncs;
13749 prog->aux->func_info_aux = info_aux;
13750 return 0;
13751
13752err_free:
13753 kvfree(krecord);
13754 kfree(info_aux);
13755 return ret;
13756}
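/* Compatibility note for the record-size handling above: each record is
 * copied using min(kernel record size, user record size) bytes, any extra
 * user-supplied tail bytes must be zero, and on -E2BIG the size the kernel
 * expects is written back to uattr so the loader can zero out the rest of
 * the record and retry.
 */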
13757
13758static void adjust_btf_func(struct bpf_verifier_env *env)
13759{
13760 struct bpf_prog_aux *aux = env->prog->aux;
13761 int i;
13762
13763 if (!aux->func_info)
13764 return;
13765
13766 for (i = 0; i < env->subprog_cnt; i++)
13767 aux->func_info[i].insn_off = env->subprog_info[i].start;
13768}
13769
13770#define MIN_BPF_LINEINFO_SIZE offsetofend(struct bpf_line_info, line_col)
13771#define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
13772
13773static int check_btf_line(struct bpf_verifier_env *env,
13774 const union bpf_attr *attr,
13775 bpfptr_t uattr)
13776{
13777 u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
13778 struct bpf_subprog_info *sub;
13779 struct bpf_line_info *linfo;
13780 struct bpf_prog *prog;
13781 const struct btf *btf;
13782 bpfptr_t ulinfo;
13783 int err;
13784
13785 nr_linfo = attr->line_info_cnt;
13786 if (!nr_linfo)
13787 return 0;
13788 if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
13789 return -EINVAL;
13790
13791 rec_size = attr->line_info_rec_size;
13792 if (rec_size < MIN_BPF_LINEINFO_SIZE ||
13793 rec_size > MAX_LINEINFO_REC_SIZE ||
13794 rec_size & (sizeof(u32) - 1))
13795 return -EINVAL;
13796
13797 /* Need to zero it in case userspace passes
13798 * in a smaller bpf_line_info object.
13799 */
13800 linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
13801 GFP_KERNEL | __GFP_NOWARN);
13802 if (!linfo)
13803 return -ENOMEM;
13804
13805 prog = env->prog;
13806 btf = prog->aux->btf;
13807
13808 s = 0;
13809 sub = env->subprog_info;
13810 ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
13811 expected_size = sizeof(struct bpf_line_info);
13812 ncopy = min_t(u32, expected_size, rec_size);
13813 for (i = 0; i < nr_linfo; i++) {
13814 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
13815 if (err) {
13816 if (err == -E2BIG) {
13817 verbose(env, "nonzero tailing record in line_info");
13818 if (copy_to_bpfptr_offset(uattr,
13819 offsetof(union bpf_attr, line_info_rec_size),
13820 &expected_size, sizeof(expected_size)))
13821 err = -EFAULT;
13822 }
13823 goto err_free;
13824 }
13825
13826 if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
13827 err = -EFAULT;
13828 goto err_free;
13829 }
13830
13831 /*
13832 * Check insn_off to ensure
13833 * 1) strictly increasing AND
13834 * 2) bounded by prog->len
13835 *
13836 * The linfo[0].insn_off == 0 check logically falls into
13837 * the later "missing bpf_line_info for func..." case
13838 * because the first linfo[0].insn_off must belong to the
13839 * first subprog as well, and the first subprog must have
13840 * subprog_info[0].start == 0.
13841 */
13842 if ((i && linfo[i].insn_off <= prev_offset) ||
13843 linfo[i].insn_off >= prog->len) {
13844 verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
13845 i, linfo[i].insn_off, prev_offset,
13846 prog->len);
13847 err = -EINVAL;
13848 goto err_free;
13849 }
13850
13851 if (!prog->insnsi[linfo[i].insn_off].code) {
13852 verbose(env,
13853 "Invalid insn code at line_info[%u].insn_off\n",
13854 i);
13855 err = -EINVAL;
13856 goto err_free;
13857 }
13858
13859 if (!btf_name_by_offset(btf, linfo[i].line_off) ||
13860 !btf_name_by_offset(btf, linfo[i].file_name_off)) {
13861 verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
13862 err = -EINVAL;
13863 goto err_free;
13864 }
13865
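/* Match line infos against subprog starts: the linfo that begins at
 * sub[s].start becomes that subprog's linfo_idx; if we move past a
 * subprog start without such a linfo, that subprog has no line info.
 */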
13866 if (s != env->subprog_cnt) {
13867 if (linfo[i].insn_off == sub[s].start) {
13868 sub[s].linfo_idx = i;
13869 s++;
13870 } else if (sub[s].start < linfo[i].insn_off) {
13871 verbose(env, "missing bpf_line_info for func#%u\n", s);
13872 err = -EINVAL;
13873 goto err_free;
13874 }
13875 }
13876
13877 prev_offset = linfo[i].insn_off;
13878 bpfptr_add(&ulinfo, rec_size);
13879 }
13880
13881 if (s != env->subprog_cnt) {
13882 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
13883 env->subprog_cnt - s, s);
13884 err = -EINVAL;
13885 goto err_free;
13886 }
13887
13888 prog->aux->linfo = linfo;
13889 prog->aux->nr_linfo = nr_linfo;
13890
13891 return 0;
13892
13893err_free:
13894 kvfree(linfo);
13895 return err;
13896}
13897
13898#define MIN_CORE_RELO_SIZE sizeof(struct bpf_core_relo)
13899#define MAX_CORE_RELO_SIZE MAX_FUNCINFO_REC_SIZE
13900
13901static int check_core_relo(struct bpf_verifier_env *env,
13902 const union bpf_attr *attr,
13903 bpfptr_t uattr)
13904{
13905 u32 i, nr_core_relo, ncopy, expected_size, rec_size;
13906 struct bpf_core_relo core_relo = {};
13907 struct bpf_prog *prog = env->prog;
13908 const struct btf *btf = prog->aux->btf;
13909 struct bpf_core_ctx ctx = {
13910 .log = &env->log,
13911 .btf = btf,
13912 };
13913 bpfptr_t u_core_relo;
13914 int err;
13915
13916 nr_core_relo = attr->core_relo_cnt;
13917 if (!nr_core_relo)
13918 return 0;
13919 if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
13920 return -EINVAL;
13921
13922 rec_size = attr->core_relo_rec_size;
13923 if (rec_size < MIN_CORE_RELO_SIZE ||
13924 rec_size > MAX_CORE_RELO_SIZE ||
13925 rec_size % sizeof(u32))
13926 return -EINVAL;
13927
13928 u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
13929 expected_size = sizeof(struct bpf_core_relo);
13930 ncopy = min_t(u32, expected_size, rec_size);
13931
13932 /* Unlike func_info and line_info, copy and apply each CO-RE
13933 * relocation record one at a time.
13934 */
13935 for (i = 0; i < nr_core_relo; i++) {
13936 /* future proofing when sizeof(bpf_core_relo) changes */
13937 err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
13938 if (err) {
13939 if (err == -E2BIG) {
13940 verbose(env, "nonzero tailing record in core_relo");
13941 if (copy_to_bpfptr_offset(uattr,
13942 offsetof(union bpf_attr, core_relo_rec_size),
13943 &expected_size, sizeof(expected_size)))
13944 err = -EFAULT;
13945 }
13946 break;
13947 }
13948
13949 if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) {
13950 err = -EFAULT;
13951 break;
13952 }
13953
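/* core_relo.insn_off is in bytes; each BPF insn is 8 bytes
 * (sizeof(struct bpf_insn)), so it must be 8-byte aligned and the
 * resulting insn index must fall inside the program.
 */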
13954 if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
13955 verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
13956 i, core_relo.insn_off, prog->len);
13957 err = -EINVAL;
13958 break;
13959 }
13960
13961 err = bpf_core_apply(&ctx, &core_relo, i,
13962 &prog->insnsi[core_relo.insn_off / 8]);
13963 if (err)
13964 break;
13965 bpfptr_add(&u_core_relo, rec_size);
13966 }
13967 return err;
13968}
13969
13970static int check_btf_info(struct bpf_verifier_env *env,
13971 const union bpf_attr *attr,
13972 bpfptr_t uattr)
13973{
13974 struct btf *btf;
13975 int err;
13976
13977 if (!attr->func_info_cnt && !attr->line_info_cnt) {
13978 if (check_abnormal_return(env))
13979 return -EINVAL;
13980 return 0;
13981 }
13982
13983 btf = btf_get_by_fd(attr->prog_btf_fd);
13984 if (IS_ERR(btf))
13985 return PTR_ERR(btf);
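/* Program BTF must be user-supplied; vmlinux/module BTF is rejected. */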
13986 if (btf_is_kernel(btf)) {
13987 btf_put(btf);
13988 return -EACCES;
13989 }
13990 env->prog->aux->btf = btf;
13991
13992 err = check_btf_func(env, attr, uattr);
13993 if (err)
13994 return err;
13995
13996 err = check_btf_line(env, attr, uattr);
13997 if (err)
13998 return err;
13999
14000 err = check_core_relo(env, attr, uattr);
14001 if (err)
14002 return err;
14003
14004 return 0;
14005}
14006
14007/* check %cur's range satisfies %old's */
14008static bool range_within(struct bpf_reg_state *old,
14009 struct bpf_reg_state *cur)
14010{
14011 return old->umin_value <= cur->umin_value &&
14012 old->umax_value >= cur->umax_value &&
14013 old->smin_value <= cur->smin_value &&
14014 old->smax_value >= cur->smax_value &&
14015 old->u32_min_value <= cur->u32_min_value &&
14016 old->u32_max_value >= cur->u32_max_value &&
14017 old->s32_min_value <= cur->s32_min_value &&
14018 old->s32_max_value >= cur->s32_max_value;
14019}
14020
14021/* If in the old state two registers had the same id, then they need to have
14022 * the same id in the new state as well. But that id could be different from
14023 * the old state, so we need to track the mapping from old to new ids.
14024 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
14025 * regs with old id 5 must also have new id 9 for the new state to be safe. But
14026 * regs with a different old id could still have new id 9, we don't care about
14027 * that.
14028 * So we look through our idmap to see if this old id has been seen before. If
14029 * so, we require the new id to match; otherwise, we add the id pair to the map.
14030 */
14031static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
14032{
14033 unsigned int i;
14034
14035 /* either both IDs should be set or both should be zero */
14036 if (!!old_id != !!cur_id)
14037 return false;
14038
14039 if (old_id == 0) /* cur_id == 0 as well */
14040 return true;
14041
14042 for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
14043 if (!idmap[i].old) {
14044 /* Reached an empty slot; haven't seen this id before */
14045 idmap[i].old = old_id;
14046 idmap[i].cur = cur_id;
14047 return true;
14048 }
14049 if (idmap[i].old == old_id)
14050 return idmap[i].cur == cur_id;
14051 }
14052 /* We ran out of idmap slots, which should be impossible */
14053 WARN_ON_ONCE(1);
14054 return false;
14055}
14056
14057static void clean_func_state(struct bpf_verifier_env *env,
14058 struct bpf_func_state *st)
14059{
14060 enum bpf_reg_liveness live;
14061 int i, j;
14062
14063 for (i = 0; i < BPF_REG_FP; i++) {
14064 live = st->regs[i].live;
14065 /* liveness must not touch this register anymore */
14066 st->regs[i].live |= REG_LIVE_DONE;
14067 if (!(live & REG_LIVE_READ))
14068 /* since the register is unused, clear its state
14069 * to make further comparison simpler
14070 */
14071 __mark_reg_not_init(env, &st->regs[i]);
14072 }
14073
14074 for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
14075 live = st->stack[i].spilled_ptr.live;
14076 /* liveness must not touch this stack slot anymore */
14077 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
14078 if (!(live & REG_LIVE_READ)) {
14079 __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
14080 for (j = 0; j < BPF_REG_SIZE; j++)
14081 st->stack[i].slot_type[j] = STACK_INVALID;
14082 }
14083 }
14084}
14085
14086static void clean_verifier_state(struct bpf_verifier_env *env,
14087 struct bpf_verifier_state *st)
14088{
14089 int i;
14090
14091 if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
14092 /* all regs in this state in all frames were already marked */
14093 return;
14094
14095 for (i = 0; i <= st->curframe; i++)
14096 clean_func_state(env, st->frame[i]);
14097}
14098
14099/* the parentage chains form a tree.
14100 * the verifier states are added to state lists at given insn and
14101 * pushed into state stack for future exploration.
14102 * when the verifier reaches bpf_exit insn some of the verifier states
14103 * stored in the state lists have their final liveness state already,
14104 * but a lot of states will get revised from liveness point of view when
14105 * the verifier explores other branches.
14106 * Example:
14107 * 1: r0 = 1
14108 * 2: if r1 == 100 goto pc+1
14109 * 3: r0 = 2
14110 * 4: exit
14111 * when the verifier reaches exit insn the register r0 in the state list of
14112 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
14113 * of insn 2 and goes exploring further. At the insn 4 it will walk the
14114 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
14115 *
14116 * Since the verifier pushes the branch states as it sees them while exploring
14117 * the program the condition of walking the branch instruction for the second
14118 * time means that all states below this branch were already explored and
14119 * their final liveness marks are already propagated.
14120 * Hence when the verifier completes the search of the state list in is_state_visited()
14121 * we can call this clean_live_states() function to mark all liveness states
14122 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
14123 * will not be used.
14124 * This function also clears the registers and stack for states that !READ
14125 * to simplify state merging.
14126 *
14127 * Important note here that walking the same branch instruction in the callee
14128 * doesn't mean that the states are DONE. The verifier has to compare
14129 * the callsites as well.
14130 */
14131static void clean_live_states(struct bpf_verifier_env *env, int insn,
14132 struct bpf_verifier_state *cur)
14133{
14134 struct bpf_verifier_state_list *sl;
14135 int i;
14136
14137 sl = *explored_state(env, insn);
14138 while (sl) {
14139 if (sl->state.branches)
14140 goto next;
14141 if (sl->state.insn_idx != insn ||
14142 sl->state.curframe != cur->curframe)
14143 goto next;
14144 for (i = 0; i <= cur->curframe; i++)
14145 if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
14146 goto next;
14147 clean_verifier_state(env, &sl->state);
14148next:
14149 sl = sl->next;
14150 }
14151}
14152
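/* Two registers are an exact match when everything laid out before 'id'
 * in struct bpf_reg_state compares equal byte-for-byte, and 'id' and
 * 'ref_obj_id' map consistently through the idmap.
 */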
14153static bool regs_exact(const struct bpf_reg_state *rold,
14154 const struct bpf_reg_state *rcur,
14155 struct bpf_id_pair *idmap)
14156{
14157 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
14158 check_ids(rold->id, rcur->id, idmap) &&
14159 check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
14160}
14161
14162/* Returns true if (rold safe implies rcur safe) */
14163static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
14164 struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
14165{
14166 if (!(rold->live & REG_LIVE_READ))
14167 /* explored state didn't use this */
14168 return true;
14169 if (rold->type == NOT_INIT)
14170 /* explored state can't have used this */
14171 return true;
14172 if (rcur->type == NOT_INIT)
14173 return false;
14174
14175 /* Enforce that register types have to match exactly, including their
14176 * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
14177 * rule.
14178 *
14179 * One can make a point that using a pointer register as unbounded
14180 * SCALAR would be technically acceptable, but this could lead to
14181 * pointer leaks because scalars are allowed to leak while pointers
14182 * are not. We could make this safe in special cases if root is
14183 * calling us, but it's probably not worth the hassle.
14184 *
14185 * Also, register types that are *not* MAYBE_NULL could technically be
14186 * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
14187 * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
14188 * to the same map).
14189 * However, if the old MAYBE_NULL register then got NULL checked,
14190 * doing so could have affected others with the same id, and we can't
14191 * check for that because we lost the id when we converted to
14192 * a non-MAYBE_NULL variant.
14193 * So, as a general rule we don't allow mixing MAYBE_NULL and
14194 * non-MAYBE_NULL registers as well.
14195 */
14196 if (rold->type != rcur->type)
14197 return false;
14198
14199 switch (base_type(rold->type)) {
14200 case SCALAR_VALUE:
14201 if (regs_exact(rold, rcur, idmap))
14202 return true;
14203 if (env->explore_alu_limits)
14204 return false;
14205 if (!rold->precise)
14206 return true;
14207 /* new val must satisfy old val knowledge */
14208 return range_within(rold, rcur) &&
14209 tnum_in(rold->var_off, rcur->var_off);
14210 case PTR_TO_MAP_KEY:
14211 case PTR_TO_MAP_VALUE:
14212 case PTR_TO_MEM:
14213 case PTR_TO_BUF:
14214 case PTR_TO_TP_BUFFER:
14215 /* If the new min/max/var_off satisfy the old ones and
14216 * everything else matches, we are OK.
14217 */
14218 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
14219 range_within(rold, rcur) &&
14220 tnum_in(rold->var_off, rcur->var_off) &&
14221 check_ids(rold->id, rcur->id, idmap) &&
14222 check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
14223 case PTR_TO_PACKET_META:
14224 case PTR_TO_PACKET:
14225 /* We must have at least as much range as the old ptr
14226 * did, so that any accesses which were safe before are
14227 * still safe. This is true even if old range < old off,
14228 * since someone could have accessed through (ptr - k), or
14229 * even done ptr -= k in a register, to get a safe access.
14230 */
14231 if (rold->range > rcur->range)
14232 return false;
14233 /* If the offsets don't match, we can't trust our alignment;
14234 * nor can we be sure that we won't fall out of range.
14235 */
14236 if (rold->off != rcur->off)
14237 return false;
14238 /* id relations must be preserved */
14239 if (!check_ids(rold->id, rcur->id, idmap))
14240 return false;
14241 /* new val must satisfy old val knowledge */
14242 return range_within(rold, rcur) &&
14243 tnum_in(rold->var_off, rcur->var_off);
14244 case PTR_TO_STACK:
14245 /* two stack pointers are equal only if they're pointing to
14246 * the same stack frame, since fp-8 in foo != fp-8 in bar
14247 */
14248 return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
14249 default:
14250 return regs_exact(rold, rcur, idmap);
14251 }
14252}
14253
14254static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
14255 struct bpf_func_state *cur, struct bpf_id_pair *idmap)
14256{
14257 int i, spi;
14258
14259 /* walk slots of the explored stack and ignore any additional
14260 * slots in the current stack, since explored(safe) state
14261 * didn't use them
14262 */
14263 for (i = 0; i < old->allocated_stack; i++) {
14264 spi = i / BPF_REG_SIZE;
14265
14266 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
14267 i += BPF_REG_SIZE - 1;
14268 /* explored state didn't use this */
14269 continue;
14270 }
14271
14272 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
14273 continue;
14274
14275 if (env->allow_uninit_stack &&
14276 old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
14277 continue;
14278
14279 /* explored stack has more populated slots than current stack
14280 * and these slots were used
14281 */
14282 if (i >= cur->allocated_stack)
14283 return false;
14284
14285 /* if old state was safe with misc data in the stack
14286 * it will be safe with zero-initialized stack.
14287 * The opposite is not true
14288 */
14289 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
14290 cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
14291 continue;
14292 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
14293 cur->stack[spi].slot_type[i % BPF_REG_SIZE])
14294 /* Ex: old explored (safe) state has STACK_SPILL in
14295 * this stack slot, but current has STACK_MISC ->
14296 * these verifier states are not equivalent,
14297 * return false to continue verification of this path
14298 */
14299 return false;
14300 if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
14301 continue;
14302 /* Both old and cur have the same slot_type */
14303 switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
14304 case STACK_SPILL:
14305 /* when explored and current stack slot are both storing
14306 * spilled registers, check that stored pointers types
14307 * are the same as well.
14308 * Ex: explored safe path could have stored
14309 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
14310 * but current path has stored:
14311 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
14312 * such verifier states are not equivalent.
14313 * return false to continue verification of this path
14314 */
14315 if (!regsafe(env, &old->stack[spi].spilled_ptr,
14316 &cur->stack[spi].spilled_ptr, idmap))
14317 return false;
14318 break;
14319 case STACK_DYNPTR:
14320 {
14321 const struct bpf_reg_state *old_reg, *cur_reg;
14322
14323 old_reg = &old->stack[spi].spilled_ptr;
14324 cur_reg = &cur->stack[spi].spilled_ptr;
14325 if (old_reg->dynptr.type != cur_reg->dynptr.type ||
14326 old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
14327 !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
14328 return false;
14329 break;
14330 }
14331 case STACK_MISC:
14332 case STACK_ZERO:
14333 case STACK_INVALID:
14334 continue;
14335 /* Ensure that new unhandled slot types return false by default */
14336 default:
14337 return false;
14338 }
14339 }
14340 return true;
14341}
14342
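/* Reference state is safe if both states hold the same number of
 * acquired references and their reference ids map consistently through
 * the idmap.
 */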
14343static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
14344 struct bpf_id_pair *idmap)
14345{
14346 int i;
14347
14348 if (old->acquired_refs != cur->acquired_refs)
14349 return false;
14350
14351 for (i = 0; i < old->acquired_refs; i++) {
14352 if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap))
14353 return false;
14354 }
14355
14356 return true;
14357}
14358
14359/* compare two verifier states
14360 *
14361 * all states stored in state_list are known to be valid, since
14362 * verifier reached 'bpf_exit' instruction through them
14363 *
14364 * this function is called when verifier exploring different branches of
14365 * execution popped from the state stack. If it sees an old state that has
14366 * more strict register state and more strict stack state, then this execution
14367 * branch doesn't need to be explored further, since verifier already
14368 * concluded that more strict state leads to valid finish.
14369 *
14370 * Therefore two states are equivalent if register state is more conservative
14371 * and explored stack state is more conservative than the current one.
14372 * Example:
14373 * explored current
14374 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
14375 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
14376 *
14377 * In other words if current stack state (one being explored) has more
14378 * valid slots than old one that already passed validation, it means
14379 * the verifier can stop exploring and conclude that current state is valid too
14380 *
14381 * Similarly with registers. If explored state has register type as invalid
14382 * whereas register type in current state is meaningful, it means that
14383 * the current state will reach 'bpf_exit' instruction safely
14384 */
14385static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
14386 struct bpf_func_state *cur)
14387{
14388 int i;
14389
14390 for (i = 0; i < MAX_BPF_REG; i++)
14391 if (!regsafe(env, &old->regs[i], &cur->regs[i],
14392 env->idmap_scratch))
14393 return false;
14394
14395 if (!stacksafe(env, old, cur, env->idmap_scratch))
14396 return false;
14397
14398 if (!refsafe(old, cur, env->idmap_scratch))
14399 return false;
14400
14401 return true;
14402}
14403
14404static bool states_equal(struct bpf_verifier_env *env,
14405 struct bpf_verifier_state *old,
14406 struct bpf_verifier_state *cur)
14407{
14408 int i;
14409
14410 if (old->curframe != cur->curframe)
14411 return false;
14412
14413 memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
14414
14415 /* Verification state from speculative execution simulation
14416 * must never prune a non-speculative execution one.
14417 */
14418 if (old->speculative && !cur->speculative)
14419 return false;
14420
14421 if (old->active_lock.ptr != cur->active_lock.ptr)
14422 return false;
14423
14424 /* Old and cur active_lock's have to be either both present
14425 * or both absent.
14426 */
14427 if (!!old->active_lock.id != !!cur->active_lock.id)
14428 return false;
14429
14430 if (old->active_lock.id &&
14431 !check_ids(old->active_lock.id, cur->active_lock.id, env->idmap_scratch))
14432 return false;
14433
14434 if (old->active_rcu_lock != cur->active_rcu_lock)
14435 return false;
14436
14437 /* for states to be equal callsites have to be the same
14438 * and all frame states need to be equivalent
14439 */
14440 for (i = 0; i <= old->curframe; i++) {
14441 if (old->frame[i]->callsite != cur->frame[i]->callsite)
14442 return false;
14443 if (!func_states_equal(env, old->frame[i], cur->frame[i]))
14444 return false;
14445 }
14446 return true;
14447}
14448
14449/* Return 0 if no propagation happened. Return negative error code if error
14450 * happened. Otherwise, return the propagated bit.
14451 */
14452static int propagate_liveness_reg(struct bpf_verifier_env *env,
14453 struct bpf_reg_state *reg,
14454 struct bpf_reg_state *parent_reg)
14455{
14456 u8 parent_flag = parent_reg->live & REG_LIVE_READ;
14457 u8 flag = reg->live & REG_LIVE_READ;
14458 int err;
14459
14460 /* By the time we get here, the read flags of PARENT_REG or REG could be any of
14461 * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
14462 * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
14463 */
14464 if (parent_flag == REG_LIVE_READ64 ||
14465 /* Or if there is no read flag from REG. */
14466 !flag ||
14467 /* Or if the read flag from REG is the same as PARENT_REG. */
14468 parent_flag == flag)
14469 return 0;
14470
14471 err = mark_reg_read(env, reg, parent_reg, flag);
14472 if (err)
14473 return err;
14474
14475 return flag;
14476}
14477
14478/* A write screens off any subsequent reads; but write marks come from the
14479 * straight-line code between a state and its parent. When we arrive at an
14480 * equivalent state (jump target or such) we didn't arrive by the straight-line
14481 * code, so read marks in the state must propagate to the parent regardless
14482 * of the state's write marks. That's what 'parent == state->parent' comparison
14483 * in mark_reg_read() is for.
14484 */
14485static int propagate_liveness(struct bpf_verifier_env *env,
14486 const struct bpf_verifier_state *vstate,
14487 struct bpf_verifier_state *vparent)
14488{
14489 struct bpf_reg_state *state_reg, *parent_reg;
14490 struct bpf_func_state *state, *parent;
14491 int i, frame, err = 0;
14492
14493 if (vparent->curframe != vstate->curframe) {
14494 WARN(1, "propagate_live: parent frame %d current frame %d\n",
14495 vparent->curframe, vstate->curframe);
14496 return -EFAULT;
14497 }
14498 /* Propagate read liveness of registers... */
14499 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
14500 for (frame = 0; frame <= vstate->curframe; frame++) {
14501 parent = vparent->frame[frame];
14502 state = vstate->frame[frame];
14503 parent_reg = parent->regs;
14504 state_reg = state->regs;
14505 /* We don't need to worry about FP liveness, it's read-only */
14506 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
14507 err = propagate_liveness_reg(env, &state_reg[i],
14508 &parent_reg[i]);
14509 if (err < 0)
14510 return err;
14511 if (err == REG_LIVE_READ64)
14512 mark_insn_zext(env, &parent_reg[i]);
14513 }
14514
14515 /* Propagate stack slots. */
14516 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
14517 i < parent->allocated_stack / BPF_REG_SIZE; i++) {
14518 parent_reg = &parent->stack[i].spilled_ptr;
14519 state_reg = &state->stack[i].spilled_ptr;
14520 err = propagate_liveness_reg(env, state_reg,
14521 parent_reg);
14522 if (err < 0)
14523 return err;
14524 }
14525 }
14526 return 0;
14527}
14528
14529/* find precise scalars in the previous equivalent state and
14530 * propagate them into the current state
14531 */
14532static int propagate_precision(struct bpf_verifier_env *env,
14533 const struct bpf_verifier_state *old)
14534{
14535 struct bpf_reg_state *state_reg;
14536 struct bpf_func_state *state;
14537 int i, err = 0, fr;
14538
14539 for (fr = old->curframe; fr >= 0; fr--) {
14540 state = old->frame[fr];
14541 state_reg = state->regs;
14542 for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
14543 if (state_reg->type != SCALAR_VALUE ||
14544 !state_reg->precise)
14545 continue;
14546 if (env->log.level & BPF_LOG_LEVEL2)
14547 verbose(env, "frame %d: propagating r%d\n", i, fr);
14548 err = mark_chain_precision_frame(env, fr, i);
14549 if (err < 0)
14550 return err;
14551 }
14552
14553 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
14554 if (!is_spilled_reg(&state->stack[i]))
14555 continue;
14556 state_reg = &state->stack[i].spilled_ptr;
14557 if (state_reg->type != SCALAR_VALUE ||
14558 !state_reg->precise)
14559 continue;
14560 if (env->log.level & BPF_LOG_LEVEL2)
14561 verbose(env, "frame %d: propagating fp%d\n",
14562 (-i - 1) * BPF_REG_SIZE, fr);
14563 err = mark_chain_precision_stack_frame(env, fr, i);
14564 if (err < 0)
14565 return err;
14566 }
14567 }
14568 return 0;
14569}
14570
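/* Heuristic check for a potential loop: the states are in the same
 * frame and every register of that frame compares equal on all fields
 * laid out before 'parent', i.e. the tracked values match while
 * parentage/liveness bookkeeping is ignored.
 */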
14571static bool states_maybe_looping(struct bpf_verifier_state *old,
14572 struct bpf_verifier_state *cur)
14573{
14574 struct bpf_func_state *fold, *fcur;
14575 int i, fr = cur->curframe;
14576
14577 if (old->curframe != fr)
14578 return false;
14579
14580 fold = old->frame[fr];
14581 fcur = cur->frame[fr];
14582 for (i = 0; i < MAX_BPF_REG; i++)
14583 if (memcmp(&fold->regs[i], &fcur->regs[i],
14584 offsetof(struct bpf_reg_state, parent)))
14585 return false;
14586 return true;
14587}
14588
14589
14590static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
14591{
14592 struct bpf_verifier_state_list *new_sl;
14593 struct bpf_verifier_state_list *sl, **pprev;
14594 struct bpf_verifier_state *cur = env->cur_state, *new;
14595 int i, j, err, states_cnt = 0;
14596 bool add_new_state = env->test_state_freq ? true : false;
14597
14598 /* bpf progs typically have a pruning point every 4 instructions
14599 * http://vger.kernel.org/bpfconf2019.html#session-1
14600 * Do not add new state for future pruning if the verifier hasn't seen
14601 * at least 2 jumps and at least 8 instructions.
14602 * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
14603 * In tests that amounts to up to a 50% reduction in total verifier
14604 * memory consumption and a 20% verifier time speedup.
14605 */
14606 if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
14607 env->insn_processed - env->prev_insn_processed >= 8)
14608 add_new_state = true;
14609
14610 pprev = explored_state(env, insn_idx);
14611 sl = *pprev;
14612
14613 clean_live_states(env, insn_idx, cur);
14614
14615 while (sl) {
14616 states_cnt++;
14617 if (sl->state.insn_idx != insn_idx)
14618 goto next;
14619
14620 if (sl->state.branches) {
14621 struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
14622
14623 if (frame->in_async_callback_fn &&
14624 frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
14625 /* Different async_entry_cnt means that the verifier is
14626 * processing another entry into async callback.
14627 * Seeing the same state is not an indication of infinite
14628 * loop or infinite recursion.
14629 * But finding the same state doesn't mean that it's safe
14630 * to stop processing the current state. The previous state
14631 * hasn't yet reached bpf_exit, since state.branches > 0.
14632 * Checking in_async_callback_fn alone is not enough either,
14633 * since the verifier still needs to catch infinite loops
14634 * inside async callbacks.
14635 */
14636 } else if (states_maybe_looping(&sl->state, cur) &&
14637 states_equal(env, &sl->state, cur)) {
14638 verbose_linfo(env, insn_idx, "; ");
14639 verbose(env, "infinite loop detected at insn %d\n", insn_idx);
14640 return -EINVAL;
14641 }
14642 /* if the verifier is processing a loop, avoid adding new state
14643 * too often, since different loop iterations have distinct
14644 * states and may not help future pruning.
14645 * This threshold shouldn't be too low to make sure that
14646 * a loop with large bound will be rejected quickly.
14647 * The most abusive loop will be:
14648 * r1 += 1
14649 * if r1 < 1000000 goto pc-2
14650 * 1M insn_processed limit / 100 == 10k peak states.
14651 * This threshold shouldn't be too high either, since states
14652 * at the end of the loop are likely to be useful in pruning.
14653 */
14654 if (env->jmps_processed - env->prev_jmps_processed < 20 &&
14655 env->insn_processed - env->prev_insn_processed < 100)
14656 add_new_state = false;
14657 goto miss;
14658 }
14659 if (states_equal(env, &sl->state, cur)) {
14660 sl->hit_cnt++;
14661 /* reached equivalent register/stack state,
14662 * prune the search.
14663 * Registers read by the continuation are read by us.
14664 * If we have any write marks in env->cur_state, they
14665 * will prevent corresponding reads in the continuation
14666 * from reaching our parent (an explored_state). Our
14667 * own state will get the read marks recorded, but
14668 * they'll be immediately forgotten as we're pruning
14669 * this state and will pop a new one.
14670 */
14671 err = propagate_liveness(env, &sl->state, cur);
14672
14673 /* if previous state reached the exit with precision and
14674 * current state is equivalent to it (except precision marks)
14675 * the precision needs to be propagated back in
14676 * the current state.
14677 */
14678 err = err ? : push_jmp_history(env, cur);
14679 err = err ? : propagate_precision(env, &sl->state);
14680 if (err)
14681 return err;
14682 return 1;
14683 }
14684miss:
14685 /* When a new state is not going to be added, do not increase the miss count.
14686 * Otherwise several loop iterations will remove the state
14687 * recorded earlier. The goal of these heuristics is to have
14688 * states from some iterations of the loop (some in the beginning
14689 * and some at the end) to help pruning.
14690 */
14691 if (add_new_state)
14692 sl->miss_cnt++;
14693 /* heuristic to determine whether this state is beneficial
14694 * to keep checking from state equivalence point of view.
14695 * Higher numbers increase max_states_per_insn and verification time,
14696 * but do not meaningfully decrease insn_processed.
14697 */
14698 if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
14699 /* the state is unlikely to be useful. Remove it to
14700 * speed up verification
14701 */
14702 *pprev = sl->next;
14703 if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
14704 u32 br = sl->state.branches;
14705
14706 WARN_ONCE(br,
14707 "BUG live_done but branches_to_explore %d\n",
14708 br);
14709 free_verifier_state(&sl->state, false);
14710 kfree(sl);
14711 env->peak_states--;
14712 } else {
14713 /* cannot free this state, since parentage chain may
14714 * walk it later. Add it to the free_list instead, to
14715 * be freed at the end of verification
14716 */
14717 sl->next = env->free_list;
14718 env->free_list = sl;
14719 }
14720 sl = *pprev;
14721 continue;
14722 }
14723next:
14724 pprev = &sl->next;
14725 sl = *pprev;
14726 }
14727
14728 if (env->max_states_per_insn < states_cnt)
14729 env->max_states_per_insn = states_cnt;
14730
14731 if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
14732 return 0;
14733
14734 if (!add_new_state)
14735 return 0;
14736
14737 /* There were no equivalent states, remember the current one.
14738 * Technically the current state is not proven to be safe yet,
14739 * but it will either reach the outermost bpf_exit (which means it's safe)
14740 * or it will be rejected. When there are no loops the verifier won't be
14741 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
14742 * again on the way to bpf_exit.
14743 * When looping the sl->state.branches will be > 0 and this state
14744 * will not be considered for equivalence until branches == 0.
14745 */
14746 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
14747 if (!new_sl)
14748 return -ENOMEM;
14749 env->total_states++;
14750 env->peak_states++;
14751 env->prev_jmps_processed = env->jmps_processed;
14752 env->prev_insn_processed = env->insn_processed;
14753
14754 /* forget precise markings we inherited, see __mark_chain_precision */
14755 if (env->bpf_capable)
14756 mark_all_scalars_imprecise(env, cur);
14757
14758 /* add new state to the head of linked list */
14759 new = &new_sl->state;
14760 err = copy_verifier_state(new, cur);
14761 if (err) {
14762 free_verifier_state(new, false);
14763 kfree(new_sl);
14764 return err;
14765 }
14766 new->insn_idx = insn_idx;
14767 WARN_ONCE(new->branches != 1,
14768 "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
14769
14770 cur->parent = new;
14771 cur->first_insn_idx = insn_idx;
14772 clear_jmp_history(cur);
14773 new_sl->next = *explored_state(env, insn_idx);
14774 *explored_state(env, insn_idx) = new_sl;
14775 /* connect new state to parentage chain. Current frame needs all
14776 * registers connected. Only r6 - r9 of the callers are alive (pushed
14777 * to the stack implicitly by JITs) so in callers' frames connect just
14778 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
14779 * the state of the call instruction (with WRITTEN set), and r0 comes
14780 * from callee with its full parentage chain, anyway.
14781 */
14782 /* clear write marks in current state: the writes we did are not writes
14783 * our child did, so they don't screen off its reads from us.
14784 * (There are no read marks in current state, because reads always mark
14785 * their parent and current state never has children yet. Only
14786 * explored_states can get read marks.)
14787 */
14788 for (j = 0; j <= cur->curframe; j++) {
14789 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
14790 cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
14791 for (i = 0; i < BPF_REG_FP; i++)
14792 cur->frame[j]->regs[i].live = REG_LIVE_NONE;
14793 }
14794
14795 /* all stack frames are accessible from callee, clear them all */
14796 for (j = 0; j <= cur->curframe; j++) {
14797 struct bpf_func_state *frame = cur->frame[j];
14798 struct bpf_func_state *newframe = new->frame[j];
14799
14800 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
14801 frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
14802 frame->stack[i].spilled_ptr.parent =
14803 &newframe->stack[i].spilled_ptr;
14804 }
14805 }
14806 return 0;
14807}
14808
14809/* Return true if it's OK to have the same insn return a different type. */
14810static bool reg_type_mismatch_ok(enum bpf_reg_type type)
14811{
14812 switch (base_type(type)) {
14813 case PTR_TO_CTX:
14814 case PTR_TO_SOCKET:
14815 case PTR_TO_SOCK_COMMON:
14816 case PTR_TO_TCP_SOCK:
14817 case PTR_TO_XDP_SOCK:
14818 case PTR_TO_BTF_ID:
14819 return false;
14820 default:
14821 return true;
14822 }
14823}
14824
14825/* If an instruction was previously used with particular pointer types, then we
14826 * need to be careful to avoid cases such as the below, where it may be ok
14827 * for one branch accessing the pointer, but not ok for the other branch:
14828 *
14829 * R1 = sock_ptr
14830 * goto X;
14831 * ...
14832 * R1 = some_other_valid_ptr;
14833 * goto X;
14834 * ...
14835 * R2 = *(u32 *)(R1 + 0);
14836 */
14837static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
14838{
14839 return src != prev && (!reg_type_mismatch_ok(src) ||
14840 !reg_type_mismatch_ok(prev));
14841}
14842
14843static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
14844 bool allow_trust_missmatch)
14845{
14846 enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
14847
14848 if (*prev_type == NOT_INIT) {
14849 /* Saw a valid insn
14850 * dst_reg = *(u32 *)(src_reg + off)
14851 * save type to validate intersecting paths
14852 */
14853 *prev_type = type;
14854 } else if (reg_type_mismatch(type, *prev_type)) {
14855 /* Abuser program is trying to use the same insn
14856 * dst_reg = *(u32*) (src_reg + off)
14857 * with different pointer types:
14858 * src_reg == ctx in one branch and
14859 * src_reg == stack|map in some other branch.
14860 * Reject it.
14861 */
14862 if (allow_trust_missmatch &&
14863 base_type(type) == PTR_TO_BTF_ID &&
14864 base_type(*prev_type) == PTR_TO_BTF_ID) {
14865 /*
14866 * Have to support a use case when one path through
14867 * the program yields TRUSTED pointer while another
14868 * is UNTRUSTED. Fall back to UNTRUSTED to generate
14869 * BPF_PROBE_MEM.
14870 */
14871 *prev_type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
14872 } else {
14873 verbose(env, "same insn cannot be used with different pointers\n");
14874 return -EINVAL;
14875 }
14876 }
14877
14878 return 0;
14879}
14880
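/* Main verification loop: walk the program one instruction at a time,
 * simulate its effect on the register/stack state, prune the search via
 * is_state_visited() at prune points, and pop queued branch states at
 * bpf_exit until the stack is empty.
 */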
14881static int do_check(struct bpf_verifier_env *env)
14882{
14883 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
14884 struct bpf_verifier_state *state = env->cur_state;
14885 struct bpf_insn *insns = env->prog->insnsi;
14886 struct bpf_reg_state *regs;
14887 int insn_cnt = env->prog->len;
14888 bool do_print_state = false;
14889 int prev_insn_idx = -1;
14890
14891 for (;;) {
14892 struct bpf_insn *insn;
14893 u8 class;
14894 int err;
14895
14896 env->prev_insn_idx = prev_insn_idx;
14897 if (env->insn_idx >= insn_cnt) {
14898 verbose(env, "invalid insn idx %d insn_cnt %d\n",
14899 env->insn_idx, insn_cnt);
14900 return -EFAULT;
14901 }
14902
14903 insn = &insns[env->insn_idx];
14904 class = BPF_CLASS(insn->code);
14905
14906 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
14907 verbose(env,
14908 "BPF program is too large. Processed %d insn\n",
14909 env->insn_processed);
14910 return -E2BIG;
14911 }
14912
14913 state->last_insn_idx = env->prev_insn_idx;
14914
14915 if (is_prune_point(env, env->insn_idx)) {
14916 err = is_state_visited(env, env->insn_idx);
14917 if (err < 0)
14918 return err;
14919 if (err == 1) {
14920 /* found equivalent state, can prune the search */
14921 if (env->log.level & BPF_LOG_LEVEL) {
14922 if (do_print_state)
14923 verbose(env, "\nfrom %d to %d%s: safe\n",
14924 env->prev_insn_idx, env->insn_idx,
14925 env->cur_state->speculative ?
14926 " (speculative execution)" : "");
14927 else
14928 verbose(env, "%d: safe\n", env->insn_idx);
14929 }
14930 goto process_bpf_exit;
14931 }
14932 }
14933
14934 if (is_jmp_point(env, env->insn_idx)) {
14935 err = push_jmp_history(env, state);
14936 if (err)
14937 return err;
14938 }
14939
14940 if (signal_pending(current))
14941 return -EAGAIN;
14942
14943 if (need_resched())
14944 cond_resched();
14945
14946 if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
14947 verbose(env, "\nfrom %d to %d%s:",
14948 env->prev_insn_idx, env->insn_idx,
14949 env->cur_state->speculative ?
14950 " (speculative execution)" : "");
14951 print_verifier_state(env, state->frame[state->curframe], true);
14952 do_print_state = false;
14953 }
14954
14955 if (env->log.level & BPF_LOG_LEVEL) {
14956 const struct bpf_insn_cbs cbs = {
14957 .cb_call = disasm_kfunc_name,
14958 .cb_print = verbose,
14959 .private_data = env,
14960 };
14961
14962 if (verifier_state_scratched(env))
14963 print_insn_state(env, state->frame[state->curframe]);
14964
14965 verbose_linfo(env, env->insn_idx, "; ");
14966 env->prev_log_len = env->log.len_used;
14967 verbose(env, "%d: ", env->insn_idx);
14968 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
14969 env->prev_insn_print_len = env->log.len_used - env->prev_log_len;
14970 env->prev_log_len = env->log.len_used;
14971 }
14972
14973 if (bpf_prog_is_offloaded(env->prog->aux)) {
14974 err = bpf_prog_offload_verify_insn(env, env->insn_idx,
14975 env->prev_insn_idx);
14976 if (err)
14977 return err;
14978 }
14979
14980 regs = cur_regs(env);
14981 sanitize_mark_insn_seen(env);
14982 prev_insn_idx = env->insn_idx;
14983
14984 if (class == BPF_ALU || class == BPF_ALU64) {
14985 err = check_alu_op(env, insn);
14986 if (err)
14987 return err;
14988
14989 } else if (class == BPF_LDX) {
14990 enum bpf_reg_type src_reg_type;
14991
14992 /* check for reserved fields is already done */
14993
14994 /* check src operand */
14995 err = check_reg_arg(env, insn->src_reg, SRC_OP);
14996 if (err)
14997 return err;
14998
14999 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
15000 if (err)
15001 return err;
15002
15003 src_reg_type = regs[insn->src_reg].type;
15004
15005 /* check that memory (src_reg + off) is readable,
15006 * the state of dst_reg will be updated by this func
15007 */
15008 err = check_mem_access(env, env->insn_idx, insn->src_reg,
15009 insn->off, BPF_SIZE(insn->code),
15010 BPF_READ, insn->dst_reg, false);
15011 if (err)
15012 return err;
15013
15014 err = save_aux_ptr_type(env, src_reg_type, true);
15015 if (err)
15016 return err;
15017 } else if (class == BPF_STX) {
15018 enum bpf_reg_type dst_reg_type;
15019
15020 if (BPF_MODE(insn->code) == BPF_ATOMIC) {
15021 err = check_atomic(env, env->insn_idx, insn);
15022 if (err)
15023 return err;
15024 env->insn_idx++;
15025 continue;
15026 }
15027
15028 if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
15029 verbose(env, "BPF_STX uses reserved fields\n");
15030 return -EINVAL;
15031 }
15032
15033 /* check src1 operand */
15034 err = check_reg_arg(env, insn->src_reg, SRC_OP);
15035 if (err)
15036 return err;
15037 /* check src2 operand */
15038 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
15039 if (err)
15040 return err;
15041
15042 dst_reg_type = regs[insn->dst_reg].type;
15043
15044 /* check that memory (dst_reg + off) is writeable */
15045 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
15046 insn->off, BPF_SIZE(insn->code),
15047 BPF_WRITE, insn->src_reg, false);
15048 if (err)
15049 return err;
15050
15051 err = save_aux_ptr_type(env, dst_reg_type, false);
15052 if (err)
15053 return err;
15054 } else if (class == BPF_ST) {
15055 enum bpf_reg_type dst_reg_type;
15056
15057 if (BPF_MODE(insn->code) != BPF_MEM ||
15058 insn->src_reg != BPF_REG_0) {
15059 verbose(env, "BPF_ST uses reserved fields\n");
15060 return -EINVAL;
15061 }
15062 /* check src operand */
15063 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
15064 if (err)
15065 return err;
15066
15067 dst_reg_type = regs[insn->dst_reg].type;
15068
15069 /* check that memory (dst_reg + off) is writeable */
15070 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
15071 insn->off, BPF_SIZE(insn->code),
15072 BPF_WRITE, -1, false);
15073 if (err)
15074 return err;
15075
15076 err = save_aux_ptr_type(env, dst_reg_type, false);
15077 if (err)
15078 return err;
15079 } else if (class == BPF_JMP || class == BPF_JMP32) {
15080 u8 opcode = BPF_OP(insn->code);
15081
15082 env->jmps_processed++;
15083 if (opcode == BPF_CALL) {
15084 if (BPF_SRC(insn->code) != BPF_K ||
15085 (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
15086 && insn->off != 0) ||
15087 (insn->src_reg != BPF_REG_0 &&
15088 insn->src_reg != BPF_PSEUDO_CALL &&
15089 insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
15090 insn->dst_reg != BPF_REG_0 ||
15091 class == BPF_JMP32) {
15092 verbose(env, "BPF_CALL uses reserved fields\n");
15093 return -EINVAL;
15094 }
15095
15096 if (env->cur_state->active_lock.ptr) {
15097 if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) ||
15098 (insn->src_reg == BPF_PSEUDO_CALL) ||
15099 (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
15100 (insn->off != 0 || !is_bpf_graph_api_kfunc(insn->imm)))) {
15101 verbose(env, "function calls are not allowed while holding a lock\n");
15102 return -EINVAL;
15103 }
15104 }
15105 if (insn->src_reg == BPF_PSEUDO_CALL)
15106 err = check_func_call(env, insn, &env->insn_idx);
15107 else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
15108 err = check_kfunc_call(env, insn, &env->insn_idx);
15109 else
15110 err = check_helper_call(env, insn, &env->insn_idx);
15111 if (err)
15112 return err;
15113 } else if (opcode == BPF_JA) {
15114 if (BPF_SRC(insn->code) != BPF_K ||
15115 insn->imm != 0 ||
15116 insn->src_reg != BPF_REG_0 ||
15117 insn->dst_reg != BPF_REG_0 ||
15118 class == BPF_JMP32) {
15119 verbose(env, "BPF_JA uses reserved fields\n");
15120 return -EINVAL;
15121 }
15122
15123 env->insn_idx += insn->off + 1;
15124 continue;
15125
15126 } else if (opcode == BPF_EXIT) {
15127 if (BPF_SRC(insn->code) != BPF_K ||
15128 insn->imm != 0 ||
15129 insn->src_reg != BPF_REG_0 ||
15130 insn->dst_reg != BPF_REG_0 ||
15131 class == BPF_JMP32) {
15132 verbose(env, "BPF_EXIT uses reserved fields\n");
15133 return -EINVAL;
15134 }
15135
15136 if (env->cur_state->active_lock.ptr &&
15137 !in_rbtree_lock_required_cb(env)) {
15138 verbose(env, "bpf_spin_unlock is missing\n");
15139 return -EINVAL;
15140 }
15141
15142 if (env->cur_state->active_rcu_lock) {
15143 verbose(env, "bpf_rcu_read_unlock is missing\n");
15144 return -EINVAL;
15145 }
15146
15147 /* We must do check_reference_leak here before
15148 * prepare_func_exit to handle the case when
15149 * state->curframe > 0, it may be a callback
15150 * function, for which reference_state must
15151 * match caller reference state when it exits.
15152 */
15153 err = check_reference_leak(env);
15154 if (err)
15155 return err;
15156
15157 if (state->curframe) {
15158 /* exit from nested function */
15159 err = prepare_func_exit(env, &env->insn_idx);
15160 if (err)
15161 return err;
15162 do_print_state = true;
15163 continue;
15164 }
15165
15166 err = check_return_code(env);
15167 if (err)
15168 return err;
15169process_bpf_exit:
15170 mark_verifier_state_scratched(env);
15171 update_branch_counts(env, env->cur_state);
15172 err = pop_stack(env, &prev_insn_idx,
15173 &env->insn_idx, pop_log);
15174 if (err < 0) {
15175 if (err != -ENOENT)
15176 return err;
15177 break;
15178 } else {
15179 do_print_state = true;
15180 continue;
15181 }
15182 } else {
15183 err = check_cond_jmp_op(env, insn, &env->insn_idx);
15184 if (err)
15185 return err;
15186 }
15187 } else if (class == BPF_LD) {
15188 u8 mode = BPF_MODE(insn->code);
15189
15190 if (mode == BPF_ABS || mode == BPF_IND) {
15191 err = check_ld_abs(env, insn);
15192 if (err)
15193 return err;
15194
15195 } else if (mode == BPF_IMM) {
15196 err = check_ld_imm(env, insn);
15197 if (err)
15198 return err;
15199
15200 env->insn_idx++;
15201 sanitize_mark_insn_seen(env);
15202 } else {
15203 verbose(env, "invalid BPF_LD mode\n");
15204 return -EINVAL;
15205 }
15206 } else {
15207 verbose(env, "unknown insn class %d\n", class);
15208 return -EINVAL;
15209 }
15210
15211 env->insn_idx++;
15212 }
15213
15214 return 0;
15215}
15216
15217static int find_btf_percpu_datasec(struct btf *btf)
15218{
15219 const struct btf_type *t;
15220 const char *tname;
15221 int i, n;
15222
15223 /*
15224 * Both vmlinux and module each have their own ".data..percpu"
15225 * DATASECs in BTF. So for the module case, we need to skip vmlinux BTF
15226 * types and look only at the module's own BTF types.
15227 */
15228 n = btf_nr_types(btf);
15229 if (btf_is_module(btf))
15230 i = btf_nr_types(btf_vmlinux);
15231 else
15232 i = 1;
15233
15234 for(; i < n; i++) {
15235 t = btf_type_by_id(btf, i);
15236 if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
15237 continue;
15238
15239 tname = btf_name_by_offset(btf, t->name_off);
15240 if (!strcmp(tname, ".data..percpu"))
15241 return i;
15242 }
15243
15244 return -ENOENT;
15245}
15246
15247/* replace pseudo btf_id with kernel symbol address */
15248static int check_pseudo_btf_id(struct bpf_verifier_env *env,
15249 struct bpf_insn *insn,
15250 struct bpf_insn_aux_data *aux)
15251{
15252 const struct btf_var_secinfo *vsi;
15253 const struct btf_type *datasec;
15254 struct btf_mod_pair *btf_mod;
15255 const struct btf_type *t;
15256 const char *sym_name;
15257 bool percpu = false;
15258 u32 type, id = insn->imm;
15259 struct btf *btf;
15260 s32 datasec_id;
15261 u64 addr;
15262 int i, btf_fd, err;
15263
15264 btf_fd = insn[1].imm;
15265 if (btf_fd) {
15266 btf = btf_get_by_fd(btf_fd);
15267 if (IS_ERR(btf)) {
15268 verbose(env, "invalid module BTF object FD specified.\n");
15269 return -EINVAL;
15270 }
15271 } else {
15272 if (!btf_vmlinux) {
15273 verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
15274 return -EINVAL;
15275 }
15276 btf = btf_vmlinux;
15277 btf_get(btf);
15278 }
15279
15280 t = btf_type_by_id(btf, id);
15281 if (!t) {
15282 verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
15283 err = -ENOENT;
15284 goto err_put;
15285 }
15286
15287 if (!btf_type_is_var(t)) {
15288 verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id);
15289 err = -EINVAL;
15290 goto err_put;
15291 }
15292
15293 sym_name = btf_name_by_offset(btf, t->name_off);
15294 addr = kallsyms_lookup_name(sym_name);
15295 if (!addr) {
15296 verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
15297 sym_name);
15298 err = -ENOENT;
15299 goto err_put;
15300 }
15301
15302 datasec_id = find_btf_percpu_datasec(btf);
15303 if (datasec_id > 0) {
15304 datasec = btf_type_by_id(btf, datasec_id);
15305 for_each_vsi(i, datasec, vsi) {
15306 if (vsi->type == id) {
15307 percpu = true;
15308 break;
15309 }
15310 }
15311 }
15312
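/* A ld_imm64 carries its 64-bit constant split across the two
 * instruction slots: low 32 bits in insn[0].imm, high 32 bits in
 * insn[1].imm.
 */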
15313 insn[0].imm = (u32)addr;
15314 insn[1].imm = addr >> 32;
15315
15316 type = t->type;
15317 t = btf_type_skip_modifiers(btf, type, NULL);
15318 if (percpu) {
15319 aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
15320 aux->btf_var.btf = btf;
15321 aux->btf_var.btf_id = type;
15322 } else if (!btf_type_is_struct(t)) {
15323 const struct btf_type *ret;
15324 const char *tname;
15325 u32 tsize;
15326
15327 /* resolve the type size of ksym. */
15328 ret = btf_resolve_size(btf, t, &tsize);
15329 if (IS_ERR(ret)) {
15330 tname = btf_name_by_offset(btf, t->name_off);
15331 verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
15332 tname, PTR_ERR(ret));
15333 err = -EINVAL;
15334 goto err_put;
15335 }
15336 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
15337 aux->btf_var.mem_size = tsize;
15338 } else {
15339 aux->btf_var.reg_type = PTR_TO_BTF_ID;
15340 aux->btf_var.btf = btf;
15341 aux->btf_var.btf_id = type;
15342 }
15343
15344 /* check whether we recorded this BTF (and maybe module) already */
15345 for (i = 0; i < env->used_btf_cnt; i++) {
15346 if (env->used_btfs[i].btf == btf) {
15347 btf_put(btf);
15348 return 0;
15349 }
15350 }
15351
15352 if (env->used_btf_cnt >= MAX_USED_BTFS) {
15353 err = -E2BIG;
15354 goto err_put;
15355 }
15356
15357 btf_mod = &env->used_btfs[env->used_btf_cnt];
15358 btf_mod->btf = btf;
15359 btf_mod->module = NULL;
15360
15361 /* if we reference variables from a kernel module, bump its refcount */
15362 if (btf_is_module(btf)) {
15363 btf_mod->module = btf_try_get_module(btf);
15364 if (!btf_mod->module) {
15365 err = -ENXIO;
15366 goto err_put;
15367 }
15368 }
15369
15370 env->used_btf_cnt++;
15371
15372 return 0;
15373err_put:
15374 btf_put(btf);
15375 return err;
15376}
15377
15378static bool is_tracing_prog_type(enum bpf_prog_type type)
15379{
15380 switch (type) {
15381 case BPF_PROG_TYPE_KPROBE:
15382 case BPF_PROG_TYPE_TRACEPOINT:
15383 case BPF_PROG_TYPE_PERF_EVENT:
15384 case BPF_PROG_TYPE_RAW_TRACEPOINT:
15385 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
15386 return true;
15387 default:
15388 return false;
15389 }
15390}
15391
15392static int check_map_prog_compatibility(struct bpf_verifier_env *env,
15393 struct bpf_map *map,
15394 struct bpf_prog *prog)
15395
15396{
15397 enum bpf_prog_type prog_type = resolve_prog_type(prog);
15398
15399 if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
15400 btf_record_has_field(map->record, BPF_RB_ROOT)) {
15401 if (is_tracing_prog_type(prog_type)) {
15402 verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
15403 return -EINVAL;
15404 }
15405 }
15406
15407 if (btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
15408 if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
15409 verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
15410 return -EINVAL;
15411 }
15412
15413 if (is_tracing_prog_type(prog_type)) {
15414 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
15415 return -EINVAL;
15416 }
15417
15418 if (prog->aux->sleepable) {
15419 verbose(env, "sleepable progs cannot use bpf_spin_lock yet\n");
15420 return -EINVAL;
15421 }
15422 }
15423
15424 if (btf_record_has_field(map->record, BPF_TIMER)) {
15425 if (is_tracing_prog_type(prog_type)) {
15426 verbose(env, "tracing progs cannot use bpf_timer yet\n");
15427 return -EINVAL;
15428 }
15429 }
15430
15431 if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) &&
15432 !bpf_offload_prog_map_match(prog, map)) {
15433 verbose(env, "offload device mismatch between prog and map\n");
15434 return -EINVAL;
15435 }
15436
15437 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
15438 verbose(env, "bpf_struct_ops map cannot be used in prog\n");
15439 return -EINVAL;
15440 }
15441
15442 if (prog->aux->sleepable)
15443 switch (map->map_type) {
15444 case BPF_MAP_TYPE_HASH:
15445 case BPF_MAP_TYPE_LRU_HASH:
15446 case BPF_MAP_TYPE_ARRAY:
15447 case BPF_MAP_TYPE_PERCPU_HASH:
15448 case BPF_MAP_TYPE_PERCPU_ARRAY:
15449 case BPF_MAP_TYPE_LRU_PERCPU_HASH:
15450 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
15451 case BPF_MAP_TYPE_HASH_OF_MAPS:
15452 case BPF_MAP_TYPE_RINGBUF:
15453 case BPF_MAP_TYPE_USER_RINGBUF:
15454 case BPF_MAP_TYPE_INODE_STORAGE:
15455 case BPF_MAP_TYPE_SK_STORAGE:
15456 case BPF_MAP_TYPE_TASK_STORAGE:
15457 case BPF_MAP_TYPE_CGRP_STORAGE:
15458 break;
15459 default:
15460 verbose(env,
15461 "Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
15462 return -EINVAL;
15463 }
15464
15465 return 0;
15466}
15467
15468static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
15469{
15470 return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
15471 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
15472}
15473
15474/* find and rewrite pseudo imm in ld_imm64 instructions:
15475 *
15476 * 1. if it accesses map FD, replace it with actual map pointer.
15477 * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
15478 *
15479 * NOTE: btf_vmlinux is required for converting pseudo btf_id.
15480 */
15481static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
15482{
15483 struct bpf_insn *insn = env->prog->insnsi;
15484 int insn_cnt = env->prog->len;
15485 int i, j, err;
15486
15487 err = bpf_prog_calc_tag(env->prog);
15488 if (err)
15489 return err;
15490
15491 for (i = 0; i < insn_cnt; i++, insn++) {
15492 if (BPF_CLASS(insn->code) == BPF_LDX &&
15493 (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
15494 verbose(env, "BPF_LDX uses reserved fields\n");
15495 return -EINVAL;
15496 }
15497
15498 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
15499 struct bpf_insn_aux_data *aux;
15500 struct bpf_map *map;
15501 struct fd f;
15502 u64 addr;
15503 u32 fd;
15504
15505 if (i == insn_cnt - 1 || insn[1].code != 0 ||
15506 insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
15507 insn[1].off != 0) {
15508 verbose(env, "invalid bpf_ld_imm64 insn\n");
15509 return -EINVAL;
15510 }
15511
15512 if (insn[0].src_reg == 0)
15513 /* valid generic load 64-bit imm */
15514 goto next_insn;
15515
15516 if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
15517 aux = &env->insn_aux_data[i];
15518 err = check_pseudo_btf_id(env, insn, aux);
15519 if (err)
15520 return err;
15521 goto next_insn;
15522 }
15523
15524 if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
15525 aux = &env->insn_aux_data[i];
15526 aux->ptr_type = PTR_TO_FUNC;
15527 goto next_insn;
15528 }
15529
15530 /* In the final convert_pseudo_ld_imm64() step, this is
15531 * converted into a regular 64-bit imm load insn.
15532 */
15533 switch (insn[0].src_reg) {
15534 case BPF_PSEUDO_MAP_VALUE:
15535 case BPF_PSEUDO_MAP_IDX_VALUE:
15536 break;
15537 case BPF_PSEUDO_MAP_FD:
15538 case BPF_PSEUDO_MAP_IDX:
15539 if (insn[1].imm == 0)
15540 break;
15541 fallthrough;
15542 default:
15543 verbose(env, "unrecognized bpf_ld_imm64 insn\n");
15544 return -EINVAL;
15545 }
15546
15547 switch (insn[0].src_reg) {
15548 case BPF_PSEUDO_MAP_IDX_VALUE:
15549 case BPF_PSEUDO_MAP_IDX:
15550 if (bpfptr_is_null(env->fd_array)) {
15551 verbose(env, "fd_idx without fd_array is invalid\n");
15552 return -EPROTO;
15553 }
15554 if (copy_from_bpfptr_offset(&fd, env->fd_array,
15555 insn[0].imm * sizeof(fd),
15556 sizeof(fd)))
15557 return -EFAULT;
15558 break;
15559 default:
15560 fd = insn[0].imm;
15561 break;
15562 }
15563
15564 f = fdget(fd);
15565 map = __bpf_map_get(f);
15566 if (IS_ERR(map)) {
15567 verbose(env, "fd %d is not pointing to valid bpf_map\n",
15568 insn[0].imm);
15569 return PTR_ERR(map);
15570 }
15571
15572 err = check_map_prog_compatibility(env, map, env->prog);
15573 if (err) {
15574 fdput(f);
15575 return err;
15576 }
15577
15578 aux = &env->insn_aux_data[i];
15579 if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
15580 insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
15581 addr = (unsigned long)map;
15582 } else {
15583 u32 off = insn[1].imm;
15584
15585 if (off >= BPF_MAX_VAR_OFF) {
15586 verbose(env, "direct value offset of %u is not allowed\n", off);
15587 fdput(f);
15588 return -EINVAL;
15589 }
15590
15591 if (!map->ops->map_direct_value_addr) {
15592 verbose(env, "no direct value access support for this map type\n");
15593 fdput(f);
15594 return -EINVAL;
15595 }
15596
15597 err = map->ops->map_direct_value_addr(map, &addr, off);
15598 if (err) {
15599 verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
15600 map->value_size, off);
15601 fdput(f);
15602 return err;
15603 }
15604
15605 aux->map_off = off;
15606 addr += off;
15607 }
15608
15609 insn[0].imm = (u32)addr;
15610 insn[1].imm = addr >> 32;
15611
15612 /* check whether we recorded this map already */
15613 for (j = 0; j < env->used_map_cnt; j++) {
15614 if (env->used_maps[j] == map) {
15615 aux->map_index = j;
15616 fdput(f);
15617 goto next_insn;
15618 }
15619 }
15620
15621 if (env->used_map_cnt >= MAX_USED_MAPS) {
15622 fdput(f);
15623 return -E2BIG;
15624 }
15625
15626 /* hold the map. If the program is rejected by the verifier,
15627 * the map will be released by release_maps() or it
15628 * will be used by the valid program until it's unloaded
15629 * and all maps are released in free_used_maps()
15630 */
15631 bpf_map_inc(map);
15632
15633 aux->map_index = env->used_map_cnt;
15634 env->used_maps[env->used_map_cnt++] = map;
15635
15636 if (bpf_map_is_cgroup_storage(map) &&
15637 bpf_cgroup_storage_assign(env->prog->aux, map)) {
15638 verbose(env, "only one cgroup storage of each type is allowed\n");
15639 fdput(f);
15640 return -EBUSY;
15641 }
15642
15643 fdput(f);
15644next_insn:
15645 insn++;
15646 i++;
15647 continue;
15648 }
15649
15650 /* Basic sanity check before we invest more work here. */
15651 if (!bpf_opcode_in_insntable(insn->code)) {
15652 verbose(env, "unknown opcode %02x\n", insn->code);
15653 return -EINVAL;
15654 }
15655 }
15656
15657 /* now all pseudo BPF_LD_IMM64 instructions load valid
15658 * 'struct bpf_map *' into a register instead of user map_fd.
15659 * These pointers will be used later by the verifier to validate map access.
15660 */
15661 return 0;
15662}
15663
15664/* drop refcnt of maps used by the rejected program */
15665static void release_maps(struct bpf_verifier_env *env)
15666{
15667 __bpf_free_used_maps(env->prog->aux, env->used_maps,
15668 env->used_map_cnt);
15669}
15670
15671 /* drop refcnt of btfs used by the rejected program */
15672static void release_btfs(struct bpf_verifier_env *env)
15673{
15674 __bpf_free_used_btfs(env->prog->aux, env->used_btfs,
15675 env->used_btf_cnt);
15676}
15677
15678/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
15679static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
15680{
15681 struct bpf_insn *insn = env->prog->insnsi;
15682 int insn_cnt = env->prog->len;
15683 int i;
15684
15685 for (i = 0; i < insn_cnt; i++, insn++) {
15686 if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
15687 continue;
15688 if (insn->src_reg == BPF_PSEUDO_FUNC)
15689 continue;
15690 insn->src_reg = 0;
15691 }
15692}
15693
15694 /* single env->prog->insnsi[off] instruction was replaced with the range
15695 * insnsi[off, off + cnt). Adjust the corresponding insn_aux_data by copying
15696 * [0, off) and [off, end) to new locations, so that the patched range stays zero
15697 */
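/* A worked example for illustration: with off == 3 and cnt == 3 the old
 * aux array
 *   [a0 a1 a2 | a3 | a4 a5]
 * becomes
 *   [a0 a1 a2 | n n a3 | a4 a5]
 * i.e. a3 is kept for the last insn of the patched range, the two new
 * slots ('n') inherit a3's 'seen' flag, and zext_dst is recomputed for
 * every insn in the patched range.
 */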
15698static void adjust_insn_aux_data(struct bpf_verifier_env *env,
15699 struct bpf_insn_aux_data *new_data,
15700 struct bpf_prog *new_prog, u32 off, u32 cnt)
15701{
15702 struct bpf_insn_aux_data *old_data = env->insn_aux_data;
15703 struct bpf_insn *insn = new_prog->insnsi;
15704 u32 old_seen = old_data[off].seen;
15705 u32 prog_len;
15706 int i;
15707
15708 /* aux info at OFF always needs adjustment, no matter whether the fast
15709 * path (cnt == 1) is taken or not. There is no guarantee that the insn
15710 * at OFF is the original insn of the old prog.
15711 */
15712 old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
15713
15714 if (cnt == 1)
15715 return;
15716 prog_len = new_prog->len;
15717
15718 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
15719 memcpy(new_data + off + cnt - 1, old_data + off,
15720 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
15721 for (i = off; i < off + cnt - 1; i++) {
15722 /* Expand insni[off]'s seen count to the patched range. */
15723 new_data[i].seen = old_seen;
15724 new_data[i].zext_dst = insn_has_def32(env, insn + i);
15725 }
15726 env->insn_aux_data = new_data;
15727 vfree(old_data);
15728}
15729
15730static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
15731{
15732 int i;
15733
15734 if (len == 1)
15735 return;
15736 /* NOTE: fake 'exit' subprog should be updated as well. */
15737 for (i = 0; i <= env->subprog_cnt; i++) {
15738 if (env->subprog_info[i].start <= off)
15739 continue;
15740 env->subprog_info[i].start += len - 1;
15741 }
15742}
15743
15744static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
15745{
15746 struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
15747 int i, sz = prog->aux->size_poke_tab;
15748 struct bpf_jit_poke_descriptor *desc;
15749
15750 for (i = 0; i < sz; i++) {
15751 desc = &tab[i];
15752 if (desc->insn_idx <= off)
15753 continue;
15754 desc->insn_idx += len - 1;
15755 }
15756}
15757
15758static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
15759 const struct bpf_insn *patch, u32 len)
15760{
15761 struct bpf_prog *new_prog;
15762 struct bpf_insn_aux_data *new_data = NULL;
15763
15764 if (len > 1) {
15765 new_data = vzalloc(array_size(env->prog->len + len - 1,
15766 sizeof(struct bpf_insn_aux_data)));
15767 if (!new_data)
15768 return NULL;
15769 }
15770
15771 new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
15772 if (IS_ERR(new_prog)) {
15773 if (PTR_ERR(new_prog) == -ERANGE)
15774 verbose(env,
15775 "insn %d cannot be patched due to 16-bit range\n",
15776 env->insn_aux_data[off].orig_idx);
15777 vfree(new_data);
15778 return NULL;
15779 }
15780 adjust_insn_aux_data(env, new_data, new_prog, off, len);
15781 adjust_subprog_starts(env, off, len);
15782 adjust_poke_descs(new_prog, off, len);
15783 return new_prog;
15784}
15785
15786static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
15787 u32 off, u32 cnt)
15788{
15789 int i, j;
15790
15791 /* find first prog starting at or after off (first to remove) */
15792 for (i = 0; i < env->subprog_cnt; i++)
15793 if (env->subprog_info[i].start >= off)
15794 break;
15795 /* find first prog starting at or after off + cnt (first to stay) */
15796 for (j = i; j < env->subprog_cnt; j++)
15797 if (env->subprog_info[j].start >= off + cnt)
15798 break;
15799 /* if j doesn't start exactly at off + cnt, we are just removing
15800 * the front of previous prog
15801 */
15802 if (env->subprog_info[j].start != off + cnt)
15803 j--;
15804
15805 if (j > i) {
15806 struct bpf_prog_aux *aux = env->prog->aux;
15807 int move;
15808
15809 /* move fake 'exit' subprog as well */
15810 move = env->subprog_cnt + 1 - j;
15811
15812 memmove(env->subprog_info + i,
15813 env->subprog_info + j,
15814 sizeof(*env->subprog_info) * move);
15815 env->subprog_cnt -= j - i;
15816
15817 /* remove func_info */
15818 if (aux->func_info) {
15819 move = aux->func_info_cnt - j;
15820
15821 memmove(aux->func_info + i,
15822 aux->func_info + j,
15823 sizeof(*aux->func_info) * move);
15824 aux->func_info_cnt -= j - i;
15825 /* func_info->insn_off is set after all code rewrites,
15826 * in adjust_btf_func() - no need to adjust
15827 */
15828 }
15829 } else {
15830 /* convert i from "first prog to remove" to "first to adjust" */
15831 if (env->subprog_info[i].start == off)
15832 i++;
15833 }
15834
15835 /* update fake 'exit' subprog as well */
15836 for (; i <= env->subprog_cnt; i++)
15837 env->subprog_info[i].start -= cnt;
15838
15839 return 0;
15840}
15841
15842static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
15843 u32 cnt)
15844{
15845 struct bpf_prog *prog = env->prog;
15846 u32 i, l_off, l_cnt, nr_linfo;
15847 struct bpf_line_info *linfo;
15848
15849 nr_linfo = prog->aux->nr_linfo;
15850 if (!nr_linfo)
15851 return 0;
15852
15853 linfo = prog->aux->linfo;
15854
15855 /* find first line info to remove, count lines to be removed */
15856 for (i = 0; i < nr_linfo; i++)
15857 if (linfo[i].insn_off >= off)
15858 break;
15859
15860 l_off = i;
15861 l_cnt = 0;
15862 for (; i < nr_linfo; i++)
15863 if (linfo[i].insn_off < off + cnt)
15864 l_cnt++;
15865 else
15866 break;
15867
15868 /* If the first live insn doesn't match the first live linfo, it needs to
15869 * "inherit" the last removed linfo. prog is already modified, so
15870 * prog->len == off means no live instructions remain (the tail was removed).
15871 */
15872 if (prog->len != off && l_cnt &&
15873 (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
15874 l_cnt--;
15875 linfo[--i].insn_off = off + cnt;
15876 }
15877
15878 /* remove the line info which refer to the removed instructions */
15879 if (l_cnt) {
15880 memmove(linfo + l_off, linfo + i,
15881 sizeof(*linfo) * (nr_linfo - i));
15882
15883 prog->aux->nr_linfo -= l_cnt;
15884 nr_linfo = prog->aux->nr_linfo;
15885 }
15886
15887 /* pull all linfo[i].insn_off >= off + cnt in by cnt */
15888 for (i = l_off; i < nr_linfo; i++)
15889 linfo[i].insn_off -= cnt;
15890
15891 /* fix up all subprogs (incl. 'exit') which start >= off */
15892 for (i = 0; i <= env->subprog_cnt; i++)
15893 if (env->subprog_info[i].linfo_idx > l_off) {
15894 /* program may have started in the removed region but
15895 * may not be fully removed
15896 */
15897 if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
15898 env->subprog_info[i].linfo_idx -= l_cnt;
15899 else
15900 env->subprog_info[i].linfo_idx = l_off;
15901 }
15902
15903 return 0;
15904}
15905
15906static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
15907{
15908 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
15909 unsigned int orig_prog_len = env->prog->len;
15910 int err;
15911
15912 if (bpf_prog_is_offloaded(env->prog->aux))
15913 bpf_prog_offload_remove_insns(env, off, cnt);
15914
15915 err = bpf_remove_insns(env->prog, off, cnt);
15916 if (err)
15917 return err;
15918
15919 err = adjust_subprog_starts_after_remove(env, off, cnt);
15920 if (err)
15921 return err;
15922
15923 err = bpf_adj_linfo_after_remove(env, off, cnt);
15924 if (err)
15925 return err;
15926
15927 memmove(aux_data + off, aux_data + off + cnt,
15928 sizeof(*aux_data) * (orig_prog_len - off - cnt));
15929
15930 return 0;
15931}
15932
15933/* The verifier does more data flow analysis than llvm and will not
15934 * explore branches that are dead at run time. Malicious programs can
15935 * have dead code too. Therefore replace all dead at-run-time code
15936 * with 'ja -1'.
15937 *
15938 * Plain nops would not be optimal: if they sat at the end of the
15939 * program and, through another bug, we managed to jump there, we
15940 * would execute beyond program memory. Returning an exception code
15941 * also wouldn't work, since the dead code could be located inside a
15942 * subprog.
15943 */
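/* Concretely, every dead slot ends up as
 *   BPF_JMP_IMM(BPF_JA, 0, 0, -1)
 * i.e. an insn that jumps to itself, so even a stray jump into the dead
 * region can never run past the end of the program.
 */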
15944static void sanitize_dead_code(struct bpf_verifier_env *env)
15945{
15946 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
15947 struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
15948 struct bpf_insn *insn = env->prog->insnsi;
15949 const int insn_cnt = env->prog->len;
15950 int i;
15951
15952 for (i = 0; i < insn_cnt; i++) {
15953 if (aux_data[i].seen)
15954 continue;
15955 memcpy(insn + i, &trap, sizeof(trap));
15956 aux_data[i].zext_dst = false;
15957 }
15958}
15959
15960static bool insn_is_cond_jump(u8 code)
15961{
15962 u8 op;
15963
15964 if (BPF_CLASS(code) == BPF_JMP32)
15965 return true;
15966
15967 if (BPF_CLASS(code) != BPF_JMP)
15968 return false;
15969
15970 op = BPF_OP(code);
15971 return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
15972}
15973
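/* A rough sketch of the rewrite below: if the fall-through insn of a
 * conditional jump was never marked 'seen', the branch is always taken and
 * the insn becomes BPF_JMP_IMM(BPF_JA, 0, 0, insn->off); if instead the
 * jump target was never 'seen', the branch is never taken and it becomes
 * BPF_JMP_IMM(BPF_JA, 0, 0, 0), a nop that opt_remove_nops() strips later.
 */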
15974static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
15975{
15976 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
15977 struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
15978 struct bpf_insn *insn = env->prog->insnsi;
15979 const int insn_cnt = env->prog->len;
15980 int i;
15981
15982 for (i = 0; i < insn_cnt; i++, insn++) {
15983 if (!insn_is_cond_jump(insn->code))
15984 continue;
15985
15986 if (!aux_data[i + 1].seen)
15987 ja.off = insn->off;
15988 else if (!aux_data[i + 1 + insn->off].seen)
15989 ja.off = 0;
15990 else
15991 continue;
15992
15993 if (bpf_prog_is_offloaded(env->prog->aux))
15994 bpf_prog_offload_replace_insn(env, i, &ja);
15995
15996 memcpy(insn, &ja, sizeof(ja));
15997 }
15998}
15999
16000static int opt_remove_dead_code(struct bpf_verifier_env *env)
16001{
16002 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
16003 int insn_cnt = env->prog->len;
16004 int i, err;
16005
16006 for (i = 0; i < insn_cnt; i++) {
16007 int j;
16008
16009 j = 0;
16010 while (i + j < insn_cnt && !aux_data[i + j].seen)
16011 j++;
16012 if (!j)
16013 continue;
16014
16015 err = verifier_remove_insns(env, i, j);
16016 if (err)
16017 return err;
16018 insn_cnt = env->prog->len;
16019 }
16020
16021 return 0;
16022}
16023
16024static int opt_remove_nops(struct bpf_verifier_env *env)
16025{
16026 const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
16027 struct bpf_insn *insn = env->prog->insnsi;
16028 int insn_cnt = env->prog->len;
16029 int i, err;
16030
16031 for (i = 0; i < insn_cnt; i++) {
16032 if (memcmp(&insn[i], &ja, sizeof(ja)))
16033 continue;
16034
16035 err = verifier_remove_insns(env, i, 1);
16036 if (err)
16037 return err;
16038 insn_cnt--;
16039 i--;
16040 }
16041
16042 return 0;
16043}
16044
16045static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
16046 const union bpf_attr *attr)
16047{
16048 struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
16049 struct bpf_insn_aux_data *aux = env->insn_aux_data;
16050 int i, patch_len, delta = 0, len = env->prog->len;
16051 struct bpf_insn *insns = env->prog->insnsi;
16052 struct bpf_prog *new_prog;
16053 bool rnd_hi32;
16054
16055 rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
16056 zext_patch[1] = BPF_ZEXT_REG(0);
16057 rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
16058 rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
16059 rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
16060 for (i = 0; i < len; i++) {
16061 int adj_idx = i + delta;
16062 struct bpf_insn insn;
16063 int load_reg;
16064
16065 insn = insns[adj_idx];
16066 load_reg = insn_def_regno(&insn);
16067 if (!aux[adj_idx].zext_dst) {
16068 u8 code, class;
16069 u32 imm_rnd;
16070
16071 if (!rnd_hi32)
16072 continue;
16073
16074 code = insn.code;
16075 class = BPF_CLASS(code);
16076 if (load_reg == -1)
16077 continue;
16078
16079 /* NOTE: arg "reg" (the fourth one) is only used for
16080 * BPF_STX + SRC_OP, so it is safe to pass NULL
16081 * here.
16082 */
16083 if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
16084 if (class == BPF_LD &&
16085 BPF_MODE(code) == BPF_IMM)
16086 i++;
16087 continue;
16088 }
16089
16090 /* ctx load could be transformed into wider load. */
16091 if (class == BPF_LDX &&
16092 aux[adj_idx].ptr_type == PTR_TO_CTX)
16093 continue;
16094
16095 imm_rnd = get_random_u32();
16096 rnd_hi32_patch[0] = insn;
16097 rnd_hi32_patch[1].imm = imm_rnd;
16098 rnd_hi32_patch[3].dst_reg = load_reg;
16099 patch = rnd_hi32_patch;
16100 patch_len = 4;
16101 goto apply_patch_buffer;
16102 }
16103
16104 /* Add in a zero-extend instruction if a) the JIT has requested
16105 * it or b) it's a CMPXCHG.
16106 *
16107 * The latter is because: BPF_CMPXCHG always loads a value into
16108 * R0, therefore always zero-extends. However some archs'
16109 * equivalent instruction only does this load when the
16110 * comparison is successful. This detail of CMPXCHG is
16111 * orthogonal to the general zero-extension behaviour of the
16112 * CPU, so it's treated independently of bpf_jit_needs_zext.
16113 */
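/* Sketch of the patch built below: for an insn that defines a 32-bit
 * subregister in load_reg, zext_patch becomes
 *   { <original insn>, BPF_ZEXT_REG(load_reg) }
 * where BPF_ZEXT_REG() is the special mov32 reg,reg with imm == 1 that
 * JITs recognize as an explicit zero-extension request.
 */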
16114 if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
16115 continue;
16116
16117 /* Zero-extension is done by the caller. */
16118 if (bpf_pseudo_kfunc_call(&insn))
16119 continue;
16120
16121 if (WARN_ON(load_reg == -1)) {
16122 verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
16123 return -EFAULT;
16124 }
16125
16126 zext_patch[0] = insn;
16127 zext_patch[1].dst_reg = load_reg;
16128 zext_patch[1].src_reg = load_reg;
16129 patch = zext_patch;
16130 patch_len = 2;
16131apply_patch_buffer:
16132 new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
16133 if (!new_prog)
16134 return -ENOMEM;
16135 env->prog = new_prog;
16136 insns = new_prog->insnsi;
16137 aux = env->insn_aux_data;
16138 delta += patch_len - 1;
16139 }
16140
16141 return 0;
16142}
16143
16144/* convert load instructions that access fields of a context type into a
16145 * sequence of instructions that access fields of the underlying structure:
16146 * struct __sk_buff -> struct sk_buff
16147 * struct bpf_sock_ops -> struct sock
16148 */
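/* A hypothetical example: a program reading skb->len through the ctx as
 *   BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, len))
 * has the load rewritten by the prog type's convert_ctx_access() into a
 * load from the corresponding field offset inside the real 'struct sk_buff'.
 */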
16149static int convert_ctx_accesses(struct bpf_verifier_env *env)
16150{
16151 const struct bpf_verifier_ops *ops = env->ops;
16152 int i, cnt, size, ctx_field_size, delta = 0;
16153 const int insn_cnt = env->prog->len;
16154 struct bpf_insn insn_buf[16], *insn;
16155 u32 target_size, size_default, off;
16156 struct bpf_prog *new_prog;
16157 enum bpf_access_type type;
16158 bool is_narrower_load;
16159
16160 if (ops->gen_prologue || env->seen_direct_write) {
16161 if (!ops->gen_prologue) {
16162 verbose(env, "bpf verifier is misconfigured\n");
16163 return -EINVAL;
16164 }
16165 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
16166 env->prog);
16167 if (cnt >= ARRAY_SIZE(insn_buf)) {
16168 verbose(env, "bpf verifier is misconfigured\n");
16169 return -EINVAL;
16170 } else if (cnt) {
16171 new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
16172 if (!new_prog)
16173 return -ENOMEM;
16174
16175 env->prog = new_prog;
16176 delta += cnt - 1;
16177 }
16178 }
16179
16180 if (bpf_prog_is_offloaded(env->prog->aux))
16181 return 0;
16182
16183 insn = env->prog->insnsi + delta;
16184
16185 for (i = 0; i < insn_cnt; i++, insn++) {
16186 bpf_convert_ctx_access_t convert_ctx_access;
16187
16188 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
16189 insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
16190 insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
16191 insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
16192 type = BPF_READ;
16193 } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
16194 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
16195 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
16196 insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
16197 insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
16198 insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
16199 insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
16200 insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
16201 type = BPF_WRITE;
16202 } else {
16203 continue;
16204 }
16205
16206 if (type == BPF_WRITE &&
16207 env->insn_aux_data[i + delta].sanitize_stack_spill) {
16208 struct bpf_insn patch[] = {
16209 *insn,
16210 BPF_ST_NOSPEC(),
16211 };
16212
16213 cnt = ARRAY_SIZE(patch);
16214 new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
16215 if (!new_prog)
16216 return -ENOMEM;
16217
16218 delta += cnt - 1;
16219 env->prog = new_prog;
16220 insn = new_prog->insnsi + i + delta;
16221 continue;
16222 }
16223
16224 switch ((int)env->insn_aux_data[i + delta].ptr_type) {
16225 case PTR_TO_CTX:
16226 if (!ops->convert_ctx_access)
16227 continue;
16228 convert_ctx_access = ops->convert_ctx_access;
16229 break;
16230 case PTR_TO_SOCKET:
16231 case PTR_TO_SOCK_COMMON:
16232 convert_ctx_access = bpf_sock_convert_ctx_access;
16233 break;
16234 case PTR_TO_TCP_SOCK:
16235 convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
16236 break;
16237 case PTR_TO_XDP_SOCK:
16238 convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
16239 break;
16240 case PTR_TO_BTF_ID:
16241 case PTR_TO_BTF_ID | PTR_UNTRUSTED:
16242 /* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
16243 * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
16244 * be said once it is marked PTR_UNTRUSTED, hence we must handle
16245 * any faults for loads into such types. BPF_WRITE is disallowed
16246 * for this case.
16247 */
16248 case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
16249 if (type == BPF_READ) {
16250 insn->code = BPF_LDX | BPF_PROBE_MEM |
16251 BPF_SIZE((insn)->code);
16252 env->prog->aux->num_exentries++;
16253 }
16254 continue;
16255 default:
16256 continue;
16257 }
16258
16259 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
16260 size = BPF_LDST_BYTES(insn);
16261
16262 /* If the read access is a narrower load of the field,
16263 * convert to a 4/8-byte load, to minimize program type specific
16264 * convert_ctx_access changes. If conversion is successful,
16265 * we will apply proper mask to the result.
16266 */
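/* For example, a 1-byte read at ctx offset 'off' within a 4-byte field is
 * first widened to a BPF_W load from the aligned offset; the requested
 * byte is then recovered below with an optional right shift by
 * bpf_ctx_narrow_access_offset(off, 1, 4) * 8 bits followed by
 * BPF_ALU32_IMM(BPF_AND, dst_reg, 0xff).
 */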
16267 is_narrower_load = size < ctx_field_size;
16268 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
16269 off = insn->off;
16270 if (is_narrower_load) {
16271 u8 size_code;
16272
16273 if (type == BPF_WRITE) {
16274 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
16275 return -EINVAL;
16276 }
16277
16278 size_code = BPF_H;
16279 if (ctx_field_size == 4)
16280 size_code = BPF_W;
16281 else if (ctx_field_size == 8)
16282 size_code = BPF_DW;
16283
16284 insn->off = off & ~(size_default - 1);
16285 insn->code = BPF_LDX | BPF_MEM | size_code;
16286 }
16287
16288 target_size = 0;
16289 cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
16290 &target_size);
16291 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
16292 (ctx_field_size && !target_size)) {
16293 verbose(env, "bpf verifier is misconfigured\n");
16294 return -EINVAL;
16295 }
16296
16297 if (is_narrower_load && size < target_size) {
16298 u8 shift = bpf_ctx_narrow_access_offset(
16299 off, size, size_default) * 8;
16300 if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
16301 verbose(env, "bpf verifier narrow ctx load misconfigured\n");
16302 return -EINVAL;
16303 }
16304 if (ctx_field_size <= 4) {
16305 if (shift)
16306 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
16307 insn->dst_reg,
16308 shift);
16309 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
16310 (1 << size * 8) - 1);
16311 } else {
16312 if (shift)
16313 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
16314 insn->dst_reg,
16315 shift);
16316 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
16317 (1ULL << size * 8) - 1);
16318 }
16319 }
16320
16321 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
16322 if (!new_prog)
16323 return -ENOMEM;
16324
16325 delta += cnt - 1;
16326
16327 /* keep walking new program and skip insns we just inserted */
16328 env->prog = new_prog;
16329 insn = new_prog->insnsi + i + delta;
16330 }
16331
16332 return 0;
16333}
16334
16335static int jit_subprogs(struct bpf_verifier_env *env)
16336{
16337 struct bpf_prog *prog = env->prog, **func, *tmp;
16338 int i, j, subprog_start, subprog_end = 0, len, subprog;
16339 struct bpf_map *map_ptr;
16340 struct bpf_insn *insn;
16341 void *old_bpf_func;
16342 int err, num_exentries;
16343
16344 if (env->subprog_cnt <= 1)
16345 return 0;
16346
16347 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
16348 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
16349 continue;
16350
16351 /* Upon error here we cannot fall back to interpreter but
16352 * need a hard reject of the program. Thus -EFAULT is
16353 * propagated in any case.
16354 */
16355 subprog = find_subprog(env, i + insn->imm + 1);
16356 if (subprog < 0) {
16357 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
16358 i + insn->imm + 1);
16359 return -EFAULT;
16360 }
16361 /* temporarily remember subprog id inside insn instead of
16362 * aux_data, since next loop will split up all insns into funcs
16363 */
16364 insn->off = subprog;
16365 /* remember original imm in case JIT fails and fallback
16366 * to interpreter will be needed
16367 */
16368 env->insn_aux_data[i].call_imm = insn->imm;
16369 /* point imm to __bpf_call_base+1 from JITs point of view */
16370 insn->imm = 1;
16371 if (bpf_pseudo_func(insn))
16372 /* jit (e.g. x86_64) may emit fewer instructions
16373 * if it learns a u32 imm is the same as a u64 imm.
16374 * Force a non-zero imm here.
16375 */
16376 insn[1].imm = 1;
16377 }
16378
16379 err = bpf_prog_alloc_jited_linfo(prog);
16380 if (err)
16381 goto out_undo_insn;
16382
16383 err = -ENOMEM;
16384 func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
16385 if (!func)
16386 goto out_undo_insn;
16387
16388 for (i = 0; i < env->subprog_cnt; i++) {
16389 subprog_start = subprog_end;
16390 subprog_end = env->subprog_info[i + 1].start;
16391
16392 len = subprog_end - subprog_start;
16393 /* bpf_prog_run() doesn't call subprogs directly,
16394 * hence main prog stats include the runtime of subprogs.
16395 * subprogs don't have IDs and are not reachable via prog_get_next_id;
16396 * func[i]->stats will never be accessed and stays NULL
16397 */
16398 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
16399 if (!func[i])
16400 goto out_free;
16401 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
16402 len * sizeof(struct bpf_insn));
16403 func[i]->type = prog->type;
16404 func[i]->len = len;
16405 if (bpf_prog_calc_tag(func[i]))
16406 goto out_free;
16407 func[i]->is_func = 1;
16408 func[i]->aux->func_idx = i;
16409 /* Below members will be freed only at prog->aux */
16410 func[i]->aux->btf = prog->aux->btf;
16411 func[i]->aux->func_info = prog->aux->func_info;
16412 func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
16413 func[i]->aux->poke_tab = prog->aux->poke_tab;
16414 func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
16415
16416 for (j = 0; j < prog->aux->size_poke_tab; j++) {
16417 struct bpf_jit_poke_descriptor *poke;
16418
16419 poke = &prog->aux->poke_tab[j];
16420 if (poke->insn_idx < subprog_end &&
16421 poke->insn_idx >= subprog_start)
16422 poke->aux = func[i]->aux;
16423 }
16424
16425 func[i]->aux->name[0] = 'F';
16426 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
16427 func[i]->jit_requested = 1;
16428 func[i]->blinding_requested = prog->blinding_requested;
16429 func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
16430 func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
16431 func[i]->aux->linfo = prog->aux->linfo;
16432 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
16433 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
16434 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
16435 num_exentries = 0;
16436 insn = func[i]->insnsi;
16437 for (j = 0; j < func[i]->len; j++, insn++) {
16438 if (BPF_CLASS(insn->code) == BPF_LDX &&
16439 BPF_MODE(insn->code) == BPF_PROBE_MEM)
16440 num_exentries++;
16441 }
16442 func[i]->aux->num_exentries = num_exentries;
16443 func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
16444 func[i] = bpf_int_jit_compile(func[i]);
16445 if (!func[i]->jited) {
16446 err = -ENOTSUPP;
16447 goto out_free;
16448 }
16449 cond_resched();
16450 }
16451
16452 /* at this point all bpf functions were successfully JITed
16453 * now populate all bpf_calls with correct addresses and
16454 * run last pass of JIT
16455 */
16456 for (i = 0; i < env->subprog_cnt; i++) {
16457 insn = func[i]->insnsi;
16458 for (j = 0; j < func[i]->len; j++, insn++) {
16459 if (bpf_pseudo_func(insn)) {
16460 subprog = insn->off;
16461 insn[0].imm = (u32)(long)func[subprog]->bpf_func;
16462 insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
16463 continue;
16464 }
16465 if (!bpf_pseudo_call(insn))
16466 continue;
16467 subprog = insn->off;
16468 insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
16469 }
16470
16471 /* we use the aux data to keep a list of the start addresses
16472 * of the JITed images for each function in the program
16473 *
16474 * for some architectures, such as powerpc64, the imm field
16475 * might not be large enough to hold the offset of the start
16476 * address of the callee's JITed image from __bpf_call_base
16477 *
16478 * in such cases, we can lookup the start address of a callee
16479 * by using its subprog id, available from the off field of
16480 * the call instruction, as an index for this list
16481 */
16482 func[i]->aux->func = func;
16483 func[i]->aux->func_cnt = env->subprog_cnt;
16484 }
16485 for (i = 0; i < env->subprog_cnt; i++) {
16486 old_bpf_func = func[i]->bpf_func;
16487 tmp = bpf_int_jit_compile(func[i]);
16488 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
16489 verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
16490 err = -ENOTSUPP;
16491 goto out_free;
16492 }
16493 cond_resched();
16494 }
16495
16496 /* finally lock prog and jit images for all functions and
16497 * populate kallsyms
16498 */
16499 for (i = 0; i < env->subprog_cnt; i++) {
16500 bpf_prog_lock_ro(func[i]);
16501 bpf_prog_kallsyms_add(func[i]);
16502 }
16503
16504 /* Last step: make now unused interpreter insns from main
16505 * prog consistent for later dump requests, so they can
16506 * later look the same as if they were interpreted only.
16507 */
16508 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
16509 if (bpf_pseudo_func(insn)) {
16510 insn[0].imm = env->insn_aux_data[i].call_imm;
16511 insn[1].imm = insn->off;
16512 insn->off = 0;
16513 continue;
16514 }
16515 if (!bpf_pseudo_call(insn))
16516 continue;
16517 insn->off = env->insn_aux_data[i].call_imm;
16518 subprog = find_subprog(env, i + insn->off + 1);
16519 insn->imm = subprog;
16520 }
16521
16522 prog->jited = 1;
16523 prog->bpf_func = func[0]->bpf_func;
16524 prog->jited_len = func[0]->jited_len;
16525 prog->aux->func = func;
16526 prog->aux->func_cnt = env->subprog_cnt;
16527 bpf_prog_jit_attempt_done(prog);
16528 return 0;
16529out_free:
16530 /* We failed JIT'ing, so at this point we need to unregister poke
16531 * descriptors from subprogs, so that the kernel is not attempting to
16532 * patch them anymore as we're freeing the subprog JIT memory.
16533 */
16534 for (i = 0; i < prog->aux->size_poke_tab; i++) {
16535 map_ptr = prog->aux->poke_tab[i].tail_call.map;
16536 map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
16537 }
16538 /* At this point we're guaranteed that poke descriptors are not
16539 * live anymore. We can just unlink each subprog's descriptor table
16540 * as it's released with the main prog.
16541 */
16542 for (i = 0; i < env->subprog_cnt; i++) {
16543 if (!func[i])
16544 continue;
16545 func[i]->aux->poke_tab = NULL;
16546 bpf_jit_free(func[i]);
16547 }
16548 kfree(func);
16549out_undo_insn:
16550 /* cleanup main prog to be interpreted */
16551 prog->jit_requested = 0;
16552 prog->blinding_requested = 0;
16553 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
16554 if (!bpf_pseudo_call(insn))
16555 continue;
16556 insn->off = 0;
16557 insn->imm = env->insn_aux_data[i].call_imm;
16558 }
16559 bpf_prog_jit_attempt_done(prog);
16560 return err;
16561}
16562
16563static int fixup_call_args(struct bpf_verifier_env *env)
16564{
16565#ifndef CONFIG_BPF_JIT_ALWAYS_ON
16566 struct bpf_prog *prog = env->prog;
16567 struct bpf_insn *insn = prog->insnsi;
16568 bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
16569 int i, depth;
16570#endif
16571 int err = 0;
16572
16573 if (env->prog->jit_requested &&
16574 !bpf_prog_is_offloaded(env->prog->aux)) {
16575 err = jit_subprogs(env);
16576 if (err == 0)
16577 return 0;
16578 if (err == -EFAULT)
16579 return err;
16580 }
16581#ifndef CONFIG_BPF_JIT_ALWAYS_ON
16582 if (has_kfunc_call) {
16583 verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
16584 return -EINVAL;
16585 }
16586 if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
16587 /* When JIT fails the progs with bpf2bpf calls and tail_calls
16588 * have to be rejected, since interpreter doesn't support them yet.
16589 */
16590 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
16591 return -EINVAL;
16592 }
16593 for (i = 0; i < prog->len; i++, insn++) {
16594 if (bpf_pseudo_func(insn)) {
16595 /* When JIT fails the progs with callback calls
16596 * have to be rejected, since interpreter doesn't support them yet.
16597 */
16598 verbose(env, "callbacks are not allowed in non-JITed programs\n");
16599 return -EINVAL;
16600 }
16601
16602 if (!bpf_pseudo_call(insn))
16603 continue;
16604 depth = get_callee_stack_depth(env, insn, i);
16605 if (depth < 0)
16606 return depth;
16607 bpf_patch_call_args(insn, depth);
16608 }
16609 err = 0;
16610#endif
16611 return err;
16612}
16613
16614static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
16615 struct bpf_insn *insn_buf, int insn_idx, int *cnt)
16616{
16617 const struct bpf_kfunc_desc *desc;
16618 void *xdp_kfunc;
16619
16620 if (!insn->imm) {
16621 verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
16622 return -EINVAL;
16623 }
16624
16625 *cnt = 0;
16626
16627 if (bpf_dev_bound_kfunc_id(insn->imm)) {
16628 xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm);
16629 if (xdp_kfunc) {
16630 insn->imm = BPF_CALL_IMM(xdp_kfunc);
16631 return 0;
16632 }
16633
16634 /* fallback to default kfunc when not supported by netdev */
16635 }
16636
16637 /* insn->imm has the btf func_id. Replace it with
16638 * an address (relative to __bpf_call_base).
16639 */
16640 desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
16641 if (!desc) {
16642 verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
16643 insn->imm);
16644 return -EFAULT;
16645 }
16646
16647 insn->imm = desc->imm;
16648 if (insn->off)
16649 return 0;
16650 if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
16651 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
16652 struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
16653 u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
16654
16655 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
16656 insn_buf[1] = addr[0];
16657 insn_buf[2] = addr[1];
16658 insn_buf[3] = *insn;
16659 *cnt = 4;
16660 } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
16661 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
16662 struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
16663
16664 insn_buf[0] = addr[0];
16665 insn_buf[1] = addr[1];
16666 insn_buf[2] = *insn;
16667 *cnt = 3;
16668 } else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
16669 desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
16670 insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
16671 *cnt = 1;
16672 } else if (desc->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
16673 bool seen_direct_write = env->seen_direct_write;
16674 bool is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
16675
16676 if (is_rdonly)
16677 insn->imm = BPF_CALL_IMM(bpf_dynptr_from_skb_rdonly);
16678
16679 /* restore env->seen_direct_write to its original value, since
16680 * may_access_direct_pkt_data mutates it
16681 */
16682 env->seen_direct_write = seen_direct_write;
16683 }
16684 return 0;
16685}
16686
16687/* Do various post-verification rewrites in a single program pass.
16688 * These rewrites simplify JIT and interpreter implementations.
16689 */
16690static int do_misc_fixups(struct bpf_verifier_env *env)
16691{
16692 struct bpf_prog *prog = env->prog;
16693 enum bpf_attach_type eatype = prog->expected_attach_type;
16694 enum bpf_prog_type prog_type = resolve_prog_type(prog);
16695 struct bpf_insn *insn = prog->insnsi;
16696 const struct bpf_func_proto *fn;
16697 const int insn_cnt = prog->len;
16698 const struct bpf_map_ops *ops;
16699 struct bpf_insn_aux_data *aux;
16700 struct bpf_insn insn_buf[16];
16701 struct bpf_prog *new_prog;
16702 struct bpf_map *map_ptr;
16703 int i, ret, cnt, delta = 0;
16704
16705 for (i = 0; i < insn_cnt; i++, insn++) {
16706 /* Make divide-by-zero exceptions impossible. */
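/* BPF semantics: Rx / 0 yields 0 and Rx % 0 leaves Rx unchanged (with the
 * usual 32-bit truncation for BPF_ALU ops), so the patchlets below emit an
 * explicit runtime check on the divisor instead of relying on the host
 * CPU's divide-by-zero behaviour.
 */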
16707 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
16708 insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
16709 insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
16710 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
16711 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
16712 bool isdiv = BPF_OP(insn->code) == BPF_DIV;
16713 struct bpf_insn *patchlet;
16714 struct bpf_insn chk_and_div[] = {
16715 /* [R,W]x div 0 -> 0 */
16716 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
16717 BPF_JNE | BPF_K, insn->src_reg,
16718 0, 2, 0),
16719 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
16720 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
16721 *insn,
16722 };
16723 struct bpf_insn chk_and_mod[] = {
16724 /* [R,W]x mod 0 -> [R,W]x */
16725 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
16726 BPF_JEQ | BPF_K, insn->src_reg,
16727 0, 1 + (is64 ? 0 : 1), 0),
16728 *insn,
16729 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
16730 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
16731 };
16732
16733 patchlet = isdiv ? chk_and_div : chk_and_mod;
16734 cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
16735 ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
16736
16737 new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
16738 if (!new_prog)
16739 return -ENOMEM;
16740
16741 delta += cnt - 1;
16742 env->prog = prog = new_prog;
16743 insn = new_prog->insnsi + i + delta;
16744 continue;
16745 }
16746
16747 /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
16748 if (BPF_CLASS(insn->code) == BPF_LD &&
16749 (BPF_MODE(insn->code) == BPF_ABS ||
16750 BPF_MODE(insn->code) == BPF_IND)) {
16751 cnt = env->ops->gen_ld_abs(insn, insn_buf);
16752 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
16753 verbose(env, "bpf verifier is misconfigured\n");
16754 return -EINVAL;
16755 }
16756
16757 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
16758 if (!new_prog)
16759 return -ENOMEM;
16760
16761 delta += cnt - 1;
16762 env->prog = prog = new_prog;
16763 insn = new_prog->insnsi + i + delta;
16764 continue;
16765 }
16766
16767 /* Rewrite pointer arithmetic to mitigate speculation attacks. */
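/* Rough outline of the masking below: BPF_REG_AX is computed so that it
 * equals off_reg when 0 <= off_reg <= aux->alu_limit and 0 otherwise, and
 * the pointer ALU insn is then performed with BPF_REG_AX as the offset,
 * so even under misspeculation the pointer can only move within the
 * verifier-proven limit.
 */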
16768 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
16769 insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
16770 const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
16771 const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
16772 struct bpf_insn *patch = &insn_buf[0];
16773 bool issrc, isneg, isimm;
16774 u32 off_reg;
16775
16776 aux = &env->insn_aux_data[i + delta];
16777 if (!aux->alu_state ||
16778 aux->alu_state == BPF_ALU_NON_POINTER)
16779 continue;
16780
16781 isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
16782 issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
16783 BPF_ALU_SANITIZE_SRC;
16784 isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
16785
16786 off_reg = issrc ? insn->src_reg : insn->dst_reg;
16787 if (isimm) {
16788 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
16789 } else {
16790 if (isneg)
16791 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
16792 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
16793 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
16794 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
16795 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
16796 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
16797 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
16798 }
16799 if (!issrc)
16800 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
16801 insn->src_reg = BPF_REG_AX;
16802 if (isneg)
16803 insn->code = insn->code == code_add ?
16804 code_sub : code_add;
16805 *patch++ = *insn;
16806 if (issrc && isneg && !isimm)
16807 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
16808 cnt = patch - insn_buf;
16809
16810 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
16811 if (!new_prog)
16812 return -ENOMEM;
16813
16814 delta += cnt - 1;
16815 env->prog = prog = new_prog;
16816 insn = new_prog->insnsi + i + delta;
16817 continue;
16818 }
16819
16820 if (insn->code != (BPF_JMP | BPF_CALL))
16821 continue;
16822 if (insn->src_reg == BPF_PSEUDO_CALL)
16823 continue;
16824 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
16825 ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
16826 if (ret)
16827 return ret;
16828 if (cnt == 0)
16829 continue;
16830
16831 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
16832 if (!new_prog)
16833 return -ENOMEM;
16834
16835 delta += cnt - 1;
16836 env->prog = prog = new_prog;
16837 insn = new_prog->insnsi + i + delta;
16838 continue;
16839 }
16840
16841 if (insn->imm == BPF_FUNC_get_route_realm)
16842 prog->dst_needed = 1;
16843 if (insn->imm == BPF_FUNC_get_prandom_u32)
16844 bpf_user_rnd_init_once();
16845 if (insn->imm == BPF_FUNC_override_return)
16846 prog->kprobe_override = 1;
16847 if (insn->imm == BPF_FUNC_tail_call) {
16848 /* If we tail call into other programs, we
16849 * cannot make any assumptions since they can
16850 * be replaced dynamically during runtime in
16851 * the program array.
16852 */
16853 prog->cb_access = 1;
16854 if (!allow_tail_call_in_subprogs(env))
16855 prog->aux->stack_depth = MAX_BPF_STACK;
16856 prog->aux->max_pkt_offset = MAX_PACKET_OFF;
16857
16858 /* mark bpf_tail_call as different opcode to avoid
16859 * conditional branch in the interpreter for every normal
16860 * call and to prevent accidental JITing by a JIT compiler
16861 * that doesn't support bpf_tail_call yet
16862 */
16863 insn->imm = 0;
16864 insn->code = BPF_JMP | BPF_TAIL_CALL;
16865
16866 aux = &env->insn_aux_data[i + delta];
16867 if (env->bpf_capable && !prog->blinding_requested &&
16868 prog->jit_requested &&
16869 !bpf_map_key_poisoned(aux) &&
16870 !bpf_map_ptr_poisoned(aux) &&
16871 !bpf_map_ptr_unpriv(aux)) {
16872 struct bpf_jit_poke_descriptor desc = {
16873 .reason = BPF_POKE_REASON_TAIL_CALL,
16874 .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
16875 .tail_call.key = bpf_map_key_immediate(aux),
16876 .insn_idx = i + delta,
16877 };
16878
16879 ret = bpf_jit_add_poke_descriptor(prog, &desc);
16880 if (ret < 0) {
16881 verbose(env, "adding tail call poke descriptor failed\n");
16882 return ret;
16883 }
16884
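/* A non-zero imm records the poke slot (index + 1) so that JITs which
 * support direct tail calls can patch the target via poke_tab[imm - 1];
 * leaving imm at 0 keeps the generic indirect tail call.
 */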
16885 insn->imm = ret + 1;
16886 continue;
16887 }
16888
16889 if (!bpf_map_ptr_unpriv(aux))
16890 continue;
16891
16892 /* instead of changing every JIT dealing with tail_call
16893 * emit two extra insns:
16894 * if (index >= max_entries) goto out;
16895 * index &= array->index_mask;
16896 * to avoid out-of-bounds cpu speculation
16897 */
16898 if (bpf_map_ptr_poisoned(aux)) {
16899 verbose(env, "tail_call abusing map_ptr\n");
16900 return -EINVAL;
16901 }
16902
16903 map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
16904 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
16905 map_ptr->max_entries, 2);
16906 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
16907 container_of(map_ptr,
16908 struct bpf_array,
16909 map)->index_mask);
16910 insn_buf[2] = *insn;
16911 cnt = 3;
16912 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
16913 if (!new_prog)
16914 return -ENOMEM;
16915
16916 delta += cnt - 1;
16917 env->prog = prog = new_prog;
16918 insn = new_prog->insnsi + i + delta;
16919 continue;
16920 }
16921
16922 if (insn->imm == BPF_FUNC_timer_set_callback) {
16923 /* The verifier will process callback_fn as many times as necessary
16924 * with different maps and the register states prepared by
16925 * set_timer_callback_state will be accurate.
16926 *
16927 * The following use case is valid:
16928 * map1 is shared by prog1, prog2, prog3.
16929 * prog1 calls bpf_timer_init for some map1 elements
16930 * prog2 calls bpf_timer_set_callback for some map1 elements.
16931 * Those that were not bpf_timer_init-ed will return -EINVAL.
16932 * prog3 calls bpf_timer_start for some map1 elements.
16933 * Those that were not both bpf_timer_init-ed and
16934 * bpf_timer_set_callback-ed will return -EINVAL.
16935 */
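/* The ld_imm64 pair built below materializes prog->aux into R3, so the
 * in-kernel bpf_timer_set_callback() helper receives the calling
 * program's aux as a hidden third argument next to the two visible ones.
 */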
16936 struct bpf_insn ld_addrs[2] = {
16937 BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
16938 };
16939
16940 insn_buf[0] = ld_addrs[0];
16941 insn_buf[1] = ld_addrs[1];
16942 insn_buf[2] = *insn;
16943 cnt = 3;
16944
16945 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
16946 if (!new_prog)
16947 return -ENOMEM;
16948
16949 delta += cnt - 1;
16950 env->prog = prog = new_prog;
16951 insn = new_prog->insnsi + i + delta;
16952 goto patch_call_imm;
16953 }
16954
16955 if (is_storage_get_function(insn->imm)) {
16956 if (!env->prog->aux->sleepable ||
16957 env->insn_aux_data[i + delta].storage_get_func_atomic)
16958 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
16959 else
16960 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
16961 insn_buf[1] = *insn;
16962 cnt = 2;
16963
16964 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
16965 if (!new_prog)
16966 return -ENOMEM;
16967
16968 delta += cnt - 1;
16969 env->prog = prog = new_prog;
16970 insn = new_prog->insnsi + i + delta;
16971 goto patch_call_imm;
16972 }
16973
16974 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
16975 * and other inlining handlers are currently limited to 64 bit
16976 * only.
16977 */
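/* For instance, on a 64-bit JIT a bpf_map_lookup_elem() call on an array
 * map can be replaced with the inline sequence produced by
 * array_map_gen_lookup(): a bounds check on the key followed by direct
 * computation of the element address, with no helper call at runtime.
 */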
16978 if (prog->jit_requested && BITS_PER_LONG == 64 &&
16979 (insn->imm == BPF_FUNC_map_lookup_elem ||
16980 insn->imm == BPF_FUNC_map_update_elem ||
16981 insn->imm == BPF_FUNC_map_delete_elem ||
16982 insn->imm == BPF_FUNC_map_push_elem ||
16983 insn->imm == BPF_FUNC_map_pop_elem ||
16984 insn->imm == BPF_FUNC_map_peek_elem ||
16985 insn->imm == BPF_FUNC_redirect_map ||
16986 insn->imm == BPF_FUNC_for_each_map_elem ||
16987 insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
16988 aux = &env->insn_aux_data[i + delta];
16989 if (bpf_map_ptr_poisoned(aux))
16990 goto patch_call_imm;
16991
16992 map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
16993 ops = map_ptr->ops;
16994 if (insn->imm == BPF_FUNC_map_lookup_elem &&
16995 ops->map_gen_lookup) {
16996 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
16997 if (cnt == -EOPNOTSUPP)
16998 goto patch_map_ops_generic;
16999 if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
17000 verbose(env, "bpf verifier is misconfigured\n");
17001 return -EINVAL;
17002 }
17003
17004 new_prog = bpf_patch_insn_data(env, i + delta,
17005 insn_buf, cnt);
17006 if (!new_prog)
17007 return -ENOMEM;
17008
17009 delta += cnt - 1;
17010 env->prog = prog = new_prog;
17011 insn = new_prog->insnsi + i + delta;
17012 continue;
17013 }
17014
17015 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
17016 (void *(*)(struct bpf_map *map, void *key))NULL));
17017 BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
17018 (int (*)(struct bpf_map *map, void *key))NULL));
17019 BUILD_BUG_ON(!__same_type(ops->map_update_elem,
17020 (int (*)(struct bpf_map *map, void *key, void *value,
17021 u64 flags))NULL));
17022 BUILD_BUG_ON(!__same_type(ops->map_push_elem,
17023 (int (*)(struct bpf_map *map, void *value,
17024 u64 flags))NULL));
17025 BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
17026 (int (*)(struct bpf_map *map, void *value))NULL));
17027 BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
17028 (int (*)(struct bpf_map *map, void *value))NULL));
17029 BUILD_BUG_ON(!__same_type(ops->map_redirect,
17030 (int (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
17031 BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
17032 (int (*)(struct bpf_map *map,
17033 bpf_callback_t callback_fn,
17034 void *callback_ctx,
17035 u64 flags))NULL));
17036 BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
17037 (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
17038
17039patch_map_ops_generic:
17040 switch (insn->imm) {
17041 case BPF_FUNC_map_lookup_elem:
17042 insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
17043 continue;
17044 case BPF_FUNC_map_update_elem:
17045 insn->imm = BPF_CALL_IMM(ops->map_update_elem);
17046 continue;
17047 case BPF_FUNC_map_delete_elem:
17048 insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
17049 continue;
17050 case BPF_FUNC_map_push_elem:
17051 insn->imm = BPF_CALL_IMM(ops->map_push_elem);
17052 continue;
17053 case BPF_FUNC_map_pop_elem:
17054 insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
17055 continue;
17056 case BPF_FUNC_map_peek_elem:
17057 insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
17058 continue;
17059 case BPF_FUNC_redirect_map:
17060 insn->imm = BPF_CALL_IMM(ops->map_redirect);
17061 continue;
17062 case BPF_FUNC_for_each_map_elem:
17063 insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
17064 continue;
17065 case BPF_FUNC_map_lookup_percpu_elem:
17066 insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
17067 continue;
17068 }
17069
17070 goto patch_call_imm;
17071 }
17072
17073 /* Implement bpf_jiffies64 inline. */
17074 if (prog->jit_requested && BITS_PER_LONG == 64 &&
17075 insn->imm == BPF_FUNC_jiffies64) {
17076 struct bpf_insn ld_jiffies_addr[2] = {
17077 BPF_LD_IMM64(BPF_REG_0,
17078 (unsigned long)&jiffies),
17079 };
17080
17081 insn_buf[0] = ld_jiffies_addr[0];
17082 insn_buf[1] = ld_jiffies_addr[1];
17083 insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
17084 BPF_REG_0, 0);
17085 cnt = 3;
17086
17087 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
17088 cnt);
17089 if (!new_prog)
17090 return -ENOMEM;
17091
17092 delta += cnt - 1;
17093 env->prog = prog = new_prog;
17094 insn = new_prog->insnsi + i + delta;
17095 continue;
17096 }
17097
17098 /* Implement bpf_get_func_arg inline. */
17099 if (prog_type == BPF_PROG_TYPE_TRACING &&
17100 insn->imm == BPF_FUNC_get_func_arg) {
17101 /* Load nr_args from ctx - 8 */
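/* The BPF trampoline stores the traced function's argument count at
 * ctx - 8 and, when requested, its IP at ctx - 16; the inlined sequences
 * for the get_func_* helpers here and below rely on this layout.
 */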
17102 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
17103 insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
17104 insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
17105 insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
17106 insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
17107 insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
17108 insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
17109 insn_buf[7] = BPF_JMP_A(1);
17110 insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
17111 cnt = 9;
17112
17113 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
17114 if (!new_prog)
17115 return -ENOMEM;
17116
17117 delta += cnt - 1;
17118 env->prog = prog = new_prog;
17119 insn = new_prog->insnsi + i + delta;
17120 continue;
17121 }
17122
17123 /* Implement bpf_get_func_ret inline. */
17124 if (prog_type == BPF_PROG_TYPE_TRACING &&
17125 insn->imm == BPF_FUNC_get_func_ret) {
17126 if (eatype == BPF_TRACE_FEXIT ||
17127 eatype == BPF_MODIFY_RETURN) {
17128 /* Load nr_args from ctx - 8 */
17129 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
17130 insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
17131 insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
17132 insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
17133 insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
17134 insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0);
17135 cnt = 6;
17136 } else {
17137 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
17138 cnt = 1;
17139 }
17140
17141 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
17142 if (!new_prog)
17143 return -ENOMEM;
17144
17145 delta += cnt - 1;
17146 env->prog = prog = new_prog;
17147 insn = new_prog->insnsi + i + delta;
17148 continue;
17149 }
17150
17151 /* Implement get_func_arg_cnt inline. */
17152 if (prog_type == BPF_PROG_TYPE_TRACING &&
17153 insn->imm == BPF_FUNC_get_func_arg_cnt) {
17154 /* Load nr_args from ctx - 8 */
17155 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
17156
17157 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
17158 if (!new_prog)
17159 return -ENOMEM;
17160
17161 env->prog = prog = new_prog;
17162 insn = new_prog->insnsi + i + delta;
17163 continue;
17164 }
17165
17166 /* Implement bpf_get_func_ip inline. */
17167 if (prog_type == BPF_PROG_TYPE_TRACING &&
17168 insn->imm == BPF_FUNC_get_func_ip) {
17169 /* Load IP address from ctx - 16 */
17170 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
17171
17172 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
17173 if (!new_prog)
17174 return -ENOMEM;
17175
17176 env->prog = prog = new_prog;
17177 insn = new_prog->insnsi + i + delta;
17178 continue;
17179 }
17180
17181patch_call_imm:
17182 fn = env->ops->get_func_proto(insn->imm, env->prog);
17183 /* all functions that have prototype and verifier allowed
17184 * programs to call them, must be real in-kernel functions
17185 */
17186 if (!fn->func) {
17187 verbose(env,
17188 "kernel subsystem misconfigured func %s#%d\n",
17189 func_id_name(insn->imm), insn->imm);
17190 return -EFAULT;
17191 }
17192 insn->imm = fn->func - __bpf_call_base;
17193 }
17194
17195 /* Since poke tab is now finalized, publish aux to tracker. */
17196 for (i = 0; i < prog->aux->size_poke_tab; i++) {
17197 map_ptr = prog->aux->poke_tab[i].tail_call.map;
17198 if (!map_ptr->ops->map_poke_track ||
17199 !map_ptr->ops->map_poke_untrack ||
17200 !map_ptr->ops->map_poke_run) {
17201 verbose(env, "bpf verifier is misconfigured\n");
17202 return -EINVAL;
17203 }
17204
17205 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
17206 if (ret < 0) {
17207 verbose(env, "tracking tail call prog failed\n");
17208 return ret;
17209 }
17210 }
17211
17212 sort_kfunc_descs_by_imm(env->prog);
17213
17214 return 0;
17215}
17216
17217static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
17218 int position,
17219 s32 stack_base,
17220 u32 callback_subprogno,
17221 u32 *cnt)
17222{
17223 s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
17224 s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
17225 s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
17226 int reg_loop_max = BPF_REG_6;
17227 int reg_loop_cnt = BPF_REG_7;
17228 int reg_loop_ctx = BPF_REG_8;
17229
17230 struct bpf_prog *new_prog;
17231 u32 callback_start;
17232 u32 call_insn_offset;
17233 s32 callback_offset;
17234
17235 /* This represents an inlined version of bpf_iter.c:bpf_loop,
17236 * take care to keep the two in sync when modifying either.
17237 */
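/* Rough layout of the patch: insns 0-2 reject nr_loops > BPF_MAX_LOOPS
 * with -E2BIG, insns 3-5 spill R6-R8, 6-8 initialize the loop registers,
 * insn 9 is the loop header, 10-12 the callback call (insn 12 being the
 * BPF_CALL_REL fixed up after patching), 13-14 increment the counter and
 * take the back-edge, and 15-18 set R0 and restore R6-R8.
 */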
17238 struct bpf_insn insn_buf[] = {
17239 /* Return error and jump to the end of the patch if
17240 * expected number of iterations is too big.
17241 */
17242 BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
17243 BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
17244 BPF_JMP_IMM(BPF_JA, 0, 0, 16),
17245 /* spill R6, R7, R8 to use these as loop vars */
17246 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
17247 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
17248 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
17249 /* initialize loop vars */
17250 BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
17251 BPF_MOV32_IMM(reg_loop_cnt, 0),
17252 BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
17253 /* loop header,
17254 * if reg_loop_cnt >= reg_loop_max skip the loop body
17255 */
17256 BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
17257 /* callback call,
17258 * correct callback offset would be set after patching
17259 */
17260 BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
17261 BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
17262 BPF_CALL_REL(0),
17263 /* increment loop counter */
17264 BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
17265 /* jump to loop header if callback returned 0 */
17266 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
17267 /* return value of bpf_loop,
17268 * set R0 to the number of iterations
17269 */
17270 BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
17271 /* restore original values of R6, R7, R8 */
17272 BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
17273 BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
17274 BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
17275 };
17276
17277 *cnt = ARRAY_SIZE(insn_buf);
17278 new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
17279 if (!new_prog)
17280 return new_prog;
17281
17282 /* callback start is known only after patching */
17283 callback_start = env->subprog_info[callback_subprogno].start;
17284 /* Note: insn_buf[12] is the offset of the BPF_CALL_REL instruction within the patch */
17285 call_insn_offset = position + 12;
17286 callback_offset = callback_start - call_insn_offset - 1;
17287 new_prog->insnsi[call_insn_offset].imm = callback_offset;
17288
17289 return new_prog;
17290}
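/* Illustration only, not verifier logic: a rough C sketch of the behaviour
 * the patched sequence above is expected to implement, assuming the usual
 * bpf_loop() contract (call the callback with the iteration index and a
 * context pointer, stop early on a non-zero return, report the number of
 * iterations performed). The function and parameter names are made up.
 *
 *	static long inlined_loop_sketch(u64 nr_loops,
 *					long (*callback)(u64 idx, void *ctx),
 *					void *ctx)
 *	{
 *		u64 i;
 *
 *		if (nr_loops > BPF_MAX_LOOPS)
 *			return -E2BIG;
 *		for (i = 0; i < nr_loops; i++)
 *			if (callback(i, ctx))
 *				return i + 1;
 *		return i;
 *	}
 */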
17291
17292static bool is_bpf_loop_call(struct bpf_insn *insn)
17293{
17294 return insn->code == (BPF_JMP | BPF_CALL) &&
17295 insn->src_reg == 0 &&
17296 insn->imm == BPF_FUNC_loop;
17297}
17298
17299/* For all sub-programs in the program (including main) check
17300 * insn_aux_data to see if there are bpf_loop calls that require
17301 * inlining. If such calls are found, they are replaced with the
17302 * sequence of instructions produced by inline_bpf_loop() and the
17303 * subprog's stack_depth is increased by the size of 3 registers.
17304 * This stack space is used to spill the values of R6, R7 and R8,
17305 * which hold the loop bound, counter and context variables (see
17306 * the illustrative example after this function).
17307 */
17308static int optimize_bpf_loop(struct bpf_verifier_env *env)
17309{
17310 struct bpf_subprog_info *subprogs = env->subprog_info;
17311 int i, cur_subprog = 0, cnt, delta = 0;
17312 struct bpf_insn *insn = env->prog->insnsi;
17313 int insn_cnt = env->prog->len;
17314 u16 stack_depth = subprogs[cur_subprog].stack_depth;
17315 u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
17316 u16 stack_depth_extra = 0;
17317
17318 for (i = 0; i < insn_cnt; i++, insn++) {
17319 struct bpf_loop_inline_state *inline_state =
17320 &env->insn_aux_data[i + delta].loop_inline_state;
17321
17322 if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
17323 struct bpf_prog *new_prog;
17324
17325 stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
17326 new_prog = inline_bpf_loop(env,
17327 i + delta,
17328 -(stack_depth + stack_depth_extra),
17329 inline_state->callback_subprogno,
17330 &cnt);
17331 if (!new_prog)
17332 return -ENOMEM;
17333
17334 delta += cnt - 1;
17335 env->prog = new_prog;
17336 insn = new_prog->insnsi + i + delta;
17337 }
17338
17339 if (subprogs[cur_subprog + 1].start == i + delta + 1) {
17340 subprogs[cur_subprog].stack_depth += stack_depth_extra;
17341 cur_subprog++;
17342 stack_depth = subprogs[cur_subprog].stack_depth;
17343 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
17344 stack_depth_extra = 0;
17345 }
17346 }
17347
17348 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
17349
17350 return 0;
17351}
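/* Illustration only: the kind of BPF C code this optimization targets,
 * assuming the documented bpf_loop() helper signature and libbpf
 * conventions (SEC(), program/callback layout); identifiers such as
 * sum_cb, struct acc and the "tc" section are only examples.
 *
 *	struct acc { long sum; };
 *
 *	static long sum_cb(__u64 i, void *ctx)
 *	{
 *		((struct acc *)ctx)->sum += i;
 *		return 0;
 *	}
 *
 *	SEC("tc")
 *	int prog(struct __sk_buff *skb)
 *	{
 *		struct acc a = {};
 *
 *		bpf_loop(100, sum_cb, &a, 0);
 *		return 0;
 *	}
 *
 * Roughly: when the callback is a single known subprog and the flags
 * argument is known to be zero, fit_for_inline is set and the helper call
 * above is rewritten in place by inline_bpf_loop() instead of going
 * through the bpf_loop() helper at run time.
 */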
17352
17353static void free_states(struct bpf_verifier_env *env)
17354{
17355 struct bpf_verifier_state_list *sl, *sln;
17356 int i;
17357
17358 sl = env->free_list;
17359 while (sl) {
17360 sln = sl->next;
17361 free_verifier_state(&sl->state, false);
17362 kfree(sl);
17363 sl = sln;
17364 }
17365 env->free_list = NULL;
17366
17367 if (!env->explored_states)
17368 return;
17369
17370 for (i = 0; i < state_htab_size(env); i++) {
17371 sl = env->explored_states[i];
17372
17373 while (sl) {
17374 sln = sl->next;
17375 free_verifier_state(&sl->state, false);
17376 kfree(sl);
17377 sl = sln;
17378 }
17379 env->explored_states[i] = NULL;
17380 }
17381}
17382
17383static int do_check_common(struct bpf_verifier_env *env, int subprog)
17384{
17385 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
17386 struct bpf_verifier_state *state;
17387 struct bpf_reg_state *regs;
17388 int ret, i;
17389
17390 env->prev_linfo = NULL;
17391 env->pass_cnt++;
17392
17393 state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
17394 if (!state)
17395 return -ENOMEM;
17396 state->curframe = 0;
17397 state->speculative = false;
17398 state->branches = 1;
17399 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
17400 if (!state->frame[0]) {
17401 kfree(state);
17402 return -ENOMEM;
17403 }
17404 env->cur_state = state;
17405 init_func_state(env, state->frame[0],
17406 BPF_MAIN_FUNC /* callsite */,
17407 0 /* frameno */,
17408 subprog);
17409 state->first_insn_idx = env->subprog_info[subprog].start;
17410 state->last_insn_idx = -1;
17411
17412 regs = state->frame[state->curframe]->regs;
17413 if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
17414 ret = btf_prepare_func_args(env, subprog, regs);
17415 if (ret)
17416 goto out;
17417 for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
17418 if (regs[i].type == PTR_TO_CTX)
17419 mark_reg_known_zero(env, regs, i);
17420 else if (regs[i].type == SCALAR_VALUE)
17421 mark_reg_unknown(env, regs, i);
17422 else if (base_type(regs[i].type) == PTR_TO_MEM) {
17423 const u32 mem_size = regs[i].mem_size;
17424
17425 mark_reg_known_zero(env, regs, i);
17426 regs[i].mem_size = mem_size;
17427 regs[i].id = ++env->id_gen;
17428 }
17429 }
17430 } else {
17431 /* 1st arg to a function */
17432 regs[BPF_REG_1].type = PTR_TO_CTX;
17433 mark_reg_known_zero(env, regs, BPF_REG_1);
17434 ret = btf_check_subprog_arg_match(env, subprog, regs);
17435 if (ret == -EFAULT)
17436 /* unlikely verifier bug. abort.
17437 * ret == 0 and ret < 0 are sadly acceptable for the
17438 * main() function due to backward compatibility.
17439 * For example, a socket filter program may be written as:
17440 * int bpf_prog(struct pt_regs *ctx)
17441 * and never dereference that ctx in the program.
17442 * 'struct pt_regs' is a type mismatch for a socket
17443 * filter, which should be using 'struct __sk_buff'.
17444 */
17445 goto out;
17446 }
17447
17448 ret = do_check(env);
17449out:
17450 /* check for NULL is necessary, since cur_state can be freed inside
17451 * do_check() under memory pressure.
17452 */
17453 if (env->cur_state) {
17454 free_verifier_state(env->cur_state, true);
17455 env->cur_state = NULL;
17456 }
17457 while (!pop_stack(env, NULL, NULL, false));
17458 if (!ret && pop_log)
17459 bpf_vlog_reset(&env->log, 0);
17460 free_states(env);
17461 return ret;
17462}
17463
17464/* Verify all global functions in a BPF program one by one based on their BTF.
17465 * All global functions must pass verification. Otherwise the whole program is rejected.
17466 * Consider:
17467 * int bar(int);
17468 * int foo(int f)
17469 * {
17470 * return bar(f);
17471 * }
17472 * int bar(int b)
17473 * {
17474 * ...
17475 * }
17476 * foo() will be verified first for R1=any_scalar_value. During verification it
17477 * will be assumed that bar() has already been verified successfully and the call to bar()
17478 * from foo() will be checked for type match only. Later bar() will be verified
17479 * independently to check that it's safe for R1=any_scalar_value.
17480 */
17481static int do_check_subprogs(struct bpf_verifier_env *env)
17482{
17483 struct bpf_prog_aux *aux = env->prog->aux;
17484 int i, ret;
17485
17486 if (!aux->func_info)
17487 return 0;
17488
17489 for (i = 1; i < env->subprog_cnt; i++) {
17490 if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
17491 continue;
17492 env->insn_idx = env->subprog_info[i].start;
17493 WARN_ON_ONCE(env->insn_idx == 0);
17494 ret = do_check_common(env, i);
17495 if (ret) {
17496 return ret;
17497 } else if (env->log.level & BPF_LOG_LEVEL) {
17498 verbose(env,
17499 "Func#%d is safe for any args that match its prototype\n",
17500 i);
17501 }
17502 }
17503 return 0;
17504}
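/* Illustration only: in BPF C, "global" subprogs are simply non-static
 * functions; the loader emits BTF_FUNC_GLOBAL linkage for them, so they
 * are verified here once against their BTF prototype instead of being
 * re-verified in the context of every caller. A sketch with made-up
 * names (assuming the common __noinline helper macro):
 *
 *	__noinline int clamp_port(int port)
 *	{
 *		return port < 0 ? 0 : port;
 *	}
 *
 *	static int bump_port(int port)
 *	{
 *		return clamp_port(port) + 1;
 *	}
 *
 * clamp_port() is global and goes through do_check_common() above with
 * R1=any_scalar_value; bump_port() is static and is verified as part of
 * each caller.
 */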
17505
17506static int do_check_main(struct bpf_verifier_env *env)
17507{
17508 int ret;
17509
17510 env->insn_idx = 0;
17511 ret = do_check_common(env, 0);
17512 if (!ret)
17513 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
17514 return ret;
17515}
17516
17517
17518static void print_verification_stats(struct bpf_verifier_env *env)
17519{
17520 int i;
17521
17522 if (env->log.level & BPF_LOG_STATS) {
17523 verbose(env, "verification time %lld usec\n",
17524 div_u64(env->verification_time, 1000));
17525 verbose(env, "stack depth ");
17526 for (i = 0; i < env->subprog_cnt; i++) {
17527 u32 depth = env->subprog_info[i].stack_depth;
17528
17529 verbose(env, "%d", depth);
17530 if (i + 1 < env->subprog_cnt)
17531 verbose(env, "+");
17532 }
17533 verbose(env, "\n");
17534 }
17535 verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
17536 "total_states %d peak_states %d mark_read %d\n",
17537 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
17538 env->max_states_per_insn, env->total_states,
17539 env->peak_states, env->longest_mark_read_walk);
17540}
17541
17542static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
17543{
17544 const struct btf_type *t, *func_proto;
17545 const struct bpf_struct_ops *st_ops;
17546 const struct btf_member *member;
17547 struct bpf_prog *prog = env->prog;
17548 u32 btf_id, member_idx;
17549 const char *mname;
17550
17551 if (!prog->gpl_compatible) {
17552 verbose(env, "struct ops programs must have a GPL compatible license\n");
17553 return -EINVAL;
17554 }
17555
17556 btf_id = prog->aux->attach_btf_id;
17557 st_ops = bpf_struct_ops_find(btf_id);
17558 if (!st_ops) {
17559 verbose(env, "attach_btf_id %u is not a supported struct\n",
17560 btf_id);
17561 return -ENOTSUPP;
17562 }
17563
17564 t = st_ops->type;
17565 member_idx = prog->expected_attach_type;
17566 if (member_idx >= btf_type_vlen(t)) {
17567 verbose(env, "attach to invalid member idx %u of struct %s\n",
17568 member_idx, st_ops->name);
17569 return -EINVAL;
17570 }
17571
17572 member = &btf_type_member(t)[member_idx];
17573 mname = btf_name_by_offset(btf_vmlinux, member->name_off);
17574 func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
17575 NULL);
17576 if (!func_proto) {
17577 verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
17578 mname, member_idx, st_ops->name);
17579 return -EINVAL;
17580 }
17581
17582 if (st_ops->check_member) {
17583 int err = st_ops->check_member(t, member, prog);
17584
17585 if (err) {
17586 verbose(env, "attach to unsupported member %s of struct %s\n",
17587 mname, st_ops->name);
17588 return err;
17589 }
17590 }
17591
17592 prog->aux->attach_func_proto = func_proto;
17593 prog->aux->attach_func_name = mname;
17594 env->ops = st_ops->verifier_ops;
17595
17596 return 0;
17597}
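/* Illustration only: a struct_ops program as it might look on the BPF side,
 * assuming libbpf conventions (SEC("struct_ops/..."), the ".struct_ops" map
 * section, the BPF_PROG() macro); "sample_ops", its member and the map name
 * are made up. The member index chosen in the map is what arrives here as
 * expected_attach_type.
 *
 *	char LICENSE[] SEC("license") = "GPL";
 *
 *	SEC("struct_ops/sample_init")
 *	int BPF_PROG(sample_init, void *arg)
 *	{
 *		return 0;
 *	}
 *
 *	SEC(".struct_ops")
 *	struct sample_ops sample_map = {
 *		.init = (void *)sample_init,
 *	};
 */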
17598#define SECURITY_PREFIX "security_"
17599
17600static int check_attach_modify_return(unsigned long addr, const char *func_name)
17601{
17602 if (within_error_injection_list(addr) ||
17603 !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
17604 return 0;
17605
17606 return -EINVAL;
17607}
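/* Illustration only: a BPF_MODIFY_RETURN program, assuming the libbpf
 * SEC("fmod_ret/...") convention and the BPF_PROG() macro. The attach
 * target is just a placeholder: per the check above it must either be on
 * the error-injection list or carry the "security_" prefix.
 *
 *	SEC("fmod_ret/security_socket_create")
 *	int BPF_PROG(block_socket_create, int family, int type, int protocol, int kern)
 *	{
 *		return -EPERM;
 *	}
 */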
17608
17609/* list of non-sleepable functions that are otherwise on the
17610 * ALLOW_ERROR_INJECTION list
17611 */
17612BTF_SET_START(btf_non_sleepable_error_inject)
17613/* The three functions below can be called from both sleepable and non-sleepable context.
17614 * Assume non-sleepable from the BPF safety point of view.
17615 */
17616BTF_ID(func, __filemap_add_folio)
17617BTF_ID(func, should_fail_alloc_page)
17618BTF_ID(func, should_failslab)
17619BTF_SET_END(btf_non_sleepable_error_inject)
17620
17621static int check_non_sleepable_error_inject(u32 btf_id)
17622{
17623 return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
17624}
17625
17626int bpf_check_attach_target(struct bpf_verifier_log *log,
17627 const struct bpf_prog *prog,
17628 const struct bpf_prog *tgt_prog,
17629 u32 btf_id,
17630 struct bpf_attach_target_info *tgt_info)
17631{
17632 bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
17633 const char prefix[] = "btf_trace_";
17634 int ret = 0, subprog = -1, i;
17635 const struct btf_type *t;
17636 bool conservative = true;
17637 const char *tname;
17638 struct btf *btf;
17639 long addr = 0;
17640
17641 if (!btf_id) {
17642 bpf_log(log, "Tracing programs must provide btf_id\n");
17643 return -EINVAL;
17644 }
17645 btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
17646 if (!btf) {
17647 bpf_log(log,
17648 "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
17649 return -EINVAL;
17650 }
17651 t = btf_type_by_id(btf, btf_id);
17652 if (!t) {
17653 bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
17654 return -EINVAL;
17655 }
17656 tname = btf_name_by_offset(btf, t->name_off);
17657 if (!tname) {
17658 bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
17659 return -EINVAL;
17660 }
17661 if (tgt_prog) {
17662 struct bpf_prog_aux *aux = tgt_prog->aux;
17663
17664 if (bpf_prog_is_dev_bound(prog->aux) &&
17665 !bpf_prog_dev_bound_match(prog, tgt_prog)) {
17666 bpf_log(log, "Target program bound device mismatch");
17667 return -EINVAL;
17668 }
17669
17670 for (i = 0; i < aux->func_info_cnt; i++)
17671 if (aux->func_info[i].type_id == btf_id) {
17672 subprog = i;
17673 break;
17674 }
17675 if (subprog == -1) {
17676 bpf_log(log, "Subprog %s doesn't exist\n", tname);
17677 return -EINVAL;
17678 }
17679 conservative = aux->func_info_aux[subprog].unreliable;
17680 if (prog_extension) {
17681 if (conservative) {
17682 bpf_log(log,
17683 "Cannot replace static functions\n");
17684 return -EINVAL;
17685 }
17686 if (!prog->jit_requested) {
17687 bpf_log(log,
17688 "Extension programs should be JITed\n");
17689 return -EINVAL;
17690 }
17691 }
17692 if (!tgt_prog->jited) {
17693 bpf_log(log, "Can attach to only JITed progs\n");
17694 return -EINVAL;
17695 }
17696 if (tgt_prog->type == prog->type) {
17697 /* Cannot fentry/fexit another fentry/fexit program.
17698 * Cannot attach a program extension to another extension.
17699 * It's ok to attach fentry/fexit to an extension program.
17700 */
17701 bpf_log(log, "Cannot recursively attach\n");
17702 return -EINVAL;
17703 }
17704 if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
17705 prog_extension &&
17706 (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
17707 tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
17708 /* Program extensions can extend all program types
17709 * except fentry/fexit. The reason is the following.
17710 * The fentry/fexit programs are used for performance
17711 * analysis and stats, and can be attached to any program
17712 * type except themselves. When an extension program
17713 * replaces an XDP function, it is necessary to allow
17714 * performance analysis of all functions: both the original
17715 * XDP program and its program extension. Hence
17716 * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
17717 * allowed. If extending fentry/fexit were allowed, it
17718 * would be possible to create a long call chain
17719 * fentry->extension->fentry->extension beyond a
17720 * reasonable stack size. Hence extending fentry is not
17721 * allowed.
17722 */
17723 bpf_log(log, "Cannot extend fentry/fexit\n");
17724 return -EINVAL;
17725 }
17726 } else {
17727 if (prog_extension) {
17728 bpf_log(log, "Cannot replace kernel functions\n");
17729 return -EINVAL;
17730 }
17731 }
17732
17733 switch (prog->expected_attach_type) {
17734 case BPF_TRACE_RAW_TP:
17735 if (tgt_prog) {
17736 bpf_log(log,
17737 "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
17738 return -EINVAL;
17739 }
17740 if (!btf_type_is_typedef(t)) {
17741 bpf_log(log, "attach_btf_id %u is not a typedef\n",
17742 btf_id);
17743 return -EINVAL;
17744 }
17745 if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
17746 bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
17747 btf_id, tname);
17748 return -EINVAL;
17749 }
17750 tname += sizeof(prefix) - 1;
17751 t = btf_type_by_id(btf, t->type);
17752 if (!btf_type_is_ptr(t))
17753 /* should never happen in valid vmlinux build */
17754 return -EINVAL;
17755 t = btf_type_by_id(btf, t->type);
17756 if (!btf_type_is_func_proto(t))
17757 /* should never happen in valid vmlinux build */
17758 return -EINVAL;
17759
17760 break;
17761 case BPF_TRACE_ITER:
17762 if (!btf_type_is_func(t)) {
17763 bpf_log(log, "attach_btf_id %u is not a function\n",
17764 btf_id);
17765 return -EINVAL;
17766 }
17767 t = btf_type_by_id(btf, t->type);
17768 if (!btf_type_is_func_proto(t))
17769 return -EINVAL;
17770 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
17771 if (ret)
17772 return ret;
17773 break;
17774 default:
17775 if (!prog_extension)
17776 return -EINVAL;
17777 fallthrough;
17778 case BPF_MODIFY_RETURN:
17779 case BPF_LSM_MAC:
17780 case BPF_LSM_CGROUP:
17781 case BPF_TRACE_FENTRY:
17782 case BPF_TRACE_FEXIT:
17783 if (!btf_type_is_func(t)) {
17784 bpf_log(log, "attach_btf_id %u is not a function\n",
17785 btf_id);
17786 return -EINVAL;
17787 }
17788 if (prog_extension &&
17789 btf_check_type_match(log, prog, btf, t))
17790 return -EINVAL;
17791 t = btf_type_by_id(btf, t->type);
17792 if (!btf_type_is_func_proto(t))
17793 return -EINVAL;
17794
17795 if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
17796 (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
17797 prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
17798 return -EINVAL;
17799
17800 if (tgt_prog && conservative)
17801 t = NULL;
17802
17803 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
17804 if (ret < 0)
17805 return ret;
17806
17807 if (tgt_prog) {
17808 if (subprog == 0)
17809 addr = (long) tgt_prog->bpf_func;
17810 else
17811 addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
17812 } else {
17813 addr = kallsyms_lookup_name(tname);
17814 if (!addr) {
17815 bpf_log(log,
17816 "The address of function %s cannot be found\n",
17817 tname);
17818 return -ENOENT;
17819 }
17820 }
17821
17822 if (prog->aux->sleepable) {
17823 ret = -EINVAL;
17824 switch (prog->type) {
17825 case BPF_PROG_TYPE_TRACING:
17826
17827 /* fentry/fexit/fmod_ret progs can be sleepable if they are
17828 * attached to an ALLOW_ERROR_INJECTION function and are not in the denylist.
17829 */
17830 if (!check_non_sleepable_error_inject(btf_id) &&
17831 within_error_injection_list(addr))
17832 ret = 0;
17833 /* fentry/fexit/fmod_ret progs can also be sleepable if they are
17834 * in the fmodret id set with the KF_SLEEPABLE flag.
17835 */
17836 else {
17837 u32 *flags = btf_kfunc_is_modify_return(btf, btf_id);
17838
17839 if (flags && (*flags & KF_SLEEPABLE))
17840 ret = 0;
17841 }
17842 break;
17843 case BPF_PROG_TYPE_LSM:
17844 /* LSM progs check that they are attached to bpf_lsm_*() funcs.
17845 * Only some of them are sleepable.
17846 */
17847 if (bpf_lsm_is_sleepable_hook(btf_id))
17848 ret = 0;
17849 break;
17850 default:
17851 break;
17852 }
17853 if (ret) {
17854 bpf_log(log, "%s is not sleepable\n", tname);
17855 return ret;
17856 }
17857 } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
17858 if (tgt_prog) {
17859 bpf_log(log, "can't modify return codes of BPF programs\n");
17860 return -EINVAL;
17861 }
17862 ret = -EINVAL;
17863 if (btf_kfunc_is_modify_return(btf, btf_id) ||
17864 !check_attach_modify_return(addr, tname))
17865 ret = 0;
17866 if (ret) {
17867 bpf_log(log, "%s() is not modifiable\n", tname);
17868 return ret;
17869 }
17870 }
17871
17872 break;
17873 }
17874 tgt_info->tgt_addr = addr;
17875 tgt_info->tgt_name = tname;
17876 tgt_info->tgt_type = t;
17877 return 0;
17878}
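/* Illustration only: for BPF_TRACE_RAW_TP the switch above expects the
 * attach_btf_id to resolve through a chain of the form
 * TYPEDEF -> PTR -> FUNC_PROTO, where the typedef name carries the
 * "btf_trace_" prefix. For a tracepoint called "sched_switch" (used here
 * purely as an example, argument list elided) that chain would correspond
 * to a vmlinux BTF entry roughly like:
 *
 *	typedef void (*btf_trace_sched_switch)(void *__data, ...);
 *
 * tname is advanced past the prefix, so the bare tracepoint name is what
 * ends up in tgt_info->tgt_name.
 */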
17879
17880BTF_SET_START(btf_id_deny)
17881BTF_ID_UNUSED
17882#ifdef CONFIG_SMP
17883BTF_ID(func, migrate_disable)
17884BTF_ID(func, migrate_enable)
17885#endif
17886#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
17887BTF_ID(func, rcu_read_unlock_strict)
17888#endif
17889BTF_SET_END(btf_id_deny)
17890
17891static bool can_be_sleepable(struct bpf_prog *prog)
17892{
17893 if (prog->type == BPF_PROG_TYPE_TRACING) {
17894 switch (prog->expected_attach_type) {
17895 case BPF_TRACE_FENTRY:
17896 case BPF_TRACE_FEXIT:
17897 case BPF_MODIFY_RETURN:
17898 case BPF_TRACE_ITER:
17899 return true;
17900 default:
17901 return false;
17902 }
17903 }
17904 return prog->type == BPF_PROG_TYPE_LSM ||
17905 prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
17906 prog->type == BPF_PROG_TYPE_STRUCT_OPS;
17907}
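/* Illustration only: with libbpf, the sleepable variants of the program
 * types accepted above are typically declared with a ".s" section suffix
 * (a loader convention rather than something checked here), e.g.:
 *
 *	SEC("fentry.s/...")
 *	SEC("lsm.s/...")
 *	SEC("iter.s/...")
 *	SEC("uprobe.s/...")
 */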
17908
17909static int check_attach_btf_id(struct bpf_verifier_env *env)
17910{
17911 struct bpf_prog *prog = env->prog;
17912 struct bpf_prog *tgt_prog = prog->aux->dst_prog;
17913 struct bpf_attach_target_info tgt_info = {};
17914 u32 btf_id = prog->aux->attach_btf_id;
17915 struct bpf_trampoline *tr;
17916 int ret;
17917 u64 key;
17918
17919 if (prog->type == BPF_PROG_TYPE_SYSCALL) {
17920 if (prog->aux->sleepable)
17921 /* attach_btf_id checked to be zero already */
17922 return 0;
17923 verbose(env, "Syscall programs can only be sleepable\n");
17924 return -EINVAL;
17925 }
17926
17927 if (prog->aux->sleepable && !can_be_sleepable(prog)) {
17928 verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
17929 return -EINVAL;
17930 }
17931
17932 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
17933 return check_struct_ops_btf_id(env);
17934
17935 if (prog->type != BPF_PROG_TYPE_TRACING &&
17936 prog->type != BPF_PROG_TYPE_LSM &&
17937 prog->type != BPF_PROG_TYPE_EXT)
17938 return 0;
17939
17940 ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
17941 if (ret)
17942 return ret;
17943
17944 if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
17945 /* to make an freplace prog equivalent to its target, it needs to
17946 * inherit env->ops and expected_attach_type for the rest of the
17947 * verification
17948 */
17949 env->ops = bpf_verifier_ops[tgt_prog->type];
17950 prog->expected_attach_type = tgt_prog->expected_attach_type;
17951 }
17952
17953 /* store info about the attachment target that will be used later */
17954 prog->aux->attach_func_proto = tgt_info.tgt_type;
17955 prog->aux->attach_func_name = tgt_info.tgt_name;
17956
17957 if (tgt_prog) {
17958 prog->aux->saved_dst_prog_type = tgt_prog->type;
17959 prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
17960 }
17961
17962 if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
17963 prog->aux->attach_btf_trace = true;
17964 return 0;
17965 } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
17966 if (!bpf_iter_prog_supported(prog))
17967 return -EINVAL;
17968 return 0;
17969 }
17970
17971 if (prog->type == BPF_PROG_TYPE_LSM) {
17972 ret = bpf_lsm_verify_prog(&env->log, prog);
17973 if (ret < 0)
17974 return ret;
17975 } else if (prog->type == BPF_PROG_TYPE_TRACING &&
17976 btf_id_set_contains(&btf_id_deny, btf_id)) {
17977 return -EINVAL;
17978 }
17979
17980 key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
17981 tr = bpf_trampoline_get(key, &tgt_info);
17982 if (!tr)
17983 return -ENOMEM;
17984
17985 prog->aux->dst_trampoline = tr;
17986 return 0;
17987}
17988
17989struct btf *bpf_get_btf_vmlinux(void)
17990{
17991 if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
17992 mutex_lock(&bpf_verifier_lock);
17993 if (!btf_vmlinux)
17994 btf_vmlinux = btf_parse_vmlinux();
17995 mutex_unlock(&bpf_verifier_lock);
17996 }
17997 return btf_vmlinux;
17998}
17999
18000int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
18001{
18002 u64 start_time = ktime_get_ns();
18003 struct bpf_verifier_env *env;
18004 struct bpf_verifier_log *log;
18005 int i, len, ret = -EINVAL;
18006 bool is_priv;
18007
18008 /* no program is valid */
18009 if (ARRAY_SIZE(bpf_verifier_ops) == 0)
18010 return -EINVAL;
18011
18012 /* 'struct bpf_verifier_env' can be global, but since it's not small,
18013 * allocate/free it every time bpf_check() is called
18014 */
18015 env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
18016 if (!env)
18017 return -ENOMEM;
18018 log = &env->log;
18019
18020 len = (*prog)->len;
18021 env->insn_aux_data =
18022 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
18023 ret = -ENOMEM;
18024 if (!env->insn_aux_data)
18025 goto err_free_env;
18026 for (i = 0; i < len; i++)
18027 env->insn_aux_data[i].orig_idx = i;
18028 env->prog = *prog;
18029 env->ops = bpf_verifier_ops[env->prog->type];
18030 env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
18031 is_priv = bpf_capable();
18032
18033 bpf_get_btf_vmlinux();
18034
18035 /* grab the mutex to protect a few globals used by the verifier */
18036 if (!is_priv)
18037 mutex_lock(&bpf_verifier_lock);
18038
18039 if (attr->log_level || attr->log_buf || attr->log_size) {
18040 /* user requested verbose verifier output
18041 * and supplied a buffer to store the verification trace
18042 */
18043 log->level = attr->log_level;
18044 log->ubuf = (char __user *) (unsigned long) attr->log_buf;
18045 log->len_total = attr->log_size;
18046
18047 /* log attributes have to be sane */
18048 if (!bpf_verifier_log_attr_valid(log)) {
18049 ret = -EINVAL;
18050 goto err_unlock;
18051 }
18052 }
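/* Illustration only: these three fields arrive untouched from the
 * BPF_PROG_LOAD attributes supplied by user space, e.g. (minimal sketch,
 * error handling omitted, buffer size arbitrary):
 *
 *	char buf[64 * 1024];
 *	union bpf_attr attr = {
 *		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
 *		.insns     = (__u64)(unsigned long)insns,
 *		.insn_cnt  = insn_cnt,
 *		.license   = (__u64)(unsigned long)"GPL",
 *		.log_level = 1,
 *		.log_buf   = (__u64)(unsigned long)buf,
 *		.log_size  = sizeof(buf),
 *	};
 *	int fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 */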
18053
18054 mark_verifier_state_clean(env);
18055
18056 if (IS_ERR(btf_vmlinux)) {
18057 /* Either gcc or pahole or the kernel is broken. */
18058 verbose(env, "in-kernel BTF is malformed\n");
18059 ret = PTR_ERR(btf_vmlinux);
18060 goto skip_full_check;
18061 }
18062
18063 env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
18064 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
18065 env->strict_alignment = true;
18066 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
18067 env->strict_alignment = false;
18068
18069 env->allow_ptr_leaks = bpf_allow_ptr_leaks();
18070 env->allow_uninit_stack = bpf_allow_uninit_stack();
18071 env->bypass_spec_v1 = bpf_bypass_spec_v1();
18072 env->bypass_spec_v4 = bpf_bypass_spec_v4();
18073 env->bpf_capable = bpf_capable();
18074
18075 if (is_priv)
18076 env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
18077
18078 env->explored_states = kvcalloc(state_htab_size(env),
18079 sizeof(struct bpf_verifier_state_list *),
18080 GFP_USER);
18081 ret = -ENOMEM;
18082 if (!env->explored_states)
18083 goto skip_full_check;
18084
18085 ret = add_subprog_and_kfunc(env);
18086 if (ret < 0)
18087 goto skip_full_check;
18088
18089 ret = check_subprogs(env);
18090 if (ret < 0)
18091 goto skip_full_check;
18092
18093 ret = check_btf_info(env, attr, uattr);
18094 if (ret < 0)
18095 goto skip_full_check;
18096
18097 ret = check_attach_btf_id(env);
18098 if (ret)
18099 goto skip_full_check;
18100
18101 ret = resolve_pseudo_ldimm64(env);
18102 if (ret < 0)
18103 goto skip_full_check;
18104
18105 if (bpf_prog_is_offloaded(env->prog->aux)) {
18106 ret = bpf_prog_offload_verifier_prep(env->prog);
18107 if (ret)
18108 goto skip_full_check;
18109 }
18110
18111 ret = check_cfg(env);
18112 if (ret < 0)
18113 goto skip_full_check;
18114
18115 ret = do_check_subprogs(env);
18116 ret = ret ?: do_check_main(env);
18117
18118 if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
18119 ret = bpf_prog_offload_finalize(env);
18120
18121skip_full_check:
18122 kvfree(env->explored_states);
18123
18124 if (ret == 0)
18125 ret = check_max_stack_depth(env);
18126
18127 /* instruction rewrites happen after this point */
18128 if (ret == 0)
18129 ret = optimize_bpf_loop(env);
18130
18131 if (is_priv) {
18132 if (ret == 0)
18133 opt_hard_wire_dead_code_branches(env);
18134 if (ret == 0)
18135 ret = opt_remove_dead_code(env);
18136 if (ret == 0)
18137 ret = opt_remove_nops(env);
18138 } else {
18139 if (ret == 0)
18140 sanitize_dead_code(env);
18141 }
18142
18143 if (ret == 0)
18144 /* program is valid, convert *(u32*)(ctx + off) accesses */
18145 ret = convert_ctx_accesses(env);
18146
18147 if (ret == 0)
18148 ret = do_misc_fixups(env);
18149
18150 /* do the 32-bit optimization after insn patching is done so that the
18151 * patched insns can be handled correctly.
18152 */
18153 if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
18154 ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
18155 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
18156 : false;
18157 }
18158
18159 if (ret == 0)
18160 ret = fixup_call_args(env);
18161
18162 env->verification_time = ktime_get_ns() - start_time;
18163 print_verification_stats(env);
18164 env->prog->aux->verified_insns = env->insn_processed;
18165
18166 if (log->level && bpf_verifier_log_full(log))
18167 ret = -ENOSPC;
18168 if (log->level && !log->ubuf) {
18169 ret = -EFAULT;
18170 goto err_release_maps;
18171 }
18172
18173 if (ret)
18174 goto err_release_maps;
18175
18176 if (env->used_map_cnt) {
18177 /* if program passed verifier, update used_maps in bpf_prog_info */
18178 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
18179 sizeof(env->used_maps[0]),
18180 GFP_KERNEL);
18181
18182 if (!env->prog->aux->used_maps) {
18183 ret = -ENOMEM;
18184 goto err_release_maps;
18185 }
18186
18187 memcpy(env->prog->aux->used_maps, env->used_maps,
18188 sizeof(env->used_maps[0]) * env->used_map_cnt);
18189 env->prog->aux->used_map_cnt = env->used_map_cnt;
18190 }
18191 if (env->used_btf_cnt) {
18192 /* if program passed verifier, update used_btfs in bpf_prog_aux */
18193 env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
18194 sizeof(env->used_btfs[0]),
18195 GFP_KERNEL);
18196 if (!env->prog->aux->used_btfs) {
18197 ret = -ENOMEM;
18198 goto err_release_maps;
18199 }
18200
18201 memcpy(env->prog->aux->used_btfs, env->used_btfs,
18202 sizeof(env->used_btfs[0]) * env->used_btf_cnt);
18203 env->prog->aux->used_btf_cnt = env->used_btf_cnt;
18204 }
18205 if (env->used_map_cnt || env->used_btf_cnt) {
18206 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
18207 * bpf_ld_imm64 instructions
18208 */
18209 convert_pseudo_ld_imm64(env);
18210 }
18211
18212 adjust_btf_func(env);
18213
18214err_release_maps:
18215 if (!env->prog->aux->used_maps)
18216 /* if we didn't copy map pointers into bpf_prog_info, release
18217 * them now. Otherwise free_used_maps() will release them.
18218 */
18219 release_maps(env);
18220 if (!env->prog->aux->used_btfs)
18221 release_btfs(env);
18222
18223 /* extension progs temporarily inherit the attach_type of their targets
18224 * for verification purposes, so set it back to zero before returning
18225 */
18226 if (env->prog->type == BPF_PROG_TYPE_EXT)
18227 env->prog->expected_attach_type = 0;
18228
18229 *prog = env->prog;
18230err_unlock:
18231 if (!is_priv)
18232 mutex_unlock(&bpf_verifier_lock);
18233 vfree(env->insn_aux_data);
18234err_free_env:
18235 kfree(env);
18236 return ret;
18237}