kernel/bpf/verifier.c
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
 */
#include <uapi/linux/btf.h>
#include <linux/bpf-cgroup.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <linux/stringify.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/perf_event.h>
#include <linux/ctype.h>
#include <linux/error-injection.h>
#include <linux/bpf_lsm.h>
#include <linux/btf_ids.h>
#include <linux/poison.h>

#include "disasm.h"

static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};

/* bpf_check() is a static code analyzer that walks eBPF program
 * instruction by instruction and updates register/stack state.
 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
 *
 * The first pass is depth-first-search to check that the program is a DAG.
 * It rejects the following programs:
 * - larger than BPF_MAXINSNS insns
 * - if loop is present (detected via back-edge)
 * - unreachable insns exist (shouldn't be a forest. program = one function)
 * - out of bounds or malformed jumps
 * The second pass is all possible path descent from the 1st insn.
 * Since it's analyzing all paths through the program, the length of the
 * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
 * Number of 'branches to be analyzed' is limited to 1k
 *
 * On entry to each instruction, each register has a type, and the instruction
 * changes the types of the registers depending on instruction semantics.
 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
 * copied to R1.
 *
 * All registers are 64-bit.
 * R0 - return register
 * R1-R5 argument passing registers
 * R6-R9 callee saved registers
 * R10 - frame pointer read-only
 *
 * At the start of BPF program the register R1 contains a pointer to bpf_context
 * and has type PTR_TO_CTX.
 *
 * Verifier tracks arithmetic operations on pointers in case:
 * BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
 * 1st insn copies R10 (which has FRAME_PTR) type into R1
 * and 2nd arithmetic instruction is pattern matched to recognize
 * that it wants to construct a pointer to some element within stack.
 * So after 2nd insn, the register R1 has type PTR_TO_STACK
 * (and -20 constant is saved for further stack bounds checking).
 * Meaning that this reg is a pointer to stack plus known immediate constant.
 *
 * Most of the time the registers have SCALAR_VALUE type, which
 * means the register has some value, but it's not a valid pointer.
 * (like pointer plus pointer becomes SCALAR_VALUE type)
 *
 * When verifier sees load or store instructions the type of base register
 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
 * four pointer types recognized by check_mem_access() function.
 *
 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
 * and the range of [ptr, ptr + map's value_size) is accessible.
 *
 * registers used to pass values to function calls are checked against
 * function argument constraints.
 *
 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
 * It means that the register type passed to this function must be
 * PTR_TO_STACK and it will be used inside the function as
 * 'pointer to map element key'
 *
 * For example the argument constraints for bpf_map_lookup_elem():
 * .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 * .arg1_type = ARG_CONST_MAP_PTR,
 * .arg2_type = ARG_PTR_TO_MAP_KEY,
 *
 * ret_type says that this function returns 'pointer to map elem value or null'
 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 * 2nd argument should be a pointer to stack, which will be used inside
 * the helper function as a pointer to map element key.
 *
 * On the kernel side the helper function looks like:
 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 * {
 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 *    void *key = (void *) (unsigned long) r2;
 *    void *value;
 *
 *    here kernel can access 'key' and 'map' pointers safely, knowing that
 *    [key, key + map->key_size) bytes are valid and were initialized on
 *    the stack of eBPF program.
 * }
 *
 * Corresponding eBPF program may look like:
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 * here verifier looks at prototype of map_lookup_elem() and sees:
 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 *
 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 * and were initialized prior to this call.
 * If it's ok, then verifier allows this BPF_CALL insn and looks at
 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 * returns either pointer to map value or NULL.
 *
 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 * insn, the register holding that pointer in the true branch changes state to
 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 * branch. See check_cond_jmp_op().
 *
 * After the call R0 is set to return type of the function and registers R1-R5
 * are set to NOT_INIT to indicate that they are no longer readable.
 *
 * The following reference types represent a potential reference to a kernel
 * resource which, after first being allocated, must be checked and freed by
 * the BPF program:
 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 *
 * When the verifier sees a helper call return a reference type, it allocates a
 * pointer id for the reference and stores it in the current function state.
 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 * passes through a NULL-check conditional. For the branch wherein the state is
 * changed to CONST_IMM, the verifier releases the reference.
 *
 * For each helper function that allocates a reference, such as
 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 * bpf_sk_release(). When a reference type passes into the release function,
 * the verifier also releases the reference. If any unchecked or unreleased
 * reference remains at the end of the program, the verifier rejects it.
 */
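
/* Illustrative sketch (not part of this file): the map-lookup calling
 * convention described above, written as the restricted C a BPF program
 * author would use. The map definition, section name and libbpf conveniences
 * (bpf_helpers.h, SEC(), __uint()/__type()) are assumptions of this example;
 * only bpf_map_lookup_elem() and the register-type transitions come from the
 * comment above.
 *
 *	#include <linux/bpf.h>
 *	#include <bpf/bpf_helpers.h>
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, __u32);
 *		__type(value, __u64);
 *	} counters SEC(".maps");
 *
 *	SEC("socket")
 *	int count_packets(void *ctx)
 *	{
 *		__u32 key = 0;	// lives on the BPF stack -> PTR_TO_STACK
 *		__u64 *value;
 *
 *		// at the call: R1 is CONST_PTR_TO_MAP, R2 is PTR_TO_STACK;
 *		// afterwards R0 is PTR_TO_MAP_VALUE_OR_NULL
 *		value = bpf_map_lookup_elem(&counters, &key);
 *		if (!value)	// NULL check: R0 becomes PTR_TO_MAP_VALUE
 *			return 0;
 *		__sync_fetch_and_add(value, 1);
 *		return 0;
 *	}
 */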

/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	struct bpf_verifier_stack_elem *next;
	/* length of verifier log at the time this state was pushed on stack */
	u32 log_pos;
};

#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
#define BPF_COMPLEXITY_LIMIT_STATES	64

#define BPF_MAP_KEY_POISON	(1ULL << 63)
#define BPF_MAP_KEY_SEEN	(1ULL << 62)

#define BPF_MAP_PTR_UNPRIV	1UL
#define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) + \
					  POISON_POINTER_DELTA))
#define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))

static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
static int ref_set_non_owning(struct bpf_verifier_env *env,
			      struct bpf_reg_state *reg);

static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
	return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
}

static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
{
	return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
}

static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
			      const struct bpf_map *map, bool unpriv)
{
	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
	unpriv |= bpf_map_ptr_unpriv(aux);
	aux->map_ptr_state = (unsigned long)map |
			     (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
}

static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
{
	return aux->map_key_state & BPF_MAP_KEY_POISON;
}

static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
{
	return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
}

static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
{
	return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
}

static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
{
	bool poisoned = bpf_map_key_poisoned(aux);

	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
}
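
/* Worked example (illustrative, not part of this file): the helpers above
 * pack a constant map key into aux->map_key_state together with two flag
 * bits. Assuming a first call that records the constant key 7 and a later
 * call that poisons the state:
 *
 *	bpf_map_key_store(aux, 7);		// state = 7 | BPF_MAP_KEY_SEEN
 *	bpf_map_key_unseen(aux);		// false
 *	bpf_map_key_immediate(aux);		// 7
 *	bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
 *	bpf_map_key_poisoned(aux);		// true: the key can no longer
 *						// be treated as a known constant
 */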

static bool bpf_pseudo_call(const struct bpf_insn *insn)
{
	return insn->code == (BPF_JMP | BPF_CALL) &&
	       insn->src_reg == BPF_PSEUDO_CALL;
}

static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
{
	return insn->code == (BPF_JMP | BPF_CALL) &&
	       insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
}

struct bpf_call_arg_meta {
	struct bpf_map *map_ptr;
	bool raw_mode;
	bool pkt_access;
	u8 release_regno;
	int regno;
	int access_size;
	int mem_size;
	u64 msize_max_value;
	int ref_obj_id;
	int dynptr_id;
	int map_uid;
	int func_id;
	struct btf *btf;
	u32 btf_id;
	struct btf *ret_btf;
	u32 ret_btf_id;
	u32 subprogno;
	struct btf_field *kptr_field;
};

struct btf *btf_vmlinux;

static DEFINE_MUTEX(bpf_verifier_lock);

static const struct bpf_line_info *
find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
{
	const struct bpf_line_info *linfo;
	const struct bpf_prog *prog;
	u32 i, nr_linfo;

	prog = env->prog;
	nr_linfo = prog->aux->nr_linfo;

	if (!nr_linfo || insn_off >= prog->len)
		return NULL;

	linfo = prog->aux->linfo;
	for (i = 1; i < nr_linfo; i++)
		if (insn_off < linfo[i].insn_off)
			break;

	return &linfo[i - 1];
}

void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
		       va_list args)
{
	unsigned int n;

	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);

	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
		  "verifier log line truncated - local buffer too short\n");

	if (log->level == BPF_LOG_KERNEL) {
		bool newline = n > 0 && log->kbuf[n - 1] == '\n';

		pr_err("BPF: %s%s", log->kbuf, newline ? "" : "\n");
		return;
	}

	n = min(log->len_total - log->len_used - 1, n);
	log->kbuf[n] = '\0';
	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
		log->len_used += n;
	else
		log->ubuf = NULL;
}

static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
{
	char zero = 0;

	if (!bpf_verifier_log_needed(log))
		return;

	log->len_used = new_pos;
	if (put_user(zero, log->ubuf + new_pos))
		log->ubuf = NULL;
}

/* log_level controls verbosity level of eBPF verifier.
 * bpf_verifier_log_write() is used to dump the verification trace to the log,
 * so the user can figure out what's wrong with the program
 */
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
					   const char *fmt, ...)
{
	va_list args;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(&env->log, fmt, args);
	va_end(args);
}
EXPORT_SYMBOL_GPL(bpf_verifier_log_write);

__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
{
	struct bpf_verifier_env *env = private_data;
	va_list args;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(&env->log, fmt, args);
	va_end(args);
}

__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
			    const char *fmt, ...)
{
	va_list args;

	if (!bpf_verifier_log_needed(log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(log, fmt, args);
	va_end(args);
}
EXPORT_SYMBOL_GPL(bpf_log);
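
/* Illustrative sketch (not part of this file): the buffer that
 * bpf_verifier_vlog() copies into via log->ubuf is supplied by user space at
 * program load time. Shown with libbpf's bpf_prog_load() wrapper; the option
 * names come from libbpf, not from this file, and insns/insn_cnt stand for an
 * already-prepared instruction array.
 *
 *	char log_buf[64 * 1024];
 *	LIBBPF_OPTS(bpf_prog_load_opts, opts,
 *		    .log_buf = log_buf,
 *		    .log_size = sizeof(log_buf),
 *		    .log_level = 2);	// higher levels give a more verbose trace
 *
 *	int fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "prog", "GPL",
 *			       insns, insn_cnt, &opts);
 *	if (fd < 0)
 *		fprintf(stderr, "verifier log:\n%s\n", log_buf);
 */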
9e15db66 379
d9762e84
MKL
380static const char *ltrim(const char *s)
381{
382 while (isspace(*s))
383 s++;
384
385 return s;
386}
387
388__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
389 u32 insn_off,
390 const char *prefix_fmt, ...)
391{
392 const struct bpf_line_info *linfo;
393
394 if (!bpf_verifier_log_needed(&env->log))
395 return;
396
397 linfo = find_linfo(env, insn_off);
398 if (!linfo || linfo == env->prev_linfo)
399 return;
400
401 if (prefix_fmt) {
402 va_list args;
403
404 va_start(args, prefix_fmt);
405 bpf_verifier_vlog(&env->log, prefix_fmt, args);
406 va_end(args);
407 }
408
409 verbose(env, "%s\n",
410 ltrim(btf_name_by_offset(env->prog->aux->btf,
411 linfo->line_off)));
412
413 env->prev_linfo = linfo;
414}
415
bc2591d6
YS
416static void verbose_invalid_scalar(struct bpf_verifier_env *env,
417 struct bpf_reg_state *reg,
418 struct tnum *range, const char *ctx,
419 const char *reg_name)
420{
421 char tn_buf[48];
422
423 verbose(env, "At %s the register %s ", ctx, reg_name);
424 if (!tnum_is_unknown(reg->var_off)) {
425 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
426 verbose(env, "has value %s", tn_buf);
427 } else {
428 verbose(env, "has unknown scalar value");
429 }
430 tnum_strn(tn_buf, sizeof(tn_buf), *range);
431 verbose(env, " should have been in %s\n", tn_buf);
432}
433
de8f3a83
DB
434static bool type_is_pkt_pointer(enum bpf_reg_type type)
435{
0c9a7a7e 436 type = base_type(type);
de8f3a83
DB
437 return type == PTR_TO_PACKET ||
438 type == PTR_TO_PACKET_META;
439}
440
46f8bc92
MKL
441static bool type_is_sk_pointer(enum bpf_reg_type type)
442{
443 return type == PTR_TO_SOCKET ||
655a51e5 444 type == PTR_TO_SOCK_COMMON ||
fada7fdc
JL
445 type == PTR_TO_TCP_SOCK ||
446 type == PTR_TO_XDP_SOCK;
46f8bc92
MKL
447}
448
cac616db
JF
449static bool reg_type_not_null(enum bpf_reg_type type)
450{
451 return type == PTR_TO_SOCKET ||
452 type == PTR_TO_TCP_SOCK ||
453 type == PTR_TO_MAP_VALUE ||
69c087ba 454 type == PTR_TO_MAP_KEY ||
01c66c48 455 type == PTR_TO_SOCK_COMMON;
cac616db
JF
456}
457
d8939cb0
DM
458static bool type_is_ptr_alloc_obj(u32 type)
459{
460 return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC;
461}
462
6a3cd331
DM
463static bool type_is_non_owning_ref(u32 type)
464{
465 return type_is_ptr_alloc_obj(type) && type_flag(type) & NON_OWN_REF;
466}
467
4e814da0
KKD
468static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
469{
470 struct btf_record *rec = NULL;
471 struct btf_struct_meta *meta;
472
473 if (reg->type == PTR_TO_MAP_VALUE) {
474 rec = reg->map_ptr->record;
d8939cb0 475 } else if (type_is_ptr_alloc_obj(reg->type)) {
4e814da0
KKD
476 meta = btf_find_struct_meta(reg->btf, reg->btf_id);
477 if (meta)
478 rec = meta->record;
479 }
480 return rec;
481}
482
d83525ca
AS
483static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
484{
4e814da0 485 return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK);
cba368c1
MKL
486}
487
20b2aff4
HL
488static bool type_is_rdonly_mem(u32 type)
489{
490 return type & MEM_RDONLY;
cba368c1
MKL
491}
492
48946bd6 493static bool type_may_be_null(u32 type)
fd1b0d60 494{
48946bd6 495 return type & PTR_MAYBE_NULL;
fd1b0d60
LB
496}
497
64d85290
JS
498static bool is_acquire_function(enum bpf_func_id func_id,
499 const struct bpf_map *map)
500{
501 enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
502
503 if (func_id == BPF_FUNC_sk_lookup_tcp ||
504 func_id == BPF_FUNC_sk_lookup_udp ||
457f4436 505 func_id == BPF_FUNC_skc_lookup_tcp ||
c0a5a21c
KKD
506 func_id == BPF_FUNC_ringbuf_reserve ||
507 func_id == BPF_FUNC_kptr_xchg)
64d85290
JS
508 return true;
509
510 if (func_id == BPF_FUNC_map_lookup_elem &&
511 (map_type == BPF_MAP_TYPE_SOCKMAP ||
512 map_type == BPF_MAP_TYPE_SOCKHASH))
513 return true;
514
515 return false;
46f8bc92
MKL
516}
517
1b986589
MKL
518static bool is_ptr_cast_function(enum bpf_func_id func_id)
519{
520 return func_id == BPF_FUNC_tcp_sock ||
1df8f55a
MKL
521 func_id == BPF_FUNC_sk_fullsock ||
522 func_id == BPF_FUNC_skc_to_tcp_sock ||
523 func_id == BPF_FUNC_skc_to_tcp6_sock ||
524 func_id == BPF_FUNC_skc_to_udp6_sock ||
3bc253c2 525 func_id == BPF_FUNC_skc_to_mptcp_sock ||
1df8f55a
MKL
526 func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
527 func_id == BPF_FUNC_skc_to_tcp_request_sock;
1b986589
MKL
528}
529
88374342 530static bool is_dynptr_ref_function(enum bpf_func_id func_id)
b2d8ef19
DM
531{
532 return func_id == BPF_FUNC_dynptr_data;
533}
534
be2ef816
AN
535static bool is_callback_calling_function(enum bpf_func_id func_id)
536{
537 return func_id == BPF_FUNC_for_each_map_elem ||
538 func_id == BPF_FUNC_timer_set_callback ||
539 func_id == BPF_FUNC_find_vma ||
540 func_id == BPF_FUNC_loop ||
541 func_id == BPF_FUNC_user_ringbuf_drain;
542}
543
9bb00b28
YS
544static bool is_storage_get_function(enum bpf_func_id func_id)
545{
546 return func_id == BPF_FUNC_sk_storage_get ||
547 func_id == BPF_FUNC_inode_storage_get ||
548 func_id == BPF_FUNC_task_storage_get ||
549 func_id == BPF_FUNC_cgrp_storage_get;
550}
551
b2d8ef19
DM
552static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
553 const struct bpf_map *map)
554{
555 int ref_obj_uses = 0;
556
557 if (is_ptr_cast_function(func_id))
558 ref_obj_uses++;
559 if (is_acquire_function(func_id, map))
560 ref_obj_uses++;
88374342 561 if (is_dynptr_ref_function(func_id))
b2d8ef19
DM
562 ref_obj_uses++;
563
564 return ref_obj_uses > 1;
565}
566
39491867
BJ
567static bool is_cmpxchg_insn(const struct bpf_insn *insn)
568{
569 return BPF_CLASS(insn->code) == BPF_STX &&
570 BPF_MODE(insn->code) == BPF_ATOMIC &&
571 insn->imm == BPF_CMPXCHG;
572}
573
c25b2ae1
HL
574/* string representation of 'enum bpf_reg_type'
575 *
576 * Note that reg_type_str() can not appear more than once in a single verbose()
577 * statement.
578 */
579static const char *reg_type_str(struct bpf_verifier_env *env,
580 enum bpf_reg_type type)
581{
ef66c547 582 char postfix[16] = {0}, prefix[64] = {0};
c25b2ae1
HL
583 static const char * const str[] = {
584 [NOT_INIT] = "?",
7df5072c 585 [SCALAR_VALUE] = "scalar",
c25b2ae1
HL
586 [PTR_TO_CTX] = "ctx",
587 [CONST_PTR_TO_MAP] = "map_ptr",
588 [PTR_TO_MAP_VALUE] = "map_value",
589 [PTR_TO_STACK] = "fp",
590 [PTR_TO_PACKET] = "pkt",
591 [PTR_TO_PACKET_META] = "pkt_meta",
592 [PTR_TO_PACKET_END] = "pkt_end",
593 [PTR_TO_FLOW_KEYS] = "flow_keys",
594 [PTR_TO_SOCKET] = "sock",
595 [PTR_TO_SOCK_COMMON] = "sock_common",
596 [PTR_TO_TCP_SOCK] = "tcp_sock",
597 [PTR_TO_TP_BUFFER] = "tp_buffer",
598 [PTR_TO_XDP_SOCK] = "xdp_sock",
599 [PTR_TO_BTF_ID] = "ptr_",
c25b2ae1 600 [PTR_TO_MEM] = "mem",
20b2aff4 601 [PTR_TO_BUF] = "buf",
c25b2ae1
HL
602 [PTR_TO_FUNC] = "func",
603 [PTR_TO_MAP_KEY] = "map_key",
27060531 604 [CONST_PTR_TO_DYNPTR] = "dynptr_ptr",
c25b2ae1
HL
605 };
606
607 if (type & PTR_MAYBE_NULL) {
5844101a 608 if (base_type(type) == PTR_TO_BTF_ID)
c25b2ae1
HL
609 strncpy(postfix, "or_null_", 16);
610 else
611 strncpy(postfix, "_or_null", 16);
612 }
613
9bb00b28 614 snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s",
ef66c547
DV
615 type & MEM_RDONLY ? "rdonly_" : "",
616 type & MEM_RINGBUF ? "ringbuf_" : "",
617 type & MEM_USER ? "user_" : "",
618 type & MEM_PERCPU ? "percpu_" : "",
9bb00b28 619 type & MEM_RCU ? "rcu_" : "",
3f00c523
DV
620 type & PTR_UNTRUSTED ? "untrusted_" : "",
621 type & PTR_TRUSTED ? "trusted_" : ""
ef66c547 622 );
20b2aff4
HL
623
624 snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
625 prefix, str[base_type(type)], postfix);
c25b2ae1
HL
626 return env->type_str_buf;
627}
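
/* Example (illustrative, not part of this file): reg_type_str() composes
 * modifier prefixes/suffixes around the base type name, e.g.
 *
 *	PTR_TO_MAP_VALUE | PTR_MAYBE_NULL        -> "map_value_or_null"
 *	PTR_TO_MEM | MEM_RINGBUF                 -> "ringbuf_mem"
 *	PTR_TO_BTF_ID | MEM_RDONLY | PTR_TRUSTED -> "rdonly_trusted_ptr_"
 *
 * Because the result lives in the single env->type_str_buf, something like
 *	verbose(env, "%s vs %s", reg_type_str(env, t1), reg_type_str(env, t2));
 * would print the same string twice, which is why the comment above says
 * reg_type_str() may appear at most once per verbose() statement.
 */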
17a52670 628
8efea21d
EC
629static char slot_type_char[] = {
630 [STACK_INVALID] = '?',
631 [STACK_SPILL] = 'r',
632 [STACK_MISC] = 'm',
633 [STACK_ZERO] = '0',
97e03f52 634 [STACK_DYNPTR] = 'd',
8efea21d
EC
635};
636
4e92024a
AS
637static void print_liveness(struct bpf_verifier_env *env,
638 enum bpf_reg_liveness live)
639{
9242b5f5 640 if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
4e92024a
AS
641 verbose(env, "_");
642 if (live & REG_LIVE_READ)
643 verbose(env, "r");
644 if (live & REG_LIVE_WRITTEN)
645 verbose(env, "w");
9242b5f5
AS
646 if (live & REG_LIVE_DONE)
647 verbose(env, "D");
4e92024a
AS
648}
649
79168a66 650static int __get_spi(s32 off)
97e03f52
JK
651{
652 return (-off - 1) / BPF_REG_SIZE;
653}
654
f5b625e5
KKD
655static struct bpf_func_state *func(struct bpf_verifier_env *env,
656 const struct bpf_reg_state *reg)
657{
658 struct bpf_verifier_state *cur = env->cur_state;
659
660 return cur->frame[reg->frameno];
661}
662
97e03f52
JK
663static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
664{
f5b625e5 665 int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
97e03f52 666
f5b625e5
KKD
667 /* We need to check that slots between [spi - nr_slots + 1, spi] are
668 * within [0, allocated_stack).
669 *
670 * Please note that the spi grows downwards. For example, a dynptr
671 * takes the size of two stack slots; the first slot will be at
672 * spi and the second slot will be at spi - 1.
673 */
674 return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
97e03f52
JK
675}
676
79168a66 677static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
f4d7e40a 678{
79168a66 679 int off, spi;
f4d7e40a 680
79168a66
KKD
681 if (!tnum_is_const(reg->var_off)) {
682 verbose(env, "dynptr has to be at a constant offset\n");
683 return -EINVAL;
684 }
685
686 off = reg->off + reg->var_off.value;
687 if (off % BPF_REG_SIZE) {
688 verbose(env, "cannot pass in dynptr at an offset=%d\n", off);
689 return -EINVAL;
690 }
691
692 spi = __get_spi(off);
693 if (spi < 1) {
694 verbose(env, "cannot pass in dynptr at an offset=%d\n", off);
695 return -EINVAL;
696 }
97e03f52 697
f5b625e5
KKD
698 if (!is_spi_bounds_valid(func(env, reg), spi, BPF_DYNPTR_NR_SLOTS))
699 return -ERANGE;
700 return spi;
f4d7e40a
AS
701}
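
/* Worked example (illustrative, not part of this file): for a dynptr stored
 * at fp-16, with BPF_REG_SIZE == 8 and BPF_DYNPTR_NR_SLOTS == 2:
 *
 *	__get_spi(-16) = (16 - 1) / 8 = 1
 *
 * so the dynptr occupies spi 1 (fp-16..fp-9) and spi 0 (fp-8..fp-1), and
 * is_spi_bounds_valid() checks 1 - 2 + 1 >= 0 && 1 < allocated_stack / 8,
 * i.e. at least 16 bytes of stack must already be allocated.
 */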
702
22dc4a0f 703static const char *kernel_type_name(const struct btf* btf, u32 id)
9e15db66 704{
22dc4a0f 705 return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
9e15db66
AS
706}
707
d54e0f6c
AN
708static const char *dynptr_type_str(enum bpf_dynptr_type type)
709{
710 switch (type) {
711 case BPF_DYNPTR_TYPE_LOCAL:
712 return "local";
713 case BPF_DYNPTR_TYPE_RINGBUF:
714 return "ringbuf";
715 case BPF_DYNPTR_TYPE_SKB:
716 return "skb";
717 case BPF_DYNPTR_TYPE_XDP:
718 return "xdp";
719 case BPF_DYNPTR_TYPE_INVALID:
720 return "<invalid>";
721 default:
722 WARN_ONCE(1, "unknown dynptr type %d\n", type);
723 return "<unknown>";
724 }
725}
726
0f55f9ed
CL
727static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
728{
729 env->scratched_regs |= 1U << regno;
730}
731
732static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
733{
343e5375 734 env->scratched_stack_slots |= 1ULL << spi;
0f55f9ed
CL
735}
736
737static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
738{
739 return (env->scratched_regs >> regno) & 1;
740}
741
742static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno)
743{
744 return (env->scratched_stack_slots >> regno) & 1;
745}
746
747static bool verifier_state_scratched(const struct bpf_verifier_env *env)
748{
749 return env->scratched_regs || env->scratched_stack_slots;
750}
751
752static void mark_verifier_state_clean(struct bpf_verifier_env *env)
753{
754 env->scratched_regs = 0U;
343e5375 755 env->scratched_stack_slots = 0ULL;
0f55f9ed
CL
756}
757
758/* Used for printing the entire verifier state. */
759static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
760{
761 env->scratched_regs = ~0U;
343e5375 762 env->scratched_stack_slots = ~0ULL;
0f55f9ed
CL
763}
764
97e03f52
JK
765static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
766{
767 switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
768 case DYNPTR_TYPE_LOCAL:
769 return BPF_DYNPTR_TYPE_LOCAL;
bc34dee6
JK
770 case DYNPTR_TYPE_RINGBUF:
771 return BPF_DYNPTR_TYPE_RINGBUF;
b5964b96
JK
772 case DYNPTR_TYPE_SKB:
773 return BPF_DYNPTR_TYPE_SKB;
05421aec
JK
774 case DYNPTR_TYPE_XDP:
775 return BPF_DYNPTR_TYPE_XDP;
97e03f52
JK
776 default:
777 return BPF_DYNPTR_TYPE_INVALID;
778 }
779}
780
66e3a13e
JK
781static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
782{
783 switch (type) {
784 case BPF_DYNPTR_TYPE_LOCAL:
785 return DYNPTR_TYPE_LOCAL;
786 case BPF_DYNPTR_TYPE_RINGBUF:
787 return DYNPTR_TYPE_RINGBUF;
788 case BPF_DYNPTR_TYPE_SKB:
789 return DYNPTR_TYPE_SKB;
790 case BPF_DYNPTR_TYPE_XDP:
791 return DYNPTR_TYPE_XDP;
792 default:
793 return 0;
794 }
795}
796
bc34dee6
JK
797static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
798{
799 return type == BPF_DYNPTR_TYPE_RINGBUF;
800}
801
27060531
KKD
802static void __mark_dynptr_reg(struct bpf_reg_state *reg,
803 enum bpf_dynptr_type type,
f8064ab9 804 bool first_slot, int dynptr_id);
27060531
KKD
805
806static void __mark_reg_not_init(const struct bpf_verifier_env *env,
807 struct bpf_reg_state *reg);
808
f8064ab9
KKD
809static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
810 struct bpf_reg_state *sreg1,
27060531
KKD
811 struct bpf_reg_state *sreg2,
812 enum bpf_dynptr_type type)
813{
f8064ab9
KKD
814 int id = ++env->id_gen;
815
816 __mark_dynptr_reg(sreg1, type, true, id);
817 __mark_dynptr_reg(sreg2, type, false, id);
27060531
KKD
818}
819
f8064ab9
KKD
820static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
821 struct bpf_reg_state *reg,
27060531
KKD
822 enum bpf_dynptr_type type)
823{
f8064ab9 824 __mark_dynptr_reg(reg, type, true, ++env->id_gen);
27060531
KKD
825}
826
ef8fc7a0
KKD
827static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
828 struct bpf_func_state *state, int spi);
27060531 829
97e03f52
JK
830static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
831 enum bpf_arg_type arg_type, int insn_idx)
832{
833 struct bpf_func_state *state = func(env, reg);
834 enum bpf_dynptr_type type;
379d4ba8 835 int spi, i, id, err;
97e03f52 836
79168a66
KKD
837 spi = dynptr_get_spi(env, reg);
838 if (spi < 0)
839 return spi;
97e03f52 840
379d4ba8
KKD
841 /* We cannot assume both spi and spi - 1 belong to the same dynptr,
842 * hence we need to call destroy_if_dynptr_stack_slot twice for both,
843 * to ensure that for the following example:
844 * [d1][d1][d2][d2]
845 * spi 3 2 1 0
846 * So marking spi = 2 should lead to destruction of both d1 and d2. In
847 * case they do belong to same dynptr, second call won't see slot_type
848 * as STACK_DYNPTR and will simply skip destruction.
849 */
850 err = destroy_if_dynptr_stack_slot(env, state, spi);
851 if (err)
852 return err;
853 err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
854 if (err)
855 return err;
97e03f52
JK
856
857 for (i = 0; i < BPF_REG_SIZE; i++) {
858 state->stack[spi].slot_type[i] = STACK_DYNPTR;
859 state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
860 }
861
862 type = arg_to_dynptr_type(arg_type);
863 if (type == BPF_DYNPTR_TYPE_INVALID)
864 return -EINVAL;
865
f8064ab9 866 mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
27060531 867 &state->stack[spi - 1].spilled_ptr, type);
97e03f52 868
bc34dee6
JK
869 if (dynptr_type_refcounted(type)) {
870 /* The id is used to track proper releasing */
871 id = acquire_reference_state(env, insn_idx);
872 if (id < 0)
873 return id;
874
27060531
KKD
875 state->stack[spi].spilled_ptr.ref_obj_id = id;
876 state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
bc34dee6
JK
877 }
878
d6fefa11
KKD
879 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
880 state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
881
97e03f52
JK
882 return 0;
883}
884
885static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
886{
887 struct bpf_func_state *state = func(env, reg);
888 int spi, i;
889
79168a66
KKD
890 spi = dynptr_get_spi(env, reg);
891 if (spi < 0)
892 return spi;
97e03f52
JK
893
894 for (i = 0; i < BPF_REG_SIZE; i++) {
895 state->stack[spi].slot_type[i] = STACK_INVALID;
896 state->stack[spi - 1].slot_type[i] = STACK_INVALID;
897 }
898
bc34dee6 899 /* Invalidate any slices associated with this dynptr */
27060531
KKD
900 if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type))
901 WARN_ON_ONCE(release_reference(env, state->stack[spi].spilled_ptr.ref_obj_id));
97e03f52 902
27060531
KKD
903 __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
904 __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
d6fefa11
KKD
905
	/* Why do we need to set REG_LIVE_WRITTEN for STACK_INVALID slot?
	 *
	 * While we don't allow reading STACK_INVALID, it is still possible to
	 * do <8 byte writes marking some but not all slots as STACK_MISC. Then,
	 * helpers or insns can do partial read of that part without failing,
	 * but check_stack_range_initialized, check_stack_read_var_off, and
	 * check_stack_read_fixed_off will do mark_reg_read for all 8-bytes of
	 * the slot conservatively. Hence we need to prevent those liveness
	 * marking walks.
	 *
	 * This was not a problem before because STACK_INVALID is only set by
	 * default (where the default reg state has its reg->parent as NULL), or
	 * in clean_live_states after REG_LIVE_DONE (at which point
	 * mark_reg_read won't walk reg->parent chain), but not randomly during
	 * verifier state exploration (like we did above). Hence, for our case
	 * the parentage chain will still be live (i.e. reg->parent may be
	 * non-NULL), while earlier reg->parent was NULL, so we need
	 * REG_LIVE_WRITTEN to screen off read marker propagation when it is
	 * done later on reads or by mark_dynptr_read, so that registers in the
	 * verifier state are not marked unnecessarily.
	 */
927 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
928 state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
929
97e03f52
JK
930 return 0;
931}
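
/* Illustrative sketch (not part of this file): the slot marking and
 * unmarking above is what tracks a program such as the one below.
 * bpf_ringbuf_reserve_dynptr() initializes a refcounted dynptr (two
 * STACK_DYNPTR slots plus an acquired reference) and
 * bpf_ringbuf_submit_dynptr()/bpf_ringbuf_discard_dynptr() release it.
 * The map and section names are assumptions of this example.
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_RINGBUF);
 *		__uint(max_entries, 4096);
 *	} rb SEC(".maps");
 *
 *	SEC("tp/syscalls/sys_enter_write")
 *	int log_write(void *ctx)
 *	{
 *		struct bpf_dynptr ptr;	// occupies two stack slots
 *
 *		if (bpf_ringbuf_reserve_dynptr(&rb, 8, 0, &ptr)) {
 *			// even a failed reservation leaves an initialized
 *			// dynptr that must be discarded
 *			bpf_ringbuf_discard_dynptr(&ptr, 0);
 *			return 0;
 *		}
 *		bpf_ringbuf_submit_dynptr(&ptr, 0);	// reference released
 *		return 0;
 *	}
 */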
932
ef8fc7a0
KKD
933static void __mark_reg_unknown(const struct bpf_verifier_env *env,
934 struct bpf_reg_state *reg);
935
dbd8d228
KKD
936static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
937{
938 if (!env->allow_ptr_leaks)
939 __mark_reg_not_init(env, reg);
940 else
941 __mark_reg_unknown(env, reg);
942}
943
ef8fc7a0
KKD
944static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
945 struct bpf_func_state *state, int spi)
97e03f52 946{
f8064ab9
KKD
947 struct bpf_func_state *fstate;
948 struct bpf_reg_state *dreg;
949 int i, dynptr_id;
27060531 950
ef8fc7a0
KKD
951 /* We always ensure that STACK_DYNPTR is never set partially,
952 * hence just checking for slot_type[0] is enough. This is
953 * different for STACK_SPILL, where it may be only set for
954 * 1 byte, so code has to use is_spilled_reg.
955 */
956 if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
957 return 0;
97e03f52 958
ef8fc7a0
KKD
959 /* Reposition spi to first slot */
960 if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
961 spi = spi + 1;
962
963 if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
964 verbose(env, "cannot overwrite referenced dynptr\n");
965 return -EINVAL;
966 }
967
968 mark_stack_slot_scratched(env, spi);
969 mark_stack_slot_scratched(env, spi - 1);
97e03f52 970
ef8fc7a0 971 /* Writing partially to one dynptr stack slot destroys both. */
97e03f52 972 for (i = 0; i < BPF_REG_SIZE; i++) {
ef8fc7a0
KKD
973 state->stack[spi].slot_type[i] = STACK_INVALID;
974 state->stack[spi - 1].slot_type[i] = STACK_INVALID;
97e03f52
JK
975 }
976
f8064ab9
KKD
977 dynptr_id = state->stack[spi].spilled_ptr.id;
978 /* Invalidate any slices associated with this dynptr */
979 bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({
980 /* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */
981 if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
982 continue;
dbd8d228
KKD
983 if (dreg->dynptr_id == dynptr_id)
984 mark_reg_invalid(env, dreg);
f8064ab9 985 }));
ef8fc7a0
KKD
986
987 /* Do not release reference state, we are destroying dynptr on stack,
988 * not using some helper to release it. Just reset register.
989 */
990 __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
991 __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
992
993 /* Same reason as unmark_stack_slots_dynptr above */
994 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
995 state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
996
997 return 0;
998}
999
7e0dac28 1000static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
97e03f52 1001{
7e0dac28
JK
1002 int spi;
1003
27060531
KKD
1004 if (reg->type == CONST_PTR_TO_DYNPTR)
1005 return false;
97e03f52 1006
7e0dac28
JK
1007 spi = dynptr_get_spi(env, reg);
1008
1009 /* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
1010 * error because this just means the stack state hasn't been updated yet.
1011 * We will do check_mem_access to check and update stack bounds later.
f5b625e5 1012 */
7e0dac28
JK
1013 if (spi < 0 && spi != -ERANGE)
1014 return false;
1015
1016 /* We don't need to check if the stack slots are marked by previous
1017 * dynptr initializations because we allow overwriting existing unreferenced
1018 * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
1019 * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
1020 * touching are completely destructed before we reinitialize them for a new
1021 * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
1022 * instead of delaying it until the end where the user will get "Unreleased
379d4ba8
KKD
1023 * reference" error.
1024 */
97e03f52
JK
1025 return true;
1026}
1027
7e0dac28 1028static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
97e03f52
JK
1029{
1030 struct bpf_func_state *state = func(env, reg);
7e0dac28 1031 int i, spi;
97e03f52 1032
7e0dac28
JK
1033 /* This already represents first slot of initialized bpf_dynptr.
1034 *
1035 * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
1036 * check_func_arg_reg_off's logic, so we don't need to check its
1037 * offset and alignment.
1038 */
27060531
KKD
1039 if (reg->type == CONST_PTR_TO_DYNPTR)
1040 return true;
1041
7e0dac28 1042 spi = dynptr_get_spi(env, reg);
79168a66
KKD
1043 if (spi < 0)
1044 return false;
f5b625e5 1045 if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
97e03f52
JK
1046 return false;
1047
1048 for (i = 0; i < BPF_REG_SIZE; i++) {
1049 if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
1050 state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
1051 return false;
1052 }
1053
e9e315b4
RS
1054 return true;
1055}
1056
6b75bd3d
KKD
1057static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1058 enum bpf_arg_type arg_type)
e9e315b4
RS
1059{
1060 struct bpf_func_state *state = func(env, reg);
1061 enum bpf_dynptr_type dynptr_type;
27060531 1062 int spi;
e9e315b4 1063
97e03f52
JK
1064 /* ARG_PTR_TO_DYNPTR takes any type of dynptr */
1065 if (arg_type == ARG_PTR_TO_DYNPTR)
1066 return true;
1067
e9e315b4 1068 dynptr_type = arg_to_dynptr_type(arg_type);
27060531
KKD
1069 if (reg->type == CONST_PTR_TO_DYNPTR) {
1070 return reg->dynptr.type == dynptr_type;
1071 } else {
79168a66
KKD
1072 spi = dynptr_get_spi(env, reg);
1073 if (spi < 0)
1074 return false;
27060531
KKD
1075 return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
1076 }
97e03f52
JK
1077}
1078
27113c59
MKL
1079/* The reg state of a pointer or a bounded scalar was saved when
1080 * it was spilled to the stack.
1081 */
1082static bool is_spilled_reg(const struct bpf_stack_state *stack)
1083{
1084 return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
1085}
1086
354e8f19
MKL
1087static void scrub_spilled_slot(u8 *stype)
1088{
1089 if (*stype != STACK_INVALID)
1090 *stype = STACK_MISC;
1091}
1092
61bd5218 1093static void print_verifier_state(struct bpf_verifier_env *env,
0f55f9ed
CL
1094 const struct bpf_func_state *state,
1095 bool print_all)
17a52670 1096{
f4d7e40a 1097 const struct bpf_reg_state *reg;
17a52670
AS
1098 enum bpf_reg_type t;
1099 int i;
1100
f4d7e40a
AS
1101 if (state->frameno)
1102 verbose(env, " frame%d:", state->frameno);
17a52670 1103 for (i = 0; i < MAX_BPF_REG; i++) {
1a0dc1ac
AS
1104 reg = &state->regs[i];
1105 t = reg->type;
17a52670
AS
1106 if (t == NOT_INIT)
1107 continue;
0f55f9ed
CL
1108 if (!print_all && !reg_scratched(env, i))
1109 continue;
4e92024a
AS
1110 verbose(env, " R%d", i);
1111 print_liveness(env, reg->live);
7df5072c 1112 verbose(env, "=");
b5dc0163
AS
1113 if (t == SCALAR_VALUE && reg->precise)
1114 verbose(env, "P");
f1174f77
EC
1115 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
1116 tnum_is_const(reg->var_off)) {
1117 /* reg->off should be 0 for SCALAR_VALUE */
7df5072c 1118 verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
61bd5218 1119 verbose(env, "%lld", reg->var_off.value + reg->off);
f1174f77 1120 } else {
7df5072c
ML
1121 const char *sep = "";
1122
1123 verbose(env, "%s", reg_type_str(env, t));
5844101a 1124 if (base_type(t) == PTR_TO_BTF_ID)
22dc4a0f 1125 verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
7df5072c
ML
1126 verbose(env, "(");
1127/*
1128 * _a stands for append, was shortened to avoid multiline statements below.
1129 * This macro is used to output a comma separated list of attributes.
1130 */
1131#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; })
1132
1133 if (reg->id)
1134 verbose_a("id=%d", reg->id);
a28ace78 1135 if (reg->ref_obj_id)
7df5072c 1136 verbose_a("ref_obj_id=%d", reg->ref_obj_id);
6a3cd331
DM
1137 if (type_is_non_owning_ref(reg->type))
1138 verbose_a("%s", "non_own_ref");
f1174f77 1139 if (t != SCALAR_VALUE)
7df5072c 1140 verbose_a("off=%d", reg->off);
de8f3a83 1141 if (type_is_pkt_pointer(t))
7df5072c 1142 verbose_a("r=%d", reg->range);
c25b2ae1
HL
1143 else if (base_type(t) == CONST_PTR_TO_MAP ||
1144 base_type(t) == PTR_TO_MAP_KEY ||
1145 base_type(t) == PTR_TO_MAP_VALUE)
7df5072c
ML
1146 verbose_a("ks=%d,vs=%d",
1147 reg->map_ptr->key_size,
1148 reg->map_ptr->value_size);
7d1238f2
EC
1149 if (tnum_is_const(reg->var_off)) {
1150 /* Typically an immediate SCALAR_VALUE, but
1151 * could be a pointer whose offset is too big
1152 * for reg->off
1153 */
7df5072c 1154 verbose_a("imm=%llx", reg->var_off.value);
7d1238f2
EC
1155 } else {
1156 if (reg->smin_value != reg->umin_value &&
1157 reg->smin_value != S64_MIN)
7df5072c 1158 verbose_a("smin=%lld", (long long)reg->smin_value);
7d1238f2
EC
1159 if (reg->smax_value != reg->umax_value &&
1160 reg->smax_value != S64_MAX)
7df5072c 1161 verbose_a("smax=%lld", (long long)reg->smax_value);
7d1238f2 1162 if (reg->umin_value != 0)
7df5072c 1163 verbose_a("umin=%llu", (unsigned long long)reg->umin_value);
7d1238f2 1164 if (reg->umax_value != U64_MAX)
7df5072c 1165 verbose_a("umax=%llu", (unsigned long long)reg->umax_value);
7d1238f2
EC
1166 if (!tnum_is_unknown(reg->var_off)) {
1167 char tn_buf[48];
f1174f77 1168
7d1238f2 1169 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
7df5072c 1170 verbose_a("var_off=%s", tn_buf);
7d1238f2 1171 }
3f50f132
JF
1172 if (reg->s32_min_value != reg->smin_value &&
1173 reg->s32_min_value != S32_MIN)
7df5072c 1174 verbose_a("s32_min=%d", (int)(reg->s32_min_value));
3f50f132
JF
1175 if (reg->s32_max_value != reg->smax_value &&
1176 reg->s32_max_value != S32_MAX)
7df5072c 1177 verbose_a("s32_max=%d", (int)(reg->s32_max_value));
3f50f132
JF
1178 if (reg->u32_min_value != reg->umin_value &&
1179 reg->u32_min_value != U32_MIN)
7df5072c 1180 verbose_a("u32_min=%d", (int)(reg->u32_min_value));
3f50f132
JF
1181 if (reg->u32_max_value != reg->umax_value &&
1182 reg->u32_max_value != U32_MAX)
7df5072c 1183 verbose_a("u32_max=%d", (int)(reg->u32_max_value));
f1174f77 1184 }
7df5072c
ML
1185#undef verbose_a
1186
61bd5218 1187 verbose(env, ")");
f1174f77 1188 }
17a52670 1189 }
638f5b90 1190 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
8efea21d
EC
1191 char types_buf[BPF_REG_SIZE + 1];
1192 bool valid = false;
1193 int j;
1194
1195 for (j = 0; j < BPF_REG_SIZE; j++) {
1196 if (state->stack[i].slot_type[j] != STACK_INVALID)
1197 valid = true;
d54e0f6c 1198 types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
8efea21d
EC
1199 }
1200 types_buf[BPF_REG_SIZE] = 0;
1201 if (!valid)
1202 continue;
0f55f9ed
CL
1203 if (!print_all && !stack_slot_scratched(env, i))
1204 continue;
d54e0f6c
AN
1205 switch (state->stack[i].slot_type[BPF_REG_SIZE - 1]) {
1206 case STACK_SPILL:
b5dc0163
AS
1207 reg = &state->stack[i].spilled_ptr;
1208 t = reg->type;
d54e0f6c
AN
1209
1210 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
1211 print_liveness(env, reg->live);
7df5072c 1212 verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
b5dc0163
AS
1213 if (t == SCALAR_VALUE && reg->precise)
1214 verbose(env, "P");
1215 if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
1216 verbose(env, "%lld", reg->var_off.value + reg->off);
d54e0f6c
AN
1217 break;
1218 case STACK_DYNPTR:
1219 i += BPF_DYNPTR_NR_SLOTS - 1;
1220 reg = &state->stack[i].spilled_ptr;
1221
1222 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
1223 print_liveness(env, reg->live);
1224 verbose(env, "=dynptr_%s", dynptr_type_str(reg->dynptr.type));
1225 if (reg->ref_obj_id)
1226 verbose(env, "(ref_id=%d)", reg->ref_obj_id);
1227 break;
1228 case STACK_MISC:
1229 case STACK_ZERO:
1230 default:
1231 reg = &state->stack[i].spilled_ptr;
1232
1233 for (j = 0; j < BPF_REG_SIZE; j++)
1234 types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
1235 types_buf[BPF_REG_SIZE] = 0;
1236
1237 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
1238 print_liveness(env, reg->live);
8efea21d 1239 verbose(env, "=%s", types_buf);
d54e0f6c 1240 break;
b5dc0163 1241 }
17a52670 1242 }
fd978bf7
JS
1243 if (state->acquired_refs && state->refs[0].id) {
1244 verbose(env, " refs=%d", state->refs[0].id);
1245 for (i = 1; i < state->acquired_refs; i++)
1246 if (state->refs[i].id)
1247 verbose(env, ",%d", state->refs[i].id);
1248 }
bfc6bb74
AS
1249 if (state->in_callback_fn)
1250 verbose(env, " cb");
1251 if (state->in_async_callback_fn)
1252 verbose(env, " async_cb");
61bd5218 1253 verbose(env, "\n");
0f55f9ed 1254 mark_verifier_state_clean(env);
17a52670
AS
1255}
1256
2e576648
CL
1257static inline u32 vlog_alignment(u32 pos)
1258{
1259 return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT),
1260 BPF_LOG_MIN_ALIGNMENT) - pos - 1;
1261}
1262
1263static void print_insn_state(struct bpf_verifier_env *env,
1264 const struct bpf_func_state *state)
1265{
1266 if (env->prev_log_len && env->prev_log_len == env->log.len_used) {
1267 /* remove new line character */
1268 bpf_vlog_reset(&env->log, env->prev_log_len - 1);
1269 verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_len), ' ');
1270 } else {
1271 verbose(env, "%d:", env->insn_idx);
1272 }
1273 print_verifier_state(env, state, false);
17a52670
AS
1274}
1275
c69431aa
LB
1276/* copy array src of length n * size bytes to dst. dst is reallocated if it's too
1277 * small to hold src. This is different from krealloc since we don't want to preserve
1278 * the contents of dst.
1279 *
1280 * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
1281 * not be allocated.
638f5b90 1282 */
c69431aa 1283static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
638f5b90 1284{
45435d8d
KC
1285 size_t alloc_bytes;
1286 void *orig = dst;
c69431aa
LB
1287 size_t bytes;
1288
1289 if (ZERO_OR_NULL_PTR(src))
1290 goto out;
1291
1292 if (unlikely(check_mul_overflow(n, size, &bytes)))
1293 return NULL;
1294
45435d8d
KC
1295 alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
1296 dst = krealloc(orig, alloc_bytes, flags);
1297 if (!dst) {
1298 kfree(orig);
1299 return NULL;
c69431aa
LB
1300 }
1301
1302 memcpy(dst, src, bytes);
1303out:
1304 return dst ? dst : ZERO_SIZE_PTR;
1305}
1306
1307/* resize an array from old_n items to new_n items. the array is reallocated if it's too
1308 * small to hold new_n items. new items are zeroed out if the array grows.
1309 *
1310 * Contrary to krealloc_array, does not free arr if new_n is zero.
1311 */
1312static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
1313{
ceb35b66 1314 size_t alloc_size;
42378a9c
KC
1315 void *new_arr;
1316
c69431aa
LB
1317 if (!new_n || old_n == new_n)
1318 goto out;
1319
ceb35b66
KC
1320 alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
1321 new_arr = krealloc(arr, alloc_size, GFP_KERNEL);
42378a9c
KC
1322 if (!new_arr) {
1323 kfree(arr);
c69431aa 1324 return NULL;
42378a9c
KC
1325 }
1326 arr = new_arr;
c69431aa
LB
1327
1328 if (new_n > old_n)
1329 memset(arr + old_n * size, 0, (new_n - old_n) * size);
1330
1331out:
1332 return arr ? arr : ZERO_SIZE_PTR;
1333}
1334
1335static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1336{
1337 dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
1338 sizeof(struct bpf_reference_state), GFP_KERNEL);
1339 if (!dst->refs)
1340 return -ENOMEM;
1341
1342 dst->acquired_refs = src->acquired_refs;
1343 return 0;
1344}
1345
1346static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1347{
1348 size_t n = src->allocated_stack / BPF_REG_SIZE;
1349
1350 dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
1351 GFP_KERNEL);
1352 if (!dst->stack)
1353 return -ENOMEM;
1354
1355 dst->allocated_stack = src->allocated_stack;
1356 return 0;
1357}
1358
1359static int resize_reference_state(struct bpf_func_state *state, size_t n)
1360{
1361 state->refs = realloc_array(state->refs, state->acquired_refs, n,
1362 sizeof(struct bpf_reference_state));
1363 if (!state->refs)
1364 return -ENOMEM;
1365
1366 state->acquired_refs = n;
1367 return 0;
1368}
1369
1370static int grow_stack_state(struct bpf_func_state *state, int size)
1371{
1372 size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
1373
1374 if (old_n >= n)
1375 return 0;
1376
1377 state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
1378 if (!state->stack)
1379 return -ENOMEM;
1380
1381 state->allocated_stack = size;
1382 return 0;
fd978bf7
JS
1383}
1384
1385/* Acquire a pointer id from the env and update the state->refs to include
1386 * this new pointer reference.
1387 * On success, returns a valid pointer id to associate with the register
1388 * On failure, returns a negative errno.
638f5b90 1389 */
fd978bf7 1390static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
638f5b90 1391{
fd978bf7
JS
1392 struct bpf_func_state *state = cur_func(env);
1393 int new_ofs = state->acquired_refs;
1394 int id, err;
1395
c69431aa 1396 err = resize_reference_state(state, state->acquired_refs + 1);
fd978bf7
JS
1397 if (err)
1398 return err;
1399 id = ++env->id_gen;
1400 state->refs[new_ofs].id = id;
1401 state->refs[new_ofs].insn_idx = insn_idx;
9d9d00ac 1402 state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : 0;
638f5b90 1403
fd978bf7
JS
1404 return id;
1405}
1406
1407/* release function corresponding to acquire_reference_state(). Idempotent. */
46f8bc92 1408static int release_reference_state(struct bpf_func_state *state, int ptr_id)
fd978bf7
JS
1409{
1410 int i, last_idx;
1411
fd978bf7
JS
1412 last_idx = state->acquired_refs - 1;
1413 for (i = 0; i < state->acquired_refs; i++) {
1414 if (state->refs[i].id == ptr_id) {
9d9d00ac
KKD
1415 /* Cannot release caller references in callbacks */
1416 if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
1417 return -EINVAL;
fd978bf7
JS
1418 if (last_idx && i != last_idx)
1419 memcpy(&state->refs[i], &state->refs[last_idx],
1420 sizeof(*state->refs));
1421 memset(&state->refs[last_idx], 0, sizeof(*state->refs));
1422 state->acquired_refs--;
638f5b90 1423 return 0;
638f5b90 1424 }
638f5b90 1425 }
46f8bc92 1426 return -EINVAL;
fd978bf7
JS
1427}
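
/* Illustrative sketch (not part of this file): the ids handed out by
 * acquire_reference_state() and dropped by release_reference_state() track
 * program patterns like the one below (see also the PTR_TO_SOCKET discussion
 * in the header comment). The section name and the empty tuple are
 * simplifications of this example.
 *
 *	SEC("tc")
 *	int sk_ref_example(struct __sk_buff *skb)
 *	{
 *		struct bpf_sock_tuple tuple = {};
 *		struct bpf_sock *sk;
 *
 *		// acquire: R0 gets a ref_obj_id recorded in state->refs[]
 *		sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4), -1, 0);
 *		if (!sk)
 *			return 0;
 *		// ... use sk ...
 *		bpf_sk_release(sk);	// release: the id is removed again;
 *					// omitting this leaves an unreleased
 *					// reference and the program is rejected
 *		return 0;
 *	}
 */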
1428
f4d7e40a
AS
1429static void free_func_state(struct bpf_func_state *state)
1430{
5896351e
AS
1431 if (!state)
1432 return;
fd978bf7 1433 kfree(state->refs);
f4d7e40a
AS
1434 kfree(state->stack);
1435 kfree(state);
1436}
1437
b5dc0163
AS
1438static void clear_jmp_history(struct bpf_verifier_state *state)
1439{
1440 kfree(state->jmp_history);
1441 state->jmp_history = NULL;
1442 state->jmp_history_cnt = 0;
1443}
1444
1969db47
AS
1445static void free_verifier_state(struct bpf_verifier_state *state,
1446 bool free_self)
638f5b90 1447{
f4d7e40a
AS
1448 int i;
1449
1450 for (i = 0; i <= state->curframe; i++) {
1451 free_func_state(state->frame[i]);
1452 state->frame[i] = NULL;
1453 }
b5dc0163 1454 clear_jmp_history(state);
1969db47
AS
1455 if (free_self)
1456 kfree(state);
638f5b90
AS
1457}
1458
1459/* copy verifier state from src to dst growing dst stack space
1460 * when necessary to accommodate larger src stack
1461 */
f4d7e40a
AS
1462static int copy_func_state(struct bpf_func_state *dst,
1463 const struct bpf_func_state *src)
638f5b90
AS
1464{
1465 int err;
1466
fd978bf7
JS
1467 memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
1468 err = copy_reference_state(dst, src);
638f5b90
AS
1469 if (err)
1470 return err;
638f5b90
AS
1471 return copy_stack_state(dst, src);
1472}
1473
f4d7e40a
AS
1474static int copy_verifier_state(struct bpf_verifier_state *dst_state,
1475 const struct bpf_verifier_state *src)
1476{
1477 struct bpf_func_state *dst;
1478 int i, err;
1479
06ab6a50
LB
1480 dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
1481 src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
1482 GFP_USER);
1483 if (!dst_state->jmp_history)
1484 return -ENOMEM;
b5dc0163
AS
1485 dst_state->jmp_history_cnt = src->jmp_history_cnt;
1486
f4d7e40a
AS
1487 /* if dst has more stack frames then src frame, free them */
1488 for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
1489 free_func_state(dst_state->frame[i]);
1490 dst_state->frame[i] = NULL;
1491 }
979d63d5 1492 dst_state->speculative = src->speculative;
9bb00b28 1493 dst_state->active_rcu_lock = src->active_rcu_lock;
f4d7e40a 1494 dst_state->curframe = src->curframe;
d0d78c1d
KKD
1495 dst_state->active_lock.ptr = src->active_lock.ptr;
1496 dst_state->active_lock.id = src->active_lock.id;
2589726d
AS
1497 dst_state->branches = src->branches;
1498 dst_state->parent = src->parent;
b5dc0163
AS
1499 dst_state->first_insn_idx = src->first_insn_idx;
1500 dst_state->last_insn_idx = src->last_insn_idx;
f4d7e40a
AS
1501 for (i = 0; i <= src->curframe; i++) {
1502 dst = dst_state->frame[i];
1503 if (!dst) {
1504 dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1505 if (!dst)
1506 return -ENOMEM;
1507 dst_state->frame[i] = dst;
1508 }
1509 err = copy_func_state(dst, src->frame[i]);
1510 if (err)
1511 return err;
1512 }
1513 return 0;
1514}
1515
2589726d
AS
1516static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
1517{
1518 while (st) {
1519 u32 br = --st->branches;
1520
1521 /* WARN_ON(br > 1) technically makes sense here,
1522 * but see comment in push_stack(), hence:
1523 */
1524 WARN_ONCE((int)br < 0,
1525 "BUG update_branch_counts:branches_to_explore=%d\n",
1526 br);
1527 if (br)
1528 break;
1529 st = st->parent;
1530 }
1531}
1532
638f5b90 1533static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
6f8a57cc 1534 int *insn_idx, bool pop_log)
638f5b90
AS
1535{
1536 struct bpf_verifier_state *cur = env->cur_state;
1537 struct bpf_verifier_stack_elem *elem, *head = env->head;
1538 int err;
17a52670
AS
1539
1540 if (env->head == NULL)
638f5b90 1541 return -ENOENT;
17a52670 1542
638f5b90
AS
1543 if (cur) {
1544 err = copy_verifier_state(cur, &head->st);
1545 if (err)
1546 return err;
1547 }
6f8a57cc
AN
1548 if (pop_log)
1549 bpf_vlog_reset(&env->log, head->log_pos);
638f5b90
AS
1550 if (insn_idx)
1551 *insn_idx = head->insn_idx;
17a52670 1552 if (prev_insn_idx)
638f5b90
AS
1553 *prev_insn_idx = head->prev_insn_idx;
1554 elem = head->next;
1969db47 1555 free_verifier_state(&head->st, false);
638f5b90 1556 kfree(head);
17a52670
AS
1557 env->head = elem;
1558 env->stack_size--;
638f5b90 1559 return 0;
17a52670
AS
1560}
1561
58e2af8b 1562static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
979d63d5
DB
1563 int insn_idx, int prev_insn_idx,
1564 bool speculative)
17a52670 1565{
638f5b90 1566 struct bpf_verifier_state *cur = env->cur_state;
58e2af8b 1567 struct bpf_verifier_stack_elem *elem;
638f5b90 1568 int err;
17a52670 1569
638f5b90 1570 elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
17a52670
AS
1571 if (!elem)
1572 goto err;
1573
17a52670
AS
1574 elem->insn_idx = insn_idx;
1575 elem->prev_insn_idx = prev_insn_idx;
1576 elem->next = env->head;
6f8a57cc 1577 elem->log_pos = env->log.len_used;
17a52670
AS
1578 env->head = elem;
1579 env->stack_size++;
1969db47
AS
1580 err = copy_verifier_state(&elem->st, cur);
1581 if (err)
1582 goto err;
979d63d5 1583 elem->st.speculative |= speculative;
b285fcb7
AS
1584 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1585 verbose(env, "The sequence of %d jumps is too complex.\n",
1586 env->stack_size);
17a52670
AS
1587 goto err;
1588 }
2589726d
AS
1589 if (elem->st.parent) {
1590 ++elem->st.parent->branches;
1591 /* WARN_ON(branches > 2) technically makes sense here,
1592 * but
1593 * 1. speculative states will bump 'branches' for non-branch
1594 * instructions
1595 * 2. is_state_visited() heuristics may decide not to create
1596 * a new state for a sequence of branches and all such current
1597 * and cloned states will be pointing to a single parent state
1598 * which might have large 'branches' count.
1599 */
1600 }
17a52670
AS
1601 return &elem->st;
1602err:
5896351e
AS
1603 free_verifier_state(env->cur_state, true);
1604 env->cur_state = NULL;
17a52670 1605 /* pop all elements and return */
6f8a57cc 1606 while (!pop_stack(env, NULL, NULL, false));
17a52670
AS
1607 return NULL;
1608}
1609
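The push_stack()/update_branch_counts() pair above amounts to a reference count over the parent chain: every forked child bumps its parent's 'branches', and a fully explored path decrements counters upward until an ancestor with live children is found. A minimal user-space sketch of that scheme follows; the toy_* names are hypothetical stand-ins and the speculative-state and WARN_ONCE details are deliberately omitted.

/* toy model of the branch-counting scheme, not kernel code */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_state {
        struct toy_state *parent;
        int branches;
};

static struct toy_state *fork_state(struct toy_state *parent)
{
        struct toy_state *child = calloc(1, sizeof(*child));

        if (!child)
                exit(1);
        child->parent = parent;
        child->branches = 1;
        if (parent)
                parent->branches++;     /* mirrors push_stack() bumping the parent */
        return child;
}

static void finish_state(struct toy_state *st)
{
        /* mirrors update_branch_counts(): stop at the first ancestor
         * that still has unexplored branches
         */
        while (st && --st->branches == 0)
                st = st->parent;
}

int main(void)
{
        struct toy_state *root = fork_state(NULL);
        struct toy_state *a = fork_state(root);
        struct toy_state *b = fork_state(root);

        finish_state(a);
        assert(root->branches == 2);    /* 'b' and root's own path remain */
        finish_state(b);
        printf("root branches left: %d\n", root->branches);
        return 0;
}
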
1610#define CALLER_SAVED_REGS 6
1611static const int caller_saved[CALLER_SAVED_REGS] = {
1612 BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
1613};
1614
e688c3db
AS
1615/* This helper doesn't clear reg->id */
1616static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
b03c9f9f 1617{
b03c9f9f
EC
1618 reg->var_off = tnum_const(imm);
1619 reg->smin_value = (s64)imm;
1620 reg->smax_value = (s64)imm;
1621 reg->umin_value = imm;
1622 reg->umax_value = imm;
3f50f132
JF
1623
1624 reg->s32_min_value = (s32)imm;
1625 reg->s32_max_value = (s32)imm;
1626 reg->u32_min_value = (u32)imm;
1627 reg->u32_max_value = (u32)imm;
1628}
1629
e688c3db
AS
1630/* Mark the unknown part of a register (variable offset or scalar value) as
1631 * known to have the value @imm.
1632 */
1633static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1634{
a73bf9f2 1635 /* Clear off and union(map_ptr, range) */
e688c3db
AS
1636 memset(((u8 *)reg) + sizeof(reg->type), 0,
1637 offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
a73bf9f2
AN
1638 reg->id = 0;
1639 reg->ref_obj_id = 0;
e688c3db
AS
1640 ___mark_reg_known(reg, imm);
1641}
1642
3f50f132
JF
1643static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1644{
1645 reg->var_off = tnum_const_subreg(reg->var_off, imm);
1646 reg->s32_min_value = (s32)imm;
1647 reg->s32_max_value = (s32)imm;
1648 reg->u32_min_value = (u32)imm;
1649 reg->u32_max_value = (u32)imm;
b03c9f9f
EC
1650}
1651
f1174f77
EC
1652/* Mark the 'variable offset' part of a register as zero. This should be
1653 * used only on registers holding a pointer type.
1654 */
1655static void __mark_reg_known_zero(struct bpf_reg_state *reg)
a9789ef9 1656{
b03c9f9f 1657 __mark_reg_known(reg, 0);
f1174f77 1658}
a9789ef9 1659
cc2b14d5
AS
1660static void __mark_reg_const_zero(struct bpf_reg_state *reg)
1661{
1662 __mark_reg_known(reg, 0);
cc2b14d5
AS
1663 reg->type = SCALAR_VALUE;
1664}
1665
61bd5218
JK
1666static void mark_reg_known_zero(struct bpf_verifier_env *env,
1667 struct bpf_reg_state *regs, u32 regno)
f1174f77
EC
1668{
1669 if (WARN_ON(regno >= MAX_BPF_REG)) {
61bd5218 1670 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
f1174f77
EC
1671 /* Something bad happened, let's kill all regs */
1672 for (regno = 0; regno < MAX_BPF_REG; regno++)
f54c7898 1673 __mark_reg_not_init(env, regs + regno);
f1174f77
EC
1674 return;
1675 }
1676 __mark_reg_known_zero(regs + regno);
1677}
1678
27060531 1679static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
f8064ab9 1680 bool first_slot, int dynptr_id)
27060531
KKD
1681{
1682 /* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
1683 * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
1684 * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
1685 */
1686 __mark_reg_known_zero(reg);
1687 reg->type = CONST_PTR_TO_DYNPTR;
f8064ab9
KKD
 1688 /* Give each dynptr a unique id to uniquely associate slices with it. */
1689 reg->id = dynptr_id;
27060531
KKD
1690 reg->dynptr.type = type;
1691 reg->dynptr.first_slot = first_slot;
1692}
1693
4ddb7416
DB
1694static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
1695{
c25b2ae1 1696 if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
4ddb7416
DB
1697 const struct bpf_map *map = reg->map_ptr;
1698
1699 if (map->inner_map_meta) {
1700 reg->type = CONST_PTR_TO_MAP;
1701 reg->map_ptr = map->inner_map_meta;
3e8ce298
AS
1702 /* transfer reg's id which is unique for every map_lookup_elem
1703 * as UID of the inner map.
1704 */
db559117 1705 if (btf_record_has_field(map->inner_map_meta->record, BPF_TIMER))
34d11a44 1706 reg->map_uid = reg->id;
4ddb7416
DB
1707 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
1708 reg->type = PTR_TO_XDP_SOCK;
1709 } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
1710 map->map_type == BPF_MAP_TYPE_SOCKHASH) {
1711 reg->type = PTR_TO_SOCKET;
1712 } else {
1713 reg->type = PTR_TO_MAP_VALUE;
1714 }
c25b2ae1 1715 return;
4ddb7416 1716 }
c25b2ae1
HL
1717
1718 reg->type &= ~PTR_MAYBE_NULL;
4ddb7416
DB
1719}
1720
5d92ddc3
DM
1721static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
1722 struct btf_field_graph_root *ds_head)
1723{
1724 __mark_reg_known_zero(&regs[regno]);
1725 regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
1726 regs[regno].btf = ds_head->btf;
1727 regs[regno].btf_id = ds_head->value_btf_id;
1728 regs[regno].off = ds_head->node_offset;
1729}
1730
de8f3a83
DB
1731static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1732{
1733 return type_is_pkt_pointer(reg->type);
1734}
1735
1736static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1737{
1738 return reg_is_pkt_pointer(reg) ||
1739 reg->type == PTR_TO_PACKET_END;
1740}
1741
66e3a13e
JK
1742static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
1743{
1744 return base_type(reg->type) == PTR_TO_MEM &&
1745 (reg->type & DYNPTR_TYPE_SKB || reg->type & DYNPTR_TYPE_XDP);
1746}
1747
de8f3a83
DB
1748/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1749static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1750 enum bpf_reg_type which)
1751{
1752 /* The register can already have a range from prior markings.
1753 * This is fine as long as it hasn't been advanced from its
1754 * origin.
1755 */
1756 return reg->type == which &&
1757 reg->id == 0 &&
1758 reg->off == 0 &&
1759 tnum_equals_const(reg->var_off, 0);
1760}
1761
3f50f132
JF
1762/* Reset the min/max bounds of a register */
1763static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1764{
1765 reg->smin_value = S64_MIN;
1766 reg->smax_value = S64_MAX;
1767 reg->umin_value = 0;
1768 reg->umax_value = U64_MAX;
1769
1770 reg->s32_min_value = S32_MIN;
1771 reg->s32_max_value = S32_MAX;
1772 reg->u32_min_value = 0;
1773 reg->u32_max_value = U32_MAX;
1774}
1775
1776static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1777{
1778 reg->smin_value = S64_MIN;
1779 reg->smax_value = S64_MAX;
1780 reg->umin_value = 0;
1781 reg->umax_value = U64_MAX;
1782}
1783
1784static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1785{
1786 reg->s32_min_value = S32_MIN;
1787 reg->s32_max_value = S32_MAX;
1788 reg->u32_min_value = 0;
1789 reg->u32_max_value = U32_MAX;
1790}
1791
1792static void __update_reg32_bounds(struct bpf_reg_state *reg)
1793{
1794 struct tnum var32_off = tnum_subreg(reg->var_off);
1795
1796 /* min signed is max(sign bit) | min(other bits) */
1797 reg->s32_min_value = max_t(s32, reg->s32_min_value,
1798 var32_off.value | (var32_off.mask & S32_MIN));
1799 /* max signed is min(sign bit) | max(other bits) */
1800 reg->s32_max_value = min_t(s32, reg->s32_max_value,
1801 var32_off.value | (var32_off.mask & S32_MAX));
1802 reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1803 reg->u32_max_value = min(reg->u32_max_value,
1804 (u32)(var32_off.value | var32_off.mask));
1805}
1806
1807static void __update_reg64_bounds(struct bpf_reg_state *reg)
b03c9f9f
EC
1808{
1809 /* min signed is max(sign bit) | min(other bits) */
1810 reg->smin_value = max_t(s64, reg->smin_value,
1811 reg->var_off.value | (reg->var_off.mask & S64_MIN));
1812 /* max signed is min(sign bit) | max(other bits) */
1813 reg->smax_value = min_t(s64, reg->smax_value,
1814 reg->var_off.value | (reg->var_off.mask & S64_MAX));
1815 reg->umin_value = max(reg->umin_value, reg->var_off.value);
1816 reg->umax_value = min(reg->umax_value,
1817 reg->var_off.value | reg->var_off.mask);
1818}
1819
3f50f132
JF
1820static void __update_reg_bounds(struct bpf_reg_state *reg)
1821{
1822 __update_reg32_bounds(reg);
1823 __update_reg64_bounds(reg);
1824}
1825
b03c9f9f 1826/* Uses signed min/max values to inform unsigned, and vice-versa */
3f50f132
JF
1827static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
1828{
1829 /* Learn sign from signed bounds.
1830 * If we cannot cross the sign boundary, then signed and unsigned bounds
1831 * are the same, so combine. This works even in the negative case, e.g.
1832 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1833 */
1834 if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
1835 reg->s32_min_value = reg->u32_min_value =
1836 max_t(u32, reg->s32_min_value, reg->u32_min_value);
1837 reg->s32_max_value = reg->u32_max_value =
1838 min_t(u32, reg->s32_max_value, reg->u32_max_value);
1839 return;
1840 }
1841 /* Learn sign from unsigned bounds. Signed bounds cross the sign
1842 * boundary, so we must be careful.
1843 */
1844 if ((s32)reg->u32_max_value >= 0) {
1845 /* Positive. We can't learn anything from the smin, but smax
1846 * is positive, hence safe.
1847 */
1848 reg->s32_min_value = reg->u32_min_value;
1849 reg->s32_max_value = reg->u32_max_value =
1850 min_t(u32, reg->s32_max_value, reg->u32_max_value);
1851 } else if ((s32)reg->u32_min_value < 0) {
1852 /* Negative. We can't learn anything from the smax, but smin
1853 * is negative, hence safe.
1854 */
1855 reg->s32_min_value = reg->u32_min_value =
1856 max_t(u32, reg->s32_min_value, reg->u32_min_value);
1857 reg->s32_max_value = reg->u32_max_value;
1858 }
1859}
1860
1861static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
b03c9f9f
EC
1862{
1863 /* Learn sign from signed bounds.
1864 * If we cannot cross the sign boundary, then signed and unsigned bounds
1865 * are the same, so combine. This works even in the negative case, e.g.
1866 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1867 */
1868 if (reg->smin_value >= 0 || reg->smax_value < 0) {
1869 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1870 reg->umin_value);
1871 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1872 reg->umax_value);
1873 return;
1874 }
1875 /* Learn sign from unsigned bounds. Signed bounds cross the sign
1876 * boundary, so we must be careful.
1877 */
1878 if ((s64)reg->umax_value >= 0) {
1879 /* Positive. We can't learn anything from the smin, but smax
1880 * is positive, hence safe.
1881 */
1882 reg->smin_value = reg->umin_value;
1883 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1884 reg->umax_value);
1885 } else if ((s64)reg->umin_value < 0) {
1886 /* Negative. We can't learn anything from the smax, but smin
1887 * is negative, hence safe.
1888 */
1889 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1890 reg->umin_value);
1891 reg->smax_value = reg->umax_value;
1892 }
1893}
1894
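The example in the comment above (-3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff) can be checked with a stand-alone snippet. This is only an illustration of the sign-boundary argument, not verifier code:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        int64_t smin = -3, smax = -1;   /* -3 s<= x s<= -1 */

        /* Both bounds are negative, so the range does not cross the sign
         * boundary and the same bit patterns bound x as unsigned values.
         */
        uint64_t umin = (uint64_t)smin; /* 0xfffffffffffffffd */
        uint64_t umax = (uint64_t)smax; /* 0xffffffffffffffff */

        printf("umin=%#llx umax=%#llx\n",
               (unsigned long long)umin, (unsigned long long)umax);
        return 0;
}
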
3f50f132
JF
1895static void __reg_deduce_bounds(struct bpf_reg_state *reg)
1896{
1897 __reg32_deduce_bounds(reg);
1898 __reg64_deduce_bounds(reg);
1899}
1900
b03c9f9f
EC
1901/* Attempts to improve var_off based on unsigned min/max information */
1902static void __reg_bound_offset(struct bpf_reg_state *reg)
1903{
3f50f132
JF
1904 struct tnum var64_off = tnum_intersect(reg->var_off,
1905 tnum_range(reg->umin_value,
1906 reg->umax_value));
1907 struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
1908 tnum_range(reg->u32_min_value,
1909 reg->u32_max_value));
1910
1911 reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
b03c9f9f
EC
1912}
1913
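__reg_bound_offset() leans on the kernel's tnum helpers. The sketch below re-implements tnum_range()/tnum_intersect() in user space, in simplified form and purely for illustration, to show how an unsigned range can turn previously unknown var_off bits into known ones; the toy_* helpers follow the shape of the real helpers but are not the kernel's tnum.c.

#include <stdint.h>
#include <stdio.h>

struct toy_tnum { uint64_t value; uint64_t mask; };

/* simplified tnum_range(): every bit at or below the most significant
 * bit where min and max differ becomes unknown (part of the mask)
 */
static struct toy_tnum toy_range(uint64_t min, uint64_t max)
{
        uint64_t chi = min ^ max, delta;
        int bits = chi ? 64 - __builtin_clzll(chi) : 0;  /* GCC/Clang builtin */

        if (bits > 63)
                return (struct toy_tnum){ 0, ~0ULL };    /* fully unknown */
        delta = (1ULL << bits) - 1;
        return (struct toy_tnum){ min & ~delta, delta };
}

/* simplified tnum_intersect(): keep every bit known by either operand */
static struct toy_tnum toy_intersect(struct toy_tnum a, struct toy_tnum b)
{
        uint64_t v = a.value | b.value;
        uint64_t mu = a.mask & b.mask;

        return (struct toy_tnum){ v & ~mu, mu };
}

int main(void)
{
        /* var_off says "multiple of 4 below 256": value=0, mask=0xfc */
        struct toy_tnum var_off = { 0x0, 0xfc };
        /* unsigned bounds narrowed the register to [16, 31] */
        struct toy_tnum range = toy_range(16, 31);
        struct toy_tnum out = toy_intersect(var_off, range);

        /* bit 4 becomes a known 1; only bits 2..3 stay unknown */
        printf("value=%#llx mask=%#llx\n",
               (unsigned long long)out.value, (unsigned long long)out.mask);
        return 0;
}
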
3844d153
DB
1914static void reg_bounds_sync(struct bpf_reg_state *reg)
1915{
1916 /* We might have learned new bounds from the var_off. */
1917 __update_reg_bounds(reg);
1918 /* We might have learned something about the sign bit. */
1919 __reg_deduce_bounds(reg);
1920 /* We might have learned some bits from the bounds. */
1921 __reg_bound_offset(reg);
1922 /* Intersecting with the old var_off might have improved our bounds
1923 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1924 * then new var_off is (0; 0x7f...fc) which improves our umax.
1925 */
1926 __update_reg_bounds(reg);
1927}
1928
e572ff80
DB
1929static bool __reg32_bound_s64(s32 a)
1930{
1931 return a >= 0 && a <= S32_MAX;
1932}
1933
3f50f132 1934static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
b03c9f9f 1935{
3f50f132
JF
1936 reg->umin_value = reg->u32_min_value;
1937 reg->umax_value = reg->u32_max_value;
e572ff80
DB
1938
 1939 /* Attempt to pull 32-bit signed bounds into 64-bit bounds, but they must
 1940 * be positive; otherwise fall back to worst-case bounds and refine later
 1941 * from the tnum.
3f50f132 1942 */
e572ff80
DB
1943 if (__reg32_bound_s64(reg->s32_min_value) &&
1944 __reg32_bound_s64(reg->s32_max_value)) {
3a71dc36 1945 reg->smin_value = reg->s32_min_value;
e572ff80
DB
1946 reg->smax_value = reg->s32_max_value;
1947 } else {
3a71dc36 1948 reg->smin_value = 0;
e572ff80
DB
1949 reg->smax_value = U32_MAX;
1950 }
3f50f132
JF
1951}
1952
1953static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
1954{
 1955 /* special case when the 64-bit register has its upper 32 bits
 1956 * zeroed. Typically happens after a zext or <<32, >>32 sequence,
 1957 * allowing us to use the 32-bit bounds directly.
1958 */
1959 if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
1960 __reg_assign_32_into_64(reg);
1961 } else {
1962 /* Otherwise the best we can do is push lower 32bit known and
1963 * unknown bits into register (var_off set from jmp logic)
1964 * then learn as much as possible from the 64-bit tnum
1965 * known and unknown bits. The previous smin/smax bounds are
1966 * invalid here because of jmp32 compare so mark them unknown
1967 * so they do not impact tnum bounds calculation.
1968 */
1969 __mark_reg64_unbounded(reg);
3f50f132 1970 }
3844d153 1971 reg_bounds_sync(reg);
3f50f132
JF
1972}
1973
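A small stand-alone illustration of the zero-extension special case described above: once the upper 32 bits are known to be zero, the 32-bit bounds are also valid 64-bit bounds. This is illustrative only and does not model the verifier's tnum check.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t reg = 0xffffffff12345678ULL;

        /* a 32-bit ALU op in BPF zero-extends the destination register */
        reg = (uint32_t)(reg + 1);      /* upper 32 bits are now known zero */

        /* ...so any 32-bit bound on the subregister also bounds the full value */
        printf("reg=%#llx fits in u32: %d\n",
               (unsigned long long)reg, reg <= UINT32_MAX);
        return 0;
}
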
1974static bool __reg64_bound_s32(s64 a)
1975{
388e2c0b 1976 return a >= S32_MIN && a <= S32_MAX;
3f50f132
JF
1977}
1978
1979static bool __reg64_bound_u32(u64 a)
1980{
b9979db8 1981 return a >= U32_MIN && a <= U32_MAX;
3f50f132
JF
1982}
1983
1984static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
1985{
1986 __mark_reg32_unbounded(reg);
b0270958 1987 if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
3f50f132 1988 reg->s32_min_value = (s32)reg->smin_value;
3f50f132 1989 reg->s32_max_value = (s32)reg->smax_value;
b0270958 1990 }
10bf4e83 1991 if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
3f50f132 1992 reg->u32_min_value = (u32)reg->umin_value;
3f50f132 1993 reg->u32_max_value = (u32)reg->umax_value;
10bf4e83 1994 }
3844d153 1995 reg_bounds_sync(reg);
b03c9f9f
EC
1996}
1997
f1174f77 1998/* Mark a register as having a completely unknown (scalar) value. */
f54c7898
DB
1999static void __mark_reg_unknown(const struct bpf_verifier_env *env,
2000 struct bpf_reg_state *reg)
f1174f77 2001{
a9c676bc 2002 /*
a73bf9f2 2003 * Clear type, off, and union(map_ptr, range) and
a9c676bc
AS
2004 * padding between 'type' and union
2005 */
2006 memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
f1174f77 2007 reg->type = SCALAR_VALUE;
a73bf9f2
AN
2008 reg->id = 0;
2009 reg->ref_obj_id = 0;
f1174f77 2010 reg->var_off = tnum_unknown;
f4d7e40a 2011 reg->frameno = 0;
be2ef816 2012 reg->precise = !env->bpf_capable;
b03c9f9f 2013 __mark_reg_unbounded(reg);
f1174f77
EC
2014}
2015
61bd5218
JK
2016static void mark_reg_unknown(struct bpf_verifier_env *env,
2017 struct bpf_reg_state *regs, u32 regno)
f1174f77
EC
2018{
2019 if (WARN_ON(regno >= MAX_BPF_REG)) {
61bd5218 2020 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
19ceb417
AS
2021 /* Something bad happened, let's kill all regs except FP */
2022 for (regno = 0; regno < BPF_REG_FP; regno++)
f54c7898 2023 __mark_reg_not_init(env, regs + regno);
f1174f77
EC
2024 return;
2025 }
f54c7898 2026 __mark_reg_unknown(env, regs + regno);
f1174f77
EC
2027}
2028
f54c7898
DB
2029static void __mark_reg_not_init(const struct bpf_verifier_env *env,
2030 struct bpf_reg_state *reg)
f1174f77 2031{
f54c7898 2032 __mark_reg_unknown(env, reg);
f1174f77
EC
2033 reg->type = NOT_INIT;
2034}
2035
61bd5218
JK
2036static void mark_reg_not_init(struct bpf_verifier_env *env,
2037 struct bpf_reg_state *regs, u32 regno)
f1174f77
EC
2038{
2039 if (WARN_ON(regno >= MAX_BPF_REG)) {
61bd5218 2040 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
19ceb417
AS
2041 /* Something bad happened, let's kill all regs except FP */
2042 for (regno = 0; regno < BPF_REG_FP; regno++)
f54c7898 2043 __mark_reg_not_init(env, regs + regno);
f1174f77
EC
2044 return;
2045 }
f54c7898 2046 __mark_reg_not_init(env, regs + regno);
a9789ef9
DB
2047}
2048
41c48f3a
AI
2049static void mark_btf_ld_reg(struct bpf_verifier_env *env,
2050 struct bpf_reg_state *regs, u32 regno,
22dc4a0f 2051 enum bpf_reg_type reg_type,
c6f1bfe8
YS
2052 struct btf *btf, u32 btf_id,
2053 enum bpf_type_flag flag)
41c48f3a
AI
2054{
2055 if (reg_type == SCALAR_VALUE) {
2056 mark_reg_unknown(env, regs, regno);
2057 return;
2058 }
2059 mark_reg_known_zero(env, regs, regno);
c6f1bfe8 2060 regs[regno].type = PTR_TO_BTF_ID | flag;
22dc4a0f 2061 regs[regno].btf = btf;
41c48f3a
AI
2062 regs[regno].btf_id = btf_id;
2063}
2064
5327ed3d 2065#define DEF_NOT_SUBREG (0)
61bd5218 2066static void init_reg_state(struct bpf_verifier_env *env,
f4d7e40a 2067 struct bpf_func_state *state)
17a52670 2068{
f4d7e40a 2069 struct bpf_reg_state *regs = state->regs;
17a52670
AS
2070 int i;
2071
dc503a8a 2072 for (i = 0; i < MAX_BPF_REG; i++) {
61bd5218 2073 mark_reg_not_init(env, regs, i);
dc503a8a 2074 regs[i].live = REG_LIVE_NONE;
679c782d 2075 regs[i].parent = NULL;
5327ed3d 2076 regs[i].subreg_def = DEF_NOT_SUBREG;
dc503a8a 2077 }
17a52670
AS
2078
2079 /* frame pointer */
f1174f77 2080 regs[BPF_REG_FP].type = PTR_TO_STACK;
61bd5218 2081 mark_reg_known_zero(env, regs, BPF_REG_FP);
f4d7e40a 2082 regs[BPF_REG_FP].frameno = state->frameno;
6760bf2d
DB
2083}
2084
f4d7e40a
AS
2085#define BPF_MAIN_FUNC (-1)
2086static void init_func_state(struct bpf_verifier_env *env,
2087 struct bpf_func_state *state,
2088 int callsite, int frameno, int subprogno)
2089{
2090 state->callsite = callsite;
2091 state->frameno = frameno;
2092 state->subprogno = subprogno;
1bfe26fb 2093 state->callback_ret_range = tnum_range(0, 0);
f4d7e40a 2094 init_reg_state(env, state);
0f55f9ed 2095 mark_verifier_state_scratched(env);
f4d7e40a
AS
2096}
2097
bfc6bb74
AS
2098/* Similar to push_stack(), but for async callbacks */
2099static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
2100 int insn_idx, int prev_insn_idx,
2101 int subprog)
2102{
2103 struct bpf_verifier_stack_elem *elem;
2104 struct bpf_func_state *frame;
2105
2106 elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
2107 if (!elem)
2108 goto err;
2109
2110 elem->insn_idx = insn_idx;
2111 elem->prev_insn_idx = prev_insn_idx;
2112 elem->next = env->head;
2113 elem->log_pos = env->log.len_used;
2114 env->head = elem;
2115 env->stack_size++;
2116 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
2117 verbose(env,
2118 "The sequence of %d jumps is too complex for async cb.\n",
2119 env->stack_size);
2120 goto err;
2121 }
2122 /* Unlike push_stack() do not copy_verifier_state().
2123 * The caller state doesn't matter.
 2124 * This is an async callback. It starts in a fresh stack.
 2125 * Initialize it similarly to do_check_common().
2126 */
2127 elem->st.branches = 1;
2128 frame = kzalloc(sizeof(*frame), GFP_KERNEL);
2129 if (!frame)
2130 goto err;
2131 init_func_state(env, frame,
2132 BPF_MAIN_FUNC /* callsite */,
2133 0 /* frameno within this callchain */,
2134 subprog /* subprog number within this prog */);
2135 elem->st.frame[0] = frame;
2136 return &elem->st;
2137err:
2138 free_verifier_state(env->cur_state, true);
2139 env->cur_state = NULL;
2140 /* pop all elements and return */
2141 while (!pop_stack(env, NULL, NULL, false));
2142 return NULL;
2143}
2144
2145
17a52670
AS
2146enum reg_arg_type {
2147 SRC_OP, /* register is used as source operand */
2148 DST_OP, /* register is used as destination operand */
2149 DST_OP_NO_MARK /* same as above, check only, don't mark */
2150};
2151
cc8b0b92
AS
2152static int cmp_subprogs(const void *a, const void *b)
2153{
9c8105bd
JW
2154 return ((struct bpf_subprog_info *)a)->start -
2155 ((struct bpf_subprog_info *)b)->start;
cc8b0b92
AS
2156}
2157
2158static int find_subprog(struct bpf_verifier_env *env, int off)
2159{
9c8105bd 2160 struct bpf_subprog_info *p;
cc8b0b92 2161
9c8105bd
JW
2162 p = bsearch(&off, env->subprog_info, env->subprog_cnt,
2163 sizeof(env->subprog_info[0]), cmp_subprogs);
cc8b0b92
AS
2164 if (!p)
2165 return -ENOENT;
9c8105bd 2166 return p - env->subprog_info;
cc8b0b92
AS
2167
2168}
2169
2170static int add_subprog(struct bpf_verifier_env *env, int off)
2171{
2172 int insn_cnt = env->prog->len;
2173 int ret;
2174
2175 if (off >= insn_cnt || off < 0) {
2176 verbose(env, "call to invalid destination\n");
2177 return -EINVAL;
2178 }
2179 ret = find_subprog(env, off);
2180 if (ret >= 0)
282a0f46 2181 return ret;
4cb3d99c 2182 if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
cc8b0b92
AS
2183 verbose(env, "too many subprograms\n");
2184 return -E2BIG;
2185 }
e6ac2450 2186 /* determine subprog starts. The end is one before the next starts */
9c8105bd
JW
2187 env->subprog_info[env->subprog_cnt++].start = off;
2188 sort(env->subprog_info, env->subprog_cnt,
2189 sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
282a0f46 2190 return env->subprog_cnt - 1;
cc8b0b92
AS
2191}
2192
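find_subprog()/add_subprog() keep subprog_info sorted by start and look entries up with bsearch(). A user-space sketch of the same keep-sorted-and-bsearch pattern, using hypothetical toy_* names and libc qsort()/bsearch(); for clarity the key here is a full struct, whereas find_subprog() passes &off directly and relies on 'start' being the first member.

#include <stdio.h>
#include <stdlib.h>

struct toy_subprog { int start; };

static int cmp_start(const void *a, const void *b)
{
        return ((const struct toy_subprog *)a)->start -
               ((const struct toy_subprog *)b)->start;
}

int main(void)
{
        struct toy_subprog subs[4] = { {0} };
        int cnt = 0;

        /* add entries in any order, then keep the array sorted,
         * just like add_subprog() does with sort()
         */
        subs[cnt++].start = 40;
        subs[cnt++].start = 0;
        subs[cnt++].start = 17;
        qsort(subs, cnt, sizeof(subs[0]), cmp_start);

        /* find_subprog() equivalent: bsearch with the same comparator */
        struct toy_subprog key = { .start = 17 };
        struct toy_subprog *p = bsearch(&key, subs, cnt, sizeof(subs[0]),
                                        cmp_start);

        printf("found index %ld\n", p ? (long)(p - subs) : -1L);
        return 0;
}
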
2357672c
KKD
2193#define MAX_KFUNC_DESCS 256
2194#define MAX_KFUNC_BTFS 256
2195
e6ac2450
MKL
2196struct bpf_kfunc_desc {
2197 struct btf_func_model func_model;
2198 u32 func_id;
2199 s32 imm;
2357672c
KKD
2200 u16 offset;
2201};
2202
2203struct bpf_kfunc_btf {
2204 struct btf *btf;
2205 struct module *module;
2206 u16 offset;
e6ac2450
MKL
2207};
2208
e6ac2450
MKL
2209struct bpf_kfunc_desc_tab {
2210 struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
2211 u32 nr_descs;
2212};
2213
2357672c
KKD
2214struct bpf_kfunc_btf_tab {
2215 struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
2216 u32 nr_descs;
2217};
2218
2219static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
e6ac2450
MKL
2220{
2221 const struct bpf_kfunc_desc *d0 = a;
2222 const struct bpf_kfunc_desc *d1 = b;
2223
2224 /* func_id is not greater than BTF_MAX_TYPE */
2357672c
KKD
2225 return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
2226}
2227
2228static int kfunc_btf_cmp_by_off(const void *a, const void *b)
2229{
2230 const struct bpf_kfunc_btf *d0 = a;
2231 const struct bpf_kfunc_btf *d1 = b;
2232
2233 return d0->offset - d1->offset;
e6ac2450
MKL
2234}
2235
2236static const struct bpf_kfunc_desc *
2357672c 2237find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
e6ac2450
MKL
2238{
2239 struct bpf_kfunc_desc desc = {
2240 .func_id = func_id,
2357672c 2241 .offset = offset,
e6ac2450
MKL
2242 };
2243 struct bpf_kfunc_desc_tab *tab;
2244
2245 tab = prog->aux->kfunc_tab;
2246 return bsearch(&desc, tab->descs, tab->nr_descs,
2357672c
KKD
2247 sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
2248}
2249
2250static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
b202d844 2251 s16 offset)
2357672c
KKD
2252{
2253 struct bpf_kfunc_btf kf_btf = { .offset = offset };
2254 struct bpf_kfunc_btf_tab *tab;
2255 struct bpf_kfunc_btf *b;
2256 struct module *mod;
2257 struct btf *btf;
2258 int btf_fd;
2259
2260 tab = env->prog->aux->kfunc_btf_tab;
2261 b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
2262 sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
2263 if (!b) {
2264 if (tab->nr_descs == MAX_KFUNC_BTFS) {
2265 verbose(env, "too many different module BTFs\n");
2266 return ERR_PTR(-E2BIG);
2267 }
2268
2269 if (bpfptr_is_null(env->fd_array)) {
2270 verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
2271 return ERR_PTR(-EPROTO);
2272 }
2273
2274 if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
2275 offset * sizeof(btf_fd),
2276 sizeof(btf_fd)))
2277 return ERR_PTR(-EFAULT);
2278
2279 btf = btf_get_by_fd(btf_fd);
588cd7ef
KKD
2280 if (IS_ERR(btf)) {
2281 verbose(env, "invalid module BTF fd specified\n");
2357672c 2282 return btf;
588cd7ef 2283 }
2357672c
KKD
2284
2285 if (!btf_is_module(btf)) {
2286 verbose(env, "BTF fd for kfunc is not a module BTF\n");
2287 btf_put(btf);
2288 return ERR_PTR(-EINVAL);
2289 }
2290
2291 mod = btf_try_get_module(btf);
2292 if (!mod) {
2293 btf_put(btf);
2294 return ERR_PTR(-ENXIO);
2295 }
2296
2297 b = &tab->descs[tab->nr_descs++];
2298 b->btf = btf;
2299 b->module = mod;
2300 b->offset = offset;
2301
2302 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2303 kfunc_btf_cmp_by_off, NULL);
2304 }
2357672c 2305 return b->btf;
e6ac2450
MKL
2306}
2307
2357672c
KKD
2308void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
2309{
2310 if (!tab)
2311 return;
2312
2313 while (tab->nr_descs--) {
2314 module_put(tab->descs[tab->nr_descs].module);
2315 btf_put(tab->descs[tab->nr_descs].btf);
2316 }
2317 kfree(tab);
2318}
2319
43bf0878 2320static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
2357672c 2321{
2357672c
KKD
2322 if (offset) {
2323 if (offset < 0) {
 2324 /* In the future, this can be allowed to increase the limit
 2325 * of the fd index into fd_array, interpreted as u16.
2326 */
2327 verbose(env, "negative offset disallowed for kernel module function call\n");
2328 return ERR_PTR(-EINVAL);
2329 }
2330
b202d844 2331 return __find_kfunc_desc_btf(env, offset);
2357672c
KKD
2332 }
2333 return btf_vmlinux ?: ERR_PTR(-ENOENT);
e6ac2450
MKL
2334}
2335
2357672c 2336static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
e6ac2450
MKL
2337{
2338 const struct btf_type *func, *func_proto;
2357672c 2339 struct bpf_kfunc_btf_tab *btf_tab;
e6ac2450
MKL
2340 struct bpf_kfunc_desc_tab *tab;
2341 struct bpf_prog_aux *prog_aux;
2342 struct bpf_kfunc_desc *desc;
2343 const char *func_name;
2357672c 2344 struct btf *desc_btf;
8cbf062a 2345 unsigned long call_imm;
e6ac2450
MKL
2346 unsigned long addr;
2347 int err;
2348
2349 prog_aux = env->prog->aux;
2350 tab = prog_aux->kfunc_tab;
2357672c 2351 btf_tab = prog_aux->kfunc_btf_tab;
e6ac2450
MKL
2352 if (!tab) {
2353 if (!btf_vmlinux) {
2354 verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
2355 return -ENOTSUPP;
2356 }
2357
2358 if (!env->prog->jit_requested) {
2359 verbose(env, "JIT is required for calling kernel function\n");
2360 return -ENOTSUPP;
2361 }
2362
2363 if (!bpf_jit_supports_kfunc_call()) {
2364 verbose(env, "JIT does not support calling kernel function\n");
2365 return -ENOTSUPP;
2366 }
2367
2368 if (!env->prog->gpl_compatible) {
2369 verbose(env, "cannot call kernel function from non-GPL compatible program\n");
2370 return -EINVAL;
2371 }
2372
2373 tab = kzalloc(sizeof(*tab), GFP_KERNEL);
2374 if (!tab)
2375 return -ENOMEM;
2376 prog_aux->kfunc_tab = tab;
2377 }
2378
a5d82727
KKD
2379 /* func_id == 0 is always invalid, but instead of returning an error, be
 2380 * conservative and wait until the code elimination pass before returning an
 2381 * error, so that invalid calls that get pruned out may still appear in BPF programs
 2382 * loaded from userspace. It is also required that the offset be untouched
2383 * for such calls.
2384 */
2385 if (!func_id && !offset)
2386 return 0;
2387
2357672c
KKD
2388 if (!btf_tab && offset) {
2389 btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
2390 if (!btf_tab)
2391 return -ENOMEM;
2392 prog_aux->kfunc_btf_tab = btf_tab;
2393 }
2394
43bf0878 2395 desc_btf = find_kfunc_desc_btf(env, offset);
2357672c
KKD
2396 if (IS_ERR(desc_btf)) {
2397 verbose(env, "failed to find BTF for kernel function\n");
2398 return PTR_ERR(desc_btf);
2399 }
2400
2401 if (find_kfunc_desc(env->prog, func_id, offset))
e6ac2450
MKL
2402 return 0;
2403
2404 if (tab->nr_descs == MAX_KFUNC_DESCS) {
2405 verbose(env, "too many different kernel function calls\n");
2406 return -E2BIG;
2407 }
2408
2357672c 2409 func = btf_type_by_id(desc_btf, func_id);
e6ac2450
MKL
2410 if (!func || !btf_type_is_func(func)) {
2411 verbose(env, "kernel btf_id %u is not a function\n",
2412 func_id);
2413 return -EINVAL;
2414 }
2357672c 2415 func_proto = btf_type_by_id(desc_btf, func->type);
e6ac2450
MKL
2416 if (!func_proto || !btf_type_is_func_proto(func_proto)) {
2417 verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
2418 func_id);
2419 return -EINVAL;
2420 }
2421
2357672c 2422 func_name = btf_name_by_offset(desc_btf, func->name_off);
e6ac2450
MKL
2423 addr = kallsyms_lookup_name(func_name);
2424 if (!addr) {
2425 verbose(env, "cannot find address for kernel function %s\n",
2426 func_name);
2427 return -EINVAL;
2428 }
2429
8cbf062a
HT
2430 call_imm = BPF_CALL_IMM(addr);
2431 /* Check whether or not the relative offset overflows desc->imm */
2432 if ((unsigned long)(s32)call_imm != call_imm) {
2433 verbose(env, "address of kernel function %s is out of range\n",
2434 func_name);
2435 return -EINVAL;
2436 }
2437
3d76a4d3
SF
2438 if (bpf_dev_bound_kfunc_id(func_id)) {
2439 err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
2440 if (err)
2441 return err;
2442 }
2443
e6ac2450
MKL
2444 desc = &tab->descs[tab->nr_descs++];
2445 desc->func_id = func_id;
8cbf062a 2446 desc->imm = call_imm;
2357672c
KKD
2447 desc->offset = offset;
2448 err = btf_distill_func_proto(&env->log, desc_btf,
e6ac2450
MKL
2449 func_proto, func_name,
2450 &desc->func_model);
2451 if (!err)
2452 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2357672c 2453 kfunc_desc_cmp_by_id_off, NULL);
e6ac2450
MKL
2454 return err;
2455}
2456
2457static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
2458{
2459 const struct bpf_kfunc_desc *d0 = a;
2460 const struct bpf_kfunc_desc *d1 = b;
2461
2462 if (d0->imm > d1->imm)
2463 return 1;
2464 else if (d0->imm < d1->imm)
2465 return -1;
2466 return 0;
2467}
2468
2469static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
2470{
2471 struct bpf_kfunc_desc_tab *tab;
2472
2473 tab = prog->aux->kfunc_tab;
2474 if (!tab)
2475 return;
2476
2477 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2478 kfunc_desc_cmp_by_imm, NULL);
2479}
2480
2481bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
2482{
2483 return !!prog->aux->kfunc_tab;
2484}
2485
2486const struct btf_func_model *
2487bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
2488 const struct bpf_insn *insn)
2489{
2490 const struct bpf_kfunc_desc desc = {
2491 .imm = insn->imm,
2492 };
2493 const struct bpf_kfunc_desc *res;
2494 struct bpf_kfunc_desc_tab *tab;
2495
2496 tab = prog->aux->kfunc_tab;
2497 res = bsearch(&desc, tab->descs, tab->nr_descs,
2498 sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm);
2499
2500 return res ? &res->func_model : NULL;
2501}
2502
2503static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
cc8b0b92 2504{
9c8105bd 2505 struct bpf_subprog_info *subprog = env->subprog_info;
cc8b0b92 2506 struct bpf_insn *insn = env->prog->insnsi;
e6ac2450 2507 int i, ret, insn_cnt = env->prog->len;
cc8b0b92 2508
f910cefa
JW
2509 /* Add entry function. */
2510 ret = add_subprog(env, 0);
e6ac2450 2511 if (ret)
f910cefa
JW
2512 return ret;
2513
e6ac2450
MKL
2514 for (i = 0; i < insn_cnt; i++, insn++) {
2515 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
2516 !bpf_pseudo_kfunc_call(insn))
cc8b0b92 2517 continue;
e6ac2450 2518
2c78ee89 2519 if (!env->bpf_capable) {
e6ac2450 2520 verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
cc8b0b92
AS
2521 return -EPERM;
2522 }
e6ac2450 2523
3990ed4c 2524 if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
e6ac2450 2525 ret = add_subprog(env, i + insn->imm + 1);
3990ed4c 2526 else
2357672c 2527 ret = add_kfunc_call(env, insn->imm, insn->off);
e6ac2450 2528
cc8b0b92
AS
2529 if (ret < 0)
2530 return ret;
2531 }
2532
4cb3d99c
JW
2533 /* Add a fake 'exit' subprog which could simplify subprog iteration
2534 * logic. 'subprog_cnt' should not be increased.
2535 */
2536 subprog[env->subprog_cnt].start = insn_cnt;
2537
06ee7115 2538 if (env->log.level & BPF_LOG_LEVEL2)
cc8b0b92 2539 for (i = 0; i < env->subprog_cnt; i++)
9c8105bd 2540 verbose(env, "func#%d @%d\n", i, subprog[i].start);
cc8b0b92 2541
e6ac2450
MKL
2542 return 0;
2543}
2544
2545static int check_subprogs(struct bpf_verifier_env *env)
2546{
2547 int i, subprog_start, subprog_end, off, cur_subprog = 0;
2548 struct bpf_subprog_info *subprog = env->subprog_info;
2549 struct bpf_insn *insn = env->prog->insnsi;
2550 int insn_cnt = env->prog->len;
2551
cc8b0b92 2552 /* now check that all jumps are within the same subprog */
4cb3d99c
JW
2553 subprog_start = subprog[cur_subprog].start;
2554 subprog_end = subprog[cur_subprog + 1].start;
cc8b0b92
AS
2555 for (i = 0; i < insn_cnt; i++) {
2556 u8 code = insn[i].code;
2557
7f6e4312 2558 if (code == (BPF_JMP | BPF_CALL) &&
df2ccc18
IL
2559 insn[i].src_reg == 0 &&
2560 insn[i].imm == BPF_FUNC_tail_call)
7f6e4312 2561 subprog[cur_subprog].has_tail_call = true;
09b28d76
AS
2562 if (BPF_CLASS(code) == BPF_LD &&
2563 (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
2564 subprog[cur_subprog].has_ld_abs = true;
092ed096 2565 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
cc8b0b92
AS
2566 goto next;
2567 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
2568 goto next;
2569 off = i + insn[i].off + 1;
2570 if (off < subprog_start || off >= subprog_end) {
2571 verbose(env, "jump out of range from insn %d to %d\n", i, off);
2572 return -EINVAL;
2573 }
2574next:
2575 if (i == subprog_end - 1) {
2576 /* to avoid fall-through from one subprog into another
2577 * the last insn of the subprog should be either exit
2578 * or unconditional jump back
2579 */
2580 if (code != (BPF_JMP | BPF_EXIT) &&
2581 code != (BPF_JMP | BPF_JA)) {
2582 verbose(env, "last insn is not an exit or jmp\n");
2583 return -EINVAL;
2584 }
2585 subprog_start = subprog_end;
4cb3d99c
JW
2586 cur_subprog++;
2587 if (cur_subprog < env->subprog_cnt)
9c8105bd 2588 subprog_end = subprog[cur_subprog + 1].start;
cc8b0b92
AS
2589 }
2590 }
2591 return 0;
2592}
2593
679c782d
EC
2594/* Parentage chain of this register (or stack slot) should take care of all
2595 * issues like callee-saved registers, stack slot allocation time, etc.
2596 */
f4d7e40a 2597static int mark_reg_read(struct bpf_verifier_env *env,
679c782d 2598 const struct bpf_reg_state *state,
5327ed3d 2599 struct bpf_reg_state *parent, u8 flag)
f4d7e40a
AS
2600{
2601 bool writes = parent == state->parent; /* Observe write marks */
06ee7115 2602 int cnt = 0;
dc503a8a
EC
2603
2604 while (parent) {
2605 /* if read wasn't screened by an earlier write ... */
679c782d 2606 if (writes && state->live & REG_LIVE_WRITTEN)
dc503a8a 2607 break;
9242b5f5
AS
2608 if (parent->live & REG_LIVE_DONE) {
2609 verbose(env, "verifier BUG type %s var_off %lld off %d\n",
c25b2ae1 2610 reg_type_str(env, parent->type),
9242b5f5
AS
2611 parent->var_off.value, parent->off);
2612 return -EFAULT;
2613 }
5327ed3d
JW
2614 /* The first condition is more likely to be true than the
 2615 * second, so check it first.
2616 */
2617 if ((parent->live & REG_LIVE_READ) == flag ||
2618 parent->live & REG_LIVE_READ64)
25af32da
AS
2619 /* The parentage chain never changes and
2620 * this parent was already marked as LIVE_READ.
2621 * There is no need to keep walking the chain again and
2622 * keep re-marking all parents as LIVE_READ.
2623 * This case happens when the same register is read
2624 * multiple times without writes into it in-between.
5327ed3d
JW
2625 * Also, if parent has the stronger REG_LIVE_READ64 set,
2626 * then no need to set the weak REG_LIVE_READ32.
25af32da
AS
2627 */
2628 break;
dc503a8a 2629 /* ... then we depend on parent's value */
5327ed3d
JW
2630 parent->live |= flag;
2631 /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
2632 if (flag == REG_LIVE_READ64)
2633 parent->live &= ~REG_LIVE_READ32;
dc503a8a
EC
2634 state = parent;
2635 parent = state->parent;
f4d7e40a 2636 writes = true;
06ee7115 2637 cnt++;
dc503a8a 2638 }
06ee7115
AS
2639
2640 if (env->longest_mark_read_walk < cnt)
2641 env->longest_mark_read_walk = cnt;
f4d7e40a 2642 return 0;
dc503a8a
EC
2643}
2644
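A user-space model of the parentage-chain walk in mark_reg_read(): the READ mark propagates upward until a write screens it or an ancestor is already marked. The toy_* types and flags are simplified assumptions; the REG_LIVE_READ32/READ64 distinction and the REG_LIVE_DONE sanity check are left out.

#include <stdio.h>

#define TOY_LIVE_WRITTEN 0x1
#define TOY_LIVE_READ    0x2

struct toy_reg {
        struct toy_reg *parent;
        unsigned int live;
};

/* simplified shape of mark_reg_read(): walk the parent chain and OR in
 * the READ mark, stopping once a write screens the read or an ancestor
 * already carries the mark
 */
static void toy_mark_read(struct toy_reg *state)
{
        struct toy_reg *parent = state->parent;

        while (parent) {
                if (state->live & TOY_LIVE_WRITTEN)
                        break;                  /* read screened by a write */
                if (parent->live & TOY_LIVE_READ)
                        break;                  /* chain already marked */
                parent->live |= TOY_LIVE_READ;
                state = parent;
                parent = state->parent;
        }
}

int main(void)
{
        struct toy_reg grandparent = { .parent = NULL };
        struct toy_reg parent = { .parent = &grandparent };
        struct toy_reg cur = { .parent = &parent };

        toy_mark_read(&cur);
        printf("parent live=%#x grandparent live=%#x\n",
               parent.live, grandparent.live);  /* both get the READ mark */
        return 0;
}
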
d6fefa11
KKD
2645static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
2646{
2647 struct bpf_func_state *state = func(env, reg);
2648 int spi, ret;
2649
2650 /* For CONST_PTR_TO_DYNPTR, it must have already been done by
2651 * check_reg_arg in check_helper_call and mark_btf_func_reg_size in
2652 * check_kfunc_call.
2653 */
2654 if (reg->type == CONST_PTR_TO_DYNPTR)
2655 return 0;
79168a66
KKD
2656 spi = dynptr_get_spi(env, reg);
2657 if (spi < 0)
2658 return spi;
d6fefa11
KKD
2659 /* Caller ensures dynptr is valid and initialized, which means spi is in
2660 * bounds and spi is the first dynptr slot. Simply mark stack slot as
2661 * read.
2662 */
2663 ret = mark_reg_read(env, &state->stack[spi].spilled_ptr,
2664 state->stack[spi].spilled_ptr.parent, REG_LIVE_READ64);
2665 if (ret)
2666 return ret;
2667 return mark_reg_read(env, &state->stack[spi - 1].spilled_ptr,
2668 state->stack[spi - 1].spilled_ptr.parent, REG_LIVE_READ64);
2669}
2670
5327ed3d
JW
2671/* This function is supposed to be used by the following 32-bit optimization
2672 * code only. It returns TRUE if the source or destination register operates
 2673 * on 64 bits; otherwise it returns FALSE.
2674 */
2675static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
2676 u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
2677{
2678 u8 code, class, op;
2679
2680 code = insn->code;
2681 class = BPF_CLASS(code);
2682 op = BPF_OP(code);
2683 if (class == BPF_JMP) {
2684 /* BPF_EXIT for "main" will reach here. Return TRUE
2685 * conservatively.
2686 */
2687 if (op == BPF_EXIT)
2688 return true;
2689 if (op == BPF_CALL) {
 2690 /* A BPF-to-BPF call will reach here because caller-saved
 2691 * clobbers are marked with DST_OP_NO_MARK, for which we
 2692 * don't care about the register def since they are
 2693 * already marked as NOT_INIT anyway.
2694 */
2695 if (insn->src_reg == BPF_PSEUDO_CALL)
2696 return false;
 2697 /* A helper call will reach here because of the arg type
 2698 * check; conservatively return TRUE.
2699 */
2700 if (t == SRC_OP)
2701 return true;
2702
2703 return false;
2704 }
2705 }
2706
2707 if (class == BPF_ALU64 || class == BPF_JMP ||
2708 /* BPF_END always use BPF_ALU class. */
2709 (class == BPF_ALU && op == BPF_END && insn->imm == 64))
2710 return true;
2711
2712 if (class == BPF_ALU || class == BPF_JMP32)
2713 return false;
2714
2715 if (class == BPF_LDX) {
2716 if (t != SRC_OP)
2717 return BPF_SIZE(code) == BPF_DW;
2718 /* LDX source must be ptr. */
2719 return true;
2720 }
2721
2722 if (class == BPF_STX) {
83a28819
IL
2723 /* BPF_STX (including atomic variants) has multiple source
2724 * operands, one of which is a ptr. Check whether the caller is
2725 * asking about it.
2726 */
2727 if (t == SRC_OP && reg->type != SCALAR_VALUE)
5327ed3d
JW
2728 return true;
2729 return BPF_SIZE(code) == BPF_DW;
2730 }
2731
2732 if (class == BPF_LD) {
2733 u8 mode = BPF_MODE(code);
2734
2735 /* LD_IMM64 */
2736 if (mode == BPF_IMM)
2737 return true;
2738
2739 /* Both LD_IND and LD_ABS return 32-bit data. */
2740 if (t != SRC_OP)
2741 return false;
2742
2743 /* Implicit ctx ptr. */
2744 if (regno == BPF_REG_6)
2745 return true;
2746
2747 /* Explicit source could be any width. */
2748 return true;
2749 }
2750
2751 if (class == BPF_ST)
2752 /* The only source register for BPF_ST is a ptr. */
2753 return true;
2754
 2755 /* Conservatively return true by default. */
2756 return true;
2757}
2758
83a28819
IL
2759/* Return the regno defined by the insn, or -1. */
2760static int insn_def_regno(const struct bpf_insn *insn)
b325fbca 2761{
83a28819
IL
2762 switch (BPF_CLASS(insn->code)) {
2763 case BPF_JMP:
2764 case BPF_JMP32:
2765 case BPF_ST:
2766 return -1;
2767 case BPF_STX:
2768 if (BPF_MODE(insn->code) == BPF_ATOMIC &&
2769 (insn->imm & BPF_FETCH)) {
2770 if (insn->imm == BPF_CMPXCHG)
2771 return BPF_REG_0;
2772 else
2773 return insn->src_reg;
2774 } else {
2775 return -1;
2776 }
2777 default:
2778 return insn->dst_reg;
2779 }
b325fbca
JW
2780}
2781
2782/* Return TRUE if INSN has defined any 32-bit value explicitly. */
2783static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
2784{
83a28819
IL
2785 int dst_reg = insn_def_regno(insn);
2786
2787 if (dst_reg == -1)
b325fbca
JW
2788 return false;
2789
83a28819 2790 return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
b325fbca
JW
2791}
2792
5327ed3d
JW
2793static void mark_insn_zext(struct bpf_verifier_env *env,
2794 struct bpf_reg_state *reg)
2795{
2796 s32 def_idx = reg->subreg_def;
2797
2798 if (def_idx == DEF_NOT_SUBREG)
2799 return;
2800
2801 env->insn_aux_data[def_idx - 1].zext_dst = true;
2802 /* The dst will be zero extended, so won't be sub-register anymore. */
2803 reg->subreg_def = DEF_NOT_SUBREG;
2804}
2805
dc503a8a 2806static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
17a52670
AS
2807 enum reg_arg_type t)
2808{
f4d7e40a
AS
2809 struct bpf_verifier_state *vstate = env->cur_state;
2810 struct bpf_func_state *state = vstate->frame[vstate->curframe];
5327ed3d 2811 struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
c342dc10 2812 struct bpf_reg_state *reg, *regs = state->regs;
5327ed3d 2813 bool rw64;
dc503a8a 2814
17a52670 2815 if (regno >= MAX_BPF_REG) {
61bd5218 2816 verbose(env, "R%d is invalid\n", regno);
17a52670
AS
2817 return -EINVAL;
2818 }
2819
0f55f9ed
CL
2820 mark_reg_scratched(env, regno);
2821
c342dc10 2822 reg = &regs[regno];
5327ed3d 2823 rw64 = is_reg64(env, insn, regno, reg, t);
17a52670
AS
2824 if (t == SRC_OP) {
2825 /* check whether register used as source operand can be read */
c342dc10 2826 if (reg->type == NOT_INIT) {
61bd5218 2827 verbose(env, "R%d !read_ok\n", regno);
17a52670
AS
2828 return -EACCES;
2829 }
679c782d 2830 /* We don't need to worry about FP liveness because it's read-only */
c342dc10
JW
2831 if (regno == BPF_REG_FP)
2832 return 0;
2833
5327ed3d
JW
2834 if (rw64)
2835 mark_insn_zext(env, reg);
2836
2837 return mark_reg_read(env, reg, reg->parent,
2838 rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
17a52670
AS
2839 } else {
2840 /* check whether register used as dest operand can be written to */
2841 if (regno == BPF_REG_FP) {
61bd5218 2842 verbose(env, "frame pointer is read only\n");
17a52670
AS
2843 return -EACCES;
2844 }
c342dc10 2845 reg->live |= REG_LIVE_WRITTEN;
5327ed3d 2846 reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
17a52670 2847 if (t == DST_OP)
61bd5218 2848 mark_reg_unknown(env, regs, regno);
17a52670
AS
2849 }
2850 return 0;
2851}
2852
bffdeaa8
AN
2853static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
2854{
2855 env->insn_aux_data[idx].jmp_point = true;
2856}
2857
2858static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
2859{
2860 return env->insn_aux_data[insn_idx].jmp_point;
2861}
2862
b5dc0163
AS
2863/* for any branch, call, or exit, record the history of jmps in the given state */
2864static int push_jmp_history(struct bpf_verifier_env *env,
2865 struct bpf_verifier_state *cur)
2866{
2867 u32 cnt = cur->jmp_history_cnt;
2868 struct bpf_idx_pair *p;
ceb35b66 2869 size_t alloc_size;
b5dc0163 2870
bffdeaa8
AN
2871 if (!is_jmp_point(env, env->insn_idx))
2872 return 0;
2873
b5dc0163 2874 cnt++;
ceb35b66
KC
2875 alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
2876 p = krealloc(cur->jmp_history, alloc_size, GFP_USER);
b5dc0163
AS
2877 if (!p)
2878 return -ENOMEM;
2879 p[cnt - 1].idx = env->insn_idx;
2880 p[cnt - 1].prev_idx = env->prev_insn_idx;
2881 cur->jmp_history = p;
2882 cur->jmp_history_cnt = cnt;
2883 return 0;
2884}
2885
2886/* Backtrack one insn at a time. If idx is not at the top of the recorded
 2887 * history then the previous instruction came from straight-line execution.
2888 */
2889static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
2890 u32 *history)
2891{
2892 u32 cnt = *history;
2893
2894 if (cnt && st->jmp_history[cnt - 1].idx == i) {
2895 i = st->jmp_history[cnt - 1].prev_idx;
2896 (*history)--;
2897 } else {
2898 i--;
2899 }
2900 return i;
2901}
2902
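A stand-alone sketch of how get_prev_insn_idx() replays jmp_history while walking instructions in reverse: if the current index matches the newest recorded jump target, follow its recorded prev_idx, otherwise fall back to straight-line execution (i - 1). The toy_* names are hypothetical.

#include <stdio.h>

struct toy_idx_pair { int idx; int prev_idx; };

/* simplified get_prev_insn_idx(): consume the history from the back */
static int toy_prev_insn(const struct toy_idx_pair *hist, int i, int *cnt)
{
        if (*cnt && hist[*cnt - 1].idx == i) {
                i = hist[*cnt - 1].prev_idx;
                (*cnt)--;
        } else {
                i--;
        }
        return i;
}

int main(void)
{
        /* one recorded jump: insn 10 was reached from insn 3 */
        struct toy_idx_pair hist[] = { { .idx = 10, .prev_idx = 3 } };
        int cnt = 1, i = 12;

        while (i > 0) {
                printf("%d ", i);
                i = toy_prev_insn(hist, i, &cnt);
        }
        printf("%d\n", i);      /* prints: 12 11 10 3 2 1 0 */
        return 0;
}
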
e6ac2450
MKL
2903static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
2904{
2905 const struct btf_type *func;
2357672c 2906 struct btf *desc_btf;
e6ac2450
MKL
2907
2908 if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
2909 return NULL;
2910
43bf0878 2911 desc_btf = find_kfunc_desc_btf(data, insn->off);
2357672c
KKD
2912 if (IS_ERR(desc_btf))
2913 return "<error>";
2914
2915 func = btf_type_by_id(desc_btf, insn->imm);
2916 return btf_name_by_offset(desc_btf, func->name_off);
e6ac2450
MKL
2917}
2918
b5dc0163
AS
2919/* For a given verifier state, backtrack_insn() is called from the last insn to
 2920 * the first insn. Its purpose is to compute a bitmask of registers and
 2921 * stack slots that need precision in the parent verifier state.
2922 */
2923static int backtrack_insn(struct bpf_verifier_env *env, int idx,
2924 u32 *reg_mask, u64 *stack_mask)
2925{
2926 const struct bpf_insn_cbs cbs = {
e6ac2450 2927 .cb_call = disasm_kfunc_name,
b5dc0163
AS
2928 .cb_print = verbose,
2929 .private_data = env,
2930 };
2931 struct bpf_insn *insn = env->prog->insnsi + idx;
2932 u8 class = BPF_CLASS(insn->code);
2933 u8 opcode = BPF_OP(insn->code);
2934 u8 mode = BPF_MODE(insn->code);
2935 u32 dreg = 1u << insn->dst_reg;
2936 u32 sreg = 1u << insn->src_reg;
2937 u32 spi;
2938
2939 if (insn->code == 0)
2940 return 0;
496f3324 2941 if (env->log.level & BPF_LOG_LEVEL2) {
b5dc0163
AS
2942 verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
2943 verbose(env, "%d: ", idx);
2944 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
2945 }
2946
2947 if (class == BPF_ALU || class == BPF_ALU64) {
2948 if (!(*reg_mask & dreg))
2949 return 0;
2950 if (opcode == BPF_MOV) {
2951 if (BPF_SRC(insn->code) == BPF_X) {
2952 /* dreg = sreg
2953 * dreg needs precision after this insn
2954 * sreg needs precision before this insn
2955 */
2956 *reg_mask &= ~dreg;
2957 *reg_mask |= sreg;
2958 } else {
2959 /* dreg = K
2960 * dreg needs precision after this insn.
2961 * Corresponding register is already marked
2962 * as precise=true in this verifier state.
2963 * No further markings in parent are necessary
2964 */
2965 *reg_mask &= ~dreg;
2966 }
2967 } else {
2968 if (BPF_SRC(insn->code) == BPF_X) {
2969 /* dreg += sreg
2970 * both dreg and sreg need precision
2971 * before this insn
2972 */
2973 *reg_mask |= sreg;
2974 } /* else dreg += K
2975 * dreg still needs precision before this insn
2976 */
2977 }
2978 } else if (class == BPF_LDX) {
2979 if (!(*reg_mask & dreg))
2980 return 0;
2981 *reg_mask &= ~dreg;
2982
2983 /* scalars can only be spilled into stack w/o losing precision.
2984 * Load from any other memory can be zero extended.
2985 * The desire to keep that precision is already indicated
2986 * by 'precise' mark in corresponding register of this state.
2987 * No further tracking necessary.
2988 */
2989 if (insn->src_reg != BPF_REG_FP)
2990 return 0;
b5dc0163
AS
2991
2992 /* dreg = *(u64 *)[fp - off] was a fill from the stack.
2993 * that [fp - off] slot contains scalar that needs to be
2994 * tracked with precision
2995 */
2996 spi = (-insn->off - 1) / BPF_REG_SIZE;
2997 if (spi >= 64) {
2998 verbose(env, "BUG spi %d\n", spi);
2999 WARN_ONCE(1, "verifier backtracking bug");
3000 return -EFAULT;
3001 }
3002 *stack_mask |= 1ull << spi;
b3b50f05 3003 } else if (class == BPF_STX || class == BPF_ST) {
b5dc0163 3004 if (*reg_mask & dreg)
b3b50f05 3005 /* stx & st shouldn't be using _scalar_ dst_reg
b5dc0163
AS
3006 * to access memory. It means backtracking
3007 * encountered a case of pointer subtraction.
3008 */
3009 return -ENOTSUPP;
3010 /* scalars can only be spilled into stack */
3011 if (insn->dst_reg != BPF_REG_FP)
3012 return 0;
b5dc0163
AS
3013 spi = (-insn->off - 1) / BPF_REG_SIZE;
3014 if (spi >= 64) {
3015 verbose(env, "BUG spi %d\n", spi);
3016 WARN_ONCE(1, "verifier backtracking bug");
3017 return -EFAULT;
3018 }
3019 if (!(*stack_mask & (1ull << spi)))
3020 return 0;
3021 *stack_mask &= ~(1ull << spi);
b3b50f05
AN
3022 if (class == BPF_STX)
3023 *reg_mask |= sreg;
b5dc0163
AS
3024 } else if (class == BPF_JMP || class == BPF_JMP32) {
3025 if (opcode == BPF_CALL) {
3026 if (insn->src_reg == BPF_PSEUDO_CALL)
3027 return -ENOTSUPP;
be2ef816
AN
3028 /* BPF helpers that invoke callback subprogs are
3029 * equivalent to BPF_PSEUDO_CALL above
3030 */
3031 if (insn->src_reg == 0 && is_callback_calling_function(insn->imm))
3032 return -ENOTSUPP;
d3178e8a
HS
3033 /* kfunc with imm==0 is invalid and fixup_kfunc_call will
3034 * catch this error later. Make backtracking conservative
3035 * with ENOTSUPP.
3036 */
3037 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
3038 return -ENOTSUPP;
b5dc0163
AS
3039 /* regular helper call sets R0 */
3040 *reg_mask &= ~1;
3041 if (*reg_mask & 0x3f) {
 3042 /* if backtracking was looking for registers R1-R5
3043 * they should have been found already.
3044 */
3045 verbose(env, "BUG regs %x\n", *reg_mask);
3046 WARN_ONCE(1, "verifier backtracking bug");
3047 return -EFAULT;
3048 }
3049 } else if (opcode == BPF_EXIT) {
3050 return -ENOTSUPP;
3051 }
3052 } else if (class == BPF_LD) {
3053 if (!(*reg_mask & dreg))
3054 return 0;
3055 *reg_mask &= ~dreg;
3056 /* It's ld_imm64 or ld_abs or ld_ind.
3057 * For ld_imm64 no further tracking of precision
3058 * into parent is necessary
3059 */
3060 if (mode == BPF_IND || mode == BPF_ABS)
3061 /* to be analyzed */
3062 return -ENOTSUPP;
b5dc0163
AS
3063 }
3064 return 0;
3065}
3066
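The reg_mask bookkeeping in backtrack_insn() can be followed with a tiny example based on the r5/r9/r8 sequence used in the precision-tracking comment below. Backtracking runs from the last insn to the first, so "r5 = r9" is processed before "r9 -= r8"; the snippet only illustrates the bitmask updates and is not verifier code.

#include <stdio.h>

int main(void)
{
        unsigned int reg_mask = 1u << 5;        /* backtracking wants r5 precise */
        unsigned int dreg = 1u << 5;            /* insn: r5 = r9 */
        unsigned int sreg = 1u << 9;

        /* mov: the precision requirement moves from dst to src */
        reg_mask &= ~dreg;
        reg_mask |= sreg;
        printf("after 'r5 = r9': reg_mask=%#x\n", reg_mask);    /* bit 9 set */

        /* other alu ops: both dst and src need precision before the insn */
        dreg = 1u << 9;                         /* insn: r9 -= r8 */
        sreg = 1u << 8;
        if (reg_mask & dreg)
                reg_mask |= sreg;
        printf("after 'r9 -= r8': reg_mask=%#x\n", reg_mask);   /* bits 8 and 9 */
        return 0;
}
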
3067/* the scalar precision tracking algorithm:
3068 * . at the start all registers have precise=false.
3069 * . scalar ranges are tracked as normal through alu and jmp insns.
3070 * . once precise value of the scalar register is used in:
3071 * . ptr + scalar alu
3072 * . if (scalar cond K|scalar)
3073 * . helper_call(.., scalar, ...) where ARG_CONST is expected
3074 * backtrack through the verifier states and mark all registers and
 3075 * stack slots with spilled constants that these scalar registers
3076 * should be precise.
3077 * . during state pruning two registers (or spilled stack slots)
3078 * are equivalent if both are not precise.
3079 *
3080 * Note the verifier cannot simply walk register parentage chain,
3081 * since many different registers and stack slots could have been
3082 * used to compute single precise scalar.
3083 *
3084 * The approach of starting with precise=true for all registers and then
 3085 * backtracking to mark a register as not precise when the verifier detects
 3086 * that the program doesn't care about the specific value (e.g., when a helper
 3087 * takes a register as an ARG_ANYTHING parameter) is not safe.
3088 *
3089 * It's ok to walk single parentage chain of the verifier states.
3090 * It's possible that this backtracking will go all the way till 1st insn.
3091 * All other branches will be explored for needing precision later.
3092 *
3093 * The backtracking needs to deal with cases like:
3094 * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
3095 * r9 -= r8
3096 * r5 = r9
3097 * if r5 > 0x79f goto pc+7
3098 * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
3099 * r5 += 1
3100 * ...
3101 * call bpf_perf_event_output#25
3102 * where .arg5_type = ARG_CONST_SIZE_OR_ZERO
3103 *
3104 * and this case:
3105 * r6 = 1
3106 * call foo // uses callee's r6 inside to compute r0
3107 * r0 += r6
3108 * if r0 == 0 goto
3109 *
 3110 * to track the above, reg_mask/stack_mask need to be independent for each frame.
3111 *
3112 * Also if parent's curframe > frame where backtracking started,
 3113 * the verifier needs to mark registers in both frames, otherwise callees
3114 * may incorrectly prune callers. This is similar to
3115 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
3116 *
3117 * For now backtracking falls back into conservative marking.
3118 */
3119static void mark_all_scalars_precise(struct bpf_verifier_env *env,
3120 struct bpf_verifier_state *st)
3121{
3122 struct bpf_func_state *func;
3123 struct bpf_reg_state *reg;
3124 int i, j;
3125
3126 /* big hammer: mark all scalars precise in this path.
3127 * pop_stack may still get !precise scalars.
f63181b6
AN
3128 * We also skip current state and go straight to first parent state,
3129 * because precision markings in current non-checkpointed state are
3130 * not needed. See why in the comment in __mark_chain_precision below.
b5dc0163 3131 */
f63181b6 3132 for (st = st->parent; st; st = st->parent) {
b5dc0163
AS
3133 for (i = 0; i <= st->curframe; i++) {
3134 func = st->frame[i];
3135 for (j = 0; j < BPF_REG_FP; j++) {
3136 reg = &func->regs[j];
3137 if (reg->type != SCALAR_VALUE)
3138 continue;
3139 reg->precise = true;
3140 }
3141 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
27113c59 3142 if (!is_spilled_reg(&func->stack[j]))
b5dc0163
AS
3143 continue;
3144 reg = &func->stack[j].spilled_ptr;
3145 if (reg->type != SCALAR_VALUE)
3146 continue;
3147 reg->precise = true;
3148 }
3149 }
f63181b6 3150 }
b5dc0163
AS
3151}
3152
7a830b53
AN
3153static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
3154{
3155 struct bpf_func_state *func;
3156 struct bpf_reg_state *reg;
3157 int i, j;
3158
3159 for (i = 0; i <= st->curframe; i++) {
3160 func = st->frame[i];
3161 for (j = 0; j < BPF_REG_FP; j++) {
3162 reg = &func->regs[j];
3163 if (reg->type != SCALAR_VALUE)
3164 continue;
3165 reg->precise = false;
3166 }
3167 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
3168 if (!is_spilled_reg(&func->stack[j]))
3169 continue;
3170 reg = &func->stack[j].spilled_ptr;
3171 if (reg->type != SCALAR_VALUE)
3172 continue;
3173 reg->precise = false;
3174 }
3175 }
3176}
3177
f63181b6
AN
3178/*
3179 * __mark_chain_precision() backtracks BPF program instruction sequence and
3180 * chain of verifier states making sure that register *regno* (if regno >= 0)
3181 * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
3182 * SCALARS, as well as any other registers and slots that contribute to
3183 * a tracked state of given registers/stack slots, depending on specific BPF
 3184 * assembly instructions (see backtrack_insn() for exact instruction handling
3185 * logic). This backtracking relies on recorded jmp_history and is able to
3186 * traverse entire chain of parent states. This process ends only when all the
3187 * necessary registers/slots and their transitive dependencies are marked as
3188 * precise.
3189 *
3190 * One important and subtle aspect is that precise marks *do not matter* in
3191 * the currently verified state (current state). It is important to understand
3192 * why this is the case.
3193 *
3194 * First, note that current state is the state that is not yet "checkpointed",
3195 * i.e., it is not yet put into env->explored_states, and it has no children
3196 * states as well. It's ephemeral, and can end up either a) being discarded if
3197 * compatible explored state is found at some point or BPF_EXIT instruction is
3198 * reached or b) checkpointed and put into env->explored_states, branching out
3199 * into one or more children states.
3200 *
3201 * In the former case, precise markings in current state are completely
3202 * ignored by state comparison code (see regsafe() for details). Only
3203 * checkpointed ("old") state precise markings are important, and if old
3204 * state's register/slot is precise, regsafe() assumes current state's
3205 * register/slot as precise and checks value ranges exactly and precisely. If
3206 * states turn out to be compatible, current state's necessary precise
3207 * markings and any required parent states' precise markings are enforced
 3208 * after the fact with propagate_precision() logic. But it's
3209 * important to realize that in this case, even after marking current state
3210 * registers/slots as precise, we immediately discard current state. So what
3211 * actually matters is any of the precise markings propagated into current
3212 * state's parent states, which are always checkpointed (due to b) case above).
3213 * As such, for scenario a) it doesn't matter if current state has precise
3214 * markings set or not.
3215 *
3216 * Now, for the scenario b), checkpointing and forking into child(ren)
3217 * state(s). Note that before current state gets to checkpointing step, any
3218 * processed instruction always assumes precise SCALAR register/slot
3219 * knowledge: if precise value or range is useful to prune jump branch, BPF
3220 * verifier takes this opportunity enthusiastically. Similarly, when
3221 * register's value is used to calculate offset or memory address, exact
3222 * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
3223 * how state comparison ignores current state's precise markings, the BPF
3224 * verifier both ignores and assumes precise markings *at will* during the
3225 * instruction verification process. But as the verifier
3226 * assumes precision, it also propagates any precision dependencies across
3227 * parent states, which are not yet finalized, so can be further restricted
3228 * based on new knowledge gained from restrictions enforced by their children
3229 * states. This is so that once those parent states are finalized, i.e., when
3230 * they have no more active children state, state comparison logic in
3231 * is_state_visited() would enforce strict and precise SCALAR ranges, if
3232 * required for correctness.
3233 *
3234 * To build a bit more intuition, note also that once a state is checkpointed,
3235 * the path we took to get to that state is not important. This is a crucial
3236 * property for state pruning. When a state is checkpointed and finalized at
3237 * some instruction index, it can be correctly and safely used to "short
3238 * circuit" any *compatible* state that reaches exactly the same instruction
3239 * index. I.e., if we jumped to that instruction from a completely different
3240 * code path than original finalized state was derived from, it doesn't
3241 * matter, current state can be discarded because from that instruction
3242 * forward having a compatible state ensures we will safely reach the
3243 * exit. States describe preconditions for further exploration, but completely
3244 * forget the history of how we got here.
3245 *
3246 * This also means that even if we needed precise SCALAR range to get to
3247 * finalized state, but from that point forward *that same* SCALAR register is
3248 * never used in a precise context (i.e., its precise value is not needed for
3249 * correctness), it's correct and safe to mark such register as "imprecise"
3250 * (i.e., precise marking set to false). This is what we rely on when we do
3251 * not set precise marking in current state. If no child state requires
3252 * precision for any given SCALAR register, it's safe to dictate that it can
3253 * be imprecise. If any child state does require this register to be precise,
3254 * we'll retroactively mark it precise during precise markings
3255 * propagation from child state to parent states.
7a830b53
AN
3256 *
3257 * Skipping the setting of precise marks in the current state is a mild version of
3258 * relying on the above observation. But we can utilize this property even
3259 * more aggressively by proactively forgetting any precise marking in the
3260 * current state (which we inherited from the parent state), right before we
3261 * checkpoint it and branch off into new child state. This is done by
3262 * mark_all_scalars_imprecise() to hopefully get more permissive and generic
3263 * finalized states which help in short circuiting more future states.
f63181b6 3264 */
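/* A short illustration of the backtracking itself (register numbers and
 * values below are only an example, not taken from a real program):
 *
 *   1: r8 = 16
 *   2: r7 = r8
 *   3: r7 += -8
 *   4: if r7 > 8 goto pc+4
 *
 * If insn 4 requires r7 to be precise (e.g. because the predicted branch
 * outcome is used for pruning), backtracking walks insns 3..1: insn 3
 * keeps r7 in the register mask, insn 2 replaces r7 with r8 in the mask
 * (r7 was a copy of r8), and insn 1 drops r8 once the constant source is
 * found. In every parent (checkpointed) state visited while a register is
 * still in the mask, that register's copy is marked precise.
 */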
529409ea 3265static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int regno,
a3ce685d 3266 int spi)
b5dc0163
AS
3267{
3268 struct bpf_verifier_state *st = env->cur_state;
3269 int first_idx = st->first_insn_idx;
3270 int last_idx = env->insn_idx;
3271 struct bpf_func_state *func;
3272 struct bpf_reg_state *reg;
a3ce685d
AS
3273 u32 reg_mask = regno >= 0 ? 1u << regno : 0;
3274 u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
b5dc0163 3275 bool skip_first = true;
a3ce685d 3276 bool new_marks = false;
b5dc0163
AS
3277 int i, err;
3278
2c78ee89 3279 if (!env->bpf_capable)
b5dc0163
AS
3280 return 0;
3281
f63181b6
AN
3282 /* Do sanity checks against current state of register and/or stack
3283 * slot, but don't set precise flag in current state, as precision
3284 * tracking in the current state is unnecessary.
3285 */
529409ea 3286 func = st->frame[frame];
a3ce685d
AS
3287 if (regno >= 0) {
3288 reg = &func->regs[regno];
3289 if (reg->type != SCALAR_VALUE) {
3290 WARN_ONCE(1, "backtracing misuse");
3291 return -EFAULT;
3292 }
f63181b6 3293 new_marks = true;
b5dc0163 3294 }
b5dc0163 3295
a3ce685d 3296 while (spi >= 0) {
27113c59 3297 if (!is_spilled_reg(&func->stack[spi])) {
a3ce685d
AS
3298 stack_mask = 0;
3299 break;
3300 }
3301 reg = &func->stack[spi].spilled_ptr;
3302 if (reg->type != SCALAR_VALUE) {
3303 stack_mask = 0;
3304 break;
3305 }
f63181b6 3306 new_marks = true;
a3ce685d
AS
3307 break;
3308 }
3309
3310 if (!new_marks)
3311 return 0;
3312 if (!reg_mask && !stack_mask)
3313 return 0;
be2ef816 3314
b5dc0163
AS
3315 for (;;) {
3316 DECLARE_BITMAP(mask, 64);
b5dc0163
AS
3317 u32 history = st->jmp_history_cnt;
3318
496f3324 3319 if (env->log.level & BPF_LOG_LEVEL2)
b5dc0163 3320 verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
be2ef816
AN
3321
3322 if (last_idx < 0) {
3323 /* we are at the entry into subprog, which
3324 * is expected for global funcs, but only if
3325 * requested precise registers are R1-R5
3326 * (which are global func's input arguments)
3327 */
3328 if (st->curframe == 0 &&
3329 st->frame[0]->subprogno > 0 &&
3330 st->frame[0]->callsite == BPF_MAIN_FUNC &&
3331 stack_mask == 0 && (reg_mask & ~0x3e) == 0) {
3332 bitmap_from_u64(mask, reg_mask);
3333 for_each_set_bit(i, mask, 32) {
3334 reg = &st->frame[0]->regs[i];
3335 if (reg->type != SCALAR_VALUE) {
3336 reg_mask &= ~(1u << i);
3337 continue;
3338 }
3339 reg->precise = true;
3340 }
3341 return 0;
3342 }
3343
3344 verbose(env, "BUG backtracing func entry subprog %d reg_mask %x stack_mask %llx\n",
3345 st->frame[0]->subprogno, reg_mask, stack_mask);
3346 WARN_ONCE(1, "verifier backtracking bug");
3347 return -EFAULT;
3348 }
3349
b5dc0163
AS
3350 for (i = last_idx;;) {
3351 if (skip_first) {
3352 err = 0;
3353 skip_first = false;
3354 } else {
3355 err = backtrack_insn(env, i, &reg_mask, &stack_mask);
3356 }
3357 if (err == -ENOTSUPP) {
3358 mark_all_scalars_precise(env, st);
3359 return 0;
3360 } else if (err) {
3361 return err;
3362 }
3363 if (!reg_mask && !stack_mask)
3364 /* Found assignment(s) into tracked register in this state.
3365 * Since this state is already marked, just return.
3366 * Nothing to be tracked further in the parent state.
3367 */
3368 return 0;
3369 if (i == first_idx)
3370 break;
3371 i = get_prev_insn_idx(st, i, &history);
3372 if (i >= env->prog->len) {
3373 /* This can happen if backtracking reached insn 0
3374 * and there are still reg_mask or stack_mask
3375 * to backtrack.
3376 * It means the backtracking missed the spot where
3377 * a particular register was initialized with a constant.
3378 */
3379 verbose(env, "BUG backtracking idx %d\n", i);
3380 WARN_ONCE(1, "verifier backtracking bug");
3381 return -EFAULT;
3382 }
3383 }
3384 st = st->parent;
3385 if (!st)
3386 break;
3387
a3ce685d 3388 new_marks = false;
529409ea 3389 func = st->frame[frame];
b5dc0163
AS
3390 bitmap_from_u64(mask, reg_mask);
3391 for_each_set_bit(i, mask, 32) {
3392 reg = &func->regs[i];
a3ce685d
AS
3393 if (reg->type != SCALAR_VALUE) {
3394 reg_mask &= ~(1u << i);
b5dc0163 3395 continue;
a3ce685d 3396 }
b5dc0163
AS
3397 if (!reg->precise)
3398 new_marks = true;
3399 reg->precise = true;
3400 }
3401
3402 bitmap_from_u64(mask, stack_mask);
3403 for_each_set_bit(i, mask, 64) {
3404 if (i >= func->allocated_stack / BPF_REG_SIZE) {
2339cd6c
AS
3405 /* the sequence of instructions:
3406 * 2: (bf) r3 = r10
3407 * 3: (7b) *(u64 *)(r3 -8) = r0
3408 * 4: (79) r4 = *(u64 *)(r10 -8)
3409 * doesn't contain jmps. It's backtracked
3410 * as a single block.
3411 * During backtracking insn 3 is not recognized as
3412 * stack access, so at the end of backtracking
3413 * stack slot fp-8 is still marked in stack_mask.
3414 * However the parent state may not have accessed
3415 * fp-8 and it's "unallocated" stack space.
3416 * In such a case, fall back to conservative precision marking.
b5dc0163 3417 */
2339cd6c
AS
3418 mark_all_scalars_precise(env, st);
3419 return 0;
b5dc0163
AS
3420 }
3421
27113c59 3422 if (!is_spilled_reg(&func->stack[i])) {
a3ce685d 3423 stack_mask &= ~(1ull << i);
b5dc0163 3424 continue;
a3ce685d 3425 }
b5dc0163 3426 reg = &func->stack[i].spilled_ptr;
a3ce685d
AS
3427 if (reg->type != SCALAR_VALUE) {
3428 stack_mask &= ~(1ull << i);
b5dc0163 3429 continue;
a3ce685d 3430 }
b5dc0163
AS
3431 if (!reg->precise)
3432 new_marks = true;
3433 reg->precise = true;
3434 }
496f3324 3435 if (env->log.level & BPF_LOG_LEVEL2) {
2e576648 3436 verbose(env, "parent %s regs=%x stack=%llx marks:",
b5dc0163
AS
3437 new_marks ? "didn't have" : "already had",
3438 reg_mask, stack_mask);
2e576648 3439 print_verifier_state(env, func, true);
b5dc0163
AS
3440 }
3441
a3ce685d
AS
3442 if (!reg_mask && !stack_mask)
3443 break;
b5dc0163
AS
3444 if (!new_marks)
3445 break;
3446
3447 last_idx = st->last_insn_idx;
3448 first_idx = st->first_insn_idx;
3449 }
3450 return 0;
3451}
3452
eb1f7f71 3453int mark_chain_precision(struct bpf_verifier_env *env, int regno)
a3ce685d 3454{
529409ea 3455 return __mark_chain_precision(env, env->cur_state->curframe, regno, -1);
a3ce685d
AS
3456}
3457
529409ea 3458static int mark_chain_precision_frame(struct bpf_verifier_env *env, int frame, int regno)
a3ce685d 3459{
529409ea 3460 return __mark_chain_precision(env, frame, regno, -1);
a3ce685d
AS
3461}
3462
529409ea 3463static int mark_chain_precision_stack_frame(struct bpf_verifier_env *env, int frame, int spi)
a3ce685d 3464{
529409ea 3465 return __mark_chain_precision(env, frame, -1, spi);
a3ce685d 3466}
b5dc0163 3467
1be7f75d
AS
3468static bool is_spillable_regtype(enum bpf_reg_type type)
3469{
c25b2ae1 3470 switch (base_type(type)) {
1be7f75d 3471 case PTR_TO_MAP_VALUE:
1be7f75d
AS
3472 case PTR_TO_STACK:
3473 case PTR_TO_CTX:
969bf05e 3474 case PTR_TO_PACKET:
de8f3a83 3475 case PTR_TO_PACKET_META:
969bf05e 3476 case PTR_TO_PACKET_END:
d58e468b 3477 case PTR_TO_FLOW_KEYS:
1be7f75d 3478 case CONST_PTR_TO_MAP:
c64b7983 3479 case PTR_TO_SOCKET:
46f8bc92 3480 case PTR_TO_SOCK_COMMON:
655a51e5 3481 case PTR_TO_TCP_SOCK:
fada7fdc 3482 case PTR_TO_XDP_SOCK:
65726b5b 3483 case PTR_TO_BTF_ID:
20b2aff4 3484 case PTR_TO_BUF:
744ea4e3 3485 case PTR_TO_MEM:
69c087ba
YS
3486 case PTR_TO_FUNC:
3487 case PTR_TO_MAP_KEY:
1be7f75d
AS
3488 return true;
3489 default:
3490 return false;
3491 }
3492}
3493
cc2b14d5
AS
3494/* Does this register contain a constant zero? */
3495static bool register_is_null(struct bpf_reg_state *reg)
3496{
3497 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
3498}
3499
f7cf25b2
AS
3500static bool register_is_const(struct bpf_reg_state *reg)
3501{
3502 return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
3503}
3504
5689d49b
YS
3505static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
3506{
3507 return tnum_is_unknown(reg->var_off) &&
3508 reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
3509 reg->umin_value == 0 && reg->umax_value == U64_MAX &&
3510 reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
3511 reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
3512}
3513
3514static bool register_is_bounded(struct bpf_reg_state *reg)
3515{
3516 return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
3517}
3518
6e7e63cb
JH
3519static bool __is_pointer_value(bool allow_ptr_leaks,
3520 const struct bpf_reg_state *reg)
3521{
3522 if (allow_ptr_leaks)
3523 return false;
3524
3525 return reg->type != SCALAR_VALUE;
3526}
3527
71f656a5
EZ
3528/* Copy src state preserving dst->parent and dst->live fields */
3529static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
3530{
3531 struct bpf_reg_state *parent = dst->parent;
3532 enum bpf_reg_liveness live = dst->live;
3533
3534 *dst = *src;
3535 dst->parent = parent;
3536 dst->live = live;
3537}
3538
f7cf25b2 3539static void save_register_state(struct bpf_func_state *state,
354e8f19
MKL
3540 int spi, struct bpf_reg_state *reg,
3541 int size)
f7cf25b2
AS
3542{
3543 int i;
3544
71f656a5 3545 copy_register_state(&state->stack[spi].spilled_ptr, reg);
354e8f19
MKL
3546 if (size == BPF_REG_SIZE)
3547 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
f7cf25b2 3548
354e8f19
MKL
3549 for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
3550 state->stack[spi].slot_type[i - 1] = STACK_SPILL;
f7cf25b2 3551
354e8f19
MKL
3552 /* size < 8 bytes spill */
3553 for (; i; i--)
3554 scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
f7cf25b2
AS
3555}
3556
ecdf985d
EZ
3557static bool is_bpf_st_mem(struct bpf_insn *insn)
3558{
3559 return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
3560}
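
/* For example (opcodes shown as in verifier disassembly):
 *   (7a) *(u64 *)(r10 -8) = 123     BPF_ST  | BPF_MEM | BPF_DW, imm = 123
 * is matched by is_bpf_st_mem(), while the register store
 *   (7b) *(u64 *)(r10 -8) = r0      BPF_STX | BPF_MEM | BPF_DW
 * is not.
 */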
3561
01f810ac 3562/* check_stack_{read,write}_fixed_off functions track spill/fill of registers;
17a52670
AS
3563 * stack boundary and alignment are checked in check_mem_access()
3564 */
01f810ac
AM
3565static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
3566 /* stack frame we're writing to */
3567 struct bpf_func_state *state,
3568 int off, int size, int value_regno,
3569 int insn_idx)
17a52670 3570{
f4d7e40a 3571 struct bpf_func_state *cur; /* state of the current function */
638f5b90 3572 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
ecdf985d 3573 struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
f7cf25b2 3574 struct bpf_reg_state *reg = NULL;
ecdf985d 3575 u32 dst_reg = insn->dst_reg;
638f5b90 3576
c69431aa 3577 err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
638f5b90
AS
3578 if (err)
3579 return err;
9c399760
AS
3580 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
3581 * so it's aligned access and [off, off + size) are within stack limits
3582 */
638f5b90
AS
3583 if (!env->allow_ptr_leaks &&
3584 state->stack[spi].slot_type[0] == STACK_SPILL &&
3585 size != BPF_REG_SIZE) {
3586 verbose(env, "attempt to corrupt spilled pointer on stack\n");
3587 return -EACCES;
3588 }
17a52670 3589
f4d7e40a 3590 cur = env->cur_state->frame[env->cur_state->curframe];
f7cf25b2
AS
3591 if (value_regno >= 0)
3592 reg = &cur->regs[value_regno];
2039f26f
DB
3593 if (!env->bypass_spec_v4) {
3594 bool sanitize = reg && is_spillable_regtype(reg->type);
3595
3596 for (i = 0; i < size; i++) {
e4f4db47
LG
3597 u8 type = state->stack[spi].slot_type[i];
3598
3599 if (type != STACK_MISC && type != STACK_ZERO) {
2039f26f
DB
3600 sanitize = true;
3601 break;
3602 }
3603 }
3604
3605 if (sanitize)
3606 env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
3607 }
17a52670 3608
ef8fc7a0
KKD
3609 err = destroy_if_dynptr_stack_slot(env, state, spi);
3610 if (err)
3611 return err;
3612
0f55f9ed 3613 mark_stack_slot_scratched(env, spi);
354e8f19 3614 if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
2c78ee89 3615 !register_is_null(reg) && env->bpf_capable) {
b5dc0163
AS
3616 if (dst_reg != BPF_REG_FP) {
3617 /* The backtracking logic can only recognize explicit
3618 * stack slot addresses like [fp - 8]. Other spills of a
8fb33b60 3619 * scalar via a different register have to be treated conservatively.
b5dc0163
AS
3620 * Backtrack from here and mark all registers as precise
3621 * that contributed to 'reg' being a constant.
3622 */
3623 err = mark_chain_precision(env, value_regno);
3624 if (err)
3625 return err;
3626 }
354e8f19 3627 save_register_state(state, spi, reg, size);
ecdf985d
EZ
3628 } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
3629 insn->imm != 0 && env->bpf_capable) {
3630 struct bpf_reg_state fake_reg = {};
3631
3632 __mark_reg_known(&fake_reg, (u32)insn->imm);
3633 fake_reg.type = SCALAR_VALUE;
3634 save_register_state(state, spi, &fake_reg, size);
f7cf25b2 3635 } else if (reg && is_spillable_regtype(reg->type)) {
17a52670 3636 /* register containing pointer is being spilled into stack */
9c399760 3637 if (size != BPF_REG_SIZE) {
f7cf25b2 3638 verbose_linfo(env, insn_idx, "; ");
61bd5218 3639 verbose(env, "invalid size of register spill\n");
17a52670
AS
3640 return -EACCES;
3641 }
f7cf25b2 3642 if (state != cur && reg->type == PTR_TO_STACK) {
f4d7e40a
AS
3643 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
3644 return -EINVAL;
3645 }
354e8f19 3646 save_register_state(state, spi, reg, size);
9c399760 3647 } else {
cc2b14d5
AS
3648 u8 type = STACK_MISC;
3649
679c782d
EC
3650 /* regular write of data into stack destroys any spilled ptr */
3651 state->stack[spi].spilled_ptr.type = NOT_INIT;
0bae2d4d 3652 /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
27113c59 3653 if (is_spilled_reg(&state->stack[spi]))
0bae2d4d 3654 for (i = 0; i < BPF_REG_SIZE; i++)
354e8f19 3655 scrub_spilled_slot(&state->stack[spi].slot_type[i]);
9c399760 3656
cc2b14d5
AS
3657 /* only mark the slot as written if all 8 bytes were written;
3658 * otherwise read propagation may incorrectly stop too soon
3659 * when stack slots are partially written.
3660 * This heuristic means that read propagation will be
3661 * conservative, since it will add reg_live_read marks
3662 * to stack slots all the way to the first state when a program
3663 * writes+reads less than 8 bytes.
3664 */
3665 if (size == BPF_REG_SIZE)
3666 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
3667
3668 /* when we zero initialize stack slots mark them as such */
ecdf985d
EZ
3669 if ((reg && register_is_null(reg)) ||
3670 (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
b5dc0163
AS
3671 /* backtracking doesn't work for STACK_ZERO yet. */
3672 err = mark_chain_precision(env, value_regno);
3673 if (err)
3674 return err;
cc2b14d5 3675 type = STACK_ZERO;
b5dc0163 3676 }
cc2b14d5 3677
0bae2d4d 3678 /* Mark slots affected by this stack write. */
9c399760 3679 for (i = 0; i < size; i++)
638f5b90 3680 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
cc2b14d5 3681 type;
17a52670
AS
3682 }
3683 return 0;
3684}
3685
01f810ac
AM
3686/* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
3687 * known to contain a variable offset.
3688 * This function checks whether the write is permitted and conservatively
3689 * tracks the effects of the write, considering that each stack slot in the
3690 * dynamic range is potentially written to.
3691 *
3692 * 'off' includes 'regno->off'.
3693 * 'value_regno' can be -1, meaning that an unknown value is being written to
3694 * the stack.
3695 *
3696 * Spilled pointers in range are not marked as written because we don't know
3697 * what's going to be actually written. This means that read propagation for
3698 * future reads cannot be terminated by this write.
3699 *
3700 * For privileged programs, uninitialized stack slots are considered
3701 * initialized by this write (even though we don't know exactly what offsets
3702 * are going to be written to). The idea is that we don't want the verifier to
3703 * reject future reads that access slots written to through variable offsets.
3704 */
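/* A sketch of the kind of access handled here (register numbers and
 * bounds are illustrative):
 *   r2 = r10
 *   r2 += r6                // r6 is a bounded scalar, e.g. in [-16, -8]
 *   *(u64 *)(r2 + 0) = r7
 * Any slot in [min_off, max_off) may have been written, so every slot in
 * that range is conservatively updated (typically to STACK_MISC) and any
 * spilled pointers in the range are invalidated.
 */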
3705static int check_stack_write_var_off(struct bpf_verifier_env *env,
3706 /* func where register points to */
3707 struct bpf_func_state *state,
3708 int ptr_regno, int off, int size,
3709 int value_regno, int insn_idx)
3710{
3711 struct bpf_func_state *cur; /* state of the current function */
3712 int min_off, max_off;
3713 int i, err;
3714 struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
31ff2135 3715 struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
01f810ac
AM
3716 bool writing_zero = false;
3717 /* set if the fact that we're writing a zero is used to let any
3718 * stack slots remain STACK_ZERO
3719 */
3720 bool zero_used = false;
3721
3722 cur = env->cur_state->frame[env->cur_state->curframe];
3723 ptr_reg = &cur->regs[ptr_regno];
3724 min_off = ptr_reg->smin_value + off;
3725 max_off = ptr_reg->smax_value + off + size;
3726 if (value_regno >= 0)
3727 value_reg = &cur->regs[value_regno];
31ff2135
EZ
3728 if ((value_reg && register_is_null(value_reg)) ||
3729 (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
01f810ac
AM
3730 writing_zero = true;
3731
c69431aa 3732 err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
01f810ac
AM
3733 if (err)
3734 return err;
3735
ef8fc7a0
KKD
3736 for (i = min_off; i < max_off; i++) {
3737 int spi;
3738
3739 spi = __get_spi(i);
3740 err = destroy_if_dynptr_stack_slot(env, state, spi);
3741 if (err)
3742 return err;
3743 }
01f810ac
AM
3744
3745 /* Variable offset writes destroy any spilled pointers in range. */
3746 for (i = min_off; i < max_off; i++) {
3747 u8 new_type, *stype;
3748 int slot, spi;
3749
3750 slot = -i - 1;
3751 spi = slot / BPF_REG_SIZE;
3752 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
0f55f9ed 3753 mark_stack_slot_scratched(env, spi);
01f810ac 3754
f5e477a8
KKD
3755 if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
3756 /* Reject the write if range we may write to has not
3757 * been initialized beforehand. If we didn't reject
3758 * here, the ptr status would be erased below (even
3759 * though not all slots are actually overwritten),
3760 * possibly opening the door to leaks.
3761 *
3762 * We do however catch STACK_INVALID case below, and
3763 * only allow reading possibly uninitialized memory
3764 * later for CAP_PERFMON, as the write may not happen to
3765 * that slot.
01f810ac
AM
3766 */
3767 verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
3768 insn_idx, i);
3769 return -EINVAL;
3770 }
3771
3772 /* Erase all spilled pointers. */
3773 state->stack[spi].spilled_ptr.type = NOT_INIT;
3774
3775 /* Update the slot type. */
3776 new_type = STACK_MISC;
3777 if (writing_zero && *stype == STACK_ZERO) {
3778 new_type = STACK_ZERO;
3779 zero_used = true;
3780 }
3781 /* If the slot is STACK_INVALID, we check whether it's OK to
3782 * pretend that it will be initialized by this write. The slot
3783 * might not actually be written to, and so if we mark it as
3784 * initialized, future reads might leak uninitialized memory.
3785 * For privileged programs, we will accept such reads to slots
3786 * that may or may not be written because, if we were to reject
3787 * them, the error would be too confusing.
3788 */
3789 if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
3790 verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
3791 insn_idx, i);
3792 return -EINVAL;
3793 }
3794 *stype = new_type;
3795 }
3796 if (zero_used) {
3797 /* backtracking doesn't work for STACK_ZERO yet. */
3798 err = mark_chain_precision(env, value_regno);
3799 if (err)
3800 return err;
3801 }
3802 return 0;
3803}
3804
3805/* When register 'dst_regno' is assigned some values from stack[min_off,
3806 * max_off), we set the register's type according to the types of the
3807 * respective stack slots. If all the stack values are known to be zeros, then
3808 * so is the destination reg. Otherwise, the register is considered to be
3809 * SCALAR. This function does not deal with register filling; the caller must
3810 * ensure that all spilled registers in the stack range have been marked as
3811 * read.
3812 */
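/* For example (illustrative): after
 *   *(u64 *)(r10 - 8) = 0
 * the slots at fp-8 are STACK_ZERO, so a later read covering only that
 * range makes 'dst_regno' a known zero. If any byte in
 * [min_off, max_off) is STACK_MISC instead, 'dst_regno' becomes an
 * unknown SCALAR.
 */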
3813static void mark_reg_stack_read(struct bpf_verifier_env *env,
3814 /* func where src register points to */
3815 struct bpf_func_state *ptr_state,
3816 int min_off, int max_off, int dst_regno)
3817{
3818 struct bpf_verifier_state *vstate = env->cur_state;
3819 struct bpf_func_state *state = vstate->frame[vstate->curframe];
3820 int i, slot, spi;
3821 u8 *stype;
3822 int zeros = 0;
3823
3824 for (i = min_off; i < max_off; i++) {
3825 slot = -i - 1;
3826 spi = slot / BPF_REG_SIZE;
3827 stype = ptr_state->stack[spi].slot_type;
3828 if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
3829 break;
3830 zeros++;
3831 }
3832 if (zeros == max_off - min_off) {
3833 /* any access_size read into register is zero extended,
3834 * so the whole register == const_zero
3835 */
3836 __mark_reg_const_zero(&state->regs[dst_regno]);
3837 /* backtracking doesn't support STACK_ZERO yet,
3838 * so mark it precise here, so that later
3839 * backtracking can stop here.
3840 * Backtracking may not need this if this register
3841 * doesn't participate in pointer adjustment.
3842 * Forward propagation of precise flag is not
3843 * necessary either. This mark is only to stop
3844 * backtracking. Any register that contributed
3845 * to const 0 was marked precise before spill.
3846 */
3847 state->regs[dst_regno].precise = true;
3848 } else {
3849 /* have read misc data from the stack */
3850 mark_reg_unknown(env, state->regs, dst_regno);
3851 }
3852 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3853}
3854
3855/* Read the stack at 'off' and put the results into the register indicated by
3856 * 'dst_regno'. It handles reg filling if the addressed stack slot is a
3857 * spilled reg.
3858 *
3859 * 'dst_regno' can be -1, meaning that the read value is not going to a
3860 * register.
3861 *
3862 * The access is assumed to be within the current stack bounds.
3863 */
3864static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
3865 /* func where src register points to */
3866 struct bpf_func_state *reg_state,
3867 int off, int size, int dst_regno)
17a52670 3868{
f4d7e40a
AS
3869 struct bpf_verifier_state *vstate = env->cur_state;
3870 struct bpf_func_state *state = vstate->frame[vstate->curframe];
638f5b90 3871 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
f7cf25b2 3872 struct bpf_reg_state *reg;
354e8f19 3873 u8 *stype, type;
17a52670 3874
f4d7e40a 3875 stype = reg_state->stack[spi].slot_type;
f7cf25b2 3876 reg = &reg_state->stack[spi].spilled_ptr;
17a52670 3877
27113c59 3878 if (is_spilled_reg(&reg_state->stack[spi])) {
f30d4968
MKL
3879 u8 spill_size = 1;
3880
3881 for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
3882 spill_size++;
354e8f19 3883
f30d4968 3884 if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
f7cf25b2
AS
3885 if (reg->type != SCALAR_VALUE) {
3886 verbose_linfo(env, env->insn_idx, "; ");
3887 verbose(env, "invalid size of register fill\n");
3888 return -EACCES;
3889 }
354e8f19
MKL
3890
3891 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3892 if (dst_regno < 0)
3893 return 0;
3894
f30d4968 3895 if (!(off % BPF_REG_SIZE) && size == spill_size) {
354e8f19
MKL
3896 /* The earlier check_reg_arg() has decided the
3897 * subreg_def for this insn. Save it first.
3898 */
3899 s32 subreg_def = state->regs[dst_regno].subreg_def;
3900
71f656a5 3901 copy_register_state(&state->regs[dst_regno], reg);
354e8f19
MKL
3902 state->regs[dst_regno].subreg_def = subreg_def;
3903 } else {
3904 for (i = 0; i < size; i++) {
3905 type = stype[(slot - i) % BPF_REG_SIZE];
3906 if (type == STACK_SPILL)
3907 continue;
3908 if (type == STACK_MISC)
3909 continue;
6715df8d
EZ
3910 if (type == STACK_INVALID && env->allow_uninit_stack)
3911 continue;
354e8f19
MKL
3912 verbose(env, "invalid read from stack off %d+%d size %d\n",
3913 off, i, size);
3914 return -EACCES;
3915 }
01f810ac 3916 mark_reg_unknown(env, state->regs, dst_regno);
f7cf25b2 3917 }
354e8f19 3918 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
f7cf25b2 3919 return 0;
17a52670 3920 }
17a52670 3921
01f810ac 3922 if (dst_regno >= 0) {
17a52670 3923 /* restore register state from stack */
71f656a5 3924 copy_register_state(&state->regs[dst_regno], reg);
2f18f62e
AS
3925 /* mark reg as written since spilled pointer state likely
3926 * has its liveness marks cleared by is_state_visited()
3927 * which resets stack/reg liveness for state transitions
3928 */
01f810ac 3929 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
6e7e63cb 3930 } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
01f810ac 3931 /* If dst_regno==-1, the caller is asking us whether
6e7e63cb
JH
3932 * it is acceptable to use this value as a SCALAR_VALUE
3933 * (e.g. for XADD).
3934 * We must not allow unprivileged callers to do that
3935 * with spilled pointers.
3936 */
3937 verbose(env, "leaking pointer from stack off %d\n",
3938 off);
3939 return -EACCES;
dc503a8a 3940 }
f7cf25b2 3941 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
17a52670
AS
3942 } else {
3943 for (i = 0; i < size; i++) {
01f810ac
AM
3944 type = stype[(slot - i) % BPF_REG_SIZE];
3945 if (type == STACK_MISC)
cc2b14d5 3946 continue;
01f810ac 3947 if (type == STACK_ZERO)
cc2b14d5 3948 continue;
6715df8d
EZ
3949 if (type == STACK_INVALID && env->allow_uninit_stack)
3950 continue;
cc2b14d5
AS
3951 verbose(env, "invalid read from stack off %d+%d size %d\n",
3952 off, i, size);
3953 return -EACCES;
3954 }
f7cf25b2 3955 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
01f810ac
AM
3956 if (dst_regno >= 0)
3957 mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
17a52670 3958 }
f7cf25b2 3959 return 0;
17a52670
AS
3960}
3961
61df10c7 3962enum bpf_access_src {
01f810ac
AM
3963 ACCESS_DIRECT = 1, /* the access is performed by an instruction */
3964 ACCESS_HELPER = 2, /* the access is performed by a helper */
3965};
3966
3967static int check_stack_range_initialized(struct bpf_verifier_env *env,
3968 int regno, int off, int access_size,
3969 bool zero_size_allowed,
61df10c7 3970 enum bpf_access_src type,
01f810ac
AM
3971 struct bpf_call_arg_meta *meta);
3972
3973static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
3974{
3975 return cur_regs(env) + regno;
3976}
3977
3978/* Read the stack at 'ptr_regno + off' and put the result into the register
3979 * 'dst_regno'.
3980 * 'off' includes the pointer register's fixed offset (i.e. 'ptr_regno.off'),
3981 * but not its variable offset.
3982 * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
3983 *
3984 * As opposed to check_stack_read_fixed_off, this function doesn't deal with
3985 * filling registers (i.e. reads of spilled register cannot be detected when
3986 * the offset is not fixed). We conservatively mark 'dst_regno' as containing
3987 * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
3988 * offset; for a fixed offset check_stack_read_fixed_off should be used
3989 * instead.
3990 */
3991static int check_stack_read_var_off(struct bpf_verifier_env *env,
3992 int ptr_regno, int off, int size, int dst_regno)
e4298d25 3993{
01f810ac
AM
3994 /* The state of the source register. */
3995 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3996 struct bpf_func_state *ptr_state = func(env, reg);
3997 int err;
3998 int min_off, max_off;
3999
4000 /* Note that we pass a NULL meta, so raw access will not be permitted.
e4298d25 4001 */
01f810ac
AM
4002 err = check_stack_range_initialized(env, ptr_regno, off, size,
4003 false, ACCESS_DIRECT, NULL);
4004 if (err)
4005 return err;
4006
4007 min_off = reg->smin_value + off;
4008 max_off = reg->smax_value + off;
4009 mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
4010 return 0;
4011}
4012
4013/* check_stack_read dispatches to check_stack_read_fixed_off or
4014 * check_stack_read_var_off.
4015 *
4016 * The caller must ensure that the offset falls within the allocated stack
4017 * bounds.
4018 *
4019 * 'dst_regno' is a register which will receive the value from the stack. It
4020 * can be -1, meaning that the read value is not going to a register.
4021 */
4022static int check_stack_read(struct bpf_verifier_env *env,
4023 int ptr_regno, int off, int size,
4024 int dst_regno)
4025{
4026 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
4027 struct bpf_func_state *state = func(env, reg);
4028 int err;
4029 /* Some accesses are only permitted with a static offset. */
4030 bool var_off = !tnum_is_const(reg->var_off);
4031
4032 /* The offset is required to be static when reads don't go to a
4033 * register, in order to not leak pointers (see
4034 * check_stack_read_fixed_off).
4035 */
4036 if (dst_regno < 0 && var_off) {
e4298d25
DB
4037 char tn_buf[48];
4038
4039 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
01f810ac 4040 verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
e4298d25
DB
4041 tn_buf, off, size);
4042 return -EACCES;
4043 }
01f810ac
AM
4044 /* Variable offset is prohibited for unprivileged mode for simplicity
4045 * since it requires corresponding support in Spectre masking for stack
4046 * ALU. See also retrieve_ptr_limit().
4047 */
4048 if (!env->bypass_spec_v1 && var_off) {
4049 char tn_buf[48];
e4298d25 4050
01f810ac
AM
4051 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4052 verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
4053 ptr_regno, tn_buf);
e4298d25
DB
4054 return -EACCES;
4055 }
4056
01f810ac
AM
4057 if (!var_off) {
4058 off += reg->var_off.value;
4059 err = check_stack_read_fixed_off(env, state, off, size,
4060 dst_regno);
4061 } else {
4062 /* Variable offset stack reads need more conservative handling
4063 * than fixed offset ones. Note that dst_regno >= 0 on this
4064 * branch.
4065 */
4066 err = check_stack_read_var_off(env, ptr_regno, off, size,
4067 dst_regno);
4068 }
4069 return err;
4070}
4071
4072
4073/* check_stack_write dispatches to check_stack_write_fixed_off or
4074 * check_stack_write_var_off.
4075 *
4076 * 'ptr_regno' is the register used as a pointer into the stack.
4077 * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
4078 * 'value_regno' is the register whose value we're writing to the stack. It can
4079 * be -1, meaning that we're not writing from a register.
4080 *
4081 * The caller must ensure that the offset falls within the maximum stack size.
4082 */
4083static int check_stack_write(struct bpf_verifier_env *env,
4084 int ptr_regno, int off, int size,
4085 int value_regno, int insn_idx)
4086{
4087 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
4088 struct bpf_func_state *state = func(env, reg);
4089 int err;
4090
4091 if (tnum_is_const(reg->var_off)) {
4092 off += reg->var_off.value;
4093 err = check_stack_write_fixed_off(env, state, off, size,
4094 value_regno, insn_idx);
4095 } else {
4096 /* Variable offset stack writes need more conservative handling
4097 * than fixed offset ones.
4098 */
4099 err = check_stack_write_var_off(env, state,
4100 ptr_regno, off, size,
4101 value_regno, insn_idx);
4102 }
4103 return err;
e4298d25
DB
4104}
4105
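/* For example, a map created with the BPF_F_RDONLY_PROG flag carries no
 * BPF_MAP_CAN_WRITE capability, so any program store into its value is
 * rejected by the check below.
 */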
591fe988
DB
4106static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
4107 int off, int size, enum bpf_access_type type)
4108{
4109 struct bpf_reg_state *regs = cur_regs(env);
4110 struct bpf_map *map = regs[regno].map_ptr;
4111 u32 cap = bpf_map_flags_to_cap(map);
4112
4113 if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
4114 verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
4115 map->value_size, off, size);
4116 return -EACCES;
4117 }
4118
4119 if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
4120 verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
4121 map->value_size, off, size);
4122 return -EACCES;
4123 }
4124
4125 return 0;
4126}
4127
457f4436
AN
4128/* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
4129static int __check_mem_access(struct bpf_verifier_env *env, int regno,
4130 int off, int size, u32 mem_size,
4131 bool zero_size_allowed)
17a52670 4132{
457f4436
AN
4133 bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
4134 struct bpf_reg_state *reg;
4135
4136 if (off >= 0 && size_ok && (u64)off + size <= mem_size)
4137 return 0;
17a52670 4138
457f4436
AN
4139 reg = &cur_regs(env)[regno];
4140 switch (reg->type) {
69c087ba
YS
4141 case PTR_TO_MAP_KEY:
4142 verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
4143 mem_size, off, size);
4144 break;
457f4436 4145 case PTR_TO_MAP_VALUE:
61bd5218 4146 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
457f4436
AN
4147 mem_size, off, size);
4148 break;
4149 case PTR_TO_PACKET:
4150 case PTR_TO_PACKET_META:
4151 case PTR_TO_PACKET_END:
4152 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
4153 off, size, regno, reg->id, off, mem_size);
4154 break;
4155 case PTR_TO_MEM:
4156 default:
4157 verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
4158 mem_size, off, size);
17a52670 4159 }
457f4436
AN
4160
4161 return -EACCES;
17a52670
AS
4162}
4163
457f4436
AN
4164/* check read/write into a memory region with possible variable offset */
4165static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
4166 int off, int size, u32 mem_size,
4167 bool zero_size_allowed)
dbcfe5f7 4168{
f4d7e40a
AS
4169 struct bpf_verifier_state *vstate = env->cur_state;
4170 struct bpf_func_state *state = vstate->frame[vstate->curframe];
dbcfe5f7
GB
4171 struct bpf_reg_state *reg = &state->regs[regno];
4172 int err;
4173
457f4436 4174 /* We may have adjusted the register pointing to memory region, so we
f1174f77
EC
4175 * need to try adding each of min_value and max_value to off
4176 * to make sure our theoretical access will be safe.
2e576648
CL
4177 *
4178 * The minimum value is only important with signed
dbcfe5f7
GB
4179 * comparisons where we can't assume the floor of a
4180 * value is 0. If we are using signed variables for our
4181 * indexes, we need to make sure that whatever we use
4182 * will have a set floor within our range.
4183 */
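	/* Worked example (illustrative numbers): for an 8-byte access with
	 * off == 16 into a 64-byte region, a register range of [-24, 40] is
	 * rejected on the minimum side (16 - 24 < 0), while [0, 48] passes
	 * the minimum side but is rejected on the maximum side
	 * (16 + 48 + 8 > 64).
	 */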
b7137c4e
DB
4184 if (reg->smin_value < 0 &&
4185 (reg->smin_value == S64_MIN ||
4186 (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
4187 reg->smin_value + off < 0)) {
61bd5218 4188 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
dbcfe5f7
GB
4189 regno);
4190 return -EACCES;
4191 }
457f4436
AN
4192 err = __check_mem_access(env, regno, reg->smin_value + off, size,
4193 mem_size, zero_size_allowed);
dbcfe5f7 4194 if (err) {
457f4436 4195 verbose(env, "R%d min value is outside of the allowed memory range\n",
61bd5218 4196 regno);
dbcfe5f7
GB
4197 return err;
4198 }
4199
b03c9f9f
EC
4200 /* If we haven't set a max value then we need to bail since we can't be
4201 * sure we won't do bad things.
4202 * If reg->umax_value + off could overflow, treat that as unbounded too.
dbcfe5f7 4203 */
b03c9f9f 4204 if (reg->umax_value >= BPF_MAX_VAR_OFF) {
457f4436 4205 verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
dbcfe5f7
GB
4206 regno);
4207 return -EACCES;
4208 }
457f4436
AN
4209 err = __check_mem_access(env, regno, reg->umax_value + off, size,
4210 mem_size, zero_size_allowed);
4211 if (err) {
4212 verbose(env, "R%d max value is outside of the allowed memory range\n",
61bd5218 4213 regno);
457f4436
AN
4214 return err;
4215 }
4216
4217 return 0;
4218}
d83525ca 4219
e9147b44
KKD
4220static int __check_ptr_off_reg(struct bpf_verifier_env *env,
4221 const struct bpf_reg_state *reg, int regno,
4222 bool fixed_off_ok)
4223{
4224 /* Access to this pointer-typed register or passing it to a helper
4225 * is only allowed in its original, unmodified form.
4226 */
4227
4228 if (reg->off < 0) {
4229 verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
4230 reg_type_str(env, reg->type), regno, reg->off);
4231 return -EACCES;
4232 }
4233
4234 if (!fixed_off_ok && reg->off) {
4235 verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
4236 reg_type_str(env, reg->type), regno, reg->off);
4237 return -EACCES;
4238 }
4239
4240 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4241 char tn_buf[48];
4242
4243 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4244 verbose(env, "variable %s access var_off=%s disallowed\n",
4245 reg_type_str(env, reg->type), tn_buf);
4246 return -EACCES;
4247 }
4248
4249 return 0;
4250}
4251
4252int check_ptr_off_reg(struct bpf_verifier_env *env,
4253 const struct bpf_reg_state *reg, int regno)
4254{
4255 return __check_ptr_off_reg(env, reg, regno, false);
4256}
4257
61df10c7 4258static int map_kptr_match_type(struct bpf_verifier_env *env,
aa3496ac 4259 struct btf_field *kptr_field,
61df10c7
KKD
4260 struct bpf_reg_state *reg, u32 regno)
4261{
aa3496ac 4262 const char *targ_name = kernel_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
20c09d92 4263 int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;
61df10c7
KKD
4264 const char *reg_name = "";
4265
6efe152d 4266 /* Only unreferenced case accepts untrusted pointers */
aa3496ac 4267 if (kptr_field->type == BPF_KPTR_UNREF)
6efe152d
KKD
4268 perm_flags |= PTR_UNTRUSTED;
4269
4270 if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
61df10c7
KKD
4271 goto bad_type;
4272
4273 if (!btf_is_kernel(reg->btf)) {
4274 verbose(env, "R%d must point to kernel BTF\n", regno);
4275 return -EINVAL;
4276 }
4277 /* We need to verify reg->type and reg->btf, before accessing reg->btf */
4278 reg_name = kernel_type_name(reg->btf, reg->btf_id);
4279
c0a5a21c
KKD
4280 /* For ref_ptr case, release function check should ensure we get one
4281 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
4282 * normal store of unreferenced kptr, we must ensure var_off is zero.
4283 * Since ref_ptr cannot be accessed directly by BPF insns, checks for
4284 * reg->off and reg->ref_obj_id are not needed here.
4285 */
61df10c7
KKD
4286 if (__check_ptr_off_reg(env, reg, regno, true))
4287 return -EACCES;
4288
4289 /* A full type match is needed, as BTF can be vmlinux or module BTF, and
4290 * we also need to take into account the reg->off.
4291 *
4292 * We want to support cases like:
4293 *
4294 * struct foo {
4295 * struct bar br;
4296 * struct baz bz;
4297 * };
4298 *
4299 * struct foo *v;
4300 * v = func(); // PTR_TO_BTF_ID
4301 * val->foo = v; // reg->off is zero, btf and btf_id match type
4302 * val->bar = &v->br; // reg->off is still zero, but we need to retry with
4303 * // first member type of struct after comparison fails
4304 * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
4305 * // to match type
4306 *
4307 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
2ab3b380
KKD
4308 * is zero. We must also ensure that btf_struct_ids_match does not walk
4309 * the struct to match type against first member of struct, i.e. reject
4310 * second case from above. Hence, when type is BPF_KPTR_REF, we set
4311 * strict mode to true for type match.
61df10c7
KKD
4312 */
4313 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
aa3496ac
KKD
4314 kptr_field->kptr.btf, kptr_field->kptr.btf_id,
4315 kptr_field->type == BPF_KPTR_REF))
61df10c7
KKD
4316 goto bad_type;
4317 return 0;
4318bad_type:
4319 verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
4320 reg_type_str(env, reg->type), reg_name);
6efe152d 4321 verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
aa3496ac 4322 if (kptr_field->type == BPF_KPTR_UNREF)
6efe152d
KKD
4323 verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
4324 targ_name);
4325 else
4326 verbose(env, "\n");
61df10c7
KKD
4327 return -EINVAL;
4328}
4329
20c09d92
AS
4330/* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
4331 * can dereference RCU protected pointers and result is PTR_TRUSTED.
4332 */
4333static bool in_rcu_cs(struct bpf_verifier_env *env)
4334{
4335 return env->cur_state->active_rcu_lock || !env->prog->aux->sleepable;
4336}
4337
4338/* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */
4339BTF_SET_START(rcu_protected_types)
4340BTF_ID(struct, prog_test_ref_kfunc)
4341BTF_ID(struct, cgroup)
4342BTF_SET_END(rcu_protected_types)
4343
4344static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
4345{
4346 if (!btf_is_kernel(btf))
4347 return false;
4348 return btf_id_set_contains(&rcu_protected_types, btf_id);
4349}
4350
4351static bool rcu_safe_kptr(const struct btf_field *field)
4352{
4353 const struct btf_field_kptr *kptr = &field->kptr;
4354
4355 return field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id);
4356}
4357
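/* For example, a referenced kptr to one of the types above (such as
 * struct cgroup) that is loaded while in_rcu_cs() holds is marked
 * PTR_MAYBE_NULL | MEM_RCU in check_map_kptr_access() below, instead of
 * the weaker PTR_MAYBE_NULL | PTR_UNTRUSTED marking.
 */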
61df10c7
KKD
4358static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
4359 int value_regno, int insn_idx,
aa3496ac 4360 struct btf_field *kptr_field)
61df10c7
KKD
4361{
4362 struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
4363 int class = BPF_CLASS(insn->code);
4364 struct bpf_reg_state *val_reg;
4365
4366 /* Things we already checked for in check_map_access and caller:
4367 * - Reject cases where variable offset may touch kptr
4368 * - size of access (must be BPF_DW)
4369 * - tnum_is_const(reg->var_off)
aa3496ac 4370 * - kptr_field->offset == off + reg->var_off.value
61df10c7
KKD
4371 */
4372 /* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
4373 if (BPF_MODE(insn->code) != BPF_MEM) {
4374 verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
4375 return -EACCES;
4376 }
4377
6efe152d
KKD
4378 /* We only allow loading referenced kptr, since it will be marked as
4379 * untrusted, similar to unreferenced kptr.
4380 */
aa3496ac 4381 if (class != BPF_LDX && kptr_field->type == BPF_KPTR_REF) {
6efe152d 4382 verbose(env, "store to referenced kptr disallowed\n");
c0a5a21c
KKD
4383 return -EACCES;
4384 }
4385
61df10c7
KKD
4386 if (class == BPF_LDX) {
4387 val_reg = reg_state(env, value_regno);
4388 /* We can simply mark the value_regno receiving the pointer
4389 * value from map as PTR_TO_BTF_ID, with the correct type.
4390 */
aa3496ac 4391 mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
20c09d92
AS
4392 kptr_field->kptr.btf_id,
4393 rcu_safe_kptr(kptr_field) && in_rcu_cs(env) ?
4394 PTR_MAYBE_NULL | MEM_RCU :
4395 PTR_MAYBE_NULL | PTR_UNTRUSTED);
61df10c7
KKD
4396 /* For mark_ptr_or_null_reg */
4397 val_reg->id = ++env->id_gen;
4398 } else if (class == BPF_STX) {
4399 val_reg = reg_state(env, value_regno);
4400 if (!register_is_null(val_reg) &&
aa3496ac 4401 map_kptr_match_type(env, kptr_field, val_reg, value_regno))
61df10c7
KKD
4402 return -EACCES;
4403 } else if (class == BPF_ST) {
4404 if (insn->imm) {
4405 verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
aa3496ac 4406 kptr_field->offset);
61df10c7
KKD
4407 return -EACCES;
4408 }
4409 } else {
4410 verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
4411 return -EACCES;
4412 }
4413 return 0;
4414}
4415
457f4436
AN
4416/* check read/write into a map element with possible variable offset */
4417static int check_map_access(struct bpf_verifier_env *env, u32 regno,
61df10c7
KKD
4418 int off, int size, bool zero_size_allowed,
4419 enum bpf_access_src src)
457f4436
AN
4420{
4421 struct bpf_verifier_state *vstate = env->cur_state;
4422 struct bpf_func_state *state = vstate->frame[vstate->curframe];
4423 struct bpf_reg_state *reg = &state->regs[regno];
4424 struct bpf_map *map = reg->map_ptr;
aa3496ac
KKD
4425 struct btf_record *rec;
4426 int err, i;
457f4436
AN
4427
4428 err = check_mem_region_access(env, regno, off, size, map->value_size,
4429 zero_size_allowed);
4430 if (err)
4431 return err;
4432
aa3496ac
KKD
4433 if (IS_ERR_OR_NULL(map->record))
4434 return 0;
4435 rec = map->record;
4436 for (i = 0; i < rec->cnt; i++) {
4437 struct btf_field *field = &rec->fields[i];
4438 u32 p = field->offset;
d83525ca 4439
db559117
KKD
4440 /* If any part of a field can be touched by load/store, reject
4441 * this program. To check that [x1, x2) overlaps with [y1, y2),
d83525ca
AS
4442 * it is sufficient to check x1 < y2 && y1 < x2.
4443 */
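		/* e.g. an 8-byte access at value offset 16, i.e. [16, 24),
		 * overlaps a field occupying [20, 28): 16 < 28 && 20 < 24.
		 */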
aa3496ac
KKD
4444 if (reg->smin_value + off < p + btf_field_type_size(field->type) &&
4445 p < reg->umax_value + off + size) {
4446 switch (field->type) {
4447 case BPF_KPTR_UNREF:
4448 case BPF_KPTR_REF:
61df10c7
KKD
4449 if (src != ACCESS_DIRECT) {
4450 verbose(env, "kptr cannot be accessed indirectly by helper\n");
4451 return -EACCES;
4452 }
4453 if (!tnum_is_const(reg->var_off)) {
4454 verbose(env, "kptr access cannot have variable offset\n");
4455 return -EACCES;
4456 }
4457 if (p != off + reg->var_off.value) {
4458 verbose(env, "kptr access misaligned expected=%u off=%llu\n",
4459 p, off + reg->var_off.value);
4460 return -EACCES;
4461 }
4462 if (size != bpf_size_to_bytes(BPF_DW)) {
4463 verbose(env, "kptr access size must be BPF_DW\n");
4464 return -EACCES;
4465 }
4466 break;
aa3496ac 4467 default:
db559117
KKD
4468 verbose(env, "%s cannot be accessed directly by load/store\n",
4469 btf_field_type_name(field->type));
aa3496ac 4470 return -EACCES;
61df10c7
KKD
4471 }
4472 }
4473 }
aa3496ac 4474 return 0;
dbcfe5f7
GB
4475}
4476
969bf05e
AS
4477#define MAX_PACKET_OFF 0xffff
4478
58e2af8b 4479static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
3a0af8fd
TG
4480 const struct bpf_call_arg_meta *meta,
4481 enum bpf_access_type t)
4acf6c0b 4482{
7e40781c
UP
4483 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
4484
4485 switch (prog_type) {
5d66fa7d 4486 /* Program types with direct read access only go here! */
3a0af8fd
TG
4487 case BPF_PROG_TYPE_LWT_IN:
4488 case BPF_PROG_TYPE_LWT_OUT:
004d4b27 4489 case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2dbb9b9e 4490 case BPF_PROG_TYPE_SK_REUSEPORT:
5d66fa7d 4491 case BPF_PROG_TYPE_FLOW_DISSECTOR:
d5563d36 4492 case BPF_PROG_TYPE_CGROUP_SKB:
3a0af8fd
TG
4493 if (t == BPF_WRITE)
4494 return false;
8731745e 4495 fallthrough;
5d66fa7d
DB
4496
4497 /* Program types with direct read + write access go here! */
36bbef52
DB
4498 case BPF_PROG_TYPE_SCHED_CLS:
4499 case BPF_PROG_TYPE_SCHED_ACT:
4acf6c0b 4500 case BPF_PROG_TYPE_XDP:
3a0af8fd 4501 case BPF_PROG_TYPE_LWT_XMIT:
8a31db56 4502 case BPF_PROG_TYPE_SK_SKB:
4f738adb 4503 case BPF_PROG_TYPE_SK_MSG:
36bbef52
DB
4504 if (meta)
4505 return meta->pkt_access;
4506
4507 env->seen_direct_write = true;
4acf6c0b 4508 return true;
0d01da6a
SF
4509
4510 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
4511 if (t == BPF_WRITE)
4512 env->seen_direct_write = true;
4513
4514 return true;
4515
4acf6c0b
BB
4516 default:
4517 return false;
4518 }
4519}
4520
f1174f77 4521static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
9fd29c08 4522 int size, bool zero_size_allowed)
f1174f77 4523{
638f5b90 4524 struct bpf_reg_state *regs = cur_regs(env);
f1174f77
EC
4525 struct bpf_reg_state *reg = &regs[regno];
4526 int err;
4527
4528 /* We may have added a variable offset to the packet pointer; but any
4529 * reg->range we have comes after that. We are only checking the fixed
4530 * offset.
4531 */
4532
4533 /* We don't allow negative numbers, because we aren't tracking enough
4534 * detail to prove they're safe.
4535 */
b03c9f9f 4536 if (reg->smin_value < 0) {
61bd5218 4537 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
f1174f77
EC
4538 regno);
4539 return -EACCES;
4540 }
6d94e741
AS
4541
4542 err = reg->range < 0 ? -EINVAL :
4543 __check_mem_access(env, regno, off, size, reg->range,
457f4436 4544 zero_size_allowed);
f1174f77 4545 if (err) {
61bd5218 4546 verbose(env, "R%d offset is outside of the packet\n", regno);
f1174f77
EC
4547 return err;
4548 }
e647815a 4549
457f4436 4550 /* __check_mem_access has made sure "off + size - 1" is within u16.
e647815a
JW
4551 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
4552 * otherwise find_good_pkt_pointers would have refused to set range info
457f4436 4553 * and __check_mem_access would have rejected this pkt access.
e647815a
JW
4554 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
4555 */
4556 env->prog->aux->max_pkt_offset =
4557 max_t(u32, env->prog->aux->max_pkt_offset,
4558 off + reg->umax_value + size - 1);
4559
f1174f77
EC
4560 return err;
4561}
4562
4563/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
31fd8581 4564static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
9e15db66 4565 enum bpf_access_type t, enum bpf_reg_type *reg_type,
22dc4a0f 4566 struct btf **btf, u32 *btf_id)
17a52670 4567{
f96da094
DB
4568 struct bpf_insn_access_aux info = {
4569 .reg_type = *reg_type,
9e15db66 4570 .log = &env->log,
f96da094 4571 };
31fd8581 4572
4f9218aa 4573 if (env->ops->is_valid_access &&
5e43f899 4574 env->ops->is_valid_access(off, size, t, env->prog, &info)) {
f96da094
DB
4575 /* A non-zero info.ctx_field_size indicates that this field is a
4576 * candidate for later verifier transformation to load the whole
4577 * field and then apply a mask when accessed with a narrower
4578 * access than actual ctx access size. A zero info.ctx_field_size
4579 * will only allow for whole field access and rejects any other
4580 * type of narrower access.
31fd8581 4581 */
23994631 4582 *reg_type = info.reg_type;
31fd8581 4583
c25b2ae1 4584 if (base_type(*reg_type) == PTR_TO_BTF_ID) {
22dc4a0f 4585 *btf = info.btf;
9e15db66 4586 *btf_id = info.btf_id;
22dc4a0f 4587 } else {
9e15db66 4588 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
22dc4a0f 4589 }
32bbe007
AS
4590 /* remember the offset of last byte accessed in ctx */
4591 if (env->prog->aux->max_ctx_offset < off + size)
4592 env->prog->aux->max_ctx_offset = off + size;
17a52670 4593 return 0;
32bbe007 4594 }
17a52670 4595
61bd5218 4596 verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
17a52670
AS
4597 return -EACCES;
4598}
4599
d58e468b
PP
4600static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
4601 int size)
4602{
4603 if (size < 0 || off < 0 ||
4604 (u64)off + size > sizeof(struct bpf_flow_keys)) {
4605 verbose(env, "invalid access to flow keys off=%d size=%d\n",
4606 off, size);
4607 return -EACCES;
4608 }
4609 return 0;
4610}
4611
5f456649
MKL
4612static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
4613 u32 regno, int off, int size,
4614 enum bpf_access_type t)
c64b7983
JS
4615{
4616 struct bpf_reg_state *regs = cur_regs(env);
4617 struct bpf_reg_state *reg = &regs[regno];
5f456649 4618 struct bpf_insn_access_aux info = {};
46f8bc92 4619 bool valid;
c64b7983
JS
4620
4621 if (reg->smin_value < 0) {
4622 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
4623 regno);
4624 return -EACCES;
4625 }
4626
46f8bc92
MKL
4627 switch (reg->type) {
4628 case PTR_TO_SOCK_COMMON:
4629 valid = bpf_sock_common_is_valid_access(off, size, t, &info);
4630 break;
4631 case PTR_TO_SOCKET:
4632 valid = bpf_sock_is_valid_access(off, size, t, &info);
4633 break;
655a51e5
MKL
4634 case PTR_TO_TCP_SOCK:
4635 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
4636 break;
fada7fdc
JL
4637 case PTR_TO_XDP_SOCK:
4638 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
4639 break;
46f8bc92
MKL
4640 default:
4641 valid = false;
c64b7983
JS
4642 }
4643
5f456649 4644
46f8bc92
MKL
4645 if (valid) {
4646 env->insn_aux_data[insn_idx].ctx_field_size =
4647 info.ctx_field_size;
4648 return 0;
4649 }
4650
4651 verbose(env, "R%d invalid %s access off=%d size=%d\n",
c25b2ae1 4652 regno, reg_type_str(env, reg->type), off, size);
46f8bc92
MKL
4653
4654 return -EACCES;
c64b7983
JS
4655}
4656
4cabc5b1
DB
4657static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
4658{
2a159c6f 4659 return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
4cabc5b1
DB
4660}
4661
f37a8cb8
DB
4662static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
4663{
2a159c6f 4664 const struct bpf_reg_state *reg = reg_state(env, regno);
f37a8cb8 4665
46f8bc92
MKL
4666 return reg->type == PTR_TO_CTX;
4667}
4668
4669static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
4670{
4671 const struct bpf_reg_state *reg = reg_state(env, regno);
4672
4673 return type_is_sk_pointer(reg->type);
f37a8cb8
DB
4674}
4675
ca369602
DB
4676static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
4677{
2a159c6f 4678 const struct bpf_reg_state *reg = reg_state(env, regno);
ca369602
DB
4679
4680 return type_is_pkt_pointer(reg->type);
4681}
4682
4b5defde
DB
4683static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
4684{
4685 const struct bpf_reg_state *reg = reg_state(env, regno);
4686
4687 /* Separate from is_ctx_reg() since we still want to allow BPF_ST here. */
4688 return reg->type == PTR_TO_FLOW_KEYS;
4689}
4690
9bb00b28
YS
4691static bool is_trusted_reg(const struct bpf_reg_state *reg)
4692{
4693 /* A referenced register is always trusted. */
4694 if (reg->ref_obj_id)
4695 return true;
4696
4697 /* If a register is not referenced, it is trusted if it has the
fca1aa75 4698 * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
9bb00b28
YS
4699 * other type modifiers may be safe, but we elect to take an opt-in
4700 * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
4701 * not.
4702 *
4703 * Eventually, we should make PTR_TRUSTED the single source of truth
4704 * for whether a register is trusted.
4705 */
4706 return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
4707 !bpf_type_has_unsafe_modifiers(reg->type);
4708}
4709
fca1aa75
YS
4710static bool is_rcu_reg(const struct bpf_reg_state *reg)
4711{
4712 return reg->type & MEM_RCU;
4713}
4714
61bd5218
JK
4715static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
4716 const struct bpf_reg_state *reg,
d1174416 4717 int off, int size, bool strict)
969bf05e 4718{
f1174f77 4719 struct tnum reg_off;
e07b98d9 4720 int ip_align;
d1174416
DM
4721
4722 /* Byte size accesses are always allowed. */
4723 if (!strict || size == 1)
4724 return 0;
4725
e4eda884
DM
4726 /* For platforms that do not have a Kconfig enabling
4727 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
4728 * NET_IP_ALIGN is universally set to '2'. And on platforms
4729 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
4730 * to this code only in strict mode where we want to emulate
4731 * the NET_IP_ALIGN==2 checking. Therefore use an
4732 * unconditional IP align value of '2'.
e07b98d9 4733 */
e4eda884 4734 ip_align = 2;
f1174f77
EC
4735
4736 reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
4737 if (!tnum_is_aligned(reg_off, size)) {
4738 char tn_buf[48];
4739
4740 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218
JK
4741 verbose(env,
4742 "misaligned packet access off %d+%s+%d+%d size %d\n",
f1174f77 4743 ip_align, tn_buf, reg->off, off, size);
969bf05e
AS
4744 return -EACCES;
4745 }
79adffcd 4746
969bf05e
AS
4747 return 0;
4748}
4749
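/* Illustrative sketch (not part of the verifier) of what the check above
 * accepts and rejects under strict alignment, assuming an skb program
 * reading from the packet with a constant var_off and reg->off of 0;
 * bounds checks against data_end are elided for brevity:
 *
 *   void *data = (void *)(long)skb->data;
 *   __u32 x;
 *
 *   x = *(__u32 *)(data + 14);   2 + 14 = 16, 4-byte aligned -> accepted
 *   x = *(__u32 *)(data + 12);   2 + 12 = 14, misaligned -> rejected
 *
 * The '2' is the emulated NET_IP_ALIGN added above, so a 4-byte load right
 * after the 14-byte Ethernet header lines up, while one that straddles it
 * does not.
 */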
61bd5218
JK
4750static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
4751 const struct bpf_reg_state *reg,
f1174f77
EC
4752 const char *pointer_desc,
4753 int off, int size, bool strict)
79adffcd 4754{
f1174f77
EC
4755 struct tnum reg_off;
4756
4757 /* Byte size accesses are always allowed. */
4758 if (!strict || size == 1)
4759 return 0;
4760
4761 reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
4762 if (!tnum_is_aligned(reg_off, size)) {
4763 char tn_buf[48];
4764
4765 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 4766 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
f1174f77 4767 pointer_desc, tn_buf, reg->off, off, size);
79adffcd
DB
4768 return -EACCES;
4769 }
4770
969bf05e
AS
4771 return 0;
4772}
4773
e07b98d9 4774static int check_ptr_alignment(struct bpf_verifier_env *env,
ca369602
DB
4775 const struct bpf_reg_state *reg, int off,
4776 int size, bool strict_alignment_once)
79adffcd 4777{
ca369602 4778 bool strict = env->strict_alignment || strict_alignment_once;
f1174f77 4779 const char *pointer_desc = "";
d1174416 4780
79adffcd
DB
4781 switch (reg->type) {
4782 case PTR_TO_PACKET:
de8f3a83
DB
4783 case PTR_TO_PACKET_META:
4784 /* Special case, because of NET_IP_ALIGN. Given metadata sits
4785 * right in front, treat it the very same way.
4786 */
61bd5218 4787 return check_pkt_ptr_alignment(env, reg, off, size, strict);
d58e468b
PP
4788 case PTR_TO_FLOW_KEYS:
4789 pointer_desc = "flow keys ";
4790 break;
69c087ba
YS
4791 case PTR_TO_MAP_KEY:
4792 pointer_desc = "key ";
4793 break;
f1174f77
EC
4794 case PTR_TO_MAP_VALUE:
4795 pointer_desc = "value ";
4796 break;
4797 case PTR_TO_CTX:
4798 pointer_desc = "context ";
4799 break;
4800 case PTR_TO_STACK:
4801 pointer_desc = "stack ";
01f810ac
AM
4802 /* The stack spill tracking logic in check_stack_write_fixed_off()
4803 * and check_stack_read_fixed_off() relies on stack accesses being
a5ec6ae1
JH
4804 * aligned.
4805 */
4806 strict = true;
f1174f77 4807 break;
c64b7983
JS
4808 case PTR_TO_SOCKET:
4809 pointer_desc = "sock ";
4810 break;
46f8bc92
MKL
4811 case PTR_TO_SOCK_COMMON:
4812 pointer_desc = "sock_common ";
4813 break;
655a51e5
MKL
4814 case PTR_TO_TCP_SOCK:
4815 pointer_desc = "tcp_sock ";
4816 break;
fada7fdc
JL
4817 case PTR_TO_XDP_SOCK:
4818 pointer_desc = "xdp_sock ";
4819 break;
79adffcd 4820 default:
f1174f77 4821 break;
79adffcd 4822 }
61bd5218
JK
4823 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
4824 strict);
79adffcd
DB
4825}
4826
f4d7e40a
AS
4827static int update_stack_depth(struct bpf_verifier_env *env,
4828 const struct bpf_func_state *func,
4829 int off)
4830{
9c8105bd 4831 u16 stack = env->subprog_info[func->subprogno].stack_depth;
f4d7e40a
AS
4832
4833 if (stack >= -off)
4834 return 0;
4835
4836 /* update known max for given subprogram */
9c8105bd 4837 env->subprog_info[func->subprogno].stack_depth = -off;
70a87ffe
AS
4838 return 0;
4839}
f4d7e40a 4840
70a87ffe
AS
4841/* Starting from the main bpf function, walk all instructions of the function
4842 * and recursively walk all callees that the given function can call.
4843 * Ignore jump and exit insns.
4844 * Since recursion is prevented by check_cfg() this algorithm
4845 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
4846 */
4847static int check_max_stack_depth(struct bpf_verifier_env *env)
4848{
9c8105bd
JW
4849 int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
4850 struct bpf_subprog_info *subprog = env->subprog_info;
70a87ffe 4851 struct bpf_insn *insn = env->prog->insnsi;
ebf7d1f5 4852 bool tail_call_reachable = false;
70a87ffe
AS
4853 int ret_insn[MAX_CALL_FRAMES];
4854 int ret_prog[MAX_CALL_FRAMES];
ebf7d1f5 4855 int j;
f4d7e40a 4856
70a87ffe 4857process_func:
7f6e4312
MF
4858 /* protect against potential stack overflow that might happen when
4859 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
4860 * depth for such a case down to 256 so that the worst case scenario
4861 * would result in an 8k stack size (32, which is the tailcall limit,
4862 * times 256 = 8k).
4863 *
4864 * To get the idea what might happen, see an example:
4865 * func1 -> sub rsp, 128
4866 * subfunc1 -> sub rsp, 256
4867 * tailcall1 -> add rsp, 256
4868 * func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
4869 * subfunc2 -> sub rsp, 64
4870 * subfunc22 -> sub rsp, 128
4871 * tailcall2 -> add rsp, 128
4872 * func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
4873 *
4874 * tailcall will unwind the current stack frame but it will not get rid
4875 * of caller's stack as shown on the example above.
4876 */
4877 if (idx && subprog[idx].has_tail_call && depth >= 256) {
4878 verbose(env,
4879 "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
4880 depth);
4881 return -EACCES;
4882 }
70a87ffe
AS
4883 /* round up to 32-bytes, since this is granularity
4884 * of interpreter stack size
4885 */
9c8105bd 4886 depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
70a87ffe 4887 if (depth > MAX_BPF_STACK) {
f4d7e40a 4888 verbose(env, "combined stack size of %d calls is %d. Too large\n",
70a87ffe 4889 frame + 1, depth);
f4d7e40a
AS
4890 return -EACCES;
4891 }
70a87ffe 4892continue_func:
4cb3d99c 4893 subprog_end = subprog[idx + 1].start;
70a87ffe 4894 for (; i < subprog_end; i++) {
7ddc80a4
AS
4895 int next_insn;
4896
69c087ba 4897 if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
70a87ffe
AS
4898 continue;
4899 /* remember insn and function to return to */
4900 ret_insn[frame] = i + 1;
9c8105bd 4901 ret_prog[frame] = idx;
70a87ffe
AS
4902
4903 /* find the callee */
7ddc80a4
AS
4904 next_insn = i + insn[i].imm + 1;
4905 idx = find_subprog(env, next_insn);
9c8105bd 4906 if (idx < 0) {
70a87ffe 4907 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
7ddc80a4 4908 next_insn);
70a87ffe
AS
4909 return -EFAULT;
4910 }
7ddc80a4
AS
4911 if (subprog[idx].is_async_cb) {
4912 if (subprog[idx].has_tail_call) {
4913 verbose(env, "verifier bug. subprog has tail_call and async cb\n");
4914 return -EFAULT;
4915 }
4916 /* async callbacks don't increase bpf prog stack size */
4917 continue;
4918 }
4919 i = next_insn;
ebf7d1f5
MF
4920
4921 if (subprog[idx].has_tail_call)
4922 tail_call_reachable = true;
4923
70a87ffe
AS
4924 frame++;
4925 if (frame >= MAX_CALL_FRAMES) {
927cb781
PC
4926 verbose(env, "the call stack of %d frames is too deep !\n",
4927 frame);
4928 return -E2BIG;
70a87ffe
AS
4929 }
4930 goto process_func;
4931 }
ebf7d1f5
MF
4932 /* if tail call got detected across bpf2bpf calls then mark each of the
4933 * currently present subprog frames as tail call reachable subprogs;
4934 * this info will be utilized by JIT so that we will be preserving the
4935 * tail call counter throughout bpf2bpf calls combined with tailcalls
4936 */
4937 if (tail_call_reachable)
4938 for (j = 0; j < frame; j++)
4939 subprog[ret_prog[j]].tail_call_reachable = true;
5dd0a6b8
DB
4940 if (subprog[0].tail_call_reachable)
4941 env->prog->aux->tail_call_reachable = true;
ebf7d1f5 4942
70a87ffe
AS
4943 /* end of for() loop means the last insn of the 'subprog'
4944 * was reached. Doesn't matter whether it was JA or EXIT
4945 */
4946 if (frame == 0)
4947 return 0;
9c8105bd 4948 depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
70a87ffe
AS
4949 frame--;
4950 i = ret_insn[frame];
9c8105bd 4951 idx = ret_prog[frame];
70a87ffe 4952 goto continue_func;
f4d7e40a
AS
4953}
4954
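/* Worked example of the accounting above, using hypothetical per-subprog
 * stack sizes (each is rounded up to 32 bytes before being added):
 *
 *   main()  uses 100 bytes -> 128
 *   sub1()  uses 200 bytes -> 224
 *   sub2()  uses  50 bytes ->  64
 *
 * main -> sub1 -> sub2 gives 128 + 224 + 64 = 416 <= MAX_BPF_STACK (512),
 * so the chain is accepted. If sub2 used 200 bytes instead, the total would
 * be 128 + 224 + 224 = 576 and the verifier would reject it with
 * "combined stack size ... Too large".
 */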
19d28fbd 4955#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1ea47e01
AS
4956static int get_callee_stack_depth(struct bpf_verifier_env *env,
4957 const struct bpf_insn *insn, int idx)
4958{
4959 int start = idx + insn->imm + 1, subprog;
4960
4961 subprog = find_subprog(env, start);
4962 if (subprog < 0) {
4963 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
4964 start);
4965 return -EFAULT;
4966 }
9c8105bd 4967 return env->subprog_info[subprog].stack_depth;
1ea47e01 4968}
19d28fbd 4969#endif
1ea47e01 4970
afbf21dc
YS
4971static int __check_buffer_access(struct bpf_verifier_env *env,
4972 const char *buf_info,
4973 const struct bpf_reg_state *reg,
4974 int regno, int off, int size)
9df1c28b
MM
4975{
4976 if (off < 0) {
4977 verbose(env,
4fc00b79 4978 "R%d invalid %s buffer access: off=%d, size=%d\n",
afbf21dc 4979 regno, buf_info, off, size);
9df1c28b
MM
4980 return -EACCES;
4981 }
4982 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4983 char tn_buf[48];
4984
4985 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4986 verbose(env,
4fc00b79 4987 "R%d invalid variable buffer offset: off=%d, var_off=%s\n",
9df1c28b
MM
4988 regno, off, tn_buf);
4989 return -EACCES;
4990 }
afbf21dc
YS
4991
4992 return 0;
4993}
4994
4995static int check_tp_buffer_access(struct bpf_verifier_env *env,
4996 const struct bpf_reg_state *reg,
4997 int regno, int off, int size)
4998{
4999 int err;
5000
5001 err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
5002 if (err)
5003 return err;
5004
9df1c28b
MM
5005 if (off + size > env->prog->aux->max_tp_access)
5006 env->prog->aux->max_tp_access = off + size;
5007
5008 return 0;
5009}
5010
afbf21dc
YS
5011static int check_buffer_access(struct bpf_verifier_env *env,
5012 const struct bpf_reg_state *reg,
5013 int regno, int off, int size,
5014 bool zero_size_allowed,
afbf21dc
YS
5015 u32 *max_access)
5016{
44e9a741 5017 const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
afbf21dc
YS
5018 int err;
5019
5020 err = __check_buffer_access(env, buf_info, reg, regno, off, size);
5021 if (err)
5022 return err;
5023
5024 if (off + size > *max_access)
5025 *max_access = off + size;
5026
5027 return 0;
5028}
5029
3f50f132
JF
5030/* BPF architecture zero extends alu32 ops into 64-bit registers */
5031static void zext_32_to_64(struct bpf_reg_state *reg)
5032{
5033 reg->var_off = tnum_subreg(reg->var_off);
5034 __reg_assign_32_into_64(reg);
5035}
9df1c28b 5036
0c17d1d2
JH
5037/* truncate register to smaller size (in bytes)
5038 * must be called with size < BPF_REG_SIZE
5039 */
5040static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
5041{
5042 u64 mask;
5043
5044 /* clear high bits in bit representation */
5045 reg->var_off = tnum_cast(reg->var_off, size);
5046
5047 /* fix arithmetic bounds */
5048 mask = ((u64)1 << (size * 8)) - 1;
5049 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
5050 reg->umin_value &= mask;
5051 reg->umax_value &= mask;
5052 } else {
5053 reg->umin_value = 0;
5054 reg->umax_value = mask;
5055 }
5056 reg->smin_value = reg->umin_value;
5057 reg->smax_value = reg->umax_value;
3f50f132
JF
5058
5059 /* If size is smaller than a 32-bit register, the 32-bit register
5060 * values are also truncated, so we push the 64-bit bounds into
5061 * the 32-bit bounds. The bounds above were already truncated to < 32 bits.
5062 */
5063 if (size >= 4)
5064 return;
5065 __reg_combine_64_into_32(reg);
0c17d1d2
JH
5066}
5067
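/* Worked example for the truncation above with size == 1 (mask == 0xff),
 * using made-up bounds:
 *
 *   umin = 0x120, umax = 0x134:  both share the high bits 0x100, so the
 *                                bounds survive masking -> [0x20, 0x34]
 *   umin = 0x0f0, umax = 0x110:  high bits differ, so the range collapses
 *                                to the conservative [0x00, 0xff]
 *
 * In both cases smin/smax are then copied from the (non-negative) unsigned
 * bounds.
 */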
a23740ec
AN
5068static bool bpf_map_is_rdonly(const struct bpf_map *map)
5069{
353050be
DB
5070 /* A map is considered read-only if the following conditions are true:
5071 *
5072 * 1) BPF program side cannot change any of the map content. The
5073 * BPF_F_RDONLY_PROG flag was set at map creation time and stays
5074 * set throughout the lifetime of the map.
5075 * 2) The map value(s) have been initialized from user space by a
5076 * loader and then "frozen", such that no new map update/delete
5077 * operations from syscall side are possible for the rest of
5078 * the map's lifetime from that point onwards.
5079 * 3) Any parallel/pending map update/delete operations from syscall
5080 * side have been completed. Only after that point, it's safe to
5081 * assume that map value(s) are immutable.
5082 */
5083 return (map->map_flags & BPF_F_RDONLY_PROG) &&
5084 READ_ONCE(map->frozen) &&
5085 !bpf_map_write_active(map);
a23740ec
AN
5086}
5087
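/* Rough user-space sketch of how a map ends up satisfying all three
 * conditions, assuming a recent libbpf; the map name and sizes are
 * arbitrary placeholders:
 *
 *   LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_RDONLY_PROG);
 *   int fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "ro_arr", 4, 4, 1, &opts);
 *   __u32 key = 0, val = 42;
 *
 *   bpf_map_update_elem(fd, &key, &val, BPF_ANY);
 *   bpf_map_freeze(fd);
 *
 * After the freeze (and once pending syscall-side writes drain), loads from
 * this map value can be tracked as known scalars by check_mem_access().
 */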
5088static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
5089{
5090 void *ptr;
5091 u64 addr;
5092 int err;
5093
5094 err = map->ops->map_direct_value_addr(map, &addr, off);
5095 if (err)
5096 return err;
2dedd7d2 5097 ptr = (void *)(long)addr + off;
a23740ec
AN
5098
5099 switch (size) {
5100 case sizeof(u8):
5101 *val = (u64)*(u8 *)ptr;
5102 break;
5103 case sizeof(u16):
5104 *val = (u64)*(u16 *)ptr;
5105 break;
5106 case sizeof(u32):
5107 *val = (u64)*(u32 *)ptr;
5108 break;
5109 case sizeof(u64):
5110 *val = *(u64 *)ptr;
5111 break;
5112 default:
5113 return -EINVAL;
5114 }
5115 return 0;
5116}
5117
6fcd486b
AS
5118#define BTF_TYPE_SAFE_RCU(__type) __PASTE(__type, __safe_rcu)
5119#define BTF_TYPE_SAFE_TRUSTED(__type) __PASTE(__type, __safe_trusted)
57539b1c 5120
6fcd486b
AS
5121/*
5122 * Allow list a few fields as RCU trusted or fully trusted.
5123 * This logic doesn't allow mixed tagging and will be removed once GCC supports
5124 * btf_type_tag.
5125 */
5126
5127/* RCU trusted: these fields are trusted in RCU CS and never NULL */
5128BTF_TYPE_SAFE_RCU(struct task_struct) {
57539b1c 5129 const cpumask_t *cpus_ptr;
8d093b4e 5130 struct css_set __rcu *cgroups;
6fcd486b
AS
5131 struct task_struct __rcu *real_parent;
5132 struct task_struct *group_leader;
8d093b4e
AS
5133};
5134
6fcd486b 5135BTF_TYPE_SAFE_RCU(struct css_set) {
8d093b4e 5136 struct cgroup *dfl_cgrp;
57539b1c
DV
5137};
5138
6fcd486b
AS
5139/* full trusted: these fields are trusted even outside of RCU CS and never NULL */
5140BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
5141 __bpf_md_ptr(struct seq_file *, seq);
5142};
5143
5144BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
5145 __bpf_md_ptr(struct bpf_iter_meta *, meta);
5146 __bpf_md_ptr(struct task_struct *, task);
5147};
5148
5149BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
5150 struct file *file;
5151};
5152
5153BTF_TYPE_SAFE_TRUSTED(struct file) {
5154 struct inode *f_inode;
5155};
5156
5157BTF_TYPE_SAFE_TRUSTED(struct dentry) {
5158 /* no negative dentry-s in places where bpf can see it */
5159 struct inode *d_inode;
5160};
5161
5162BTF_TYPE_SAFE_TRUSTED(struct socket) {
5163 struct sock *sk;
5164};
5165
5166static bool type_is_rcu(struct bpf_verifier_env *env,
5167 struct bpf_reg_state *reg,
5168 int off)
57539b1c 5169{
6fcd486b
AS
5170 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
5171 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
57539b1c 5172
6fcd486b
AS
5173 return btf_nested_type_is_trusted(&env->log, reg, off, "__safe_rcu");
5174}
57539b1c 5175
6fcd486b
AS
5176static bool type_is_trusted(struct bpf_verifier_env *env,
5177 struct bpf_reg_state *reg,
5178 int off)
5179{
5180 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
5181 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
5182 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
5183 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
5184 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry));
5185 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket));
5186
5187 return btf_nested_type_is_trusted(&env->log, reg, off, "__safe_trusted");
57539b1c
DV
5188}
5189
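/* Illustrative BPF-side sketch of the allow lists above, assuming the
 * bpf_rcu_read_lock()/bpf_rcu_read_unlock() kfuncs are available to the
 * program type:
 *
 *   struct task_struct *task = bpf_get_current_task_btf();
 *
 *   bpf_rcu_read_lock();
 *   struct task_struct *parent = task->real_parent;   MEM_RCU inside the CS
 *   if (parent)
 *           bpf_printk("ppid %d", parent->pid);
 *   bpf_rcu_read_unlock();
 *
 * task->cpus_ptr, also on the __safe_rcu list, is usable the same way, while
 * walking a field that is on neither list yields an untrusted pointer that
 * most helpers and kfuncs will refuse.
 */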
9e15db66
AS
5190static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
5191 struct bpf_reg_state *regs,
5192 int regno, int off, int size,
5193 enum bpf_access_type atype,
5194 int value_regno)
5195{
5196 struct bpf_reg_state *reg = regs + regno;
22dc4a0f
AN
5197 const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
5198 const char *tname = btf_name_by_offset(reg->btf, t->name_off);
c6f1bfe8 5199 enum bpf_type_flag flag = 0;
9e15db66
AS
5200 u32 btf_id;
5201 int ret;
5202
c67cae55
AS
5203 if (!env->allow_ptr_leaks) {
5204 verbose(env,
5205 "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
5206 tname);
5207 return -EPERM;
5208 }
5209 if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) {
5210 verbose(env,
5211 "Cannot access kernel 'struct %s' from non-GPL compatible program\n",
5212 tname);
5213 return -EINVAL;
5214 }
9e15db66
AS
5215 if (off < 0) {
5216 verbose(env,
5217 "R%d is ptr_%s invalid negative access: off=%d\n",
5218 regno, tname, off);
5219 return -EACCES;
5220 }
5221 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
5222 char tn_buf[48];
5223
5224 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5225 verbose(env,
5226 "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
5227 regno, tname, off, tn_buf);
5228 return -EACCES;
5229 }
5230
c6f1bfe8
YS
5231 if (reg->type & MEM_USER) {
5232 verbose(env,
5233 "R%d is ptr_%s access user memory: off=%d\n",
5234 regno, tname, off);
5235 return -EACCES;
5236 }
5237
5844101a
HL
5238 if (reg->type & MEM_PERCPU) {
5239 verbose(env,
5240 "R%d is ptr_%s access percpu memory: off=%d\n",
5241 regno, tname, off);
5242 return -EACCES;
5243 }
5244
282de143
KKD
5245 if (env->ops->btf_struct_access && !type_is_alloc(reg->type)) {
5246 if (!btf_is_kernel(reg->btf)) {
5247 verbose(env, "verifier internal error: reg->btf must be kernel btf\n");
5248 return -EFAULT;
5249 }
6728aea7 5250 ret = env->ops->btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag);
27ae7997 5251 } else {
282de143
KKD
5252 /* Writes are permitted with default btf_struct_access for
5253 * program allocated objects (which always have ref_obj_id > 0),
5254 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
5255 */
5256 if (atype != BPF_READ && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
27ae7997
MKL
5257 verbose(env, "only read is supported\n");
5258 return -EACCES;
5259 }
5260
6a3cd331
DM
5261 if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
5262 !reg->ref_obj_id) {
282de143
KKD
5263 verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
5264 return -EFAULT;
5265 }
5266
6728aea7 5267 ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag);
27ae7997
MKL
5268 }
5269
9e15db66
AS
5270 if (ret < 0)
5271 return ret;
5272
6fcd486b
AS
5273 if (ret != PTR_TO_BTF_ID) {
5274 /* just mark; */
6efe152d 5275
6fcd486b
AS
5276 } else if (type_flag(reg->type) & PTR_UNTRUSTED) {
5277 /* If this is an untrusted pointer, all pointers formed by walking it
5278 * also inherit the untrusted flag.
5279 */
5280 flag = PTR_UNTRUSTED;
5281
5282 } else if (is_trusted_reg(reg) || is_rcu_reg(reg)) {
5283 /* By default any pointer obtained from walking a trusted pointer is no
5284 * longer trusted, unless the field being accessed has explicitly been
5285 * marked as inheriting its parent's state of trust (either full or RCU).
5286 * For example:
5287 * 'cgroups' pointer is untrusted if task->cgroups dereference
5288 * happened in a sleepable program outside of bpf_rcu_read_lock()
5289 * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
5290 * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
5291 *
5292 * A regular RCU-protected pointer with __rcu tag can also be deemed
5293 * trusted if we are in an RCU CS. Such a pointer can be NULL.
20c09d92 5294 */
6fcd486b
AS
5295 if (type_is_trusted(env, reg, off)) {
5296 flag |= PTR_TRUSTED;
5297 } else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
5298 if (type_is_rcu(env, reg, off)) {
5299 /* ignore __rcu tag and mark it MEM_RCU */
5300 flag |= MEM_RCU;
5301 } else if (flag & MEM_RCU) {
5302 /* __rcu tagged pointers can be NULL */
5303 flag |= PTR_MAYBE_NULL;
5304 } else if (flag & (MEM_PERCPU | MEM_USER)) {
5305 /* keep as-is */
5306 } else {
5307 /* walking unknown pointers yields untrusted pointer */
5308 flag = PTR_UNTRUSTED;
5309 }
5310 } else {
5311 /*
5312 * If we are not in an RCU CS, or the MEM_RCU pointer can be NULL,
5313 * aggressively mark it as untrusted; otherwise such
5314 * pointers will be plain PTR_TO_BTF_ID without flags
5315 * and will be allowed to be passed into helpers for
5316 * compat reasons.
5317 */
5318 flag = PTR_UNTRUSTED;
5319 }
20c09d92 5320 } else {
6fcd486b 5321 /* Old compat. Deprecated */
57539b1c 5322 flag &= ~PTR_TRUSTED;
20c09d92 5323 }
3f00c523 5324
41c48f3a 5325 if (atype == BPF_READ && value_regno >= 0)
c6f1bfe8 5326 mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
41c48f3a
AI
5327
5328 return 0;
5329}
5330
5331static int check_ptr_to_map_access(struct bpf_verifier_env *env,
5332 struct bpf_reg_state *regs,
5333 int regno, int off, int size,
5334 enum bpf_access_type atype,
5335 int value_regno)
5336{
5337 struct bpf_reg_state *reg = regs + regno;
5338 struct bpf_map *map = reg->map_ptr;
6728aea7 5339 struct bpf_reg_state map_reg;
c6f1bfe8 5340 enum bpf_type_flag flag = 0;
41c48f3a
AI
5341 const struct btf_type *t;
5342 const char *tname;
5343 u32 btf_id;
5344 int ret;
5345
5346 if (!btf_vmlinux) {
5347 verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
5348 return -ENOTSUPP;
5349 }
5350
5351 if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
5352 verbose(env, "map_ptr access not supported for map type %d\n",
5353 map->map_type);
5354 return -ENOTSUPP;
5355 }
5356
5357 t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
5358 tname = btf_name_by_offset(btf_vmlinux, t->name_off);
5359
c67cae55 5360 if (!env->allow_ptr_leaks) {
41c48f3a 5361 verbose(env,
c67cae55 5362 "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
41c48f3a
AI
5363 tname);
5364 return -EPERM;
9e15db66 5365 }
27ae7997 5366
41c48f3a
AI
5367 if (off < 0) {
5368 verbose(env, "R%d is %s invalid negative access: off=%d\n",
5369 regno, tname, off);
5370 return -EACCES;
5371 }
5372
5373 if (atype != BPF_READ) {
5374 verbose(env, "only read from %s is supported\n", tname);
5375 return -EACCES;
5376 }
5377
6728aea7
KKD
5378 /* Simulate access to a PTR_TO_BTF_ID */
5379 memset(&map_reg, 0, sizeof(map_reg));
5380 mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID, btf_vmlinux, *map->ops->map_btf_id, 0);
5381 ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag);
41c48f3a
AI
5382 if (ret < 0)
5383 return ret;
5384
5385 if (value_regno >= 0)
c6f1bfe8 5386 mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
41c48f3a 5387
9e15db66
AS
5388 return 0;
5389}
5390
01f810ac
AM
5391/* Check that the stack access at the given offset is within bounds. The
5392 * maximum valid offset is -1.
5393 *
5394 * The minimum valid offset is -MAX_BPF_STACK for writes, and
5395 * -state->allocated_stack for reads.
5396 */
5397static int check_stack_slot_within_bounds(int off,
5398 struct bpf_func_state *state,
5399 enum bpf_access_type t)
5400{
5401 int min_valid_off;
5402
5403 if (t == BPF_WRITE)
5404 min_valid_off = -MAX_BPF_STACK;
5405 else
5406 min_valid_off = -state->allocated_stack;
5407
5408 if (off < min_valid_off || off > -1)
5409 return -EACCES;
5410 return 0;
5411}
5412
5413/* Check that the stack access at 'regno + off' falls within the maximum stack
5414 * bounds.
5415 *
5416 * 'off' includes `regno->offset`, but not its dynamic part (if any).
5417 */
5418static int check_stack_access_within_bounds(
5419 struct bpf_verifier_env *env,
5420 int regno, int off, int access_size,
61df10c7 5421 enum bpf_access_src src, enum bpf_access_type type)
01f810ac
AM
5422{
5423 struct bpf_reg_state *regs = cur_regs(env);
5424 struct bpf_reg_state *reg = regs + regno;
5425 struct bpf_func_state *state = func(env, reg);
5426 int min_off, max_off;
5427 int err;
5428 char *err_extra;
5429
5430 if (src == ACCESS_HELPER)
5431 /* We don't know if helpers are reading or writing (or both). */
5432 err_extra = " indirect access to";
5433 else if (type == BPF_READ)
5434 err_extra = " read from";
5435 else
5436 err_extra = " write to";
5437
5438 if (tnum_is_const(reg->var_off)) {
5439 min_off = reg->var_off.value + off;
5440 if (access_size > 0)
5441 max_off = min_off + access_size - 1;
5442 else
5443 max_off = min_off;
5444 } else {
5445 if (reg->smax_value >= BPF_MAX_VAR_OFF ||
5446 reg->smin_value <= -BPF_MAX_VAR_OFF) {
5447 verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
5448 err_extra, regno);
5449 return -EACCES;
5450 }
5451 min_off = reg->smin_value + off;
5452 if (access_size > 0)
5453 max_off = reg->smax_value + off + access_size - 1;
5454 else
5455 max_off = min_off;
5456 }
5457
5458 err = check_stack_slot_within_bounds(min_off, state, type);
5459 if (!err)
5460 err = check_stack_slot_within_bounds(max_off, state, type);
5461
5462 if (err) {
5463 if (tnum_is_const(reg->var_off)) {
5464 verbose(env, "invalid%s stack R%d off=%d size=%d\n",
5465 err_extra, regno, off, access_size);
5466 } else {
5467 char tn_buf[48];
5468
5469 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5470 verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
5471 err_extra, regno, tn_buf, access_size);
5472 }
5473 }
5474 return err;
5475}
41c48f3a 5476
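/* Worked example for the variable-offset path above: a PTR_TO_STACK register
 * with off = -8, a variable part bounded by [smin = -8, smax = 0] and an
 * access_size of 4 gives
 *
 *   min_off = -8 + (-8)             = -16
 *   max_off =  0 + (-8) + 4 - 1     =  -5
 *
 * Both ends are <= -1 and, assuming at least 16 bytes of stack have been
 * allocated, the access passes. If smax were 8 instead, max_off would be
 * 3 > -1 and the access would be rejected as an invalid variable-offset
 * stack access.
 */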
17a52670
AS
5477/* check whether memory at (regno + off) is accessible for t = (read | write)
5478 * if t==write, value_regno is a register which value is stored into memory
5479 * if t==read, value_regno is a register which will receive the value from memory
5480 * if t==write && value_regno==-1, some unknown value is stored into memory
5481 * if t==read && value_regno==-1, don't care what we read from memory
5482 */
ca369602
DB
5483static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
5484 int off, int bpf_size, enum bpf_access_type t,
5485 int value_regno, bool strict_alignment_once)
17a52670 5486{
638f5b90
AS
5487 struct bpf_reg_state *regs = cur_regs(env);
5488 struct bpf_reg_state *reg = regs + regno;
f4d7e40a 5489 struct bpf_func_state *state;
17a52670
AS
5490 int size, err = 0;
5491
5492 size = bpf_size_to_bytes(bpf_size);
5493 if (size < 0)
5494 return size;
5495
f1174f77 5496 /* alignment checks will add in reg->off themselves */
ca369602 5497 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
969bf05e
AS
5498 if (err)
5499 return err;
17a52670 5500
f1174f77
EC
5501 /* for access checks, reg->off is just part of off */
5502 off += reg->off;
5503
69c087ba
YS
5504 if (reg->type == PTR_TO_MAP_KEY) {
5505 if (t == BPF_WRITE) {
5506 verbose(env, "write to change key R%d not allowed\n", regno);
5507 return -EACCES;
5508 }
5509
5510 err = check_mem_region_access(env, regno, off, size,
5511 reg->map_ptr->key_size, false);
5512 if (err)
5513 return err;
5514 if (value_regno >= 0)
5515 mark_reg_unknown(env, regs, value_regno);
5516 } else if (reg->type == PTR_TO_MAP_VALUE) {
aa3496ac 5517 struct btf_field *kptr_field = NULL;
61df10c7 5518
1be7f75d
AS
5519 if (t == BPF_WRITE && value_regno >= 0 &&
5520 is_pointer_value(env, value_regno)) {
61bd5218 5521 verbose(env, "R%d leaks addr into map\n", value_regno);
1be7f75d
AS
5522 return -EACCES;
5523 }
591fe988
DB
5524 err = check_map_access_type(env, regno, off, size, t);
5525 if (err)
5526 return err;
61df10c7
KKD
5527 err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT);
5528 if (err)
5529 return err;
5530 if (tnum_is_const(reg->var_off))
aa3496ac
KKD
5531 kptr_field = btf_record_find(reg->map_ptr->record,
5532 off + reg->var_off.value, BPF_KPTR);
5533 if (kptr_field) {
5534 err = check_map_kptr_access(env, regno, value_regno, insn_idx, kptr_field);
61df10c7 5535 } else if (t == BPF_READ && value_regno >= 0) {
a23740ec
AN
5536 struct bpf_map *map = reg->map_ptr;
5537
5538 /* if map is read-only, track its contents as scalars */
5539 if (tnum_is_const(reg->var_off) &&
5540 bpf_map_is_rdonly(map) &&
5541 map->ops->map_direct_value_addr) {
5542 int map_off = off + reg->var_off.value;
5543 u64 val = 0;
5544
5545 err = bpf_map_direct_read(map, map_off, size,
5546 &val);
5547 if (err)
5548 return err;
5549
5550 regs[value_regno].type = SCALAR_VALUE;
5551 __mark_reg_known(&regs[value_regno], val);
5552 } else {
5553 mark_reg_unknown(env, regs, value_regno);
5554 }
5555 }
34d3a78c
HL
5556 } else if (base_type(reg->type) == PTR_TO_MEM) {
5557 bool rdonly_mem = type_is_rdonly_mem(reg->type);
5558
5559 if (type_may_be_null(reg->type)) {
5560 verbose(env, "R%d invalid mem access '%s'\n", regno,
5561 reg_type_str(env, reg->type));
5562 return -EACCES;
5563 }
5564
5565 if (t == BPF_WRITE && rdonly_mem) {
5566 verbose(env, "R%d cannot write into %s\n",
5567 regno, reg_type_str(env, reg->type));
5568 return -EACCES;
5569 }
5570
457f4436
AN
5571 if (t == BPF_WRITE && value_regno >= 0 &&
5572 is_pointer_value(env, value_regno)) {
5573 verbose(env, "R%d leaks addr into mem\n", value_regno);
5574 return -EACCES;
5575 }
34d3a78c 5576
457f4436
AN
5577 err = check_mem_region_access(env, regno, off, size,
5578 reg->mem_size, false);
34d3a78c 5579 if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
457f4436 5580 mark_reg_unknown(env, regs, value_regno);
1a0dc1ac 5581 } else if (reg->type == PTR_TO_CTX) {
f1174f77 5582 enum bpf_reg_type reg_type = SCALAR_VALUE;
22dc4a0f 5583 struct btf *btf = NULL;
9e15db66 5584 u32 btf_id = 0;
19de99f7 5585
1be7f75d
AS
5586 if (t == BPF_WRITE && value_regno >= 0 &&
5587 is_pointer_value(env, value_regno)) {
61bd5218 5588 verbose(env, "R%d leaks addr into ctx\n", value_regno);
1be7f75d
AS
5589 return -EACCES;
5590 }
f1174f77 5591
be80a1d3 5592 err = check_ptr_off_reg(env, reg, regno);
58990d1f
DB
5593 if (err < 0)
5594 return err;
5595
c6f1bfe8
YS
5596 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
5597 &btf_id);
9e15db66
AS
5598 if (err)
5599 verbose_linfo(env, insn_idx, "; ");
969bf05e 5600 if (!err && t == BPF_READ && value_regno >= 0) {
f1174f77 5601 /* ctx access returns either a scalar, or a
de8f3a83
DB
5602 * PTR_TO_PACKET[_META,_END]. In the latter
5603 * case, we know the offset is zero.
f1174f77 5604 */
46f8bc92 5605 if (reg_type == SCALAR_VALUE) {
638f5b90 5606 mark_reg_unknown(env, regs, value_regno);
46f8bc92 5607 } else {
638f5b90 5608 mark_reg_known_zero(env, regs,
61bd5218 5609 value_regno);
c25b2ae1 5610 if (type_may_be_null(reg_type))
46f8bc92 5611 regs[value_regno].id = ++env->id_gen;
5327ed3d
JW
5612 /* A load of a ctx field could have a different
5613 * actual load size from the one encoded in the
5614 * insn. When the dst is a PTR, it is for sure not
5615 * a sub-register.
5616 */
5617 regs[value_regno].subreg_def = DEF_NOT_SUBREG;
c25b2ae1 5618 if (base_type(reg_type) == PTR_TO_BTF_ID) {
22dc4a0f 5619 regs[value_regno].btf = btf;
9e15db66 5620 regs[value_regno].btf_id = btf_id;
22dc4a0f 5621 }
46f8bc92 5622 }
638f5b90 5623 regs[value_regno].type = reg_type;
969bf05e 5624 }
17a52670 5625
f1174f77 5626 } else if (reg->type == PTR_TO_STACK) {
01f810ac
AM
5627 /* Basic bounds checks. */
5628 err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
e4298d25
DB
5629 if (err)
5630 return err;
8726679a 5631
f4d7e40a
AS
5632 state = func(env, reg);
5633 err = update_stack_depth(env, state, off);
5634 if (err)
5635 return err;
8726679a 5636
01f810ac
AM
5637 if (t == BPF_READ)
5638 err = check_stack_read(env, regno, off, size,
61bd5218 5639 value_regno);
01f810ac
AM
5640 else
5641 err = check_stack_write(env, regno, off, size,
5642 value_regno, insn_idx);
de8f3a83 5643 } else if (reg_is_pkt_pointer(reg)) {
3a0af8fd 5644 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
61bd5218 5645 verbose(env, "cannot write into packet\n");
969bf05e
AS
5646 return -EACCES;
5647 }
4acf6c0b
BB
5648 if (t == BPF_WRITE && value_regno >= 0 &&
5649 is_pointer_value(env, value_regno)) {
61bd5218
JK
5650 verbose(env, "R%d leaks addr into packet\n",
5651 value_regno);
4acf6c0b
BB
5652 return -EACCES;
5653 }
9fd29c08 5654 err = check_packet_access(env, regno, off, size, false);
969bf05e 5655 if (!err && t == BPF_READ && value_regno >= 0)
638f5b90 5656 mark_reg_unknown(env, regs, value_regno);
d58e468b
PP
5657 } else if (reg->type == PTR_TO_FLOW_KEYS) {
5658 if (t == BPF_WRITE && value_regno >= 0 &&
5659 is_pointer_value(env, value_regno)) {
5660 verbose(env, "R%d leaks addr into flow keys\n",
5661 value_regno);
5662 return -EACCES;
5663 }
5664
5665 err = check_flow_keys_access(env, off, size);
5666 if (!err && t == BPF_READ && value_regno >= 0)
5667 mark_reg_unknown(env, regs, value_regno);
46f8bc92 5668 } else if (type_is_sk_pointer(reg->type)) {
c64b7983 5669 if (t == BPF_WRITE) {
46f8bc92 5670 verbose(env, "R%d cannot write into %s\n",
c25b2ae1 5671 regno, reg_type_str(env, reg->type));
c64b7983
JS
5672 return -EACCES;
5673 }
5f456649 5674 err = check_sock_access(env, insn_idx, regno, off, size, t);
c64b7983
JS
5675 if (!err && value_regno >= 0)
5676 mark_reg_unknown(env, regs, value_regno);
9df1c28b
MM
5677 } else if (reg->type == PTR_TO_TP_BUFFER) {
5678 err = check_tp_buffer_access(env, reg, regno, off, size);
5679 if (!err && t == BPF_READ && value_regno >= 0)
5680 mark_reg_unknown(env, regs, value_regno);
bff61f6f
HL
5681 } else if (base_type(reg->type) == PTR_TO_BTF_ID &&
5682 !type_may_be_null(reg->type)) {
9e15db66
AS
5683 err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
5684 value_regno);
41c48f3a
AI
5685 } else if (reg->type == CONST_PTR_TO_MAP) {
5686 err = check_ptr_to_map_access(env, regs, regno, off, size, t,
5687 value_regno);
20b2aff4
HL
5688 } else if (base_type(reg->type) == PTR_TO_BUF) {
5689 bool rdonly_mem = type_is_rdonly_mem(reg->type);
20b2aff4
HL
5690 u32 *max_access;
5691
5692 if (rdonly_mem) {
5693 if (t == BPF_WRITE) {
5694 verbose(env, "R%d cannot write into %s\n",
5695 regno, reg_type_str(env, reg->type));
5696 return -EACCES;
5697 }
20b2aff4
HL
5698 max_access = &env->prog->aux->max_rdonly_access;
5699 } else {
20b2aff4 5700 max_access = &env->prog->aux->max_rdwr_access;
afbf21dc 5701 }
20b2aff4 5702
f6dfbe31 5703 err = check_buffer_access(env, reg, regno, off, size, false,
44e9a741 5704 max_access);
20b2aff4
HL
5705
5706 if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
afbf21dc 5707 mark_reg_unknown(env, regs, value_regno);
17a52670 5708 } else {
61bd5218 5709 verbose(env, "R%d invalid mem access '%s'\n", regno,
c25b2ae1 5710 reg_type_str(env, reg->type));
17a52670
AS
5711 return -EACCES;
5712 }
969bf05e 5713
f1174f77 5714 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
638f5b90 5715 regs[value_regno].type == SCALAR_VALUE) {
f1174f77 5716 /* b/h/w load zero-extends, mark upper bits as known 0 */
0c17d1d2 5717 coerce_reg_to_size(&regs[value_regno], size);
969bf05e 5718 }
17a52670
AS
5719 return err;
5720}
5721
91c960b0 5722static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
17a52670 5723{
5ffa2550 5724 int load_reg;
17a52670
AS
5725 int err;
5726
5ca419f2
BJ
5727 switch (insn->imm) {
5728 case BPF_ADD:
5729 case BPF_ADD | BPF_FETCH:
981f94c3
BJ
5730 case BPF_AND:
5731 case BPF_AND | BPF_FETCH:
5732 case BPF_OR:
5733 case BPF_OR | BPF_FETCH:
5734 case BPF_XOR:
5735 case BPF_XOR | BPF_FETCH:
5ffa2550
BJ
5736 case BPF_XCHG:
5737 case BPF_CMPXCHG:
5ca419f2
BJ
5738 break;
5739 default:
91c960b0
BJ
5740 verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
5741 return -EINVAL;
5742 }
5743
5744 if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
5745 verbose(env, "invalid atomic operand size\n");
17a52670
AS
5746 return -EINVAL;
5747 }
5748
5749 /* check src1 operand */
dc503a8a 5750 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
5751 if (err)
5752 return err;
5753
5754 /* check src2 operand */
dc503a8a 5755 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
5756 if (err)
5757 return err;
5758
5ffa2550
BJ
5759 if (insn->imm == BPF_CMPXCHG) {
5760 /* Check comparison of R0 with memory location */
a82fe085
DB
5761 const u32 aux_reg = BPF_REG_0;
5762
5763 err = check_reg_arg(env, aux_reg, SRC_OP);
5ffa2550
BJ
5764 if (err)
5765 return err;
a82fe085
DB
5766
5767 if (is_pointer_value(env, aux_reg)) {
5768 verbose(env, "R%d leaks addr into mem\n", aux_reg);
5769 return -EACCES;
5770 }
5ffa2550
BJ
5771 }
5772
6bdf6abc 5773 if (is_pointer_value(env, insn->src_reg)) {
61bd5218 5774 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
6bdf6abc
DB
5775 return -EACCES;
5776 }
5777
ca369602 5778 if (is_ctx_reg(env, insn->dst_reg) ||
4b5defde 5779 is_pkt_reg(env, insn->dst_reg) ||
46f8bc92
MKL
5780 is_flow_key_reg(env, insn->dst_reg) ||
5781 is_sk_reg(env, insn->dst_reg)) {
91c960b0 5782 verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
2a159c6f 5783 insn->dst_reg,
c25b2ae1 5784 reg_type_str(env, reg_state(env, insn->dst_reg)->type));
f37a8cb8
DB
5785 return -EACCES;
5786 }
5787
37086bfd
BJ
5788 if (insn->imm & BPF_FETCH) {
5789 if (insn->imm == BPF_CMPXCHG)
5790 load_reg = BPF_REG_0;
5791 else
5792 load_reg = insn->src_reg;
5793
5794 /* check and record load of old value */
5795 err = check_reg_arg(env, load_reg, DST_OP);
5796 if (err)
5797 return err;
5798 } else {
5799 /* This instruction accesses a memory location but doesn't
5800 * actually load it into a register.
5801 */
5802 load_reg = -1;
5803 }
5804
7d3baf0a
DB
5805 /* Check whether we can read the memory, with second call for fetch
5806 * case to simulate the register fill.
5807 */
31fd8581 5808 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
7d3baf0a
DB
5809 BPF_SIZE(insn->code), BPF_READ, -1, true);
5810 if (!err && load_reg >= 0)
5811 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
5812 BPF_SIZE(insn->code), BPF_READ, load_reg,
5813 true);
17a52670
AS
5814 if (err)
5815 return err;
5816
7d3baf0a 5817 /* Check whether we can write into the same memory. */
5ca419f2
BJ
5818 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
5819 BPF_SIZE(insn->code), BPF_WRITE, -1, true);
5820 if (err)
5821 return err;
5822
5ca419f2 5823 return 0;
17a52670
AS
5824}
5825
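/* Illustrative BPF-C sketch of code that reaches this path; with clang,
 * __sync_fetch_and_add() on a map value is emitted as a BPF_ATOMIC add
 * (with BPF_FETCH when the result is used). 'counters' and 'key' are
 * placeholders for a map with __u64 values and its lookup key:
 *
 *   __u64 *cnt = bpf_map_lookup_elem(&counters, &key);
 *   if (cnt)
 *           __sync_fetch_and_add(cnt, 1);
 *
 * The destination must be plain memory; the checks above reject atomics
 * whose dst_reg is a ctx, packet, flow_keys or socket pointer.
 */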
01f810ac
AM
5826/* When register 'regno' is used to read the stack (either directly or through
5827 * a helper function) make sure that it's within stack boundary and, depending
5828 * on the access type, that all elements of the stack are initialized.
5829 *
5830 * 'off' includes 'regno->off', but not its dynamic part (if any).
5831 *
5832 * All registers that have been spilled on the stack in the slots within the
5833 * read offsets are marked as read.
5834 */
5835static int check_stack_range_initialized(
5836 struct bpf_verifier_env *env, int regno, int off,
5837 int access_size, bool zero_size_allowed,
61df10c7 5838 enum bpf_access_src type, struct bpf_call_arg_meta *meta)
2011fccf
AI
5839{
5840 struct bpf_reg_state *reg = reg_state(env, regno);
01f810ac
AM
5841 struct bpf_func_state *state = func(env, reg);
5842 int err, min_off, max_off, i, j, slot, spi;
5843 char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
5844 enum bpf_access_type bounds_check_type;
5845 /* Some accesses can write anything into the stack, others are
5846 * read-only.
5847 */
5848 bool clobber = false;
2011fccf 5849
01f810ac
AM
5850 if (access_size == 0 && !zero_size_allowed) {
5851 verbose(env, "invalid zero-sized read\n");
2011fccf
AI
5852 return -EACCES;
5853 }
2011fccf 5854
01f810ac
AM
5855 if (type == ACCESS_HELPER) {
5856 /* The bounds checks for writes are more permissive than for
5857 * reads. However, if raw_mode is not set, we'll do extra
5858 * checks below.
5859 */
5860 bounds_check_type = BPF_WRITE;
5861 clobber = true;
5862 } else {
5863 bounds_check_type = BPF_READ;
5864 }
5865 err = check_stack_access_within_bounds(env, regno, off, access_size,
5866 type, bounds_check_type);
5867 if (err)
5868 return err;
5869
17a52670 5870
2011fccf 5871 if (tnum_is_const(reg->var_off)) {
01f810ac 5872 min_off = max_off = reg->var_off.value + off;
2011fccf 5873 } else {
088ec26d
AI
5874 /* Variable offset is prohibited for unprivileged mode for
5875 * simplicity since it requires corresponding support in
5876 * Spectre masking for stack ALU.
5877 * See also retrieve_ptr_limit().
5878 */
2c78ee89 5879 if (!env->bypass_spec_v1) {
088ec26d 5880 char tn_buf[48];
f1174f77 5881
088ec26d 5882 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
01f810ac
AM
5883 verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
5884 regno, err_extra, tn_buf);
088ec26d
AI
5885 return -EACCES;
5886 }
f2bcd05e
AI
5887 /* Only an initialized buffer on the stack is allowed to be accessed
5888 * with a variable offset. With an uninitialized buffer it's hard to
5889 * guarantee that the whole memory is marked as initialized on
5890 * helper return since the specific bounds are unknown, which may
5891 * cause uninitialized stack contents to leak.
5892 */
5893 if (meta && meta->raw_mode)
5894 meta = NULL;
5895
01f810ac
AM
5896 min_off = reg->smin_value + off;
5897 max_off = reg->smax_value + off;
17a52670
AS
5898 }
5899
435faee1 5900 if (meta && meta->raw_mode) {
ef8fc7a0
KKD
5901 /* Ensure we won't be overwriting dynptrs when simulating byte
5902 * by byte access in check_helper_call using meta.access_size.
5903 * This would be a problem if we have a helper in the future
5904 * which takes:
5905 *
5906 * helper(uninit_mem, len, dynptr)
5907 *
5908 * Now, uninit_mem may overlap with the dynptr pointer. Hence, it
5909 * may end up writing to the dynptr itself when touching memory from
5910 * arg 1. This can be relaxed on a case by case basis for known
5911 * safe cases, but reject due to the possibility of aliasing by
5912 * default.
5913 */
5914 for (i = min_off; i < max_off + access_size; i++) {
5915 int stack_off = -i - 1;
5916
5917 spi = __get_spi(i);
5918 /* raw_mode may write past allocated_stack */
5919 if (state->allocated_stack <= stack_off)
5920 continue;
5921 if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
5922 verbose(env, "potential write to dynptr at off=%d disallowed\n", i);
5923 return -EACCES;
5924 }
5925 }
435faee1
DB
5926 meta->access_size = access_size;
5927 meta->regno = regno;
5928 return 0;
5929 }
5930
2011fccf 5931 for (i = min_off; i < max_off + access_size; i++) {
cc2b14d5
AS
5932 u8 *stype;
5933
2011fccf 5934 slot = -i - 1;
638f5b90 5935 spi = slot / BPF_REG_SIZE;
cc2b14d5
AS
5936 if (state->allocated_stack <= slot)
5937 goto err;
5938 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
5939 if (*stype == STACK_MISC)
5940 goto mark;
6715df8d
EZ
5941 if ((*stype == STACK_ZERO) ||
5942 (*stype == STACK_INVALID && env->allow_uninit_stack)) {
01f810ac
AM
5943 if (clobber) {
5944 /* helper can write anything into the stack */
5945 *stype = STACK_MISC;
5946 }
cc2b14d5 5947 goto mark;
17a52670 5948 }
1d68f22b 5949
27113c59 5950 if (is_spilled_reg(&state->stack[spi]) &&
cd17d38f
YS
5951 (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
5952 env->allow_ptr_leaks)) {
01f810ac
AM
5953 if (clobber) {
5954 __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
5955 for (j = 0; j < BPF_REG_SIZE; j++)
354e8f19 5956 scrub_spilled_slot(&state->stack[spi].slot_type[j]);
01f810ac 5957 }
f7cf25b2
AS
5958 goto mark;
5959 }
5960
cc2b14d5 5961err:
2011fccf 5962 if (tnum_is_const(reg->var_off)) {
01f810ac
AM
5963 verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
5964 err_extra, regno, min_off, i - min_off, access_size);
2011fccf
AI
5965 } else {
5966 char tn_buf[48];
5967
5968 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
01f810ac
AM
5969 verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
5970 err_extra, regno, tn_buf, i - min_off, access_size);
2011fccf 5971 }
cc2b14d5
AS
5972 return -EACCES;
5973mark:
5974 /* reading any byte out of 8-byte 'spill_slot' will cause
5975 * the whole slot to be marked as 'read'
5976 */
679c782d 5977 mark_reg_read(env, &state->stack[spi].spilled_ptr,
5327ed3d
JW
5978 state->stack[spi].spilled_ptr.parent,
5979 REG_LIVE_READ64);
261f4664
KKD
5980 /* We do not set REG_LIVE_WRITTEN for the stack slot, as we cannot
5981 * be sure whether the stack slot is written to or not. Hence,
5982 * we must still conservatively propagate reads upwards even if
5983 * the helper may write to the entire memory range.
5984 */
17a52670 5985 }
2011fccf 5986 return update_stack_depth(env, state, min_off);
17a52670
AS
5987}
5988
06c1c049
GB
5989static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
5990 int access_size, bool zero_size_allowed,
5991 struct bpf_call_arg_meta *meta)
5992{
638f5b90 5993 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
20b2aff4 5994 u32 *max_access;
06c1c049 5995
20b2aff4 5996 switch (base_type(reg->type)) {
06c1c049 5997 case PTR_TO_PACKET:
de8f3a83 5998 case PTR_TO_PACKET_META:
9fd29c08
YS
5999 return check_packet_access(env, regno, reg->off, access_size,
6000 zero_size_allowed);
69c087ba 6001 case PTR_TO_MAP_KEY:
7b3552d3
KKD
6002 if (meta && meta->raw_mode) {
6003 verbose(env, "R%d cannot write into %s\n", regno,
6004 reg_type_str(env, reg->type));
6005 return -EACCES;
6006 }
69c087ba
YS
6007 return check_mem_region_access(env, regno, reg->off, access_size,
6008 reg->map_ptr->key_size, false);
06c1c049 6009 case PTR_TO_MAP_VALUE:
591fe988
DB
6010 if (check_map_access_type(env, regno, reg->off, access_size,
6011 meta && meta->raw_mode ? BPF_WRITE :
6012 BPF_READ))
6013 return -EACCES;
9fd29c08 6014 return check_map_access(env, regno, reg->off, access_size,
61df10c7 6015 zero_size_allowed, ACCESS_HELPER);
457f4436 6016 case PTR_TO_MEM:
97e6d7da
KKD
6017 if (type_is_rdonly_mem(reg->type)) {
6018 if (meta && meta->raw_mode) {
6019 verbose(env, "R%d cannot write into %s\n", regno,
6020 reg_type_str(env, reg->type));
6021 return -EACCES;
6022 }
6023 }
457f4436
AN
6024 return check_mem_region_access(env, regno, reg->off,
6025 access_size, reg->mem_size,
6026 zero_size_allowed);
20b2aff4
HL
6027 case PTR_TO_BUF:
6028 if (type_is_rdonly_mem(reg->type)) {
97e6d7da
KKD
6029 if (meta && meta->raw_mode) {
6030 verbose(env, "R%d cannot write into %s\n", regno,
6031 reg_type_str(env, reg->type));
20b2aff4 6032 return -EACCES;
97e6d7da 6033 }
20b2aff4 6034
20b2aff4
HL
6035 max_access = &env->prog->aux->max_rdonly_access;
6036 } else {
20b2aff4
HL
6037 max_access = &env->prog->aux->max_rdwr_access;
6038 }
afbf21dc
YS
6039 return check_buffer_access(env, reg, regno, reg->off,
6040 access_size, zero_size_allowed,
44e9a741 6041 max_access);
0d004c02 6042 case PTR_TO_STACK:
01f810ac
AM
6043 return check_stack_range_initialized(
6044 env,
6045 regno, reg->off, access_size,
6046 zero_size_allowed, ACCESS_HELPER, meta);
15baa55f
BT
6047 case PTR_TO_CTX:
6048 /* in case the function doesn't know how to access the context
6049 * (because we are in a program of type SYSCALL for example), we
6050 * cannot statically check its size.
6051 * Dynamically check it now.
6052 */
6053 if (!env->ops->convert_ctx_access) {
6054 enum bpf_access_type atype = meta && meta->raw_mode ? BPF_WRITE : BPF_READ;
6055 int offset = access_size - 1;
6056
6057 /* Allow zero-byte read from PTR_TO_CTX */
6058 if (access_size == 0)
6059 return zero_size_allowed ? 0 : -EACCES;
6060
6061 return check_mem_access(env, env->insn_idx, regno, offset, BPF_B,
6062 atype, -1, false);
6063 }
6064
6065 fallthrough;
0d004c02
LB
6066 default: /* scalar_value or invalid ptr */
6067 /* Allow zero-byte read from NULL, regardless of pointer type */
6068 if (zero_size_allowed && access_size == 0 &&
6069 register_is_null(reg))
6070 return 0;
6071
c25b2ae1
HL
6072 verbose(env, "R%d type=%s ", regno,
6073 reg_type_str(env, reg->type));
6074 verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
0d004c02 6075 return -EACCES;
06c1c049
GB
6076 }
6077}
6078
d583691c
KKD
6079static int check_mem_size_reg(struct bpf_verifier_env *env,
6080 struct bpf_reg_state *reg, u32 regno,
6081 bool zero_size_allowed,
6082 struct bpf_call_arg_meta *meta)
6083{
6084 int err;
6085
6086 /* This is used to refine r0 return value bounds for helpers
6087 * that enforce this value as an upper bound on return values.
6088 * See do_refine_retval_range() for helpers that can refine
6089 * the return value. The C type of the helper's size argument is u32, so
6090 * we pull the register bound from umax_value; if it can be negative, the
6091 * verifier errors out. Only upper bounds can be learned because retval is an
6092 * int type and negative retvals are allowed.
6093 */
be77354a 6094 meta->msize_max_value = reg->umax_value;
d583691c
KKD
6095
6096 /* The register is SCALAR_VALUE; the access check
6097 * happens using its boundaries.
6098 */
6099 if (!tnum_is_const(reg->var_off))
6100 /* For unprivileged variable accesses, disable raw
6101 * mode so that the program is required to
6102 * initialize all the memory that the helper could
6103 * just partially fill up.
6104 */
6105 meta = NULL;
6106
6107 if (reg->smin_value < 0) {
6108 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
6109 regno);
6110 return -EACCES;
6111 }
6112
6113 if (reg->umin_value == 0) {
6114 err = check_helper_mem_access(env, regno - 1, 0,
6115 zero_size_allowed,
6116 meta);
6117 if (err)
6118 return err;
6119 }
6120
6121 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
6122 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
6123 regno);
6124 return -EACCES;
6125 }
6126 err = check_helper_mem_access(env, regno - 1,
6127 reg->umax_value,
6128 zero_size_allowed, meta);
6129 if (!err)
6130 err = mark_chain_precision(env, regno);
6131 return err;
6132}
6133
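/* Sketch of the pattern this check enforces on the BPF side: the size
 * register must carry a known upper bound before being passed to a helper
 * that takes a memory + size pair. 'src' and get_len() are placeholders:
 *
 *   char buf[64];
 *   __u32 len = get_len();
 *
 *   if (len > sizeof(buf))
 *           return 0;
 *   bpf_probe_read_kernel(buf, len, src);
 *
 * Without the bounding 'if', umax_value can exceed the buffer size and the
 * call is rejected as an unbounded or out-of-bounds memory access.
 */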
e5069b9c
DB
6134int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
6135 u32 regno, u32 mem_size)
6136{
be77354a
KKD
6137 bool may_be_null = type_may_be_null(reg->type);
6138 struct bpf_reg_state saved_reg;
6139 struct bpf_call_arg_meta meta;
6140 int err;
6141
e5069b9c
DB
6142 if (register_is_null(reg))
6143 return 0;
6144
be77354a
KKD
6145 memset(&meta, 0, sizeof(meta));
6146 /* Assuming that the register contains a value check if the memory
6147 * access is safe. Temporarily save and restore the register's state as
6148 * the conversion shouldn't be visible to a caller.
6149 */
6150 if (may_be_null) {
6151 saved_reg = *reg;
e5069b9c 6152 mark_ptr_not_null_reg(reg);
e5069b9c
DB
6153 }
6154
be77354a
KKD
6155 err = check_helper_mem_access(env, regno, mem_size, true, &meta);
6156 /* Check access for BPF_WRITE */
6157 meta.raw_mode = true;
6158 err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta);
6159
6160 if (may_be_null)
6161 *reg = saved_reg;
6162
6163 return err;
e5069b9c
DB
6164}
6165
00b85860
KKD
6166static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
6167 u32 regno)
d583691c
KKD
6168{
6169 struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
6170 bool may_be_null = type_may_be_null(mem_reg->type);
6171 struct bpf_reg_state saved_reg;
be77354a 6172 struct bpf_call_arg_meta meta;
d583691c
KKD
6173 int err;
6174
6175 WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
6176
be77354a
KKD
6177 memset(&meta, 0, sizeof(meta));
6178
d583691c
KKD
6179 if (may_be_null) {
6180 saved_reg = *mem_reg;
6181 mark_ptr_not_null_reg(mem_reg);
6182 }
6183
be77354a
KKD
6184 err = check_mem_size_reg(env, reg, regno, true, &meta);
6185 /* Check access for BPF_WRITE */
6186 meta.raw_mode = true;
6187 err = err ?: check_mem_size_reg(env, reg, regno, true, &meta);
d583691c
KKD
6188
6189 if (may_be_null)
6190 *mem_reg = saved_reg;
6191 return err;
6192}
6193
d83525ca 6194/* Implementation details:
4e814da0
KKD
6195 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
6196 * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL.
d83525ca 6197 * Two bpf_map_lookups (even with the same key) will have different reg->id.
4e814da0
KKD
6198 * Two separate bpf_obj_new will also have different reg->id.
6199 * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier
6200 * clears reg->id after value_or_null->value transition, since the verifier only
6201 * cares about the range of access to valid map value pointer and doesn't care
6202 * about actual address of the map element.
d83525ca
AS
6203 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
6204 * reg->id > 0 after value_or_null->value transition. By doing so
6205 * two bpf_map_lookups will be considered two different pointers that
4e814da0
KKD
6206 * point to different bpf_spin_locks. Likewise for pointers to allocated objects
6207 * returned from bpf_obj_new.
d83525ca
AS
6208 * The verifier allows taking only one bpf_spin_lock at a time to avoid
6209 * dead-locks.
6210 * Since only one bpf_spin_lock is allowed the checks are simpler than
6211 * reg_is_refcounted() logic. The verifier needs to remember only
6212 * one spin_lock instead of array of acquired_refs.
d0d78c1d 6213 * cur_state->active_lock remembers which map value element or allocated
4e814da0 6214 * object got locked and clears it after bpf_spin_unlock.
d83525ca
AS
6215 */
6216static int process_spin_lock(struct bpf_verifier_env *env, int regno,
6217 bool is_lock)
6218{
6219 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6220 struct bpf_verifier_state *cur = env->cur_state;
6221 bool is_const = tnum_is_const(reg->var_off);
d83525ca 6222 u64 val = reg->var_off.value;
4e814da0
KKD
6223 struct bpf_map *map = NULL;
6224 struct btf *btf = NULL;
6225 struct btf_record *rec;
d83525ca 6226
d83525ca
AS
6227 if (!is_const) {
6228 verbose(env,
6229 "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
6230 regno);
6231 return -EINVAL;
6232 }
4e814da0
KKD
6233 if (reg->type == PTR_TO_MAP_VALUE) {
6234 map = reg->map_ptr;
6235 if (!map->btf) {
6236 verbose(env,
6237 "map '%s' has to have BTF in order to use bpf_spin_lock\n",
6238 map->name);
6239 return -EINVAL;
6240 }
6241 } else {
6242 btf = reg->btf;
d83525ca 6243 }
4e814da0
KKD
6244
6245 rec = reg_btf_record(reg);
6246 if (!btf_record_has_field(rec, BPF_SPIN_LOCK)) {
6247 verbose(env, "%s '%s' has no valid bpf_spin_lock\n", map ? "map" : "local",
6248 map ? map->name : "kptr");
d83525ca
AS
6249 return -EINVAL;
6250 }
4e814da0 6251 if (rec->spin_lock_off != val + reg->off) {
db559117 6252 verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock' that is at %d\n",
4e814da0 6253 val + reg->off, rec->spin_lock_off);
d83525ca
AS
6254 return -EINVAL;
6255 }
6256 if (is_lock) {
d0d78c1d 6257 if (cur->active_lock.ptr) {
d83525ca
AS
6258 verbose(env,
6259 "Locking two bpf_spin_locks are not allowed\n");
6260 return -EINVAL;
6261 }
d0d78c1d
KKD
6262 if (map)
6263 cur->active_lock.ptr = map;
6264 else
6265 cur->active_lock.ptr = btf;
6266 cur->active_lock.id = reg->id;
d83525ca 6267 } else {
d0d78c1d
KKD
6268 void *ptr;
6269
6270 if (map)
6271 ptr = map;
6272 else
6273 ptr = btf;
6274
6275 if (!cur->active_lock.ptr) {
d83525ca
AS
6276 verbose(env, "bpf_spin_unlock without taking a lock\n");
6277 return -EINVAL;
6278 }
d0d78c1d
KKD
6279 if (cur->active_lock.ptr != ptr ||
6280 cur->active_lock.id != reg->id) {
d83525ca
AS
6281 verbose(env, "bpf_spin_unlock of different lock\n");
6282 return -EINVAL;
6283 }
534e86bc 6284
6a3cd331 6285 invalidate_non_owning_refs(env);
534e86bc 6286
6a3cd331
DM
6287 cur->active_lock.ptr = NULL;
6288 cur->active_lock.id = 0;
d83525ca
AS
6289 }
6290 return 0;
6291}
6292
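/* Minimal BPF-side sketch of the locking contract described above;
 * 'lock_map' and 'key' are placeholders and the map must carry BTF for
 * its value type:
 *
 *   struct val {
 *           struct bpf_spin_lock lock;
 *           int data;
 *   };
 *
 *   struct val *v = bpf_map_lookup_elem(&lock_map, &key);
 *   if (v) {
 *           bpf_spin_lock(&v->lock);
 *           v->data++;
 *           bpf_spin_unlock(&v->lock);
 *   }
 *
 * Taking a second bpf_spin_lock before the unlock, or unlocking a different
 * lock, trips the errors emitted above.
 */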
b00628b1
AS
6293static int process_timer_func(struct bpf_verifier_env *env, int regno,
6294 struct bpf_call_arg_meta *meta)
6295{
6296 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6297 bool is_const = tnum_is_const(reg->var_off);
6298 struct bpf_map *map = reg->map_ptr;
6299 u64 val = reg->var_off.value;
6300
6301 if (!is_const) {
6302 verbose(env,
6303 "R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
6304 regno);
6305 return -EINVAL;
6306 }
6307 if (!map->btf) {
6308 verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
6309 map->name);
6310 return -EINVAL;
6311 }
db559117
KKD
6312 if (!btf_record_has_field(map->record, BPF_TIMER)) {
6313 verbose(env, "map '%s' has no valid bpf_timer\n", map->name);
68134668
AS
6314 return -EINVAL;
6315 }
db559117 6316 if (map->record->timer_off != val + reg->off) {
68134668 6317 verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
db559117 6318 val + reg->off, map->record->timer_off);
b00628b1
AS
6319 return -EINVAL;
6320 }
6321 if (meta->map_ptr) {
6322 verbose(env, "verifier bug. Two map pointers in a timer helper\n");
6323 return -EFAULT;
6324 }
3e8ce298 6325 meta->map_uid = reg->map_uid;
b00628b1
AS
6326 meta->map_ptr = map;
6327 return 0;
6328}
6329
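/* Editor's note: an illustrative BPF-program-side sketch of what
 * process_timer_func() above checks; not part of this file, names invented,
 * usual libbpf includes assumed. The struct bpf_timer must live at a constant
 * offset in a BTF-described map value, and the same map must later be passed
 * to bpf_timer_init():
 *
 *	struct elem {
 *		struct bpf_timer t;
 *	};
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, int);
 *		__type(value, struct elem);
 *	} timers SEC(".maps");
 *
 *	static int timer_cb(void *map, int *key, struct elem *val)
 *	{
 *		return 0;
 *	}
 *
 *	...in some program:
 *		int key = 0;
 *		struct elem *val = bpf_map_lookup_elem(&timers, &key);
 *
 *		if (!val)
 *			return 0;
 *		bpf_timer_init(&val->t, &timers, CLOCK_MONOTONIC);
 *		bpf_timer_set_callback(&val->t, timer_cb);
 *		bpf_timer_start(&val->t, 1000, 0);	fire ~1000 ns from now
 */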
c0a5a21c
KKD
6330static int process_kptr_func(struct bpf_verifier_env *env, int regno,
6331 struct bpf_call_arg_meta *meta)
6332{
6333 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
c0a5a21c 6334 struct bpf_map *map_ptr = reg->map_ptr;
aa3496ac 6335 struct btf_field *kptr_field;
c0a5a21c 6336 u32 kptr_off;
c0a5a21c
KKD
6337
6338 if (!tnum_is_const(reg->var_off)) {
6339 verbose(env,
6340 "R%d doesn't have constant offset. kptr has to be at the constant offset\n",
6341 regno);
6342 return -EINVAL;
6343 }
6344 if (!map_ptr->btf) {
6345 verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
6346 map_ptr->name);
6347 return -EINVAL;
6348 }
aa3496ac
KKD
6349 if (!btf_record_has_field(map_ptr->record, BPF_KPTR)) {
6350 verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
c0a5a21c
KKD
6351 return -EINVAL;
6352 }
6353
6354 meta->map_ptr = map_ptr;
6355 kptr_off = reg->off + reg->var_off.value;
aa3496ac
KKD
6356 kptr_field = btf_record_find(map_ptr->record, kptr_off, BPF_KPTR);
6357 if (!kptr_field) {
c0a5a21c
KKD
6358 verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
6359 return -EACCES;
6360 }
aa3496ac 6361 if (kptr_field->type != BPF_KPTR_REF) {
c0a5a21c
KKD
6362 verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
6363 return -EACCES;
6364 }
aa3496ac 6365 meta->kptr_field = kptr_field;
c0a5a21c
KKD
6366 return 0;
6367}
6368
27060531
KKD
6369/* There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
6370 * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
6371 *
6372 * In both cases we deal with the first 8 bytes, but need to mark the next 8
6373 * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
6374 * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
6375 *
6376 * Mutability of bpf_dynptr is at two levels: one is at the level of struct
6377 * bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct
6378 * bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can
6379 * mutate the view of the dynptr and also possibly destroy it. In the latter
6380 * case, it cannot mutate the bpf_dynptr itself but it can still mutate the
6381 * memory that dynptr points to.
6382 *
6383 * The verifier will keep track of both levels of mutation (bpf_dynptr's in
6384 * reg->type and the memory's in reg->dynptr.type), but there is no support for
6385 * readonly dynptr view yet, hence only the first case is tracked and checked.
6386 *
6387 * This is consistent with how C applies the const modifier to a struct object,
6388 * where the pointer itself inside bpf_dynptr becomes const but not what it
6389 * points to.
6390 *
6391 * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument
6392 * type, and declare it as 'const struct bpf_dynptr *' in their prototype.
6393 */
1d18feb2
JK
6394static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx,
6395 enum bpf_arg_type arg_type)
6b75bd3d
KKD
6396{
6397 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
1d18feb2 6398 int err;
6b75bd3d 6399
27060531
KKD
6400 /* MEM_UNINIT and MEM_RDONLY are mutually exclusive when applied to an
6401 * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
6402 */
6403 if ((arg_type & (MEM_UNINIT | MEM_RDONLY)) == (MEM_UNINIT | MEM_RDONLY)) {
6404 verbose(env, "verifier internal error: misconfigured dynptr helper type flags\n");
6405 return -EFAULT;
6406 }
79168a66 6407
27060531
KKD
6408 /* MEM_UNINIT - Points to memory that is an appropriate candidate for
6409 * constructing a mutable bpf_dynptr object.
6410 *
6411 * Currently, this is only possible with PTR_TO_STACK
6412 * pointing to a region of at least 16 bytes which doesn't
6413 * contain an existing bpf_dynptr.
6414 *
6415 * MEM_RDONLY - Points to an initialized bpf_dynptr that will not be
6416 * mutated or destroyed. However, the memory it points to
6417 * may be mutated.
6418 *
6419 * None - Points to an initialized dynptr that can be mutated and
6420 * destroyed, including mutation of the memory it points
6421 * to.
6b75bd3d 6422 */
6b75bd3d 6423 if (arg_type & MEM_UNINIT) {
1d18feb2
JK
6424 int i;
6425
7e0dac28 6426 if (!is_dynptr_reg_valid_uninit(env, reg)) {
6b75bd3d
KKD
6427 verbose(env, "Dynptr has to be an uninitialized dynptr\n");
6428 return -EINVAL;
6429 }
6430
1d18feb2
JK
6431 /* we write BPF_DW bits (8 bytes) at a time */
6432 for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
6433 err = check_mem_access(env, insn_idx, regno,
6434 i, BPF_DW, BPF_WRITE, -1, false);
6435 if (err)
6436 return err;
6b75bd3d
KKD
6437 }
6438
1d18feb2 6439 err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx);
27060531
KKD
6440 } else /* MEM_RDONLY and None case from above */ {
6441 /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
6442 if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
6443 verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
6444 return -EINVAL;
6445 }
6446
7e0dac28 6447 if (!is_dynptr_reg_valid_init(env, reg)) {
6b75bd3d
KKD
6448 verbose(env,
6449 "Expected an initialized dynptr as arg #%d\n",
6450 regno);
6451 return -EINVAL;
6452 }
6453
27060531
KKD
6454 /* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */
6455 if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) {
6b75bd3d
KKD
6456 verbose(env,
6457 "Expected a dynptr of type %s as arg #%d\n",
d54e0f6c 6458 dynptr_type_str(arg_to_dynptr_type(arg_type)), regno);
6b75bd3d
KKD
6459 return -EINVAL;
6460 }
d6fefa11
KKD
6461
6462 err = mark_dynptr_read(env, reg);
6b75bd3d 6463 }
1d18feb2 6464 return err;
6b75bd3d
KKD
6465}
6466
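/* Editor's note: a small BPF-program-side sketch of the three argument
 * flavours handled by process_dynptr_func() above; not part of this file,
 * names invented, usual libbpf includes and a BPF_MAP_TYPE_RINGBUF map 'rb'
 * assumed:
 *
 *	struct bpf_dynptr dptr;		stack slots start out uninitialized
 *	__u64 val = 42;
 *
 *	bpf_ringbuf_reserve_dynptr(&rb, 16, 0, &dptr);
 *		MEM_UNINIT argument: constructs the dynptr in the stack slots
 *
 *	bpf_dynptr_write(&dptr, 0, &val, sizeof(val), 0);
 *		const dynptr argument: the dynptr view itself is not changed,
 *		only the memory behind it
 *
 *	bpf_ringbuf_submit_dynptr(&dptr, 0);
 *		release argument: the dynptr is destroyed and its stack slots
 *		are marked invalid again
 */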
90133415
DB
6467static bool arg_type_is_mem_size(enum bpf_arg_type type)
6468{
6469 return type == ARG_CONST_SIZE ||
6470 type == ARG_CONST_SIZE_OR_ZERO;
6471}
6472
8f14852e
KKD
6473static bool arg_type_is_release(enum bpf_arg_type type)
6474{
6475 return type & OBJ_RELEASE;
6476}
6477
97e03f52
JK
6478static bool arg_type_is_dynptr(enum bpf_arg_type type)
6479{
6480 return base_type(type) == ARG_PTR_TO_DYNPTR;
6481}
6482
57c3bb72
AI
6483static int int_ptr_type_to_size(enum bpf_arg_type type)
6484{
6485 if (type == ARG_PTR_TO_INT)
6486 return sizeof(u32);
6487 else if (type == ARG_PTR_TO_LONG)
6488 return sizeof(u64);
6489
6490 return -EINVAL;
6491}
6492
912f442c
LB
6493static int resolve_map_arg_type(struct bpf_verifier_env *env,
6494 const struct bpf_call_arg_meta *meta,
6495 enum bpf_arg_type *arg_type)
6496{
6497 if (!meta->map_ptr) {
6498 /* kernel subsystem misconfigured verifier */
6499 verbose(env, "invalid map_ptr to access map->type\n");
6500 return -EACCES;
6501 }
6502
6503 switch (meta->map_ptr->map_type) {
6504 case BPF_MAP_TYPE_SOCKMAP:
6505 case BPF_MAP_TYPE_SOCKHASH:
6506 if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
6550f2dd 6507 *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
912f442c
LB
6508 } else {
6509 verbose(env, "invalid arg_type for sockmap/sockhash\n");
6510 return -EINVAL;
6511 }
6512 break;
9330986c
JK
6513 case BPF_MAP_TYPE_BLOOM_FILTER:
6514 if (meta->func_id == BPF_FUNC_map_peek_elem)
6515 *arg_type = ARG_PTR_TO_MAP_VALUE;
6516 break;
912f442c
LB
6517 default:
6518 break;
6519 }
6520 return 0;
6521}
6522
f79e7ea5
LB
6523struct bpf_reg_types {
6524 const enum bpf_reg_type types[10];
1df8f55a 6525 u32 *btf_id;
f79e7ea5
LB
6526};
6527
f79e7ea5
LB
6528static const struct bpf_reg_types sock_types = {
6529 .types = {
6530 PTR_TO_SOCK_COMMON,
6531 PTR_TO_SOCKET,
6532 PTR_TO_TCP_SOCK,
6533 PTR_TO_XDP_SOCK,
6534 },
6535};
6536
49a2a4d4 6537#ifdef CONFIG_NET
1df8f55a
MKL
6538static const struct bpf_reg_types btf_id_sock_common_types = {
6539 .types = {
6540 PTR_TO_SOCK_COMMON,
6541 PTR_TO_SOCKET,
6542 PTR_TO_TCP_SOCK,
6543 PTR_TO_XDP_SOCK,
6544 PTR_TO_BTF_ID,
3f00c523 6545 PTR_TO_BTF_ID | PTR_TRUSTED,
1df8f55a
MKL
6546 },
6547 .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
6548};
49a2a4d4 6549#endif
1df8f55a 6550
f79e7ea5
LB
6551static const struct bpf_reg_types mem_types = {
6552 .types = {
6553 PTR_TO_STACK,
6554 PTR_TO_PACKET,
6555 PTR_TO_PACKET_META,
69c087ba 6556 PTR_TO_MAP_KEY,
f79e7ea5
LB
6557 PTR_TO_MAP_VALUE,
6558 PTR_TO_MEM,
894f2a8b 6559 PTR_TO_MEM | MEM_RINGBUF,
20b2aff4 6560 PTR_TO_BUF,
f79e7ea5
LB
6561 },
6562};
6563
6564static const struct bpf_reg_types int_ptr_types = {
6565 .types = {
6566 PTR_TO_STACK,
6567 PTR_TO_PACKET,
6568 PTR_TO_PACKET_META,
69c087ba 6569 PTR_TO_MAP_KEY,
f79e7ea5
LB
6570 PTR_TO_MAP_VALUE,
6571 },
6572};
6573
4e814da0
KKD
6574static const struct bpf_reg_types spin_lock_types = {
6575 .types = {
6576 PTR_TO_MAP_VALUE,
6577 PTR_TO_BTF_ID | MEM_ALLOC,
6578 }
6579};
6580
f79e7ea5
LB
6581static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
6582static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
6583static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
894f2a8b 6584static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } };
f79e7ea5 6585static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
3f00c523
DV
6586static const struct bpf_reg_types btf_ptr_types = {
6587 .types = {
6588 PTR_TO_BTF_ID,
6589 PTR_TO_BTF_ID | PTR_TRUSTED,
fca1aa75 6590 PTR_TO_BTF_ID | MEM_RCU,
3f00c523
DV
6591 },
6592};
6593static const struct bpf_reg_types percpu_btf_ptr_types = {
6594 .types = {
6595 PTR_TO_BTF_ID | MEM_PERCPU,
6596 PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
6597 }
6598};
69c087ba
YS
6599static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
6600static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
fff13c4b 6601static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
b00628b1 6602static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
c0a5a21c 6603static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
20571567
DV
6604static const struct bpf_reg_types dynptr_types = {
6605 .types = {
6606 PTR_TO_STACK,
27060531 6607 CONST_PTR_TO_DYNPTR,
20571567
DV
6608 }
6609};
f79e7ea5 6610
0789e13b 6611static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
d1673304
DM
6612 [ARG_PTR_TO_MAP_KEY] = &mem_types,
6613 [ARG_PTR_TO_MAP_VALUE] = &mem_types,
f79e7ea5
LB
6614 [ARG_CONST_SIZE] = &scalar_types,
6615 [ARG_CONST_SIZE_OR_ZERO] = &scalar_types,
6616 [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types,
6617 [ARG_CONST_MAP_PTR] = &const_map_ptr_types,
6618 [ARG_PTR_TO_CTX] = &context_types,
f79e7ea5 6619 [ARG_PTR_TO_SOCK_COMMON] = &sock_types,
49a2a4d4 6620#ifdef CONFIG_NET
1df8f55a 6621 [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
49a2a4d4 6622#endif
f79e7ea5 6623 [ARG_PTR_TO_SOCKET] = &fullsock_types,
f79e7ea5
LB
6624 [ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
6625 [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
6626 [ARG_PTR_TO_MEM] = &mem_types,
894f2a8b 6627 [ARG_PTR_TO_RINGBUF_MEM] = &ringbuf_mem_types,
f79e7ea5
LB
6628 [ARG_PTR_TO_INT] = &int_ptr_types,
6629 [ARG_PTR_TO_LONG] = &int_ptr_types,
eaa6bcb7 6630 [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
69c087ba 6631 [ARG_PTR_TO_FUNC] = &func_ptr_types,
48946bd6 6632 [ARG_PTR_TO_STACK] = &stack_ptr_types,
fff13c4b 6633 [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
b00628b1 6634 [ARG_PTR_TO_TIMER] = &timer_types,
c0a5a21c 6635 [ARG_PTR_TO_KPTR] = &kptr_types,
20571567 6636 [ARG_PTR_TO_DYNPTR] = &dynptr_types,
f79e7ea5
LB
6637};
6638
6639static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
a968d5e2 6640 enum bpf_arg_type arg_type,
c0a5a21c
KKD
6641 const u32 *arg_btf_id,
6642 struct bpf_call_arg_meta *meta)
f79e7ea5
LB
6643{
6644 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6645 enum bpf_reg_type expected, type = reg->type;
a968d5e2 6646 const struct bpf_reg_types *compatible;
f79e7ea5
LB
6647 int i, j;
6648
48946bd6 6649 compatible = compatible_reg_types[base_type(arg_type)];
a968d5e2
MKL
6650 if (!compatible) {
6651 verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
6652 return -EFAULT;
6653 }
6654
216e3cd2
HL
6655 /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
6656 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
6657 *
6658 * Same for MAYBE_NULL:
6659 *
6660 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
6661 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
6662 *
6663 * Therefore we fold these flags depending on the arg_type before comparison.
6664 */
6665 if (arg_type & MEM_RDONLY)
6666 type &= ~MEM_RDONLY;
6667 if (arg_type & PTR_MAYBE_NULL)
6668 type &= ~PTR_MAYBE_NULL;
6669
f79e7ea5
LB
6670 for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
6671 expected = compatible->types[i];
6672 if (expected == NOT_INIT)
6673 break;
6674
6675 if (type == expected)
a968d5e2 6676 goto found;
f79e7ea5
LB
6677 }
6678
216e3cd2 6679 verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
f79e7ea5 6680 for (j = 0; j + 1 < i; j++)
c25b2ae1
HL
6681 verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
6682 verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
f79e7ea5 6683 return -EACCES;
a968d5e2
MKL
6684
6685found:
da03e43a
KKD
6686 if (base_type(reg->type) != PTR_TO_BTF_ID)
6687 return 0;
6688
6689 switch ((int)reg->type) {
6690 case PTR_TO_BTF_ID:
6691 case PTR_TO_BTF_ID | PTR_TRUSTED:
6692 case PTR_TO_BTF_ID | MEM_RCU:
6693 {
2ab3b380
KKD
6694 /* For bpf_sk_release, it needs to match against first member
6695 * 'struct sock_common', hence make an exception for it. This
6696 * allows bpf_sk_release to work for multiple socket types.
6697 */
6698 bool strict_type_match = arg_type_is_release(arg_type) &&
6699 meta->func_id != BPF_FUNC_sk_release;
6700
1df8f55a
MKL
6701 if (!arg_btf_id) {
6702 if (!compatible->btf_id) {
6703 verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
6704 return -EFAULT;
6705 }
6706 arg_btf_id = compatible->btf_id;
6707 }
6708
c0a5a21c 6709 if (meta->func_id == BPF_FUNC_kptr_xchg) {
aa3496ac 6710 if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
c0a5a21c 6711 return -EACCES;
47e34cb7
DM
6712 } else {
6713 if (arg_btf_id == BPF_PTR_POISON) {
6714 verbose(env, "verifier internal error:");
6715 verbose(env, "R%d has non-overwritten BPF_PTR_POISON type\n",
6716 regno);
6717 return -EACCES;
6718 }
6719
6720 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
6721 btf_vmlinux, *arg_btf_id,
6722 strict_type_match)) {
6723 verbose(env, "R%d is of type %s but %s is expected\n",
6724 regno, kernel_type_name(reg->btf, reg->btf_id),
6725 kernel_type_name(btf_vmlinux, *arg_btf_id));
6726 return -EACCES;
6727 }
a968d5e2 6728 }
da03e43a
KKD
6729 break;
6730 }
6731 case PTR_TO_BTF_ID | MEM_ALLOC:
4e814da0
KKD
6732 if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock) {
6733 verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
6734 return -EFAULT;
6735 }
da03e43a
KKD
6736 /* Handled by helper specific checks */
6737 break;
6738 case PTR_TO_BTF_ID | MEM_PERCPU:
6739 case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED:
6740 /* Handled by helper specific checks */
6741 break;
6742 default:
6743 verbose(env, "verifier internal error: invalid PTR_TO_BTF_ID register for type match\n");
6744 return -EFAULT;
a968d5e2 6745 }
a968d5e2 6746 return 0;
f79e7ea5
LB
6747}
6748
6a3cd331
DM
6749static struct btf_field *
6750reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
6751{
6752 struct btf_field *field;
6753 struct btf_record *rec;
6754
6755 rec = reg_btf_record(reg);
6756 if (!rec)
6757 return NULL;
6758
6759 field = btf_record_find(rec, off, fields);
6760 if (!field)
6761 return NULL;
6762
6763 return field;
6764}
6765
25b35dd2
KKD
6766int check_func_arg_reg_off(struct bpf_verifier_env *env,
6767 const struct bpf_reg_state *reg, int regno,
8f14852e 6768 enum bpf_arg_type arg_type)
25b35dd2 6769{
184c9bdb 6770 u32 type = reg->type;
25b35dd2 6771
184c9bdb
KKD
6772 /* When referenced register is passed to release function, its fixed
6773 * offset must be 0.
6774 *
6775 * We will check that the reg for an arg_type_is_release argument has a
6776 * ref_obj_id when storing meta->release_regno.
6777 */
6778 if (arg_type_is_release(arg_type)) {
6779 /* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
6780 * may not directly point to the object being released, but to
6781 * a dynptr pointing to such an object, which might be at some offset
6782 * on the stack. In that case, we simply fall back to the
6783 * default handling.
6784 */
6785 if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
6786 return 0;
6a3cd331
DM
6787
6788 if ((type_is_ptr_alloc_obj(type) || type_is_non_owning_ref(type)) && reg->off) {
6789 if (reg_find_field_offset(reg, reg->off, BPF_GRAPH_NODE_OR_ROOT))
6790 return __check_ptr_off_reg(env, reg, regno, true);
6791
6792 verbose(env, "R%d must have zero offset when passed to release func\n",
6793 regno);
6794 verbose(env, "No graph node or root found at R%d type:%s off:%d\n", regno,
6795 kernel_type_name(reg->btf, reg->btf_id), reg->off);
6796 return -EINVAL;
6797 }
6798
184c9bdb
KKD
6799 /* Doing check_ptr_off_reg check for the offset will catch this
6800 * because fixed_off_ok is false, but checking here allows us
6801 * to give the user a better error message.
6802 */
6803 if (reg->off) {
6804 verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
6805 regno);
6806 return -EINVAL;
6807 }
6808 return __check_ptr_off_reg(env, reg, regno, false);
6809 }
6810
6811 switch (type) {
6812 /* Pointer types where both fixed and variable offset is explicitly allowed: */
97e03f52 6813 case PTR_TO_STACK:
25b35dd2
KKD
6814 case PTR_TO_PACKET:
6815 case PTR_TO_PACKET_META:
6816 case PTR_TO_MAP_KEY:
6817 case PTR_TO_MAP_VALUE:
6818 case PTR_TO_MEM:
6819 case PTR_TO_MEM | MEM_RDONLY:
894f2a8b 6820 case PTR_TO_MEM | MEM_RINGBUF:
25b35dd2
KKD
6821 case PTR_TO_BUF:
6822 case PTR_TO_BUF | MEM_RDONLY:
97e03f52 6823 case SCALAR_VALUE:
184c9bdb 6824 return 0;
25b35dd2
KKD
6825 /* All the rest must be rejected, except PTR_TO_BTF_ID which allows
6826 * fixed offset.
6827 */
6828 case PTR_TO_BTF_ID:
282de143 6829 case PTR_TO_BTF_ID | MEM_ALLOC:
3f00c523 6830 case PTR_TO_BTF_ID | PTR_TRUSTED:
fca1aa75 6831 case PTR_TO_BTF_ID | MEM_RCU:
6a3cd331 6832 case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
24d5bb80 6833 /* When referenced PTR_TO_BTF_ID is passed to release function,
184c9bdb
KKD
6834 * its fixed offset must be 0. In the other cases, fixed offset
6835 * can be non-zero. This was already checked above. So pass
6836 * fixed_off_ok as true to allow fixed offset for all other
6837 * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
6838 * still need to do checks instead of returning.
24d5bb80 6839 */
184c9bdb 6840 return __check_ptr_off_reg(env, reg, regno, true);
25b35dd2 6841 default:
184c9bdb 6842 return __check_ptr_off_reg(env, reg, regno, false);
25b35dd2 6843 }
25b35dd2
KKD
6844}
6845
485ec51e
JK
6846static struct bpf_reg_state *get_dynptr_arg_reg(struct bpf_verifier_env *env,
6847 const struct bpf_func_proto *fn,
6848 struct bpf_reg_state *regs)
6849{
6850 struct bpf_reg_state *state = NULL;
6851 int i;
6852
6853 for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
6854 if (arg_type_is_dynptr(fn->arg_type[i])) {
6855 if (state) {
6856 verbose(env, "verifier internal error: multiple dynptr args\n");
6857 return NULL;
6858 }
6859 state = &regs[BPF_REG_1 + i];
6860 }
6861
6862 if (!state)
6863 verbose(env, "verifier internal error: no dynptr arg found\n");
6864
6865 return state;
6866}
6867
f8064ab9 6868static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
34d4ef57
JK
6869{
6870 struct bpf_func_state *state = func(env, reg);
27060531 6871 int spi;
34d4ef57 6872
27060531 6873 if (reg->type == CONST_PTR_TO_DYNPTR)
f8064ab9
KKD
6874 return reg->id;
6875 spi = dynptr_get_spi(env, reg);
6876 if (spi < 0)
6877 return spi;
6878 return state->stack[spi].spilled_ptr.id;
6879}
6880
79168a66 6881static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
34d4ef57
JK
6882{
6883 struct bpf_func_state *state = func(env, reg);
27060531 6884 int spi;
27060531 6885
27060531
KKD
6886 if (reg->type == CONST_PTR_TO_DYNPTR)
6887 return reg->ref_obj_id;
79168a66
KKD
6888 spi = dynptr_get_spi(env, reg);
6889 if (spi < 0)
6890 return spi;
27060531 6891 return state->stack[spi].spilled_ptr.ref_obj_id;
34d4ef57
JK
6892}
6893
b5964b96
JK
6894static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
6895 struct bpf_reg_state *reg)
6896{
6897 struct bpf_func_state *state = func(env, reg);
6898 int spi;
6899
6900 if (reg->type == CONST_PTR_TO_DYNPTR)
6901 return reg->dynptr.type;
6902
6903 spi = __get_spi(reg->off);
6904 if (spi < 0) {
6905 verbose(env, "verifier internal error: invalid spi when querying dynptr type\n");
6906 return BPF_DYNPTR_TYPE_INVALID;
6907 }
6908
6909 return state->stack[spi].spilled_ptr.dynptr.type;
6910}
6911
af7ec138
YS
6912static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
6913 struct bpf_call_arg_meta *meta,
1d18feb2
JK
6914 const struct bpf_func_proto *fn,
6915 int insn_idx)
17a52670 6916{
af7ec138 6917 u32 regno = BPF_REG_1 + arg;
638f5b90 6918 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
af7ec138 6919 enum bpf_arg_type arg_type = fn->arg_type[arg];
f79e7ea5 6920 enum bpf_reg_type type = reg->type;
508362ac 6921 u32 *arg_btf_id = NULL;
17a52670
AS
6922 int err = 0;
6923
80f1d68c 6924 if (arg_type == ARG_DONTCARE)
17a52670
AS
6925 return 0;
6926
dc503a8a
EC
6927 err = check_reg_arg(env, regno, SRC_OP);
6928 if (err)
6929 return err;
17a52670 6930
1be7f75d
AS
6931 if (arg_type == ARG_ANYTHING) {
6932 if (is_pointer_value(env, regno)) {
61bd5218
JK
6933 verbose(env, "R%d leaks addr into helper function\n",
6934 regno);
1be7f75d
AS
6935 return -EACCES;
6936 }
80f1d68c 6937 return 0;
1be7f75d 6938 }
80f1d68c 6939
de8f3a83 6940 if (type_is_pkt_pointer(type) &&
3a0af8fd 6941 !may_access_direct_pkt_data(env, meta, BPF_READ)) {
61bd5218 6942 verbose(env, "helper access to the packet is not allowed\n");
6841de8b
AS
6943 return -EACCES;
6944 }
6945
16d1e00c 6946 if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
912f442c
LB
6947 err = resolve_map_arg_type(env, meta, &arg_type);
6948 if (err)
6949 return err;
6950 }
6951
48946bd6 6952 if (register_is_null(reg) && type_may_be_null(arg_type))
fd1b0d60
LB
6953 /* A NULL register has a SCALAR_VALUE type, so skip
6954 * type checking.
6955 */
6956 goto skip_type_check;
6957
508362ac 6958 /* arg_btf_id and arg_size are in a union. */
4e814da0
KKD
6959 if (base_type(arg_type) == ARG_PTR_TO_BTF_ID ||
6960 base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK)
508362ac
MM
6961 arg_btf_id = fn->arg_btf_id[arg];
6962
6963 err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
f79e7ea5
LB
6964 if (err)
6965 return err;
6966
8f14852e 6967 err = check_func_arg_reg_off(env, reg, regno, arg_type);
25b35dd2
KKD
6968 if (err)
6969 return err;
d7b9454a 6970
fd1b0d60 6971skip_type_check:
8f14852e 6972 if (arg_type_is_release(arg_type)) {
bc34dee6
JK
6973 if (arg_type_is_dynptr(arg_type)) {
6974 struct bpf_func_state *state = func(env, reg);
27060531 6975 int spi;
bc34dee6 6976
27060531
KKD
6977 /* Only dynptr created on stack can be released, thus
6978 * the get_spi and stack state checks for spilled_ptr
6979 * should only be done before process_dynptr_func for
6980 * PTR_TO_STACK.
6981 */
6982 if (reg->type == PTR_TO_STACK) {
79168a66 6983 spi = dynptr_get_spi(env, reg);
f5b625e5 6984 if (spi < 0 || !state->stack[spi].spilled_ptr.ref_obj_id) {
27060531
KKD
6985 verbose(env, "arg %d is an unacquired reference\n", regno);
6986 return -EINVAL;
6987 }
6988 } else {
6989 verbose(env, "cannot release unowned const bpf_dynptr\n");
bc34dee6
JK
6990 return -EINVAL;
6991 }
6992 } else if (!reg->ref_obj_id && !register_is_null(reg)) {
8f14852e
KKD
6993 verbose(env, "R%d must be referenced when passed to release function\n",
6994 regno);
6995 return -EINVAL;
6996 }
6997 if (meta->release_regno) {
6998 verbose(env, "verifier internal error: more than one release argument\n");
6999 return -EFAULT;
7000 }
7001 meta->release_regno = regno;
7002 }
7003
02f7c958 7004 if (reg->ref_obj_id) {
457f4436
AN
7005 if (meta->ref_obj_id) {
7006 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
7007 regno, reg->ref_obj_id,
7008 meta->ref_obj_id);
7009 return -EFAULT;
7010 }
7011 meta->ref_obj_id = reg->ref_obj_id;
17a52670
AS
7012 }
7013
8ab4cdcf
JK
7014 switch (base_type(arg_type)) {
7015 case ARG_CONST_MAP_PTR:
17a52670 7016 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
3e8ce298
AS
7017 if (meta->map_ptr) {
7018 /* Use map_uid (which is the unique id of an inner map) to reject:
7019 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
7020 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
7021 * if (inner_map1 && inner_map2) {
7022 * timer = bpf_map_lookup_elem(inner_map1);
7023 * if (timer)
7024 * // mismatch would have been allowed
7025 * bpf_timer_init(timer, inner_map2);
7026 * }
7027 *
7028 * Comparing map_ptr is enough to distinguish normal and outer maps.
7029 */
7030 if (meta->map_ptr != reg->map_ptr ||
7031 meta->map_uid != reg->map_uid) {
7032 verbose(env,
7033 "timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
7034 meta->map_uid, reg->map_uid);
7035 return -EINVAL;
7036 }
b00628b1 7037 }
33ff9823 7038 meta->map_ptr = reg->map_ptr;
3e8ce298 7039 meta->map_uid = reg->map_uid;
8ab4cdcf
JK
7040 break;
7041 case ARG_PTR_TO_MAP_KEY:
17a52670
AS
7042 /* bpf_map_xxx(..., map_ptr, ..., key) call:
7043 * check that [key, key + map->key_size) are within
7044 * stack limits and initialized
7045 */
33ff9823 7046 if (!meta->map_ptr) {
17a52670
AS
7047 /* in function declaration map_ptr must come before
7048 * map_key, so that it's verified and known before
7049 * we have to check map_key here. Otherwise it means
7050 * that kernel subsystem misconfigured verifier
7051 */
61bd5218 7052 verbose(env, "invalid map_ptr to access map->key\n");
17a52670
AS
7053 return -EACCES;
7054 }
d71962f3
PC
7055 err = check_helper_mem_access(env, regno,
7056 meta->map_ptr->key_size, false,
7057 NULL);
8ab4cdcf
JK
7058 break;
7059 case ARG_PTR_TO_MAP_VALUE:
48946bd6
HL
7060 if (type_may_be_null(arg_type) && register_is_null(reg))
7061 return 0;
7062
17a52670
AS
7063 /* bpf_map_xxx(..., map_ptr, ..., value) call:
7064 * check [value, value + map->value_size) validity
7065 */
33ff9823 7066 if (!meta->map_ptr) {
17a52670 7067 /* kernel subsystem misconfigured verifier */
61bd5218 7068 verbose(env, "invalid map_ptr to access map->value\n");
17a52670
AS
7069 return -EACCES;
7070 }
16d1e00c 7071 meta->raw_mode = arg_type & MEM_UNINIT;
d71962f3
PC
7072 err = check_helper_mem_access(env, regno,
7073 meta->map_ptr->value_size, false,
2ea864c5 7074 meta);
8ab4cdcf
JK
7075 break;
7076 case ARG_PTR_TO_PERCPU_BTF_ID:
eaa6bcb7
HL
7077 if (!reg->btf_id) {
7078 verbose(env, "Helper has invalid btf_id in R%d\n", regno);
7079 return -EACCES;
7080 }
22dc4a0f 7081 meta->ret_btf = reg->btf;
eaa6bcb7 7082 meta->ret_btf_id = reg->btf_id;
8ab4cdcf
JK
7083 break;
7084 case ARG_PTR_TO_SPIN_LOCK:
5d92ddc3
DM
7085 if (in_rbtree_lock_required_cb(env)) {
7086 verbose(env, "can't spin_{lock,unlock} in rbtree cb\n");
7087 return -EACCES;
7088 }
c18f0b6a 7089 if (meta->func_id == BPF_FUNC_spin_lock) {
ac50fe51
KKD
7090 err = process_spin_lock(env, regno, true);
7091 if (err)
7092 return err;
c18f0b6a 7093 } else if (meta->func_id == BPF_FUNC_spin_unlock) {
ac50fe51
KKD
7094 err = process_spin_lock(env, regno, false);
7095 if (err)
7096 return err;
c18f0b6a
LB
7097 } else {
7098 verbose(env, "verifier internal error\n");
7099 return -EFAULT;
7100 }
8ab4cdcf
JK
7101 break;
7102 case ARG_PTR_TO_TIMER:
ac50fe51
KKD
7103 err = process_timer_func(env, regno, meta);
7104 if (err)
7105 return err;
8ab4cdcf
JK
7106 break;
7107 case ARG_PTR_TO_FUNC:
69c087ba 7108 meta->subprogno = reg->subprogno;
8ab4cdcf
JK
7109 break;
7110 case ARG_PTR_TO_MEM:
a2bbe7cc
LB
7111 /* The access to this pointer is only checked when we hit the
7112 * next is_mem_size argument below.
7113 */
16d1e00c 7114 meta->raw_mode = arg_type & MEM_UNINIT;
508362ac
MM
7115 if (arg_type & MEM_FIXED_SIZE) {
7116 err = check_helper_mem_access(env, regno,
7117 fn->arg_size[arg], false,
7118 meta);
7119 }
8ab4cdcf
JK
7120 break;
7121 case ARG_CONST_SIZE:
7122 err = check_mem_size_reg(env, reg, regno, false, meta);
7123 break;
7124 case ARG_CONST_SIZE_OR_ZERO:
7125 err = check_mem_size_reg(env, reg, regno, true, meta);
7126 break;
7127 case ARG_PTR_TO_DYNPTR:
1d18feb2 7128 err = process_dynptr_func(env, regno, insn_idx, arg_type);
ac50fe51
KKD
7129 if (err)
7130 return err;
8ab4cdcf
JK
7131 break;
7132 case ARG_CONST_ALLOC_SIZE_OR_ZERO:
457f4436 7133 if (!tnum_is_const(reg->var_off)) {
28a8add6 7134 verbose(env, "R%d is not a known constant'\n",
457f4436
AN
7135 regno);
7136 return -EACCES;
7137 }
7138 meta->mem_size = reg->var_off.value;
2fc31465
KKD
7139 err = mark_chain_precision(env, regno);
7140 if (err)
7141 return err;
8ab4cdcf
JK
7142 break;
7143 case ARG_PTR_TO_INT:
7144 case ARG_PTR_TO_LONG:
7145 {
57c3bb72
AI
7146 int size = int_ptr_type_to_size(arg_type);
7147
7148 err = check_helper_mem_access(env, regno, size, false, meta);
7149 if (err)
7150 return err;
7151 err = check_ptr_alignment(env, reg, 0, size, true);
8ab4cdcf
JK
7152 break;
7153 }
7154 case ARG_PTR_TO_CONST_STR:
7155 {
fff13c4b
FR
7156 struct bpf_map *map = reg->map_ptr;
7157 int map_off;
7158 u64 map_addr;
7159 char *str_ptr;
7160
a8fad73e 7161 if (!bpf_map_is_rdonly(map)) {
fff13c4b
FR
7162 verbose(env, "R%d does not point to a readonly map'\n", regno);
7163 return -EACCES;
7164 }
7165
7166 if (!tnum_is_const(reg->var_off)) {
7167 verbose(env, "R%d is not a constant address'\n", regno);
7168 return -EACCES;
7169 }
7170
7171 if (!map->ops->map_direct_value_addr) {
7172 verbose(env, "no direct value access support for this map type\n");
7173 return -EACCES;
7174 }
7175
7176 err = check_map_access(env, regno, reg->off,
61df10c7
KKD
7177 map->value_size - reg->off, false,
7178 ACCESS_HELPER);
fff13c4b
FR
7179 if (err)
7180 return err;
7181
7182 map_off = reg->off + reg->var_off.value;
7183 err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
7184 if (err) {
7185 verbose(env, "direct value access on string failed\n");
7186 return err;
7187 }
7188
7189 str_ptr = (char *)(long)(map_addr);
7190 if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
7191 verbose(env, "string is not zero-terminated\n");
7192 return -EINVAL;
7193 }
8ab4cdcf
JK
7194 break;
7195 }
7196 case ARG_PTR_TO_KPTR:
ac50fe51
KKD
7197 err = process_kptr_func(env, regno, meta);
7198 if (err)
7199 return err;
8ab4cdcf 7200 break;
17a52670
AS
7201 }
7202
7203 return err;
7204}
7205
0126240f
LB
7206static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
7207{
7208 enum bpf_attach_type eatype = env->prog->expected_attach_type;
7e40781c 7209 enum bpf_prog_type type = resolve_prog_type(env->prog);
0126240f
LB
7210
7211 if (func_id != BPF_FUNC_map_update_elem)
7212 return false;
7213
7214 /* It's not possible to get access to a locked struct sock in these
7215 * contexts, so updating is safe.
7216 */
7217 switch (type) {
7218 case BPF_PROG_TYPE_TRACING:
7219 if (eatype == BPF_TRACE_ITER)
7220 return true;
7221 break;
7222 case BPF_PROG_TYPE_SOCKET_FILTER:
7223 case BPF_PROG_TYPE_SCHED_CLS:
7224 case BPF_PROG_TYPE_SCHED_ACT:
7225 case BPF_PROG_TYPE_XDP:
7226 case BPF_PROG_TYPE_SK_REUSEPORT:
7227 case BPF_PROG_TYPE_FLOW_DISSECTOR:
7228 case BPF_PROG_TYPE_SK_LOOKUP:
7229 return true;
7230 default:
7231 break;
7232 }
7233
7234 verbose(env, "cannot update sockmap in this context\n");
7235 return false;
7236}
7237
e411901c
MF
7238static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
7239{
95acd881
TA
7240 return env->prog->jit_requested &&
7241 bpf_jit_supports_subprog_tailcalls();
e411901c
MF
7242}
7243
61bd5218
JK
7244static int check_map_func_compatibility(struct bpf_verifier_env *env,
7245 struct bpf_map *map, int func_id)
35578d79 7246{
35578d79
KX
7247 if (!map)
7248 return 0;
7249
6aff67c8
AS
7250 /* We need a two way check, first is from map perspective ... */
7251 switch (map->map_type) {
7252 case BPF_MAP_TYPE_PROG_ARRAY:
7253 if (func_id != BPF_FUNC_tail_call)
7254 goto error;
7255 break;
7256 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
7257 if (func_id != BPF_FUNC_perf_event_read &&
908432ca 7258 func_id != BPF_FUNC_perf_event_output &&
a7658e1a 7259 func_id != BPF_FUNC_skb_output &&
d831ee84
EC
7260 func_id != BPF_FUNC_perf_event_read_value &&
7261 func_id != BPF_FUNC_xdp_output)
6aff67c8
AS
7262 goto error;
7263 break;
457f4436
AN
7264 case BPF_MAP_TYPE_RINGBUF:
7265 if (func_id != BPF_FUNC_ringbuf_output &&
7266 func_id != BPF_FUNC_ringbuf_reserve &&
bc34dee6
JK
7267 func_id != BPF_FUNC_ringbuf_query &&
7268 func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
7269 func_id != BPF_FUNC_ringbuf_submit_dynptr &&
7270 func_id != BPF_FUNC_ringbuf_discard_dynptr)
457f4436
AN
7271 goto error;
7272 break;
583c1f42 7273 case BPF_MAP_TYPE_USER_RINGBUF:
20571567
DV
7274 if (func_id != BPF_FUNC_user_ringbuf_drain)
7275 goto error;
7276 break;
6aff67c8
AS
7277 case BPF_MAP_TYPE_STACK_TRACE:
7278 if (func_id != BPF_FUNC_get_stackid)
7279 goto error;
7280 break;
4ed8ec52 7281 case BPF_MAP_TYPE_CGROUP_ARRAY:
60747ef4 7282 if (func_id != BPF_FUNC_skb_under_cgroup &&
60d20f91 7283 func_id != BPF_FUNC_current_task_under_cgroup)
4a482f34
MKL
7284 goto error;
7285 break;
cd339431 7286 case BPF_MAP_TYPE_CGROUP_STORAGE:
b741f163 7287 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
cd339431
RG
7288 if (func_id != BPF_FUNC_get_local_storage)
7289 goto error;
7290 break;
546ac1ff 7291 case BPF_MAP_TYPE_DEVMAP:
6f9d451a 7292 case BPF_MAP_TYPE_DEVMAP_HASH:
0cdbb4b0
THJ
7293 if (func_id != BPF_FUNC_redirect_map &&
7294 func_id != BPF_FUNC_map_lookup_elem)
546ac1ff
JF
7295 goto error;
7296 break;
fbfc504a
BT
7297 /* Restrict bpf side of cpumap and xskmap, open when use-cases
7298 * appear.
7299 */
6710e112
JDB
7300 case BPF_MAP_TYPE_CPUMAP:
7301 if (func_id != BPF_FUNC_redirect_map)
7302 goto error;
7303 break;
fada7fdc
JL
7304 case BPF_MAP_TYPE_XSKMAP:
7305 if (func_id != BPF_FUNC_redirect_map &&
7306 func_id != BPF_FUNC_map_lookup_elem)
7307 goto error;
7308 break;
56f668df 7309 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
bcc6b1b7 7310 case BPF_MAP_TYPE_HASH_OF_MAPS:
56f668df
MKL
7311 if (func_id != BPF_FUNC_map_lookup_elem)
7312 goto error;
16a43625 7313 break;
174a79ff
JF
7314 case BPF_MAP_TYPE_SOCKMAP:
7315 if (func_id != BPF_FUNC_sk_redirect_map &&
7316 func_id != BPF_FUNC_sock_map_update &&
4f738adb 7317 func_id != BPF_FUNC_map_delete_elem &&
9fed9000 7318 func_id != BPF_FUNC_msg_redirect_map &&
64d85290 7319 func_id != BPF_FUNC_sk_select_reuseport &&
0126240f
LB
7320 func_id != BPF_FUNC_map_lookup_elem &&
7321 !may_update_sockmap(env, func_id))
174a79ff
JF
7322 goto error;
7323 break;
81110384
JF
7324 case BPF_MAP_TYPE_SOCKHASH:
7325 if (func_id != BPF_FUNC_sk_redirect_hash &&
7326 func_id != BPF_FUNC_sock_hash_update &&
7327 func_id != BPF_FUNC_map_delete_elem &&
9fed9000 7328 func_id != BPF_FUNC_msg_redirect_hash &&
64d85290 7329 func_id != BPF_FUNC_sk_select_reuseport &&
0126240f
LB
7330 func_id != BPF_FUNC_map_lookup_elem &&
7331 !may_update_sockmap(env, func_id))
81110384
JF
7332 goto error;
7333 break;
2dbb9b9e
MKL
7334 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
7335 if (func_id != BPF_FUNC_sk_select_reuseport)
7336 goto error;
7337 break;
f1a2e44a
MV
7338 case BPF_MAP_TYPE_QUEUE:
7339 case BPF_MAP_TYPE_STACK:
7340 if (func_id != BPF_FUNC_map_peek_elem &&
7341 func_id != BPF_FUNC_map_pop_elem &&
7342 func_id != BPF_FUNC_map_push_elem)
7343 goto error;
7344 break;
6ac99e8f
MKL
7345 case BPF_MAP_TYPE_SK_STORAGE:
7346 if (func_id != BPF_FUNC_sk_storage_get &&
9db44fdd
KKD
7347 func_id != BPF_FUNC_sk_storage_delete &&
7348 func_id != BPF_FUNC_kptr_xchg)
6ac99e8f
MKL
7349 goto error;
7350 break;
8ea63684
KS
7351 case BPF_MAP_TYPE_INODE_STORAGE:
7352 if (func_id != BPF_FUNC_inode_storage_get &&
9db44fdd
KKD
7353 func_id != BPF_FUNC_inode_storage_delete &&
7354 func_id != BPF_FUNC_kptr_xchg)
8ea63684
KS
7355 goto error;
7356 break;
4cf1bc1f
KS
7357 case BPF_MAP_TYPE_TASK_STORAGE:
7358 if (func_id != BPF_FUNC_task_storage_get &&
9db44fdd
KKD
7359 func_id != BPF_FUNC_task_storage_delete &&
7360 func_id != BPF_FUNC_kptr_xchg)
4cf1bc1f
KS
7361 goto error;
7362 break;
c4bcfb38
YS
7363 case BPF_MAP_TYPE_CGRP_STORAGE:
7364 if (func_id != BPF_FUNC_cgrp_storage_get &&
9db44fdd
KKD
7365 func_id != BPF_FUNC_cgrp_storage_delete &&
7366 func_id != BPF_FUNC_kptr_xchg)
c4bcfb38
YS
7367 goto error;
7368 break;
9330986c
JK
7369 case BPF_MAP_TYPE_BLOOM_FILTER:
7370 if (func_id != BPF_FUNC_map_peek_elem &&
7371 func_id != BPF_FUNC_map_push_elem)
7372 goto error;
7373 break;
6aff67c8
AS
7374 default:
7375 break;
7376 }
7377
7378 /* ... and second from the function itself. */
7379 switch (func_id) {
7380 case BPF_FUNC_tail_call:
7381 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
7382 goto error;
e411901c
MF
7383 if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
7384 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
f4d7e40a
AS
7385 return -EINVAL;
7386 }
6aff67c8
AS
7387 break;
7388 case BPF_FUNC_perf_event_read:
7389 case BPF_FUNC_perf_event_output:
908432ca 7390 case BPF_FUNC_perf_event_read_value:
a7658e1a 7391 case BPF_FUNC_skb_output:
d831ee84 7392 case BPF_FUNC_xdp_output:
6aff67c8
AS
7393 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
7394 goto error;
7395 break;
5b029a32
DB
7396 case BPF_FUNC_ringbuf_output:
7397 case BPF_FUNC_ringbuf_reserve:
7398 case BPF_FUNC_ringbuf_query:
bc34dee6
JK
7399 case BPF_FUNC_ringbuf_reserve_dynptr:
7400 case BPF_FUNC_ringbuf_submit_dynptr:
7401 case BPF_FUNC_ringbuf_discard_dynptr:
5b029a32
DB
7402 if (map->map_type != BPF_MAP_TYPE_RINGBUF)
7403 goto error;
7404 break;
20571567
DV
7405 case BPF_FUNC_user_ringbuf_drain:
7406 if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
7407 goto error;
7408 break;
6aff67c8
AS
7409 case BPF_FUNC_get_stackid:
7410 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
7411 goto error;
7412 break;
60d20f91 7413 case BPF_FUNC_current_task_under_cgroup:
747ea55e 7414 case BPF_FUNC_skb_under_cgroup:
4a482f34
MKL
7415 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
7416 goto error;
7417 break;
97f91a7c 7418 case BPF_FUNC_redirect_map:
9c270af3 7419 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
6f9d451a 7420 map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
fbfc504a
BT
7421 map->map_type != BPF_MAP_TYPE_CPUMAP &&
7422 map->map_type != BPF_MAP_TYPE_XSKMAP)
97f91a7c
JF
7423 goto error;
7424 break;
174a79ff 7425 case BPF_FUNC_sk_redirect_map:
4f738adb 7426 case BPF_FUNC_msg_redirect_map:
81110384 7427 case BPF_FUNC_sock_map_update:
174a79ff
JF
7428 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
7429 goto error;
7430 break;
81110384
JF
7431 case BPF_FUNC_sk_redirect_hash:
7432 case BPF_FUNC_msg_redirect_hash:
7433 case BPF_FUNC_sock_hash_update:
7434 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
174a79ff
JF
7435 goto error;
7436 break;
cd339431 7437 case BPF_FUNC_get_local_storage:
b741f163
RG
7438 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
7439 map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
cd339431
RG
7440 goto error;
7441 break;
2dbb9b9e 7442 case BPF_FUNC_sk_select_reuseport:
9fed9000
JS
7443 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
7444 map->map_type != BPF_MAP_TYPE_SOCKMAP &&
7445 map->map_type != BPF_MAP_TYPE_SOCKHASH)
2dbb9b9e
MKL
7446 goto error;
7447 break;
f1a2e44a 7448 case BPF_FUNC_map_pop_elem:
f1a2e44a
MV
7449 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
7450 map->map_type != BPF_MAP_TYPE_STACK)
7451 goto error;
7452 break;
9330986c
JK
7453 case BPF_FUNC_map_peek_elem:
7454 case BPF_FUNC_map_push_elem:
7455 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
7456 map->map_type != BPF_MAP_TYPE_STACK &&
7457 map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
7458 goto error;
7459 break;
07343110
FZ
7460 case BPF_FUNC_map_lookup_percpu_elem:
7461 if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
7462 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
7463 map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
7464 goto error;
7465 break;
6ac99e8f
MKL
7466 case BPF_FUNC_sk_storage_get:
7467 case BPF_FUNC_sk_storage_delete:
7468 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
7469 goto error;
7470 break;
8ea63684
KS
7471 case BPF_FUNC_inode_storage_get:
7472 case BPF_FUNC_inode_storage_delete:
7473 if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
7474 goto error;
7475 break;
4cf1bc1f
KS
7476 case BPF_FUNC_task_storage_get:
7477 case BPF_FUNC_task_storage_delete:
7478 if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
7479 goto error;
7480 break;
c4bcfb38
YS
7481 case BPF_FUNC_cgrp_storage_get:
7482 case BPF_FUNC_cgrp_storage_delete:
7483 if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE)
7484 goto error;
7485 break;
6aff67c8
AS
7486 default:
7487 break;
35578d79
KX
7488 }
7489
7490 return 0;
6aff67c8 7491error:
61bd5218 7492 verbose(env, "cannot pass map_type %d into func %s#%d\n",
ebb676da 7493 map->map_type, func_id_name(func_id), func_id);
6aff67c8 7494 return -EINVAL;
35578d79
KX
7495}
7496
90133415 7497static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
435faee1
DB
7498{
7499 int count = 0;
7500
39f19ebb 7501 if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 7502 count++;
39f19ebb 7503 if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 7504 count++;
39f19ebb 7505 if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 7506 count++;
39f19ebb 7507 if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 7508 count++;
39f19ebb 7509 if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
435faee1
DB
7510 count++;
7511
90133415
DB
7512 /* We only support one arg being in raw mode at the moment,
7513 * which is sufficient for the helper functions we have
7514 * right now.
7515 */
7516 return count <= 1;
7517}
7518
508362ac 7519static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
90133415 7520{
508362ac
MM
7521 bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
7522 bool has_size = fn->arg_size[arg] != 0;
7523 bool is_next_size = false;
7524
7525 if (arg + 1 < ARRAY_SIZE(fn->arg_type))
7526 is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
7527
7528 if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
7529 return is_next_size;
7530
7531 return has_size == is_next_size || is_next_size == is_fixed;
90133415
DB
7532}
7533
7534static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
7535{
7536 /* bpf_xxx(..., buf, len) call will access 'len'
7537 * bytes from memory 'buf'. Both arg types need
7538 * to be paired, so make sure there's no buggy
7539 * helper function specification.
7540 */
7541 if (arg_type_is_mem_size(fn->arg1_type) ||
508362ac
MM
7542 check_args_pair_invalid(fn, 0) ||
7543 check_args_pair_invalid(fn, 1) ||
7544 check_args_pair_invalid(fn, 2) ||
7545 check_args_pair_invalid(fn, 3) ||
7546 check_args_pair_invalid(fn, 4))
90133415
DB
7547 return false;
7548
7549 return true;
7550}
7551
9436ef6e
LB
7552static bool check_btf_id_ok(const struct bpf_func_proto *fn)
7553{
7554 int i;
7555
1df8f55a 7556 for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
4e814da0
KKD
7557 if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
7558 return !!fn->arg_btf_id[i];
7559 if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
7560 return fn->arg_btf_id[i] == BPF_PTR_POISON;
508362ac
MM
7561 if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
7562 /* arg_btf_id and arg_size are in a union. */
7563 (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
7564 !(fn->arg_type[i] & MEM_FIXED_SIZE)))
1df8f55a
MKL
7565 return false;
7566 }
7567
9436ef6e
LB
7568 return true;
7569}
7570
0c9a7a7e 7571static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
90133415
DB
7572{
7573 return check_raw_mode_ok(fn) &&
fd978bf7 7574 check_arg_pair_ok(fn) &&
b2d8ef19 7575 check_btf_id_ok(fn) ? 0 : -EINVAL;
435faee1
DB
7576}
7577
de8f3a83
DB
7578/* Packet data might have moved; any old PTR_TO_PACKET[_META,_END]
7579 * are now invalid, so turn them into unknown SCALAR_VALUE.
66e3a13e
JK
7580 *
7581 * This also applies to dynptr slices belonging to skb and xdp dynptrs,
7582 * since these slices point to packet data.
f1174f77 7583 */
b239da34 7584static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
969bf05e 7585{
b239da34
KKD
7586 struct bpf_func_state *state;
7587 struct bpf_reg_state *reg;
969bf05e 7588
b239da34 7589 bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
66e3a13e 7590 if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg))
dbd8d228 7591 mark_reg_invalid(env, reg);
b239da34 7592 }));
f4d7e40a
AS
7593}
7594
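/* Editor's note: an illustrative BPF-program-side fragment (not part of this
 * file; assumes an SEC("tc") program with the usual network includes) showing
 * why clear_all_pkt_pointers() above exists. Helpers that may reallocate
 * packet data invalidate previously verified packet pointers, so they must be
 * re-loaded and re-checked after the call:
 *
 *	void *data = (void *)(long)skb->data;
 *	void *data_end = (void *)(long)skb->data_end;
 *
 *	if (data + sizeof(struct ethhdr) > data_end)
 *		return TC_ACT_OK;
 *	bpf_skb_pull_data(skb, 64);	packet may have moved: data/data_end
 *					are now unknown SCALAR_VALUEs
 *	data = (void *)(long)skb->data;			must re-load ...
 *	data_end = (void *)(long)skb->data_end;
 *	if (data + sizeof(struct ethhdr) > data_end)	... and re-check
 *		return TC_ACT_OK;
 */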
6d94e741
AS
7595enum {
7596 AT_PKT_END = -1,
7597 BEYOND_PKT_END = -2,
7598};
7599
7600static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
7601{
7602 struct bpf_func_state *state = vstate->frame[vstate->curframe];
7603 struct bpf_reg_state *reg = &state->regs[regn];
7604
7605 if (reg->type != PTR_TO_PACKET)
7606 /* PTR_TO_PACKET_META is not supported yet */
7607 return;
7608
7609 /* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
7610 * How far beyond pkt_end it goes is unknown.
7611 * if (!range_open) it's the case of pkt >= pkt_end
7612 * if (range_open) it's the case of pkt > pkt_end
7613 * hence this pointer is at least 1 byte bigger than pkt_end
7614 */
7615 if (range_open)
7616 reg->range = BEYOND_PKT_END;
7617 else
7618 reg->range = AT_PKT_END;
7619}
7620
fd978bf7
JS
7621/* The pointer with the specified id has released its reference to kernel
7622 * resources. Identify all copies of the same pointer and clear the reference.
7623 */
7624static int release_reference(struct bpf_verifier_env *env,
1b986589 7625 int ref_obj_id)
fd978bf7 7626{
b239da34
KKD
7627 struct bpf_func_state *state;
7628 struct bpf_reg_state *reg;
1b986589 7629 int err;
fd978bf7 7630
1b986589
MKL
7631 err = release_reference_state(cur_func(env), ref_obj_id);
7632 if (err)
7633 return err;
7634
b239da34 7635 bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
dbd8d228
KKD
7636 if (reg->ref_obj_id == ref_obj_id)
7637 mark_reg_invalid(env, reg);
b239da34 7638 }));
fd978bf7 7639
1b986589 7640 return 0;
fd978bf7
JS
7641}
7642
6a3cd331
DM
7643static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
7644{
7645 struct bpf_func_state *unused;
7646 struct bpf_reg_state *reg;
7647
7648 bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
7649 if (type_is_non_owning_ref(reg->type))
dbd8d228 7650 mark_reg_invalid(env, reg);
6a3cd331
DM
7651 }));
7652}
7653
51c39bb1
AS
7654static void clear_caller_saved_regs(struct bpf_verifier_env *env,
7655 struct bpf_reg_state *regs)
7656{
7657 int i;
7658
7659 /* after the call registers r0 - r5 were scratched */
7660 for (i = 0; i < CALLER_SAVED_REGS; i++) {
7661 mark_reg_not_init(env, regs, caller_saved[i]);
7662 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
7663 }
7664}
7665
14351375
YS
7666typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
7667 struct bpf_func_state *caller,
7668 struct bpf_func_state *callee,
7669 int insn_idx);
7670
be2ef816
AN
7671static int set_callee_state(struct bpf_verifier_env *env,
7672 struct bpf_func_state *caller,
7673 struct bpf_func_state *callee, int insn_idx);
7674
5d92ddc3
DM
7675static bool is_callback_calling_kfunc(u32 btf_id);
7676
14351375
YS
7677static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
7678 int *insn_idx, int subprog,
7679 set_callee_state_fn set_callee_state_cb)
f4d7e40a
AS
7680{
7681 struct bpf_verifier_state *state = env->cur_state;
51c39bb1 7682 struct bpf_func_info_aux *func_info_aux;
f4d7e40a 7683 struct bpf_func_state *caller, *callee;
14351375 7684 int err;
51c39bb1 7685 bool is_global = false;
f4d7e40a 7686
aada9ce6 7687 if (state->curframe + 1 >= MAX_CALL_FRAMES) {
f4d7e40a 7688 verbose(env, "the call stack of %d frames is too deep\n",
aada9ce6 7689 state->curframe + 2);
f4d7e40a
AS
7690 return -E2BIG;
7691 }
7692
f4d7e40a
AS
7693 caller = state->frame[state->curframe];
7694 if (state->frame[state->curframe + 1]) {
7695 verbose(env, "verifier bug. Frame %d already allocated\n",
7696 state->curframe + 1);
7697 return -EFAULT;
7698 }
7699
51c39bb1
AS
7700 func_info_aux = env->prog->aux->func_info_aux;
7701 if (func_info_aux)
7702 is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
95f2f26f 7703 err = btf_check_subprog_call(env, subprog, caller->regs);
51c39bb1
AS
7704 if (err == -EFAULT)
7705 return err;
7706 if (is_global) {
7707 if (err) {
7708 verbose(env, "Caller passes invalid args into func#%d\n",
7709 subprog);
7710 return err;
7711 } else {
7712 if (env->log.level & BPF_LOG_LEVEL)
7713 verbose(env,
7714 "Func#%d is global and valid. Skipping.\n",
7715 subprog);
7716 clear_caller_saved_regs(env, caller->regs);
7717
45159b27 7718 /* All global functions return a 64-bit SCALAR_VALUE */
51c39bb1 7719 mark_reg_unknown(env, caller->regs, BPF_REG_0);
45159b27 7720 caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
51c39bb1
AS
7721
7722 /* continue with next insn after call */
7723 return 0;
7724 }
7725 }
7726
be2ef816
AN
7727 /* set_callee_state is used for direct subprog calls, but we are
7728 * interested in validating only BPF helpers that can call subprogs as
7729 * callbacks
7730 */
5d92ddc3
DM
7731 if (set_callee_state_cb != set_callee_state) {
7732 if (bpf_pseudo_kfunc_call(insn) &&
7733 !is_callback_calling_kfunc(insn->imm)) {
7734 verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
7735 func_id_name(insn->imm), insn->imm);
7736 return -EFAULT;
7737 } else if (!bpf_pseudo_kfunc_call(insn) &&
7738 !is_callback_calling_function(insn->imm)) { /* helper */
7739 verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n",
7740 func_id_name(insn->imm), insn->imm);
7741 return -EFAULT;
7742 }
be2ef816
AN
7743 }
7744
bfc6bb74 7745 if (insn->code == (BPF_JMP | BPF_CALL) &&
a5bebc4f 7746 insn->src_reg == 0 &&
bfc6bb74
AS
7747 insn->imm == BPF_FUNC_timer_set_callback) {
7748 struct bpf_verifier_state *async_cb;
7749
7750 /* there is no real recursion here. timer callbacks are async */
7ddc80a4 7751 env->subprog_info[subprog].is_async_cb = true;
bfc6bb74
AS
7752 async_cb = push_async_cb(env, env->subprog_info[subprog].start,
7753 *insn_idx, subprog);
7754 if (!async_cb)
7755 return -EFAULT;
7756 callee = async_cb->frame[0];
7757 callee->async_entry_cnt = caller->async_entry_cnt + 1;
7758
7759 /* Convert bpf_timer_set_callback() args into timer callback args */
7760 err = set_callee_state_cb(env, caller, callee, *insn_idx);
7761 if (err)
7762 return err;
7763
7764 clear_caller_saved_regs(env, caller->regs);
7765 mark_reg_unknown(env, caller->regs, BPF_REG_0);
7766 caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
7767 /* continue with next insn after call */
7768 return 0;
7769 }
7770
f4d7e40a
AS
7771 callee = kzalloc(sizeof(*callee), GFP_KERNEL);
7772 if (!callee)
7773 return -ENOMEM;
7774 state->frame[state->curframe + 1] = callee;
7775
7776 /* callee cannot access r0, r6 - r9 for reading and has to write
7777 * into its own stack before reading from it.
7778 * callee can read/write into caller's stack
7779 */
7780 init_func_state(env, callee,
7781 /* remember the callsite, it will be used by bpf_exit */
7782 *insn_idx /* callsite */,
7783 state->curframe + 1 /* frameno within this callchain */,
f910cefa 7784 subprog /* subprog number within this prog */);
f4d7e40a 7785
fd978bf7 7786 /* Transfer references to the callee */
c69431aa 7787 err = copy_reference_state(callee, caller);
fd978bf7 7788 if (err)
eb86559a 7789 goto err_out;
fd978bf7 7790
14351375
YS
7791 err = set_callee_state_cb(env, caller, callee, *insn_idx);
7792 if (err)
eb86559a 7793 goto err_out;
f4d7e40a 7794
51c39bb1 7795 clear_caller_saved_regs(env, caller->regs);
f4d7e40a
AS
7796
7797 /* only increment it after check_reg_arg() finished */
7798 state->curframe++;
7799
7800 /* and go analyze first insn of the callee */
14351375 7801 *insn_idx = env->subprog_info[subprog].start - 1;
f4d7e40a 7802
06ee7115 7803 if (env->log.level & BPF_LOG_LEVEL) {
f4d7e40a 7804 verbose(env, "caller:\n");
0f55f9ed 7805 print_verifier_state(env, caller, true);
f4d7e40a 7806 verbose(env, "callee:\n");
0f55f9ed 7807 print_verifier_state(env, callee, true);
f4d7e40a
AS
7808 }
7809 return 0;
eb86559a
WY
7810
7811err_out:
7812 free_func_state(callee);
7813 state->frame[state->curframe + 1] = NULL;
7814 return err;
f4d7e40a
AS
7815}
7816
314ee05e
YS
7817int map_set_for_each_callback_args(struct bpf_verifier_env *env,
7818 struct bpf_func_state *caller,
7819 struct bpf_func_state *callee)
7820{
7821 /* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
7822 * void *callback_ctx, u64 flags);
7823 * callback_fn(struct bpf_map *map, void *key, void *value,
7824 * void *callback_ctx);
7825 */
7826 callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
7827
7828 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
7829 __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
7830 callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
7831
7832 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
7833 __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
7834 callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
7835
7836 /* pointer to stack or null */
7837 callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
7838
7839 /* unused */
7840 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
7841 return 0;
7842}
7843
14351375
YS
7844static int set_callee_state(struct bpf_verifier_env *env,
7845 struct bpf_func_state *caller,
7846 struct bpf_func_state *callee, int insn_idx)
7847{
7848 int i;
7849
7850 /* copy r1 - r5 args that callee can access. The copy includes parent
7851 * pointers, which connects us up to the liveness chain
7852 */
7853 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
7854 callee->regs[i] = caller->regs[i];
7855 return 0;
7856}
7857
7858static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
7859 int *insn_idx)
7860{
7861 int subprog, target_insn;
7862
7863 target_insn = *insn_idx + insn->imm + 1;
7864 subprog = find_subprog(env, target_insn);
7865 if (subprog < 0) {
7866 verbose(env, "verifier bug. No program starts at insn %d\n",
7867 target_insn);
7868 return -EFAULT;
7869 }
7870
7871 return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
7872}
7873
69c087ba
YS
7874static int set_map_elem_callback_state(struct bpf_verifier_env *env,
7875 struct bpf_func_state *caller,
7876 struct bpf_func_state *callee,
7877 int insn_idx)
7878{
7879 struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
7880 struct bpf_map *map;
7881 int err;
7882
7883 if (bpf_map_ptr_poisoned(insn_aux)) {
7884 verbose(env, "tail_call abusing map_ptr\n");
7885 return -EINVAL;
7886 }
7887
7888 map = BPF_MAP_PTR(insn_aux->map_ptr_state);
7889 if (!map->ops->map_set_for_each_callback_args ||
7890 !map->ops->map_for_each_callback) {
7891 verbose(env, "callback function not allowed for map\n");
7892 return -ENOTSUPP;
7893 }
7894
7895 err = map->ops->map_set_for_each_callback_args(env, caller, callee);
7896 if (err)
7897 return err;
7898
7899 callee->in_callback_fn = true;
1bfe26fb 7900 callee->callback_ret_range = tnum_range(0, 1);
69c087ba
YS
7901 return 0;
7902}
7903
e6f2dd0f
JK
7904static int set_loop_callback_state(struct bpf_verifier_env *env,
7905 struct bpf_func_state *caller,
7906 struct bpf_func_state *callee,
7907 int insn_idx)
7908{
7909 /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
7910 * u64 flags);
7911 * callback_fn(u32 index, void *callback_ctx);
7912 */
7913 callee->regs[BPF_REG_1].type = SCALAR_VALUE;
7914 callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
7915
7916 /* unused */
7917 __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
7918 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
7919 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
7920
7921 callee->in_callback_fn = true;
1bfe26fb 7922 callee->callback_ret_range = tnum_range(0, 1);
e6f2dd0f
JK
7923 return 0;
7924}
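/* Illustrative sketch (not part of verifier.c): a bpf_loop() user matching
 * the layout above (R1=index, R2=callback_ctx in the callback). Names are
 * hypothetical; assumes <bpf/bpf_helpers.h>.
 *
 *	static long do_step(__u32 index, void *ctx)
 *	{
 *		*(__u64 *)ctx += index;
 *		return 0;	// 0: run next iteration, 1: break out early
 *	}
 *
 *	SEC("tracepoint/syscalls/sys_enter_getpid")
 *	int use_loop(void *ctx)
 *	{
 *		__u64 acc = 0;
 *
 *		bpf_loop(16, do_step, &acc, 0);	// flags must currently be 0
 *		return 0;
 *	}
 */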
7925
b00628b1
AS
7926static int set_timer_callback_state(struct bpf_verifier_env *env,
7927 struct bpf_func_state *caller,
7928 struct bpf_func_state *callee,
7929 int insn_idx)
7930{
7931 struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
7932
7933 /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
7934 * callback_fn(struct bpf_map *map, void *key, void *value);
7935 */
7936 callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
7937 __mark_reg_known_zero(&callee->regs[BPF_REG_1]);
7938 callee->regs[BPF_REG_1].map_ptr = map_ptr;
7939
7940 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
7941 __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
7942 callee->regs[BPF_REG_2].map_ptr = map_ptr;
7943
7944 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
7945 __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
7946 callee->regs[BPF_REG_3].map_ptr = map_ptr;
7947
7948 /* unused */
7949 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
7950 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
bfc6bb74 7951 callee->in_async_callback_fn = true;
1bfe26fb 7952 callee->callback_ret_range = tnum_range(0, 1);
b00628b1
AS
7953 return 0;
7954}
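/* Illustrative sketch (not part of verifier.c): arming a bpf_timer whose
 * callback matches the register layout above (R1=map, R2=key, R3=value).
 * Names are hypothetical; assumes <bpf/bpf_helpers.h> and a CLOCK_MONOTONIC
 * definition on the BPF program side.
 *
 *	struct map_elem {
 *		struct bpf_timer timer;
 *	};
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, __u32);
 *		__type(value, struct map_elem);
 *	} timer_map SEC(".maps");
 *
 *	static int timer_cb(void *map, __u32 *key, struct map_elem *val)
 *	{
 *		return 0;	// async callback: R0 must also stay in [0, 1]
 *	}
 *
 *	SEC("tracepoint/syscalls/sys_enter_getpid")
 *	int arm_timer(void *ctx)
 *	{
 *		__u32 key = 0;
 *		struct map_elem *e = bpf_map_lookup_elem(&timer_map, &key);
 *
 *		if (!e)
 *			return 0;
 *		bpf_timer_init(&e->timer, &timer_map, CLOCK_MONOTONIC);
 *		bpf_timer_set_callback(&e->timer, timer_cb);
 *		bpf_timer_start(&e->timer, 0, 0);
 *		return 0;
 *	}
 */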
7955
7c7e3d31
SL
7956static int set_find_vma_callback_state(struct bpf_verifier_env *env,
7957 struct bpf_func_state *caller,
7958 struct bpf_func_state *callee,
7959 int insn_idx)
7960{
7961 /* bpf_find_vma(struct task_struct *task, u64 addr,
7962 * void *callback_fn, void *callback_ctx, u64 flags)
7963 * (callback_fn)(struct task_struct *task,
7964 * struct vm_area_struct *vma, void *callback_ctx);
7965 */
7966 callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
7967
7968 callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
7969 __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
7970 callee->regs[BPF_REG_2].btf = btf_vmlinux;
d19ddb47 7971 callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
7c7e3d31
SL
7972
7973 /* pointer to stack or null */
7974 callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
7975
7976 /* unused */
7977 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
7978 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
7979 callee->in_callback_fn = true;
1bfe26fb 7980 callee->callback_ret_range = tnum_range(0, 1);
7c7e3d31
SL
7981 return 0;
7982}
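/* Illustrative sketch (not part of verifier.c): a bpf_find_vma() user whose
 * callback matches the layout above (R1=task, R2=vma, R3=callback_ctx).
 * Names and the probed address are hypothetical; assumes vmlinux.h (for
 * task_struct/vm_area_struct) and <bpf/bpf_helpers.h> on the BPF side.
 *
 *	struct cb_ctx { __u64 vm_start; };
 *
 *	static long vma_cb(struct task_struct *task, struct vm_area_struct *vma,
 *			   void *data)
 *	{
 *		((struct cb_ctx *)data)->vm_start = vma->vm_start;
 *		return 0;
 *	}
 *
 *	SEC("raw_tp/sys_enter")
 *	int find_vma_example(void *ctx)
 *	{
 *		struct task_struct *task = bpf_get_current_task_btf();
 *		struct cb_ctx data = {};
 *
 *		bpf_find_vma(task, 0x400000, vma_cb, &data, 0);	// 0x400000: arbitrary address
 *		return 0;
 *	}
 */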
7983
20571567
DV
7984static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
7985 struct bpf_func_state *caller,
7986 struct bpf_func_state *callee,
7987 int insn_idx)
7988{
 7989 /* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn,
 7990 * void *callback_ctx, u64 flags);
27060531 7991 * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
20571567
DV
7992 */
7993 __mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
f8064ab9 7994 mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
20571567
DV
7995 callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
7996
7997 /* unused */
7998 __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
7999 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
8000 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
8001
8002 callee->in_callback_fn = true;
c92a7a52 8003 callee->callback_ret_range = tnum_range(0, 1);
20571567
DV
8004 return 0;
8005}
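/* Illustrative sketch (not part of verifier.c): draining a user ring buffer
 * with a callback matching the layout above (R1=dynptr, R2=callback_ctx).
 * Names are hypothetical; assumes <bpf/bpf_helpers.h>.
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_USER_RINGBUF);
 *		__uint(max_entries, 4096);	// must be a multiple of the page size
 *	} user_rb SEC(".maps");
 *
 *	struct sample { __u64 id; };
 *
 *	static long drain_cb(struct bpf_dynptr *dynptr, void *ctx)
 *	{
 *		struct sample s;
 *
 *		if (bpf_dynptr_read(&s, sizeof(s), dynptr, 0, 0))
 *			return 1;	// stop draining
 *		// consume s.id ...
 *		return 0;		// continue with the next sample
 *	}
 *
 *	SEC("tracepoint/syscalls/sys_enter_getpid")
 *	int drain(void *ctx)
 *	{
 *		bpf_user_ringbuf_drain(&user_rb, drain_cb, NULL, 0);
 *		return 0;
 *	}
 */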
8006
5d92ddc3
DM
8007static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
8008 struct bpf_func_state *caller,
8009 struct bpf_func_state *callee,
8010 int insn_idx)
8011{
8012 /* void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
8013 * bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
8014 *
8015 * 'struct bpf_rb_node *node' arg to bpf_rbtree_add is the same PTR_TO_BTF_ID w/ offset
8016 * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
8017 * by this point, so look at 'root'
8018 */
8019 struct btf_field *field;
8020
8021 field = reg_find_field_offset(&caller->regs[BPF_REG_1], caller->regs[BPF_REG_1].off,
8022 BPF_RB_ROOT);
8023 if (!field || !field->graph_root.value_btf_id)
8024 return -EFAULT;
8025
8026 mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root);
8027 ref_set_non_owning(env, &callee->regs[BPF_REG_1]);
8028 mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root);
8029 ref_set_non_owning(env, &callee->regs[BPF_REG_2]);
8030
8031 __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
8032 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
8033 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
8034 callee->in_callback_fn = true;
8035 callee->callback_ret_range = tnum_range(0, 1);
8036 return 0;
8037}
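/* Illustrative sketch (not part of verifier.c): an rbtree user whose 'less'
 * callback receives the two non-owning node pointers set up above. It leans
 * on conveniences from the BPF selftests (bpf_experimental.h and friends:
 * bpf_obj_new(), __contains(), container_of(), private()); all names are
 * hypothetical.
 *
 *	struct node_data {
 *		long key;
 *		struct bpf_rb_node node;
 *	};
 *
 *	private(A) struct bpf_spin_lock glock;
 *	private(A) struct bpf_rb_root groot __contains(node_data, node);
 *
 *	static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
 *	{
 *		struct node_data *na = container_of(a, struct node_data, node);
 *		struct node_data *nb = container_of(b, struct node_data, node);
 *
 *		return na->key < nb->key;
 *	}
 *
 *	// in some program body: allocate a node and insert it under the lock
 *	struct node_data *n = bpf_obj_new(typeof(*n));
 *	if (!n)
 *		return 0;
 *	bpf_spin_lock(&glock);
 *	bpf_rbtree_add(&groot, &n->node, less);
 *	bpf_spin_unlock(&glock);
 */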
8038
8039static bool is_rbtree_lock_required_kfunc(u32 btf_id);
8040
8041/* Are we currently verifying the callback for an rbtree helper that must
 8042 * be called with the lock held? If so, there is no need to complain about an
 8043 * unreleased lock.
8044 */
8045static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
8046{
8047 struct bpf_verifier_state *state = env->cur_state;
8048 struct bpf_insn *insn = env->prog->insnsi;
8049 struct bpf_func_state *callee;
8050 int kfunc_btf_id;
8051
8052 if (!state->curframe)
8053 return false;
8054
8055 callee = state->frame[state->curframe];
8056
8057 if (!callee->in_callback_fn)
8058 return false;
8059
8060 kfunc_btf_id = insn[callee->callsite].imm;
8061 return is_rbtree_lock_required_kfunc(kfunc_btf_id);
8062}
8063
f4d7e40a
AS
8064static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
8065{
8066 struct bpf_verifier_state *state = env->cur_state;
8067 struct bpf_func_state *caller, *callee;
8068 struct bpf_reg_state *r0;
fd978bf7 8069 int err;
f4d7e40a
AS
8070
8071 callee = state->frame[state->curframe];
8072 r0 = &callee->regs[BPF_REG_0];
8073 if (r0->type == PTR_TO_STACK) {
8074 /* technically it's ok to return caller's stack pointer
8075 * (or caller's caller's pointer) back to the caller,
8076 * since these pointers are valid. Only current stack
8077 * pointer will be invalid as soon as function exits,
8078 * but let's be conservative
8079 */
8080 verbose(env, "cannot return stack pointer to the caller\n");
8081 return -EINVAL;
8082 }
8083
eb86559a 8084 caller = state->frame[state->curframe - 1];
69c087ba
YS
8085 if (callee->in_callback_fn) {
8086 /* enforce R0 return value range [0, 1]. */
1bfe26fb 8087 struct tnum range = callee->callback_ret_range;
69c087ba
YS
8088
8089 if (r0->type != SCALAR_VALUE) {
8090 verbose(env, "R0 not a scalar value\n");
8091 return -EACCES;
8092 }
8093 if (!tnum_in(range, r0->var_off)) {
8094 verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
8095 return -EINVAL;
8096 }
8097 } else {
8098 /* return to the caller whatever r0 had in the callee */
8099 caller->regs[BPF_REG_0] = *r0;
8100 }
f4d7e40a 8101
9d9d00ac
KKD
8102 /* callback_fn frame should have released its own additions to parent's
8103 * reference state at this point, or check_reference_leak would
8104 * complain, hence it must be the same as the caller. There is no need
8105 * to copy it back.
8106 */
8107 if (!callee->in_callback_fn) {
8108 /* Transfer references to the caller */
8109 err = copy_reference_state(caller, callee);
8110 if (err)
8111 return err;
8112 }
fd978bf7 8113
f4d7e40a 8114 *insn_idx = callee->callsite + 1;
06ee7115 8115 if (env->log.level & BPF_LOG_LEVEL) {
f4d7e40a 8116 verbose(env, "returning from callee:\n");
0f55f9ed 8117 print_verifier_state(env, callee, true);
f4d7e40a 8118 verbose(env, "to caller at %d:\n", *insn_idx);
0f55f9ed 8119 print_verifier_state(env, caller, true);
f4d7e40a
AS
8120 }
8121 /* clear everything in the callee */
8122 free_func_state(callee);
eb86559a 8123 state->frame[state->curframe--] = NULL;
f4d7e40a
AS
8124 return 0;
8125}
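/* Worked example for the R0 check above: callback_ret_range is set by the
 * callback-state helpers to tnum_range(0, 1), i.e. the tnum (value=0, mask=1),
 * which represents exactly {0, 1}. A callback ending in "return 2" leaves R0
 * as the constant tnum (value=2, mask=0); tnum_in() fails because 2 & ~1 != 0
 * and the program is rejected, while "return 0" or "return 1" is accepted.
 */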
8126
849fa506
YS
8127static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
8128 int func_id,
8129 struct bpf_call_arg_meta *meta)
8130{
8131 struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
8132
8133 if (ret_type != RET_INTEGER ||
8134 (func_id != BPF_FUNC_get_stack &&
fd0b88f7 8135 func_id != BPF_FUNC_get_task_stack &&
47cc0ed5
DB
8136 func_id != BPF_FUNC_probe_read_str &&
8137 func_id != BPF_FUNC_probe_read_kernel_str &&
8138 func_id != BPF_FUNC_probe_read_user_str))
849fa506
YS
8139 return;
8140
10060503 8141 ret_reg->smax_value = meta->msize_max_value;
fa123ac0 8142 ret_reg->s32_max_value = meta->msize_max_value;
b0270958
AS
8143 ret_reg->smin_value = -MAX_ERRNO;
8144 ret_reg->s32_min_value = -MAX_ERRNO;
3844d153 8145 reg_bounds_sync(ret_reg);
849fa506
YS
8146}
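/* Example of this refinement from the BPF program side (buf and src are
 * hypothetical):
 *
 *	char buf[64];
 *	long n = bpf_probe_read_kernel_str(buf, sizeof(buf), src);
 *
 * After the call the verifier tracks n in [-MAX_ERRNO, 64] rather than the
 * full 64-bit range, so a subsequent "if (n > 0)" check is enough to prove
 * that an access like buf[n - 1] stays within the buffer.
 */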
8147
c93552c4
DB
8148static int
8149record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
8150 int func_id, int insn_idx)
8151{
8152 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
591fe988 8153 struct bpf_map *map = meta->map_ptr;
c93552c4
DB
8154
8155 if (func_id != BPF_FUNC_tail_call &&
09772d92
DB
8156 func_id != BPF_FUNC_map_lookup_elem &&
8157 func_id != BPF_FUNC_map_update_elem &&
f1a2e44a
MV
8158 func_id != BPF_FUNC_map_delete_elem &&
8159 func_id != BPF_FUNC_map_push_elem &&
8160 func_id != BPF_FUNC_map_pop_elem &&
69c087ba 8161 func_id != BPF_FUNC_map_peek_elem &&
e6a4750f 8162 func_id != BPF_FUNC_for_each_map_elem &&
07343110
FZ
8163 func_id != BPF_FUNC_redirect_map &&
8164 func_id != BPF_FUNC_map_lookup_percpu_elem)
c93552c4 8165 return 0;
09772d92 8166
591fe988 8167 if (map == NULL) {
c93552c4
DB
8168 verbose(env, "kernel subsystem misconfigured verifier\n");
8169 return -EINVAL;
8170 }
8171
591fe988
DB
8172 /* In case of read-only, some additional restrictions
8173 * need to be applied in order to prevent altering the
8174 * state of the map from program side.
8175 */
8176 if ((map->map_flags & BPF_F_RDONLY_PROG) &&
8177 (func_id == BPF_FUNC_map_delete_elem ||
8178 func_id == BPF_FUNC_map_update_elem ||
8179 func_id == BPF_FUNC_map_push_elem ||
8180 func_id == BPF_FUNC_map_pop_elem)) {
8181 verbose(env, "write into map forbidden\n");
8182 return -EACCES;
8183 }
8184
d2e4c1e6 8185 if (!BPF_MAP_PTR(aux->map_ptr_state))
c93552c4 8186 bpf_map_ptr_store(aux, meta->map_ptr,
2c78ee89 8187 !meta->map_ptr->bypass_spec_v1);
d2e4c1e6 8188 else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
c93552c4 8189 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
2c78ee89 8190 !meta->map_ptr->bypass_spec_v1);
c93552c4
DB
8191 return 0;
8192}
8193
d2e4c1e6
DB
8194static int
8195record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
8196 int func_id, int insn_idx)
8197{
8198 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
8199 struct bpf_reg_state *regs = cur_regs(env), *reg;
8200 struct bpf_map *map = meta->map_ptr;
a657182a 8201 u64 val, max;
cc52d914 8202 int err;
d2e4c1e6
DB
8203
8204 if (func_id != BPF_FUNC_tail_call)
8205 return 0;
8206 if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
8207 verbose(env, "kernel subsystem misconfigured verifier\n");
8208 return -EINVAL;
8209 }
8210
d2e4c1e6 8211 reg = &regs[BPF_REG_3];
a657182a
DB
8212 val = reg->var_off.value;
8213 max = map->max_entries;
d2e4c1e6 8214
a657182a 8215 if (!(register_is_const(reg) && val < max)) {
d2e4c1e6
DB
8216 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
8217 return 0;
8218 }
8219
cc52d914
DB
8220 err = mark_chain_precision(env, BPF_REG_3);
8221 if (err)
8222 return err;
d2e4c1e6
DB
8223 if (bpf_map_key_unseen(aux))
8224 bpf_map_key_store(aux, val);
8225 else if (!bpf_map_key_poisoned(aux) &&
8226 bpf_map_key_immediate(aux) != val)
8227 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
8228 return 0;
8229}
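/* Example of what this bookkeeping enables (prog_array and idx are
 * hypothetical):
 *
 *	bpf_tail_call(ctx, &prog_array, 3);	// constant index: key 3 is recorded
 *	bpf_tail_call(ctx, &prog_array, idx);	// unknown index: key is poisoned
 *
 * A recorded constant key (made precise via mark_chain_precision() above)
 * lets later fixup passes specialize the tail call, while a poisoned key
 * keeps the generic indirect tail call.
 */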
8230
fd978bf7
JS
8231static int check_reference_leak(struct bpf_verifier_env *env)
8232{
8233 struct bpf_func_state *state = cur_func(env);
9d9d00ac 8234 bool refs_lingering = false;
fd978bf7
JS
8235 int i;
8236
9d9d00ac
KKD
8237 if (state->frameno && !state->in_callback_fn)
8238 return 0;
8239
fd978bf7 8240 for (i = 0; i < state->acquired_refs; i++) {
9d9d00ac
KKD
8241 if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
8242 continue;
fd978bf7
JS
8243 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
8244 state->refs[i].id, state->refs[i].insn_idx);
9d9d00ac 8245 refs_lingering = true;
fd978bf7 8246 }
9d9d00ac 8247 return refs_lingering ? -EINVAL : 0;
fd978bf7
JS
8248}
8249
7b15523a
FR
8250static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
8251 struct bpf_reg_state *regs)
8252{
8253 struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
8254 struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
8255 struct bpf_map *fmt_map = fmt_reg->map_ptr;
78aa1cc9 8256 struct bpf_bprintf_data data = {};
7b15523a
FR
8257 int err, fmt_map_off, num_args;
8258 u64 fmt_addr;
8259 char *fmt;
8260
8261 /* data must be an array of u64 */
8262 if (data_len_reg->var_off.value % 8)
8263 return -EINVAL;
8264 num_args = data_len_reg->var_off.value / 8;
8265
8266 /* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
8267 * and map_direct_value_addr is set.
8268 */
8269 fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
8270 err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
8271 fmt_map_off);
8e8ee109
FR
8272 if (err) {
8273 verbose(env, "verifier bug\n");
8274 return -EFAULT;
8275 }
7b15523a
FR
8276 fmt = (char *)(long)fmt_addr + fmt_map_off;
8277
 8278 /* We are also guaranteed that fmt+fmt_map_off is NUL-terminated, so we
 8279 * can focus on validating the format specifiers.
8280 */
78aa1cc9 8281 err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
7b15523a
FR
8282 if (err < 0)
8283 verbose(env, "Invalid format string\n");
8284
8285 return err;
8286}
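/* Illustrative sketch (not part of verifier.c) of a call that satisfies the
 * checks above; names are hypothetical and assume <bpf/bpf_helpers.h>:
 *
 *	static const char fmt[] = "cpu=%u pid=%d";	// const string in a read-only map
 *	char out[32];
 *	__u64 args[] = { bpf_get_smp_processor_id(),
 *			 bpf_get_current_pid_tgid() >> 32 };
 *
 *	bpf_snprintf(out, sizeof(out), fmt, args, sizeof(args));
 *
 * sizeof(args) is 16, a multiple of 8, so num_args above is 2, matching the
 * two format specifiers.
 */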
8287
9b99edca
JO
8288static int check_get_func_ip(struct bpf_verifier_env *env)
8289{
9b99edca
JO
8290 enum bpf_prog_type type = resolve_prog_type(env->prog);
8291 int func_id = BPF_FUNC_get_func_ip;
8292
8293 if (type == BPF_PROG_TYPE_TRACING) {
f92c1e18 8294 if (!bpf_prog_has_trampoline(env->prog)) {
9b99edca
JO
8295 verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
8296 func_id_name(func_id), func_id);
8297 return -ENOTSUPP;
8298 }
8299 return 0;
9ffd9f3f
JO
8300 } else if (type == BPF_PROG_TYPE_KPROBE) {
8301 return 0;
9b99edca
JO
8302 }
8303
8304 verbose(env, "func %s#%d not supported for program type %d\n",
8305 func_id_name(func_id), func_id, type);
8306 return -ENOTSUPP;
8307}
8308
1ade2371
EZ
8309static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
8310{
8311 return &env->insn_aux_data[env->insn_idx];
8312}
8313
8314static bool loop_flag_is_zero(struct bpf_verifier_env *env)
8315{
8316 struct bpf_reg_state *regs = cur_regs(env);
8317 struct bpf_reg_state *reg = &regs[BPF_REG_4];
8318 bool reg_is_null = register_is_null(reg);
8319
8320 if (reg_is_null)
8321 mark_chain_precision(env, BPF_REG_4);
8322
8323 return reg_is_null;
8324}
8325
8326static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
8327{
8328 struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
8329
8330 if (!state->initialized) {
8331 state->initialized = 1;
8332 state->fit_for_inline = loop_flag_is_zero(env);
8333 state->callback_subprogno = subprogno;
8334 return;
8335 }
8336
8337 if (!state->fit_for_inline)
8338 return;
8339
8340 state->fit_for_inline = (loop_flag_is_zero(env) &&
8341 state->callback_subprogno == subprogno);
8342}
8343
69c087ba
YS
8344static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
8345 int *insn_idx_p)
17a52670 8346{
aef9d4a3 8347 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
17a52670 8348 const struct bpf_func_proto *fn = NULL;
3c480732 8349 enum bpf_return_type ret_type;
c25b2ae1 8350 enum bpf_type_flag ret_flag;
638f5b90 8351 struct bpf_reg_state *regs;
33ff9823 8352 struct bpf_call_arg_meta meta;
69c087ba 8353 int insn_idx = *insn_idx_p;
969bf05e 8354 bool changes_data;
69c087ba 8355 int i, err, func_id;
17a52670
AS
8356
8357 /* find function prototype */
69c087ba 8358 func_id = insn->imm;
17a52670 8359 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
61bd5218
JK
8360 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
8361 func_id);
17a52670
AS
8362 return -EINVAL;
8363 }
8364
00176a34 8365 if (env->ops->get_func_proto)
5e43f899 8366 fn = env->ops->get_func_proto(func_id, env->prog);
17a52670 8367 if (!fn) {
61bd5218
JK
8368 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
8369 func_id);
17a52670
AS
8370 return -EINVAL;
8371 }
8372
8373 /* eBPF programs must be GPL compatible to use GPL-ed functions */
24701ece 8374 if (!env->prog->gpl_compatible && fn->gpl_only) {
3fe2867c 8375 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
17a52670
AS
8376 return -EINVAL;
8377 }
8378
eae2e83e
JO
8379 if (fn->allowed && !fn->allowed(env->prog)) {
8380 verbose(env, "helper call is not allowed in probe\n");
8381 return -EINVAL;
8382 }
8383
01685c5b
YS
8384 if (!env->prog->aux->sleepable && fn->might_sleep) {
8385 verbose(env, "helper call might sleep in a non-sleepable prog\n");
8386 return -EINVAL;
8387 }
8388
04514d13 8389 /* With LD_ABS/IND some JITs save/restore skb from r1. */
17bedab2 8390 changes_data = bpf_helper_changes_pkt_data(fn->func);
04514d13
DB
8391 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
8392 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
8393 func_id_name(func_id), func_id);
8394 return -EINVAL;
8395 }
969bf05e 8396
33ff9823 8397 memset(&meta, 0, sizeof(meta));
36bbef52 8398 meta.pkt_access = fn->pkt_access;
33ff9823 8399
0c9a7a7e 8400 err = check_func_proto(fn, func_id);
435faee1 8401 if (err) {
61bd5218 8402 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
ebb676da 8403 func_id_name(func_id), func_id);
435faee1
DB
8404 return err;
8405 }
8406
9bb00b28
YS
8407 if (env->cur_state->active_rcu_lock) {
8408 if (fn->might_sleep) {
8409 verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n",
8410 func_id_name(func_id), func_id);
8411 return -EINVAL;
8412 }
8413
8414 if (env->prog->aux->sleepable && is_storage_get_function(func_id))
8415 env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
8416 }
8417
d83525ca 8418 meta.func_id = func_id;
17a52670 8419 /* check args */
523a4cf4 8420 for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
1d18feb2 8421 err = check_func_arg(env, i, &meta, fn, insn_idx);
a7658e1a
AS
8422 if (err)
8423 return err;
8424 }
17a52670 8425
c93552c4
DB
8426 err = record_func_map(env, &meta, func_id, insn_idx);
8427 if (err)
8428 return err;
8429
d2e4c1e6
DB
8430 err = record_func_key(env, &meta, func_id, insn_idx);
8431 if (err)
8432 return err;
8433
435faee1
DB
8434 /* Mark slots with STACK_MISC in case of raw mode, stack offset
8435 * is inferred from register state.
8436 */
8437 for (i = 0; i < meta.access_size; i++) {
ca369602
DB
8438 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
8439 BPF_WRITE, -1, false);
435faee1
DB
8440 if (err)
8441 return err;
8442 }
8443
8f14852e
KKD
8444 regs = cur_regs(env);
8445
8446 if (meta.release_regno) {
8447 err = -EINVAL;
27060531
KKD
8448 /* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
8449 * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
8450 * is safe to do directly.
8451 */
8452 if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
8453 if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) {
8454 verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be released\n");
8455 return -EFAULT;
8456 }
97e03f52 8457 err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
27060531 8458 } else if (meta.ref_obj_id) {
8f14852e 8459 err = release_reference(env, meta.ref_obj_id);
27060531
KKD
8460 } else if (register_is_null(&regs[meta.release_regno])) {
8461 /* meta.ref_obj_id can only be 0 if register that is meant to be
8462 * released is NULL, which must be > R0.
8463 */
8f14852e 8464 err = 0;
27060531 8465 }
46f8bc92
MKL
8466 if (err) {
8467 verbose(env, "func %s#%d reference has not been acquired before\n",
8468 func_id_name(func_id), func_id);
fd978bf7 8469 return err;
46f8bc92 8470 }
fd978bf7
JS
8471 }
8472
e6f2dd0f
JK
8473 switch (func_id) {
8474 case BPF_FUNC_tail_call:
8475 err = check_reference_leak(env);
8476 if (err) {
8477 verbose(env, "tail_call would lead to reference leak\n");
8478 return err;
8479 }
8480 break;
8481 case BPF_FUNC_get_local_storage:
8482 /* check that flags argument in get_local_storage(map, flags) is 0,
8483 * this is required because get_local_storage() can't return an error.
8484 */
8485 if (!register_is_null(&regs[BPF_REG_2])) {
8486 verbose(env, "get_local_storage() doesn't support non-zero flags\n");
8487 return -EINVAL;
8488 }
8489 break;
8490 case BPF_FUNC_for_each_map_elem:
69c087ba
YS
8491 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
8492 set_map_elem_callback_state);
e6f2dd0f
JK
8493 break;
8494 case BPF_FUNC_timer_set_callback:
b00628b1
AS
8495 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
8496 set_timer_callback_state);
e6f2dd0f
JK
8497 break;
8498 case BPF_FUNC_find_vma:
7c7e3d31
SL
8499 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
8500 set_find_vma_callback_state);
e6f2dd0f
JK
8501 break;
8502 case BPF_FUNC_snprintf:
7b15523a 8503 err = check_bpf_snprintf_call(env, regs);
e6f2dd0f
JK
8504 break;
8505 case BPF_FUNC_loop:
1ade2371 8506 update_loop_inline_state(env, meta.subprogno);
e6f2dd0f
JK
8507 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
8508 set_loop_callback_state);
8509 break;
263ae152
JK
8510 case BPF_FUNC_dynptr_from_mem:
8511 if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
8512 verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
8513 reg_type_str(env, regs[BPF_REG_1].type));
8514 return -EACCES;
8515 }
69fd337a
SF
8516 break;
8517 case BPF_FUNC_set_retval:
aef9d4a3
SF
8518 if (prog_type == BPF_PROG_TYPE_LSM &&
8519 env->prog->expected_attach_type == BPF_LSM_CGROUP) {
69fd337a
SF
8520 if (!env->prog->aux->attach_func_proto->type) {
8521 /* Make sure programs that attach to void
8522 * hooks don't try to modify return value.
8523 */
8524 verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
8525 return -EINVAL;
8526 }
8527 }
8528 break;
88374342 8529 case BPF_FUNC_dynptr_data:
485ec51e
JK
8530 {
8531 struct bpf_reg_state *reg;
8532 int id, ref_obj_id;
20571567 8533
485ec51e
JK
8534 reg = get_dynptr_arg_reg(env, fn, regs);
8535 if (!reg)
8536 return -EFAULT;
f8064ab9 8537
f8064ab9 8538
485ec51e
JK
8539 if (meta.dynptr_id) {
8540 verbose(env, "verifier internal error: meta.dynptr_id already set\n");
8541 return -EFAULT;
88374342 8542 }
485ec51e
JK
8543 if (meta.ref_obj_id) {
8544 verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
88374342
JK
8545 return -EFAULT;
8546 }
485ec51e
JK
8547
8548 id = dynptr_id(env, reg);
8549 if (id < 0) {
8550 verbose(env, "verifier internal error: failed to obtain dynptr id\n");
8551 return id;
8552 }
8553
8554 ref_obj_id = dynptr_ref_obj_id(env, reg);
8555 if (ref_obj_id < 0) {
8556 verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n");
8557 return ref_obj_id;
8558 }
8559
8560 meta.dynptr_id = id;
8561 meta.ref_obj_id = ref_obj_id;
8562
88374342 8563 break;
485ec51e 8564 }
b5964b96
JK
8565 case BPF_FUNC_dynptr_write:
8566 {
8567 enum bpf_dynptr_type dynptr_type;
8568 struct bpf_reg_state *reg;
8569
8570 reg = get_dynptr_arg_reg(env, fn, regs);
8571 if (!reg)
8572 return -EFAULT;
8573
8574 dynptr_type = dynptr_get_type(env, reg);
8575 if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
8576 return -EFAULT;
8577
8578 if (dynptr_type == BPF_DYNPTR_TYPE_SKB)
8579 /* this will trigger clear_all_pkt_pointers(), which will
8580 * invalidate all dynptr slices associated with the skb
8581 */
8582 changes_data = true;
8583
8584 break;
8585 }
20571567
DV
8586 case BPF_FUNC_user_ringbuf_drain:
8587 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
8588 set_user_ringbuf_callback_state);
8589 break;
7b15523a
FR
8590 }
8591
e6f2dd0f
JK
8592 if (err)
8593 return err;
8594
17a52670 8595 /* reset caller saved regs */
dc503a8a 8596 for (i = 0; i < CALLER_SAVED_REGS; i++) {
61bd5218 8597 mark_reg_not_init(env, regs, caller_saved[i]);
dc503a8a
EC
8598 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
8599 }
17a52670 8600
5327ed3d
JW
8601 /* helper call returns 64-bit value. */
8602 regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
8603
dc503a8a 8604 /* update return register (already marked as written above) */
3c480732 8605 ret_type = fn->ret_type;
0c9a7a7e
JK
8606 ret_flag = type_flag(ret_type);
8607
8608 switch (base_type(ret_type)) {
8609 case RET_INTEGER:
f1174f77 8610 /* sets type to SCALAR_VALUE */
61bd5218 8611 mark_reg_unknown(env, regs, BPF_REG_0);
0c9a7a7e
JK
8612 break;
8613 case RET_VOID:
17a52670 8614 regs[BPF_REG_0].type = NOT_INIT;
0c9a7a7e
JK
8615 break;
8616 case RET_PTR_TO_MAP_VALUE:
f1174f77 8617 /* There is no offset yet applied, variable or fixed */
61bd5218 8618 mark_reg_known_zero(env, regs, BPF_REG_0);
17a52670
AS
8619 /* remember map_ptr, so that check_map_access()
8620 * can check 'value_size' boundary of memory access
8621 * to map element returned from bpf_map_lookup_elem()
8622 */
33ff9823 8623 if (meta.map_ptr == NULL) {
61bd5218
JK
8624 verbose(env,
8625 "kernel subsystem misconfigured verifier\n");
17a52670
AS
8626 return -EINVAL;
8627 }
33ff9823 8628 regs[BPF_REG_0].map_ptr = meta.map_ptr;
3e8ce298 8629 regs[BPF_REG_0].map_uid = meta.map_uid;
c25b2ae1
HL
8630 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
8631 if (!type_may_be_null(ret_type) &&
db559117 8632 btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK)) {
c25b2ae1 8633 regs[BPF_REG_0].id = ++env->id_gen;
4d31f301 8634 }
0c9a7a7e
JK
8635 break;
8636 case RET_PTR_TO_SOCKET:
c64b7983 8637 mark_reg_known_zero(env, regs, BPF_REG_0);
c25b2ae1 8638 regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
0c9a7a7e
JK
8639 break;
8640 case RET_PTR_TO_SOCK_COMMON:
85a51f8c 8641 mark_reg_known_zero(env, regs, BPF_REG_0);
c25b2ae1 8642 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
0c9a7a7e
JK
8643 break;
8644 case RET_PTR_TO_TCP_SOCK:
655a51e5 8645 mark_reg_known_zero(env, regs, BPF_REG_0);
c25b2ae1 8646 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
0c9a7a7e 8647 break;
2de2669b 8648 case RET_PTR_TO_MEM:
457f4436 8649 mark_reg_known_zero(env, regs, BPF_REG_0);
c25b2ae1 8650 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
457f4436 8651 regs[BPF_REG_0].mem_size = meta.mem_size;
0c9a7a7e
JK
8652 break;
8653 case RET_PTR_TO_MEM_OR_BTF_ID:
8654 {
eaa6bcb7
HL
8655 const struct btf_type *t;
8656
8657 mark_reg_known_zero(env, regs, BPF_REG_0);
22dc4a0f 8658 t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
eaa6bcb7
HL
8659 if (!btf_type_is_struct(t)) {
8660 u32 tsize;
8661 const struct btf_type *ret;
8662 const char *tname;
8663
8664 /* resolve the type size of ksym. */
22dc4a0f 8665 ret = btf_resolve_size(meta.ret_btf, t, &tsize);
eaa6bcb7 8666 if (IS_ERR(ret)) {
22dc4a0f 8667 tname = btf_name_by_offset(meta.ret_btf, t->name_off);
eaa6bcb7
HL
8668 verbose(env, "unable to resolve the size of type '%s': %ld\n",
8669 tname, PTR_ERR(ret));
8670 return -EINVAL;
8671 }
c25b2ae1 8672 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
eaa6bcb7
HL
8673 regs[BPF_REG_0].mem_size = tsize;
8674 } else {
34d3a78c
HL
8675 /* MEM_RDONLY may be carried from ret_flag, but it
8676 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
8677 * it will confuse the check of PTR_TO_BTF_ID in
8678 * check_mem_access().
8679 */
8680 ret_flag &= ~MEM_RDONLY;
8681
c25b2ae1 8682 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
22dc4a0f 8683 regs[BPF_REG_0].btf = meta.ret_btf;
eaa6bcb7
HL
8684 regs[BPF_REG_0].btf_id = meta.ret_btf_id;
8685 }
0c9a7a7e
JK
8686 break;
8687 }
8688 case RET_PTR_TO_BTF_ID:
8689 {
c0a5a21c 8690 struct btf *ret_btf;
af7ec138
YS
8691 int ret_btf_id;
8692
8693 mark_reg_known_zero(env, regs, BPF_REG_0);
c25b2ae1 8694 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
c0a5a21c 8695 if (func_id == BPF_FUNC_kptr_xchg) {
aa3496ac
KKD
8696 ret_btf = meta.kptr_field->kptr.btf;
8697 ret_btf_id = meta.kptr_field->kptr.btf_id;
c0a5a21c 8698 } else {
47e34cb7
DM
8699 if (fn->ret_btf_id == BPF_PTR_POISON) {
8700 verbose(env, "verifier internal error:");
8701 verbose(env, "func %s has non-overwritten BPF_PTR_POISON return type\n",
8702 func_id_name(func_id));
8703 return -EINVAL;
8704 }
c0a5a21c
KKD
8705 ret_btf = btf_vmlinux;
8706 ret_btf_id = *fn->ret_btf_id;
8707 }
af7ec138 8708 if (ret_btf_id == 0) {
3c480732
HL
8709 verbose(env, "invalid return type %u of func %s#%d\n",
8710 base_type(ret_type), func_id_name(func_id),
8711 func_id);
af7ec138
YS
8712 return -EINVAL;
8713 }
c0a5a21c 8714 regs[BPF_REG_0].btf = ret_btf;
af7ec138 8715 regs[BPF_REG_0].btf_id = ret_btf_id;
0c9a7a7e
JK
8716 break;
8717 }
8718 default:
3c480732
HL
8719 verbose(env, "unknown return type %u of func %s#%d\n",
8720 base_type(ret_type), func_id_name(func_id), func_id);
17a52670
AS
8721 return -EINVAL;
8722 }
04fd61ab 8723
c25b2ae1 8724 if (type_may_be_null(regs[BPF_REG_0].type))
93c230e3
MKL
8725 regs[BPF_REG_0].id = ++env->id_gen;
8726
b2d8ef19
DM
8727 if (helper_multiple_ref_obj_use(func_id, meta.map_ptr)) {
8728 verbose(env, "verifier internal error: func %s#%d sets ref_obj_id more than once\n",
8729 func_id_name(func_id), func_id);
8730 return -EFAULT;
8731 }
8732
f8064ab9
KKD
8733 if (is_dynptr_ref_function(func_id))
8734 regs[BPF_REG_0].dynptr_id = meta.dynptr_id;
8735
88374342 8736 if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
1b986589
MKL
8737 /* For release_reference() */
8738 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
64d85290 8739 } else if (is_acquire_function(func_id, meta.map_ptr)) {
0f3adc28
LB
8740 int id = acquire_reference_state(env, insn_idx);
8741
8742 if (id < 0)
8743 return id;
8744 /* For mark_ptr_or_null_reg() */
8745 regs[BPF_REG_0].id = id;
8746 /* For release_reference() */
8747 regs[BPF_REG_0].ref_obj_id = id;
8748 }
1b986589 8749
849fa506
YS
8750 do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
8751
61bd5218 8752 err = check_map_func_compatibility(env, meta.map_ptr, func_id);
35578d79
KX
8753 if (err)
8754 return err;
04fd61ab 8755
fa28dcb8
SL
8756 if ((func_id == BPF_FUNC_get_stack ||
8757 func_id == BPF_FUNC_get_task_stack) &&
8758 !env->prog->has_callchain_buf) {
c195651e
YS
8759 const char *err_str;
8760
8761#ifdef CONFIG_PERF_EVENTS
8762 err = get_callchain_buffers(sysctl_perf_event_max_stack);
8763 err_str = "cannot get callchain buffer for func %s#%d\n";
8764#else
8765 err = -ENOTSUPP;
8766 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
8767#endif
8768 if (err) {
8769 verbose(env, err_str, func_id_name(func_id), func_id);
8770 return err;
8771 }
8772
8773 env->prog->has_callchain_buf = true;
8774 }
8775
5d99cb2c
SL
8776 if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
8777 env->prog->call_get_stack = true;
8778
9b99edca
JO
8779 if (func_id == BPF_FUNC_get_func_ip) {
8780 if (check_get_func_ip(env))
8781 return -ENOTSUPP;
8782 env->prog->call_get_func_ip = true;
8783 }
8784
969bf05e
AS
8785 if (changes_data)
8786 clear_all_pkt_pointers(env);
8787 return 0;
8788}
8789
e6ac2450
MKL
8790/* mark_btf_func_reg_size() is used when the reg size is determined by
8791 * the BTF func_proto's return value size and argument.
8792 */
8793static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
8794 size_t reg_size)
8795{
8796 struct bpf_reg_state *reg = &cur_regs(env)[regno];
8797
8798 if (regno == BPF_REG_0) {
8799 /* Function return value */
8800 reg->live |= REG_LIVE_WRITTEN;
8801 reg->subreg_def = reg_size == sizeof(u64) ?
8802 DEF_NOT_SUBREG : env->insn_idx + 1;
8803 } else {
8804 /* Function argument */
8805 if (reg_size == sizeof(u64)) {
8806 mark_insn_zext(env, reg);
8807 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
8808 } else {
8809 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
8810 }
8811 }
8812}
8813
00b85860
KKD
8814struct bpf_kfunc_call_arg_meta {
8815 /* In parameters */
8816 struct btf *btf;
8817 u32 func_id;
8818 u32 kfunc_flags;
8819 const struct btf_type *func_proto;
8820 const char *func_name;
8821 /* Out parameters */
8822 u32 ref_obj_id;
8823 u8 release_regno;
8824 bool r0_rdonly;
fd264ca0 8825 u32 ret_btf_id;
00b85860 8826 u64 r0_size;
5d92ddc3 8827 u32 subprogno;
a50388db
KKD
8828 struct {
8829 u64 value;
8830 bool found;
8831 } arg_constant;
ac9f0605
KKD
8832 struct {
8833 struct btf *btf;
8834 u32 btf_id;
8835 } arg_obj_drop;
8cab76ec
KKD
8836 struct {
8837 struct btf_field *field;
8838 } arg_list_head;
cd6791b4
DM
8839 struct {
8840 struct btf_field *field;
8841 } arg_rbtree_root;
66e3a13e
JK
8842 struct {
8843 enum bpf_dynptr_type type;
8844 u32 id;
8845 } initialized_dynptr;
8846 u64 mem_size;
00b85860 8847};
e6ac2450 8848
00b85860
KKD
8849static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
8850{
8851 return meta->kfunc_flags & KF_ACQUIRE;
8852}
a5d82727 8853
00b85860
KKD
8854static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
8855{
8856 return meta->kfunc_flags & KF_RET_NULL;
8857}
2357672c 8858
00b85860
KKD
8859static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
8860{
8861 return meta->kfunc_flags & KF_RELEASE;
8862}
e6ac2450 8863
00b85860
KKD
8864static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta)
8865{
8866 return meta->kfunc_flags & KF_TRUSTED_ARGS;
8867}
4dd48c6f 8868
00b85860
KKD
8869static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
8870{
8871 return meta->kfunc_flags & KF_SLEEPABLE;
8872}
5c073f26 8873
00b85860
KKD
8874static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
8875{
8876 return meta->kfunc_flags & KF_DESTRUCTIVE;
8877}
eb1f7f71 8878
fca1aa75
YS
8879static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
8880{
8881 return meta->kfunc_flags & KF_RCU;
8882}
8883
00b85860
KKD
8884static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg)
8885{
8886 return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET);
8887}
e6ac2450 8888
a50388db
KKD
8889static bool __kfunc_param_match_suffix(const struct btf *btf,
8890 const struct btf_param *arg,
8891 const char *suffix)
00b85860 8892{
a50388db 8893 int suffix_len = strlen(suffix), len;
00b85860 8894 const char *param_name;
e6ac2450 8895
00b85860
KKD
8896 /* In the future, this can be ported to use BTF tagging */
8897 param_name = btf_name_by_offset(btf, arg->name_off);
8898 if (str_is_empty(param_name))
8899 return false;
8900 len = strlen(param_name);
a50388db 8901 if (len < suffix_len)
00b85860 8902 return false;
a50388db
KKD
8903 param_name += len - suffix_len;
8904 return !strncmp(param_name, suffix, suffix_len);
8905}
5c073f26 8906
a50388db
KKD
8907static bool is_kfunc_arg_mem_size(const struct btf *btf,
8908 const struct btf_param *arg,
8909 const struct bpf_reg_state *reg)
8910{
8911 const struct btf_type *t;
5c073f26 8912
a50388db
KKD
8913 t = btf_type_skip_modifiers(btf, arg->type, NULL);
8914 if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
00b85860 8915 return false;
eb1f7f71 8916
a50388db
KKD
8917 return __kfunc_param_match_suffix(btf, arg, "__sz");
8918}
eb1f7f71 8919
66e3a13e
JK
8920static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
8921 const struct btf_param *arg,
8922 const struct bpf_reg_state *reg)
8923{
8924 const struct btf_type *t;
8925
8926 t = btf_type_skip_modifiers(btf, arg->type, NULL);
8927 if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
8928 return false;
8929
8930 return __kfunc_param_match_suffix(btf, arg, "__szk");
8931}
8932
a50388db
KKD
8933static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
8934{
8935 return __kfunc_param_match_suffix(btf, arg, "__k");
00b85860 8936}
eb1f7f71 8937
958cf2e2
KKD
8938static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
8939{
8940 return __kfunc_param_match_suffix(btf, arg, "__ign");
8941}
5c073f26 8942
ac9f0605
KKD
8943static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
8944{
8945 return __kfunc_param_match_suffix(btf, arg, "__alloc");
8946}
e6ac2450 8947
d96d937d
JK
8948static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
8949{
8950 return __kfunc_param_match_suffix(btf, arg, "__uninit");
8951}
8952
00b85860
KKD
8953static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
8954 const struct btf_param *arg,
8955 const char *name)
8956{
8957 int len, target_len = strlen(name);
8958 const char *param_name;
e6ac2450 8959
00b85860
KKD
8960 param_name = btf_name_by_offset(btf, arg->name_off);
8961 if (str_is_empty(param_name))
8962 return false;
8963 len = strlen(param_name);
8964 if (len != target_len)
8965 return false;
8966 if (strcmp(param_name, name))
8967 return false;
e6ac2450 8968
00b85860 8969 return true;
e6ac2450
MKL
8970}
8971
00b85860
KKD
8972enum {
8973 KF_ARG_DYNPTR_ID,
8cab76ec
KKD
8974 KF_ARG_LIST_HEAD_ID,
8975 KF_ARG_LIST_NODE_ID,
cd6791b4
DM
8976 KF_ARG_RB_ROOT_ID,
8977 KF_ARG_RB_NODE_ID,
00b85860 8978};
b03c9f9f 8979
00b85860
KKD
8980BTF_ID_LIST(kf_arg_btf_ids)
8981BTF_ID(struct, bpf_dynptr_kern)
8cab76ec
KKD
8982BTF_ID(struct, bpf_list_head)
8983BTF_ID(struct, bpf_list_node)
bd1279ae
DM
8984BTF_ID(struct, bpf_rb_root)
8985BTF_ID(struct, bpf_rb_node)
b03c9f9f 8986
8cab76ec
KKD
8987static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
8988 const struct btf_param *arg, int type)
3f50f132 8989{
00b85860
KKD
8990 const struct btf_type *t;
8991 u32 res_id;
3f50f132 8992
00b85860
KKD
8993 t = btf_type_skip_modifiers(btf, arg->type, NULL);
8994 if (!t)
8995 return false;
8996 if (!btf_type_is_ptr(t))
8997 return false;
8998 t = btf_type_skip_modifiers(btf, t->type, &res_id);
8999 if (!t)
9000 return false;
8cab76ec 9001 return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]);
3f50f132
JF
9002}
9003
8cab76ec 9004static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg)
b03c9f9f 9005{
8cab76ec 9006 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID);
969bf05e
AS
9007}
9008
8cab76ec 9009static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg)
3f50f132 9010{
8cab76ec 9011 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID);
3f50f132
JF
9012}
9013
8cab76ec 9014static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg)
bb7f0f98 9015{
8cab76ec 9016 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
00b85860
KKD
9017}
9018
cd6791b4
DM
9019static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg)
9020{
9021 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID);
9022}
9023
9024static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg)
9025{
9026 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
9027}
9028
5d92ddc3
DM
9029static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
9030 const struct btf_param *arg)
9031{
9032 const struct btf_type *t;
9033
9034 t = btf_type_resolve_func_ptr(btf, arg->type, NULL);
9035 if (!t)
9036 return false;
9037
9038 return true;
9039}
9040
00b85860
KKD
9041/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
9042static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
9043 const struct btf *btf,
9044 const struct btf_type *t, int rec)
9045{
9046 const struct btf_type *member_type;
9047 const struct btf_member *member;
9048 u32 i;
9049
9050 if (!btf_type_is_struct(t))
9051 return false;
9052
9053 for_each_member(i, t, member) {
9054 const struct btf_array *array;
9055
9056 member_type = btf_type_skip_modifiers(btf, member->type, NULL);
9057 if (btf_type_is_struct(member_type)) {
9058 if (rec >= 3) {
9059 verbose(env, "max struct nesting depth exceeded\n");
9060 return false;
9061 }
9062 if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1))
9063 return false;
9064 continue;
9065 }
9066 if (btf_type_is_array(member_type)) {
9067 array = btf_array(member_type);
9068 if (!array->nelems)
9069 return false;
9070 member_type = btf_type_skip_modifiers(btf, array->type, NULL);
9071 if (!btf_type_is_scalar(member_type))
9072 return false;
9073 continue;
9074 }
9075 if (!btf_type_is_scalar(member_type))
9076 return false;
9077 }
9078 return true;
9079}
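/* Example of what the check above accepts and rejects for such kfunc memory
 * arguments (hypothetical types):
 *
 *	struct ok  { __u32 a; __u64 b[4]; struct { __u16 c; } inner; };	// scalars only: accepted
 *	struct bad { void *p; };						// pointer member: rejected
 *
 * Nesting structs deeper than the allowed depth is likewise rejected with
 * "max struct nesting depth exceeded".
 */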
9080
9081
9082static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
9083#ifdef CONFIG_NET
9084 [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
9085 [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
9086 [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
9087#endif
9088};
9089
9090enum kfunc_ptr_arg_type {
9091 KF_ARG_PTR_TO_CTX,
ac9f0605 9092 KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */
00b85860
KKD
9093 KF_ARG_PTR_TO_KPTR, /* PTR_TO_KPTR but type specific */
9094 KF_ARG_PTR_TO_DYNPTR,
8cab76ec
KKD
9095 KF_ARG_PTR_TO_LIST_HEAD,
9096 KF_ARG_PTR_TO_LIST_NODE,
00b85860
KKD
9097 KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */
9098 KF_ARG_PTR_TO_MEM,
9099 KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it */
5d92ddc3 9100 KF_ARG_PTR_TO_CALLBACK,
cd6791b4
DM
9101 KF_ARG_PTR_TO_RB_ROOT,
9102 KF_ARG_PTR_TO_RB_NODE,
00b85860
KKD
9103};
9104
ac9f0605
KKD
9105enum special_kfunc_type {
9106 KF_bpf_obj_new_impl,
9107 KF_bpf_obj_drop_impl,
8cab76ec
KKD
9108 KF_bpf_list_push_front,
9109 KF_bpf_list_push_back,
9110 KF_bpf_list_pop_front,
9111 KF_bpf_list_pop_back,
fd264ca0 9112 KF_bpf_cast_to_kern_ctx,
a35b9af4 9113 KF_bpf_rdonly_cast,
9bb00b28
YS
9114 KF_bpf_rcu_read_lock,
9115 KF_bpf_rcu_read_unlock,
bd1279ae
DM
9116 KF_bpf_rbtree_remove,
9117 KF_bpf_rbtree_add,
9118 KF_bpf_rbtree_first,
b5964b96 9119 KF_bpf_dynptr_from_skb,
05421aec 9120 KF_bpf_dynptr_from_xdp,
66e3a13e
JK
9121 KF_bpf_dynptr_slice,
9122 KF_bpf_dynptr_slice_rdwr,
ac9f0605
KKD
9123};
9124
9125BTF_SET_START(special_kfunc_set)
9126BTF_ID(func, bpf_obj_new_impl)
9127BTF_ID(func, bpf_obj_drop_impl)
8cab76ec
KKD
9128BTF_ID(func, bpf_list_push_front)
9129BTF_ID(func, bpf_list_push_back)
9130BTF_ID(func, bpf_list_pop_front)
9131BTF_ID(func, bpf_list_pop_back)
fd264ca0 9132BTF_ID(func, bpf_cast_to_kern_ctx)
a35b9af4 9133BTF_ID(func, bpf_rdonly_cast)
bd1279ae
DM
9134BTF_ID(func, bpf_rbtree_remove)
9135BTF_ID(func, bpf_rbtree_add)
9136BTF_ID(func, bpf_rbtree_first)
b5964b96 9137BTF_ID(func, bpf_dynptr_from_skb)
05421aec 9138BTF_ID(func, bpf_dynptr_from_xdp)
66e3a13e
JK
9139BTF_ID(func, bpf_dynptr_slice)
9140BTF_ID(func, bpf_dynptr_slice_rdwr)
ac9f0605
KKD
9141BTF_SET_END(special_kfunc_set)
9142
9143BTF_ID_LIST(special_kfunc_list)
9144BTF_ID(func, bpf_obj_new_impl)
9145BTF_ID(func, bpf_obj_drop_impl)
8cab76ec
KKD
9146BTF_ID(func, bpf_list_push_front)
9147BTF_ID(func, bpf_list_push_back)
9148BTF_ID(func, bpf_list_pop_front)
9149BTF_ID(func, bpf_list_pop_back)
fd264ca0 9150BTF_ID(func, bpf_cast_to_kern_ctx)
a35b9af4 9151BTF_ID(func, bpf_rdonly_cast)
9bb00b28
YS
9152BTF_ID(func, bpf_rcu_read_lock)
9153BTF_ID(func, bpf_rcu_read_unlock)
bd1279ae
DM
9154BTF_ID(func, bpf_rbtree_remove)
9155BTF_ID(func, bpf_rbtree_add)
9156BTF_ID(func, bpf_rbtree_first)
b5964b96 9157BTF_ID(func, bpf_dynptr_from_skb)
05421aec 9158BTF_ID(func, bpf_dynptr_from_xdp)
66e3a13e
JK
9159BTF_ID(func, bpf_dynptr_slice)
9160BTF_ID(func, bpf_dynptr_slice_rdwr)
9bb00b28
YS
9161
9162static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
9163{
9164 return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
9165}
9166
9167static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
9168{
9169 return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
9170}
ac9f0605 9171
00b85860
KKD
9172static enum kfunc_ptr_arg_type
9173get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
9174 struct bpf_kfunc_call_arg_meta *meta,
9175 const struct btf_type *t, const struct btf_type *ref_t,
9176 const char *ref_tname, const struct btf_param *args,
9177 int argno, int nargs)
9178{
9179 u32 regno = argno + 1;
9180 struct bpf_reg_state *regs = cur_regs(env);
9181 struct bpf_reg_state *reg = &regs[regno];
9182 bool arg_mem_size = false;
9183
fd264ca0
YS
9184 if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
9185 return KF_ARG_PTR_TO_CTX;
9186
00b85860
KKD
9187 /* In this function, we verify the kfunc's BTF as per the argument type,
9188 * leaving the rest of the verification with respect to the register
9189 * type to our caller. When a set of conditions hold in the BTF type of
9190 * arguments, we resolve it to a known kfunc_ptr_arg_type.
9191 */
9192 if (btf_get_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
9193 return KF_ARG_PTR_TO_CTX;
9194
ac9f0605
KKD
9195 if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
9196 return KF_ARG_PTR_TO_ALLOC_BTF_ID;
9197
00b85860
KKD
9198 if (is_kfunc_arg_kptr_get(meta, argno)) {
9199 if (!btf_type_is_ptr(ref_t)) {
9200 verbose(env, "arg#0 BTF type must be a double pointer for kptr_get kfunc\n");
9201 return -EINVAL;
9202 }
9203 ref_t = btf_type_by_id(meta->btf, ref_t->type);
9204 ref_tname = btf_name_by_offset(meta->btf, ref_t->name_off);
9205 if (!btf_type_is_struct(ref_t)) {
9206 verbose(env, "kernel function %s args#0 pointer type %s %s is not supported\n",
9207 meta->func_name, btf_type_str(ref_t), ref_tname);
9208 return -EINVAL;
9209 }
9210 return KF_ARG_PTR_TO_KPTR;
9211 }
9212
9213 if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
9214 return KF_ARG_PTR_TO_DYNPTR;
9215
8cab76ec
KKD
9216 if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
9217 return KF_ARG_PTR_TO_LIST_HEAD;
9218
9219 if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
9220 return KF_ARG_PTR_TO_LIST_NODE;
9221
cd6791b4
DM
9222 if (is_kfunc_arg_rbtree_root(meta->btf, &args[argno]))
9223 return KF_ARG_PTR_TO_RB_ROOT;
9224
9225 if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno]))
9226 return KF_ARG_PTR_TO_RB_NODE;
9227
00b85860
KKD
9228 if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
9229 if (!btf_type_is_struct(ref_t)) {
9230 verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
9231 meta->func_name, argno, btf_type_str(ref_t), ref_tname);
9232 return -EINVAL;
9233 }
9234 return KF_ARG_PTR_TO_BTF_ID;
9235 }
9236
5d92ddc3
DM
9237 if (is_kfunc_arg_callback(env, meta->btf, &args[argno]))
9238 return KF_ARG_PTR_TO_CALLBACK;
9239
66e3a13e
JK
9240
9241 if (argno + 1 < nargs &&
9242 (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]) ||
9243 is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1])))
00b85860
KKD
9244 arg_mem_size = true;
9245
9246 /* This is the catch all argument type of register types supported by
9247 * check_helper_mem_access. However, we only allow when argument type is
9248 * pointer to scalar, or struct composed (recursively) of scalars. When
9249 * arg_mem_size is true, the pointer can be void *.
9250 */
9251 if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
9252 (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
9253 verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
9254 argno, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
9255 return -EINVAL;
9256 }
9257 return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
9258}
9259
9260static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
9261 struct bpf_reg_state *reg,
9262 const struct btf_type *ref_t,
9263 const char *ref_tname, u32 ref_id,
9264 struct bpf_kfunc_call_arg_meta *meta,
9265 int argno)
9266{
9267 const struct btf_type *reg_ref_t;
9268 bool strict_type_match = false;
9269 const struct btf *reg_btf;
9270 const char *reg_ref_tname;
9271 u32 reg_ref_id;
9272
3f00c523 9273 if (base_type(reg->type) == PTR_TO_BTF_ID) {
00b85860
KKD
9274 reg_btf = reg->btf;
9275 reg_ref_id = reg->btf_id;
9276 } else {
9277 reg_btf = btf_vmlinux;
9278 reg_ref_id = *reg2btf_ids[base_type(reg->type)];
9279 }
9280
b613d335
DV
9281 /* Enforce strict type matching for calls to kfuncs that are acquiring
9282 * or releasing a reference, or are no-cast aliases. We do _not_
9283 * enforce strict matching for plain KF_TRUSTED_ARGS kfuncs by default,
9284 * as we want to enable BPF programs to pass types that are bitwise
9285 * equivalent without forcing them to explicitly cast with something
9286 * like bpf_cast_to_kern_ctx().
9287 *
9288 * For example, say we had a type like the following:
9289 *
9290 * struct bpf_cpumask {
9291 * cpumask_t cpumask;
9292 * refcount_t usage;
9293 * };
9294 *
9295 * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
9296 * to a struct cpumask, so it would be safe to pass a struct
9297 * bpf_cpumask * to a kfunc expecting a struct cpumask *.
9298 *
9299 * The philosophy here is similar to how we allow scalars of different
9300 * types to be passed to kfuncs as long as the size is the same. The
9301 * only difference here is that we're simply allowing
9302 * btf_struct_ids_match() to walk the struct at the 0th offset, and
9303 * resolve types.
9304 */
9305 if (is_kfunc_acquire(meta) ||
9306 (is_kfunc_release(meta) && reg->ref_obj_id) ||
9307 btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
00b85860
KKD
9308 strict_type_match = true;
9309
b613d335
DV
9310 WARN_ON_ONCE(is_kfunc_trusted_args(meta) && reg->off);
9311
00b85860
KKD
9312 reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
9313 reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
9314 if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) {
9315 verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
9316 meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1,
9317 btf_type_str(reg_ref_t), reg_ref_tname);
9318 return -EINVAL;
9319 }
9320 return 0;
9321}
9322
9323static int process_kf_arg_ptr_to_kptr(struct bpf_verifier_env *env,
9324 struct bpf_reg_state *reg,
9325 const struct btf_type *ref_t,
9326 const char *ref_tname,
9327 struct bpf_kfunc_call_arg_meta *meta,
9328 int argno)
9329{
9330 struct btf_field *kptr_field;
9331
9332 /* check_func_arg_reg_off allows var_off for
9333 * PTR_TO_MAP_VALUE, but we need fixed offset to find
9334 * off_desc.
9335 */
9336 if (!tnum_is_const(reg->var_off)) {
9337 verbose(env, "arg#0 must have constant offset\n");
9338 return -EINVAL;
9339 }
9340
9341 kptr_field = btf_record_find(reg->map_ptr->record, reg->off + reg->var_off.value, BPF_KPTR);
9342 if (!kptr_field || kptr_field->type != BPF_KPTR_REF) {
9343 verbose(env, "arg#0 no referenced kptr at map value offset=%llu\n",
9344 reg->off + reg->var_off.value);
9345 return -EINVAL;
9346 }
9347
9348 if (!btf_struct_ids_match(&env->log, meta->btf, ref_t->type, 0, kptr_field->kptr.btf,
9349 kptr_field->kptr.btf_id, true)) {
9350 verbose(env, "kernel function %s args#%d expected pointer to %s %s\n",
9351 meta->func_name, argno, btf_type_str(ref_t), ref_tname);
9352 return -EINVAL;
9353 }
9354 return 0;
9355}
9356
6a3cd331 9357static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
534e86bc 9358{
6a3cd331
DM
9359 struct bpf_verifier_state *state = env->cur_state;
9360
9361 if (!state->active_lock.ptr) {
9362 verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n");
9363 return -EFAULT;
9364 }
9365
9366 if (type_flag(reg->type) & NON_OWN_REF) {
9367 verbose(env, "verifier internal error: NON_OWN_REF already set\n");
9368 return -EFAULT;
9369 }
9370
9371 reg->type |= NON_OWN_REF;
9372 return 0;
9373}
9374
9375static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id)
9376{
9377 struct bpf_func_state *state, *unused;
534e86bc
KKD
9378 struct bpf_reg_state *reg;
9379 int i;
9380
6a3cd331
DM
9381 state = cur_func(env);
9382
534e86bc 9383 if (!ref_obj_id) {
6a3cd331
DM
9384 verbose(env, "verifier internal error: ref_obj_id is zero for "
9385 "owning -> non-owning conversion\n");
534e86bc
KKD
9386 return -EFAULT;
9387 }
6a3cd331 9388
534e86bc 9389 for (i = 0; i < state->acquired_refs; i++) {
6a3cd331
DM
9390 if (state->refs[i].id != ref_obj_id)
9391 continue;
9392
9393 /* Clear ref_obj_id here so release_reference doesn't clobber
9394 * the whole reg
9395 */
9396 bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
9397 if (reg->ref_obj_id == ref_obj_id) {
9398 reg->ref_obj_id = 0;
9399 ref_set_non_owning(env, reg);
534e86bc 9400 }
6a3cd331
DM
9401 }));
9402 return 0;
534e86bc 9403 }
6a3cd331 9404
534e86bc
KKD
9405 verbose(env, "verifier internal error: ref state missing for ref_obj_id\n");
9406 return -EFAULT;
9407}
9408
8cab76ec
KKD
9409/* Implementation details:
9410 *
9411 * Each register points to some region of memory, which we define as an
9412 * allocation. Each allocation may embed a bpf_spin_lock which protects any
9413 * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same
9414 * allocation. The lock and the data it protects are colocated in the same
9415 * memory region.
9416 *
 9417 * Hence, every time a register holds a pointer value pointing to such
9418 * allocation, the verifier preserves a unique reg->id for it.
9419 *
9420 * The verifier remembers the lock 'ptr' and the lock 'id' whenever
9421 * bpf_spin_lock is called.
9422 *
9423 * To enable this, lock state in the verifier captures two values:
9424 * active_lock.ptr = Register's type specific pointer
9425 * active_lock.id = A unique ID for each register pointer value
9426 *
9427 * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
9428 * supported register types.
9429 *
9430 * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
9431 * allocated objects is the reg->btf pointer.
9432 *
9433 * The active_lock.id is non-unique for maps supporting direct_value_addr, as we
9434 * can establish the provenance of the map value statically for each distinct
9435 * lookup into such maps. They always contain a single map value hence unique
9436 * IDs for each pseudo load pessimizes the algorithm and rejects valid programs.
9437 *
9438 * So, in case of global variables, they use array maps with max_entries = 1,
9439 * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point
9440 * into the same map value as max_entries is 1, as described above).
9441 *
9442 * In case of inner map lookups, the inner map pointer has same map_ptr as the
9443 * outer map pointer (in verifier context), but each lookup into an inner map
9444 * assigns a fresh reg->id to the lookup, so while lookups into distinct inner
9445 * maps from the same outer map share the same map_ptr as active_lock.ptr, they
9446 * will get different reg->id assigned to each lookup, hence different
9447 * active_lock.id.
9448 *
9449 * In case of allocated objects, active_lock.ptr is the reg->btf, and the
9450 * reg->id is a unique ID preserved after the NULL pointer check on the pointer
9451 * returned from bpf_obj_new. Each allocation receives a new reg->id.
9452 */
9453static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
9454{
9455 void *ptr;
9456 u32 id;
9457
9458 switch ((int)reg->type) {
9459 case PTR_TO_MAP_VALUE:
9460 ptr = reg->map_ptr;
9461 break;
9462 case PTR_TO_BTF_ID | MEM_ALLOC:
9463 ptr = reg->btf;
9464 break;
9465 default:
9466 verbose(env, "verifier internal error: unknown reg type for lock check\n");
9467 return -EFAULT;
9468 }
9469 id = reg->id;
9470
9471 if (!env->cur_state->active_lock.ptr)
9472 return -EINVAL;
9473 if (env->cur_state->active_lock.ptr != ptr ||
9474 env->cur_state->active_lock.id != id) {
9475 verbose(env, "held lock and object are not in the same allocation\n");
9476 return -EINVAL;
9477 }
9478 return 0;
9479}
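/* Example of the "same allocation" rule enforced above, using the
 * __contains() annotation from the selftests' bpf_experimental.h (all names
 * hypothetical):
 *
 *	struct elem {
 *		struct bpf_spin_lock lock;
 *		struct bpf_list_head head __contains(foo, node);
 *	};
 *
 * A program must take bpf_spin_lock(&e->lock) on the *same* map value or
 * allocated object 'e' before operating on &e->head; holding a lock that
 * lives in a different allocation fails with "held lock and object are not
 * in the same allocation".
 */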
9480
9481static bool is_bpf_list_api_kfunc(u32 btf_id)
9482{
9483 return btf_id == special_kfunc_list[KF_bpf_list_push_front] ||
9484 btf_id == special_kfunc_list[KF_bpf_list_push_back] ||
9485 btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
9486 btf_id == special_kfunc_list[KF_bpf_list_pop_back];
9487}
9488
cd6791b4
DM
9489static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
9490{
9491 return btf_id == special_kfunc_list[KF_bpf_rbtree_add] ||
9492 btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
9493 btf_id == special_kfunc_list[KF_bpf_rbtree_first];
9494}
9495
9496static bool is_bpf_graph_api_kfunc(u32 btf_id)
9497{
9498 return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id);
9499}
9500
5d92ddc3
DM
9501static bool is_callback_calling_kfunc(u32 btf_id)
9502{
9503 return btf_id == special_kfunc_list[KF_bpf_rbtree_add];
9504}
9505
9506static bool is_rbtree_lock_required_kfunc(u32 btf_id)
9507{
9508 return is_bpf_rbtree_api_kfunc(btf_id);
9509}
9510
cd6791b4
DM
9511static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
9512 enum btf_field_type head_field_type,
9513 u32 kfunc_btf_id)
9514{
9515 bool ret;
9516
9517 switch (head_field_type) {
9518 case BPF_LIST_HEAD:
9519 ret = is_bpf_list_api_kfunc(kfunc_btf_id);
9520 break;
9521 case BPF_RB_ROOT:
9522 ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id);
9523 break;
9524 default:
9525 verbose(env, "verifier internal error: unexpected graph root argument type %s\n",
9526 btf_field_type_name(head_field_type));
9527 return false;
9528 }
9529
9530 if (!ret)
9531 verbose(env, "verifier internal error: %s head arg for unknown kfunc\n",
9532 btf_field_type_name(head_field_type));
9533 return ret;
9534}
9535
9536static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
9537 enum btf_field_type node_field_type,
9538 u32 kfunc_btf_id)
8cab76ec 9539{
cd6791b4
DM
9540 bool ret;
9541
9542 switch (node_field_type) {
9543 case BPF_LIST_NODE:
9544 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front] ||
9545 kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back]);
9546 break;
9547 case BPF_RB_NODE:
9548 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
9549 kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add]);
9550 break;
9551 default:
9552 verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
9553 btf_field_type_name(node_field_type));
9554 return false;
9555 }
9556
9557 if (!ret)
9558 verbose(env, "verifier internal error: %s node arg for unknown kfunc\n",
9559 btf_field_type_name(node_field_type));
9560 return ret;
9561}
9562
9563static int
9564__process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
9565 struct bpf_reg_state *reg, u32 regno,
9566 struct bpf_kfunc_call_arg_meta *meta,
9567 enum btf_field_type head_field_type,
9568 struct btf_field **head_field)
9569{
9570 const char *head_type_name;
8cab76ec
KKD
9571 struct btf_field *field;
9572 struct btf_record *rec;
cd6791b4 9573 u32 head_off;
8cab76ec 9574
cd6791b4
DM
9575 if (meta->btf != btf_vmlinux) {
9576 verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
8cab76ec
KKD
9577 return -EFAULT;
9578 }
9579
cd6791b4
DM
9580 if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id))
9581 return -EFAULT;
9582
9583 head_type_name = btf_field_type_name(head_field_type);
8cab76ec
KKD
9584 if (!tnum_is_const(reg->var_off)) {
9585 verbose(env,
cd6791b4
DM
9586 "R%d doesn't have constant offset. %s has to be at the constant offset\n",
9587 regno, head_type_name);
8cab76ec
KKD
9588 return -EINVAL;
9589 }
9590
9591 rec = reg_btf_record(reg);
cd6791b4
DM
9592 head_off = reg->off + reg->var_off.value;
9593 field = btf_record_find(rec, head_off, head_field_type);
8cab76ec 9594 if (!field) {
cd6791b4 9595 verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
8cab76ec
KKD
9596 return -EINVAL;
9597 }
9598
9599 /* All graph API functions require the root (bpf_list_head/bpf_rb_root) to be protected by a bpf_spin_lock */
9600 if (check_reg_allocation_locked(env, reg)) {
cd6791b4
DM
9601 verbose(env, "bpf_spin_lock at off=%d must be held for %s\n",
9602 rec->spin_lock_off, head_type_name);
8cab76ec
KKD
9603 return -EINVAL;
9604 }
9605
cd6791b4
DM
9606 if (*head_field) {
9607 verbose(env, "verifier internal error: repeating %s arg\n", head_type_name);
8cab76ec
KKD
9608 return -EFAULT;
9609 }
cd6791b4 9610 *head_field = field;
8cab76ec
KKD
9611 return 0;
9612}
9613
cd6791b4 9614static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
8cab76ec
KKD
9615 struct bpf_reg_state *reg, u32 regno,
9616 struct bpf_kfunc_call_arg_meta *meta)
9617{
cd6791b4
DM
9618 return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_LIST_HEAD,
9619 &meta->arg_list_head.field);
9620}
9621
9622static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
9623 struct bpf_reg_state *reg, u32 regno,
9624 struct bpf_kfunc_call_arg_meta *meta)
9625{
9626 return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_RB_ROOT,
9627 &meta->arg_rbtree_root.field);
9628}
9629
9630static int
9631__process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
9632 struct bpf_reg_state *reg, u32 regno,
9633 struct bpf_kfunc_call_arg_meta *meta,
9634 enum btf_field_type head_field_type,
9635 enum btf_field_type node_field_type,
9636 struct btf_field **node_field)
9637{
9638 const char *node_type_name;
8cab76ec
KKD
9639 const struct btf_type *et, *t;
9640 struct btf_field *field;
cd6791b4 9641 u32 node_off;
8cab76ec 9642
cd6791b4
DM
9643 if (meta->btf != btf_vmlinux) {
9644 verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
8cab76ec
KKD
9645 return -EFAULT;
9646 }
9647
cd6791b4
DM
9648 if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id))
9649 return -EFAULT;
9650
9651 node_type_name = btf_field_type_name(node_field_type);
8cab76ec
KKD
9652 if (!tnum_is_const(reg->var_off)) {
9653 verbose(env,
cd6791b4
DM
9654 "R%d doesn't have constant offset. %s has to be at the constant offset\n",
9655 regno, node_type_name);
8cab76ec
KKD
9656 return -EINVAL;
9657 }
9658
cd6791b4
DM
9659 node_off = reg->off + reg->var_off.value;
9660 field = reg_find_field_offset(reg, node_off, node_field_type);
9661 if (!field || field->offset != node_off) {
9662 verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
8cab76ec
KKD
9663 return -EINVAL;
9664 }
9665
cd6791b4 9666 field = *node_field;
8cab76ec 9667
30465003 9668 et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
8cab76ec 9669 t = btf_type_by_id(reg->btf, reg->btf_id);
30465003
DM
9670 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf,
9671 field->graph_root.value_btf_id, true)) {
cd6791b4 9672 verbose(env, "operation on %s expects arg#1 %s at offset=%d "
8cab76ec 9673 "in struct %s, but arg is at offset=%d in struct %s\n",
cd6791b4
DM
9674 btf_field_type_name(head_field_type),
9675 btf_field_type_name(node_field_type),
30465003
DM
9676 field->graph_root.node_offset,
9677 btf_name_by_offset(field->graph_root.btf, et->name_off),
cd6791b4 9678 node_off, btf_name_by_offset(reg->btf, t->name_off));
8cab76ec
KKD
9679 return -EINVAL;
9680 }
9681
cd6791b4
DM
9682 if (node_off != field->graph_root.node_offset) {
9683 verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
9684 node_off, btf_field_type_name(node_field_type),
9685 field->graph_root.node_offset,
30465003 9686 btf_name_by_offset(field->graph_root.btf, et->name_off));
8cab76ec
KKD
9687 return -EINVAL;
9688 }
6a3cd331
DM
9689
9690 return 0;
8cab76ec
KKD
9691}
9692
cd6791b4
DM
9693static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
9694 struct bpf_reg_state *reg, u32 regno,
9695 struct bpf_kfunc_call_arg_meta *meta)
9696{
9697 return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
9698 BPF_LIST_HEAD, BPF_LIST_NODE,
9699 &meta->arg_list_head.field);
9700}
9701
9702static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
9703 struct bpf_reg_state *reg, u32 regno,
9704 struct bpf_kfunc_call_arg_meta *meta)
9705{
9706 return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
9707 BPF_RB_ROOT, BPF_RB_NODE,
9708 &meta->arg_rbtree_root.field);
9709}
9710
1d18feb2
JK
9711static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
9712 int insn_idx)
00b85860
KKD
9713{
9714 const char *func_name = meta->func_name, *ref_tname;
9715 const struct btf *btf = meta->btf;
9716 const struct btf_param *args;
9717 u32 i, nargs;
9718 int ret;
9719
9720 args = (const struct btf_param *)(meta->func_proto + 1);
9721 nargs = btf_type_vlen(meta->func_proto);
9722 if (nargs > MAX_BPF_FUNC_REG_ARGS) {
9723 verbose(env, "Function %s has %d > %d args\n", func_name, nargs,
9724 MAX_BPF_FUNC_REG_ARGS);
9725 return -EINVAL;
9726 }
9727
9728 /* Check that BTF function arguments match actual types that the
9729 * verifier sees.
9730 */
9731 for (i = 0; i < nargs; i++) {
9732 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[i + 1];
9733 const struct btf_type *t, *ref_t, *resolve_ret;
9734 enum bpf_arg_type arg_type = ARG_DONTCARE;
9735 u32 regno = i + 1, ref_id, type_size;
9736 bool is_ret_buf_sz = false;
9737 int kf_arg_type;
9738
9739 t = btf_type_skip_modifiers(btf, args[i].type, NULL);
958cf2e2
KKD
9740
9741 if (is_kfunc_arg_ignore(btf, &args[i]))
9742 continue;
9743
00b85860
KKD
9744 if (btf_type_is_scalar(t)) {
9745 if (reg->type != SCALAR_VALUE) {
9746 verbose(env, "R%d is not a scalar\n", regno);
9747 return -EINVAL;
9748 }
a50388db
KKD
9749
9750 if (is_kfunc_arg_constant(meta->btf, &args[i])) {
9751 if (meta->arg_constant.found) {
9752 verbose(env, "verifier internal error: only one constant argument permitted\n");
9753 return -EFAULT;
9754 }
9755 if (!tnum_is_const(reg->var_off)) {
9756 verbose(env, "R%d must be a known constant\n", regno);
9757 return -EINVAL;
9758 }
9759 ret = mark_chain_precision(env, regno);
9760 if (ret < 0)
9761 return ret;
9762 meta->arg_constant.found = true;
9763 meta->arg_constant.value = reg->var_off.value;
9764 } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) {
00b85860
KKD
9765 meta->r0_rdonly = true;
9766 is_ret_buf_sz = true;
9767 } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) {
9768 is_ret_buf_sz = true;
9769 }
9770
9771 if (is_ret_buf_sz) {
9772 if (meta->r0_size) {
9773 verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
9774 return -EINVAL;
9775 }
9776
9777 if (!tnum_is_const(reg->var_off)) {
9778 verbose(env, "R%d is not a const\n", regno);
9779 return -EINVAL;
9780 }
9781
9782 meta->r0_size = reg->var_off.value;
9783 ret = mark_chain_precision(env, regno);
9784 if (ret)
9785 return ret;
9786 }
9787 continue;
9788 }
9789
9790 if (!btf_type_is_ptr(t)) {
9791 verbose(env, "Unrecognized arg#%d type %s\n", i, btf_type_str(t));
9792 return -EINVAL;
9793 }
9794
20c09d92 9795 if ((is_kfunc_trusted_args(meta) || is_kfunc_rcu(meta)) &&
caf713c3
DV
9796 (register_is_null(reg) || type_may_be_null(reg->type))) {
9797 verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
9798 return -EACCES;
9799 }
9800
00b85860
KKD
9801 if (reg->ref_obj_id) {
9802 if (is_kfunc_release(meta) && meta->ref_obj_id) {
9803 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
9804 regno, reg->ref_obj_id,
9805 meta->ref_obj_id);
9806 return -EFAULT;
9807 }
9808 meta->ref_obj_id = reg->ref_obj_id;
9809 if (is_kfunc_release(meta))
9810 meta->release_regno = regno;
9811 }
9812
9813 ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
9814 ref_tname = btf_name_by_offset(btf, ref_t->name_off);
9815
9816 kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, i, nargs);
9817 if (kf_arg_type < 0)
9818 return kf_arg_type;
9819
9820 switch (kf_arg_type) {
ac9f0605 9821 case KF_ARG_PTR_TO_ALLOC_BTF_ID:
00b85860 9822 case KF_ARG_PTR_TO_BTF_ID:
fca1aa75 9823 if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta))
00b85860 9824 break;
3f00c523
DV
9825
9826 if (!is_trusted_reg(reg)) {
fca1aa75
YS
9827 if (!is_kfunc_rcu(meta)) {
9828 verbose(env, "R%d must be referenced or trusted\n", regno);
9829 return -EINVAL;
9830 }
9831 if (!is_rcu_reg(reg)) {
9832 verbose(env, "R%d must be a rcu pointer\n", regno);
9833 return -EINVAL;
9834 }
00b85860 9835 }
fca1aa75 9836
00b85860
KKD
9837 fallthrough;
9838 case KF_ARG_PTR_TO_CTX:
9839 /* Trusted arguments have the same offset checks as release arguments */
9840 arg_type |= OBJ_RELEASE;
9841 break;
9842 case KF_ARG_PTR_TO_KPTR:
9843 case KF_ARG_PTR_TO_DYNPTR:
8cab76ec
KKD
9844 case KF_ARG_PTR_TO_LIST_HEAD:
9845 case KF_ARG_PTR_TO_LIST_NODE:
cd6791b4
DM
9846 case KF_ARG_PTR_TO_RB_ROOT:
9847 case KF_ARG_PTR_TO_RB_NODE:
00b85860
KKD
9848 case KF_ARG_PTR_TO_MEM:
9849 case KF_ARG_PTR_TO_MEM_SIZE:
5d92ddc3 9850 case KF_ARG_PTR_TO_CALLBACK:
00b85860
KKD
9851 /* Trusted by default */
9852 break;
9853 default:
9854 WARN_ON_ONCE(1);
9855 return -EFAULT;
9856 }
9857
9858 if (is_kfunc_release(meta) && reg->ref_obj_id)
9859 arg_type |= OBJ_RELEASE;
9860 ret = check_func_arg_reg_off(env, reg, regno, arg_type);
9861 if (ret < 0)
9862 return ret;
9863
9864 switch (kf_arg_type) {
9865 case KF_ARG_PTR_TO_CTX:
9866 if (reg->type != PTR_TO_CTX) {
9867 verbose(env, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t));
9868 return -EINVAL;
9869 }
fd264ca0
YS
9870
9871 if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
9872 ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog));
9873 if (ret < 0)
9874 return -EINVAL;
9875 meta->ret_btf_id = ret;
9876 }
00b85860 9877 break;
ac9f0605
KKD
9878 case KF_ARG_PTR_TO_ALLOC_BTF_ID:
9879 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
9880 verbose(env, "arg#%d expected pointer to allocated object\n", i);
9881 return -EINVAL;
9882 }
9883 if (!reg->ref_obj_id) {
9884 verbose(env, "allocated object must be referenced\n");
9885 return -EINVAL;
9886 }
9887 if (meta->btf == btf_vmlinux &&
9888 meta->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
9889 meta->arg_obj_drop.btf = reg->btf;
9890 meta->arg_obj_drop.btf_id = reg->btf_id;
9891 }
9892 break;
00b85860
KKD
9893 case KF_ARG_PTR_TO_KPTR:
9894 if (reg->type != PTR_TO_MAP_VALUE) {
9895 verbose(env, "arg#0 expected pointer to map value\n");
9896 return -EINVAL;
9897 }
9898 ret = process_kf_arg_ptr_to_kptr(env, reg, ref_t, ref_tname, meta, i);
9899 if (ret < 0)
9900 return ret;
9901 break;
9902 case KF_ARG_PTR_TO_DYNPTR:
d96d937d
JK
9903 {
9904 enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
9905
6b75bd3d 9906 if (reg->type != PTR_TO_STACK &&
27060531 9907 reg->type != CONST_PTR_TO_DYNPTR) {
6b75bd3d 9908 verbose(env, "arg#%d expected pointer to stack or dynptr_ptr\n", i);
00b85860
KKD
9909 return -EINVAL;
9910 }
9911
d96d937d
JK
9912 if (reg->type == CONST_PTR_TO_DYNPTR)
9913 dynptr_arg_type |= MEM_RDONLY;
9914
9915 if (is_kfunc_arg_uninit(btf, &args[i]))
9916 dynptr_arg_type |= MEM_UNINIT;
9917
b5964b96
JK
9918 if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb])
9919 dynptr_arg_type |= DYNPTR_TYPE_SKB;
05421aec
JK
9920 else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp])
9921 dynptr_arg_type |= DYNPTR_TYPE_XDP;
b5964b96 9922
d96d937d 9923 ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type);
6b75bd3d
KKD
9924 if (ret < 0)
9925 return ret;
66e3a13e
JK
9926
9927 if (!(dynptr_arg_type & MEM_UNINIT)) {
9928 int id = dynptr_id(env, reg);
9929
9930 if (id < 0) {
9931 verbose(env, "verifier internal error: failed to obtain dynptr id\n");
9932 return id;
9933 }
9934 meta->initialized_dynptr.id = id;
9935 meta->initialized_dynptr.type = dynptr_get_type(env, reg);
9936 }
9937
00b85860 9938 break;
d96d937d 9939 }
8cab76ec
KKD
9940 case KF_ARG_PTR_TO_LIST_HEAD:
9941 if (reg->type != PTR_TO_MAP_VALUE &&
9942 reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
9943 verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
9944 return -EINVAL;
9945 }
9946 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
9947 verbose(env, "allocated object must be referenced\n");
9948 return -EINVAL;
9949 }
9950 ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta);
9951 if (ret < 0)
9952 return ret;
9953 break;
cd6791b4
DM
9954 case KF_ARG_PTR_TO_RB_ROOT:
9955 if (reg->type != PTR_TO_MAP_VALUE &&
9956 reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
9957 verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
9958 return -EINVAL;
9959 }
9960 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
9961 verbose(env, "allocated object must be referenced\n");
9962 return -EINVAL;
9963 }
9964 ret = process_kf_arg_ptr_to_rbtree_root(env, reg, regno, meta);
9965 if (ret < 0)
9966 return ret;
9967 break;
8cab76ec
KKD
9968 case KF_ARG_PTR_TO_LIST_NODE:
9969 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
9970 verbose(env, "arg#%d expected pointer to allocated object\n", i);
9971 return -EINVAL;
9972 }
9973 if (!reg->ref_obj_id) {
9974 verbose(env, "allocated object must be referenced\n");
9975 return -EINVAL;
9976 }
9977 ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta);
9978 if (ret < 0)
9979 return ret;
9980 break;
cd6791b4 9981 case KF_ARG_PTR_TO_RB_NODE:
a40d3632
DM
9982 if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
9983 if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
9984 verbose(env, "rbtree_remove node input must be non-owning ref\n");
9985 return -EINVAL;
9986 }
9987 if (in_rbtree_lock_required_cb(env)) {
9988 verbose(env, "rbtree_remove not allowed in rbtree cb\n");
9989 return -EINVAL;
9990 }
9991 } else {
9992 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
9993 verbose(env, "arg#%d expected pointer to allocated object\n", i);
9994 return -EINVAL;
9995 }
9996 if (!reg->ref_obj_id) {
9997 verbose(env, "allocated object must be referenced\n");
9998 return -EINVAL;
9999 }
cd6791b4 10000 }
a40d3632 10001
cd6791b4
DM
10002 ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta);
10003 if (ret < 0)
10004 return ret;
10005 break;
00b85860
KKD
10006 case KF_ARG_PTR_TO_BTF_ID:
10007 /* Only base_type is checked, further checks are done here */
3f00c523 10008 if ((base_type(reg->type) != PTR_TO_BTF_ID ||
fca1aa75 10009 (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
3f00c523
DV
10010 !reg2btf_ids[base_type(reg->type)]) {
10011 verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type));
10012 verbose(env, "expected %s or socket\n",
10013 reg_type_str(env, base_type(reg->type) |
10014 (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
00b85860
KKD
10015 return -EINVAL;
10016 }
10017 ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i);
10018 if (ret < 0)
10019 return ret;
10020 break;
10021 case KF_ARG_PTR_TO_MEM:
10022 resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
10023 if (IS_ERR(resolve_ret)) {
10024 verbose(env, "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
10025 i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret));
10026 return -EINVAL;
10027 }
10028 ret = check_mem_reg(env, reg, regno, type_size);
10029 if (ret < 0)
10030 return ret;
10031 break;
10032 case KF_ARG_PTR_TO_MEM_SIZE:
66e3a13e
JK
10033 {
10034 struct bpf_reg_state *size_reg = &regs[regno + 1];
10035 const struct btf_param *size_arg = &args[i + 1];
10036
10037 ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
00b85860
KKD
10038 if (ret < 0) {
10039 verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
10040 return ret;
10041 }
66e3a13e
JK
10042
10043 if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) {
10044 if (meta->arg_constant.found) {
10045 verbose(env, "verifier internal error: only one constant argument permitted\n");
10046 return -EFAULT;
10047 }
10048 if (!tnum_is_const(size_reg->var_off)) {
10049 verbose(env, "R%d must be a known constant\n", regno + 1);
10050 return -EINVAL;
10051 }
10052 meta->arg_constant.found = true;
10053 meta->arg_constant.value = size_reg->var_off.value;
10054 }
10055
10056 /* Skip next '__sz' or '__szk' argument */
00b85860
KKD
10057 i++;
10058 break;
66e3a13e 10059 }
5d92ddc3
DM
10060 case KF_ARG_PTR_TO_CALLBACK:
10061 meta->subprogno = reg->subprogno;
10062 break;
00b85860
KKD
10063 }
10064 }
10065
10066 if (is_kfunc_release(meta) && !meta->release_regno) {
10067 verbose(env, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
10068 func_name);
10069 return -EINVAL;
10070 }
10071
10072 return 0;
10073}
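/* Illustrative note (assumption; the kfunc below is hypothetical): several of
 * the branches above are driven purely by argument-name suffixes in the
 * kfunc's BTF prototype, e.g.
 *
 *	int bpf_foo(void *mem, u32 mem__sz, u64 flags__k);
 *
 * where "__sz" turns the preceding pointer into a mem+len pair
 * (KF_ARG_PTR_TO_MEM_SIZE) and "__k" requires a known constant scalar whose
 * value is recorded in meta->arg_constant.  "__ign" and "__uninit" similarly
 * mark ignored and to-be-initialized (dynptr) arguments.
 */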
10074
5c073f26
KKD
10075static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
10076 int *insn_idx_p)
e6ac2450
MKL
10077{
10078 const struct btf_type *t, *func, *func_proto, *ptr_type;
6a3cd331 10079 u32 i, nargs, func_id, ptr_type_id, release_ref_obj_id;
e6ac2450
MKL
10080 struct bpf_reg_state *regs = cur_regs(env);
10081 const char *func_name, *ptr_type_name;
9bb00b28 10082 bool sleepable, rcu_lock, rcu_unlock;
00b85860 10083 struct bpf_kfunc_call_arg_meta meta;
5c073f26 10084 int err, insn_idx = *insn_idx_p;
e6ac2450 10085 const struct btf_param *args;
a35b9af4 10086 const struct btf_type *ret_t;
2357672c 10087 struct btf *desc_btf;
a4703e31 10088 u32 *kfunc_flags;
e6ac2450 10089
a5d82727
KKD
10090 /* skip for now, but return error when we find this in fixup_kfunc_call */
10091 if (!insn->imm)
10092 return 0;
10093
43bf0878 10094 desc_btf = find_kfunc_desc_btf(env, insn->off);
2357672c
KKD
10095 if (IS_ERR(desc_btf))
10096 return PTR_ERR(desc_btf);
10097
e6ac2450 10098 func_id = insn->imm;
2357672c
KKD
10099 func = btf_type_by_id(desc_btf, func_id);
10100 func_name = btf_name_by_offset(desc_btf, func->name_off);
10101 func_proto = btf_type_by_id(desc_btf, func->type);
e6ac2450 10102
a4703e31
KKD
10103 kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id);
10104 if (!kfunc_flags) {
e6ac2450
MKL
10105 verbose(env, "calling kernel function %s is not allowed\n",
10106 func_name);
10107 return -EACCES;
10108 }
00b85860
KKD
10109
10110 /* Prepare kfunc call metadata */
10111 memset(&meta, 0, sizeof(meta));
10112 meta.btf = desc_btf;
10113 meta.func_id = func_id;
10114 meta.kfunc_flags = *kfunc_flags;
10115 meta.func_proto = func_proto;
10116 meta.func_name = func_name;
10117
10118 if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
10119 verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
4dd48c6f
AS
10120 return -EACCES;
10121 }
10122
9bb00b28
YS
10123 sleepable = is_kfunc_sleepable(&meta);
10124 if (sleepable && !env->prog->aux->sleepable) {
00b85860
KKD
10125 verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
10126 return -EACCES;
10127 }
eb1f7f71 10128
9bb00b28
YS
10129 rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
10130 rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
9bb00b28
YS
10131
10132 if (env->cur_state->active_rcu_lock) {
10133 struct bpf_func_state *state;
10134 struct bpf_reg_state *reg;
10135
10136 if (rcu_lock) {
10137 verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
10138 return -EINVAL;
10139 } else if (rcu_unlock) {
10140 bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
10141 if (reg->type & MEM_RCU) {
fca1aa75 10142 reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
9bb00b28
YS
10143 reg->type |= PTR_UNTRUSTED;
10144 }
10145 }));
10146 env->cur_state->active_rcu_lock = false;
10147 } else if (sleepable) {
10148 verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
10149 return -EACCES;
10150 }
10151 } else if (rcu_lock) {
10152 env->cur_state->active_rcu_lock = true;
10153 } else if (rcu_unlock) {
10154 verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
10155 return -EINVAL;
10156 }
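	/* Illustrative sketch (assumption, not from this file) of the program
	 * pattern the RCU bookkeeping above models:
	 *
	 *	bpf_rcu_read_lock();		// sets active_rcu_lock
	 *	p = task->real_parent;		// load may be marked MEM_RCU
	 *	... read-only use of p ...
	 *	bpf_rcu_read_unlock();		// MEM_RCU regs become PTR_UNTRUSTED
	 *
	 * Nested locks, unmatched unlocks and sleepable kfuncs inside the RCU
	 * section are all rejected by the checks above.
	 */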
10157
e6ac2450 10158 /* Check the arguments */
1d18feb2 10159 err = check_kfunc_args(env, &meta, insn_idx);
5c073f26 10160 if (err < 0)
e6ac2450 10161 return err;
5c073f26 10162 /* In case of a release kfunc, we get the register number of the refcounted
00b85860 10163 * PTR_TO_BTF_ID in bpf_kfunc_call_arg_meta, do the release now.
5c073f26 10164 */
00b85860
KKD
10165 if (meta.release_regno) {
10166 err = release_reference(env, regs[meta.release_regno].ref_obj_id);
5c073f26
KKD
10167 if (err) {
10168 verbose(env, "kfunc %s#%d reference has not been acquired before\n",
10169 func_name, func_id);
10170 return err;
10171 }
10172 }
e6ac2450 10173
6a3cd331 10174 if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front] ||
bd1279ae
DM
10175 meta.func_id == special_kfunc_list[KF_bpf_list_push_back] ||
10176 meta.func_id == special_kfunc_list[KF_bpf_rbtree_add]) {
6a3cd331
DM
10177 release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
10178 err = ref_convert_owning_non_owning(env, release_ref_obj_id);
10179 if (err) {
10180 verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
10181 func_name, func_id);
10182 return err;
10183 }
10184
10185 err = release_reference(env, release_ref_obj_id);
10186 if (err) {
10187 verbose(env, "kfunc %s#%d reference has not been acquired before\n",
10188 func_name, func_id);
10189 return err;
10190 }
10191 }
10192
5d92ddc3
DM
10193 if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add]) {
10194 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
10195 set_rbtree_add_callback_state);
10196 if (err) {
10197 verbose(env, "kfunc %s#%d failed callback verification\n",
10198 func_name, func_id);
10199 return err;
10200 }
10201 }
10202
e6ac2450
MKL
10203 for (i = 0; i < CALLER_SAVED_REGS; i++)
10204 mark_reg_not_init(env, regs, caller_saved[i]);
10205
10206 /* Check return type */
2357672c 10207 t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
5c073f26 10208
00b85860 10209 if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
958cf2e2
KKD
10210 /* Only exception is bpf_obj_new_impl */
10211 if (meta.btf != btf_vmlinux || meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl]) {
10212 verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
10213 return -EINVAL;
10214 }
5c073f26
KKD
10215 }
10216
e6ac2450
MKL
10217 if (btf_type_is_scalar(t)) {
10218 mark_reg_unknown(env, regs, BPF_REG_0);
10219 mark_btf_func_reg_size(env, BPF_REG_0, t->size);
10220 } else if (btf_type_is_ptr(t)) {
958cf2e2
KKD
10221 ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
10222
10223 if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
10224 if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
958cf2e2
KKD
10225 struct btf *ret_btf;
10226 u32 ret_btf_id;
10227
e181d3f1
KKD
10228 if (unlikely(!bpf_global_ma_set))
10229 return -ENOMEM;
10230
958cf2e2
KKD
10231 if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
10232 verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
10233 return -EINVAL;
10234 }
10235
10236 ret_btf = env->prog->aux->btf;
10237 ret_btf_id = meta.arg_constant.value;
10238
10239 /* This may be NULL due to user not supplying a BTF */
10240 if (!ret_btf) {
10241 verbose(env, "bpf_obj_new requires prog BTF\n");
10242 return -EINVAL;
10243 }
10244
10245 ret_t = btf_type_by_id(ret_btf, ret_btf_id);
10246 if (!ret_t || !__btf_type_is_struct(ret_t)) {
10247 verbose(env, "bpf_obj_new type ID argument must be of a struct\n");
10248 return -EINVAL;
10249 }
10250
10251 mark_reg_known_zero(env, regs, BPF_REG_0);
10252 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
10253 regs[BPF_REG_0].btf = ret_btf;
10254 regs[BPF_REG_0].btf_id = ret_btf_id;
10255
10256 env->insn_aux_data[insn_idx].obj_new_size = ret_t->size;
10257 env->insn_aux_data[insn_idx].kptr_struct_meta =
10258 btf_find_struct_meta(ret_btf, ret_btf_id);
ac9f0605
KKD
10259 } else if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
10260 env->insn_aux_data[insn_idx].kptr_struct_meta =
10261 btf_find_struct_meta(meta.arg_obj_drop.btf,
10262 meta.arg_obj_drop.btf_id);
8cab76ec
KKD
10263 } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] ||
10264 meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
10265 struct btf_field *field = meta.arg_list_head.field;
10266
a40d3632
DM
10267 mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
10268 } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
10269 meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
10270 struct btf_field *field = meta.arg_rbtree_root.field;
10271
10272 mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
fd264ca0
YS
10273 } else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
10274 mark_reg_known_zero(env, regs, BPF_REG_0);
10275 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
10276 regs[BPF_REG_0].btf = desc_btf;
10277 regs[BPF_REG_0].btf_id = meta.ret_btf_id;
a35b9af4
YS
10278 } else if (meta.func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
10279 ret_t = btf_type_by_id(desc_btf, meta.arg_constant.value);
10280 if (!ret_t || !btf_type_is_struct(ret_t)) {
10281 verbose(env,
10282 "kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
10283 return -EINVAL;
10284 }
10285
10286 mark_reg_known_zero(env, regs, BPF_REG_0);
10287 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
10288 regs[BPF_REG_0].btf = desc_btf;
10289 regs[BPF_REG_0].btf_id = meta.arg_constant.value;
66e3a13e
JK
10290 } else if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
10291 meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
10292 enum bpf_type_flag type_flag = get_dynptr_type_flag(meta.initialized_dynptr.type);
10293
10294 mark_reg_known_zero(env, regs, BPF_REG_0);
10295
10296 if (!meta.arg_constant.found) {
10297 verbose(env, "verifier internal error: bpf_dynptr_slice(_rdwr) no constant size\n");
10298 return -EFAULT;
10299 }
10300
10301 regs[BPF_REG_0].mem_size = meta.arg_constant.value;
10302
10303 /* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
10304 regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
10305
10306 if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
10307 regs[BPF_REG_0].type |= MEM_RDONLY;
10308 } else {
10309 /* this will set env->seen_direct_write to true */
10310 if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
10311 verbose(env, "the prog does not allow writes to packet data\n");
10312 return -EINVAL;
10313 }
10314 }
10315
10316 if (!meta.initialized_dynptr.id) {
10317 verbose(env, "verifier internal error: no dynptr id\n");
10318 return -EFAULT;
10319 }
10320 regs[BPF_REG_0].dynptr_id = meta.initialized_dynptr.id;
10321
10322 /* we don't need to set BPF_REG_0's ref obj id
10323 * because packet slices are not refcounted (see
10324 * dynptr_type_refcounted)
10325 */
958cf2e2
KKD
10326 } else {
10327 verbose(env, "kernel function %s unhandled dynamic return type\n",
10328 meta.func_name);
10329 return -EFAULT;
10330 }
10331 } else if (!__btf_type_is_struct(ptr_type)) {
eb1f7f71
BT
10332 if (!meta.r0_size) {
10333 ptr_type_name = btf_name_by_offset(desc_btf,
10334 ptr_type->name_off);
10335 verbose(env,
10336 "kernel function %s returns pointer type %s %s is not supported\n",
10337 func_name,
10338 btf_type_str(ptr_type),
10339 ptr_type_name);
10340 return -EINVAL;
10341 }
10342
10343 mark_reg_known_zero(env, regs, BPF_REG_0);
10344 regs[BPF_REG_0].type = PTR_TO_MEM;
10345 regs[BPF_REG_0].mem_size = meta.r0_size;
10346
10347 if (meta.r0_rdonly)
10348 regs[BPF_REG_0].type |= MEM_RDONLY;
10349
10350 /* Ensures we don't access the memory after a release_reference() */
10351 if (meta.ref_obj_id)
10352 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
10353 } else {
10354 mark_reg_known_zero(env, regs, BPF_REG_0);
10355 regs[BPF_REG_0].btf = desc_btf;
10356 regs[BPF_REG_0].type = PTR_TO_BTF_ID;
10357 regs[BPF_REG_0].btf_id = ptr_type_id;
e6ac2450 10358 }
958cf2e2 10359
00b85860 10360 if (is_kfunc_ret_null(&meta)) {
5c073f26
KKD
10361 regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
10362 /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
10363 regs[BPF_REG_0].id = ++env->id_gen;
10364 }
e6ac2450 10365 mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
00b85860 10366 if (is_kfunc_acquire(&meta)) {
5c073f26
KKD
10367 int id = acquire_reference_state(env, insn_idx);
10368
10369 if (id < 0)
10370 return id;
00b85860
KKD
10371 if (is_kfunc_ret_null(&meta))
10372 regs[BPF_REG_0].id = id;
5c073f26 10373 regs[BPF_REG_0].ref_obj_id = id;
a40d3632
DM
10374 } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
10375 ref_set_non_owning(env, &regs[BPF_REG_0]);
5c073f26 10376 }
a40d3632
DM
10377
10378 if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove])
10379 invalidate_non_owning_refs(env);
10380
00b85860
KKD
10381 if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
10382 regs[BPF_REG_0].id = ++env->id_gen;
e6ac2450
MKL
10383 } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
10384
10385 nargs = btf_type_vlen(func_proto);
10386 args = (const struct btf_param *)(func_proto + 1);
10387 for (i = 0; i < nargs; i++) {
10388 u32 regno = i + 1;
10389
2357672c 10390 t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
e6ac2450
MKL
10391 if (btf_type_is_ptr(t))
10392 mark_btf_func_reg_size(env, regno, sizeof(void *));
10393 else
10394 /* scalar. ensured by btf_check_kfunc_arg_match() */
10395 mark_btf_func_reg_size(env, regno, t->size);
10396 }
10397
10398 return 0;
10399}
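/* Illustrative note (assumption; the macro shown is roughly how the selftest
 * helpers wrap it): the KF_bpf_obj_new_impl handling above corresponds to
 * program code such as
 *
 *	struct foo *f = bpf_obj_new(typeof(*f));
 *	// expands to bpf_obj_new_impl(bpf_core_type_id_local(typeof(*f)), NULL)
 *
 * The constant local type ID becomes meta.arg_constant.value, R0 is marked
 * PTR_TO_BTF_ID | MEM_ALLOC against the program's own BTF, and the object
 * must later be inserted into a collection or freed with bpf_obj_drop().
 */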
10400
b03c9f9f
EC
10401static bool signed_add_overflows(s64 a, s64 b)
10402{
10403 /* Do the add in u64, where overflow is well-defined */
10404 s64 res = (s64)((u64)a + (u64)b);
10405
10406 if (b < 0)
10407 return res > a;
10408 return res < a;
10409}
10410
bc895e8b 10411static bool signed_add32_overflows(s32 a, s32 b)
3f50f132
JF
10412{
10413 /* Do the add in u32, where overflow is well-defined */
10414 s32 res = (s32)((u32)a + (u32)b);
10415
10416 if (b < 0)
10417 return res > a;
10418 return res < a;
10419}
10420
bc895e8b 10421static bool signed_sub_overflows(s64 a, s64 b)
b03c9f9f
EC
10422{
10423 /* Do the sub in u64, where overflow is well-defined */
10424 s64 res = (s64)((u64)a - (u64)b);
10425
10426 if (b < 0)
10427 return res < a;
10428 return res > a;
969bf05e
AS
10429}
10430
3f50f132
JF
10431static bool signed_sub32_overflows(s32 a, s32 b)
10432{
bc895e8b 10433 /* Do the sub in u32, where overflow is well-defined */
3f50f132
JF
10434 s32 res = (s32)((u32)a - (u32)b);
10435
10436 if (b < 0)
10437 return res < a;
10438 return res > a;
10439}
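/* Worked example (illustrative): signed_add_overflows(S64_MAX, 1) computes
 * res = S64_MAX + 1 in u64, which wraps to S64_MIN; with b > 0 the check
 * "res < a" fires, so the overflow is reported.  Likewise
 * signed_sub_overflows(S64_MIN, 1) wraps to S64_MAX and "res > a" fires.
 * The 32-bit variants behave the same way on s32/u32.
 */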
10440
bb7f0f98
AS
10441static bool check_reg_sane_offset(struct bpf_verifier_env *env,
10442 const struct bpf_reg_state *reg,
10443 enum bpf_reg_type type)
10444{
10445 bool known = tnum_is_const(reg->var_off);
10446 s64 val = reg->var_off.value;
10447 s64 smin = reg->smin_value;
10448
10449 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
10450 verbose(env, "math between %s pointer and %lld is not allowed\n",
c25b2ae1 10451 reg_type_str(env, type), val);
bb7f0f98
AS
10452 return false;
10453 }
10454
10455 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
10456 verbose(env, "%s pointer offset %d is not allowed\n",
c25b2ae1 10457 reg_type_str(env, type), reg->off);
bb7f0f98
AS
10458 return false;
10459 }
10460
10461 if (smin == S64_MIN) {
10462 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
c25b2ae1 10463 reg_type_str(env, type));
bb7f0f98
AS
10464 return false;
10465 }
10466
10467 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
10468 verbose(env, "value %lld makes %s pointer be out of bounds\n",
c25b2ae1 10469 smin, reg_type_str(env, type));
bb7f0f98
AS
10470 return false;
10471 }
10472
10473 return true;
10474}
10475
a6aaece0
DB
10476enum {
10477 REASON_BOUNDS = -1,
10478 REASON_TYPE = -2,
10479 REASON_PATHS = -3,
10480 REASON_LIMIT = -4,
10481 REASON_STACK = -5,
10482};
10483
979d63d5 10484static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
bb01a1bb 10485 u32 *alu_limit, bool mask_to_left)
979d63d5 10486{
7fedb63a 10487 u32 max = 0, ptr_limit = 0;
979d63d5
DB
10488
10489 switch (ptr_reg->type) {
10490 case PTR_TO_STACK:
1b1597e6 10491 /* Offset 0 is out-of-bounds, but acceptable start for the
7fedb63a
DB
10492 * left direction, see BPF_REG_FP. Also, unknown scalar
10493 * offset where we would need to deal with min/max bounds is
10494 * currently prohibited for unprivileged.
1b1597e6
PK
10495 */
10496 max = MAX_BPF_STACK + mask_to_left;
7fedb63a 10497 ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
b658bbb8 10498 break;
979d63d5 10499 case PTR_TO_MAP_VALUE:
1b1597e6 10500 max = ptr_reg->map_ptr->value_size;
7fedb63a
DB
10501 ptr_limit = (mask_to_left ?
10502 ptr_reg->smin_value :
10503 ptr_reg->umax_value) + ptr_reg->off;
b658bbb8 10504 break;
979d63d5 10505 default:
a6aaece0 10506 return REASON_TYPE;
979d63d5 10507 }
b658bbb8
DB
10508
10509 if (ptr_limit >= max)
a6aaece0 10510 return REASON_LIMIT;
b658bbb8
DB
10511 *alu_limit = ptr_limit;
10512 return 0;
979d63d5
DB
10513}
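/* Worked example (illustrative): for a PTR_TO_STACK register derived from
 * "r1 = r10; r1 += -16" we have var_off.value == 0 and off == -16, so
 * ptr_limit = -(0 + -16) = 16, well below MAX_BPF_STACK, and *alu_limit
 * becomes 16.  A PTR_TO_MAP_VALUE register is instead limited by the map's
 * value_size.  Unsupported pointer types return REASON_TYPE, and limits that
 * reach 'max' return REASON_LIMIT.
 */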
10514
d3bd7413
DB
10515static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
10516 const struct bpf_insn *insn)
10517{
2c78ee89 10518 return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
d3bd7413
DB
10519}
10520
10521static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
10522 u32 alu_state, u32 alu_limit)
10523{
10524 /* If we arrived here from different branches with different
10525 * state or limits to sanitize, then this won't work.
10526 */
10527 if (aux->alu_state &&
10528 (aux->alu_state != alu_state ||
10529 aux->alu_limit != alu_limit))
a6aaece0 10530 return REASON_PATHS;
d3bd7413 10531
e6ac5933 10532 /* Corresponding fixup done in do_misc_fixups(). */
d3bd7413
DB
10533 aux->alu_state = alu_state;
10534 aux->alu_limit = alu_limit;
10535 return 0;
10536}
10537
10538static int sanitize_val_alu(struct bpf_verifier_env *env,
10539 struct bpf_insn *insn)
10540{
10541 struct bpf_insn_aux_data *aux = cur_aux(env);
10542
10543 if (can_skip_alu_sanitation(env, insn))
10544 return 0;
10545
10546 return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
10547}
10548
f5288193
DB
10549static bool sanitize_needed(u8 opcode)
10550{
10551 return opcode == BPF_ADD || opcode == BPF_SUB;
10552}
10553
3d0220f6
DB
10554struct bpf_sanitize_info {
10555 struct bpf_insn_aux_data aux;
bb01a1bb 10556 bool mask_to_left;
3d0220f6
DB
10557};
10558
9183671a
DB
10559static struct bpf_verifier_state *
10560sanitize_speculative_path(struct bpf_verifier_env *env,
10561 const struct bpf_insn *insn,
10562 u32 next_idx, u32 curr_idx)
10563{
10564 struct bpf_verifier_state *branch;
10565 struct bpf_reg_state *regs;
10566
10567 branch = push_stack(env, next_idx, curr_idx, true);
10568 if (branch && insn) {
10569 regs = branch->frame[branch->curframe]->regs;
10570 if (BPF_SRC(insn->code) == BPF_K) {
10571 mark_reg_unknown(env, regs, insn->dst_reg);
10572 } else if (BPF_SRC(insn->code) == BPF_X) {
10573 mark_reg_unknown(env, regs, insn->dst_reg);
10574 mark_reg_unknown(env, regs, insn->src_reg);
10575 }
10576 }
10577 return branch;
10578}
10579
979d63d5
DB
10580static int sanitize_ptr_alu(struct bpf_verifier_env *env,
10581 struct bpf_insn *insn,
10582 const struct bpf_reg_state *ptr_reg,
6f55b2f2 10583 const struct bpf_reg_state *off_reg,
979d63d5 10584 struct bpf_reg_state *dst_reg,
3d0220f6 10585 struct bpf_sanitize_info *info,
7fedb63a 10586 const bool commit_window)
979d63d5 10587{
3d0220f6 10588 struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
979d63d5 10589 struct bpf_verifier_state *vstate = env->cur_state;
801c6058 10590 bool off_is_imm = tnum_is_const(off_reg->var_off);
6f55b2f2 10591 bool off_is_neg = off_reg->smin_value < 0;
979d63d5
DB
10592 bool ptr_is_dst_reg = ptr_reg == dst_reg;
10593 u8 opcode = BPF_OP(insn->code);
10594 u32 alu_state, alu_limit;
10595 struct bpf_reg_state tmp;
10596 bool ret;
f232326f 10597 int err;
979d63d5 10598
d3bd7413 10599 if (can_skip_alu_sanitation(env, insn))
979d63d5
DB
10600 return 0;
10601
10602 /* We already marked aux for masking from non-speculative
10603 * paths, thus we got here in the first place. We only care
10604 * to explore bad access from here.
10605 */
10606 if (vstate->speculative)
10607 goto do_sim;
10608
bb01a1bb
DB
10609 if (!commit_window) {
10610 if (!tnum_is_const(off_reg->var_off) &&
10611 (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
10612 return REASON_BOUNDS;
10613
10614 info->mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
10615 (opcode == BPF_SUB && !off_is_neg);
10616 }
10617
10618 err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
f232326f
PK
10619 if (err < 0)
10620 return err;
10621
7fedb63a
DB
10622 if (commit_window) {
10623 /* In commit phase we narrow the masking window based on
10624 * the observed pointer move after the simulated operation.
10625 */
3d0220f6
DB
10626 alu_state = info->aux.alu_state;
10627 alu_limit = abs(info->aux.alu_limit - alu_limit);
7fedb63a
DB
10628 } else {
10629 alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
801c6058 10630 alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
7fedb63a
DB
10631 alu_state |= ptr_is_dst_reg ?
10632 BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
e042aa53
DB
10633
10634 /* Limit pruning on unknown scalars to enable deep search for
10635 * potential masking differences from other program paths.
10636 */
10637 if (!off_is_imm)
10638 env->explore_alu_limits = true;
7fedb63a
DB
10639 }
10640
f232326f
PK
10641 err = update_alu_sanitation_state(aux, alu_state, alu_limit);
10642 if (err < 0)
10643 return err;
979d63d5 10644do_sim:
7fedb63a
DB
10645 /* If we're in commit phase, we're done here given we already
10646 * pushed the truncated dst_reg into the speculative verification
10647 * stack.
a7036191
DB
10648 *
10649 * Also, when register is a known constant, we rewrite register-based
10650 * operation to immediate-based, and thus do not need masking (and as
10651 * a consequence, do not need to simulate the zero-truncation either).
7fedb63a 10652 */
a7036191 10653 if (commit_window || off_is_imm)
7fedb63a
DB
10654 return 0;
10655
979d63d5
DB
10656 /* Simulate and find potential out-of-bounds access under
10657 * speculative execution from truncation as a result of
10658 * masking when off was not within expected range. If off
10659 * sits in dst, then we temporarily need to move ptr there
10660 * to simulate dst (== 0) +/-= ptr. Needed, for example,
10661 * for cases where we use K-based arithmetic in one direction
10662 * and truncated reg-based in the other in order to explore
10663 * bad access.
10664 */
10665 if (!ptr_is_dst_reg) {
10666 tmp = *dst_reg;
71f656a5 10667 copy_register_state(dst_reg, ptr_reg);
979d63d5 10668 }
9183671a
DB
10669 ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
10670 env->insn_idx);
0803278b 10671 if (!ptr_is_dst_reg && ret)
979d63d5 10672 *dst_reg = tmp;
a6aaece0
DB
10673 return !ret ? REASON_STACK : 0;
10674}
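/* Illustrative walk-through (assumption, simplified): for an unprivileged
 * program doing
 *
 *	r1 = r10;
 *	r1 += -16;		// pointer
 *	r1 += r2;		// r2: unknown scalar
 *
 * adjust_ptr_min_max_vals() calls this helper twice: once before the ALU op
 * (commit_window == false) to record alu_state/alu_limit and push a
 * speculative path that simulates the truncated result, and once after
 * (commit_window == true) to narrow the limit to the observed pointer move.
 * do_misc_fixups() later emits the runtime masking based on aux->alu_state
 * and aux->alu_limit.
 */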
10675
fe9a5ca7
DB
10676static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
10677{
10678 struct bpf_verifier_state *vstate = env->cur_state;
10679
10680 /* If we simulate paths under speculation, we don't update the
10681 * insn as 'seen' such that when we verify unreachable paths in
10682 * the non-speculative domain, sanitize_dead_code() can still
10683 * rewrite/sanitize them.
10684 */
10685 if (!vstate->speculative)
10686 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
10687}
10688
a6aaece0
DB
10689static int sanitize_err(struct bpf_verifier_env *env,
10690 const struct bpf_insn *insn, int reason,
10691 const struct bpf_reg_state *off_reg,
10692 const struct bpf_reg_state *dst_reg)
10693{
10694 static const char *err = "pointer arithmetic with it prohibited for !root";
10695 const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
10696 u32 dst = insn->dst_reg, src = insn->src_reg;
10697
10698 switch (reason) {
10699 case REASON_BOUNDS:
10700 verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
10701 off_reg == dst_reg ? dst : src, err);
10702 break;
10703 case REASON_TYPE:
10704 verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
10705 off_reg == dst_reg ? src : dst, err);
10706 break;
10707 case REASON_PATHS:
10708 verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
10709 dst, op, err);
10710 break;
10711 case REASON_LIMIT:
10712 verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
10713 dst, op, err);
10714 break;
10715 case REASON_STACK:
10716 verbose(env, "R%d could not be pushed for speculative verification, %s\n",
10717 dst, err);
10718 break;
10719 default:
10720 verbose(env, "verifier internal error: unknown reason (%d)\n",
10721 reason);
10722 break;
10723 }
10724
10725 return -EACCES;
979d63d5
DB
10726}
10727
01f810ac
AM
10728/* check that stack access falls within stack limits and that 'reg' doesn't
10729 * have a variable offset.
10730 *
10731 * Variable offset is prohibited for unprivileged mode for simplicity since it
10732 * requires corresponding support in Spectre masking for stack ALU. See also
10733 * retrieve_ptr_limit().
10734 *
10735 *
10736 * 'off' includes 'reg->off'.
10737 */
10738static int check_stack_access_for_ptr_arithmetic(
10739 struct bpf_verifier_env *env,
10740 int regno,
10741 const struct bpf_reg_state *reg,
10742 int off)
10743{
10744 if (!tnum_is_const(reg->var_off)) {
10745 char tn_buf[48];
10746
10747 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
10748 verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
10749 regno, tn_buf, off);
10750 return -EACCES;
10751 }
10752
10753 if (off >= 0 || off < -MAX_BPF_STACK) {
10754 verbose(env, "R%d stack pointer arithmetic goes out of range, "
10755 "prohibited for !root; off=%d\n", regno, off);
10756 return -EACCES;
10757 }
10758
10759 return 0;
10760}
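/* Worked example (illustrative): with MAX_BPF_STACK == 512, an unprivileged
 * "r1 = r10; r1 += -520" is rejected here (off == -520 < -MAX_BPF_STACK),
 * as is "r1 += 8" on the frame pointer (off >= 0), and any stack pointer
 * with a variable offset is refused outright.
 */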
10761
073815b7
DB
10762static int sanitize_check_bounds(struct bpf_verifier_env *env,
10763 const struct bpf_insn *insn,
10764 const struct bpf_reg_state *dst_reg)
10765{
10766 u32 dst = insn->dst_reg;
10767
10768 /* For unprivileged we require that resulting offset must be in bounds
10769 * in order to be able to sanitize access later on.
10770 */
10771 if (env->bypass_spec_v1)
10772 return 0;
10773
10774 switch (dst_reg->type) {
10775 case PTR_TO_STACK:
10776 if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
10777 dst_reg->off + dst_reg->var_off.value))
10778 return -EACCES;
10779 break;
10780 case PTR_TO_MAP_VALUE:
61df10c7 10781 if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
073815b7
DB
10782 verbose(env, "R%d pointer arithmetic of map value goes out of range, "
10783 "prohibited for !root\n", dst);
10784 return -EACCES;
10785 }
10786 break;
10787 default:
10788 break;
10789 }
10790
10791 return 0;
10792}
01f810ac 10793
f1174f77 10794/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
f1174f77
EC
10795 * Caller should also handle BPF_MOV case separately.
10796 * If we return -EACCES, caller may want to try again treating pointer as a
10797 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
10798 */
10799static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
10800 struct bpf_insn *insn,
10801 const struct bpf_reg_state *ptr_reg,
10802 const struct bpf_reg_state *off_reg)
969bf05e 10803{
f4d7e40a
AS
10804 struct bpf_verifier_state *vstate = env->cur_state;
10805 struct bpf_func_state *state = vstate->frame[vstate->curframe];
10806 struct bpf_reg_state *regs = state->regs, *dst_reg;
f1174f77 10807 bool known = tnum_is_const(off_reg->var_off);
b03c9f9f
EC
10808 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
10809 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
10810 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
10811 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
3d0220f6 10812 struct bpf_sanitize_info info = {};
969bf05e 10813 u8 opcode = BPF_OP(insn->code);
24c109bb 10814 u32 dst = insn->dst_reg;
979d63d5 10815 int ret;
969bf05e 10816
f1174f77 10817 dst_reg = &regs[dst];
969bf05e 10818
6f16101e
DB
10819 if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
10820 smin_val > smax_val || umin_val > umax_val) {
10821 /* Taint dst register if offset had invalid bounds derived from
10822 * e.g. dead branches.
10823 */
f54c7898 10824 __mark_reg_unknown(env, dst_reg);
6f16101e 10825 return 0;
f1174f77
EC
10826 }
10827
10828 if (BPF_CLASS(insn->code) != BPF_ALU64) {
10829 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
6c693541
YS
10830 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
10831 __mark_reg_unknown(env, dst_reg);
10832 return 0;
10833 }
10834
82abbf8d
AS
10835 verbose(env,
10836 "R%d 32-bit pointer arithmetic prohibited\n",
10837 dst);
f1174f77 10838 return -EACCES;
969bf05e
AS
10839 }
10840
c25b2ae1 10841 if (ptr_reg->type & PTR_MAYBE_NULL) {
aad2eeaf 10842 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
c25b2ae1 10843 dst, reg_type_str(env, ptr_reg->type));
f1174f77 10844 return -EACCES;
c25b2ae1
HL
10845 }
10846
10847 switch (base_type(ptr_reg->type)) {
aad2eeaf 10848 case CONST_PTR_TO_MAP:
7c696732
YS
10849 /* smin_val represents the known value */
10850 if (known && smin_val == 0 && opcode == BPF_ADD)
10851 break;
8731745e 10852 fallthrough;
aad2eeaf 10853 case PTR_TO_PACKET_END:
c64b7983 10854 case PTR_TO_SOCKET:
46f8bc92 10855 case PTR_TO_SOCK_COMMON:
655a51e5 10856 case PTR_TO_TCP_SOCK:
fada7fdc 10857 case PTR_TO_XDP_SOCK:
aad2eeaf 10858 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
c25b2ae1 10859 dst, reg_type_str(env, ptr_reg->type));
f1174f77 10860 return -EACCES;
aad2eeaf
JS
10861 default:
10862 break;
f1174f77
EC
10863 }
10864
10865 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
10866 * The id may be overwritten later if we create a new variable offset.
969bf05e 10867 */
f1174f77
EC
10868 dst_reg->type = ptr_reg->type;
10869 dst_reg->id = ptr_reg->id;
969bf05e 10870
bb7f0f98
AS
10871 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
10872 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
10873 return -EINVAL;
10874
3f50f132
JF
10875 /* pointer types do not carry 32-bit bounds at the moment. */
10876 __mark_reg32_unbounded(dst_reg);
10877
7fedb63a
DB
10878 if (sanitize_needed(opcode)) {
10879 ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
3d0220f6 10880 &info, false);
a6aaece0
DB
10881 if (ret < 0)
10882 return sanitize_err(env, insn, ret, off_reg, dst_reg);
7fedb63a 10883 }
a6aaece0 10884
f1174f77
EC
10885 switch (opcode) {
10886 case BPF_ADD:
10887 /* We can take a fixed offset as long as it doesn't overflow
10888 * the s32 'off' field
969bf05e 10889 */
b03c9f9f
EC
10890 if (known && (ptr_reg->off + smin_val ==
10891 (s64)(s32)(ptr_reg->off + smin_val))) {
f1174f77 10892 /* pointer += K. Accumulate it into fixed offset */
b03c9f9f
EC
10893 dst_reg->smin_value = smin_ptr;
10894 dst_reg->smax_value = smax_ptr;
10895 dst_reg->umin_value = umin_ptr;
10896 dst_reg->umax_value = umax_ptr;
f1174f77 10897 dst_reg->var_off = ptr_reg->var_off;
b03c9f9f 10898 dst_reg->off = ptr_reg->off + smin_val;
0962590e 10899 dst_reg->raw = ptr_reg->raw;
f1174f77
EC
10900 break;
10901 }
f1174f77
EC
10902 /* A new variable offset is created. Note that off_reg->off
10903 * == 0, since it's a scalar.
10904 * dst_reg gets the pointer type and since some positive
10905 * integer value was added to the pointer, give it a new 'id'
10906 * if it's a PTR_TO_PACKET.
10907 * this creates a new 'base' pointer, off_reg (variable) gets
10908 * added into the variable offset, and we copy the fixed offset
10909 * from ptr_reg.
969bf05e 10910 */
b03c9f9f
EC
10911 if (signed_add_overflows(smin_ptr, smin_val) ||
10912 signed_add_overflows(smax_ptr, smax_val)) {
10913 dst_reg->smin_value = S64_MIN;
10914 dst_reg->smax_value = S64_MAX;
10915 } else {
10916 dst_reg->smin_value = smin_ptr + smin_val;
10917 dst_reg->smax_value = smax_ptr + smax_val;
10918 }
10919 if (umin_ptr + umin_val < umin_ptr ||
10920 umax_ptr + umax_val < umax_ptr) {
10921 dst_reg->umin_value = 0;
10922 dst_reg->umax_value = U64_MAX;
10923 } else {
10924 dst_reg->umin_value = umin_ptr + umin_val;
10925 dst_reg->umax_value = umax_ptr + umax_val;
10926 }
f1174f77
EC
10927 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
10928 dst_reg->off = ptr_reg->off;
0962590e 10929 dst_reg->raw = ptr_reg->raw;
de8f3a83 10930 if (reg_is_pkt_pointer(ptr_reg)) {
f1174f77
EC
10931 dst_reg->id = ++env->id_gen;
10932 /* something was added to pkt_ptr, set range to zero */
22dc4a0f 10933 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
f1174f77
EC
10934 }
10935 break;
10936 case BPF_SUB:
10937 if (dst_reg == off_reg) {
10938 /* scalar -= pointer. Creates an unknown scalar */
82abbf8d
AS
10939 verbose(env, "R%d tried to subtract pointer from scalar\n",
10940 dst);
f1174f77
EC
10941 return -EACCES;
10942 }
10943 /* We don't allow subtraction from FP, because (according to
10944 * test_verifier.c test "invalid fp arithmetic", JITs might not
10945 * be able to deal with it.
969bf05e 10946 */
f1174f77 10947 if (ptr_reg->type == PTR_TO_STACK) {
82abbf8d
AS
10948 verbose(env, "R%d subtraction from stack pointer prohibited\n",
10949 dst);
f1174f77
EC
10950 return -EACCES;
10951 }
b03c9f9f
EC
10952 if (known && (ptr_reg->off - smin_val ==
10953 (s64)(s32)(ptr_reg->off - smin_val))) {
f1174f77 10954 /* pointer -= K. Subtract it from fixed offset */
b03c9f9f
EC
10955 dst_reg->smin_value = smin_ptr;
10956 dst_reg->smax_value = smax_ptr;
10957 dst_reg->umin_value = umin_ptr;
10958 dst_reg->umax_value = umax_ptr;
f1174f77
EC
10959 dst_reg->var_off = ptr_reg->var_off;
10960 dst_reg->id = ptr_reg->id;
b03c9f9f 10961 dst_reg->off = ptr_reg->off - smin_val;
0962590e 10962 dst_reg->raw = ptr_reg->raw;
f1174f77
EC
10963 break;
10964 }
f1174f77
EC
10965 /* A new variable offset is created. If the subtrahend is known
10966 * nonnegative, then any reg->range we had before is still good.
969bf05e 10967 */
b03c9f9f
EC
10968 if (signed_sub_overflows(smin_ptr, smax_val) ||
10969 signed_sub_overflows(smax_ptr, smin_val)) {
10970 /* Overflow possible, we know nothing */
10971 dst_reg->smin_value = S64_MIN;
10972 dst_reg->smax_value = S64_MAX;
10973 } else {
10974 dst_reg->smin_value = smin_ptr - smax_val;
10975 dst_reg->smax_value = smax_ptr - smin_val;
10976 }
10977 if (umin_ptr < umax_val) {
10978 /* Overflow possible, we know nothing */
10979 dst_reg->umin_value = 0;
10980 dst_reg->umax_value = U64_MAX;
10981 } else {
10982 /* Cannot overflow (as long as bounds are consistent) */
10983 dst_reg->umin_value = umin_ptr - umax_val;
10984 dst_reg->umax_value = umax_ptr - umin_val;
10985 }
f1174f77
EC
10986 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
10987 dst_reg->off = ptr_reg->off;
0962590e 10988 dst_reg->raw = ptr_reg->raw;
de8f3a83 10989 if (reg_is_pkt_pointer(ptr_reg)) {
f1174f77
EC
10990 dst_reg->id = ++env->id_gen;
10991 /* pkt_ptr may have advanced (negative value subtracted), set range to zero */
b03c9f9f 10992 if (smin_val < 0)
22dc4a0f 10993 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
43188702 10994 }
f1174f77
EC
10995 break;
10996 case BPF_AND:
10997 case BPF_OR:
10998 case BPF_XOR:
82abbf8d
AS
10999 /* bitwise ops on pointers are troublesome, prohibit. */
11000 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
11001 dst, bpf_alu_string[opcode >> 4]);
f1174f77
EC
11002 return -EACCES;
11003 default:
11004 /* other operators (e.g. MUL,LSH) produce non-pointer results */
82abbf8d
AS
11005 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
11006 dst, bpf_alu_string[opcode >> 4]);
f1174f77 11007 return -EACCES;
43188702
JF
11008 }
11009
bb7f0f98
AS
11010 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
11011 return -EINVAL;
3844d153 11012 reg_bounds_sync(dst_reg);
073815b7
DB
11013 if (sanitize_check_bounds(env, insn, dst_reg) < 0)
11014 return -EACCES;
7fedb63a
DB
11015 if (sanitize_needed(opcode)) {
11016 ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
3d0220f6 11017 &info, true);
7fedb63a
DB
11018 if (ret < 0)
11019 return sanitize_err(env, insn, ret, off_reg, dst_reg);
0d6303db
DB
11020 }
11021
43188702
JF
11022 return 0;
11023}
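/* Illustrative example (assumption, simplified): "r1 = r10; r1 += -8" takes
 * the "pointer += K" branch above and folds the constant into dst_reg->off,
 * while "r1 += r2" with a bounded unknown r2 takes the variable-offset path,
 * combining the tnums with tnum_add() and widening the s64/u64 bounds; for
 * packet pointers it also assigns a fresh id and clears the range.
 */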
11024
3f50f132
JF
11025static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
11026 struct bpf_reg_state *src_reg)
11027{
11028 s32 smin_val = src_reg->s32_min_value;
11029 s32 smax_val = src_reg->s32_max_value;
11030 u32 umin_val = src_reg->u32_min_value;
11031 u32 umax_val = src_reg->u32_max_value;
11032
11033 if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
11034 signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
11035 dst_reg->s32_min_value = S32_MIN;
11036 dst_reg->s32_max_value = S32_MAX;
11037 } else {
11038 dst_reg->s32_min_value += smin_val;
11039 dst_reg->s32_max_value += smax_val;
11040 }
11041 if (dst_reg->u32_min_value + umin_val < umin_val ||
11042 dst_reg->u32_max_value + umax_val < umax_val) {
11043 dst_reg->u32_min_value = 0;
11044 dst_reg->u32_max_value = U32_MAX;
11045 } else {
11046 dst_reg->u32_min_value += umin_val;
11047 dst_reg->u32_max_value += umax_val;
11048 }
11049}
11050
07cd2631
JF
11051static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
11052 struct bpf_reg_state *src_reg)
11053{
11054 s64 smin_val = src_reg->smin_value;
11055 s64 smax_val = src_reg->smax_value;
11056 u64 umin_val = src_reg->umin_value;
11057 u64 umax_val = src_reg->umax_value;
11058
11059 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
11060 signed_add_overflows(dst_reg->smax_value, smax_val)) {
11061 dst_reg->smin_value = S64_MIN;
11062 dst_reg->smax_value = S64_MAX;
11063 } else {
11064 dst_reg->smin_value += smin_val;
11065 dst_reg->smax_value += smax_val;
11066 }
11067 if (dst_reg->umin_value + umin_val < umin_val ||
11068 dst_reg->umax_value + umax_val < umax_val) {
11069 dst_reg->umin_value = 0;
11070 dst_reg->umax_value = U64_MAX;
11071 } else {
11072 dst_reg->umin_value += umin_val;
11073 dst_reg->umax_value += umax_val;
11074 }
3f50f132
JF
11075}
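/* Illustrative sketch, not part of the verifier: the unsigned checks in
 * scalar_min_max_add() rely on wraparound, i.e. for u64 values a and b
 * the sum wraps past U64_MAX exactly when a + b < b. A hypothetical
 * standalone helper making that test explicit:
 */
static inline bool example_u64_add_wraps(u64 a, u64 b)
{
	return a + b < b;	/* true iff the addition wrapped */
}
/* E.g. dst in [10, 20] plus src in [3, 5] yields [13, 25]; if the upper
 * sum would wrap, both unsigned bounds are reset to [0, U64_MAX].
 */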
11076
11077static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
11078 struct bpf_reg_state *src_reg)
11079{
11080 s32 smin_val = src_reg->s32_min_value;
11081 s32 smax_val = src_reg->s32_max_value;
11082 u32 umin_val = src_reg->u32_min_value;
11083 u32 umax_val = src_reg->u32_max_value;
11084
11085 if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
11086 signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
11087 /* Overflow possible, we know nothing */
11088 dst_reg->s32_min_value = S32_MIN;
11089 dst_reg->s32_max_value = S32_MAX;
11090 } else {
11091 dst_reg->s32_min_value -= smax_val;
11092 dst_reg->s32_max_value -= smin_val;
11093 }
11094 if (dst_reg->u32_min_value < umax_val) {
11095 /* Overflow possible, we know nothing */
11096 dst_reg->u32_min_value = 0;
11097 dst_reg->u32_max_value = U32_MAX;
11098 } else {
11099 /* Cannot overflow (as long as bounds are consistent) */
11100 dst_reg->u32_min_value -= umax_val;
11101 dst_reg->u32_max_value -= umin_val;
11102 }
07cd2631
JF
11103}
11104
11105static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
11106 struct bpf_reg_state *src_reg)
11107{
11108 s64 smin_val = src_reg->smin_value;
11109 s64 smax_val = src_reg->smax_value;
11110 u64 umin_val = src_reg->umin_value;
11111 u64 umax_val = src_reg->umax_value;
11112
11113 if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
11114 signed_sub_overflows(dst_reg->smax_value, smin_val)) {
11115 /* Overflow possible, we know nothing */
11116 dst_reg->smin_value = S64_MIN;
11117 dst_reg->smax_value = S64_MAX;
11118 } else {
11119 dst_reg->smin_value -= smax_val;
11120 dst_reg->smax_value -= smin_val;
11121 }
11122 if (dst_reg->umin_value < umax_val) {
11123 /* Overflow possible, we know nothing */
11124 dst_reg->umin_value = 0;
11125 dst_reg->umax_value = U64_MAX;
11126 } else {
11127 /* Cannot overflow (as long as bounds are consistent) */
11128 dst_reg->umin_value -= umax_val;
11129 dst_reg->umax_value -= umin_val;
11130 }
3f50f132
JF
11131}
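/* Worked example (illustrative only): with dst in [10, 100] and src in
 * [3, 7], scalar_min_max_sub() computes [10 - 7, 100 - 3] = [3, 97].
 * If dst were [5, 100] instead, dst->umin_value (5) < src umax_val (7),
 * so the true minimum could wrap below zero and the unsigned bounds are
 * conservatively reset to [0, U64_MAX], leaving var_off to refine them.
 */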
11132
11133static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
11134 struct bpf_reg_state *src_reg)
11135{
11136 s32 smin_val = src_reg->s32_min_value;
11137 u32 umin_val = src_reg->u32_min_value;
11138 u32 umax_val = src_reg->u32_max_value;
11139
11140 if (smin_val < 0 || dst_reg->s32_min_value < 0) {
11141 /* Ain't nobody got time to multiply that sign */
11142 __mark_reg32_unbounded(dst_reg);
11143 return;
11144 }
11145 /* Both values are positive, so we can work with unsigned and
11146 * copy the result to signed (unless it exceeds S32_MAX).
11147 */
11148 if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
11149 /* Potential overflow, we know nothing */
11150 __mark_reg32_unbounded(dst_reg);
11151 return;
11152 }
11153 dst_reg->u32_min_value *= umin_val;
11154 dst_reg->u32_max_value *= umax_val;
11155 if (dst_reg->u32_max_value > S32_MAX) {
11156 /* Overflow possible, we know nothing */
11157 dst_reg->s32_min_value = S32_MIN;
11158 dst_reg->s32_max_value = S32_MAX;
11159 } else {
11160 dst_reg->s32_min_value = dst_reg->u32_min_value;
11161 dst_reg->s32_max_value = dst_reg->u32_max_value;
11162 }
07cd2631
JF
11163}
11164
11165static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
11166 struct bpf_reg_state *src_reg)
11167{
11168 s64 smin_val = src_reg->smin_value;
11169 u64 umin_val = src_reg->umin_value;
11170 u64 umax_val = src_reg->umax_value;
11171
07cd2631
JF
11172 if (smin_val < 0 || dst_reg->smin_value < 0) {
11173 /* Ain't nobody got time to multiply that sign */
3f50f132 11174 __mark_reg64_unbounded(dst_reg);
07cd2631
JF
11175 return;
11176 }
11177 /* Both values are positive, so we can work with unsigned and
11178 * copy the result to signed (unless it exceeds S64_MAX).
11179 */
11180 if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
11181 /* Potential overflow, we know nothing */
3f50f132 11182 __mark_reg64_unbounded(dst_reg);
07cd2631
JF
11183 return;
11184 }
11185 dst_reg->umin_value *= umin_val;
11186 dst_reg->umax_value *= umax_val;
11187 if (dst_reg->umax_value > S64_MAX) {
11188 /* Overflow possible, we know nothing */
11189 dst_reg->smin_value = S64_MIN;
11190 dst_reg->smax_value = S64_MAX;
11191 } else {
11192 dst_reg->smin_value = dst_reg->umin_value;
11193 dst_reg->smax_value = dst_reg->umax_value;
11194 }
11195}
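/* Illustrative sketch, not part of the verifier: the U32_MAX cap in
 * scalar_min_max_mul() guarantees the u64 product cannot wrap, since
 * (2^32 - 1) * (2^32 - 1) = 2^64 - 2^33 + 1 < 2^64. A hypothetical
 * helper expressing the same conservative policy:
 */
static inline bool example_u64_mul_may_wrap(u64 a_max, u64 b_max)
{
	/* refuse to track the product unless both operands fit in 32 bits */
	return a_max > U32_MAX || b_max > U32_MAX;
}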
11196
3f50f132
JF
11197static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
11198 struct bpf_reg_state *src_reg)
11199{
11200 bool src_known = tnum_subreg_is_const(src_reg->var_off);
11201 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
11202 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
11203 s32 smin_val = src_reg->s32_min_value;
11204 u32 umax_val = src_reg->u32_max_value;
11205
049c4e13
DB
11206 if (src_known && dst_known) {
11207 __mark_reg32_known(dst_reg, var32_off.value);
3f50f132 11208 return;
049c4e13 11209 }
3f50f132
JF
11210
11211 /* We get our minimum from the var_off, since that's inherently
11212 * bitwise. Our maximum is the minimum of the operands' maxima.
11213 */
11214 dst_reg->u32_min_value = var32_off.value;
11215 dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
11216 if (dst_reg->s32_min_value < 0 || smin_val < 0) {
11217 /* Lose signed bounds when ANDing negative numbers,
11218 * ain't nobody got time for that.
11219 */
11220 dst_reg->s32_min_value = S32_MIN;
11221 dst_reg->s32_max_value = S32_MAX;
11222 } else {
11223 /* ANDing two positives gives a positive, so safe to
11224 * cast result into s32.
11225 */
11226 dst_reg->s32_min_value = dst_reg->u32_min_value;
11227 dst_reg->s32_max_value = dst_reg->u32_max_value;
11228 }
3f50f132
JF
11229}
11230
07cd2631
JF
11231static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
11232 struct bpf_reg_state *src_reg)
11233{
3f50f132
JF
11234 bool src_known = tnum_is_const(src_reg->var_off);
11235 bool dst_known = tnum_is_const(dst_reg->var_off);
07cd2631
JF
11236 s64 smin_val = src_reg->smin_value;
11237 u64 umax_val = src_reg->umax_value;
11238
3f50f132 11239 if (src_known && dst_known) {
4fbb38a3 11240 __mark_reg_known(dst_reg, dst_reg->var_off.value);
3f50f132
JF
11241 return;
11242 }
11243
07cd2631
JF
11244 /* We get our minimum from the var_off, since that's inherently
11245 * bitwise. Our maximum is the minimum of the operands' maxima.
11246 */
07cd2631
JF
11247 dst_reg->umin_value = dst_reg->var_off.value;
11248 dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
11249 if (dst_reg->smin_value < 0 || smin_val < 0) {
11250 /* Lose signed bounds when ANDing negative numbers,
11251 * ain't nobody got time for that.
11252 */
11253 dst_reg->smin_value = S64_MIN;
11254 dst_reg->smax_value = S64_MAX;
11255 } else {
11256 /* ANDing two positives gives a positive, so safe to
11257 * cast result into s64.
11258 */
11259 dst_reg->smin_value = dst_reg->umin_value;
11260 dst_reg->smax_value = dst_reg->umax_value;
11261 }
11262 /* We may learn something more from the var_off */
11263 __update_reg_bounds(dst_reg);
11264}
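/* Worked example (illustrative only): dst unknown in [0, 255] ANDed
 * with a constant src of 0x0f. tnum_and() in the caller clears every
 * bit that is zero in the constant, so var_off becomes (value=0,
 * mask=0x0f); the minimum is var_off.value = 0 and the maximum is
 * min(255, 0x0f) = 15. With both operands non-negative, the signed
 * bounds simply mirror the unsigned ones.
 */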
11265
3f50f132
JF
11266static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
11267 struct bpf_reg_state *src_reg)
11268{
11269 bool src_known = tnum_subreg_is_const(src_reg->var_off);
11270 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
11271 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
11272 s32 smin_val = src_reg->s32_min_value;
11273 u32 umin_val = src_reg->u32_min_value;
3f50f132 11274
049c4e13
DB
11275 if (src_known && dst_known) {
11276 __mark_reg32_known(dst_reg, var32_off.value);
3f50f132 11277 return;
049c4e13 11278 }
3f50f132
JF
11279
11280 /* We get our maximum from the var_off, and our minimum is the
11281 * maximum of the operands' minima
11282 */
11283 dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
11284 dst_reg->u32_max_value = var32_off.value | var32_off.mask;
11285 if (dst_reg->s32_min_value < 0 || smin_val < 0) {
11286 /* Lose signed bounds when ORing negative numbers,
11287 * ain't nobody got time for that.
11288 */
11289 dst_reg->s32_min_value = S32_MIN;
11290 dst_reg->s32_max_value = S32_MAX;
11291 } else {
11292 /* ORing two positives gives a positive, so safe to
11293 * cast result into s32.
11294 */
11295 dst_reg->s32_min_value = dst_reg->u32_min_value;
11296 dst_reg->s32_max_value = dst_reg->u32_max_value;
3f50f132
JF
11297 }
11298}
11299
07cd2631
JF
11300static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
11301 struct bpf_reg_state *src_reg)
11302{
3f50f132
JF
11303 bool src_known = tnum_is_const(src_reg->var_off);
11304 bool dst_known = tnum_is_const(dst_reg->var_off);
07cd2631
JF
11305 s64 smin_val = src_reg->smin_value;
11306 u64 umin_val = src_reg->umin_value;
11307
3f50f132 11308 if (src_known && dst_known) {
4fbb38a3 11309 __mark_reg_known(dst_reg, dst_reg->var_off.value);
3f50f132
JF
11310 return;
11311 }
11312
07cd2631
JF
11313 /* We get our maximum from the var_off, and our minimum is the
11314 * maximum of the operands' minima
11315 */
07cd2631
JF
11316 dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
11317 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
11318 if (dst_reg->smin_value < 0 || smin_val < 0) {
11319 /* Lose signed bounds when ORing negative numbers,
11320 * ain't nobody got time for that.
11321 */
11322 dst_reg->smin_value = S64_MIN;
11323 dst_reg->smax_value = S64_MAX;
11324 } else {
11325 /* ORing two positives gives a positive, so safe to
11326 * cast result into s64.
11327 */
11328 dst_reg->smin_value = dst_reg->umin_value;
11329 dst_reg->smax_value = dst_reg->umax_value;
11330 }
11331 /* We may learn something more from the var_off */
11332 __update_reg_bounds(dst_reg);
11333}
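/* Worked example (illustrative only): dst unknown in [0, 15] ORed with
 * a constant src of 0x10. tnum_or() in the caller makes bit 4 known
 * one, so var_off is (value=0x10, mask=0x0f); the minimum is
 * max(0, 0x10) = 16 and the maximum is value | mask = 0x1f = 31.
 */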
11334
2921c90d
YS
11335static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
11336 struct bpf_reg_state *src_reg)
11337{
11338 bool src_known = tnum_subreg_is_const(src_reg->var_off);
11339 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
11340 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
11341 s32 smin_val = src_reg->s32_min_value;
11342
049c4e13
DB
11343 if (src_known && dst_known) {
11344 __mark_reg32_known(dst_reg, var32_off.value);
2921c90d 11345 return;
049c4e13 11346 }
2921c90d
YS
11347
11348 /* We get both minimum and maximum from the var32_off. */
11349 dst_reg->u32_min_value = var32_off.value;
11350 dst_reg->u32_max_value = var32_off.value | var32_off.mask;
11351
11352 if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
11353 /* XORing two positive sign numbers gives a positive,
11354 * so safe to cast u32 result into s32.
11355 */
11356 dst_reg->s32_min_value = dst_reg->u32_min_value;
11357 dst_reg->s32_max_value = dst_reg->u32_max_value;
11358 } else {
11359 dst_reg->s32_min_value = S32_MIN;
11360 dst_reg->s32_max_value = S32_MAX;
11361 }
11362}
11363
11364static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
11365 struct bpf_reg_state *src_reg)
11366{
11367 bool src_known = tnum_is_const(src_reg->var_off);
11368 bool dst_known = tnum_is_const(dst_reg->var_off);
11369 s64 smin_val = src_reg->smin_value;
11370
11371 if (src_known && dst_known) {
11372 /* dst_reg->var_off.value has been updated earlier */
11373 __mark_reg_known(dst_reg, dst_reg->var_off.value);
11374 return;
11375 }
11376
11377 /* We get both minimum and maximum from the var_off. */
11378 dst_reg->umin_value = dst_reg->var_off.value;
11379 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
11380
11381 if (dst_reg->smin_value >= 0 && smin_val >= 0) {
11382 /* XORing two positive sign numbers gives a positive,
11383 * so safe to cast u64 result into s64.
11384 */
11385 dst_reg->smin_value = dst_reg->umin_value;
11386 dst_reg->smax_value = dst_reg->umax_value;
11387 } else {
11388 dst_reg->smin_value = S64_MIN;
11389 dst_reg->smax_value = S64_MAX;
11390 }
11391
11392 __update_reg_bounds(dst_reg);
11393}
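/* Worked example (illustrative only): dst known to be 0xf0 XORed with
 * src unknown in [0, 0x0f]. tnum_xor() in the caller yields
 * (value=0xf0, mask=0x0f), so both bounds come straight from the tnum:
 * [0xf0, 0xf0 | 0x0f] = [240, 255].
 */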
11394
3f50f132
JF
11395static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
11396 u64 umin_val, u64 umax_val)
07cd2631 11397{
07cd2631
JF
11398 /* We lose all sign bit information (except what we can pick
11399 * up from var_off)
11400 */
3f50f132
JF
11401 dst_reg->s32_min_value = S32_MIN;
11402 dst_reg->s32_max_value = S32_MAX;
11403 /* If we might shift our top bit out, then we know nothing */
11404 if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
11405 dst_reg->u32_min_value = 0;
11406 dst_reg->u32_max_value = U32_MAX;
11407 } else {
11408 dst_reg->u32_min_value <<= umin_val;
11409 dst_reg->u32_max_value <<= umax_val;
11410 }
11411}
11412
11413static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
11414 struct bpf_reg_state *src_reg)
11415{
11416 u32 umax_val = src_reg->u32_max_value;
11417 u32 umin_val = src_reg->u32_min_value;
11418 /* u32 alu operation will zext upper bits */
11419 struct tnum subreg = tnum_subreg(dst_reg->var_off);
11420
11421 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
11422 dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
11423 /* Not strictly required, but to be careful mark the reg64 bounds as
11424 * unknown so that we are forced to pick them up from the tnum and
11425 * zero-extend later; if some path skips this step we are still safe.
11426 */
11427 __mark_reg64_unbounded(dst_reg);
11428 __update_reg32_bounds(dst_reg);
11429}
11430
11431static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
11432 u64 umin_val, u64 umax_val)
11433{
11434 /* Special case <<32 because it is a common compiler pattern to sign
11435 * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
11436 * positive we know this shift will also be positive so we can track
11437 * bounds correctly. Otherwise we lose all sign bit information except
11438 * what we can pick up from var_off. Perhaps we can generalize this
11439 * later to shifts of any length.
11440 */
11441 if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
11442 dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
11443 else
11444 dst_reg->smax_value = S64_MAX;
11445
11446 if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
11447 dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
11448 else
11449 dst_reg->smin_value = S64_MIN;
11450
07cd2631
JF
11451 /* If we might shift our top bit out, then we know nothing */
11452 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
11453 dst_reg->umin_value = 0;
11454 dst_reg->umax_value = U64_MAX;
11455 } else {
11456 dst_reg->umin_value <<= umin_val;
11457 dst_reg->umax_value <<= umax_val;
11458 }
3f50f132
JF
11459}
11460
11461static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
11462 struct bpf_reg_state *src_reg)
11463{
11464 u64 umax_val = src_reg->umax_value;
11465 u64 umin_val = src_reg->umin_value;
11466
11467 /* scalar64 calc uses 32bit unshifted bounds so must be called first */
11468 __scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
11469 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
11470
07cd2631
JF
11471 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
11472 /* We may learn something more from the var_off */
11473 __update_reg_bounds(dst_reg);
11474}
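/* Illustrative sketch, not part of the verifier: the "<< 32 then
 * s>> 32" idiom special-cased in __scalar64_min_max_lsh() is how
 * compilers sign-extend a 32-bit subregister to 64 bits. In kernel C
 * (which assumes arithmetic right shift of signed values) it looks
 * like:
 */
static inline s64 example_sext32(u64 lo32)
{
	/* BPF_LSH by 32 then BPF_ARSH by 32 in ALU64 */
	return (s64)(lo32 << 32) >> 32;
}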
11475
3f50f132
JF
11476static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
11477 struct bpf_reg_state *src_reg)
11478{
11479 struct tnum subreg = tnum_subreg(dst_reg->var_off);
11480 u32 umax_val = src_reg->u32_max_value;
11481 u32 umin_val = src_reg->u32_min_value;
11482
11483 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
11484 * be negative, then either:
11485 * 1) src_reg might be zero, so the sign bit of the result is
11486 * unknown, so we lose our signed bounds
11487 * 2) it's known negative, thus the unsigned bounds capture the
11488 * signed bounds
11489 * 3) the signed bounds cross zero, so they tell us nothing
11490 * about the result
11491 * If the value in dst_reg is known nonnegative, then again the
18b24d78 11492 * unsigned bounds capture the signed bounds.
3f50f132
JF
11493 * Thus, in all cases it suffices to blow away our signed bounds
11494 * and rely on inferring new ones from the unsigned bounds and
11495 * var_off of the result.
11496 */
11497 dst_reg->s32_min_value = S32_MIN;
11498 dst_reg->s32_max_value = S32_MAX;
11499
11500 dst_reg->var_off = tnum_rshift(subreg, umin_val);
11501 dst_reg->u32_min_value >>= umax_val;
11502 dst_reg->u32_max_value >>= umin_val;
11503
11504 __mark_reg64_unbounded(dst_reg);
11505 __update_reg32_bounds(dst_reg);
11506}
11507
07cd2631
JF
11508static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
11509 struct bpf_reg_state *src_reg)
11510{
11511 u64 umax_val = src_reg->umax_value;
11512 u64 umin_val = src_reg->umin_value;
11513
11514 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
11515 * be negative, then either:
11516 * 1) src_reg might be zero, so the sign bit of the result is
11517 * unknown, so we lose our signed bounds
11518 * 2) it's known negative, thus the unsigned bounds capture the
11519 * signed bounds
11520 * 3) the signed bounds cross zero, so they tell us nothing
11521 * about the result
11522 * If the value in dst_reg is known nonnegative, then again the
18b24d78 11523 * unsigned bounds capture the signed bounds.
07cd2631
JF
11524 * Thus, in all cases it suffices to blow away our signed bounds
11525 * and rely on inferring new ones from the unsigned bounds and
11526 * var_off of the result.
11527 */
11528 dst_reg->smin_value = S64_MIN;
11529 dst_reg->smax_value = S64_MAX;
11530 dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
11531 dst_reg->umin_value >>= umax_val;
11532 dst_reg->umax_value >>= umin_val;
3f50f132
JF
11533
11534 /* It's not easy to operate on alu32 bounds here because the result
11535 * depends on bits being shifted in. Take the easy way out and mark
11536 * them unbounded so we can recalculate later from the tnum.
11537 */
11538 __mark_reg32_unbounded(dst_reg);
07cd2631
JF
11539 __update_reg_bounds(dst_reg);
11540}
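/* Worked example (illustrative only): dst in [0x100, 0x1000] shifted
 * right by an amount in [4, 8]. The smallest result pairs the smallest
 * value with the largest shift (0x100 >> 8 = 1), the largest result
 * pairs the largest value with the smallest shift (0x1000 >> 4 = 0x100),
 * so the new range is [1, 0x100]; the signed bounds are discarded and
 * later recovered from var_off.
 */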
11541
3f50f132
JF
11542static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
11543 struct bpf_reg_state *src_reg)
07cd2631 11544{
3f50f132 11545 u64 umin_val = src_reg->u32_min_value;
07cd2631
JF
11546
11547 /* Upon reaching here, src_known is true and
11548 * umax_val is equal to umin_val.
11549 */
3f50f132
JF
11550 dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
11551 dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
07cd2631 11552
3f50f132
JF
11553 dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
11554
11555 /* blow away the dst_reg umin_value/umax_value and rely on
11556 * dst_reg var_off to refine the result.
11557 */
11558 dst_reg->u32_min_value = 0;
11559 dst_reg->u32_max_value = U32_MAX;
11560
11561 __mark_reg64_unbounded(dst_reg);
11562 __update_reg32_bounds(dst_reg);
11563}
11564
11565static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
11566 struct bpf_reg_state *src_reg)
11567{
11568 u64 umin_val = src_reg->umin_value;
11569
11570 /* Upon reaching here, src_known is true and umax_val is equal
11571 * to umin_val.
11572 */
11573 dst_reg->smin_value >>= umin_val;
11574 dst_reg->smax_value >>= umin_val;
11575
11576 dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
07cd2631
JF
11577
11578 /* blow away the dst_reg umin_value/umax_value and rely on
11579 * dst_reg var_off to refine the result.
11580 */
11581 dst_reg->umin_value = 0;
11582 dst_reg->umax_value = U64_MAX;
3f50f132
JF
11583
11584 /* It's not easy to operate on alu32 bounds here because the result
11585 * depends on bits shifted in from the upper 32 bits. Take the easy
11586 * way out and mark them unbounded so we can recalculate from the tnum.
11587 */
11588 __mark_reg32_unbounded(dst_reg);
07cd2631
JF
11589 __update_reg_bounds(dst_reg);
11590}
11591
468f6eaf
JH
11592/* WARNING: This function does calculations on 64-bit values, but the actual
11593 * execution may occur on 32-bit values. Therefore, things like bitshifts
11594 * need extra checks in the 32-bit case.
11595 */
f1174f77
EC
11596static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
11597 struct bpf_insn *insn,
11598 struct bpf_reg_state *dst_reg,
11599 struct bpf_reg_state src_reg)
969bf05e 11600{
638f5b90 11601 struct bpf_reg_state *regs = cur_regs(env);
48461135 11602 u8 opcode = BPF_OP(insn->code);
b0b3fb67 11603 bool src_known;
b03c9f9f
EC
11604 s64 smin_val, smax_val;
11605 u64 umin_val, umax_val;
3f50f132
JF
11606 s32 s32_min_val, s32_max_val;
11607 u32 u32_min_val, u32_max_val;
468f6eaf 11608 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
3f50f132 11609 bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
a6aaece0 11610 int ret;
b799207e 11611
b03c9f9f
EC
11612 smin_val = src_reg.smin_value;
11613 smax_val = src_reg.smax_value;
11614 umin_val = src_reg.umin_value;
11615 umax_val = src_reg.umax_value;
f23cc643 11616
3f50f132
JF
11617 s32_min_val = src_reg.s32_min_value;
11618 s32_max_val = src_reg.s32_max_value;
11619 u32_min_val = src_reg.u32_min_value;
11620 u32_max_val = src_reg.u32_max_value;
11621
11622 if (alu32) {
11623 src_known = tnum_subreg_is_const(src_reg.var_off);
11624 if ((src_known &&
11625 (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
11626 s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
11627 /* Taint dst register if offset had invalid bounds
11628 * derived from e.g. dead branches.
11629 */
11630 __mark_reg_unknown(env, dst_reg);
11631 return 0;
11632 }
11633 } else {
11634 src_known = tnum_is_const(src_reg.var_off);
11635 if ((src_known &&
11636 (smin_val != smax_val || umin_val != umax_val)) ||
11637 smin_val > smax_val || umin_val > umax_val) {
11638 /* Taint dst register if offset had invalid bounds
11639 * derived from e.g. dead branches.
11640 */
11641 __mark_reg_unknown(env, dst_reg);
11642 return 0;
11643 }
6f16101e
DB
11644 }
11645
bb7f0f98
AS
11646 if (!src_known &&
11647 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
f54c7898 11648 __mark_reg_unknown(env, dst_reg);
bb7f0f98
AS
11649 return 0;
11650 }
11651
f5288193
DB
11652 if (sanitize_needed(opcode)) {
11653 ret = sanitize_val_alu(env, insn);
11654 if (ret < 0)
11655 return sanitize_err(env, insn, ret, NULL, NULL);
11656 }
11657
3f50f132
JF
11658 /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
11659 * There are two classes of instructions: for the first class we track
11660 * both alu32 and alu64 sign/unsigned bounds independently; this gives
11661 * the greatest amount of precision when alu operations are mixed with
11662 * jmp32 operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL,
11663 * BPF_AND, BPF_OR, and BPF_XOR. This is possible because these ops have
11664 * fairly easy to understand and calculate behavior in both 32-bit and
11665 * 64-bit alu ops. See alu32 verifier tests for examples. The second
11666 * class of operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however, is not
11667 * so easy with regard to tracking sign/unsigned bounds because the bits
11668 * may cross subreg boundaries in the alu64 case. When this happens we
11669 * mark the reg unbounded in the subreg bound space and use the resulting
11670 * tnum to calculate an approximation of the sign/unsigned bounds.
11671 */
48461135
JB
11672 switch (opcode) {
11673 case BPF_ADD:
3f50f132 11674 scalar32_min_max_add(dst_reg, &src_reg);
07cd2631 11675 scalar_min_max_add(dst_reg, &src_reg);
3f50f132 11676 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
48461135
JB
11677 break;
11678 case BPF_SUB:
3f50f132 11679 scalar32_min_max_sub(dst_reg, &src_reg);
07cd2631 11680 scalar_min_max_sub(dst_reg, &src_reg);
3f50f132 11681 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
48461135
JB
11682 break;
11683 case BPF_MUL:
3f50f132
JF
11684 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
11685 scalar32_min_max_mul(dst_reg, &src_reg);
07cd2631 11686 scalar_min_max_mul(dst_reg, &src_reg);
48461135
JB
11687 break;
11688 case BPF_AND:
3f50f132
JF
11689 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
11690 scalar32_min_max_and(dst_reg, &src_reg);
07cd2631 11691 scalar_min_max_and(dst_reg, &src_reg);
f1174f77
EC
11692 break;
11693 case BPF_OR:
3f50f132
JF
11694 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
11695 scalar32_min_max_or(dst_reg, &src_reg);
07cd2631 11696 scalar_min_max_or(dst_reg, &src_reg);
48461135 11697 break;
2921c90d
YS
11698 case BPF_XOR:
11699 dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
11700 scalar32_min_max_xor(dst_reg, &src_reg);
11701 scalar_min_max_xor(dst_reg, &src_reg);
11702 break;
48461135 11703 case BPF_LSH:
468f6eaf
JH
11704 if (umax_val >= insn_bitness) {
11705 /* Shifts greater than 31 or 63 are undefined.
11706 * This includes shifts by a negative number.
b03c9f9f 11707 */
61bd5218 11708 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77
EC
11709 break;
11710 }
3f50f132
JF
11711 if (alu32)
11712 scalar32_min_max_lsh(dst_reg, &src_reg);
11713 else
11714 scalar_min_max_lsh(dst_reg, &src_reg);
48461135
JB
11715 break;
11716 case BPF_RSH:
468f6eaf
JH
11717 if (umax_val >= insn_bitness) {
11718 /* Shifts greater than 31 or 63 are undefined.
11719 * This includes shifts by a negative number.
b03c9f9f 11720 */
61bd5218 11721 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77
EC
11722 break;
11723 }
3f50f132
JF
11724 if (alu32)
11725 scalar32_min_max_rsh(dst_reg, &src_reg);
11726 else
11727 scalar_min_max_rsh(dst_reg, &src_reg);
48461135 11728 break;
9cbe1f5a
YS
11729 case BPF_ARSH:
11730 if (umax_val >= insn_bitness) {
11731 /* Shifts greater than 31 or 63 are undefined.
11732 * This includes shifts by a negative number.
11733 */
11734 mark_reg_unknown(env, regs, insn->dst_reg);
11735 break;
11736 }
3f50f132
JF
11737 if (alu32)
11738 scalar32_min_max_arsh(dst_reg, &src_reg);
11739 else
11740 scalar_min_max_arsh(dst_reg, &src_reg);
9cbe1f5a 11741 break;
48461135 11742 default:
61bd5218 11743 mark_reg_unknown(env, regs, insn->dst_reg);
48461135
JB
11744 break;
11745 }
11746
3f50f132
JF
11747 /* ALU32 ops are zero extended into 64bit register */
11748 if (alu32)
11749 zext_32_to_64(dst_reg);
3844d153 11750 reg_bounds_sync(dst_reg);
f1174f77
EC
11751 return 0;
11752}
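/* Illustrative sketch, not part of the verifier: ALU32 results are
 * zero extended into the full 64-bit register, which is why
 * zext_32_to_64() is called above. A hypothetical helper mirroring
 * what a 32-bit BPF_ADD does to the destination register:
 */
static inline u64 example_alu32_add(u64 dst, u64 src)
{
	return (u32)((u32)dst + (u32)src);	/* upper 32 bits become zero */
}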
11753
11754/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
11755 * and var_off.
11756 */
11757static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
11758 struct bpf_insn *insn)
11759{
f4d7e40a
AS
11760 struct bpf_verifier_state *vstate = env->cur_state;
11761 struct bpf_func_state *state = vstate->frame[vstate->curframe];
11762 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
f1174f77
EC
11763 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
11764 u8 opcode = BPF_OP(insn->code);
b5dc0163 11765 int err;
f1174f77
EC
11766
11767 dst_reg = &regs[insn->dst_reg];
f1174f77
EC
11768 src_reg = NULL;
11769 if (dst_reg->type != SCALAR_VALUE)
11770 ptr_reg = dst_reg;
75748837
AS
11771 else
11772 /* Make sure ID is cleared otherwise dst_reg min/max could be
11773 * incorrectly propagated into other registers by find_equal_scalars()
11774 */
11775 dst_reg->id = 0;
f1174f77
EC
11776 if (BPF_SRC(insn->code) == BPF_X) {
11777 src_reg = &regs[insn->src_reg];
f1174f77
EC
11778 if (src_reg->type != SCALAR_VALUE) {
11779 if (dst_reg->type != SCALAR_VALUE) {
11780 /* Combining two pointers by any ALU op yields
82abbf8d
AS
11781 * an arbitrary scalar. Disallow all math except
11782 * pointer subtraction
f1174f77 11783 */
dd066823 11784 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
82abbf8d
AS
11785 mark_reg_unknown(env, regs, insn->dst_reg);
11786 return 0;
f1174f77 11787 }
82abbf8d
AS
11788 verbose(env, "R%d pointer %s pointer prohibited\n",
11789 insn->dst_reg,
11790 bpf_alu_string[opcode >> 4]);
11791 return -EACCES;
f1174f77
EC
11792 } else {
11793 /* scalar += pointer
11794 * This is legal, but we have to reverse our
11795 * src/dest handling in computing the range
11796 */
b5dc0163
AS
11797 err = mark_chain_precision(env, insn->dst_reg);
11798 if (err)
11799 return err;
82abbf8d
AS
11800 return adjust_ptr_min_max_vals(env, insn,
11801 src_reg, dst_reg);
f1174f77
EC
11802 }
11803 } else if (ptr_reg) {
11804 /* pointer += scalar */
b5dc0163
AS
11805 err = mark_chain_precision(env, insn->src_reg);
11806 if (err)
11807 return err;
82abbf8d
AS
11808 return adjust_ptr_min_max_vals(env, insn,
11809 dst_reg, src_reg);
a3b666bf
AN
11810 } else if (dst_reg->precise) {
11811 /* if dst_reg is precise, src_reg should be precise as well */
11812 err = mark_chain_precision(env, insn->src_reg);
11813 if (err)
11814 return err;
f1174f77
EC
11815 }
11816 } else {
11817 /* Pretend the src is a reg with a known value, since we only
11818 * need to be able to read from this state.
11819 */
11820 off_reg.type = SCALAR_VALUE;
b03c9f9f 11821 __mark_reg_known(&off_reg, insn->imm);
f1174f77 11822 src_reg = &off_reg;
82abbf8d
AS
11823 if (ptr_reg) /* pointer += K */
11824 return adjust_ptr_min_max_vals(env, insn,
11825 ptr_reg, src_reg);
f1174f77
EC
11826 }
11827
11828 /* Got here implies adding two SCALAR_VALUEs */
11829 if (WARN_ON_ONCE(ptr_reg)) {
0f55f9ed 11830 print_verifier_state(env, state, true);
61bd5218 11831 verbose(env, "verifier internal error: unexpected ptr_reg\n");
f1174f77
EC
11832 return -EINVAL;
11833 }
11834 if (WARN_ON(!src_reg)) {
0f55f9ed 11835 print_verifier_state(env, state, true);
61bd5218 11836 verbose(env, "verifier internal error: no src_reg\n");
f1174f77
EC
11837 return -EINVAL;
11838 }
11839 return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
48461135
JB
11840}
11841
17a52670 11842/* check validity of 32-bit and 64-bit arithmetic operations */
58e2af8b 11843static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
17a52670 11844{
638f5b90 11845 struct bpf_reg_state *regs = cur_regs(env);
17a52670
AS
11846 u8 opcode = BPF_OP(insn->code);
11847 int err;
11848
11849 if (opcode == BPF_END || opcode == BPF_NEG) {
11850 if (opcode == BPF_NEG) {
395e942d 11851 if (BPF_SRC(insn->code) != BPF_K ||
17a52670
AS
11852 insn->src_reg != BPF_REG_0 ||
11853 insn->off != 0 || insn->imm != 0) {
61bd5218 11854 verbose(env, "BPF_NEG uses reserved fields\n");
17a52670
AS
11855 return -EINVAL;
11856 }
11857 } else {
11858 if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
e67b8a68
EC
11859 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
11860 BPF_CLASS(insn->code) == BPF_ALU64) {
61bd5218 11861 verbose(env, "BPF_END uses reserved fields\n");
17a52670
AS
11862 return -EINVAL;
11863 }
11864 }
11865
11866 /* check src operand */
dc503a8a 11867 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
11868 if (err)
11869 return err;
11870
1be7f75d 11871 if (is_pointer_value(env, insn->dst_reg)) {
61bd5218 11872 verbose(env, "R%d pointer arithmetic prohibited\n",
1be7f75d
AS
11873 insn->dst_reg);
11874 return -EACCES;
11875 }
11876
17a52670 11877 /* check dest operand */
dc503a8a 11878 err = check_reg_arg(env, insn->dst_reg, DST_OP);
17a52670
AS
11879 if (err)
11880 return err;
11881
11882 } else if (opcode == BPF_MOV) {
11883
11884 if (BPF_SRC(insn->code) == BPF_X) {
11885 if (insn->imm != 0 || insn->off != 0) {
61bd5218 11886 verbose(env, "BPF_MOV uses reserved fields\n");
17a52670
AS
11887 return -EINVAL;
11888 }
11889
11890 /* check src operand */
dc503a8a 11891 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
11892 if (err)
11893 return err;
11894 } else {
11895 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
61bd5218 11896 verbose(env, "BPF_MOV uses reserved fields\n");
17a52670
AS
11897 return -EINVAL;
11898 }
11899 }
11900
fbeb1603
AF
11901 /* check dest operand, mark as required later */
11902 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
17a52670
AS
11903 if (err)
11904 return err;
11905
11906 if (BPF_SRC(insn->code) == BPF_X) {
e434b8cd
JW
11907 struct bpf_reg_state *src_reg = regs + insn->src_reg;
11908 struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
11909
17a52670
AS
11910 if (BPF_CLASS(insn->code) == BPF_ALU64) {
11911 /* case: R1 = R2
11912 * copy register state to dest reg
11913 */
75748837
AS
11914 if (src_reg->type == SCALAR_VALUE && !src_reg->id)
11915 /* Assign src and dst registers the same ID
11916 * that will be used by find_equal_scalars()
11917 * to propagate min/max range.
11918 */
11919 src_reg->id = ++env->id_gen;
71f656a5 11920 copy_register_state(dst_reg, src_reg);
e434b8cd 11921 dst_reg->live |= REG_LIVE_WRITTEN;
5327ed3d 11922 dst_reg->subreg_def = DEF_NOT_SUBREG;
17a52670 11923 } else {
f1174f77 11924 /* R1 = (u32) R2 */
1be7f75d 11925 if (is_pointer_value(env, insn->src_reg)) {
11926 verbose(env,
11927 "R%d partial copy of pointer\n",
1be7f75d
AS
11928 insn->src_reg);
11929 return -EACCES;
e434b8cd 11930 } else if (src_reg->type == SCALAR_VALUE) {
71f656a5 11931 copy_register_state(dst_reg, src_reg);
75748837
AS
11932 /* Make sure ID is cleared otherwise
11933 * dst_reg min/max could be incorrectly
11934 * propagated into src_reg by find_equal_scalars()
11935 */
11936 dst_reg->id = 0;
e434b8cd 11937 dst_reg->live |= REG_LIVE_WRITTEN;
5327ed3d 11938 dst_reg->subreg_def = env->insn_idx + 1;
e434b8cd
JW
11939 } else {
11940 mark_reg_unknown(env, regs,
11941 insn->dst_reg);
1be7f75d 11942 }
3f50f132 11943 zext_32_to_64(dst_reg);
3844d153 11944 reg_bounds_sync(dst_reg);
17a52670
AS
11945 }
11946 } else {
11947 /* case: R = imm
11948 * remember the value we stored into this reg
11949 */
fbeb1603
AF
11950 /* clear any state __mark_reg_known doesn't set */
11951 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77 11952 regs[insn->dst_reg].type = SCALAR_VALUE;
95a762e2
JH
11953 if (BPF_CLASS(insn->code) == BPF_ALU64) {
11954 __mark_reg_known(regs + insn->dst_reg,
11955 insn->imm);
11956 } else {
11957 __mark_reg_known(regs + insn->dst_reg,
11958 (u32)insn->imm);
11959 }
17a52670
AS
11960 }
11961
11962 } else if (opcode > BPF_END) {
61bd5218 11963 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
17a52670
AS
11964 return -EINVAL;
11965
11966 } else { /* all other ALU ops: and, sub, xor, add, ... */
11967
17a52670
AS
11968 if (BPF_SRC(insn->code) == BPF_X) {
11969 if (insn->imm != 0 || insn->off != 0) {
61bd5218 11970 verbose(env, "BPF_ALU uses reserved fields\n");
17a52670
AS
11971 return -EINVAL;
11972 }
11973 /* check src1 operand */
dc503a8a 11974 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
11975 if (err)
11976 return err;
11977 } else {
11978 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
61bd5218 11979 verbose(env, "BPF_ALU uses reserved fields\n");
17a52670
AS
11980 return -EINVAL;
11981 }
11982 }
11983
11984 /* check src2 operand */
dc503a8a 11985 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
11986 if (err)
11987 return err;
11988
11989 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
11990 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
61bd5218 11991 verbose(env, "div by zero\n");
17a52670
AS
11992 return -EINVAL;
11993 }
11994
229394e8
RV
11995 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
11996 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
11997 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
11998
11999 if (insn->imm < 0 || insn->imm >= size) {
61bd5218 12000 verbose(env, "invalid shift %d\n", insn->imm);
229394e8
RV
12001 return -EINVAL;
12002 }
12003 }
12004
1a0dc1ac 12005 /* check dest operand */
dc503a8a 12006 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
1a0dc1ac
AS
12007 if (err)
12008 return err;
12009
f1174f77 12010 return adjust_reg_min_max_vals(env, insn);
17a52670
AS
12011 }
12012
12013 return 0;
12014}
12015
f4d7e40a 12016static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
de8f3a83 12017 struct bpf_reg_state *dst_reg,
f8ddadc4 12018 enum bpf_reg_type type,
fb2a311a 12019 bool range_right_open)
969bf05e 12020{
b239da34
KKD
12021 struct bpf_func_state *state;
12022 struct bpf_reg_state *reg;
12023 int new_range;
2d2be8ca 12024
fb2a311a
DB
12025 if (dst_reg->off < 0 ||
12026 (dst_reg->off == 0 && range_right_open))
f1174f77
EC
12027 /* This doesn't give us any range */
12028 return;
12029
b03c9f9f
EC
12030 if (dst_reg->umax_value > MAX_PACKET_OFF ||
12031 dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
f1174f77
EC
12032 /* Risk of overflow. For instance, ptr + (1<<63) may be less
12033 * than pkt_end, but that's because it's also less than pkt.
12034 */
12035 return;
12036
fb2a311a
DB
12037 new_range = dst_reg->off;
12038 if (range_right_open)
2fa7d94a 12039 new_range++;
fb2a311a
DB
12040
12041 /* Examples for register markings:
2d2be8ca 12042 *
fb2a311a 12043 * pkt_data in dst register:
2d2be8ca
DB
12044 *
12045 * r2 = r3;
12046 * r2 += 8;
12047 * if (r2 > pkt_end) goto <handle exception>
12048 * <access okay>
12049 *
b4e432f1
DB
12050 * r2 = r3;
12051 * r2 += 8;
12052 * if (r2 < pkt_end) goto <access okay>
12053 * <handle exception>
12054 *
2d2be8ca
DB
12055 * Where:
12056 * r2 == dst_reg, pkt_end == src_reg
12057 * r2=pkt(id=n,off=8,r=0)
12058 * r3=pkt(id=n,off=0,r=0)
12059 *
fb2a311a 12060 * pkt_data in src register:
2d2be8ca
DB
12061 *
12062 * r2 = r3;
12063 * r2 += 8;
12064 * if (pkt_end >= r2) goto <access okay>
12065 * <handle exception>
12066 *
b4e432f1
DB
12067 * r2 = r3;
12068 * r2 += 8;
12069 * if (pkt_end <= r2) goto <handle exception>
12070 * <access okay>
12071 *
2d2be8ca
DB
12072 * Where:
12073 * pkt_end == dst_reg, r2 == src_reg
12074 * r2=pkt(id=n,off=8,r=0)
12075 * r3=pkt(id=n,off=0,r=0)
12076 *
12077 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
fb2a311a
DB
12078 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
12079 * and [r3, r3 + 8-1) respectively is safe to access depending on
12080 * the check.
969bf05e 12081 */
2d2be8ca 12082
f1174f77
EC
12083 /* If our ids match, then we must have the same max_value. And we
12084 * don't care about the other reg's fixed offset, since if it's too big
12085 * the range won't allow anything.
12086 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
12087 */
b239da34
KKD
12088 bpf_for_each_reg_in_vstate(vstate, state, reg, ({
12089 if (reg->type == type && reg->id == dst_reg->id)
12090 /* keep the maximum range already checked */
12091 reg->range = max(reg->range, new_range);
12092 }));
969bf05e
AS
12093}
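/* Illustrative sketch (hypothetical XDP snippet, not verifier code) of
 * the pattern find_good_pkt_pointers() recognizes:
 *
 *	void *data = (void *)(long)ctx->data;
 *	void *data_end = (void *)(long)ctx->data_end;
 *	struct ethhdr *eth = data;
 *
 *	if (data + sizeof(*eth) > data_end)
 *		return XDP_DROP;
 *	// on the fall-through path the data register gets
 *	// range = sizeof(*eth), so eth->h_proto is provably in bounds
 */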
12094
3f50f132 12095static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
4f7b3e82 12096{
3f50f132
JF
12097 struct tnum subreg = tnum_subreg(reg->var_off);
12098 s32 sval = (s32)val;
a72dafaf 12099
3f50f132
JF
12100 switch (opcode) {
12101 case BPF_JEQ:
12102 if (tnum_is_const(subreg))
12103 return !!tnum_equals_const(subreg, val);
12104 break;
12105 case BPF_JNE:
12106 if (tnum_is_const(subreg))
12107 return !tnum_equals_const(subreg, val);
12108 break;
12109 case BPF_JSET:
12110 if ((~subreg.mask & subreg.value) & val)
12111 return 1;
12112 if (!((subreg.mask | subreg.value) & val))
12113 return 0;
12114 break;
12115 case BPF_JGT:
12116 if (reg->u32_min_value > val)
12117 return 1;
12118 else if (reg->u32_max_value <= val)
12119 return 0;
12120 break;
12121 case BPF_JSGT:
12122 if (reg->s32_min_value > sval)
12123 return 1;
ee114dd6 12124 else if (reg->s32_max_value <= sval)
3f50f132
JF
12125 return 0;
12126 break;
12127 case BPF_JLT:
12128 if (reg->u32_max_value < val)
12129 return 1;
12130 else if (reg->u32_min_value >= val)
12131 return 0;
12132 break;
12133 case BPF_JSLT:
12134 if (reg->s32_max_value < sval)
12135 return 1;
12136 else if (reg->s32_min_value >= sval)
12137 return 0;
12138 break;
12139 case BPF_JGE:
12140 if (reg->u32_min_value >= val)
12141 return 1;
12142 else if (reg->u32_max_value < val)
12143 return 0;
12144 break;
12145 case BPF_JSGE:
12146 if (reg->s32_min_value >= sval)
12147 return 1;
12148 else if (reg->s32_max_value < sval)
12149 return 0;
12150 break;
12151 case BPF_JLE:
12152 if (reg->u32_max_value <= val)
12153 return 1;
12154 else if (reg->u32_min_value > val)
12155 return 0;
12156 break;
12157 case BPF_JSLE:
12158 if (reg->s32_max_value <= sval)
12159 return 1;
12160 else if (reg->s32_min_value > sval)
12161 return 0;
12162 break;
12163 }
4f7b3e82 12164
3f50f132
JF
12165 return -1;
12166}
092ed096 12167
3f50f132
JF
12168
12169static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
12170{
12171 s64 sval = (s64)val;
a72dafaf 12172
4f7b3e82
AS
12173 switch (opcode) {
12174 case BPF_JEQ:
12175 if (tnum_is_const(reg->var_off))
12176 return !!tnum_equals_const(reg->var_off, val);
12177 break;
12178 case BPF_JNE:
12179 if (tnum_is_const(reg->var_off))
12180 return !tnum_equals_const(reg->var_off, val);
12181 break;
960ea056
JK
12182 case BPF_JSET:
12183 if ((~reg->var_off.mask & reg->var_off.value) & val)
12184 return 1;
12185 if (!((reg->var_off.mask | reg->var_off.value) & val))
12186 return 0;
12187 break;
4f7b3e82
AS
12188 case BPF_JGT:
12189 if (reg->umin_value > val)
12190 return 1;
12191 else if (reg->umax_value <= val)
12192 return 0;
12193 break;
12194 case BPF_JSGT:
a72dafaf 12195 if (reg->smin_value > sval)
4f7b3e82 12196 return 1;
ee114dd6 12197 else if (reg->smax_value <= sval)
4f7b3e82
AS
12198 return 0;
12199 break;
12200 case BPF_JLT:
12201 if (reg->umax_value < val)
12202 return 1;
12203 else if (reg->umin_value >= val)
12204 return 0;
12205 break;
12206 case BPF_JSLT:
a72dafaf 12207 if (reg->smax_value < sval)
4f7b3e82 12208 return 1;
a72dafaf 12209 else if (reg->smin_value >= sval)
4f7b3e82
AS
12210 return 0;
12211 break;
12212 case BPF_JGE:
12213 if (reg->umin_value >= val)
12214 return 1;
12215 else if (reg->umax_value < val)
12216 return 0;
12217 break;
12218 case BPF_JSGE:
a72dafaf 12219 if (reg->smin_value >= sval)
4f7b3e82 12220 return 1;
a72dafaf 12221 else if (reg->smax_value < sval)
4f7b3e82
AS
12222 return 0;
12223 break;
12224 case BPF_JLE:
12225 if (reg->umax_value <= val)
12226 return 1;
12227 else if (reg->umin_value > val)
12228 return 0;
12229 break;
12230 case BPF_JSLE:
a72dafaf 12231 if (reg->smax_value <= sval)
4f7b3e82 12232 return 1;
a72dafaf 12233 else if (reg->smin_value > sval)
4f7b3e82
AS
12234 return 0;
12235 break;
12236 }
12237
12238 return -1;
12239}
12240
3f50f132
JF
12241/* compute branch direction of the expression "if (reg opcode val) goto target;"
12242 * and return:
12243 * 1 - branch will be taken and "goto target" will be executed
12244 * 0 - branch will not be taken and fall-through to next insn
12245 * -1 - unknown. Example: "if (reg < 5)" is unknown when register value
12246 * range [0,10]
604dca5e 12247 */
3f50f132
JF
12248static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
12249 bool is_jmp32)
604dca5e 12250{
cac616db
JF
12251 if (__is_pointer_value(false, reg)) {
12252 if (!reg_type_not_null(reg->type))
12253 return -1;
12254
12255 /* If pointer is valid tests against zero will fail so we can
12256 * use this to direct branch taken.
12257 */
12258 if (val != 0)
12259 return -1;
12260
12261 switch (opcode) {
12262 case BPF_JEQ:
12263 return 0;
12264 case BPF_JNE:
12265 return 1;
12266 default:
12267 return -1;
12268 }
12269 }
604dca5e 12270
3f50f132
JF
12271 if (is_jmp32)
12272 return is_branch32_taken(reg, val, opcode);
12273 return is_branch64_taken(reg, val, opcode);
604dca5e
JH
12274}
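/* Worked example (illustrative only): for "if (reg < 5)" (BPF_JLT)
 * with reg in [0, 10] neither outcome is certain, so -1 is returned
 * and both branches are explored. With reg in [6, 10], umin_value >= 5
 * means the branch can never be taken, so 0 is returned and only the
 * fall-through path is followed.
 */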
12275
6d94e741
AS
12276static int flip_opcode(u32 opcode)
12277{
12278 /* How can we transform "a <op> b" into "b <op> a"? */
12279 static const u8 opcode_flip[16] = {
12280 /* these stay the same */
12281 [BPF_JEQ >> 4] = BPF_JEQ,
12282 [BPF_JNE >> 4] = BPF_JNE,
12283 [BPF_JSET >> 4] = BPF_JSET,
12284 /* these swap "lesser" and "greater" (L and G in the opcodes) */
12285 [BPF_JGE >> 4] = BPF_JLE,
12286 [BPF_JGT >> 4] = BPF_JLT,
12287 [BPF_JLE >> 4] = BPF_JGE,
12288 [BPF_JLT >> 4] = BPF_JGT,
12289 [BPF_JSGE >> 4] = BPF_JSLE,
12290 [BPF_JSGT >> 4] = BPF_JSLT,
12291 [BPF_JSLE >> 4] = BPF_JSGE,
12292 [BPF_JSLT >> 4] = BPF_JSGT
12293 };
12294 return opcode_flip[opcode >> 4];
12295}
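/* Worked example (illustrative only): flip_opcode(BPF_JLT) is BPF_JGT,
 * so "pkt_end < r2" can be analyzed as "r2 > pkt_end" with the
 * operands swapped; BPF_JEQ, BPF_JNE and BPF_JSET map to themselves.
 */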
12296
12297static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
12298 struct bpf_reg_state *src_reg,
12299 u8 opcode)
12300{
12301 struct bpf_reg_state *pkt;
12302
12303 if (src_reg->type == PTR_TO_PACKET_END) {
12304 pkt = dst_reg;
12305 } else if (dst_reg->type == PTR_TO_PACKET_END) {
12306 pkt = src_reg;
12307 opcode = flip_opcode(opcode);
12308 } else {
12309 return -1;
12310 }
12311
12312 if (pkt->range >= 0)
12313 return -1;
12314
12315 switch (opcode) {
12316 case BPF_JLE:
12317 /* pkt <= pkt_end */
12318 fallthrough;
12319 case BPF_JGT:
12320 /* pkt > pkt_end */
12321 if (pkt->range == BEYOND_PKT_END)
12322 /* pkt has at least one extra byte beyond pkt_end */
12323 return opcode == BPF_JGT;
12324 break;
12325 case BPF_JLT:
12326 /* pkt < pkt_end */
12327 fallthrough;
12328 case BPF_JGE:
12329 /* pkt >= pkt_end */
12330 if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
12331 return opcode == BPF_JGE;
12332 break;
12333 }
12334 return -1;
12335}
12336
48461135
JB
12337/* Adjusts the register min/max values in the case that the dst_reg is the
12338 * variable register that we are working on, and src_reg is a constant or we're
12339 * simply doing a BPF_K check.
f1174f77 12340 * In JEQ/JNE cases we also adjust the var_off values.
48461135
JB
12341 */
12342static void reg_set_min_max(struct bpf_reg_state *true_reg,
3f50f132
JF
12343 struct bpf_reg_state *false_reg,
12344 u64 val, u32 val32,
092ed096 12345 u8 opcode, bool is_jmp32)
48461135 12346{
3f50f132
JF
12347 struct tnum false_32off = tnum_subreg(false_reg->var_off);
12348 struct tnum false_64off = false_reg->var_off;
12349 struct tnum true_32off = tnum_subreg(true_reg->var_off);
12350 struct tnum true_64off = true_reg->var_off;
12351 s64 sval = (s64)val;
12352 s32 sval32 = (s32)val32;
a72dafaf 12353
f1174f77
EC
12354 /* If the dst_reg is a pointer, we can't learn anything about its
12355 * variable offset from the compare (unless src_reg were a pointer into
12356 * the same object, but we don't bother with that).
12357 * Since false_reg and true_reg have the same type by construction, we
12358 * only need to check one of them for pointerness.
12359 */
12360 if (__is_pointer_value(false, false_reg))
12361 return;
4cabc5b1 12362
48461135 12363 switch (opcode) {
a12ca627
DB
12364 /* JEQ/JNE comparison doesn't change the register equivalence.
12365 *
12366 * r1 = r2;
12367 * if (r1 == 42) goto label;
12368 * ...
12369 * label: // here both r1 and r2 are known to be 42.
12370 *
12371 * Hence when marking the register as known, preserve its ID.
12372 */
48461135 12373 case BPF_JEQ:
a12ca627
DB
12374 if (is_jmp32) {
12375 __mark_reg32_known(true_reg, val32);
12376 true_32off = tnum_subreg(true_reg->var_off);
12377 } else {
12378 ___mark_reg_known(true_reg, val);
12379 true_64off = true_reg->var_off;
12380 }
12381 break;
48461135 12382 case BPF_JNE:
a12ca627
DB
12383 if (is_jmp32) {
12384 __mark_reg32_known(false_reg, val32);
12385 false_32off = tnum_subreg(false_reg->var_off);
12386 } else {
12387 ___mark_reg_known(false_reg, val);
12388 false_64off = false_reg->var_off;
12389 }
48461135 12390 break;
960ea056 12391 case BPF_JSET:
3f50f132
JF
12392 if (is_jmp32) {
12393 false_32off = tnum_and(false_32off, tnum_const(~val32));
12394 if (is_power_of_2(val32))
12395 true_32off = tnum_or(true_32off,
12396 tnum_const(val32));
12397 } else {
12398 false_64off = tnum_and(false_64off, tnum_const(~val));
12399 if (is_power_of_2(val))
12400 true_64off = tnum_or(true_64off,
12401 tnum_const(val));
12402 }
960ea056 12403 break;
48461135 12404 case BPF_JGE:
a72dafaf
JW
12405 case BPF_JGT:
12406 {
3f50f132
JF
12407 if (is_jmp32) {
12408 u32 false_umax = opcode == BPF_JGT ? val32 : val32 - 1;
12409 u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
12410
12411 false_reg->u32_max_value = min(false_reg->u32_max_value,
12412 false_umax);
12413 true_reg->u32_min_value = max(true_reg->u32_min_value,
12414 true_umin);
12415 } else {
12416 u64 false_umax = opcode == BPF_JGT ? val : val - 1;
12417 u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
12418
12419 false_reg->umax_value = min(false_reg->umax_value, false_umax);
12420 true_reg->umin_value = max(true_reg->umin_value, true_umin);
12421 }
b03c9f9f 12422 break;
a72dafaf 12423 }
48461135 12424 case BPF_JSGE:
a72dafaf
JW
12425 case BPF_JSGT:
12426 {
3f50f132
JF
12427 if (is_jmp32) {
12428 s32 false_smax = opcode == BPF_JSGT ? sval32 : sval32 - 1;
12429 s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
a72dafaf 12430
3f50f132
JF
12431 false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
12432 true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
12433 } else {
12434 s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1;
12435 s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
12436
12437 false_reg->smax_value = min(false_reg->smax_value, false_smax);
12438 true_reg->smin_value = max(true_reg->smin_value, true_smin);
12439 }
48461135 12440 break;
a72dafaf 12441 }
b4e432f1 12442 case BPF_JLE:
a72dafaf
JW
12443 case BPF_JLT:
12444 {
3f50f132
JF
12445 if (is_jmp32) {
12446 u32 false_umin = opcode == BPF_JLT ? val32 : val32 + 1;
12447 u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
12448
12449 false_reg->u32_min_value = max(false_reg->u32_min_value,
12450 false_umin);
12451 true_reg->u32_max_value = min(true_reg->u32_max_value,
12452 true_umax);
12453 } else {
12454 u64 false_umin = opcode == BPF_JLT ? val : val + 1;
12455 u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
12456
12457 false_reg->umin_value = max(false_reg->umin_value, false_umin);
12458 true_reg->umax_value = min(true_reg->umax_value, true_umax);
12459 }
b4e432f1 12460 break;
a72dafaf 12461 }
b4e432f1 12462 case BPF_JSLE:
a72dafaf
JW
12463 case BPF_JSLT:
12464 {
3f50f132
JF
12465 if (is_jmp32) {
12466 s32 false_smin = opcode == BPF_JSLT ? sval32 : sval32 + 1;
12467 s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
a72dafaf 12468
3f50f132
JF
12469 false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
12470 true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
12471 } else {
12472 s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1;
12473 s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
12474
12475 false_reg->smin_value = max(false_reg->smin_value, false_smin);
12476 true_reg->smax_value = min(true_reg->smax_value, true_smax);
12477 }
b4e432f1 12478 break;
a72dafaf 12479 }
48461135 12480 default:
0fc31b10 12481 return;
48461135
JB
12482 }
12483
3f50f132
JF
12484 if (is_jmp32) {
12485 false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
12486 tnum_subreg(false_32off));
12487 true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
12488 tnum_subreg(true_32off));
12489 __reg_combine_32_into_64(false_reg);
12490 __reg_combine_32_into_64(true_reg);
12491 } else {
12492 false_reg->var_off = false_64off;
12493 true_reg->var_off = true_64off;
12494 __reg_combine_64_into_32(false_reg);
12495 __reg_combine_64_into_32(true_reg);
12496 }
48461135
JB
12497}
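/* Worked example (illustrative only): for "if (r1 > 7)" (BPF_JGT with
 * an immediate) and r1 in [0, 10], the true branch narrows r1 to
 * [8, 10] and the false branch to [0, 7]; var_off is then re-derived
 * from the updated bounds by the __reg_combine_*() helpers above.
 */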
12498
f1174f77
EC
12499/* Same as above, but for the case that dst_reg holds a constant and src_reg is
12500 * the variable reg.
48461135
JB
12501 */
12502static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
3f50f132
JF
12503 struct bpf_reg_state *false_reg,
12504 u64 val, u32 val32,
092ed096 12505 u8 opcode, bool is_jmp32)
48461135 12506{
6d94e741 12507 opcode = flip_opcode(opcode);
0fc31b10
JH
12508 /* This uses zero as "not present in table"; luckily the zero opcode,
12509 * BPF_JA, can't get here.
b03c9f9f 12510 */
0fc31b10 12511 if (opcode)
3f50f132 12512 reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
f1174f77
EC
12513}
12514
12515/* Regs are known to be equal, so intersect their min/max/var_off */
12516static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
12517 struct bpf_reg_state *dst_reg)
12518{
b03c9f9f
EC
12519 src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
12520 dst_reg->umin_value);
12521 src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
12522 dst_reg->umax_value);
12523 src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
12524 dst_reg->smin_value);
12525 src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
12526 dst_reg->smax_value);
f1174f77
EC
12527 src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
12528 dst_reg->var_off);
3844d153
DB
12529 reg_bounds_sync(src_reg);
12530 reg_bounds_sync(dst_reg);
f1174f77
EC
12531}
12532
12533static void reg_combine_min_max(struct bpf_reg_state *true_src,
12534 struct bpf_reg_state *true_dst,
12535 struct bpf_reg_state *false_src,
12536 struct bpf_reg_state *false_dst,
12537 u8 opcode)
12538{
12539 switch (opcode) {
12540 case BPF_JEQ:
12541 __reg_combine_min_max(true_src, true_dst);
12542 break;
12543 case BPF_JNE:
12544 __reg_combine_min_max(false_src, false_dst);
b03c9f9f 12545 break;
4cabc5b1 12546 }
48461135
JB
12547}
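/* Worked example (illustrative only): after "if (r1 == r2)" with
 * r1 in [0, 100] and r2 in [50, 200], the true branch intersects the
 * two ranges so both registers become [50, 100]; for BPF_JNE the same
 * intersection is applied on the false (i.e. equal) branch instead.
 */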
12548
fd978bf7
JS
12549static void mark_ptr_or_null_reg(struct bpf_func_state *state,
12550 struct bpf_reg_state *reg, u32 id,
840b9615 12551 bool is_null)
57a09bf0 12552{
c25b2ae1 12553 if (type_may_be_null(reg->type) && reg->id == id &&
fca1aa75 12554 (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
df57f38a
KKD
12555 /* Old offset (both fixed and variable parts) should have been
12556 * known-zero, because we don't allow pointer arithmetic on
12557 * pointers that might be NULL. If we see this happening, don't
12558 * convert the register.
12559 *
12560 * But in some cases, some helpers that return local kptrs
12561 * advance offset for the returned pointer. In those cases, it
12562 * is fine to expect to see reg->off.
12563 */
12564 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0)))
12565 return;
6a3cd331
DM
12566 if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
12567 WARN_ON_ONCE(reg->off))
e60b0d12 12568 return;
6a3cd331 12569
f1174f77
EC
12570 if (is_null) {
12571 reg->type = SCALAR_VALUE;
1b986589
MKL
12572 /* We don't need id and ref_obj_id from this point
12573 * onwards anymore, thus we should better reset it,
12574 * so that state pruning has chances to take effect.
12575 */
12576 reg->id = 0;
12577 reg->ref_obj_id = 0;
4ddb7416
DB
12578
12579 return;
12580 }
12581
12582 mark_ptr_not_null_reg(reg);
12583
12584 if (!reg_may_point_to_spin_lock(reg)) {
1b986589 12585 /* For not-NULL ptr, reg->ref_obj_id will be reset
b239da34 12586 * in release_reference().
1b986589
MKL
12587 *
12588 * reg->id is still used by spin_lock ptr. Other
12589 * than spin_lock ptr type, reg->id can be reset.
fd978bf7
JS
12590 */
12591 reg->id = 0;
56f668df 12592 }
57a09bf0
TG
12593 }
12594}
12595
12596/* The logic is similar to find_good_pkt_pointers(), both could eventually
12597 * be folded together at some point.
12598 */
840b9615
JS
12599static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
12600 bool is_null)
57a09bf0 12601{
f4d7e40a 12602 struct bpf_func_state *state = vstate->frame[vstate->curframe];
b239da34 12603 struct bpf_reg_state *regs = state->regs, *reg;
1b986589 12604 u32 ref_obj_id = regs[regno].ref_obj_id;
a08dd0da 12605 u32 id = regs[regno].id;
57a09bf0 12606
1b986589
MKL
12607 if (ref_obj_id && ref_obj_id == id && is_null)
12608 /* regs[regno] is in the " == NULL" branch.
12609 * No one could have freed the reference state before
12610 * doing the NULL check.
12611 */
12612 WARN_ON_ONCE(release_reference_state(state, id));
fd978bf7 12613
b239da34
KKD
12614 bpf_for_each_reg_in_vstate(vstate, state, reg, ({
12615 mark_ptr_or_null_reg(state, reg, id, is_null);
12616 }));
57a09bf0
TG
12617}
12618
5beca081
DB
12619static bool try_match_pkt_pointers(const struct bpf_insn *insn,
12620 struct bpf_reg_state *dst_reg,
12621 struct bpf_reg_state *src_reg,
12622 struct bpf_verifier_state *this_branch,
12623 struct bpf_verifier_state *other_branch)
12624{
12625 if (BPF_SRC(insn->code) != BPF_X)
12626 return false;
12627
092ed096
JW
12628 /* Pointers are always 64-bit. */
12629 if (BPF_CLASS(insn->code) == BPF_JMP32)
12630 return false;
12631
5beca081
DB
12632 switch (BPF_OP(insn->code)) {
12633 case BPF_JGT:
12634 if ((dst_reg->type == PTR_TO_PACKET &&
12635 src_reg->type == PTR_TO_PACKET_END) ||
12636 (dst_reg->type == PTR_TO_PACKET_META &&
12637 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
12638 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
12639 find_good_pkt_pointers(this_branch, dst_reg,
12640 dst_reg->type, false);
6d94e741 12641 mark_pkt_end(other_branch, insn->dst_reg, true);
5beca081
DB
12642 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
12643 src_reg->type == PTR_TO_PACKET) ||
12644 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
12645 src_reg->type == PTR_TO_PACKET_META)) {
12646 /* pkt_end > pkt_data', pkt_data > pkt_meta' */
12647 find_good_pkt_pointers(other_branch, src_reg,
12648 src_reg->type, true);
6d94e741 12649 mark_pkt_end(this_branch, insn->src_reg, false);
5beca081
DB
12650 } else {
12651 return false;
12652 }
12653 break;
12654 case BPF_JLT:
12655 if ((dst_reg->type == PTR_TO_PACKET &&
12656 src_reg->type == PTR_TO_PACKET_END) ||
12657 (dst_reg->type == PTR_TO_PACKET_META &&
12658 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
12659 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
12660 find_good_pkt_pointers(other_branch, dst_reg,
12661 dst_reg->type, true);
6d94e741 12662 mark_pkt_end(this_branch, insn->dst_reg, false);
5beca081
DB
12663 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
12664 src_reg->type == PTR_TO_PACKET) ||
12665 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
12666 src_reg->type == PTR_TO_PACKET_META)) {
12667 /* pkt_end < pkt_data', pkt_data > pkt_meta' */
12668 find_good_pkt_pointers(this_branch, src_reg,
12669 src_reg->type, false);
6d94e741 12670 mark_pkt_end(other_branch, insn->src_reg, true);
5beca081
DB
12671 } else {
12672 return false;
12673 }
12674 break;
12675 case BPF_JGE:
12676 if ((dst_reg->type == PTR_TO_PACKET &&
12677 src_reg->type == PTR_TO_PACKET_END) ||
12678 (dst_reg->type == PTR_TO_PACKET_META &&
12679 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
12680 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
12681 find_good_pkt_pointers(this_branch, dst_reg,
12682 dst_reg->type, true);
6d94e741 12683 mark_pkt_end(other_branch, insn->dst_reg, false);
5beca081
DB
12684 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
12685 src_reg->type == PTR_TO_PACKET) ||
12686 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
12687 src_reg->type == PTR_TO_PACKET_META)) {
12688 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
12689 find_good_pkt_pointers(other_branch, src_reg,
12690 src_reg->type, false);
6d94e741 12691 mark_pkt_end(this_branch, insn->src_reg, true);
5beca081
DB
12692 } else {
12693 return false;
12694 }
12695 break;
12696 case BPF_JLE:
12697 if ((dst_reg->type == PTR_TO_PACKET &&
12698 src_reg->type == PTR_TO_PACKET_END) ||
12699 (dst_reg->type == PTR_TO_PACKET_META &&
12700 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
12701 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
12702 find_good_pkt_pointers(other_branch, dst_reg,
12703 dst_reg->type, false);
6d94e741 12704 mark_pkt_end(this_branch, insn->dst_reg, true);
5beca081
DB
12705 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
12706 src_reg->type == PTR_TO_PACKET) ||
12707 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
12708 src_reg->type == PTR_TO_PACKET_META)) {
12709 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
12710 find_good_pkt_pointers(this_branch, src_reg,
12711 src_reg->type, true);
6d94e741 12712 mark_pkt_end(other_branch, insn->src_reg, false);
5beca081
DB
12713 } else {
12714 return false;
12715 }
12716 break;
12717 default:
12718 return false;
12719 }
12720
12721 return true;
12722}
12723
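/* A minimal XDP sketch of the comparison try_match_pkt_pointers() recognizes
 * (illustrative, assuming the standard uapi and libbpf headers): the
 * "data + N > data_end" test compares PTR_TO_PACKET against
 * PTR_TO_PACKET_END, so find_good_pkt_pointers() can extend the verified
 * range of the packet pointer on the safe branch and the load below is
 * accepted.
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int parse_eth(struct xdp_md *ctx)
{
	void *data     = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct ethhdr *eth = data;

	if (data + sizeof(*eth) > data_end)	/* pkt_data' > pkt_end */
		return XDP_DROP;		/* no access on the unsafe branch */
	return eth->h_proto ? XDP_PASS : XDP_DROP;
}

char _license[] SEC("license") = "GPL";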
75748837
AS
12724static void find_equal_scalars(struct bpf_verifier_state *vstate,
12725 struct bpf_reg_state *known_reg)
12726{
12727 struct bpf_func_state *state;
12728 struct bpf_reg_state *reg;
75748837 12729
b239da34
KKD
12730 bpf_for_each_reg_in_vstate(vstate, state, reg, ({
12731 if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
71f656a5 12732 copy_register_state(reg, known_reg);
b239da34 12733 }));
75748837
AS
12734}
12735
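/* A small stand-alone model of what find_equal_scalars() buys us (toy struct
 * and function names, not kernel code): once a register copy gives two scalar
 * registers the same non-zero id, bounds learned about one of them in a
 * branch are copied to every other register carrying that id.
 */
#include <limits.h>
#include <stdio.h>

struct toy_reg {
	unsigned int id;
	long long smin, smax;
};

static void toy_find_equal_scalars(struct toy_reg *regs, int nregs,
				   const struct toy_reg *known)
{
	for (int i = 0; i < nregs; i++)
		if (regs[i].id && regs[i].id == known->id)
			regs[i] = *known;	/* analogue of copy_register_state() */
}

int main(void)
{
	struct toy_reg regs[] = {
		{ .id = 3, .smin = LLONG_MIN, .smax = LLONG_MAX },	/* r1 */
		{ .id = 3, .smin = LLONG_MIN, .smax = LLONG_MAX },	/* r2 = r1 */
		{ .id = 0, .smin = LLONG_MIN, .smax = LLONG_MAX },	/* unrelated */
	};
	/* suppose a pair of branches has narrowed r1 to [0, 100] ... */
	struct toy_reg narrowed = { .id = 3, .smin = 0, .smax = 100 };

	toy_find_equal_scalars(regs, 3, &narrowed);
	/* ... then r2 picks up the same bounds; the id-less register does not */
	printf("r2: [%lld, %lld], r3: [%lld, %lld]\n",
	       regs[1].smin, regs[1].smax, regs[2].smin, regs[2].smax);
	return 0;
}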
58e2af8b 12736static int check_cond_jmp_op(struct bpf_verifier_env *env,
17a52670
AS
12737 struct bpf_insn *insn, int *insn_idx)
12738{
f4d7e40a
AS
12739 struct bpf_verifier_state *this_branch = env->cur_state;
12740 struct bpf_verifier_state *other_branch;
12741 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
fb8d251e 12742 struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
befae758 12743 struct bpf_reg_state *eq_branch_regs;
17a52670 12744 u8 opcode = BPF_OP(insn->code);
092ed096 12745 bool is_jmp32;
fb8d251e 12746 int pred = -1;
17a52670
AS
12747 int err;
12748
092ed096
JW
12749 /* Only conditional jumps are expected to reach here. */
12750 if (opcode == BPF_JA || opcode > BPF_JSLE) {
12751 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
17a52670
AS
12752 return -EINVAL;
12753 }
12754
12755 if (BPF_SRC(insn->code) == BPF_X) {
12756 if (insn->imm != 0) {
092ed096 12757 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
17a52670
AS
12758 return -EINVAL;
12759 }
12760
12761 /* check src1 operand */
dc503a8a 12762 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
12763 if (err)
12764 return err;
1be7f75d
AS
12765
12766 if (is_pointer_value(env, insn->src_reg)) {
61bd5218 12767 verbose(env, "R%d pointer comparison prohibited\n",
1be7f75d
AS
12768 insn->src_reg);
12769 return -EACCES;
12770 }
fb8d251e 12771 src_reg = &regs[insn->src_reg];
17a52670
AS
12772 } else {
12773 if (insn->src_reg != BPF_REG_0) {
092ed096 12774 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
17a52670
AS
12775 return -EINVAL;
12776 }
12777 }
12778
12779 /* check src2 operand */
dc503a8a 12780 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
12781 if (err)
12782 return err;
12783
1a0dc1ac 12784 dst_reg = &regs[insn->dst_reg];
092ed096 12785 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
1a0dc1ac 12786
3f50f132
JF
12787 if (BPF_SRC(insn->code) == BPF_K) {
12788 pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
12789 } else if (src_reg->type == SCALAR_VALUE &&
12790 is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
12791 pred = is_branch_taken(dst_reg,
12792 tnum_subreg(src_reg->var_off).value,
12793 opcode,
12794 is_jmp32);
12795 } else if (src_reg->type == SCALAR_VALUE &&
12796 !is_jmp32 && tnum_is_const(src_reg->var_off)) {
12797 pred = is_branch_taken(dst_reg,
12798 src_reg->var_off.value,
12799 opcode,
12800 is_jmp32);
6d94e741
AS
12801 } else if (reg_is_pkt_pointer_any(dst_reg) &&
12802 reg_is_pkt_pointer_any(src_reg) &&
12803 !is_jmp32) {
12804 pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode);
3f50f132
JF
12805 }
12806
b5dc0163 12807 if (pred >= 0) {
cac616db
JF
12808 /* If we get here with a dst_reg pointer type it is because
12809 * above is_branch_taken() special cased the 0 comparison.
12810 */
12811 if (!__is_pointer_value(false, dst_reg))
12812 err = mark_chain_precision(env, insn->dst_reg);
6d94e741
AS
12813 if (BPF_SRC(insn->code) == BPF_X && !err &&
12814 !__is_pointer_value(false, src_reg))
b5dc0163
AS
12815 err = mark_chain_precision(env, insn->src_reg);
12816 if (err)
12817 return err;
12818 }
9183671a 12819
fb8d251e 12820 if (pred == 1) {
9183671a
DB
12821 /* Only follow the goto, ignore fall-through. If needed, push
12822 * the fall-through branch for simulation under speculative
12823 * execution.
12824 */
12825 if (!env->bypass_spec_v1 &&
12826 !sanitize_speculative_path(env, insn, *insn_idx + 1,
12827 *insn_idx))
12828 return -EFAULT;
fb8d251e
AS
12829 *insn_idx += insn->off;
12830 return 0;
12831 } else if (pred == 0) {
9183671a
DB
12832 /* Only follow the fall-through branch, since that's where the
12833 * program will go. If needed, push the goto branch for
12834 * simulation under speculative execution.
fb8d251e 12835 */
9183671a
DB
12836 if (!env->bypass_spec_v1 &&
12837 !sanitize_speculative_path(env, insn,
12838 *insn_idx + insn->off + 1,
12839 *insn_idx))
12840 return -EFAULT;
fb8d251e 12841 return 0;
17a52670
AS
12842 }
12843
979d63d5
DB
12844 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
12845 false);
17a52670
AS
12846 if (!other_branch)
12847 return -EFAULT;
f4d7e40a 12848 other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
17a52670 12849
48461135
JB
12850 /* detect if we are comparing against a constant value so we can adjust
12851 * our min/max values for our dst register.
f1174f77 12852 * this is only legit if both are scalars (or pointers to the same
befae758
EZ
12853 * object, I suppose, see the PTR_MAYBE_NULL related if block below),
12854 * because otherwise the different base pointers mean the offsets aren't
f1174f77 12855 * comparable.
48461135
JB
12856 */
12857 if (BPF_SRC(insn->code) == BPF_X) {
092ed096 12858 struct bpf_reg_state *src_reg = &regs[insn->src_reg];
092ed096 12859
f1174f77 12860 if (dst_reg->type == SCALAR_VALUE &&
092ed096
JW
12861 src_reg->type == SCALAR_VALUE) {
12862 if (tnum_is_const(src_reg->var_off) ||
3f50f132
JF
12863 (is_jmp32 &&
12864 tnum_is_const(tnum_subreg(src_reg->var_off))))
f4d7e40a 12865 reg_set_min_max(&other_branch_regs[insn->dst_reg],
092ed096 12866 dst_reg,
3f50f132
JF
12867 src_reg->var_off.value,
12868 tnum_subreg(src_reg->var_off).value,
092ed096
JW
12869 opcode, is_jmp32);
12870 else if (tnum_is_const(dst_reg->var_off) ||
3f50f132
JF
12871 (is_jmp32 &&
12872 tnum_is_const(tnum_subreg(dst_reg->var_off))))
f4d7e40a 12873 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
092ed096 12874 src_reg,
3f50f132
JF
12875 dst_reg->var_off.value,
12876 tnum_subreg(dst_reg->var_off).value,
092ed096
JW
12877 opcode, is_jmp32);
12878 else if (!is_jmp32 &&
12879 (opcode == BPF_JEQ || opcode == BPF_JNE))
f1174f77 12880 /* Comparing for equality, we can combine knowledge */
f4d7e40a
AS
12881 reg_combine_min_max(&other_branch_regs[insn->src_reg],
12882 &other_branch_regs[insn->dst_reg],
092ed096 12883 src_reg, dst_reg, opcode);
e688c3db
AS
12884 if (src_reg->id &&
12885 !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
75748837
AS
12886 find_equal_scalars(this_branch, src_reg);
12887 find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
12888 }
12889
f1174f77
EC
12890 }
12891 } else if (dst_reg->type == SCALAR_VALUE) {
f4d7e40a 12892 reg_set_min_max(&other_branch_regs[insn->dst_reg],
3f50f132
JF
12893 dst_reg, insn->imm, (u32)insn->imm,
12894 opcode, is_jmp32);
48461135
JB
12895 }
12896
e688c3db
AS
12897 if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
12898 !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
75748837
AS
12899 find_equal_scalars(this_branch, dst_reg);
12900 find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
12901 }
12902
befae758
EZ
12903 /* if one pointer register is compared to another pointer
 12904	 * register, check if PTR_MAYBE_NULL could be lifted.
12905 * E.g. register A - maybe null
12906 * register B - not null
12907 * for JNE A, B, ... - A is not null in the false branch;
12908 * for JEQ A, B, ... - A is not null in the true branch.
8374bfd5
HS
12909 *
12910 * Since PTR_TO_BTF_ID points to a kernel struct that does
12911 * not need to be null checked by the BPF program, i.e.,
 12912	 * could be null even without PTR_MAYBE_NULL marking, we
12913 * only propagate nullness when neither reg is that type.
befae758
EZ
12914 */
12915 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
12916 __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) &&
8374bfd5
HS
12917 type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) &&
12918 base_type(src_reg->type) != PTR_TO_BTF_ID &&
12919 base_type(dst_reg->type) != PTR_TO_BTF_ID) {
befae758
EZ
12920 eq_branch_regs = NULL;
12921 switch (opcode) {
12922 case BPF_JEQ:
12923 eq_branch_regs = other_branch_regs;
12924 break;
12925 case BPF_JNE:
12926 eq_branch_regs = regs;
12927 break;
12928 default:
12929 /* do nothing */
12930 break;
12931 }
12932 if (eq_branch_regs) {
12933 if (type_may_be_null(src_reg->type))
12934 mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]);
12935 else
12936 mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]);
12937 }
12938 }
12939
092ed096
JW
12940 /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
12941 * NOTE: these optimizations below are related with pointer comparison
12942 * which will never be JMP32.
12943 */
12944 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
1a0dc1ac 12945 insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
c25b2ae1 12946 type_may_be_null(dst_reg->type)) {
840b9615 12947 /* Mark all identical registers in each branch as either
57a09bf0
TG
 12948		 * safe or unknown depending on the R == 0 or R != 0 conditional.
12949 */
840b9615
JS
12950 mark_ptr_or_null_regs(this_branch, insn->dst_reg,
12951 opcode == BPF_JNE);
12952 mark_ptr_or_null_regs(other_branch, insn->dst_reg,
12953 opcode == BPF_JEQ);
5beca081
DB
12954 } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
12955 this_branch, other_branch) &&
12956 is_pointer_value(env, insn->dst_reg)) {
61bd5218
JK
12957 verbose(env, "R%d pointer comparison prohibited\n",
12958 insn->dst_reg);
1be7f75d 12959 return -EACCES;
17a52670 12960 }
06ee7115 12961 if (env->log.level & BPF_LOG_LEVEL)
2e576648 12962 print_insn_state(env, this_branch->frame[this_branch->curframe]);
17a52670
AS
12963 return 0;
12964}
12965
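/* A minimal BPF C sketch of why check_cond_jmp_op() feeds comparisons into
 * reg_set_min_max() (illustrative; standard libbpf conventions assumed): the
 * "idx >= MAX_SLOTS" branch bounds the scalar on the fall-through path, which
 * is what later lets the variable-offset access into the map value verify.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#define MAX_SLOTS 16

struct slot_array {
	__u64 slot[MAX_SLOTS];
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, struct slot_array);
} slots SEC(".maps");

SEC("socket")
int pick_slot(struct __sk_buff *skb)
{
	__u32 key = 0, idx = bpf_get_prandom_u32();
	struct slot_array *v = bpf_map_lookup_elem(&slots, &key);

	if (!v)
		return 0;
	if (idx >= MAX_SLOTS)	/* fall-through: idx in [0, MAX_SLOTS - 1] */
		return 0;
	v->slot[idx] += 1;	/* bounded variable-offset access */
	return 0;
}

char _license[] SEC("license") = "GPL";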
17a52670 12966/* verify BPF_LD_IMM64 instruction */
58e2af8b 12967static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
17a52670 12968{
d8eca5bb 12969 struct bpf_insn_aux_data *aux = cur_aux(env);
638f5b90 12970 struct bpf_reg_state *regs = cur_regs(env);
4976b718 12971 struct bpf_reg_state *dst_reg;
d8eca5bb 12972 struct bpf_map *map;
17a52670
AS
12973 int err;
12974
12975 if (BPF_SIZE(insn->code) != BPF_DW) {
61bd5218 12976 verbose(env, "invalid BPF_LD_IMM insn\n");
17a52670
AS
12977 return -EINVAL;
12978 }
12979 if (insn->off != 0) {
61bd5218 12980 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
17a52670
AS
12981 return -EINVAL;
12982 }
12983
dc503a8a 12984 err = check_reg_arg(env, insn->dst_reg, DST_OP);
17a52670
AS
12985 if (err)
12986 return err;
12987
4976b718 12988 dst_reg = &regs[insn->dst_reg];
6b173873 12989 if (insn->src_reg == 0) {
6b173873
JK
12990 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
12991
4976b718 12992 dst_reg->type = SCALAR_VALUE;
b03c9f9f 12993 __mark_reg_known(&regs[insn->dst_reg], imm);
17a52670 12994 return 0;
6b173873 12995 }
17a52670 12996
d400a6cf
DB
12997 /* All special src_reg cases are listed below. From this point onwards
12998 * we either succeed and assign a corresponding dst_reg->type after
12999 * zeroing the offset, or fail and reject the program.
13000 */
13001 mark_reg_known_zero(env, regs, insn->dst_reg);
4976b718 13002
d400a6cf 13003 if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
4976b718 13004 dst_reg->type = aux->btf_var.reg_type;
34d3a78c 13005 switch (base_type(dst_reg->type)) {
4976b718
HL
13006 case PTR_TO_MEM:
13007 dst_reg->mem_size = aux->btf_var.mem_size;
13008 break;
13009 case PTR_TO_BTF_ID:
22dc4a0f 13010 dst_reg->btf = aux->btf_var.btf;
4976b718
HL
13011 dst_reg->btf_id = aux->btf_var.btf_id;
13012 break;
13013 default:
13014 verbose(env, "bpf verifier is misconfigured\n");
13015 return -EFAULT;
13016 }
13017 return 0;
13018 }
13019
69c087ba
YS
13020 if (insn->src_reg == BPF_PSEUDO_FUNC) {
13021 struct bpf_prog_aux *aux = env->prog->aux;
3990ed4c
MKL
13022 u32 subprogno = find_subprog(env,
13023 env->insn_idx + insn->imm + 1);
69c087ba
YS
13024
13025 if (!aux->func_info) {
13026 verbose(env, "missing btf func_info\n");
13027 return -EINVAL;
13028 }
13029 if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
13030 verbose(env, "callback function not static\n");
13031 return -EINVAL;
13032 }
13033
13034 dst_reg->type = PTR_TO_FUNC;
13035 dst_reg->subprogno = subprogno;
13036 return 0;
13037 }
13038
d8eca5bb 13039 map = env->used_maps[aux->map_index];
4976b718 13040 dst_reg->map_ptr = map;
d8eca5bb 13041
387544bf
AS
13042 if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
13043 insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
4976b718
HL
13044 dst_reg->type = PTR_TO_MAP_VALUE;
13045 dst_reg->off = aux->map_off;
d0d78c1d
KKD
13046 WARN_ON_ONCE(map->max_entries != 1);
13047 /* We want reg->id to be same (0) as map_value is not distinct */
387544bf
AS
13048 } else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
13049 insn->src_reg == BPF_PSEUDO_MAP_IDX) {
4976b718 13050 dst_reg->type = CONST_PTR_TO_MAP;
d8eca5bb
DB
13051 } else {
13052 verbose(env, "bpf verifier is misconfigured\n");
13053 return -EINVAL;
13054 }
17a52670 13055
17a52670
AS
13056 return 0;
13057}
13058
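/* A stand-alone userspace sketch (uapi definitions only, illustrative) of the
 * BPF_LD_IMM64 encoding that check_ld_imm() decodes when src_reg == 0: the
 * 64-bit constant is split across two struct bpf_insn, low 32 bits in the
 * first insn's imm and high 32 bits in the second one's.
 */
#include <linux/bpf.h>
#include <stdio.h>

int main(void)
{
	const unsigned long long want = 0x1122334455667788ULL;
	struct bpf_insn insn[2] = {
		{ .code = BPF_LD | BPF_DW | BPF_IMM, .dst_reg = BPF_REG_1,
		  .imm  = (__u32)want },		/* low 32 bits */
		{ .imm  = (__u32)(want >> 32) },	/* high 32 bits, rest zero */
	};
	/* the same reconstruction the verifier performs */
	unsigned long long imm = ((__u64)insn[1].imm << 32) | (__u32)insn[0].imm;

	printf("dst = r%d, imm = 0x%llx\n", insn[0].dst_reg, imm);
	return imm != want;
}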
96be4325
DB
13059static bool may_access_skb(enum bpf_prog_type type)
13060{
13061 switch (type) {
13062 case BPF_PROG_TYPE_SOCKET_FILTER:
13063 case BPF_PROG_TYPE_SCHED_CLS:
94caee8c 13064 case BPF_PROG_TYPE_SCHED_ACT:
96be4325
DB
13065 return true;
13066 default:
13067 return false;
13068 }
13069}
13070
ddd872bc
AS
13071/* verify safety of LD_ABS|LD_IND instructions:
13072 * - they can only appear in the programs where ctx == skb
13073 * - since they are wrappers of function calls, they scratch R1-R5 registers,
13074 * preserve R6-R9, and store return value into R0
13075 *
13076 * Implicit input:
13077 * ctx == skb == R6 == CTX
13078 *
13079 * Explicit input:
13080 * SRC == any register
13081 * IMM == 32-bit immediate
13082 *
13083 * Output:
13084 * R0 - 8/16/32-bit skb data converted to cpu endianness
13085 */
58e2af8b 13086static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
ddd872bc 13087{
638f5b90 13088 struct bpf_reg_state *regs = cur_regs(env);
6d4f151a 13089 static const int ctx_reg = BPF_REG_6;
ddd872bc 13090 u8 mode = BPF_MODE(insn->code);
ddd872bc
AS
13091 int i, err;
13092
7e40781c 13093 if (!may_access_skb(resolve_prog_type(env->prog))) {
61bd5218 13094 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
ddd872bc
AS
13095 return -EINVAL;
13096 }
13097
e0cea7ce
DB
13098 if (!env->ops->gen_ld_abs) {
13099 verbose(env, "bpf verifier is misconfigured\n");
13100 return -EINVAL;
13101 }
13102
ddd872bc 13103 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
d82bccc6 13104 BPF_SIZE(insn->code) == BPF_DW ||
ddd872bc 13105 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
61bd5218 13106 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
ddd872bc
AS
13107 return -EINVAL;
13108 }
13109
13110 /* check whether implicit source operand (register R6) is readable */
6d4f151a 13111 err = check_reg_arg(env, ctx_reg, SRC_OP);
ddd872bc
AS
13112 if (err)
13113 return err;
13114
fd978bf7
JS
13115 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
13116 * gen_ld_abs() may terminate the program at runtime, leading to
13117 * reference leak.
13118 */
13119 err = check_reference_leak(env);
13120 if (err) {
13121 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
13122 return err;
13123 }
13124
d0d78c1d 13125 if (env->cur_state->active_lock.ptr) {
d83525ca
AS
13126 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
13127 return -EINVAL;
13128 }
13129
9bb00b28
YS
13130 if (env->cur_state->active_rcu_lock) {
13131 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_rcu_read_lock-ed region\n");
13132 return -EINVAL;
13133 }
13134
6d4f151a 13135 if (regs[ctx_reg].type != PTR_TO_CTX) {
61bd5218
JK
13136 verbose(env,
13137 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
ddd872bc
AS
13138 return -EINVAL;
13139 }
13140
13141 if (mode == BPF_IND) {
13142 /* check explicit source operand */
dc503a8a 13143 err = check_reg_arg(env, insn->src_reg, SRC_OP);
ddd872bc
AS
13144 if (err)
13145 return err;
13146 }
13147
be80a1d3 13148 err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
6d4f151a
DB
13149 if (err < 0)
13150 return err;
13151
ddd872bc 13152 /* reset caller saved regs to unreadable */
dc503a8a 13153 for (i = 0; i < CALLER_SAVED_REGS; i++) {
61bd5218 13154 mark_reg_not_init(env, regs, caller_saved[i]);
dc503a8a
EC
13155 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
13156 }
ddd872bc
AS
13157
13158 /* mark destination R0 register as readable, since it contains
dc503a8a
EC
13159 * the value fetched from the packet.
13160 * Already marked as written above.
ddd872bc 13161 */
61bd5218 13162 mark_reg_unknown(env, regs, BPF_REG_0);
5327ed3d
JW
13163 /* ld_abs load up to 32-bit skb data. */
13164 regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
ddd872bc
AS
13165 return 0;
13166}
13167
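/* An illustrative raw-instruction sketch (uapi macros only, not from this
 * file) of the contract check_ld_abs() enforces: R6 must already hold the skb
 * context, the access offset comes from imm (BPF_ABS), the result lands in R0
 * and R1-R5 are clobbered. The snippet loads the byte at skb offset 23 (the
 * IPv4 protocol field of a plain Ethernet frame) and accepts UDP packets.
 */
#include <linux/bpf.h>
#include <stdio.h>

int main(void)
{
	struct bpf_insn prog[] = {
		/* r6 = r1: put the skb context where LD_ABS expects it */
		{ .code = BPF_ALU64 | BPF_MOV | BPF_X,
		  .dst_reg = BPF_REG_6, .src_reg = BPF_REG_1 },
		/* r0 = *(u8 *)skb[23] */
		{ .code = BPF_LD | BPF_ABS | BPF_B, .imm = 23 },
		/* if r0 == IPPROTO_UDP (17) goto +2 */
		{ .code = BPF_JMP | BPF_JEQ | BPF_K,
		  .dst_reg = BPF_REG_0, .off = 2, .imm = 17 },
		/* r0 = 0: drop */
		{ .code = BPF_ALU64 | BPF_MOV | BPF_K,
		  .dst_reg = BPF_REG_0, .imm = 0 },
		{ .code = BPF_JMP | BPF_EXIT },
		/* r0 = 0xffff: accept the whole packet */
		{ .code = BPF_ALU64 | BPF_MOV | BPF_K,
		  .dst_reg = BPF_REG_0, .imm = 0xffff },
		{ .code = BPF_JMP | BPF_EXIT },
	};

	printf("%zu instructions\n", sizeof(prog) / sizeof(prog[0]));
	return 0;
}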
390ee7e2
AS
13168static int check_return_code(struct bpf_verifier_env *env)
13169{
5cf1e914 13170 struct tnum enforce_attach_type_range = tnum_unknown;
27ae7997 13171 const struct bpf_prog *prog = env->prog;
390ee7e2
AS
13172 struct bpf_reg_state *reg;
13173 struct tnum range = tnum_range(0, 1);
7e40781c 13174 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
27ae7997 13175 int err;
bfc6bb74
AS
13176 struct bpf_func_state *frame = env->cur_state->frame[0];
13177 const bool is_subprog = frame->subprogno;
27ae7997 13178
9e4e01df 13179 /* LSM and struct_ops func-ptr's return type could be "void" */
d1a6edec
SF
13180 if (!is_subprog) {
13181 switch (prog_type) {
13182 case BPF_PROG_TYPE_LSM:
13183 if (prog->expected_attach_type == BPF_LSM_CGROUP)
13184 /* See below, can be 0 or 0-1 depending on hook. */
13185 break;
13186 fallthrough;
13187 case BPF_PROG_TYPE_STRUCT_OPS:
13188 if (!prog->aux->attach_func_proto->type)
13189 return 0;
13190 break;
13191 default:
13192 break;
13193 }
13194 }
27ae7997 13195
8fb33b60 13196 /* eBPF calling convention is such that R0 is used
27ae7997
MKL
13197 * to return the value from eBPF program.
13198 * Make sure that it's readable at this time
13199 * of bpf_exit, which means that program wrote
13200 * something into it earlier
13201 */
13202 err = check_reg_arg(env, BPF_REG_0, SRC_OP);
13203 if (err)
13204 return err;
13205
13206 if (is_pointer_value(env, BPF_REG_0)) {
13207 verbose(env, "R0 leaks addr as return value\n");
13208 return -EACCES;
13209 }
390ee7e2 13210
f782e2c3 13211 reg = cur_regs(env) + BPF_REG_0;
bfc6bb74
AS
13212
13213 if (frame->in_async_callback_fn) {
13214 /* enforce return zero from async callbacks like timer */
13215 if (reg->type != SCALAR_VALUE) {
13216 verbose(env, "In async callback the register R0 is not a known value (%s)\n",
c25b2ae1 13217 reg_type_str(env, reg->type));
bfc6bb74
AS
13218 return -EINVAL;
13219 }
13220
13221 if (!tnum_in(tnum_const(0), reg->var_off)) {
13222 verbose_invalid_scalar(env, reg, &range, "async callback", "R0");
13223 return -EINVAL;
13224 }
13225 return 0;
13226 }
13227
f782e2c3
DB
13228 if (is_subprog) {
13229 if (reg->type != SCALAR_VALUE) {
13230 verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
c25b2ae1 13231 reg_type_str(env, reg->type));
f782e2c3
DB
13232 return -EINVAL;
13233 }
13234 return 0;
13235 }
13236
7e40781c 13237 switch (prog_type) {
983695fa
DB
13238 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
13239 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
1b66d253
DB
13240 env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
13241 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
13242 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
13243 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
13244 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
983695fa 13245 range = tnum_range(1, 1);
77241217
SF
13246 if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
13247 env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
13248 range = tnum_range(0, 3);
ed4ed404 13249 break;
390ee7e2 13250 case BPF_PROG_TYPE_CGROUP_SKB:
5cf1e914 13251 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
13252 range = tnum_range(0, 3);
13253 enforce_attach_type_range = tnum_range(2, 3);
13254 }
ed4ed404 13255 break;
390ee7e2
AS
13256 case BPF_PROG_TYPE_CGROUP_SOCK:
13257 case BPF_PROG_TYPE_SOCK_OPS:
ebc614f6 13258 case BPF_PROG_TYPE_CGROUP_DEVICE:
7b146ceb 13259 case BPF_PROG_TYPE_CGROUP_SYSCTL:
0d01da6a 13260 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
390ee7e2 13261 break;
15ab09bd
AS
13262 case BPF_PROG_TYPE_RAW_TRACEPOINT:
13263 if (!env->prog->aux->attach_btf_id)
13264 return 0;
13265 range = tnum_const(0);
13266 break;
15d83c4d 13267 case BPF_PROG_TYPE_TRACING:
e92888c7
YS
13268 switch (env->prog->expected_attach_type) {
13269 case BPF_TRACE_FENTRY:
13270 case BPF_TRACE_FEXIT:
13271 range = tnum_const(0);
13272 break;
13273 case BPF_TRACE_RAW_TP:
13274 case BPF_MODIFY_RETURN:
15d83c4d 13275 return 0;
2ec0616e
DB
13276 case BPF_TRACE_ITER:
13277 break;
e92888c7
YS
13278 default:
13279 return -ENOTSUPP;
13280 }
15d83c4d 13281 break;
e9ddbb77
JS
13282 case BPF_PROG_TYPE_SK_LOOKUP:
13283 range = tnum_range(SK_DROP, SK_PASS);
13284 break;
69fd337a
SF
13285
13286 case BPF_PROG_TYPE_LSM:
13287 if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
13288 /* Regular BPF_PROG_TYPE_LSM programs can return
13289 * any value.
13290 */
13291 return 0;
13292 }
13293 if (!env->prog->aux->attach_func_proto->type) {
13294 /* Make sure programs that attach to void
13295 * hooks don't try to modify return value.
13296 */
13297 range = tnum_range(1, 1);
13298 }
13299 break;
13300
e92888c7
YS
13301 case BPF_PROG_TYPE_EXT:
13302 /* freplace program can return anything as its return value
13303 * depends on the to-be-replaced kernel func or bpf program.
13304 */
390ee7e2
AS
13305 default:
13306 return 0;
13307 }
13308
390ee7e2 13309 if (reg->type != SCALAR_VALUE) {
61bd5218 13310 verbose(env, "At program exit the register R0 is not a known value (%s)\n",
c25b2ae1 13311 reg_type_str(env, reg->type));
390ee7e2
AS
13312 return -EINVAL;
13313 }
13314
13315 if (!tnum_in(range, reg->var_off)) {
bc2591d6 13316 verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
69fd337a 13317 if (prog->expected_attach_type == BPF_LSM_CGROUP &&
d1a6edec 13318 prog_type == BPF_PROG_TYPE_LSM &&
69fd337a
SF
13319 !prog->aux->attach_func_proto->type)
13320 verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
390ee7e2
AS
13321 return -EINVAL;
13322 }
5cf1e914 13323
13324 if (!tnum_is_unknown(enforce_attach_type_range) &&
13325 tnum_in(enforce_attach_type_range, reg->var_off))
13326 env->prog->enforce_expected_attach_type = 1;
390ee7e2
AS
13327 return 0;
13328}
13329
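/* A minimal sketch of the return-range contract enforced above for
 * BPF_CGROUP_INET_EGRESS (libbpf section conventions assumed): R0 at exit
 * must be a known scalar inside tnum_range(0, 3); returning, say, skb->len
 * directly would trip the "At program exit the register R0 ..." error.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup_skb/egress")
int allow_small(struct __sk_buff *skb)
{
	/* 1 = allow, 0 = drop: both inside the permitted [0, 3] range */
	return skb->len < 1500 ? 1 : 0;
}

char _license[] SEC("license") = "GPL";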
475fb78f
AS
13330/* non-recursive DFS pseudo code
13331 * 1 procedure DFS-iterative(G,v):
13332 * 2 label v as discovered
13333 * 3 let S be a stack
13334 * 4 S.push(v)
13335 * 5 while S is not empty
b6d20799 13336 * 6 t <- S.peek()
475fb78f
AS
13337 * 7 if t is what we're looking for:
13338 * 8 return t
13339 * 9 for all edges e in G.adjacentEdges(t) do
13340 * 10 if edge e is already labelled
13341 * 11 continue with the next edge
13342 * 12 w <- G.adjacentVertex(t,e)
13343 * 13 if vertex w is not discovered and not explored
13344 * 14 label e as tree-edge
13345 * 15 label w as discovered
13346 * 16 S.push(w)
13347 * 17 continue at 5
13348 * 18 else if vertex w is discovered
13349 * 19 label e as back-edge
13350 * 20 else
13351 * 21 // vertex w is explored
13352 * 22 label e as forward- or cross-edge
13353 * 23 label t as explored
13354 * 24 S.pop()
13355 *
13356 * convention:
13357 * 0x10 - discovered
13358 * 0x11 - discovered and fall-through edge labelled
13359 * 0x12 - discovered and fall-through and branch edges labelled
13360 * 0x20 - explored
13361 */
13362
13363enum {
13364 DISCOVERED = 0x10,
13365 EXPLORED = 0x20,
13366 FALLTHROUGH = 1,
13367 BRANCH = 2,
13368};
13369
dc2a4ebc
AS
13370static u32 state_htab_size(struct bpf_verifier_env *env)
13371{
13372 return env->prog->len;
13373}
13374
5d839021
AS
13375static struct bpf_verifier_state_list **explored_state(
13376 struct bpf_verifier_env *env,
13377 int idx)
13378{
dc2a4ebc
AS
13379 struct bpf_verifier_state *cur = env->cur_state;
13380 struct bpf_func_state *state = cur->frame[cur->curframe];
13381
13382 return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
5d839021
AS
13383}
13384
bffdeaa8 13385static void mark_prune_point(struct bpf_verifier_env *env, int idx)
5d839021 13386{
a8f500af 13387 env->insn_aux_data[idx].prune_point = true;
5d839021 13388}
f1bca824 13389
bffdeaa8
AN
13390static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx)
13391{
13392 return env->insn_aux_data[insn_idx].prune_point;
13393}
13394
59e2e27d
WAF
13395enum {
13396 DONE_EXPLORING = 0,
13397 KEEP_EXPLORING = 1,
13398};
13399
475fb78f
AS
13400/* t, w, e - match pseudo-code above:
13401 * t - index of current instruction
13402 * w - next instruction
13403 * e - edge
13404 */
2589726d
AS
13405static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
13406 bool loop_ok)
475fb78f 13407{
7df737e9
AS
13408 int *insn_stack = env->cfg.insn_stack;
13409 int *insn_state = env->cfg.insn_state;
13410
475fb78f 13411 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
59e2e27d 13412 return DONE_EXPLORING;
475fb78f
AS
13413
13414 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
59e2e27d 13415 return DONE_EXPLORING;
475fb78f
AS
13416
13417 if (w < 0 || w >= env->prog->len) {
d9762e84 13418 verbose_linfo(env, t, "%d: ", t);
61bd5218 13419 verbose(env, "jump out of range from insn %d to %d\n", t, w);
475fb78f
AS
13420 return -EINVAL;
13421 }
13422
bffdeaa8 13423 if (e == BRANCH) {
f1bca824 13424 /* mark branch target for state pruning */
bffdeaa8
AN
13425 mark_prune_point(env, w);
13426 mark_jmp_point(env, w);
13427 }
f1bca824 13428
475fb78f
AS
13429 if (insn_state[w] == 0) {
13430 /* tree-edge */
13431 insn_state[t] = DISCOVERED | e;
13432 insn_state[w] = DISCOVERED;
7df737e9 13433 if (env->cfg.cur_stack >= env->prog->len)
475fb78f 13434 return -E2BIG;
7df737e9 13435 insn_stack[env->cfg.cur_stack++] = w;
59e2e27d 13436 return KEEP_EXPLORING;
475fb78f 13437 } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
2c78ee89 13438 if (loop_ok && env->bpf_capable)
59e2e27d 13439 return DONE_EXPLORING;
d9762e84
MKL
13440 verbose_linfo(env, t, "%d: ", t);
13441 verbose_linfo(env, w, "%d: ", w);
61bd5218 13442 verbose(env, "back-edge from insn %d to %d\n", t, w);
475fb78f
AS
13443 return -EINVAL;
13444 } else if (insn_state[w] == EXPLORED) {
13445 /* forward- or cross-edge */
13446 insn_state[t] = DISCOVERED | e;
13447 } else {
61bd5218 13448 verbose(env, "insn state internal bug\n");
475fb78f
AS
13449 return -EFAULT;
13450 }
59e2e27d
WAF
13451 return DONE_EXPLORING;
13452}
13453
dcb2288b 13454static int visit_func_call_insn(int t, struct bpf_insn *insns,
efdb22de
YS
13455 struct bpf_verifier_env *env,
13456 bool visit_callee)
13457{
13458 int ret;
13459
13460 ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
13461 if (ret)
13462 return ret;
13463
618945fb
AN
13464 mark_prune_point(env, t + 1);
13465 /* when we exit from subprog, we need to record non-linear history */
13466 mark_jmp_point(env, t + 1);
13467
efdb22de 13468 if (visit_callee) {
bffdeaa8 13469 mark_prune_point(env, t);
86fc6ee6
AS
13470 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
13471 /* It's ok to allow recursion from CFG point of
13472 * view. __check_func_call() will do the actual
13473 * check.
13474 */
13475 bpf_pseudo_func(insns + t));
efdb22de
YS
13476 }
13477 return ret;
13478}
13479
59e2e27d
WAF
13480/* Visits the instruction at index t and returns one of the following:
13481 * < 0 - an error occurred
13482 * DONE_EXPLORING - the instruction was fully explored
13483 * KEEP_EXPLORING - there is still work to be done before it is fully explored
13484 */
dcb2288b 13485static int visit_insn(int t, struct bpf_verifier_env *env)
59e2e27d
WAF
13486{
13487 struct bpf_insn *insns = env->prog->insnsi;
13488 int ret;
13489
69c087ba 13490 if (bpf_pseudo_func(insns + t))
dcb2288b 13491 return visit_func_call_insn(t, insns, env, true);
69c087ba 13492
59e2e27d
WAF
13493 /* All non-branch instructions have a single fall-through edge. */
13494 if (BPF_CLASS(insns[t].code) != BPF_JMP &&
13495 BPF_CLASS(insns[t].code) != BPF_JMP32)
13496 return push_insn(t, t + 1, FALLTHROUGH, env, false);
13497
13498 switch (BPF_OP(insns[t].code)) {
13499 case BPF_EXIT:
13500 return DONE_EXPLORING;
13501
13502 case BPF_CALL:
618945fb
AN
13503 if (insns[t].imm == BPF_FUNC_timer_set_callback)
13504 /* Mark this call insn as a prune point to trigger
13505 * is_state_visited() check before call itself is
13506 * processed by __check_func_call(). Otherwise new
13507 * async state will be pushed for further exploration.
bfc6bb74 13508 */
bffdeaa8 13509 mark_prune_point(env, t);
dcb2288b 13510 return visit_func_call_insn(t, insns, env,
efdb22de 13511 insns[t].src_reg == BPF_PSEUDO_CALL);
59e2e27d
WAF
13512
13513 case BPF_JA:
13514 if (BPF_SRC(insns[t].code) != BPF_K)
13515 return -EINVAL;
13516
13517 /* unconditional jump with single edge */
13518 ret = push_insn(t, t + insns[t].off + 1, FALLTHROUGH, env,
13519 true);
13520 if (ret)
13521 return ret;
13522
bffdeaa8
AN
13523 mark_prune_point(env, t + insns[t].off + 1);
13524 mark_jmp_point(env, t + insns[t].off + 1);
59e2e27d
WAF
13525
13526 return ret;
13527
13528 default:
13529 /* conditional jump with two edges */
bffdeaa8 13530 mark_prune_point(env, t);
618945fb 13531
59e2e27d
WAF
13532 ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
13533 if (ret)
13534 return ret;
13535
13536 return push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
13537 }
475fb78f
AS
13538}
13539
13540/* non-recursive depth-first-search to detect loops in BPF program
13541 * loop == back-edge in directed graph
13542 */
58e2af8b 13543static int check_cfg(struct bpf_verifier_env *env)
475fb78f 13544{
475fb78f 13545 int insn_cnt = env->prog->len;
7df737e9 13546 int *insn_stack, *insn_state;
475fb78f 13547 int ret = 0;
59e2e27d 13548 int i;
475fb78f 13549
7df737e9 13550 insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
475fb78f
AS
13551 if (!insn_state)
13552 return -ENOMEM;
13553
7df737e9 13554 insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
475fb78f 13555 if (!insn_stack) {
71dde681 13556 kvfree(insn_state);
475fb78f
AS
13557 return -ENOMEM;
13558 }
13559
13560 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
13561 insn_stack[0] = 0; /* 0 is the first instruction */
7df737e9 13562 env->cfg.cur_stack = 1;
475fb78f 13563
59e2e27d
WAF
13564 while (env->cfg.cur_stack > 0) {
13565 int t = insn_stack[env->cfg.cur_stack - 1];
475fb78f 13566
dcb2288b 13567 ret = visit_insn(t, env);
59e2e27d
WAF
13568 switch (ret) {
13569 case DONE_EXPLORING:
13570 insn_state[t] = EXPLORED;
13571 env->cfg.cur_stack--;
13572 break;
13573 case KEEP_EXPLORING:
13574 break;
13575 default:
13576 if (ret > 0) {
13577 verbose(env, "visit_insn internal bug\n");
13578 ret = -EFAULT;
475fb78f 13579 }
475fb78f 13580 goto err_free;
59e2e27d 13581 }
475fb78f
AS
13582 }
13583
59e2e27d 13584 if (env->cfg.cur_stack < 0) {
61bd5218 13585 verbose(env, "pop stack internal bug\n");
475fb78f
AS
13586 ret = -EFAULT;
13587 goto err_free;
13588 }
475fb78f 13589
475fb78f
AS
13590 for (i = 0; i < insn_cnt; i++) {
13591 if (insn_state[i] != EXPLORED) {
61bd5218 13592 verbose(env, "unreachable insn %d\n", i);
475fb78f
AS
13593 ret = -EINVAL;
13594 goto err_free;
13595 }
13596 }
13597 ret = 0; /* cfg looks good */
13598
13599err_free:
71dde681
AS
13600 kvfree(insn_state);
13601 kvfree(insn_stack);
7df737e9 13602 env->cfg.insn_state = env->cfg.insn_stack = NULL;
475fb78f
AS
13603 return ret;
13604}
13605
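/* A compact stand-alone model of the loop detection done by check_cfg()
 * (same DISCOVERED/EXPLORED convention as the pseudo-code above, none of the
 * verifier plumbing): a node that is discovered but not yet explored is still
 * on the DFS stack, so an edge into it is a back-edge and the "program" is
 * rejected, just as the verifier rejects it with -EINVAL.
 */
#include <stdio.h>

enum { TOY_DISCOVERED = 0x10, TOY_EXPLORED = 0x20 };
#define N 4

int main(void)
{
	/* successors per node, -1 = no edge: 0 -> 1, 1 -> 2, 2 -> {1, 3} */
	int succ[N][2] = { { 1, -1 }, { 2, -1 }, { 1, 3 }, { -1, -1 } };
	int state[N] = { 0 }, stack[N], sp = 0;

	state[0] = TOY_DISCOVERED;
	stack[sp++] = 0;

	while (sp > 0) {
		int t = stack[sp - 1], pushed = 0;

		for (int e = 0; e < 2; e++) {
			int w = succ[t][e];

			if (w < 0)
				continue;
			if (state[w] == 0) {			/* tree-edge */
				state[w] = TOY_DISCOVERED;
				stack[sp++] = w;
				pushed = 1;
				break;
			}
			if (!(state[w] & TOY_EXPLORED)) {	/* back-edge */
				printf("back-edge from %d to %d\n", t, w);
				return 1;
			}
			/* otherwise: forward- or cross-edge, nothing to do */
		}
		if (!pushed) {
			state[t] |= TOY_EXPLORED;
			sp--;
		}
	}
	printf("cfg looks good\n");
	return 0;
}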
09b28d76
AS
13606static int check_abnormal_return(struct bpf_verifier_env *env)
13607{
13608 int i;
13609
13610 for (i = 1; i < env->subprog_cnt; i++) {
13611 if (env->subprog_info[i].has_ld_abs) {
13612 verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
13613 return -EINVAL;
13614 }
13615 if (env->subprog_info[i].has_tail_call) {
13616 verbose(env, "tail_call is not allowed in subprogs without BTF\n");
13617 return -EINVAL;
13618 }
13619 }
13620 return 0;
13621}
13622
838e9690
YS
13623/* The minimum supported BTF func info size */
13624#define MIN_BPF_FUNCINFO_SIZE 8
13625#define MAX_FUNCINFO_REC_SIZE 252
13626
c454a46b
MKL
13627static int check_btf_func(struct bpf_verifier_env *env,
13628 const union bpf_attr *attr,
af2ac3e1 13629 bpfptr_t uattr)
838e9690 13630{
09b28d76 13631 const struct btf_type *type, *func_proto, *ret_type;
d0b2818e 13632 u32 i, nfuncs, urec_size, min_size;
838e9690 13633 u32 krec_size = sizeof(struct bpf_func_info);
c454a46b 13634 struct bpf_func_info *krecord;
8c1b6e69 13635 struct bpf_func_info_aux *info_aux = NULL;
c454a46b
MKL
13636 struct bpf_prog *prog;
13637 const struct btf *btf;
af2ac3e1 13638 bpfptr_t urecord;
d0b2818e 13639 u32 prev_offset = 0;
09b28d76 13640 bool scalar_return;
e7ed83d6 13641 int ret = -ENOMEM;
838e9690
YS
13642
13643 nfuncs = attr->func_info_cnt;
09b28d76
AS
13644 if (!nfuncs) {
13645 if (check_abnormal_return(env))
13646 return -EINVAL;
838e9690 13647 return 0;
09b28d76 13648 }
838e9690
YS
13649
13650 if (nfuncs != env->subprog_cnt) {
13651 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
13652 return -EINVAL;
13653 }
13654
13655 urec_size = attr->func_info_rec_size;
13656 if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
13657 urec_size > MAX_FUNCINFO_REC_SIZE ||
13658 urec_size % sizeof(u32)) {
13659 verbose(env, "invalid func info rec size %u\n", urec_size);
13660 return -EINVAL;
13661 }
13662
c454a46b
MKL
13663 prog = env->prog;
13664 btf = prog->aux->btf;
838e9690 13665
af2ac3e1 13666 urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
838e9690
YS
13667 min_size = min_t(u32, krec_size, urec_size);
13668
ba64e7d8 13669 krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
c454a46b
MKL
13670 if (!krecord)
13671 return -ENOMEM;
8c1b6e69
AS
13672 info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
13673 if (!info_aux)
13674 goto err_free;
ba64e7d8 13675
838e9690
YS
13676 for (i = 0; i < nfuncs; i++) {
13677 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
13678 if (ret) {
13679 if (ret == -E2BIG) {
13680 verbose(env, "nonzero tailing record in func info");
13681 /* set the size kernel expects so loader can zero
13682 * out the rest of the record.
13683 */
af2ac3e1
AS
13684 if (copy_to_bpfptr_offset(uattr,
13685 offsetof(union bpf_attr, func_info_rec_size),
13686 &min_size, sizeof(min_size)))
838e9690
YS
13687 ret = -EFAULT;
13688 }
c454a46b 13689 goto err_free;
838e9690
YS
13690 }
13691
af2ac3e1 13692 if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
838e9690 13693 ret = -EFAULT;
c454a46b 13694 goto err_free;
838e9690
YS
13695 }
13696
d30d42e0 13697 /* check insn_off */
09b28d76 13698 ret = -EINVAL;
838e9690 13699 if (i == 0) {
d30d42e0 13700 if (krecord[i].insn_off) {
838e9690 13701 verbose(env,
d30d42e0
MKL
13702 "nonzero insn_off %u for the first func info record",
13703 krecord[i].insn_off);
c454a46b 13704 goto err_free;
838e9690 13705 }
d30d42e0 13706 } else if (krecord[i].insn_off <= prev_offset) {
838e9690
YS
13707 verbose(env,
13708 "same or smaller insn offset (%u) than previous func info record (%u)",
d30d42e0 13709 krecord[i].insn_off, prev_offset);
c454a46b 13710 goto err_free;
838e9690
YS
13711 }
13712
d30d42e0 13713 if (env->subprog_info[i].start != krecord[i].insn_off) {
838e9690 13714 verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
c454a46b 13715 goto err_free;
838e9690
YS
13716 }
13717
13718 /* check type_id */
ba64e7d8 13719 type = btf_type_by_id(btf, krecord[i].type_id);
51c39bb1 13720 if (!type || !btf_type_is_func(type)) {
838e9690 13721 verbose(env, "invalid type id %d in func info",
ba64e7d8 13722 krecord[i].type_id);
c454a46b 13723 goto err_free;
838e9690 13724 }
51c39bb1 13725 info_aux[i].linkage = BTF_INFO_VLEN(type->info);
09b28d76
AS
13726
13727 func_proto = btf_type_by_id(btf, type->type);
13728 if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
13729 /* btf_func_check() already verified it during BTF load */
13730 goto err_free;
13731 ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
13732 scalar_return =
6089fb32 13733 btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
09b28d76
AS
13734 if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
13735 verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
13736 goto err_free;
13737 }
13738 if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
13739 verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
13740 goto err_free;
13741 }
13742
d30d42e0 13743 prev_offset = krecord[i].insn_off;
af2ac3e1 13744 bpfptr_add(&urecord, urec_size);
838e9690
YS
13745 }
13746
ba64e7d8
YS
13747 prog->aux->func_info = krecord;
13748 prog->aux->func_info_cnt = nfuncs;
8c1b6e69 13749 prog->aux->func_info_aux = info_aux;
838e9690
YS
13750 return 0;
13751
c454a46b 13752err_free:
ba64e7d8 13753 kvfree(krecord);
8c1b6e69 13754 kfree(info_aux);
838e9690
YS
13755 return ret;
13756}
13757
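/* A small userspace-side sketch (uapi struct and attr field names only; the
 * BTF and bpf() syscall plumbing a loader such as libbpf performs is omitted)
 * of what check_btf_func() expects in attr->func_info: one record per
 * subprog, insn_off strictly increasing with the first record at 0, and each
 * type_id naming a BTF_KIND_FUNC entry in the program's BTF.
 */
#include <linux/bpf.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* main prog starts at insn 0, a second subprog at insn 12; the
	 * type_id values here are placeholders for real BTF func ids */
	struct bpf_func_info func_info[] = {
		{ .insn_off = 0,  .type_id = 4 },
		{ .insn_off = 12, .type_id = 7 },
	};
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.func_info_cnt      = sizeof(func_info) / sizeof(func_info[0]);
	attr.func_info_rec_size = sizeof(struct bpf_func_info);
	attr.func_info          = (__u64)(unsigned long)func_info;

	printf("%u func_info records, rec_size %u\n",
	       attr.func_info_cnt, attr.func_info_rec_size);
	return 0;
}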
ba64e7d8
YS
13758static void adjust_btf_func(struct bpf_verifier_env *env)
13759{
8c1b6e69 13760 struct bpf_prog_aux *aux = env->prog->aux;
ba64e7d8
YS
13761 int i;
13762
8c1b6e69 13763 if (!aux->func_info)
ba64e7d8
YS
13764 return;
13765
13766 for (i = 0; i < env->subprog_cnt; i++)
8c1b6e69 13767 aux->func_info[i].insn_off = env->subprog_info[i].start;
ba64e7d8
YS
13768}
13769
1b773d00 13770#define MIN_BPF_LINEINFO_SIZE offsetofend(struct bpf_line_info, line_col)
c454a46b
MKL
13771#define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
13772
13773static int check_btf_line(struct bpf_verifier_env *env,
13774 const union bpf_attr *attr,
af2ac3e1 13775 bpfptr_t uattr)
c454a46b
MKL
13776{
13777 u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
13778 struct bpf_subprog_info *sub;
13779 struct bpf_line_info *linfo;
13780 struct bpf_prog *prog;
13781 const struct btf *btf;
af2ac3e1 13782 bpfptr_t ulinfo;
c454a46b
MKL
13783 int err;
13784
13785 nr_linfo = attr->line_info_cnt;
13786 if (!nr_linfo)
13787 return 0;
0e6491b5
BC
13788 if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
13789 return -EINVAL;
c454a46b
MKL
13790
13791 rec_size = attr->line_info_rec_size;
13792 if (rec_size < MIN_BPF_LINEINFO_SIZE ||
13793 rec_size > MAX_LINEINFO_REC_SIZE ||
13794 rec_size & (sizeof(u32) - 1))
13795 return -EINVAL;
13796
 13797	/* Need to zero it in case userspace passes
 13798	 * in a smaller bpf_line_info object.
13799 */
13800 linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
13801 GFP_KERNEL | __GFP_NOWARN);
13802 if (!linfo)
13803 return -ENOMEM;
13804
13805 prog = env->prog;
13806 btf = prog->aux->btf;
13807
13808 s = 0;
13809 sub = env->subprog_info;
af2ac3e1 13810 ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
c454a46b
MKL
13811 expected_size = sizeof(struct bpf_line_info);
13812 ncopy = min_t(u32, expected_size, rec_size);
13813 for (i = 0; i < nr_linfo; i++) {
13814 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
13815 if (err) {
13816 if (err == -E2BIG) {
13817 verbose(env, "nonzero tailing record in line_info");
af2ac3e1
AS
13818 if (copy_to_bpfptr_offset(uattr,
13819 offsetof(union bpf_attr, line_info_rec_size),
13820 &expected_size, sizeof(expected_size)))
c454a46b
MKL
13821 err = -EFAULT;
13822 }
13823 goto err_free;
13824 }
13825
af2ac3e1 13826 if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
c454a46b
MKL
13827 err = -EFAULT;
13828 goto err_free;
13829 }
13830
13831 /*
13832 * Check insn_off to ensure
13833 * 1) strictly increasing AND
13834 * 2) bounded by prog->len
13835 *
13836 * The linfo[0].insn_off == 0 check logically falls into
13837 * the later "missing bpf_line_info for func..." case
 13838		 * because the first linfo[0].insn_off must also belong to
 13839		 * the first sub, and the first sub must have
13840 * subprog_info[0].start == 0.
13841 */
13842 if ((i && linfo[i].insn_off <= prev_offset) ||
13843 linfo[i].insn_off >= prog->len) {
13844 verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
13845 i, linfo[i].insn_off, prev_offset,
13846 prog->len);
13847 err = -EINVAL;
13848 goto err_free;
13849 }
13850
fdbaa0be
MKL
13851 if (!prog->insnsi[linfo[i].insn_off].code) {
13852 verbose(env,
13853 "Invalid insn code at line_info[%u].insn_off\n",
13854 i);
13855 err = -EINVAL;
13856 goto err_free;
13857 }
13858
23127b33
MKL
13859 if (!btf_name_by_offset(btf, linfo[i].line_off) ||
13860 !btf_name_by_offset(btf, linfo[i].file_name_off)) {
c454a46b
MKL
13861 verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
13862 err = -EINVAL;
13863 goto err_free;
13864 }
13865
13866 if (s != env->subprog_cnt) {
13867 if (linfo[i].insn_off == sub[s].start) {
13868 sub[s].linfo_idx = i;
13869 s++;
13870 } else if (sub[s].start < linfo[i].insn_off) {
13871 verbose(env, "missing bpf_line_info for func#%u\n", s);
13872 err = -EINVAL;
13873 goto err_free;
13874 }
13875 }
13876
13877 prev_offset = linfo[i].insn_off;
af2ac3e1 13878 bpfptr_add(&ulinfo, rec_size);
c454a46b
MKL
13879 }
13880
13881 if (s != env->subprog_cnt) {
13882 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
13883 env->subprog_cnt - s, s);
13884 err = -EINVAL;
13885 goto err_free;
13886 }
13887
13888 prog->aux->linfo = linfo;
13889 prog->aux->nr_linfo = nr_linfo;
13890
13891 return 0;
13892
13893err_free:
13894 kvfree(linfo);
13895 return err;
13896}
13897
fbd94c7a
AS
13898#define MIN_CORE_RELO_SIZE sizeof(struct bpf_core_relo)
13899#define MAX_CORE_RELO_SIZE MAX_FUNCINFO_REC_SIZE
13900
13901static int check_core_relo(struct bpf_verifier_env *env,
13902 const union bpf_attr *attr,
13903 bpfptr_t uattr)
13904{
13905 u32 i, nr_core_relo, ncopy, expected_size, rec_size;
13906 struct bpf_core_relo core_relo = {};
13907 struct bpf_prog *prog = env->prog;
13908 const struct btf *btf = prog->aux->btf;
13909 struct bpf_core_ctx ctx = {
13910 .log = &env->log,
13911 .btf = btf,
13912 };
13913 bpfptr_t u_core_relo;
13914 int err;
13915
13916 nr_core_relo = attr->core_relo_cnt;
13917 if (!nr_core_relo)
13918 return 0;
13919 if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
13920 return -EINVAL;
13921
13922 rec_size = attr->core_relo_rec_size;
13923 if (rec_size < MIN_CORE_RELO_SIZE ||
13924 rec_size > MAX_CORE_RELO_SIZE ||
13925 rec_size % sizeof(u32))
13926 return -EINVAL;
13927
13928 u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
13929 expected_size = sizeof(struct bpf_core_relo);
13930 ncopy = min_t(u32, expected_size, rec_size);
13931
13932 /* Unlike func_info and line_info, copy and apply each CO-RE
13933 * relocation record one at a time.
13934 */
13935 for (i = 0; i < nr_core_relo; i++) {
13936 /* future proofing when sizeof(bpf_core_relo) changes */
13937 err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
13938 if (err) {
13939 if (err == -E2BIG) {
13940 verbose(env, "nonzero tailing record in core_relo");
13941 if (copy_to_bpfptr_offset(uattr,
13942 offsetof(union bpf_attr, core_relo_rec_size),
13943 &expected_size, sizeof(expected_size)))
13944 err = -EFAULT;
13945 }
13946 break;
13947 }
13948
13949 if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) {
13950 err = -EFAULT;
13951 break;
13952 }
13953
13954 if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
13955 verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
13956 i, core_relo.insn_off, prog->len);
13957 err = -EINVAL;
13958 break;
13959 }
13960
13961 err = bpf_core_apply(&ctx, &core_relo, i,
13962 &prog->insnsi[core_relo.insn_off / 8]);
13963 if (err)
13964 break;
13965 bpfptr_add(&u_core_relo, rec_size);
13966 }
13967 return err;
13968}
13969
c454a46b
MKL
13970static int check_btf_info(struct bpf_verifier_env *env,
13971 const union bpf_attr *attr,
af2ac3e1 13972 bpfptr_t uattr)
c454a46b
MKL
13973{
13974 struct btf *btf;
13975 int err;
13976
09b28d76
AS
13977 if (!attr->func_info_cnt && !attr->line_info_cnt) {
13978 if (check_abnormal_return(env))
13979 return -EINVAL;
c454a46b 13980 return 0;
09b28d76 13981 }
c454a46b
MKL
13982
13983 btf = btf_get_by_fd(attr->prog_btf_fd);
13984 if (IS_ERR(btf))
13985 return PTR_ERR(btf);
350a5c4d
AS
13986 if (btf_is_kernel(btf)) {
13987 btf_put(btf);
13988 return -EACCES;
13989 }
c454a46b
MKL
13990 env->prog->aux->btf = btf;
13991
13992 err = check_btf_func(env, attr, uattr);
13993 if (err)
13994 return err;
13995
13996 err = check_btf_line(env, attr, uattr);
13997 if (err)
13998 return err;
13999
fbd94c7a
AS
14000 err = check_core_relo(env, attr, uattr);
14001 if (err)
14002 return err;
14003
c454a46b 14004 return 0;
ba64e7d8
YS
14005}
14006
f1174f77
EC
14007/* check %cur's range satisfies %old's */
14008static bool range_within(struct bpf_reg_state *old,
14009 struct bpf_reg_state *cur)
14010{
b03c9f9f
EC
14011 return old->umin_value <= cur->umin_value &&
14012 old->umax_value >= cur->umax_value &&
14013 old->smin_value <= cur->smin_value &&
fd675184
DB
14014 old->smax_value >= cur->smax_value &&
14015 old->u32_min_value <= cur->u32_min_value &&
14016 old->u32_max_value >= cur->u32_max_value &&
14017 old->s32_min_value <= cur->s32_min_value &&
14018 old->s32_max_value >= cur->s32_max_value;
f1174f77
EC
14019}
14020
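/* A tiny stand-alone illustration of the rule implemented above (toy struct,
 * signed 64-bit bounds only): the cached (old) state is only reusable if its
 * range fully contains the range of the current state.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_range { long long smin, smax; };

static bool toy_range_within(struct toy_range old, struct toy_range cur)
{
	return old.smin <= cur.smin && old.smax >= cur.smax;
}

int main(void)
{
	struct toy_range old = { 0, 100 };

	/* [10, 20] is inside [0, 100]: whatever was proven safe still holds */
	printf("%d\n", toy_range_within(old, (struct toy_range){ 10, 20 }));
	/* [50, 200] is not: the new state reaches values the old proof never covered */
	printf("%d\n", toy_range_within(old, (struct toy_range){ 50, 200 }));
	return 0;
}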
f1174f77
EC
14021/* If in the old state two registers had the same id, then they need to have
14022 * the same id in the new state as well. But that id could be different from
14023 * the old state, so we need to track the mapping from old to new ids.
14024 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
14025 * regs with old id 5 must also have new id 9 for the new state to be safe. But
14026 * regs with a different old id could still have new id 9, we don't care about
14027 * that.
14028 * So we look through our idmap to see if this old id has been seen before. If
14029 * so, we require the new id to match; otherwise, we add the id pair to the map.
969bf05e 14030 */
c9e73e3d 14031static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
969bf05e 14032{
f1174f77 14033 unsigned int i;
969bf05e 14034
4633a006
AN
14035 /* either both IDs should be set or both should be zero */
14036 if (!!old_id != !!cur_id)
14037 return false;
14038
14039 if (old_id == 0) /* cur_id == 0 as well */
14040 return true;
14041
c9e73e3d 14042 for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
f1174f77
EC
14043 if (!idmap[i].old) {
14044 /* Reached an empty slot; haven't seen this id before */
14045 idmap[i].old = old_id;
14046 idmap[i].cur = cur_id;
14047 return true;
14048 }
14049 if (idmap[i].old == old_id)
14050 return idmap[i].cur == cur_id;
14051 }
14052 /* We ran out of idmap slots, which should be impossible */
14053 WARN_ON_ONCE(1);
14054 return false;
14055}
14056
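/* A short worked illustration of the id-mapping rule documented above (plain
 * C with a toy-sized map; names are illustrative): old ids only have to map
 * consistently onto new ids, they do not have to be numerically equal.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_id_pair { unsigned int old, cur; };

static bool toy_check_ids(unsigned int old_id, unsigned int cur_id,
			  struct toy_id_pair *idmap, int slots)
{
	if (!!old_id != !!cur_id)
		return false;
	if (!old_id)
		return true;
	for (int i = 0; i < slots; i++) {
		if (!idmap[i].old) {
			idmap[i].old = old_id;
			idmap[i].cur = cur_id;
			return true;
		}
		if (idmap[i].old == old_id)
			return idmap[i].cur == cur_id;
	}
	return false;	/* out of slots */
}

int main(void)
{
	struct toy_id_pair idmap[4] = { { 0, 0 } };

	printf("%d\n", toy_check_ids(5, 9, idmap, 4));	/* 1: records 5 -> 9 */
	printf("%d\n", toy_check_ids(5, 9, idmap, 4));	/* 1: consistent with 5 -> 9 */
	printf("%d\n", toy_check_ids(5, 8, idmap, 4));	/* 0: 5 already maps to 9 */
	printf("%d\n", toy_check_ids(7, 9, idmap, 4));	/* 1: a different old id may also map to 9 */
	return 0;
}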
9242b5f5
AS
14057static void clean_func_state(struct bpf_verifier_env *env,
14058 struct bpf_func_state *st)
14059{
14060 enum bpf_reg_liveness live;
14061 int i, j;
14062
14063 for (i = 0; i < BPF_REG_FP; i++) {
14064 live = st->regs[i].live;
14065 /* liveness must not touch this register anymore */
14066 st->regs[i].live |= REG_LIVE_DONE;
14067 if (!(live & REG_LIVE_READ))
14068 /* since the register is unused, clear its state
14069 * to make further comparison simpler
14070 */
f54c7898 14071 __mark_reg_not_init(env, &st->regs[i]);
9242b5f5
AS
14072 }
14073
14074 for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
14075 live = st->stack[i].spilled_ptr.live;
14076 /* liveness must not touch this stack slot anymore */
14077 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
14078 if (!(live & REG_LIVE_READ)) {
f54c7898 14079 __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
9242b5f5
AS
14080 for (j = 0; j < BPF_REG_SIZE; j++)
14081 st->stack[i].slot_type[j] = STACK_INVALID;
14082 }
14083 }
14084}
14085
14086static void clean_verifier_state(struct bpf_verifier_env *env,
14087 struct bpf_verifier_state *st)
14088{
14089 int i;
14090
14091 if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
14092 /* all regs in this state in all frames were already marked */
14093 return;
14094
14095 for (i = 0; i <= st->curframe; i++)
14096 clean_func_state(env, st->frame[i]);
14097}
14098
14099/* the parentage chains form a tree.
14100 * the verifier states are added to state lists at given insn and
14101 * pushed into state stack for future exploration.
 14102 * when the verifier reaches bpf_exit insn some of the verifier states
14103 * stored in the state lists have their final liveness state already,
14104 * but a lot of states will get revised from liveness point of view when
14105 * the verifier explores other branches.
14106 * Example:
14107 * 1: r0 = 1
14108 * 2: if r1 == 100 goto pc+1
14109 * 3: r0 = 2
14110 * 4: exit
14111 * when the verifier reaches exit insn the register r0 in the state list of
14112 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
14113 * of insn 2 and goes exploring further. At the insn 4 it will walk the
14114 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
14115 *
14116 * Since the verifier pushes the branch states as it sees them while exploring
14117 * the program the condition of walking the branch instruction for the second
14118 * time means that all states below this branch were already explored and
8fb33b60 14119 * their final liveness marks are already propagated.
9242b5f5
AS
14120 * Hence when the verifier completes the search of state list in is_state_visited()
14121 * we can call this clean_live_states() function to mark all liveness states
14122 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
14123 * will not be used.
14124 * This function also clears the registers and stack for states that !READ
14125 * to simplify state merging.
14126 *
 14127 * Note that walking the same branch instruction in the callee
 14128 * doesn't mean that the states are DONE. The verifier has to compare
14129 * the callsites
14130 */
14131static void clean_live_states(struct bpf_verifier_env *env, int insn,
14132 struct bpf_verifier_state *cur)
14133{
14134 struct bpf_verifier_state_list *sl;
14135 int i;
14136
5d839021 14137 sl = *explored_state(env, insn);
a8f500af 14138 while (sl) {
2589726d
AS
14139 if (sl->state.branches)
14140 goto next;
dc2a4ebc
AS
14141 if (sl->state.insn_idx != insn ||
14142 sl->state.curframe != cur->curframe)
9242b5f5
AS
14143 goto next;
14144 for (i = 0; i <= cur->curframe; i++)
14145 if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
14146 goto next;
14147 clean_verifier_state(env, &sl->state);
14148next:
14149 sl = sl->next;
14150 }
14151}
14152
4a95c85c 14153static bool regs_exact(const struct bpf_reg_state *rold,
4633a006
AN
14154 const struct bpf_reg_state *rcur,
14155 struct bpf_id_pair *idmap)
4a95c85c 14156{
4633a006
AN
14157 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
14158 check_ids(rold->id, rcur->id, idmap) &&
14159 check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
4a95c85c
AN
14160}
14161
f1174f77 14162/* Returns true if (rold safe implies rcur safe) */
e042aa53
DB
14163static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
14164 struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
f1174f77 14165{
dc503a8a
EC
14166 if (!(rold->live & REG_LIVE_READ))
14167 /* explored state didn't use this */
14168 return true;
f1174f77
EC
14169 if (rold->type == NOT_INIT)
14170 /* explored state can't have used this */
969bf05e 14171 return true;
f1174f77
EC
14172 if (rcur->type == NOT_INIT)
14173 return false;
7f4ce97c 14174
910f6999
AN
14175 /* Enforce that register types have to match exactly, including their
14176 * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
14177 * rule.
14178 *
14179 * One can make a point that using a pointer register as unbounded
14180 * SCALAR would be technically acceptable, but this could lead to
14181 * pointer leaks because scalars are allowed to leak while pointers
14182 * are not. We could make this safe in special cases if root is
14183 * calling us, but it's probably not worth the hassle.
14184 *
14185 * Also, register types that are *not* MAYBE_NULL could technically be
14186 * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
14187 * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
14188 * to the same map).
7f4ce97c
AN
14189 * However, if the old MAYBE_NULL register then got NULL checked,
14190 * doing so could have affected others with the same id, and we can't
14191 * check for that because we lost the id when we converted to
14192 * a non-MAYBE_NULL variant.
14193 * So, as a general rule we don't allow mixing MAYBE_NULL and
910f6999 14194 * non-MAYBE_NULL registers as well.
7f4ce97c 14195 */
910f6999 14196 if (rold->type != rcur->type)
7f4ce97c
AN
14197 return false;
14198
c25b2ae1 14199 switch (base_type(rold->type)) {
f1174f77 14200 case SCALAR_VALUE:
4633a006 14201 if (regs_exact(rold, rcur, idmap))
7c884339 14202 return true;
e042aa53
DB
14203 if (env->explore_alu_limits)
14204 return false;
910f6999
AN
14205 if (!rold->precise)
14206 return true;
14207 /* new val must satisfy old val knowledge */
14208 return range_within(rold, rcur) &&
14209 tnum_in(rold->var_off, rcur->var_off);
69c087ba 14210 case PTR_TO_MAP_KEY:
f1174f77 14211 case PTR_TO_MAP_VALUE:
567da5d2
AN
14212 case PTR_TO_MEM:
14213 case PTR_TO_BUF:
14214 case PTR_TO_TP_BUFFER:
1b688a19
EC
14215 /* If the new min/max/var_off satisfy the old ones and
14216 * everything else matches, we are OK.
1b688a19 14217 */
a73bf9f2 14218 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
1b688a19 14219 range_within(rold, rcur) &&
4ea2bb15 14220 tnum_in(rold->var_off, rcur->var_off) &&
567da5d2
AN
14221 check_ids(rold->id, rcur->id, idmap) &&
14222 check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
de8f3a83 14223 case PTR_TO_PACKET_META:
f1174f77 14224 case PTR_TO_PACKET:
f1174f77
EC
14225 /* We must have at least as much range as the old ptr
14226 * did, so that any accesses which were safe before are
14227 * still safe. This is true even if old range < old off,
14228 * since someone could have accessed through (ptr - k), or
14229 * even done ptr -= k in a register, to get a safe access.
14230 */
14231 if (rold->range > rcur->range)
14232 return false;
14233 /* If the offsets don't match, we can't trust our alignment;
14234 * nor can we be sure that we won't fall out of range.
14235 */
14236 if (rold->off != rcur->off)
14237 return false;
14238 /* id relations must be preserved */
4633a006 14239 if (!check_ids(rold->id, rcur->id, idmap))
f1174f77
EC
14240 return false;
14241 /* new val must satisfy old val knowledge */
14242 return range_within(rold, rcur) &&
14243 tnum_in(rold->var_off, rcur->var_off);
7c884339
EZ
14244 case PTR_TO_STACK:
14245 /* two stack pointers are equal only if they're pointing to
14246 * the same stack frame, since fp-8 in foo != fp-8 in bar
f1174f77 14247 */
4633a006 14248 return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
f1174f77 14249 default:
4633a006 14250 return regs_exact(rold, rcur, idmap);
f1174f77 14251 }
969bf05e
AS
14252}
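/* Editor's illustration (not part of verifier.c): the SCALAR_VALUE rule in
 * regsafe() above, in concrete numbers. Suppose the old (already verified)
 * register was precise with range [0, 255] and var_off = (0x0; 0xff). A
 * current register with range [16, 32] and var_off = (0x0; 0x3f) passes both
 * range_within() and tnum_in(), so the old safety proof covers it and the
 * branch may be pruned. A current register with range [0, 1000] is wider
 * than what was proven safe, so verification of this path must continue.
 */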
14253
e042aa53
DB
14254static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
14255 struct bpf_func_state *cur, struct bpf_id_pair *idmap)
638f5b90
AS
14256{
14257 int i, spi;
14258
638f5b90
AS
14259 /* walk slots of the explored stack and ignore any additional
14260 * slots in the current stack, since explored(safe) state
14261 * didn't use them
14262 */
14263 for (i = 0; i < old->allocated_stack; i++) {
14264 spi = i / BPF_REG_SIZE;
14265
b233920c
AS
14266 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
14267 i += BPF_REG_SIZE - 1;
cc2b14d5 14268 /* explored state didn't use this */
fd05e57b 14269 continue;
b233920c 14270 }
cc2b14d5 14271
638f5b90
AS
14272 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
14273 continue;
19e2dbb7 14274
6715df8d
EZ
14275 if (env->allow_uninit_stack &&
14276 old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
14277 continue;
14278
19e2dbb7
AS
14279 /* explored stack has more populated slots than current stack
14280 * and these slots were used
14281 */
14282 if (i >= cur->allocated_stack)
14283 return false;
14284
cc2b14d5
AS
14285 /* if old state was safe with misc data in the stack
14286 * it will be safe with zero-initialized stack.
14287 * The opposite is not true
14288 */
14289 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
14290 cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
14291 continue;
638f5b90
AS
14292 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
14293 cur->stack[spi].slot_type[i % BPF_REG_SIZE])
14294 /* Ex: old explored (safe) state has STACK_SPILL in
b8c1a309 14295 * this stack slot, but current has STACK_MISC ->
638f5b90
AS
14296 * these verifier states are not equivalent,
14297 * return false to continue verification of this path
14298 */
14299 return false;
27113c59 14300 if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
638f5b90 14301 continue;
d6fefa11
KKD
14302 /* Both old and cur are having same slot_type */
14303 switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
14304 case STACK_SPILL:
638f5b90
AS
14305 /* when explored and current stack slot are both storing
14306 * spilled registers, check that stored pointers types
14307 * are the same as well.
14308 * Ex: explored safe path could have stored
14309 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
14310 * but current path has stored:
14311 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
14312 * such verifier states are not equivalent.
14313 * return false to continue verification of this path
14314 */
d6fefa11
KKD
14315 if (!regsafe(env, &old->stack[spi].spilled_ptr,
14316 &cur->stack[spi].spilled_ptr, idmap))
14317 return false;
14318 break;
14319 case STACK_DYNPTR:
14320 {
14321 const struct bpf_reg_state *old_reg, *cur_reg;
14322
14323 old_reg = &old->stack[spi].spilled_ptr;
14324 cur_reg = &cur->stack[spi].spilled_ptr;
14325 if (old_reg->dynptr.type != cur_reg->dynptr.type ||
14326 old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
14327 !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
14328 return false;
14329 break;
14330 }
14331 case STACK_MISC:
14332 case STACK_ZERO:
14333 case STACK_INVALID:
14334 continue;
14335 /* Ensure that new unhandled slot types return false by default */
14336 default:
638f5b90 14337 return false;
d6fefa11 14338 }
638f5b90
AS
14339 }
14340 return true;
14341}
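/* Editor's illustration (not part of verifier.c): the STACK_MISC vs
 * STACK_ZERO asymmetry in stacksafe() above. If the explored (safe) state had
 * fp-8 as STACK_MISC (arbitrary bytes) and was still proven safe, a current
 * state with fp-8 as STACK_ZERO is a strict subset of that and is accepted.
 * The reverse (old STACK_ZERO, current STACK_MISC) is rejected, because the
 * old proof may have relied on reading zeroes from that slot.
 */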
14342
e8f55fcf
AN
14343static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
14344 struct bpf_id_pair *idmap)
fd978bf7 14345{
e8f55fcf
AN
14346 int i;
14347
fd978bf7
JS
14348 if (old->acquired_refs != cur->acquired_refs)
14349 return false;
e8f55fcf
AN
14350
14351 for (i = 0; i < old->acquired_refs; i++) {
14352 if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap))
14353 return false;
14354 }
14355
14356 return true;
fd978bf7
JS
14357}
14358
f1bca824
AS
14359/* compare two verifier states
14360 *
14361 * all states stored in state_list are known to be valid, since
14362 * verifier reached 'bpf_exit' instruction through them
14363 *
14364 * this function is called when the verifier explores different branches of
14365 * execution popped from the state stack. If it sees an old state that has
14366 * a more strict register state and a more strict stack state, then this execution
14367 * branch doesn't need to be explored further, since the verifier already
14368 * concluded that more strict state leads to valid finish.
14369 *
14370 * Therefore two states are equivalent if register state is more conservative
14371 * and explored stack state is more conservative than the current one.
14372 * Example:
14373 * explored current
14374 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
14375 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
14376 *
14377 * In other words if current stack state (one being explored) has more
14378 * valid slots than old one that already passed validation, it means
14379 * the verifier can stop exploring and conclude that current state is valid too
14380 *
14381 * Similarly with registers. If explored state has register type as invalid
14382 * whereas register type in current state is meaningful, it means that
14383 * the current state will reach 'bpf_exit' instruction safely
14384 */
c9e73e3d 14385static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
f4d7e40a 14386 struct bpf_func_state *cur)
f1bca824
AS
14387{
14388 int i;
14389
c9e73e3d 14390 for (i = 0; i < MAX_BPF_REG; i++)
e042aa53
DB
14391 if (!regsafe(env, &old->regs[i], &cur->regs[i],
14392 env->idmap_scratch))
c9e73e3d 14393 return false;
f1bca824 14394
e042aa53 14395 if (!stacksafe(env, old, cur, env->idmap_scratch))
c9e73e3d 14396 return false;
fd978bf7 14397
e8f55fcf 14398 if (!refsafe(old, cur, env->idmap_scratch))
c9e73e3d
LB
14399 return false;
14400
14401 return true;
f1bca824
AS
14402}
14403
f4d7e40a
AS
14404static bool states_equal(struct bpf_verifier_env *env,
14405 struct bpf_verifier_state *old,
14406 struct bpf_verifier_state *cur)
14407{
14408 int i;
14409
14410 if (old->curframe != cur->curframe)
14411 return false;
14412
5dd9cdbc
EZ
14413 memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
14414
979d63d5
DB
14415 /* Verification state from speculative execution simulation
14416 * must never prune a non-speculative execution one.
14417 */
14418 if (old->speculative && !cur->speculative)
14419 return false;
14420
4ea2bb15
EZ
14421 if (old->active_lock.ptr != cur->active_lock.ptr)
14422 return false;
14423
14424 /* Old and cur active_lock's have to be either both present
14425 * or both absent.
14426 */
14427 if (!!old->active_lock.id != !!cur->active_lock.id)
14428 return false;
14429
14430 if (old->active_lock.id &&
14431 !check_ids(old->active_lock.id, cur->active_lock.id, env->idmap_scratch))
d83525ca
AS
14432 return false;
14433
9bb00b28 14434 if (old->active_rcu_lock != cur->active_rcu_lock)
d83525ca
AS
14435 return false;
14436
f4d7e40a
AS
14437 /* for states to be equal callsites have to be the same
14438 * and all frame states need to be equivalent
14439 */
14440 for (i = 0; i <= old->curframe; i++) {
14441 if (old->frame[i]->callsite != cur->frame[i]->callsite)
14442 return false;
c9e73e3d 14443 if (!func_states_equal(env, old->frame[i], cur->frame[i]))
f4d7e40a
AS
14444 return false;
14445 }
14446 return true;
14447}
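/* Editor's illustration (not part of verifier.c): why id comparisons in
 * states_equal() go through idmap_scratch instead of requiring equal ids.
 * Ids are allocated in program order, so the "same" object may carry id 2 in
 * the explored state and id 5 in the current one. check_ids() only demands a
 * consistent mapping: once old id 2 has been paired with cur id 5, every
 * later occurrence of old id 2 must again see cur id 5. That is enough to
 * preserve the relationships (NULL checks, lock ownership) that the ids
 * encode between registers.
 */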
14448
5327ed3d
JW
14449/* Return 0 if no propagation happened. Return negative error code if error
14450 * happened. Otherwise, return the propagated bit.
14451 */
55e7f3b5
JW
14452static int propagate_liveness_reg(struct bpf_verifier_env *env,
14453 struct bpf_reg_state *reg,
14454 struct bpf_reg_state *parent_reg)
14455{
5327ed3d
JW
14456 u8 parent_flag = parent_reg->live & REG_LIVE_READ;
14457 u8 flag = reg->live & REG_LIVE_READ;
55e7f3b5
JW
14458 int err;
14459
5327ed3d
JW
14460 /* When we get here, the read flags of PARENT_REG or REG could be any of
14461 * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
14462 * for propagation if PARENT_REG already has the strongest flag, REG_LIVE_READ64.
14463 */
14464 if (parent_flag == REG_LIVE_READ64 ||
14465 /* Or if there is no read flag from REG. */
14466 !flag ||
14467 /* Or if the read flag from REG is the same as PARENT_REG. */
14468 parent_flag == flag)
55e7f3b5
JW
14469 return 0;
14470
5327ed3d 14471 err = mark_reg_read(env, reg, parent_reg, flag);
55e7f3b5
JW
14472 if (err)
14473 return err;
14474
5327ed3d 14475 return flag;
55e7f3b5
JW
14476}
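/* Editor's illustration (not part of verifier.c): one concrete case for
 * propagate_liveness_reg() above. If the parent register so far only carries
 * REG_LIVE_READ32 but the child was read as a full 64-bit value
 * (REG_LIVE_READ64), none of the early returns apply, mark_reg_read()
 * propagates the stronger READ64 flag up the parentage chain, and the
 * returned REG_LIVE_READ64 lets the caller request zero-extension tracking
 * via mark_insn_zext().
 */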
14477
8e9cd9ce 14478/* A write screens off any subsequent reads; but write marks come from the
f4d7e40a
AS
14479 * straight-line code between a state and its parent. When we arrive at an
14480 * equivalent state (jump target or such) we didn't arrive by the straight-line
14481 * code, so read marks in the state must propagate to the parent regardless
14482 * of the state's write marks. That's what 'parent == state->parent' comparison
679c782d 14483 * in mark_reg_read() is for.
8e9cd9ce 14484 */
f4d7e40a
AS
14485static int propagate_liveness(struct bpf_verifier_env *env,
14486 const struct bpf_verifier_state *vstate,
14487 struct bpf_verifier_state *vparent)
dc503a8a 14488{
3f8cafa4 14489 struct bpf_reg_state *state_reg, *parent_reg;
f4d7e40a 14490 struct bpf_func_state *state, *parent;
3f8cafa4 14491 int i, frame, err = 0;
dc503a8a 14492
f4d7e40a
AS
14493 if (vparent->curframe != vstate->curframe) {
14494 WARN(1, "propagate_live: parent frame %d current frame %d\n",
14495 vparent->curframe, vstate->curframe);
14496 return -EFAULT;
14497 }
dc503a8a
EC
14498 /* Propagate read liveness of registers... */
14499 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
83d16312 14500 for (frame = 0; frame <= vstate->curframe; frame++) {
3f8cafa4
JW
14501 parent = vparent->frame[frame];
14502 state = vstate->frame[frame];
14503 parent_reg = parent->regs;
14504 state_reg = state->regs;
83d16312
JK
14505 /* We don't need to worry about FP liveness, it's read-only */
14506 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
55e7f3b5
JW
14507 err = propagate_liveness_reg(env, &state_reg[i],
14508 &parent_reg[i]);
5327ed3d 14509 if (err < 0)
3f8cafa4 14510 return err;
5327ed3d
JW
14511 if (err == REG_LIVE_READ64)
14512 mark_insn_zext(env, &parent_reg[i]);
dc503a8a 14513 }
f4d7e40a 14514
1b04aee7 14515 /* Propagate stack slots. */
f4d7e40a
AS
14516 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
14517 i < parent->allocated_stack / BPF_REG_SIZE; i++) {
3f8cafa4
JW
14518 parent_reg = &parent->stack[i].spilled_ptr;
14519 state_reg = &state->stack[i].spilled_ptr;
55e7f3b5
JW
14520 err = propagate_liveness_reg(env, state_reg,
14521 parent_reg);
5327ed3d 14522 if (err < 0)
3f8cafa4 14523 return err;
dc503a8a
EC
14524 }
14525 }
5327ed3d 14526 return 0;
dc503a8a
EC
14527}
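/* Editor's illustration (not part of verifier.c) of the comment above:
 *
 *   A -> B -> C    first exploration, straight-line code through B
 *   A ------> C    later path jumps directly to C and finds the state
 *                  stored at C equivalent
 *
 * Registers that the continuation from C goes on to read must become live in
 * the pruned path's parent chain too, even if the stored state at C carries
 * write marks for them: those writes happened on the A->B->C path, not on
 * the A->C path being pruned, which is why the read marks are propagated
 * here regardless of write marks.
 */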
14528
a3ce685d
AS
14529/* find precise scalars in the previous equivalent state and
14530 * propagate them into the current state
14531 */
14532static int propagate_precision(struct bpf_verifier_env *env,
14533 const struct bpf_verifier_state *old)
14534{
14535 struct bpf_reg_state *state_reg;
14536 struct bpf_func_state *state;
529409ea 14537 int i, err = 0, fr;
a3ce685d 14538
529409ea
AN
14539 for (fr = old->curframe; fr >= 0; fr--) {
14540 state = old->frame[fr];
14541 state_reg = state->regs;
14542 for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
14543 if (state_reg->type != SCALAR_VALUE ||
14544 !state_reg->precise)
14545 continue;
14546 if (env->log.level & BPF_LOG_LEVEL2)
14547 verbose(env, "frame %d: propagating r%d\n", fr, i);
14548 err = mark_chain_precision_frame(env, fr, i);
14549 if (err < 0)
14550 return err;
14551 }
a3ce685d 14552
529409ea
AN
14553 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
14554 if (!is_spilled_reg(&state->stack[i]))
14555 continue;
14556 state_reg = &state->stack[i].spilled_ptr;
14557 if (state_reg->type != SCALAR_VALUE ||
14558 !state_reg->precise)
14559 continue;
14560 if (env->log.level & BPF_LOG_LEVEL2)
14561 verbose(env, "frame %d: propagating fp%d\n",
14562 fr, (-i - 1) * BPF_REG_SIZE);
14563 err = mark_chain_precision_stack_frame(env, fr, i);
14564 if (err < 0)
14565 return err;
14566 }
a3ce685d
AS
14567 }
14568 return 0;
14569}
14570
2589726d
AS
14571static bool states_maybe_looping(struct bpf_verifier_state *old,
14572 struct bpf_verifier_state *cur)
14573{
14574 struct bpf_func_state *fold, *fcur;
14575 int i, fr = cur->curframe;
14576
14577 if (old->curframe != fr)
14578 return false;
14579
14580 fold = old->frame[fr];
14581 fcur = cur->frame[fr];
14582 for (i = 0; i < MAX_BPF_REG; i++)
14583 if (memcmp(&fold->regs[i], &fcur->regs[i],
14584 offsetof(struct bpf_reg_state, parent)))
14585 return false;
14586 return true;
14587}
14588
14589
58e2af8b 14590static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
f1bca824 14591{
58e2af8b 14592 struct bpf_verifier_state_list *new_sl;
9f4686c4 14593 struct bpf_verifier_state_list *sl, **pprev;
679c782d 14594 struct bpf_verifier_state *cur = env->cur_state, *new;
ceefbc96 14595 int i, j, err, states_cnt = 0;
10d274e8 14596 bool add_new_state = env->test_state_freq ? true : false;
f1bca824 14597
2589726d
AS
14598 /* bpf progs typically have pruning point every 4 instructions
14599 * http://vger.kernel.org/bpfconf2019.html#session-1
14600 * Do not add new state for future pruning if the verifier hasn't seen
14601 * at least 2 jumps and at least 8 instructions.
14602 * This heuristics helps decrease 'total_states' and 'peak_states' metric.
14603 * In tests that amounts to up to 50% reduction into total verifier
14604 * memory consumption and 20% verifier time speedup.
14605 */
14606 if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
14607 env->insn_processed - env->prev_insn_processed >= 8)
14608 add_new_state = true;
14609
a8f500af
AS
14610 pprev = explored_state(env, insn_idx);
14611 sl = *pprev;
14612
9242b5f5
AS
14613 clean_live_states(env, insn_idx, cur);
14614
a8f500af 14615 while (sl) {
dc2a4ebc
AS
14616 states_cnt++;
14617 if (sl->state.insn_idx != insn_idx)
14618 goto next;
bfc6bb74 14619
2589726d 14620 if (sl->state.branches) {
bfc6bb74
AS
14621 struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
14622
14623 if (frame->in_async_callback_fn &&
14624 frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
14625 /* Different async_entry_cnt means that the verifier is
14626 * processing another entry into async callback.
14627 * Seeing the same state is not an indication of infinite
14628 * loop or infinite recursion.
14629 * But finding the same state doesn't mean that it's safe
14630 * to stop processing the current state. The previous state
14631 * hasn't yet reached bpf_exit, since state.branches > 0.
14632 * Checking in_async_callback_fn alone is not enough either,
14633 * since the verifier still needs to catch infinite loops
14634 * inside async callbacks.
14635 */
14636 } else if (states_maybe_looping(&sl->state, cur) &&
14637 states_equal(env, &sl->state, cur)) {
2589726d
AS
14638 verbose_linfo(env, insn_idx, "; ");
14639 verbose(env, "infinite loop detected at insn %d\n", insn_idx);
14640 return -EINVAL;
14641 }
14642 /* if the verifier is processing a loop, avoid adding new state
14643 * too often, since different loop iterations have distinct
14644 * states and may not help future pruning.
14645 * This threshold shouldn't be too low to make sure that
14646 * a loop with large bound will be rejected quickly.
14647 * The most abusive loop will be:
14648 * r1 += 1
14649 * if r1 < 1000000 goto pc-2
14650 * 1M insn_processed limit / 100 == 10k peak states.
14651 * This threshold shouldn't be too high either, since states
14652 * at the end of the loop are likely to be useful in pruning.
14653 */
14654 if (env->jmps_processed - env->prev_jmps_processed < 20 &&
14655 env->insn_processed - env->prev_insn_processed < 100)
14656 add_new_state = false;
14657 goto miss;
14658 }
638f5b90 14659 if (states_equal(env, &sl->state, cur)) {
9f4686c4 14660 sl->hit_cnt++;
f1bca824 14661 /* reached equivalent register/stack state,
dc503a8a
EC
14662 * prune the search.
14663 * Registers read by the continuation are read by us.
8e9cd9ce
EC
14664 * If we have any write marks in env->cur_state, they
14665 * will prevent corresponding reads in the continuation
14666 * from reaching our parent (an explored_state). Our
14667 * own state will get the read marks recorded, but
14668 * they'll be immediately forgotten as we're pruning
14669 * this state and will pop a new one.
f1bca824 14670 */
f4d7e40a 14671 err = propagate_liveness(env, &sl->state, cur);
a3ce685d
AS
14672
14673 /* if previous state reached the exit with precision and
14674 * current state is equivalent to it (except precision marks)
14675 * the precision needs to be propagated back in
14676 * the current state.
14677 */
14678 err = err ? : push_jmp_history(env, cur);
14679 err = err ? : propagate_precision(env, &sl->state);
f4d7e40a
AS
14680 if (err)
14681 return err;
f1bca824 14682 return 1;
dc503a8a 14683 }
2589726d
AS
14684miss:
14685 /* when a new state is not going to be added, do not increase the miss count.
14686 * Otherwise several loop iterations will remove the state
14687 * recorded earlier. The goal of these heuristics is to have
14688 * states from some iterations of the loop (some in the beginning
14689 * and some at the end) to help pruning.
14690 */
14691 if (add_new_state)
14692 sl->miss_cnt++;
9f4686c4
AS
14693 /* heuristic to determine whether this state is beneficial
14694 * to keep checking from state equivalence point of view.
14695 * Higher numbers increase max_states_per_insn and verification time,
14696 * but do not meaningfully decrease insn_processed.
14697 */
14698 if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
14699 /* the state is unlikely to be useful. Remove it to
14700 * speed up verification
14701 */
14702 *pprev = sl->next;
14703 if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
2589726d
AS
14704 u32 br = sl->state.branches;
14705
14706 WARN_ONCE(br,
14707 "BUG live_done but branches_to_explore %d\n",
14708 br);
9f4686c4
AS
14709 free_verifier_state(&sl->state, false);
14710 kfree(sl);
14711 env->peak_states--;
14712 } else {
14713 /* cannot free this state, since parentage chain may
14715 * walk it later. Add it to free_list instead, to
14715 * be freed at the end of verification
14716 */
14717 sl->next = env->free_list;
14718 env->free_list = sl;
14719 }
14720 sl = *pprev;
14721 continue;
14722 }
dc2a4ebc 14723next:
9f4686c4
AS
14724 pprev = &sl->next;
14725 sl = *pprev;
f1bca824
AS
14726 }
14727
06ee7115
AS
14728 if (env->max_states_per_insn < states_cnt)
14729 env->max_states_per_insn = states_cnt;
14730
2c78ee89 14731 if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
a095f421 14732 return 0;
ceefbc96 14733
2589726d 14734 if (!add_new_state)
a095f421 14735 return 0;
ceefbc96 14736
2589726d
AS
14737 /* There were no equivalent states, remember the current one.
14738 * Technically the current state is not proven to be safe yet,
f4d7e40a 14739 * but it will either reach outer most bpf_exit (which means it's safe)
2589726d 14740 * or it will be rejected. When there are no loops the verifier won't be
f4d7e40a 14741 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
2589726d
AS
14742 * again on the way to bpf_exit.
14743 * When looping the sl->state.branches will be > 0 and this state
14744 * will not be considered for equivalence until branches == 0.
f1bca824 14745 */
638f5b90 14746 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
f1bca824
AS
14747 if (!new_sl)
14748 return -ENOMEM;
06ee7115
AS
14749 env->total_states++;
14750 env->peak_states++;
2589726d
AS
14751 env->prev_jmps_processed = env->jmps_processed;
14752 env->prev_insn_processed = env->insn_processed;
f1bca824 14753
7a830b53
AN
14754 /* forget precise markings we inherited, see __mark_chain_precision */
14755 if (env->bpf_capable)
14756 mark_all_scalars_imprecise(env, cur);
14757
f1bca824 14758 /* add new state to the head of linked list */
679c782d
EC
14759 new = &new_sl->state;
14760 err = copy_verifier_state(new, cur);
1969db47 14761 if (err) {
679c782d 14762 free_verifier_state(new, false);
1969db47
AS
14763 kfree(new_sl);
14764 return err;
14765 }
dc2a4ebc 14766 new->insn_idx = insn_idx;
2589726d
AS
14767 WARN_ONCE(new->branches != 1,
14768 "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
b5dc0163 14769
2589726d 14770 cur->parent = new;
b5dc0163
AS
14771 cur->first_insn_idx = insn_idx;
14772 clear_jmp_history(cur);
5d839021
AS
14773 new_sl->next = *explored_state(env, insn_idx);
14774 *explored_state(env, insn_idx) = new_sl;
7640ead9
JK
14775 /* connect new state to parentage chain. Current frame needs all
14776 * registers connected. Only r6 - r9 of the callers are alive (pushed
14777 * to the stack implicitly by JITs) so in callers' frames connect just
14778 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
14779 * the state of the call instruction (with WRITTEN set), and r0 comes
14780 * from callee with its full parentage chain, anyway.
14781 */
8e9cd9ce
EC
14782 /* clear write marks in current state: the writes we did are not writes
14783 * our child did, so they don't screen off its reads from us.
14784 * (There are no read marks in current state, because reads always mark
14785 * their parent and current state never has children yet. Only
14786 * explored_states can get read marks.)
14787 */
eea1c227
AS
14788 for (j = 0; j <= cur->curframe; j++) {
14789 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
14790 cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
14791 for (i = 0; i < BPF_REG_FP; i++)
14792 cur->frame[j]->regs[i].live = REG_LIVE_NONE;
14793 }
f4d7e40a
AS
14794
14795 /* all stack frames are accessible from callee, clear them all */
14796 for (j = 0; j <= cur->curframe; j++) {
14797 struct bpf_func_state *frame = cur->frame[j];
679c782d 14798 struct bpf_func_state *newframe = new->frame[j];
f4d7e40a 14799
679c782d 14800 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
cc2b14d5 14801 frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
679c782d
EC
14802 frame->stack[i].spilled_ptr.parent =
14803 &newframe->stack[i].spilled_ptr;
14804 }
f4d7e40a 14805 }
f1bca824
AS
14806 return 0;
14807}
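/* Editor's illustration (not part of verifier.c): the eviction heuristic in
 * is_state_visited() above, in numbers. With the condition
 * miss_cnt > hit_cnt * 3 + 3, a cached state that never helped pruning
 * (hit_cnt == 0) is dropped on its 4th miss, while a state with hit_cnt == 2
 * survives up to 9 misses; states that keep matching future paths are kept
 * proportionally longer.
 */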
14808
c64b7983
JS
14809/* Return true if it's OK to have the same insn return a different type. */
14810static bool reg_type_mismatch_ok(enum bpf_reg_type type)
14811{
c25b2ae1 14812 switch (base_type(type)) {
c64b7983
JS
14813 case PTR_TO_CTX:
14814 case PTR_TO_SOCKET:
46f8bc92 14815 case PTR_TO_SOCK_COMMON:
655a51e5 14816 case PTR_TO_TCP_SOCK:
fada7fdc 14817 case PTR_TO_XDP_SOCK:
2a02759e 14818 case PTR_TO_BTF_ID:
c64b7983
JS
14819 return false;
14820 default:
14821 return true;
14822 }
14823}
14824
14825/* If an instruction was previously used with particular pointer types, then we
14826 * need to be careful to avoid cases such as the below, where it may be ok
14827 * for one branch accessing the pointer, but not ok for the other branch:
14828 *
14829 * R1 = sock_ptr
14830 * goto X;
14831 * ...
14832 * R1 = some_other_valid_ptr;
14833 * goto X;
14834 * ...
14835 * R2 = *(u32 *)(R1 + 0);
14836 */
14837static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
14838{
14839 return src != prev && (!reg_type_mismatch_ok(src) ||
14840 !reg_type_mismatch_ok(prev));
14841}
14842
0d80a619
EZ
14843static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
14844 bool allow_trust_mismatch)
14845{
14846 enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
14847
14848 if (*prev_type == NOT_INIT) {
14849 /* Saw a valid insn
14850 * dst_reg = *(u32 *)(src_reg + off)
14851 * save type to validate intersecting paths
14852 */
14853 *prev_type = type;
14854 } else if (reg_type_mismatch(type, *prev_type)) {
14855 /* Abuser program is trying to use the same insn
14856 * dst_reg = *(u32*) (src_reg + off)
14857 * with different pointer types:
14858 * src_reg == ctx in one branch and
14859 * src_reg == stack|map in some other branch.
14860 * Reject it.
14861 */
14862 if (allow_trust_mismatch &&
14863 base_type(type) == PTR_TO_BTF_ID &&
14864 base_type(*prev_type) == PTR_TO_BTF_ID) {
14865 /*
14866 * Have to support a use case when one path through
14867 * the program yields TRUSTED pointer while another
14868 * is UNTRUSTED. Fall back to UNTRUSTED to generate
14869 * BPF_PROBE_MEM.
14870 */
14871 *prev_type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
14872 } else {
14873 verbose(env, "same insn cannot be used with different pointers\n");
14874 return -EINVAL;
14875 }
14876 }
14877
14878 return 0;
14879}
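/* Editor's illustration (not part of verifier.c): the trust-mismatch fallback
 * in save_aux_ptr_type() above. With mismatches allowed (the BPF_LDX path),
 * one branch may reach the load with PTR_TO_BTF_ID | PTR_TRUSTED and another
 * with PTR_TO_BTF_ID | PTR_UNTRUSTED. Both share the PTR_TO_BTF_ID base
 * type, so the saved type is downgraded to the UNTRUSTED variant so that a
 * BPF_PROBE_MEM load can be generated, as the comment inside the function
 * notes. A genuine base-type conflict, e.g. PTR_TO_CTX on one path and
 * PTR_TO_STACK on the other, is still rejected.
 */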
14880
58e2af8b 14881static int do_check(struct bpf_verifier_env *env)
17a52670 14882{
6f8a57cc 14883 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
51c39bb1 14884 struct bpf_verifier_state *state = env->cur_state;
17a52670 14885 struct bpf_insn *insns = env->prog->insnsi;
638f5b90 14886 struct bpf_reg_state *regs;
06ee7115 14887 int insn_cnt = env->prog->len;
17a52670 14888 bool do_print_state = false;
b5dc0163 14889 int prev_insn_idx = -1;
17a52670 14890
17a52670
AS
14891 for (;;) {
14892 struct bpf_insn *insn;
14893 u8 class;
14894 int err;
14895
b5dc0163 14896 env->prev_insn_idx = prev_insn_idx;
c08435ec 14897 if (env->insn_idx >= insn_cnt) {
61bd5218 14898 verbose(env, "invalid insn idx %d insn_cnt %d\n",
c08435ec 14899 env->insn_idx, insn_cnt);
17a52670
AS
14900 return -EFAULT;
14901 }
14902
c08435ec 14903 insn = &insns[env->insn_idx];
17a52670
AS
14904 class = BPF_CLASS(insn->code);
14905
06ee7115 14906 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
61bd5218
JK
14907 verbose(env,
14908 "BPF program is too large. Processed %d insn\n",
06ee7115 14909 env->insn_processed);
17a52670
AS
14910 return -E2BIG;
14911 }
14912
a095f421
AN
14913 state->last_insn_idx = env->prev_insn_idx;
14914
14915 if (is_prune_point(env, env->insn_idx)) {
14916 err = is_state_visited(env, env->insn_idx);
14917 if (err < 0)
14918 return err;
14919 if (err == 1) {
14920 /* found equivalent state, can prune the search */
14921 if (env->log.level & BPF_LOG_LEVEL) {
14922 if (do_print_state)
14923 verbose(env, "\nfrom %d to %d%s: safe\n",
14924 env->prev_insn_idx, env->insn_idx,
14925 env->cur_state->speculative ?
14926 " (speculative execution)" : "");
14927 else
14928 verbose(env, "%d: safe\n", env->insn_idx);
14929 }
14930 goto process_bpf_exit;
f1bca824 14931 }
a095f421
AN
14932 }
14933
14934 if (is_jmp_point(env, env->insn_idx)) {
14935 err = push_jmp_history(env, state);
14936 if (err)
14937 return err;
f1bca824
AS
14938 }
14939
c3494801
AS
14940 if (signal_pending(current))
14941 return -EAGAIN;
14942
3c2ce60b
DB
14943 if (need_resched())
14944 cond_resched();
14945
2e576648
CL
14946 if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
14947 verbose(env, "\nfrom %d to %d%s:",
14948 env->prev_insn_idx, env->insn_idx,
14949 env->cur_state->speculative ?
14950 " (speculative execution)" : "");
14951 print_verifier_state(env, state->frame[state->curframe], true);
17a52670
AS
14952 do_print_state = false;
14953 }
14954
06ee7115 14955 if (env->log.level & BPF_LOG_LEVEL) {
7105e828 14956 const struct bpf_insn_cbs cbs = {
e6ac2450 14957 .cb_call = disasm_kfunc_name,
7105e828 14958 .cb_print = verbose,
abe08840 14959 .private_data = env,
7105e828
DB
14960 };
14961
2e576648
CL
14962 if (verifier_state_scratched(env))
14963 print_insn_state(env, state->frame[state->curframe]);
14964
c08435ec 14965 verbose_linfo(env, env->insn_idx, "; ");
2e576648 14966 env->prev_log_len = env->log.len_used;
c08435ec 14967 verbose(env, "%d: ", env->insn_idx);
abe08840 14968 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
2e576648
CL
14969 env->prev_insn_print_len = env->log.len_used - env->prev_log_len;
14970 env->prev_log_len = env->log.len_used;
17a52670
AS
14971 }
14972
9d03ebc7 14973 if (bpf_prog_is_offloaded(env->prog->aux)) {
c08435ec
DB
14974 err = bpf_prog_offload_verify_insn(env, env->insn_idx,
14975 env->prev_insn_idx);
cae1927c
JK
14976 if (err)
14977 return err;
14978 }
13a27dfc 14979
638f5b90 14980 regs = cur_regs(env);
fe9a5ca7 14981 sanitize_mark_insn_seen(env);
b5dc0163 14982 prev_insn_idx = env->insn_idx;
fd978bf7 14983
17a52670 14984 if (class == BPF_ALU || class == BPF_ALU64) {
1be7f75d 14985 err = check_alu_op(env, insn);
17a52670
AS
14986 if (err)
14987 return err;
14988
14989 } else if (class == BPF_LDX) {
0d80a619 14990 enum bpf_reg_type src_reg_type;
9bac3d6d
AS
14991
14992 /* check for reserved fields is already done */
14993
17a52670 14994 /* check src operand */
dc503a8a 14995 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
14996 if (err)
14997 return err;
14998
dc503a8a 14999 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
17a52670
AS
15000 if (err)
15001 return err;
15002
725f9dcd
AS
15003 src_reg_type = regs[insn->src_reg].type;
15004
17a52670
AS
15005 /* check that memory (src_reg + off) is readable,
15006 * the state of dst_reg will be updated by this func
15007 */
c08435ec
DB
15008 err = check_mem_access(env, env->insn_idx, insn->src_reg,
15009 insn->off, BPF_SIZE(insn->code),
15010 BPF_READ, insn->dst_reg, false);
17a52670
AS
15011 if (err)
15012 return err;
15013
0d80a619
EZ
15014 err = save_aux_ptr_type(env, src_reg_type, true);
15015 if (err)
15016 return err;
17a52670 15017 } else if (class == BPF_STX) {
0d80a619 15018 enum bpf_reg_type dst_reg_type;
d691f9e8 15019
91c960b0
BJ
15020 if (BPF_MODE(insn->code) == BPF_ATOMIC) {
15021 err = check_atomic(env, env->insn_idx, insn);
17a52670
AS
15022 if (err)
15023 return err;
c08435ec 15024 env->insn_idx++;
17a52670
AS
15025 continue;
15026 }
15027
5ca419f2
BJ
15028 if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
15029 verbose(env, "BPF_STX uses reserved fields\n");
15030 return -EINVAL;
15031 }
15032
17a52670 15033 /* check src1 operand */
dc503a8a 15034 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
15035 if (err)
15036 return err;
15037 /* check src2 operand */
dc503a8a 15038 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
15039 if (err)
15040 return err;
15041
d691f9e8
AS
15042 dst_reg_type = regs[insn->dst_reg].type;
15043
17a52670 15044 /* check that memory (dst_reg + off) is writeable */
c08435ec
DB
15045 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
15046 insn->off, BPF_SIZE(insn->code),
15047 BPF_WRITE, insn->src_reg, false);
17a52670
AS
15048 if (err)
15049 return err;
15050
0d80a619
EZ
15051 err = save_aux_ptr_type(env, dst_reg_type, false);
15052 if (err)
15053 return err;
17a52670 15054 } else if (class == BPF_ST) {
0d80a619
EZ
15055 enum bpf_reg_type dst_reg_type;
15056
17a52670
AS
15057 if (BPF_MODE(insn->code) != BPF_MEM ||
15058 insn->src_reg != BPF_REG_0) {
61bd5218 15059 verbose(env, "BPF_ST uses reserved fields\n");
17a52670
AS
15060 return -EINVAL;
15061 }
15062 /* check src operand */
dc503a8a 15063 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
15064 if (err)
15065 return err;
15066
0d80a619 15067 dst_reg_type = regs[insn->dst_reg].type;
f37a8cb8 15068
17a52670 15069 /* check that memory (dst_reg + off) is writeable */
c08435ec
DB
15070 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
15071 insn->off, BPF_SIZE(insn->code),
15072 BPF_WRITE, -1, false);
17a52670
AS
15073 if (err)
15074 return err;
15075
0d80a619
EZ
15076 err = save_aux_ptr_type(env, dst_reg_type, false);
15077 if (err)
15078 return err;
092ed096 15079 } else if (class == BPF_JMP || class == BPF_JMP32) {
17a52670
AS
15080 u8 opcode = BPF_OP(insn->code);
15081
2589726d 15082 env->jmps_processed++;
17a52670
AS
15083 if (opcode == BPF_CALL) {
15084 if (BPF_SRC(insn->code) != BPF_K ||
2357672c
KKD
15085 (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
15086 && insn->off != 0) ||
f4d7e40a 15087 (insn->src_reg != BPF_REG_0 &&
e6ac2450
MKL
15088 insn->src_reg != BPF_PSEUDO_CALL &&
15089 insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
092ed096
JW
15090 insn->dst_reg != BPF_REG_0 ||
15091 class == BPF_JMP32) {
61bd5218 15092 verbose(env, "BPF_CALL uses reserved fields\n");
17a52670
AS
15093 return -EINVAL;
15094 }
15095
8cab76ec
KKD
15096 if (env->cur_state->active_lock.ptr) {
15097 if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) ||
15098 (insn->src_reg == BPF_PSEUDO_CALL) ||
15099 (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
cd6791b4 15100 (insn->off != 0 || !is_bpf_graph_api_kfunc(insn->imm)))) {
8cab76ec
KKD
15101 verbose(env, "function calls are not allowed while holding a lock\n");
15102 return -EINVAL;
15103 }
d83525ca 15104 }
f4d7e40a 15105 if (insn->src_reg == BPF_PSEUDO_CALL)
c08435ec 15106 err = check_func_call(env, insn, &env->insn_idx);
e6ac2450 15107 else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
5c073f26 15108 err = check_kfunc_call(env, insn, &env->insn_idx);
f4d7e40a 15109 else
69c087ba 15110 err = check_helper_call(env, insn, &env->insn_idx);
17a52670
AS
15111 if (err)
15112 return err;
17a52670
AS
15113 } else if (opcode == BPF_JA) {
15114 if (BPF_SRC(insn->code) != BPF_K ||
15115 insn->imm != 0 ||
15116 insn->src_reg != BPF_REG_0 ||
092ed096
JW
15117 insn->dst_reg != BPF_REG_0 ||
15118 class == BPF_JMP32) {
61bd5218 15119 verbose(env, "BPF_JA uses reserved fields\n");
17a52670
AS
15120 return -EINVAL;
15121 }
15122
c08435ec 15123 env->insn_idx += insn->off + 1;
17a52670
AS
15124 continue;
15125
15126 } else if (opcode == BPF_EXIT) {
15127 if (BPF_SRC(insn->code) != BPF_K ||
15128 insn->imm != 0 ||
15129 insn->src_reg != BPF_REG_0 ||
092ed096
JW
15130 insn->dst_reg != BPF_REG_0 ||
15131 class == BPF_JMP32) {
61bd5218 15132 verbose(env, "BPF_EXIT uses reserved fields\n");
17a52670
AS
15133 return -EINVAL;
15134 }
15135
5d92ddc3
DM
15136 if (env->cur_state->active_lock.ptr &&
15137 !in_rbtree_lock_required_cb(env)) {
d83525ca
AS
15138 verbose(env, "bpf_spin_unlock is missing\n");
15139 return -EINVAL;
15140 }
15141
9bb00b28
YS
15142 if (env->cur_state->active_rcu_lock) {
15143 verbose(env, "bpf_rcu_read_unlock is missing\n");
15144 return -EINVAL;
15145 }
15146
9d9d00ac
KKD
15147 /* We must do check_reference_leak here before
15148 * prepare_func_exit to handle the case when
15149 * state->curframe > 0: it may be a callback
15150 * function, for which reference_state must
15151 * match caller reference state when it exits.
15152 */
15153 err = check_reference_leak(env);
15154 if (err)
15155 return err;
15156
f4d7e40a
AS
15157 if (state->curframe) {
15158 /* exit from nested function */
c08435ec 15159 err = prepare_func_exit(env, &env->insn_idx);
f4d7e40a
AS
15160 if (err)
15161 return err;
15162 do_print_state = true;
15163 continue;
15164 }
15165
390ee7e2
AS
15166 err = check_return_code(env);
15167 if (err)
15168 return err;
f1bca824 15169process_bpf_exit:
0f55f9ed 15170 mark_verifier_state_scratched(env);
2589726d 15171 update_branch_counts(env, env->cur_state);
b5dc0163 15172 err = pop_stack(env, &prev_insn_idx,
6f8a57cc 15173 &env->insn_idx, pop_log);
638f5b90
AS
15174 if (err < 0) {
15175 if (err != -ENOENT)
15176 return err;
17a52670
AS
15177 break;
15178 } else {
15179 do_print_state = true;
15180 continue;
15181 }
15182 } else {
c08435ec 15183 err = check_cond_jmp_op(env, insn, &env->insn_idx);
17a52670
AS
15184 if (err)
15185 return err;
15186 }
15187 } else if (class == BPF_LD) {
15188 u8 mode = BPF_MODE(insn->code);
15189
15190 if (mode == BPF_ABS || mode == BPF_IND) {
ddd872bc
AS
15191 err = check_ld_abs(env, insn);
15192 if (err)
15193 return err;
15194
17a52670
AS
15195 } else if (mode == BPF_IMM) {
15196 err = check_ld_imm(env, insn);
15197 if (err)
15198 return err;
15199
c08435ec 15200 env->insn_idx++;
fe9a5ca7 15201 sanitize_mark_insn_seen(env);
17a52670 15202 } else {
61bd5218 15203 verbose(env, "invalid BPF_LD mode\n");
17a52670
AS
15204 return -EINVAL;
15205 }
15206 } else {
61bd5218 15207 verbose(env, "unknown insn class %d\n", class);
17a52670
AS
15208 return -EINVAL;
15209 }
15210
c08435ec 15211 env->insn_idx++;
17a52670
AS
15212 }
15213
15214 return 0;
15215}
15216
541c3bad
AN
15217static int find_btf_percpu_datasec(struct btf *btf)
15218{
15219 const struct btf_type *t;
15220 const char *tname;
15221 int i, n;
15222
15223 /*
15224 * vmlinux and each module have their own ".data..percpu"
15225 * DATASEC in BTF. So for the module case, we need to skip vmlinux BTF
15226 * types and look only at the module's own BTF types.
15227 */
15228 n = btf_nr_types(btf);
15229 if (btf_is_module(btf))
15230 i = btf_nr_types(btf_vmlinux);
15231 else
15232 i = 1;
15233
15234 for (; i < n; i++) {
15235 t = btf_type_by_id(btf, i);
15236 if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
15237 continue;
15238
15239 tname = btf_name_by_offset(btf, t->name_off);
15240 if (!strcmp(tname, ".data..percpu"))
15241 return i;
15242 }
15243
15244 return -ENOENT;
15245}
15246
4976b718
HL
15247/* replace pseudo btf_id with kernel symbol address */
15248static int check_pseudo_btf_id(struct bpf_verifier_env *env,
15249 struct bpf_insn *insn,
15250 struct bpf_insn_aux_data *aux)
15251{
eaa6bcb7
HL
15252 const struct btf_var_secinfo *vsi;
15253 const struct btf_type *datasec;
541c3bad 15254 struct btf_mod_pair *btf_mod;
4976b718
HL
15255 const struct btf_type *t;
15256 const char *sym_name;
eaa6bcb7 15257 bool percpu = false;
f16e6313 15258 u32 type, id = insn->imm;
541c3bad 15259 struct btf *btf;
f16e6313 15260 s32 datasec_id;
4976b718 15261 u64 addr;
541c3bad 15262 int i, btf_fd, err;
4976b718 15263
541c3bad
AN
15264 btf_fd = insn[1].imm;
15265 if (btf_fd) {
15266 btf = btf_get_by_fd(btf_fd);
15267 if (IS_ERR(btf)) {
15268 verbose(env, "invalid module BTF object FD specified.\n");
15269 return -EINVAL;
15270 }
15271 } else {
15272 if (!btf_vmlinux) {
15273 verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
15274 return -EINVAL;
15275 }
15276 btf = btf_vmlinux;
15277 btf_get(btf);
4976b718
HL
15278 }
15279
541c3bad 15280 t = btf_type_by_id(btf, id);
4976b718
HL
15281 if (!t) {
15282 verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
541c3bad
AN
15283 err = -ENOENT;
15284 goto err_put;
4976b718
HL
15285 }
15286
15287 if (!btf_type_is_var(t)) {
541c3bad
AN
15288 verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id);
15289 err = -EINVAL;
15290 goto err_put;
4976b718
HL
15291 }
15292
541c3bad 15293 sym_name = btf_name_by_offset(btf, t->name_off);
4976b718
HL
15294 addr = kallsyms_lookup_name(sym_name);
15295 if (!addr) {
15296 verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
15297 sym_name);
541c3bad
AN
15298 err = -ENOENT;
15299 goto err_put;
4976b718
HL
15300 }
15301
541c3bad 15302 datasec_id = find_btf_percpu_datasec(btf);
eaa6bcb7 15303 if (datasec_id > 0) {
541c3bad 15304 datasec = btf_type_by_id(btf, datasec_id);
eaa6bcb7
HL
15305 for_each_vsi(i, datasec, vsi) {
15306 if (vsi->type == id) {
15307 percpu = true;
15308 break;
15309 }
15310 }
15311 }
15312
4976b718
HL
15313 insn[0].imm = (u32)addr;
15314 insn[1].imm = addr >> 32;
15315
15316 type = t->type;
541c3bad 15317 t = btf_type_skip_modifiers(btf, type, NULL);
eaa6bcb7 15318 if (percpu) {
5844101a 15319 aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
541c3bad 15320 aux->btf_var.btf = btf;
eaa6bcb7
HL
15321 aux->btf_var.btf_id = type;
15322 } else if (!btf_type_is_struct(t)) {
4976b718
HL
15323 const struct btf_type *ret;
15324 const char *tname;
15325 u32 tsize;
15326
15327 /* resolve the type size of ksym. */
541c3bad 15328 ret = btf_resolve_size(btf, t, &tsize);
4976b718 15329 if (IS_ERR(ret)) {
541c3bad 15330 tname = btf_name_by_offset(btf, t->name_off);
4976b718
HL
15331 verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
15332 tname, PTR_ERR(ret));
541c3bad
AN
15333 err = -EINVAL;
15334 goto err_put;
4976b718 15335 }
34d3a78c 15336 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
4976b718
HL
15337 aux->btf_var.mem_size = tsize;
15338 } else {
15339 aux->btf_var.reg_type = PTR_TO_BTF_ID;
541c3bad 15340 aux->btf_var.btf = btf;
4976b718
HL
15341 aux->btf_var.btf_id = type;
15342 }
541c3bad
AN
15343
15344 /* check whether we recorded this BTF (and maybe module) already */
15345 for (i = 0; i < env->used_btf_cnt; i++) {
15346 if (env->used_btfs[i].btf == btf) {
15347 btf_put(btf);
15348 return 0;
15349 }
15350 }
15351
15352 if (env->used_btf_cnt >= MAX_USED_BTFS) {
15353 err = -E2BIG;
15354 goto err_put;
15355 }
15356
15357 btf_mod = &env->used_btfs[env->used_btf_cnt];
15358 btf_mod->btf = btf;
15359 btf_mod->module = NULL;
15360
15361 /* if we reference variables from kernel module, bump its refcount */
15362 if (btf_is_module(btf)) {
15363 btf_mod->module = btf_try_get_module(btf);
15364 if (!btf_mod->module) {
15365 err = -ENXIO;
15366 goto err_put;
15367 }
15368 }
15369
15370 env->used_btf_cnt++;
15371
4976b718 15372 return 0;
541c3bad
AN
15373err_put:
15374 btf_put(btf);
15375 return err;
4976b718
HL
15376}
15377
d83525ca
AS
15378static bool is_tracing_prog_type(enum bpf_prog_type type)
15379{
15380 switch (type) {
15381 case BPF_PROG_TYPE_KPROBE:
15382 case BPF_PROG_TYPE_TRACEPOINT:
15383 case BPF_PROG_TYPE_PERF_EVENT:
15384 case BPF_PROG_TYPE_RAW_TRACEPOINT:
5002615a 15385 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
d83525ca
AS
15386 return true;
15387 default:
15388 return false;
15389 }
15390}
15391
61bd5218
JK
15392static int check_map_prog_compatibility(struct bpf_verifier_env *env,
15393 struct bpf_map *map,
fdc15d38
AS
15394 struct bpf_prog *prog)
15395
15396{
7e40781c 15397 enum bpf_prog_type prog_type = resolve_prog_type(prog);
a3884572 15398
9c395c1b
DM
15399 if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
15400 btf_record_has_field(map->record, BPF_RB_ROOT)) {
f0c5941f 15401 if (is_tracing_prog_type(prog_type)) {
9c395c1b 15402 verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
f0c5941f
KKD
15403 return -EINVAL;
15404 }
15405 }
15406
db559117 15407 if (btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
9e7a4d98
KS
15408 if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
15409 verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
15410 return -EINVAL;
15411 }
15412
15413 if (is_tracing_prog_type(prog_type)) {
15414 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
15415 return -EINVAL;
15416 }
15417
15418 if (prog->aux->sleepable) {
15419 verbose(env, "sleepable progs cannot use bpf_spin_lock yet\n");
15420 return -EINVAL;
15421 }
d83525ca
AS
15422 }
15423
db559117 15424 if (btf_record_has_field(map->record, BPF_TIMER)) {
5e0bc308
DB
15425 if (is_tracing_prog_type(prog_type)) {
15426 verbose(env, "tracing progs cannot use bpf_timer yet\n");
15427 return -EINVAL;
15428 }
15429 }
15430
9d03ebc7 15431 if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) &&
09728266 15432 !bpf_offload_prog_map_match(prog, map)) {
a3884572
JK
15433 verbose(env, "offload device mismatch between prog and map\n");
15434 return -EINVAL;
15435 }
15436
85d33df3
MKL
15437 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
15438 verbose(env, "bpf_struct_ops map cannot be used in prog\n");
15439 return -EINVAL;
15440 }
15441
1e6c62a8
AS
15442 if (prog->aux->sleepable)
15443 switch (map->map_type) {
15444 case BPF_MAP_TYPE_HASH:
15445 case BPF_MAP_TYPE_LRU_HASH:
15446 case BPF_MAP_TYPE_ARRAY:
638e4b82
AS
15447 case BPF_MAP_TYPE_PERCPU_HASH:
15448 case BPF_MAP_TYPE_PERCPU_ARRAY:
15449 case BPF_MAP_TYPE_LRU_PERCPU_HASH:
15450 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
15451 case BPF_MAP_TYPE_HASH_OF_MAPS:
ba90c2cc 15452 case BPF_MAP_TYPE_RINGBUF:
583c1f42 15453 case BPF_MAP_TYPE_USER_RINGBUF:
0fe4b381
KS
15454 case BPF_MAP_TYPE_INODE_STORAGE:
15455 case BPF_MAP_TYPE_SK_STORAGE:
15456 case BPF_MAP_TYPE_TASK_STORAGE:
2c40d97d 15457 case BPF_MAP_TYPE_CGRP_STORAGE:
ba90c2cc 15458 break;
1e6c62a8
AS
15459 default:
15460 verbose(env,
2c40d97d 15461 "Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
1e6c62a8
AS
15462 return -EINVAL;
15463 }
15464
fdc15d38
AS
15465 return 0;
15466}
15467
b741f163
RG
15468static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
15469{
15470 return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
15471 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
15472}
15473
4976b718
HL
15474/* find and rewrite pseudo imm in ld_imm64 instructions:
15475 *
15476 * 1. if it accesses map FD, replace it with actual map pointer.
15477 * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
15478 *
15479 * NOTE: btf_vmlinux is required for converting pseudo btf_id.
0246e64d 15480 */
4976b718 15481static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
0246e64d
AS
15482{
15483 struct bpf_insn *insn = env->prog->insnsi;
15484 int insn_cnt = env->prog->len;
fdc15d38 15485 int i, j, err;
0246e64d 15486
f1f7714e 15487 err = bpf_prog_calc_tag(env->prog);
aafe6ae9
DB
15488 if (err)
15489 return err;
15490
0246e64d 15491 for (i = 0; i < insn_cnt; i++, insn++) {
9bac3d6d 15492 if (BPF_CLASS(insn->code) == BPF_LDX &&
d691f9e8 15493 (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
61bd5218 15494 verbose(env, "BPF_LDX uses reserved fields\n");
d691f9e8
AS
15495 return -EINVAL;
15496 }
15497
0246e64d 15498 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
d8eca5bb 15499 struct bpf_insn_aux_data *aux;
0246e64d
AS
15500 struct bpf_map *map;
15501 struct fd f;
d8eca5bb 15502 u64 addr;
387544bf 15503 u32 fd;
0246e64d
AS
15504
15505 if (i == insn_cnt - 1 || insn[1].code != 0 ||
15506 insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
15507 insn[1].off != 0) {
61bd5218 15508 verbose(env, "invalid bpf_ld_imm64 insn\n");
0246e64d
AS
15509 return -EINVAL;
15510 }
15511
d8eca5bb 15512 if (insn[0].src_reg == 0)
0246e64d
AS
15513 /* valid generic load 64-bit imm */
15514 goto next_insn;
15515
4976b718
HL
15516 if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
15517 aux = &env->insn_aux_data[i];
15518 err = check_pseudo_btf_id(env, insn, aux);
15519 if (err)
15520 return err;
15521 goto next_insn;
15522 }
15523
69c087ba
YS
15524 if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
15525 aux = &env->insn_aux_data[i];
15526 aux->ptr_type = PTR_TO_FUNC;
15527 goto next_insn;
15528 }
15529
d8eca5bb
DB
15530 /* In final convert_pseudo_ld_imm64() step, this is
15531 * converted into regular 64-bit imm load insn.
15532 */
387544bf
AS
15533 switch (insn[0].src_reg) {
15534 case BPF_PSEUDO_MAP_VALUE:
15535 case BPF_PSEUDO_MAP_IDX_VALUE:
15536 break;
15537 case BPF_PSEUDO_MAP_FD:
15538 case BPF_PSEUDO_MAP_IDX:
15539 if (insn[1].imm == 0)
15540 break;
15541 fallthrough;
15542 default:
15543 verbose(env, "unrecognized bpf_ld_imm64 insn\n");
0246e64d
AS
15544 return -EINVAL;
15545 }
15546
387544bf
AS
15547 switch (insn[0].src_reg) {
15548 case BPF_PSEUDO_MAP_IDX_VALUE:
15549 case BPF_PSEUDO_MAP_IDX:
15550 if (bpfptr_is_null(env->fd_array)) {
15551 verbose(env, "fd_idx without fd_array is invalid\n");
15552 return -EPROTO;
15553 }
15554 if (copy_from_bpfptr_offset(&fd, env->fd_array,
15555 insn[0].imm * sizeof(fd),
15556 sizeof(fd)))
15557 return -EFAULT;
15558 break;
15559 default:
15560 fd = insn[0].imm;
15561 break;
15562 }
15563
15564 f = fdget(fd);
c2101297 15565 map = __bpf_map_get(f);
0246e64d 15566 if (IS_ERR(map)) {
61bd5218 15567 verbose(env, "fd %d is not pointing to valid bpf_map\n",
20182390 15568 insn[0].imm);
0246e64d
AS
15569 return PTR_ERR(map);
15570 }
15571
61bd5218 15572 err = check_map_prog_compatibility(env, map, env->prog);
fdc15d38
AS
15573 if (err) {
15574 fdput(f);
15575 return err;
15576 }
15577
d8eca5bb 15578 aux = &env->insn_aux_data[i];
387544bf
AS
15579 if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
15580 insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
d8eca5bb
DB
15581 addr = (unsigned long)map;
15582 } else {
15583 u32 off = insn[1].imm;
15584
15585 if (off >= BPF_MAX_VAR_OFF) {
15586 verbose(env, "direct value offset of %u is not allowed\n", off);
15587 fdput(f);
15588 return -EINVAL;
15589 }
15590
15591 if (!map->ops->map_direct_value_addr) {
15592 verbose(env, "no direct value access support for this map type\n");
15593 fdput(f);
15594 return -EINVAL;
15595 }
15596
15597 err = map->ops->map_direct_value_addr(map, &addr, off);
15598 if (err) {
15599 verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
15600 map->value_size, off);
15601 fdput(f);
15602 return err;
15603 }
15604
15605 aux->map_off = off;
15606 addr += off;
15607 }
15608
15609 insn[0].imm = (u32)addr;
15610 insn[1].imm = addr >> 32;
0246e64d
AS
15611
15612 /* check whether we recorded this map already */
d8eca5bb 15613 for (j = 0; j < env->used_map_cnt; j++) {
0246e64d 15614 if (env->used_maps[j] == map) {
d8eca5bb 15615 aux->map_index = j;
0246e64d
AS
15616 fdput(f);
15617 goto next_insn;
15618 }
d8eca5bb 15619 }
0246e64d
AS
15620
15621 if (env->used_map_cnt >= MAX_USED_MAPS) {
15622 fdput(f);
15623 return -E2BIG;
15624 }
15625
0246e64d
AS
15626 /* hold the map. If the program is rejected by verifier,
15627 * the map will be released by release_maps() or it
15628 * will be used by the valid program until it's unloaded
ab7f5bf0 15629 * and all maps are released in free_used_maps()
0246e64d 15630 */
1e0bd5a0 15631 bpf_map_inc(map);
d8eca5bb
DB
15632
15633 aux->map_index = env->used_map_cnt;
92117d84
AS
15634 env->used_maps[env->used_map_cnt++] = map;
15635
b741f163 15636 if (bpf_map_is_cgroup_storage(map) &&
e4730423 15637 bpf_cgroup_storage_assign(env->prog->aux, map)) {
b741f163 15638 verbose(env, "only one cgroup storage of each type is allowed\n");
de9cbbaa
RG
15639 fdput(f);
15640 return -EBUSY;
15641 }
15642
0246e64d
AS
15643 fdput(f);
15644next_insn:
15645 insn++;
15646 i++;
5e581dad
DB
15647 continue;
15648 }
15649
15650 /* Basic sanity check before we invest more work here. */
15651 if (!bpf_opcode_in_insntable(insn->code)) {
15652 verbose(env, "unknown opcode %02x\n", insn->code);
15653 return -EINVAL;
0246e64d
AS
15654 }
15655 }
15656
15657 /* now all pseudo BPF_LD_IMM64 instructions load valid
15658 * 'struct bpf_map *' into a register instead of user map_fd.
15659 * These pointers will be used later by verifier to validate map access.
15660 */
15661 return 0;
15662}
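/* Editor's illustration (not part of verifier.c): what a map reference looks
 * like before and after resolve_pseudo_ldimm64(). Userspace emits a two-insn
 * BPF_LD_IMM64 with src_reg == BPF_PSEUDO_MAP_FD and the map FD in
 * insn[0].imm (insn[1].imm must be 0 for this variant). The function looks
 * up the FD, takes a reference on the map, and overwrites insn[0].imm /
 * insn[1].imm with the low/high 32 bits of the kernel 'struct bpf_map *'
 * address; convert_pseudo_ld_imm64() later clears src_reg so the result is
 * an ordinary 64-bit immediate load of the map pointer.
 */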
15663
15664/* drop refcnt of maps used by the rejected program */
58e2af8b 15665static void release_maps(struct bpf_verifier_env *env)
0246e64d 15666{
a2ea0746
DB
15667 __bpf_free_used_maps(env->prog->aux, env->used_maps,
15668 env->used_map_cnt);
0246e64d
AS
15669}
15670
541c3bad
AN
15671/* drop refcnt of maps used by the rejected program */
15672static void release_btfs(struct bpf_verifier_env *env)
15673{
15674 __bpf_free_used_btfs(env->prog->aux, env->used_btfs,
15675 env->used_btf_cnt);
15676}
15677
0246e64d 15678/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
58e2af8b 15679static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
0246e64d
AS
15680{
15681 struct bpf_insn *insn = env->prog->insnsi;
15682 int insn_cnt = env->prog->len;
15683 int i;
15684
69c087ba
YS
15685 for (i = 0; i < insn_cnt; i++, insn++) {
15686 if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
15687 continue;
15688 if (insn->src_reg == BPF_PSEUDO_FUNC)
15689 continue;
15690 insn->src_reg = 0;
15691 }
0246e64d
AS
15692}
15693
8041902d
AS
15694/* single env->prog->insni[off] instruction was replaced with the range
15695 * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
15696 * [0, off) and [off, end) to new locations, so the patched range stays zero
15697 */
75f0fc7b
HF
15698static void adjust_insn_aux_data(struct bpf_verifier_env *env,
15699 struct bpf_insn_aux_data *new_data,
15700 struct bpf_prog *new_prog, u32 off, u32 cnt)
8041902d 15701{
75f0fc7b 15702 struct bpf_insn_aux_data *old_data = env->insn_aux_data;
b325fbca 15703 struct bpf_insn *insn = new_prog->insnsi;
d203b0fd 15704 u32 old_seen = old_data[off].seen;
b325fbca 15705 u32 prog_len;
c131187d 15706 int i;
8041902d 15707
b325fbca
JW
15708 /* aux info at OFF always needs adjustment, no matter fast path
15709 * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
15710 * original insn at old prog.
15711 */
15712 old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
15713
8041902d 15714 if (cnt == 1)
75f0fc7b 15715 return;
b325fbca 15716 prog_len = new_prog->len;
75f0fc7b 15717
8041902d
AS
15718 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
15719 memcpy(new_data + off + cnt - 1, old_data + off,
15720 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
b325fbca 15721 for (i = off; i < off + cnt - 1; i++) {
d203b0fd
DB
15722 /* Expand insni[off]'s seen count to the patched range. */
15723 new_data[i].seen = old_seen;
b325fbca
JW
15724 new_data[i].zext_dst = insn_has_def32(env, insn + i);
15725 }
8041902d
AS
15726 env->insn_aux_data = new_data;
15727 vfree(old_data);
8041902d
AS
15728}
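/* Editor's illustration (not part of verifier.c): adjust_insn_aux_data()
 * above with off == 10 and cnt == 3 (one insn replaced by three). Aux data
 * for insns [0, 10) is copied unchanged, the aux entries for the two newly
 * inserted insns at 10 and 11 inherit the original insn's 'seen' count and
 * get a freshly computed zext_dst, and the old entries from index 10 onward
 * are shifted to index 12 onward so they keep lining up with the moved
 * instructions.
 */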
15729
cc8b0b92
AS
15730static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
15731{
15732 int i;
15733
15734 if (len == 1)
15735 return;
4cb3d99c
JW
15736 /* NOTE: fake 'exit' subprog should be updated as well. */
15737 for (i = 0; i <= env->subprog_cnt; i++) {
afd59424 15738 if (env->subprog_info[i].start <= off)
cc8b0b92 15739 continue;
9c8105bd 15740 env->subprog_info[i].start += len - 1;
cc8b0b92
AS
15741 }
15742}
15743
7506d211 15744static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
a748c697
MF
15745{
15746 struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
15747 int i, sz = prog->aux->size_poke_tab;
15748 struct bpf_jit_poke_descriptor *desc;
15749
15750 for (i = 0; i < sz; i++) {
15751 desc = &tab[i];
7506d211
JF
15752 if (desc->insn_idx <= off)
15753 continue;
a748c697
MF
15754 desc->insn_idx += len - 1;
15755 }
15756}
15757
8041902d
AS
15758static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
15759 const struct bpf_insn *patch, u32 len)
15760{
15761 struct bpf_prog *new_prog;
75f0fc7b
HF
15762 struct bpf_insn_aux_data *new_data = NULL;
15763
15764 if (len > 1) {
15765 new_data = vzalloc(array_size(env->prog->len + len - 1,
15766 sizeof(struct bpf_insn_aux_data)));
15767 if (!new_data)
15768 return NULL;
15769 }
8041902d
AS
15770
15771 new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
4f73379e
AS
15772 if (IS_ERR(new_prog)) {
15773 if (PTR_ERR(new_prog) == -ERANGE)
15774 verbose(env,
15775 "insn %d cannot be patched due to 16-bit range\n",
15776 env->insn_aux_data[off].orig_idx);
75f0fc7b 15777 vfree(new_data);
8041902d 15778 return NULL;
4f73379e 15779 }
75f0fc7b 15780 adjust_insn_aux_data(env, new_data, new_prog, off, len);
cc8b0b92 15781 adjust_subprog_starts(env, off, len);
7506d211 15782 adjust_poke_descs(new_prog, off, len);
8041902d
AS
15783 return new_prog;
15784}
15785
52875a04
JK
15786static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
15787 u32 off, u32 cnt)
15788{
15789 int i, j;
15790
15791 /* find first prog starting at or after off (first to remove) */
15792 for (i = 0; i < env->subprog_cnt; i++)
15793 if (env->subprog_info[i].start >= off)
15794 break;
15795 /* find first prog starting at or after off + cnt (first to stay) */
15796 for (j = i; j < env->subprog_cnt; j++)
15797 if (env->subprog_info[j].start >= off + cnt)
15798 break;
15799 /* if j doesn't start exactly at off + cnt, we are just removing
15800 * the front of previous prog
15801 */
15802 if (env->subprog_info[j].start != off + cnt)
15803 j--;
15804
15805 if (j > i) {
15806 struct bpf_prog_aux *aux = env->prog->aux;
15807 int move;
15808
15809 /* move fake 'exit' subprog as well */
15810 move = env->subprog_cnt + 1 - j;
15811
15812 memmove(env->subprog_info + i,
15813 env->subprog_info + j,
15814 sizeof(*env->subprog_info) * move);
15815 env->subprog_cnt -= j - i;
15816
15817 /* remove func_info */
15818 if (aux->func_info) {
15819 move = aux->func_info_cnt - j;
15820
15821 memmove(aux->func_info + i,
15822 aux->func_info + j,
15823 sizeof(*aux->func_info) * move);
15824 aux->func_info_cnt -= j - i;
15825 /* func_info->insn_off is set after all code rewrites,
15826 * in adjust_btf_func() - no need to adjust
15827 */
15828 }
15829 } else {
15830 /* convert i from "first prog to remove" to "first to adjust" */
15831 if (env->subprog_info[i].start == off)
15832 i++;
15833 }
15834
15835 /* update fake 'exit' subprog as well */
15836 for (; i <= env->subprog_cnt; i++)
15837 env->subprog_info[i].start -= cnt;
15838
15839 return 0;
15840}
15841
15842static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
15843 u32 cnt)
15844{
15845 struct bpf_prog *prog = env->prog;
15846 u32 i, l_off, l_cnt, nr_linfo;
15847 struct bpf_line_info *linfo;
15848
15849 nr_linfo = prog->aux->nr_linfo;
15850 if (!nr_linfo)
15851 return 0;
15852
15853 linfo = prog->aux->linfo;
15854
15855 /* find first line info to remove, count lines to be removed */
15856 for (i = 0; i < nr_linfo; i++)
15857 if (linfo[i].insn_off >= off)
15858 break;
15859
15860 l_off = i;
15861 l_cnt = 0;
15862 for (; i < nr_linfo; i++)
15863 if (linfo[i].insn_off < off + cnt)
15864 l_cnt++;
15865 else
15866 break;
15867
15868 /* If the first live insn doesn't match the first live linfo, it needs to
15869 * "inherit" the last removed linfo. prog is already modified, so prog->len == off
15870 * means there are no live instructions after it (the tail of the program was removed).
15871 */
15872 if (prog->len != off && l_cnt &&
15873 (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
15874 l_cnt--;
15875 linfo[--i].insn_off = off + cnt;
15876 }
15877
15878 /* remove the line info entries that refer to the removed instructions */
15879 if (l_cnt) {
15880 memmove(linfo + l_off, linfo + i,
15881 sizeof(*linfo) * (nr_linfo - i));
15882
15883 prog->aux->nr_linfo -= l_cnt;
15884 nr_linfo = prog->aux->nr_linfo;
15885 }
15886
15887 /* pull all linfo[i].insn_off >= off + cnt in by cnt */
15888 for (i = l_off; i < nr_linfo; i++)
15889 linfo[i].insn_off -= cnt;
15890
15891 /* fix up all subprogs (incl. 'exit') which start >= off */
15892 for (i = 0; i <= env->subprog_cnt; i++)
15893 if (env->subprog_info[i].linfo_idx > l_off) {
15894 /* program may have started in the removed region but
15895 * may not be fully removed
15896 */
15897 if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
15898 env->subprog_info[i].linfo_idx -= l_cnt;
15899 else
15900 env->subprog_info[i].linfo_idx = l_off;
15901 }
15902
15903 return 0;
15904}
15905
15906static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
15907{
15908 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
15909 unsigned int orig_prog_len = env->prog->len;
15910 int err;
15911
9d03ebc7 15912 if (bpf_prog_is_offloaded(env->prog->aux))
08ca90af
JK
15913 bpf_prog_offload_remove_insns(env, off, cnt);
15914
52875a04
JK
15915 err = bpf_remove_insns(env->prog, off, cnt);
15916 if (err)
15917 return err;
15918
15919 err = adjust_subprog_starts_after_remove(env, off, cnt);
15920 if (err)
15921 return err;
15922
15923 err = bpf_adj_linfo_after_remove(env, off, cnt);
15924 if (err)
15925 return err;
15926
15927 memmove(aux_data + off, aux_data + off + cnt,
15928 sizeof(*aux_data) * (orig_prog_len - off - cnt));
15929
15930 return 0;
15931}
15932
2a5418a1
DB
15933/* The verifier does more data flow analysis than llvm and will not
15934 * explore branches that are dead at run time. Malicious programs can
15935 * have dead code too. Therefore replace all dead at-run-time code
15936 * with 'ja -1'.
15937 *
15938 * Plain nops would not be optimal: if they sat at the end of the
15939 * program and, through another bug, we managed to jump there, we
15940 * would execute beyond program memory. Returning an exception code
15941 * also wouldn't work, since the dead code could be located inside
15942 * a subprog.
c131187d
AS
15943 */
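/* Note on the 'ja -1' choice: a jump with off == -1 targets pc + 1 - 1, i.e.
 * itself, so even if some other bug ever transferred control into a dead
 * insn, execution would spin in place instead of falling through past the
 * end of the program or into another subprog.
 */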
15944static void sanitize_dead_code(struct bpf_verifier_env *env)
15945{
15946 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
2a5418a1 15947 struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
c131187d
AS
15948 struct bpf_insn *insn = env->prog->insnsi;
15949 const int insn_cnt = env->prog->len;
15950 int i;
15951
15952 for (i = 0; i < insn_cnt; i++) {
15953 if (aux_data[i].seen)
15954 continue;
2a5418a1 15955 memcpy(insn + i, &trap, sizeof(trap));
45c709f8 15956 aux_data[i].zext_dst = false;
c131187d
AS
15957 }
15958}
15959
e2ae4ca2
JK
15960static bool insn_is_cond_jump(u8 code)
15961{
15962 u8 op;
15963
092ed096
JW
15964 if (BPF_CLASS(code) == BPF_JMP32)
15965 return true;
15966
e2ae4ca2
JK
15967 if (BPF_CLASS(code) != BPF_JMP)
15968 return false;
15969
15970 op = BPF_OP(code);
15971 return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
15972}
15973
15974static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
15975{
15976 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
15977 struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
15978 struct bpf_insn *insn = env->prog->insnsi;
15979 const int insn_cnt = env->prog->len;
15980 int i;
15981
15982 for (i = 0; i < insn_cnt; i++, insn++) {
15983 if (!insn_is_cond_jump(insn->code))
15984 continue;
15985
15986 if (!aux_data[i + 1].seen)
15987 ja.off = insn->off;
15988 else if (!aux_data[i + 1 + insn->off].seen)
15989 ja.off = 0;
15990 else
15991 continue;
15992
9d03ebc7 15993 if (bpf_prog_is_offloaded(env->prog->aux))
08ca90af
JK
15994 bpf_prog_offload_replace_insn(env, i, &ja);
15995
e2ae4ca2
JK
15996 memcpy(insn, &ja, sizeof(ja));
15997 }
15998}
15999
52875a04
JK
16000static int opt_remove_dead_code(struct bpf_verifier_env *env)
16001{
16002 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
16003 int insn_cnt = env->prog->len;
16004 int i, err;
16005
16006 for (i = 0; i < insn_cnt; i++) {
16007 int j;
16008
16009 j = 0;
16010 while (i + j < insn_cnt && !aux_data[i + j].seen)
16011 j++;
16012 if (!j)
16013 continue;
16014
16015 err = verifier_remove_insns(env, i, j);
16016 if (err)
16017 return err;
16018 insn_cnt = env->prog->len;
16019 }
16020
16021 return 0;
16022}
16023
a1b14abc
JK
16024static int opt_remove_nops(struct bpf_verifier_env *env)
16025{
16026 const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
16027 struct bpf_insn *insn = env->prog->insnsi;
16028 int insn_cnt = env->prog->len;
16029 int i, err;
16030
16031 for (i = 0; i < insn_cnt; i++) {
16032 if (memcmp(&insn[i], &ja, sizeof(ja)))
16033 continue;
16034
16035 err = verifier_remove_insns(env, i, 1);
16036 if (err)
16037 return err;
16038 insn_cnt--;
16039 i--;
16040 }
16041
16042 return 0;
16043}
16044
d6c2308c
JW
16045static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
16046 const union bpf_attr *attr)
a4b1d3c1 16047{
d6c2308c 16048 struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
a4b1d3c1 16049 struct bpf_insn_aux_data *aux = env->insn_aux_data;
d6c2308c 16050 int i, patch_len, delta = 0, len = env->prog->len;
a4b1d3c1 16051 struct bpf_insn *insns = env->prog->insnsi;
a4b1d3c1 16052 struct bpf_prog *new_prog;
d6c2308c 16053 bool rnd_hi32;
a4b1d3c1 16054
d6c2308c 16055 rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
a4b1d3c1 16056 zext_patch[1] = BPF_ZEXT_REG(0);
d6c2308c
JW
16057 rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
16058 rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
16059 rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
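/* Rough shape of the two patch templates prepared above:
 *   zext_patch:     <orig insn>; w<def> = w<def>             (explicit 32-bit zext)
 *   rnd_hi32_patch: <orig insn>; ax = <rnd>; ax <<= 32; r<def> |= ax
 * The second form is only used under BPF_F_TEST_RND_HI32: it poisons the
 * upper 32 bits of a 32-bit definition the verifier decided needs no zero
 * extension, so selftests can catch a wrong zext analysis or JIT.
 */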
a4b1d3c1
JW
16060 for (i = 0; i < len; i++) {
16061 int adj_idx = i + delta;
16062 struct bpf_insn insn;
83a28819 16063 int load_reg;
a4b1d3c1 16064
d6c2308c 16065 insn = insns[adj_idx];
83a28819 16066 load_reg = insn_def_regno(&insn);
d6c2308c
JW
16067 if (!aux[adj_idx].zext_dst) {
16068 u8 code, class;
16069 u32 imm_rnd;
16070
16071 if (!rnd_hi32)
16072 continue;
16073
16074 code = insn.code;
16075 class = BPF_CLASS(code);
83a28819 16076 if (load_reg == -1)
d6c2308c
JW
16077 continue;
16078
16079 /* NOTE: arg "reg" (the fourth one) is only used for
83a28819
IL
16080 * BPF_STX + SRC_OP, so it is safe to pass NULL
16081 * here.
d6c2308c 16082 */
83a28819 16083 if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
d6c2308c
JW
16084 if (class == BPF_LD &&
16085 BPF_MODE(code) == BPF_IMM)
16086 i++;
16087 continue;
16088 }
16089
16090 /* ctx load could be transformed into wider load. */
16091 if (class == BPF_LDX &&
16092 aux[adj_idx].ptr_type == PTR_TO_CTX)
16093 continue;
16094
a251c17a 16095 imm_rnd = get_random_u32();
d6c2308c
JW
16096 rnd_hi32_patch[0] = insn;
16097 rnd_hi32_patch[1].imm = imm_rnd;
83a28819 16098 rnd_hi32_patch[3].dst_reg = load_reg;
d6c2308c
JW
16099 patch = rnd_hi32_patch;
16100 patch_len = 4;
16101 goto apply_patch_buffer;
16102 }
16103
39491867
BJ
16104 /* Add in a zero-extend instruction if a) the JIT has requested
16105 * it or b) it's a CMPXCHG.
16106 *
16107 * The latter is because: BPF_CMPXCHG always loads a value into
16108 * R0, therefore always zero-extends. However some archs'
16109 * equivalent instruction only does this load when the
16110 * comparison is successful. This detail of CMPXCHG is
16111 * orthogonal to the general zero-extension behaviour of the
16112 * CPU, so it's treated independently of bpf_jit_needs_zext.
16113 */
16114 if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
a4b1d3c1
JW
16115 continue;
16116
d35af0a7
BT
16117 /* Zero-extension is done by the caller. */
16118 if (bpf_pseudo_kfunc_call(&insn))
16119 continue;
16120
83a28819
IL
16121 if (WARN_ON(load_reg == -1)) {
16122 verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
16123 return -EFAULT;
b2e37a71
IL
16124 }
16125
a4b1d3c1 16126 zext_patch[0] = insn;
b2e37a71
IL
16127 zext_patch[1].dst_reg = load_reg;
16128 zext_patch[1].src_reg = load_reg;
d6c2308c
JW
16129 patch = zext_patch;
16130 patch_len = 2;
16131apply_patch_buffer:
16132 new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
a4b1d3c1
JW
16133 if (!new_prog)
16134 return -ENOMEM;
16135 env->prog = new_prog;
16136 insns = new_prog->insnsi;
16137 aux = env->insn_aux_data;
d6c2308c 16138 delta += patch_len - 1;
a4b1d3c1
JW
16139 }
16140
16141 return 0;
16142}
16143
c64b7983
JS
16144/* convert load instructions that access fields of a context type into a
16145 * sequence of instructions that access fields of the underlying structure:
16146 * struct __sk_buff -> struct sk_buff
16147 * struct bpf_sock_ops -> struct sock
9bac3d6d 16148 */
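/* For example (a sketch; the exact rewrite is program type specific):
 *         r0 = *(u32 *)(r1 + offsetof(struct __sk_buff, len))
 * is rewritten by the prog type's convert_ctx_access() callback into a
 * load from the corresponding field of the real struct sk_buff, so the
 * "__sk_buff" view never has to exist as an object at run time.
 */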
58e2af8b 16149static int convert_ctx_accesses(struct bpf_verifier_env *env)
9bac3d6d 16150{
00176a34 16151 const struct bpf_verifier_ops *ops = env->ops;
f96da094 16152 int i, cnt, size, ctx_field_size, delta = 0;
3df126f3 16153 const int insn_cnt = env->prog->len;
36bbef52 16154 struct bpf_insn insn_buf[16], *insn;
46f53a65 16155 u32 target_size, size_default, off;
9bac3d6d 16156 struct bpf_prog *new_prog;
d691f9e8 16157 enum bpf_access_type type;
f96da094 16158 bool is_narrower_load;
9bac3d6d 16159
b09928b9
DB
16160 if (ops->gen_prologue || env->seen_direct_write) {
16161 if (!ops->gen_prologue) {
16162 verbose(env, "bpf verifier is misconfigured\n");
16163 return -EINVAL;
16164 }
36bbef52
DB
16165 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
16166 env->prog);
16167 if (cnt >= ARRAY_SIZE(insn_buf)) {
61bd5218 16168 verbose(env, "bpf verifier is misconfigured\n");
36bbef52
DB
16169 return -EINVAL;
16170 } else if (cnt) {
8041902d 16171 new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
36bbef52
DB
16172 if (!new_prog)
16173 return -ENOMEM;
8041902d 16174
36bbef52 16175 env->prog = new_prog;
3df126f3 16176 delta += cnt - 1;
36bbef52
DB
16177 }
16178 }
16179
9d03ebc7 16180 if (bpf_prog_is_offloaded(env->prog->aux))
9bac3d6d
AS
16181 return 0;
16182
3df126f3 16183 insn = env->prog->insnsi + delta;
36bbef52 16184
9bac3d6d 16185 for (i = 0; i < insn_cnt; i++, insn++) {
c64b7983
JS
16186 bpf_convert_ctx_access_t convert_ctx_access;
16187
62c7989b
DB
16188 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
16189 insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
16190 insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
2039f26f 16191 insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
d691f9e8 16192 type = BPF_READ;
2039f26f
DB
16193 } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
16194 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
16195 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
16196 insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
16197 insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
16198 insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
16199 insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
16200 insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
d691f9e8 16201 type = BPF_WRITE;
2039f26f 16202 } else {
9bac3d6d 16203 continue;
2039f26f 16204 }
9bac3d6d 16205
af86ca4e 16206 if (type == BPF_WRITE &&
2039f26f 16207 env->insn_aux_data[i + delta].sanitize_stack_spill) {
af86ca4e 16208 struct bpf_insn patch[] = {
af86ca4e 16209 *insn,
2039f26f 16210 BPF_ST_NOSPEC(),
af86ca4e
AS
16211 };
16212
16213 cnt = ARRAY_SIZE(patch);
16214 new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
16215 if (!new_prog)
16216 return -ENOMEM;
16217
16218 delta += cnt - 1;
16219 env->prog = new_prog;
16220 insn = new_prog->insnsi + i + delta;
16221 continue;
16222 }
16223
6efe152d 16224 switch ((int)env->insn_aux_data[i + delta].ptr_type) {
c64b7983
JS
16225 case PTR_TO_CTX:
16226 if (!ops->convert_ctx_access)
16227 continue;
16228 convert_ctx_access = ops->convert_ctx_access;
16229 break;
16230 case PTR_TO_SOCKET:
46f8bc92 16231 case PTR_TO_SOCK_COMMON:
c64b7983
JS
16232 convert_ctx_access = bpf_sock_convert_ctx_access;
16233 break;
655a51e5
MKL
16234 case PTR_TO_TCP_SOCK:
16235 convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
16236 break;
fada7fdc
JL
16237 case PTR_TO_XDP_SOCK:
16238 convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
16239 break;
2a02759e 16240 case PTR_TO_BTF_ID:
6efe152d 16241 case PTR_TO_BTF_ID | PTR_UNTRUSTED:
282de143
KKD
16242 /* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
16243 * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
16244 * be said once it is marked PTR_UNTRUSTED, hence we must handle
16245 * any faults for loads into such types. BPF_WRITE is disallowed
16246 * for this case.
16247 */
16248 case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
27ae7997
MKL
16249 if (type == BPF_READ) {
16250 insn->code = BPF_LDX | BPF_PROBE_MEM |
16251 BPF_SIZE((insn)->code);
16252 env->prog->aux->num_exentries++;
2a02759e 16253 }
2a02759e 16254 continue;
c64b7983 16255 default:
9bac3d6d 16256 continue;
c64b7983 16257 }
9bac3d6d 16258
31fd8581 16259 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
f96da094 16260 size = BPF_LDST_BYTES(insn);
31fd8581
YS
16261
16262 /* If the read access is a narrower load of the field,
16263 * convert it to a 4/8-byte load, to minimize program type specific
16264 * convert_ctx_access changes. If the conversion is successful,
16265 * we will apply the proper mask to the result.
16266 */
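/* Sketch for a hypothetical 1-byte read at byte offset 1 of a 4-byte ctx
 * field on a little-endian host:
 *         r0 = *(u8 *)(r1 + off)
 * is widened to a 4-byte load at the aligned offset, converted as usual,
 * and then narrowed back in the destination register:
 *         r0 >>= 8         (shift the requested byte down)
 *         r0 &= 0xff       (mask to the original access size)
 */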
f96da094 16267 is_narrower_load = size < ctx_field_size;
46f53a65
AI
16268 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
16269 off = insn->off;
31fd8581 16270 if (is_narrower_load) {
f96da094
DB
16271 u8 size_code;
16272
16273 if (type == BPF_WRITE) {
61bd5218 16274 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
f96da094
DB
16275 return -EINVAL;
16276 }
31fd8581 16277
f96da094 16278 size_code = BPF_H;
31fd8581
YS
16279 if (ctx_field_size == 4)
16280 size_code = BPF_W;
16281 else if (ctx_field_size == 8)
16282 size_code = BPF_DW;
f96da094 16283
bc23105c 16284 insn->off = off & ~(size_default - 1);
31fd8581
YS
16285 insn->code = BPF_LDX | BPF_MEM | size_code;
16286 }
f96da094
DB
16287
16288 target_size = 0;
c64b7983
JS
16289 cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
16290 &target_size);
f96da094
DB
16291 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
16292 (ctx_field_size && !target_size)) {
61bd5218 16293 verbose(env, "bpf verifier is misconfigured\n");
9bac3d6d
AS
16294 return -EINVAL;
16295 }
f96da094
DB
16296
16297 if (is_narrower_load && size < target_size) {
d895a0f1
IL
16298 u8 shift = bpf_ctx_narrow_access_offset(
16299 off, size, size_default) * 8;
d7af7e49
AI
16300 if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
16301 verbose(env, "bpf verifier narrow ctx load misconfigured\n");
16302 return -EINVAL;
16303 }
46f53a65
AI
16304 if (ctx_field_size <= 4) {
16305 if (shift)
16306 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
16307 insn->dst_reg,
16308 shift);
31fd8581 16309 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
f96da094 16310 (1 << size * 8) - 1);
46f53a65
AI
16311 } else {
16312 if (shift)
16313 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
16314 insn->dst_reg,
16315 shift);
31fd8581 16316 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
e2f7fc0a 16317 (1ULL << size * 8) - 1);
46f53a65 16318 }
31fd8581 16319 }
9bac3d6d 16320
8041902d 16321 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
9bac3d6d
AS
16322 if (!new_prog)
16323 return -ENOMEM;
16324
3df126f3 16325 delta += cnt - 1;
9bac3d6d
AS
16326
16327 /* keep walking new program and skip insns we just inserted */
16328 env->prog = new_prog;
3df126f3 16329 insn = new_prog->insnsi + i + delta;
9bac3d6d
AS
16330 }
16331
16332 return 0;
16333}
16334
1c2a088a
AS
16335static int jit_subprogs(struct bpf_verifier_env *env)
16336{
16337 struct bpf_prog *prog = env->prog, **func, *tmp;
16338 int i, j, subprog_start, subprog_end = 0, len, subprog;
a748c697 16339 struct bpf_map *map_ptr;
7105e828 16340 struct bpf_insn *insn;
1c2a088a 16341 void *old_bpf_func;
c4c0bdc0 16342 int err, num_exentries;
1c2a088a 16343
f910cefa 16344 if (env->subprog_cnt <= 1)
1c2a088a
AS
16345 return 0;
16346
7105e828 16347 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
3990ed4c 16348 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
69c087ba 16349 continue;
69c087ba 16350
c7a89784
DB
16351 /* Upon error here we cannot fall back to interpreter but
16352 * need a hard reject of the program. Thus -EFAULT is
16353 * propagated in any case.
16354 */
1c2a088a
AS
16355 subprog = find_subprog(env, i + insn->imm + 1);
16356 if (subprog < 0) {
16357 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
16358 i + insn->imm + 1);
16359 return -EFAULT;
16360 }
16361 /* temporarily remember subprog id inside insn instead of
16362 * aux_data, since next loop will split up all insns into funcs
16363 */
f910cefa 16364 insn->off = subprog;
1c2a088a
AS
16365 /* remember original imm in case JIT fails and fallback
16366 * to interpreter will be needed
16367 */
16368 env->insn_aux_data[i].call_imm = insn->imm;
16369 /* point imm to __bpf_call_base+1 from JITs point of view */
16370 insn->imm = 1;
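/* A placeholder is enough here because the callee's JITed address is not
 * known yet; a small non-zero imm (relative to __bpf_call_base) keeps the
 * first JIT pass happy, and the real target is patched in before the
 * final JIT pass later in this function.
 */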
3990ed4c
MKL
16371 if (bpf_pseudo_func(insn))
16372 /* The JIT (e.g. x86_64) may emit fewer instructions
16373 * if it learns a u32 imm is the same as a u64 imm.
16374 * Force a non-zero value here.
16375 */
16376 insn[1].imm = 1;
1c2a088a
AS
16377 }
16378
c454a46b
MKL
16379 err = bpf_prog_alloc_jited_linfo(prog);
16380 if (err)
16381 goto out_undo_insn;
16382
16383 err = -ENOMEM;
6396bb22 16384 func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
1c2a088a 16385 if (!func)
c7a89784 16386 goto out_undo_insn;
1c2a088a 16387
f910cefa 16388 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a 16389 subprog_start = subprog_end;
4cb3d99c 16390 subprog_end = env->subprog_info[i + 1].start;
1c2a088a
AS
16391
16392 len = subprog_end - subprog_start;
fb7dd8bc 16393 /* bpf_prog_run() doesn't call subprogs directly,
492ecee8
AS
16394 * hence main prog stats include the runtime of subprogs.
16395 * subprogs don't have IDs and are not reachable via prog_get_next_id;
700d4796 16396 * func[i]->stats will never be accessed and stays NULL
492ecee8
AS
16397 */
16398 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
1c2a088a
AS
16399 if (!func[i])
16400 goto out_free;
16401 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
16402 len * sizeof(struct bpf_insn));
4f74d809 16403 func[i]->type = prog->type;
1c2a088a 16404 func[i]->len = len;
4f74d809
DB
16405 if (bpf_prog_calc_tag(func[i]))
16406 goto out_free;
1c2a088a 16407 func[i]->is_func = 1;
ba64e7d8 16408 func[i]->aux->func_idx = i;
f263a814 16409 /* Below members will be freed only at prog->aux */
ba64e7d8
YS
16410 func[i]->aux->btf = prog->aux->btf;
16411 func[i]->aux->func_info = prog->aux->func_info;
9c7c48d6 16412 func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
f263a814
JF
16413 func[i]->aux->poke_tab = prog->aux->poke_tab;
16414 func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
ba64e7d8 16415
a748c697 16416 for (j = 0; j < prog->aux->size_poke_tab; j++) {
f263a814 16417 struct bpf_jit_poke_descriptor *poke;
a748c697 16418
f263a814
JF
16419 poke = &prog->aux->poke_tab[j];
16420 if (poke->insn_idx < subprog_end &&
16421 poke->insn_idx >= subprog_start)
16422 poke->aux = func[i]->aux;
a748c697
MF
16423 }
16424
1c2a088a 16425 func[i]->aux->name[0] = 'F';
9c8105bd 16426 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
1c2a088a 16427 func[i]->jit_requested = 1;
d2a3b7c5 16428 func[i]->blinding_requested = prog->blinding_requested;
e6ac2450 16429 func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
2357672c 16430 func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
c454a46b
MKL
16431 func[i]->aux->linfo = prog->aux->linfo;
16432 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
16433 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
16434 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
c4c0bdc0
YS
16435 num_exentries = 0;
16436 insn = func[i]->insnsi;
16437 for (j = 0; j < func[i]->len; j++, insn++) {
16438 if (BPF_CLASS(insn->code) == BPF_LDX &&
16439 BPF_MODE(insn->code) == BPF_PROBE_MEM)
16440 num_exentries++;
16441 }
16442 func[i]->aux->num_exentries = num_exentries;
ebf7d1f5 16443 func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
1c2a088a
AS
16444 func[i] = bpf_int_jit_compile(func[i]);
16445 if (!func[i]->jited) {
16446 err = -ENOTSUPP;
16447 goto out_free;
16448 }
16449 cond_resched();
16450 }
a748c697 16451
1c2a088a
AS
16452 /* at this point all bpf functions were successfully JITed
16453 * now populate all bpf_calls with correct addresses and
16454 * run last pass of JIT
16455 */
f910cefa 16456 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
16457 insn = func[i]->insnsi;
16458 for (j = 0; j < func[i]->len; j++, insn++) {
69c087ba 16459 if (bpf_pseudo_func(insn)) {
3990ed4c 16460 subprog = insn->off;
69c087ba
YS
16461 insn[0].imm = (u32)(long)func[subprog]->bpf_func;
16462 insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
16463 continue;
16464 }
23a2d70c 16465 if (!bpf_pseudo_call(insn))
1c2a088a
AS
16466 continue;
16467 subprog = insn->off;
3d717fad 16468 insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
1c2a088a 16469 }
2162fed4
SD
16470
16471 /* we use the aux data to keep a list of the start addresses
16472 * of the JITed images for each function in the program
16473 *
16474 * for some architectures, such as powerpc64, the imm field
16475 * might not be large enough to hold the offset of the start
16476 * address of the callee's JITed image from __bpf_call_base
16477 *
16478 * in such cases, we can lookup the start address of a callee
16479 * by using its subprog id, available from the off field of
16480 * the call instruction, as an index for this list
16481 */
16482 func[i]->aux->func = func;
16483 func[i]->aux->func_cnt = env->subprog_cnt;
1c2a088a 16484 }
f910cefa 16485 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
16486 old_bpf_func = func[i]->bpf_func;
16487 tmp = bpf_int_jit_compile(func[i]);
16488 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
16489 verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
c7a89784 16490 err = -ENOTSUPP;
1c2a088a
AS
16491 goto out_free;
16492 }
16493 cond_resched();
16494 }
16495
16496 /* finally lock prog and jit images for all functions and
16497 * populate kallsyms
16498 */
f910cefa 16499 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
16500 bpf_prog_lock_ro(func[i]);
16501 bpf_prog_kallsyms_add(func[i]);
16502 }
7105e828
DB
16503
16504 /* Last step: make the now unused interpreter insns from the main
16505 * prog consistent for later dump requests, so they
16506 * look the same as if they had only been interpreted.
16507 */
16508 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
69c087ba
YS
16509 if (bpf_pseudo_func(insn)) {
16510 insn[0].imm = env->insn_aux_data[i].call_imm;
3990ed4c
MKL
16511 insn[1].imm = insn->off;
16512 insn->off = 0;
69c087ba
YS
16513 continue;
16514 }
23a2d70c 16515 if (!bpf_pseudo_call(insn))
7105e828
DB
16516 continue;
16517 insn->off = env->insn_aux_data[i].call_imm;
16518 subprog = find_subprog(env, i + insn->off + 1);
dbecd738 16519 insn->imm = subprog;
7105e828
DB
16520 }
16521
1c2a088a
AS
16522 prog->jited = 1;
16523 prog->bpf_func = func[0]->bpf_func;
d00c6473 16524 prog->jited_len = func[0]->jited_len;
1c2a088a 16525 prog->aux->func = func;
f910cefa 16526 prog->aux->func_cnt = env->subprog_cnt;
e16301fb 16527 bpf_prog_jit_attempt_done(prog);
1c2a088a
AS
16528 return 0;
16529out_free:
f263a814
JF
16530 /* We failed JIT'ing, so at this point we need to unregister poke
16531 * descriptors from subprogs, so that kernel is not attempting to
16532 * patch it anymore as we're freeing the subprog JIT memory.
16533 */
16534 for (i = 0; i < prog->aux->size_poke_tab; i++) {
16535 map_ptr = prog->aux->poke_tab[i].tail_call.map;
16536 map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
16537 }
16538 /* At this point we're guaranteed that poke descriptors are not
16539 * live anymore. We can just unlink its descriptor table as it's
16540 * released with the main prog.
16541 */
a748c697
MF
16542 for (i = 0; i < env->subprog_cnt; i++) {
16543 if (!func[i])
16544 continue;
f263a814 16545 func[i]->aux->poke_tab = NULL;
a748c697
MF
16546 bpf_jit_free(func[i]);
16547 }
1c2a088a 16548 kfree(func);
c7a89784 16549out_undo_insn:
1c2a088a
AS
16550 /* cleanup main prog to be interpreted */
16551 prog->jit_requested = 0;
d2a3b7c5 16552 prog->blinding_requested = 0;
1c2a088a 16553 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
23a2d70c 16554 if (!bpf_pseudo_call(insn))
1c2a088a
AS
16555 continue;
16556 insn->off = 0;
16557 insn->imm = env->insn_aux_data[i].call_imm;
16558 }
e16301fb 16559 bpf_prog_jit_attempt_done(prog);
1c2a088a
AS
16560 return err;
16561}
16562
1ea47e01
AS
16563static int fixup_call_args(struct bpf_verifier_env *env)
16564{
19d28fbd 16565#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1ea47e01
AS
16566 struct bpf_prog *prog = env->prog;
16567 struct bpf_insn *insn = prog->insnsi;
e6ac2450 16568 bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
1ea47e01 16569 int i, depth;
19d28fbd 16570#endif
e4052d06 16571 int err = 0;
1ea47e01 16572
e4052d06 16573 if (env->prog->jit_requested &&
9d03ebc7 16574 !bpf_prog_is_offloaded(env->prog->aux)) {
19d28fbd
DM
16575 err = jit_subprogs(env);
16576 if (err == 0)
1c2a088a 16577 return 0;
c7a89784
DB
16578 if (err == -EFAULT)
16579 return err;
19d28fbd
DM
16580 }
16581#ifndef CONFIG_BPF_JIT_ALWAYS_ON
e6ac2450
MKL
16582 if (has_kfunc_call) {
16583 verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
16584 return -EINVAL;
16585 }
e411901c
MF
16586 if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
16587 /* When the JIT fails, progs with bpf2bpf calls and tail_calls
16588 * have to be rejected, since the interpreter doesn't support them yet.
16589 */
16590 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
16591 return -EINVAL;
16592 }
1ea47e01 16593 for (i = 0; i < prog->len; i++, insn++) {
69c087ba
YS
16594 if (bpf_pseudo_func(insn)) {
16595 /* When the JIT fails, progs with callback calls
16596 * have to be rejected, since the interpreter doesn't support them yet.
16597 */
16598 verbose(env, "callbacks are not allowed in non-JITed programs\n");
16599 return -EINVAL;
16600 }
16601
23a2d70c 16602 if (!bpf_pseudo_call(insn))
1ea47e01
AS
16603 continue;
16604 depth = get_callee_stack_depth(env, insn, i);
16605 if (depth < 0)
16606 return depth;
16607 bpf_patch_call_args(insn, depth);
16608 }
19d28fbd
DM
16609 err = 0;
16610#endif
16611 return err;
1ea47e01
AS
16612}
16613
958cf2e2
KKD
16614static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
16615 struct bpf_insn *insn_buf, int insn_idx, int *cnt)
e6ac2450
MKL
16616{
16617 const struct bpf_kfunc_desc *desc;
3d76a4d3 16618 void *xdp_kfunc;
e6ac2450 16619
a5d82727
KKD
16620 if (!insn->imm) {
16621 verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
16622 return -EINVAL;
16623 }
16624
3d76a4d3
SF
16625 *cnt = 0;
16626
16627 if (bpf_dev_bound_kfunc_id(insn->imm)) {
16628 xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm);
16629 if (xdp_kfunc) {
16630 insn->imm = BPF_CALL_IMM(xdp_kfunc);
16631 return 0;
16632 }
16633
16634 /* fallback to default kfunc when not supported by netdev */
16635 }
16636
e6ac2450 16637 /* insn->imm has the btf func_id. Replace it with
c2cc0ce7 16638 * an address (relative to __bpf_call_base).
e6ac2450 16639 */
2357672c 16640 desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
e6ac2450
MKL
16641 if (!desc) {
16642 verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
16643 insn->imm);
16644 return -EFAULT;
16645 }
16646
16647 insn->imm = desc->imm;
958cf2e2
KKD
16648 if (insn->off)
16649 return 0;
16650 if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
16651 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
16652 struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
16653 u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
e6ac2450 16654
958cf2e2
KKD
16655 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
16656 insn_buf[1] = addr[0];
16657 insn_buf[2] = addr[1];
16658 insn_buf[3] = *insn;
16659 *cnt = 4;
ac9f0605
KKD
16660 } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
16661 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
16662 struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
16663
16664 insn_buf[0] = addr[0];
16665 insn_buf[1] = addr[1];
16666 insn_buf[2] = *insn;
16667 *cnt = 3;
a35b9af4
YS
16668 } else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
16669 desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
fd264ca0
YS
16670 insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
16671 *cnt = 1;
b5964b96
JK
16672 } else if (desc->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
16673 bool seen_direct_write = env->seen_direct_write;
16674 bool is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
16675
16676 if (is_rdonly)
16677 insn->imm = BPF_CALL_IMM(bpf_dynptr_from_skb_rdonly);
16678
16679 /* restore env->seen_direct_write to its original value, since
16680 * may_access_direct_pkt_data mutates it
16681 */
16682 env->seen_direct_write = seen_direct_write;
958cf2e2 16683 }
e6ac2450
MKL
16684 return 0;
16685}
16686
e6ac5933
BJ
16687/* Do various post-verification rewrites in a single program pass.
16688 * These rewrites simplify JIT and interpreter implementations.
e245c5c6 16689 */
e6ac5933 16690static int do_misc_fixups(struct bpf_verifier_env *env)
e245c5c6 16691{
79741b3b 16692 struct bpf_prog *prog = env->prog;
f92c1e18 16693 enum bpf_attach_type eatype = prog->expected_attach_type;
9b99edca 16694 enum bpf_prog_type prog_type = resolve_prog_type(prog);
79741b3b 16695 struct bpf_insn *insn = prog->insnsi;
e245c5c6 16696 const struct bpf_func_proto *fn;
79741b3b 16697 const int insn_cnt = prog->len;
09772d92 16698 const struct bpf_map_ops *ops;
c93552c4 16699 struct bpf_insn_aux_data *aux;
81ed18ab
AS
16700 struct bpf_insn insn_buf[16];
16701 struct bpf_prog *new_prog;
16702 struct bpf_map *map_ptr;
d2e4c1e6 16703 int i, ret, cnt, delta = 0;
e245c5c6 16704
79741b3b 16705 for (i = 0; i < insn_cnt; i++, insn++) {
e6ac5933 16706 /* Make divide-by-zero exceptions impossible. */
f6b1b3bf
DB
16707 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
16708 insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
16709 insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
68fda450 16710 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
f6b1b3bf 16711 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
e88b2c6e
DB
16712 bool isdiv = BPF_OP(insn->code) == BPF_DIV;
16713 struct bpf_insn *patchlet;
16714 struct bpf_insn chk_and_div[] = {
9b00f1b7 16715 /* [R,W]x div 0 -> 0 */
e88b2c6e
DB
16716 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
16717 BPF_JNE | BPF_K, insn->src_reg,
16718 0, 2, 0),
f6b1b3bf
DB
16719 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
16720 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
16721 *insn,
16722 };
e88b2c6e 16723 struct bpf_insn chk_and_mod[] = {
9b00f1b7 16724 /* [R,W]x mod 0 -> [R,W]x */
e88b2c6e
DB
16725 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
16726 BPF_JEQ | BPF_K, insn->src_reg,
9b00f1b7 16727 0, 1 + (is64 ? 0 : 1), 0),
f6b1b3bf 16728 *insn,
9b00f1b7
DB
16729 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
16730 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
f6b1b3bf 16731 };
f6b1b3bf 16732
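/* Rough behaviour of the guards above, matching BPF's defined semantics
 * of x / 0 == 0 and x % 0 == x:
 *   div:  if (src != 0) goto do_div;  dst = 0;  goto skip;  do_div: <orig insn>
 *   mod:  if (src == 0) goto skip;  <orig insn>
 * (the 32-bit mod variant additionally re-does "w_dst = w_dst" on the skip
 * path so the destination is properly zero-extended).
 */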
e88b2c6e
DB
16733 patchlet = isdiv ? chk_and_div : chk_and_mod;
16734 cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
9b00f1b7 16735 ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
f6b1b3bf
DB
16736
16737 new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
68fda450
AS
16738 if (!new_prog)
16739 return -ENOMEM;
16740
16741 delta += cnt - 1;
16742 env->prog = prog = new_prog;
16743 insn = new_prog->insnsi + i + delta;
16744 continue;
16745 }
16746
e6ac5933 16747 /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
e0cea7ce
DB
16748 if (BPF_CLASS(insn->code) == BPF_LD &&
16749 (BPF_MODE(insn->code) == BPF_ABS ||
16750 BPF_MODE(insn->code) == BPF_IND)) {
16751 cnt = env->ops->gen_ld_abs(insn, insn_buf);
16752 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
16753 verbose(env, "bpf verifier is misconfigured\n");
16754 return -EINVAL;
16755 }
16756
16757 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
16758 if (!new_prog)
16759 return -ENOMEM;
16760
16761 delta += cnt - 1;
16762 env->prog = prog = new_prog;
16763 insn = new_prog->insnsi + i + delta;
16764 continue;
16765 }
16766
e6ac5933 16767 /* Rewrite pointer arithmetic to mitigate speculation attacks. */
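/* Sketch of the sanitation: for a "dst += off_reg" (or -=) flagged by the
 * verifier, BPF_REG_AX is loaded with aux->alu_limit and combined with the
 * runtime value of off_reg so that the offset actually used is forced to 0
 * whenever it falls outside the range proven safe. Only then is the
 * (possibly negated) pointer arithmetic performed, so even speculatively
 * executed out-of-bounds offsets cannot form a bad pointer.
 */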
979d63d5
DB
16768 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
16769 insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
16770 const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
16771 const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
979d63d5 16772 struct bpf_insn *patch = &insn_buf[0];
801c6058 16773 bool issrc, isneg, isimm;
979d63d5
DB
16774 u32 off_reg;
16775
16776 aux = &env->insn_aux_data[i + delta];
3612af78
DB
16777 if (!aux->alu_state ||
16778 aux->alu_state == BPF_ALU_NON_POINTER)
979d63d5
DB
16779 continue;
16780
16781 isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
16782 issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
16783 BPF_ALU_SANITIZE_SRC;
801c6058 16784 isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
979d63d5
DB
16785
16786 off_reg = issrc ? insn->src_reg : insn->dst_reg;
801c6058
DB
16787 if (isimm) {
16788 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
16789 } else {
16790 if (isneg)
16791 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
16792 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
16793 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
16794 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
16795 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
16796 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
16797 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
16798 }
b9b34ddb
DB
16799 if (!issrc)
16800 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
16801 insn->src_reg = BPF_REG_AX;
979d63d5
DB
16802 if (isneg)
16803 insn->code = insn->code == code_add ?
16804 code_sub : code_add;
16805 *patch++ = *insn;
801c6058 16806 if (issrc && isneg && !isimm)
979d63d5
DB
16807 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
16808 cnt = patch - insn_buf;
16809
16810 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
16811 if (!new_prog)
16812 return -ENOMEM;
16813
16814 delta += cnt - 1;
16815 env->prog = prog = new_prog;
16816 insn = new_prog->insnsi + i + delta;
16817 continue;
16818 }
16819
79741b3b
AS
16820 if (insn->code != (BPF_JMP | BPF_CALL))
16821 continue;
cc8b0b92
AS
16822 if (insn->src_reg == BPF_PSEUDO_CALL)
16823 continue;
e6ac2450 16824 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
958cf2e2 16825 ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
e6ac2450
MKL
16826 if (ret)
16827 return ret;
958cf2e2
KKD
16828 if (cnt == 0)
16829 continue;
16830
16831 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
16832 if (!new_prog)
16833 return -ENOMEM;
16834
16835 delta += cnt - 1;
16836 env->prog = prog = new_prog;
16837 insn = new_prog->insnsi + i + delta;
e6ac2450
MKL
16838 continue;
16839 }
e245c5c6 16840
79741b3b
AS
16841 if (insn->imm == BPF_FUNC_get_route_realm)
16842 prog->dst_needed = 1;
16843 if (insn->imm == BPF_FUNC_get_prandom_u32)
16844 bpf_user_rnd_init_once();
9802d865
JB
16845 if (insn->imm == BPF_FUNC_override_return)
16846 prog->kprobe_override = 1;
79741b3b 16847 if (insn->imm == BPF_FUNC_tail_call) {
7b9f6da1
DM
16848 /* If we tail call into other programs, we
16849 * cannot make any assumptions since they can
16850 * be replaced dynamically during runtime in
16851 * the program array.
16852 */
16853 prog->cb_access = 1;
e411901c
MF
16854 if (!allow_tail_call_in_subprogs(env))
16855 prog->aux->stack_depth = MAX_BPF_STACK;
16856 prog->aux->max_pkt_offset = MAX_PACKET_OFF;
7b9f6da1 16857
79741b3b 16858 /* mark bpf_tail_call as a different opcode to avoid
8fb33b60 16859 * conditional branch in the interpreter for every normal
79741b3b
AS
16860 * call and to prevent accidental JITing by a JIT compiler
16861 * that doesn't support bpf_tail_call yet
e245c5c6 16862 */
79741b3b 16863 insn->imm = 0;
71189fa9 16864 insn->code = BPF_JMP | BPF_TAIL_CALL;
b2157399 16865
c93552c4 16866 aux = &env->insn_aux_data[i + delta];
d2a3b7c5 16867 if (env->bpf_capable && !prog->blinding_requested &&
cc52d914 16868 prog->jit_requested &&
d2e4c1e6
DB
16869 !bpf_map_key_poisoned(aux) &&
16870 !bpf_map_ptr_poisoned(aux) &&
16871 !bpf_map_ptr_unpriv(aux)) {
16872 struct bpf_jit_poke_descriptor desc = {
16873 .reason = BPF_POKE_REASON_TAIL_CALL,
16874 .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
16875 .tail_call.key = bpf_map_key_immediate(aux),
a748c697 16876 .insn_idx = i + delta,
d2e4c1e6
DB
16877 };
16878
16879 ret = bpf_jit_add_poke_descriptor(prog, &desc);
16880 if (ret < 0) {
16881 verbose(env, "adding tail call poke descriptor failed\n");
16882 return ret;
16883 }
16884
16885 insn->imm = ret + 1;
16886 continue;
16887 }
16888
c93552c4
DB
16889 if (!bpf_map_ptr_unpriv(aux))
16890 continue;
16891
b2157399
AS
16892 /* instead of changing every JIT dealing with tail_call
16893 * emit two extra insns:
16894 * if (index >= max_entries) goto out;
16895 * index &= array->index_mask;
16896 * to avoid out-of-bounds cpu speculation
16897 */
c93552c4 16898 if (bpf_map_ptr_poisoned(aux)) {
40950343 16899 verbose(env, "tail_call abusing map_ptr\n");
b2157399
AS
16900 return -EINVAL;
16901 }
c93552c4 16902
d2e4c1e6 16903 map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
b2157399
AS
16904 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
16905 map_ptr->max_entries, 2);
16906 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
16907 container_of(map_ptr,
16908 struct bpf_array,
16909 map)->index_mask);
16910 insn_buf[2] = *insn;
16911 cnt = 3;
16912 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
16913 if (!new_prog)
16914 return -ENOMEM;
16915
16916 delta += cnt - 1;
16917 env->prog = prog = new_prog;
16918 insn = new_prog->insnsi + i + delta;
79741b3b
AS
16919 continue;
16920 }
e245c5c6 16921
b00628b1
AS
16922 if (insn->imm == BPF_FUNC_timer_set_callback) {
16923 /* The verifier will process callback_fn as many times as necessary
16924 * with different maps and the register states prepared by
16925 * set_timer_callback_state will be accurate.
16926 *
16927 * The following use case is valid:
16928 * map1 is shared by prog1, prog2, prog3.
16929 * prog1 calls bpf_timer_init for some map1 elements
16930 * prog2 calls bpf_timer_set_callback for some map1 elements.
16931 * Those that were not bpf_timer_init-ed will return -EINVAL.
16932 * prog3 calls bpf_timer_start for some map1 elements.
16933 * Those that were not both bpf_timer_init-ed and
16934 * bpf_timer_set_callback-ed will return -EINVAL.
16935 */
16936 struct bpf_insn ld_addrs[2] = {
16937 BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
16938 };
16939
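/* ld_addrs[2] holds a single BPF_LD_IMM64, which is a two-slot pseudo
 * instruction; it materializes the address of this prog's aux data in R3
 * as a hidden third argument so the helper can associate the callback
 * with the calling program.
 */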
16940 insn_buf[0] = ld_addrs[0];
16941 insn_buf[1] = ld_addrs[1];
16942 insn_buf[2] = *insn;
16943 cnt = 3;
16944
16945 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
16946 if (!new_prog)
16947 return -ENOMEM;
16948
16949 delta += cnt - 1;
16950 env->prog = prog = new_prog;
16951 insn = new_prog->insnsi + i + delta;
16952 goto patch_call_imm;
16953 }
16954
9bb00b28
YS
16955 if (is_storage_get_function(insn->imm)) {
16956 if (!env->prog->aux->sleepable ||
16957 env->insn_aux_data[i + delta].storage_get_func_atomic)
d56c9fe6 16958 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
9bb00b28
YS
16959 else
16960 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
b00fa38a
JK
16961 insn_buf[1] = *insn;
16962 cnt = 2;
16963
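/* The extra mov materializes the gfp flags as a hidden fifth argument
 * (R5) of the *_storage_get() helpers: GFP_KERNEL is only used when the
 * program is sleepable and this call site was not marked atomic.
 */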
16964 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
16965 if (!new_prog)
16966 return -ENOMEM;
16967
16968 delta += cnt - 1;
16969 env->prog = prog = new_prog;
16970 insn = new_prog->insnsi + i + delta;
16971 goto patch_call_imm;
16972 }
16973
89c63074 16974 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
09772d92
DB
16975 * and other inlining handlers are currently limited to 64 bit
16976 * only.
89c63074 16977 */
60b58afc 16978 if (prog->jit_requested && BITS_PER_LONG == 64 &&
09772d92
DB
16979 (insn->imm == BPF_FUNC_map_lookup_elem ||
16980 insn->imm == BPF_FUNC_map_update_elem ||
84430d42
DB
16981 insn->imm == BPF_FUNC_map_delete_elem ||
16982 insn->imm == BPF_FUNC_map_push_elem ||
16983 insn->imm == BPF_FUNC_map_pop_elem ||
e6a4750f 16984 insn->imm == BPF_FUNC_map_peek_elem ||
0640c77c 16985 insn->imm == BPF_FUNC_redirect_map ||
07343110
FZ
16986 insn->imm == BPF_FUNC_for_each_map_elem ||
16987 insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
c93552c4
DB
16988 aux = &env->insn_aux_data[i + delta];
16989 if (bpf_map_ptr_poisoned(aux))
16990 goto patch_call_imm;
16991
d2e4c1e6 16992 map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
09772d92
DB
16993 ops = map_ptr->ops;
16994 if (insn->imm == BPF_FUNC_map_lookup_elem &&
16995 ops->map_gen_lookup) {
16996 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
4a8f87e6
DB
16997 if (cnt == -EOPNOTSUPP)
16998 goto patch_map_ops_generic;
16999 if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
09772d92
DB
17000 verbose(env, "bpf verifier is misconfigured\n");
17001 return -EINVAL;
17002 }
81ed18ab 17003
09772d92
DB
17004 new_prog = bpf_patch_insn_data(env, i + delta,
17005 insn_buf, cnt);
17006 if (!new_prog)
17007 return -ENOMEM;
81ed18ab 17008
09772d92
DB
17009 delta += cnt - 1;
17010 env->prog = prog = new_prog;
17011 insn = new_prog->insnsi + i + delta;
17012 continue;
17013 }
81ed18ab 17014
09772d92
DB
17015 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
17016 (void *(*)(struct bpf_map *map, void *key))NULL));
17017 BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
17018 (int (*)(struct bpf_map *map, void *key))NULL));
17019 BUILD_BUG_ON(!__same_type(ops->map_update_elem,
17020 (int (*)(struct bpf_map *map, void *key, void *value,
17021 u64 flags))NULL));
84430d42
DB
17022 BUILD_BUG_ON(!__same_type(ops->map_push_elem,
17023 (int (*)(struct bpf_map *map, void *value,
17024 u64 flags))NULL));
17025 BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
17026 (int (*)(struct bpf_map *map, void *value))NULL));
17027 BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
17028 (int (*)(struct bpf_map *map, void *value))NULL));
e6a4750f 17029 BUILD_BUG_ON(!__same_type(ops->map_redirect,
32637e33 17030 (int (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
0640c77c
AI
17031 BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
17032 (int (*)(struct bpf_map *map,
17033 bpf_callback_t callback_fn,
17034 void *callback_ctx,
17035 u64 flags))NULL));
07343110
FZ
17036 BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
17037 (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
e6a4750f 17038
4a8f87e6 17039patch_map_ops_generic:
09772d92
DB
17040 switch (insn->imm) {
17041 case BPF_FUNC_map_lookup_elem:
3d717fad 17042 insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
09772d92
DB
17043 continue;
17044 case BPF_FUNC_map_update_elem:
3d717fad 17045 insn->imm = BPF_CALL_IMM(ops->map_update_elem);
09772d92
DB
17046 continue;
17047 case BPF_FUNC_map_delete_elem:
3d717fad 17048 insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
09772d92 17049 continue;
84430d42 17050 case BPF_FUNC_map_push_elem:
3d717fad 17051 insn->imm = BPF_CALL_IMM(ops->map_push_elem);
84430d42
DB
17052 continue;
17053 case BPF_FUNC_map_pop_elem:
3d717fad 17054 insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
84430d42
DB
17055 continue;
17056 case BPF_FUNC_map_peek_elem:
3d717fad 17057 insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
84430d42 17058 continue;
e6a4750f 17059 case BPF_FUNC_redirect_map:
3d717fad 17060 insn->imm = BPF_CALL_IMM(ops->map_redirect);
e6a4750f 17061 continue;
0640c77c
AI
17062 case BPF_FUNC_for_each_map_elem:
17063 insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
e6a4750f 17064 continue;
07343110
FZ
17065 case BPF_FUNC_map_lookup_percpu_elem:
17066 insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
17067 continue;
09772d92 17068 }
81ed18ab 17069
09772d92 17070 goto patch_call_imm;
81ed18ab
AS
17071 }
17072
e6ac5933 17073 /* Implement bpf_jiffies64 inline. */
5576b991
MKL
17074 if (prog->jit_requested && BITS_PER_LONG == 64 &&
17075 insn->imm == BPF_FUNC_jiffies64) {
17076 struct bpf_insn ld_jiffies_addr[2] = {
17077 BPF_LD_IMM64(BPF_REG_0,
17078 (unsigned long)&jiffies),
17079 };
17080
17081 insn_buf[0] = ld_jiffies_addr[0];
17082 insn_buf[1] = ld_jiffies_addr[1];
17083 insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
17084 BPF_REG_0, 0);
17085 cnt = 3;
17086
17087 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
17088 cnt);
17089 if (!new_prog)
17090 return -ENOMEM;
17091
17092 delta += cnt - 1;
17093 env->prog = prog = new_prog;
17094 insn = new_prog->insnsi + i + delta;
17095 continue;
17096 }
17097
f92c1e18
JO
17098 /* Implement bpf_get_func_arg inline. */
17099 if (prog_type == BPF_PROG_TYPE_TRACING &&
17100 insn->imm == BPF_FUNC_get_func_arg) {
17101 /* Load nr_args from ctx - 8 */
17102 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
17103 insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
17104 insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
17105 insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
17106 insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
17107 insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
17108 insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
17109 insn_buf[7] = BPF_JMP_A(1);
17110 insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
17111 cnt = 9;
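/* Rough C equivalent of the sequence built above (r1 = ctx, r2 = n,
 * r3 = value at the call site):
 *   nr_args = *(u64 *)(ctx - 8);
 *   if (n < nr_args) { *value = ((u64 *)ctx)[n]; r0 = 0; }
 *   else r0 = -EINVAL;
 */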
17112
17113 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
17114 if (!new_prog)
17115 return -ENOMEM;
17116
17117 delta += cnt - 1;
17118 env->prog = prog = new_prog;
17119 insn = new_prog->insnsi + i + delta;
17120 continue;
17121 }
17122
17123 /* Implement bpf_get_func_ret inline. */
17124 if (prog_type == BPF_PROG_TYPE_TRACING &&
17125 insn->imm == BPF_FUNC_get_func_ret) {
17126 if (eatype == BPF_TRACE_FEXIT ||
17127 eatype == BPF_MODIFY_RETURN) {
17128 /* Load nr_args from ctx - 8 */
17129 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
17130 insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
17131 insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
17132 insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
17133 insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
17134 insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0);
17135 cnt = 6;
17136 } else {
17137 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
17138 cnt = 1;
17139 }
17140
17141 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
17142 if (!new_prog)
17143 return -ENOMEM;
17144
17145 delta += cnt - 1;
17146 env->prog = prog = new_prog;
17147 insn = new_prog->insnsi + i + delta;
17148 continue;
17149 }
17150
17151 /* Implement get_func_arg_cnt inline. */
17152 if (prog_type == BPF_PROG_TYPE_TRACING &&
17153 insn->imm == BPF_FUNC_get_func_arg_cnt) {
17154 /* Load nr_args from ctx - 8 */
17155 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
17156
17157 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
17158 if (!new_prog)
17159 return -ENOMEM;
17160
17161 env->prog = prog = new_prog;
17162 insn = new_prog->insnsi + i + delta;
17163 continue;
17164 }
17165
f705ec76 17166 /* Implement bpf_get_func_ip inline. */
9b99edca
JO
17167 if (prog_type == BPF_PROG_TYPE_TRACING &&
17168 insn->imm == BPF_FUNC_get_func_ip) {
f92c1e18
JO
17169 /* Load IP address from ctx - 16 */
17170 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
9b99edca
JO
17171
17172 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
17173 if (!new_prog)
17174 return -ENOMEM;
17175
17176 env->prog = prog = new_prog;
17177 insn = new_prog->insnsi + i + delta;
17178 continue;
17179 }
17180
81ed18ab 17181patch_call_imm:
5e43f899 17182 fn = env->ops->get_func_proto(insn->imm, env->prog);
79741b3b
AS
17183 /* all functions that have prototype and verifier allowed
17184 * programs to call them, must be real in-kernel functions
17185 */
17186 if (!fn->func) {
61bd5218
JK
17187 verbose(env,
17188 "kernel subsystem misconfigured func %s#%d\n",
79741b3b
AS
17189 func_id_name(insn->imm), insn->imm);
17190 return -EFAULT;
e245c5c6 17191 }
79741b3b 17192 insn->imm = fn->func - __bpf_call_base;
e245c5c6 17193 }
e245c5c6 17194
d2e4c1e6
DB
17195 /* Since poke tab is now finalized, publish aux to tracker. */
17196 for (i = 0; i < prog->aux->size_poke_tab; i++) {
17197 map_ptr = prog->aux->poke_tab[i].tail_call.map;
17198 if (!map_ptr->ops->map_poke_track ||
17199 !map_ptr->ops->map_poke_untrack ||
17200 !map_ptr->ops->map_poke_run) {
17201 verbose(env, "bpf verifier is misconfigured\n");
17202 return -EINVAL;
17203 }
17204
17205 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
17206 if (ret < 0) {
17207 verbose(env, "tracking tail call prog failed\n");
17208 return ret;
17209 }
17210 }
17211
e6ac2450
MKL
17212 sort_kfunc_descs_by_imm(env->prog);
17213
79741b3b
AS
17214 return 0;
17215}
e245c5c6 17216
1ade2371
EZ
17217static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
17218 int position,
17219 s32 stack_base,
17220 u32 callback_subprogno,
17221 u32 *cnt)
17222{
17223 s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
17224 s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
17225 s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
17226 int reg_loop_max = BPF_REG_6;
17227 int reg_loop_cnt = BPF_REG_7;
17228 int reg_loop_ctx = BPF_REG_8;
17229
17230 struct bpf_prog *new_prog;
17231 u32 callback_start;
17232 u32 call_insn_offset;
17233 s32 callback_offset;
17234
17235 /* This represents an inlined version of bpf_iter.c:bpf_loop;
17236 * be careful to keep this code in sync with it.
17237 */
17238 struct bpf_insn insn_buf[] = {
17239 /* Return error and jump to the end of the patch if
17240 * expected number of iterations is too big.
17241 */
17242 BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
17243 BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
17244 BPF_JMP_IMM(BPF_JA, 0, 0, 16),
17245 /* spill R6, R7, R8 to use these as loop vars */
17246 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
17247 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
17248 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
17249 /* initialize loop vars */
17250 BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
17251 BPF_MOV32_IMM(reg_loop_cnt, 0),
17252 BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
17253 /* loop header,
17254 * if reg_loop_cnt >= reg_loop_max skip the loop body
17255 */
17256 BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
17257 /* callback call,
17258 * correct callback offset would be set after patching
17259 */
17260 BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
17261 BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
17262 BPF_CALL_REL(0),
17263 /* increment loop counter */
17264 BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
17265 /* jump to loop header if callback returned 0 */
17266 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
17267 /* return value of bpf_loop,
17268 * set R0 to the number of iterations
17269 */
17270 BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
17271 /* restore original values of R6, R7, R8 */
17272 BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
17273 BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
17274 BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
17275 };
17276
17277 *cnt = ARRAY_SIZE(insn_buf);
17278 new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
17279 if (!new_prog)
17280 return new_prog;
17281
17282 /* callback start is known only after patching */
17283 callback_start = env->subprog_info[callback_subprogno].start;
17284 /* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */
17285 call_insn_offset = position + 12;
17286 callback_offset = callback_start - call_insn_offset - 1;
fb4e3b33 17287 new_prog->insnsi[call_insn_offset].imm = callback_offset;
1ade2371
EZ
17288
17289 return new_prog;
17290}
17291
17292static bool is_bpf_loop_call(struct bpf_insn *insn)
17293{
17294 return insn->code == (BPF_JMP | BPF_CALL) &&
17295 insn->src_reg == 0 &&
17296 insn->imm == BPF_FUNC_loop;
17297}
17298
17299/* For all sub-programs in the program (including main) check
17300 * insn_aux_data to see if there are bpf_loop calls that require
17301 * inlining. If such calls are found, they are replaced with a
17302 * sequence of instructions produced by the `inline_bpf_loop` function and
17303 * the subprog's stack_depth is increased by the size of 3 registers.
17304 * This stack space is used to spill the values of R6, R7 and R8. These
17305 * registers are used to store the loop bound, counter and context
17306 * variables.
17307 */
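/* For example (hypothetical numbers): with a subprog stack_depth of 12,
 * stack_depth_roundup is 4 and stack_depth_extra becomes 3 * 8 + 4 = 28,
 * so R6/R7/R8 end up spilled to the 8-byte aligned slots -40/-32/-24,
 * below the subprog's own 12 bytes of stack.
 */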
17308static int optimize_bpf_loop(struct bpf_verifier_env *env)
17309{
17310 struct bpf_subprog_info *subprogs = env->subprog_info;
17311 int i, cur_subprog = 0, cnt, delta = 0;
17312 struct bpf_insn *insn = env->prog->insnsi;
17313 int insn_cnt = env->prog->len;
17314 u16 stack_depth = subprogs[cur_subprog].stack_depth;
17315 u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
17316 u16 stack_depth_extra = 0;
17317
17318 for (i = 0; i < insn_cnt; i++, insn++) {
17319 struct bpf_loop_inline_state *inline_state =
17320 &env->insn_aux_data[i + delta].loop_inline_state;
17321
17322 if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
17323 struct bpf_prog *new_prog;
17324
17325 stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
17326 new_prog = inline_bpf_loop(env,
17327 i + delta,
17328 -(stack_depth + stack_depth_extra),
17329 inline_state->callback_subprogno,
17330 &cnt);
17331 if (!new_prog)
17332 return -ENOMEM;
17333
17334 delta += cnt - 1;
17335 env->prog = new_prog;
17336 insn = new_prog->insnsi + i + delta;
17337 }
17338
17339 if (subprogs[cur_subprog + 1].start == i + delta + 1) {
17340 subprogs[cur_subprog].stack_depth += stack_depth_extra;
17341 cur_subprog++;
17342 stack_depth = subprogs[cur_subprog].stack_depth;
17343 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
17344 stack_depth_extra = 0;
17345 }
17346 }
17347
17348 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
17349
17350 return 0;
17351}
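
/*
 * Illustrative sketch (not kernel code): a minimal BPF-side user of the
 * bpf_loop() helper that the inlining pass above targets, modeled on the
 * bpf_loop selftests. The program, callback and context names here are
 * invented for the example.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

struct sum_ctx {
	__u64 sum;
};

/* callback: gets the loop index in R1 and callback_ctx in R2;
 * returning 0 continues the loop, returning 1 breaks out of it
 */
static int sum_cb(__u32 index, void *data)
{
	struct sum_ctx *c = data;

	c->sum += index;
	return 0;
}

SEC("fentry/do_unlinkat")
int BPF_PROG(sum_unlink, int dfd, struct filename *name)
{
	struct sum_ctx c = { .sum = 0 };

	/* flags must be 0; with a statically known callback the verifier
	 * can mark this call fit_for_inline and emit the instruction
	 * sequence built by inline_bpf_loop() instead of a helper call
	 */
	bpf_loop(16, sum_cb, &c, 0);
	return 0;
}
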
17352
58e2af8b 17353static void free_states(struct bpf_verifier_env *env)
f1bca824 17354{
58e2af8b 17355 struct bpf_verifier_state_list *sl, *sln;
f1bca824
AS
17356 int i;
17357
9f4686c4
AS
17358 sl = env->free_list;
17359 while (sl) {
17360 sln = sl->next;
17361 free_verifier_state(&sl->state, false);
17362 kfree(sl);
17363 sl = sln;
17364 }
51c39bb1 17365 env->free_list = NULL;
9f4686c4 17366
f1bca824
AS
17367 if (!env->explored_states)
17368 return;
17369
dc2a4ebc 17370 for (i = 0; i < state_htab_size(env); i++) {
f1bca824
AS
17371 sl = env->explored_states[i];
17372
a8f500af
AS
17373 while (sl) {
17374 sln = sl->next;
17375 free_verifier_state(&sl->state, false);
17376 kfree(sl);
17377 sl = sln;
17378 }
51c39bb1 17379 env->explored_states[i] = NULL;
f1bca824 17380 }
51c39bb1 17381}
f1bca824 17382
51c39bb1
AS
17383static int do_check_common(struct bpf_verifier_env *env, int subprog)
17384{
6f8a57cc 17385 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
51c39bb1
AS
17386 struct bpf_verifier_state *state;
17387 struct bpf_reg_state *regs;
17388 int ret, i;
17389
17390 env->prev_linfo = NULL;
17391 env->pass_cnt++;
17392
17393 state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
17394 if (!state)
17395 return -ENOMEM;
17396 state->curframe = 0;
17397 state->speculative = false;
17398 state->branches = 1;
17399 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
17400 if (!state->frame[0]) {
17401 kfree(state);
17402 return -ENOMEM;
17403 }
17404 env->cur_state = state;
17405 init_func_state(env, state->frame[0],
17406 BPF_MAIN_FUNC /* callsite */,
17407 0 /* frameno */,
17408 subprog);
be2ef816
AN
17409 state->first_insn_idx = env->subprog_info[subprog].start;
17410 state->last_insn_idx = -1;
51c39bb1
AS
17411
17412 regs = state->frame[state->curframe]->regs;
be8704ff 17413 if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
51c39bb1
AS
17414 ret = btf_prepare_func_args(env, subprog, regs);
17415 if (ret)
17416 goto out;
17417 for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
17418 if (regs[i].type == PTR_TO_CTX)
17419 mark_reg_known_zero(env, regs, i);
17420 else if (regs[i].type == SCALAR_VALUE)
17421 mark_reg_unknown(env, regs, i);
cf9f2f8d 17422 else if (base_type(regs[i].type) == PTR_TO_MEM) {
e5069b9c
DB
17423 const u32 mem_size = regs[i].mem_size;
17424
17425 mark_reg_known_zero(env, regs, i);
17426 regs[i].mem_size = mem_size;
17427 regs[i].id = ++env->id_gen;
17428 }
51c39bb1
AS
17429 }
17430 } else {
17431 /* 1st arg to a function */
17432 regs[BPF_REG_1].type = PTR_TO_CTX;
17433 mark_reg_known_zero(env, regs, BPF_REG_1);
34747c41 17434 ret = btf_check_subprog_arg_match(env, subprog, regs);
51c39bb1
AS
17435 if (ret == -EFAULT)
 17436 /* unlikely verifier bug. abort.
 17437 * ret == 0 and ret < 0 are sadly acceptable for the
 17438 * main() function due to backward compatibility.
 17439 * For example, a socket filter program may be written as:
 17440 * int bpf_prog(struct pt_regs *ctx)
 17441 * and never dereference that ctx in the program.
 17442 * 'struct pt_regs' is a type mismatch for a socket
 17443 * filter, which should be using 'struct __sk_buff'.
 17444 */
17445 goto out;
17446 }
17447
17448 ret = do_check(env);
17449out:
f59bbfc2
AS
17450 /* check for NULL is necessary, since cur_state can be freed inside
17451 * do_check() under memory pressure.
17452 */
17453 if (env->cur_state) {
17454 free_verifier_state(env->cur_state, true);
17455 env->cur_state = NULL;
17456 }
6f8a57cc
AN
17457 while (!pop_stack(env, NULL, NULL, false));
17458 if (!ret && pop_log)
17459 bpf_vlog_reset(&env->log, 0);
51c39bb1 17460 free_states(env);
51c39bb1
AS
17461 return ret;
17462}
17463
17464/* Verify all global functions in a BPF program one by one based on their BTF.
17465 * All global functions must pass verification. Otherwise the whole program is rejected.
17466 * Consider:
17467 * int bar(int);
17468 * int foo(int f)
17469 * {
17470 * return bar(f);
17471 * }
17472 * int bar(int b)
17473 * {
17474 * ...
17475 * }
17476 * foo() will be verified first for R1=any_scalar_value. During verification it
17477 * will be assumed that bar() already verified successfully and call to bar()
17478 * from foo() will be checked for type match only. Later bar() will be verified
17479 * independently to check that it's safe for R1=any_scalar_value.
17480 */
17481static int do_check_subprogs(struct bpf_verifier_env *env)
17482{
17483 struct bpf_prog_aux *aux = env->prog->aux;
17484 int i, ret;
17485
17486 if (!aux->func_info)
17487 return 0;
17488
17489 for (i = 1; i < env->subprog_cnt; i++) {
17490 if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
17491 continue;
17492 env->insn_idx = env->subprog_info[i].start;
17493 WARN_ON_ONCE(env->insn_idx == 0);
17494 ret = do_check_common(env, i);
17495 if (ret) {
17496 return ret;
17497 } else if (env->log.level & BPF_LOG_LEVEL) {
17498 verbose(env,
17499 "Func#%d is safe for any args that match its prototype\n",
17500 i);
17501 }
17502 }
17503 return 0;
17504}
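
/*
 * Illustrative sketch (not kernel code): what a "global function" looks
 * like on the BPF C side. A non-static subprog gets BTF_FUNC_GLOBAL
 * linkage and is verified on its own, as described above. The section
 * and function names are made up for the example.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";

/* global subprog: verified once for any scalar argument value */
__noinline int scale_len(int len)
{
	return len > 0 ? len * 2 : 0;
}

SEC("tc")
int classify(struct __sk_buff *skb)
{
	/* this call site is only checked for a type match with scale_len() */
	return scale_len(skb->len);
}
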
17505
17506static int do_check_main(struct bpf_verifier_env *env)
17507{
17508 int ret;
17509
17510 env->insn_idx = 0;
17511 ret = do_check_common(env, 0);
17512 if (!ret)
17513 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
17514 return ret;
17515}
17516
17517
06ee7115
AS
17518static void print_verification_stats(struct bpf_verifier_env *env)
17519{
17520 int i;
17521
17522 if (env->log.level & BPF_LOG_STATS) {
17523 verbose(env, "verification time %lld usec\n",
17524 div_u64(env->verification_time, 1000));
17525 verbose(env, "stack depth ");
17526 for (i = 0; i < env->subprog_cnt; i++) {
17527 u32 depth = env->subprog_info[i].stack_depth;
17528
17529 verbose(env, "%d", depth);
17530 if (i + 1 < env->subprog_cnt)
17531 verbose(env, "+");
17532 }
17533 verbose(env, "\n");
17534 }
17535 verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
17536 "total_states %d peak_states %d mark_read %d\n",
17537 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
17538 env->max_states_per_insn, env->total_states,
17539 env->peak_states, env->longest_mark_read_walk);
f1bca824
AS
17540}
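
/*
 * Illustrative sketch (not kernel code): the stats above are emitted when
 * the loader sets the BPF_LOG_STATS bit (4) in log_level. A user-space
 * example using libbpf's bpf_prog_load(); the insns array and program
 * name are placeholders.
 */
#include <linux/bpf.h>
#include <bpf/bpf.h>

static int load_with_stats(const struct bpf_insn *insns, size_t insn_cnt,
			   char *log_buf, size_t log_sz)
{
	LIBBPF_OPTS(bpf_prog_load_opts, opts,
		.log_buf = log_buf,
		.log_size = log_sz,
		.log_level = 1 | 4,	/* BPF_LOG_LEVEL1 | BPF_LOG_STATS */
	);

	return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "stats_demo", "GPL",
			     insns, insn_cnt, &opts);
}
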
17541
27ae7997
MKL
17542static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
17543{
17544 const struct btf_type *t, *func_proto;
17545 const struct bpf_struct_ops *st_ops;
17546 const struct btf_member *member;
17547 struct bpf_prog *prog = env->prog;
17548 u32 btf_id, member_idx;
17549 const char *mname;
17550
12aa8a94
THJ
17551 if (!prog->gpl_compatible) {
17552 verbose(env, "struct ops programs must have a GPL compatible license\n");
17553 return -EINVAL;
17554 }
17555
27ae7997
MKL
17556 btf_id = prog->aux->attach_btf_id;
17557 st_ops = bpf_struct_ops_find(btf_id);
17558 if (!st_ops) {
17559 verbose(env, "attach_btf_id %u is not a supported struct\n",
17560 btf_id);
17561 return -ENOTSUPP;
17562 }
17563
17564 t = st_ops->type;
17565 member_idx = prog->expected_attach_type;
17566 if (member_idx >= btf_type_vlen(t)) {
17567 verbose(env, "attach to invalid member idx %u of struct %s\n",
17568 member_idx, st_ops->name);
17569 return -EINVAL;
17570 }
17571
17572 member = &btf_type_member(t)[member_idx];
17573 mname = btf_name_by_offset(btf_vmlinux, member->name_off);
17574 func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
17575 NULL);
17576 if (!func_proto) {
17577 verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
17578 mname, member_idx, st_ops->name);
17579 return -EINVAL;
17580 }
17581
17582 if (st_ops->check_member) {
51a52a29 17583 int err = st_ops->check_member(t, member, prog);
27ae7997
MKL
17584
17585 if (err) {
17586 verbose(env, "attach to unsupported member %s of struct %s\n",
17587 mname, st_ops->name);
17588 return err;
17589 }
17590 }
17591
17592 prog->aux->attach_func_proto = func_proto;
17593 prog->aux->attach_func_name = mname;
17594 env->ops = st_ops->verifier_ops;
17595
17596 return 0;
17597}
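
/*
 * Illustrative sketch (not kernel code): the struct_ops member resolved by
 * the check above comes from a BPF object laid out roughly like this,
 * modeled on the tcp_congestion_ops selftests. Names are examples and the
 * ops set is deliberately incomplete.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";	/* struct_ops requires GPL, as checked above */

SEC("struct_ops/demo_init")
void BPF_PROG(demo_init, struct sock *sk)
{
}

SEC("struct_ops/demo_ssthresh")
__u32 BPF_PROG(demo_ssthresh, struct sock *sk)
{
	return 2;	/* placeholder value for the sketch */
}

/* libbpf turns this map into the expected_attach_type / member_idx pairs
 * that check_struct_ops_btf_id() validates for each program above
 */
SEC(".struct_ops")
struct tcp_congestion_ops demo_cong_ops = {
	.init		= (void *)demo_init,
	.ssthresh	= (void *)demo_ssthresh,
	.name		= "bpf_demo",
};
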
6ba43b76
KS
17598#define SECURITY_PREFIX "security_"
17599
f7b12b6f 17600static int check_attach_modify_return(unsigned long addr, const char *func_name)
6ba43b76 17601{
69191754 17602 if (within_error_injection_list(addr) ||
f7b12b6f 17603 !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
6ba43b76 17604 return 0;
6ba43b76 17605
6ba43b76
KS
17606 return -EINVAL;
17607}
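
/*
 * Illustrative sketch (not kernel code): a BPF_MODIFY_RETURN program
 * attached to a "security_" hook, which check_attach_modify_return()
 * above permits. The hook choice and program name are examples.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

SEC("fmod_ret/security_socket_connect")
int BPF_PROG(restrict_connect, struct socket *sock, struct sockaddr *address,
	     int addrlen)
{
	/* returning non-zero here skips the original function and becomes
	 * its return value; returning 0 lets the hook run normally
	 */
	return 0;
}
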
27ae7997 17608
1e6c62a8
AS
17609/* list of non-sleepable functions that are otherwise on
17610 * ALLOW_ERROR_INJECTION list
17611 */
17612BTF_SET_START(btf_non_sleepable_error_inject)
 17613/* The three functions below can be called from both sleepable and non-sleepable context.
 17614 * Assume non-sleepable from the BPF safety point of view.
 17615 */
9dd3d069 17616BTF_ID(func, __filemap_add_folio)
1e6c62a8
AS
17617BTF_ID(func, should_fail_alloc_page)
17618BTF_ID(func, should_failslab)
17619BTF_SET_END(btf_non_sleepable_error_inject)
17620
17621static int check_non_sleepable_error_inject(u32 btf_id)
17622{
17623 return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
17624}
17625
f7b12b6f
THJ
17626int bpf_check_attach_target(struct bpf_verifier_log *log,
17627 const struct bpf_prog *prog,
17628 const struct bpf_prog *tgt_prog,
17629 u32 btf_id,
17630 struct bpf_attach_target_info *tgt_info)
38207291 17631{
be8704ff 17632 bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
f1b9509c 17633 const char prefix[] = "btf_trace_";
5b92a28a 17634 int ret = 0, subprog = -1, i;
38207291 17635 const struct btf_type *t;
5b92a28a 17636 bool conservative = true;
38207291 17637 const char *tname;
5b92a28a 17638 struct btf *btf;
f7b12b6f 17639 long addr = 0;
38207291 17640
f1b9509c 17641 if (!btf_id) {
efc68158 17642 bpf_log(log, "Tracing programs must provide btf_id\n");
f1b9509c
AS
17643 return -EINVAL;
17644 }
22dc4a0f 17645 btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
5b92a28a 17646 if (!btf) {
efc68158 17647 bpf_log(log,
5b92a28a
AS
17648 "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
17649 return -EINVAL;
17650 }
17651 t = btf_type_by_id(btf, btf_id);
f1b9509c 17652 if (!t) {
efc68158 17653 bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
f1b9509c
AS
17654 return -EINVAL;
17655 }
5b92a28a 17656 tname = btf_name_by_offset(btf, t->name_off);
f1b9509c 17657 if (!tname) {
efc68158 17658 bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
f1b9509c
AS
17659 return -EINVAL;
17660 }
5b92a28a
AS
17661 if (tgt_prog) {
17662 struct bpf_prog_aux *aux = tgt_prog->aux;
17663
fd7c211d
THJ
17664 if (bpf_prog_is_dev_bound(prog->aux) &&
17665 !bpf_prog_dev_bound_match(prog, tgt_prog)) {
17666 bpf_log(log, "Target program bound device mismatch");
3d76a4d3
SF
17667 return -EINVAL;
17668 }
17669
5b92a28a
AS
17670 for (i = 0; i < aux->func_info_cnt; i++)
17671 if (aux->func_info[i].type_id == btf_id) {
17672 subprog = i;
17673 break;
17674 }
17675 if (subprog == -1) {
efc68158 17676 bpf_log(log, "Subprog %s doesn't exist\n", tname);
5b92a28a
AS
17677 return -EINVAL;
17678 }
17679 conservative = aux->func_info_aux[subprog].unreliable;
be8704ff
AS
17680 if (prog_extension) {
17681 if (conservative) {
efc68158 17682 bpf_log(log,
be8704ff
AS
17683 "Cannot replace static functions\n");
17684 return -EINVAL;
17685 }
17686 if (!prog->jit_requested) {
efc68158 17687 bpf_log(log,
be8704ff
AS
17688 "Extension programs should be JITed\n");
17689 return -EINVAL;
17690 }
be8704ff
AS
17691 }
17692 if (!tgt_prog->jited) {
efc68158 17693 bpf_log(log, "Can attach to only JITed progs\n");
be8704ff
AS
17694 return -EINVAL;
17695 }
17696 if (tgt_prog->type == prog->type) {
17697 /* Cannot fentry/fexit another fentry/fexit program.
17698 * Cannot attach program extension to another extension.
17699 * It's ok to attach fentry/fexit to extension program.
17700 */
efc68158 17701 bpf_log(log, "Cannot recursively attach\n");
be8704ff
AS
17702 return -EINVAL;
17703 }
17704 if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
17705 prog_extension &&
17706 (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
17707 tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
 17708 /* Program extensions can extend all program types
 17709 * except fentry/fexit, for the following reason.
 17710 * The fentry/fexit programs are used for performance
 17711 * analysis and stats, and can be attached to any program
 17712 * type except themselves. When an extension program
 17713 * replaces an XDP function, it is necessary to allow
 17714 * performance analysis of all functions: both the original
 17715 * XDP program and its program extension. Hence
 17716 * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
 17717 * allowed. If extending fentry/fexit were allowed, it
 17718 * would be possible to create a long call chain
 17719 * fentry->extension->fentry->extension beyond any
 17720 * reasonable stack size. Hence extending fentry is not
 17721 * allowed.
 17722 */
efc68158 17723 bpf_log(log, "Cannot extend fentry/fexit\n");
be8704ff
AS
17724 return -EINVAL;
17725 }
5b92a28a 17726 } else {
be8704ff 17727 if (prog_extension) {
efc68158 17728 bpf_log(log, "Cannot replace kernel functions\n");
be8704ff
AS
17729 return -EINVAL;
17730 }
5b92a28a 17731 }
f1b9509c
AS
17732
17733 switch (prog->expected_attach_type) {
17734 case BPF_TRACE_RAW_TP:
5b92a28a 17735 if (tgt_prog) {
efc68158 17736 bpf_log(log,
5b92a28a
AS
17737 "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
17738 return -EINVAL;
17739 }
38207291 17740 if (!btf_type_is_typedef(t)) {
efc68158 17741 bpf_log(log, "attach_btf_id %u is not a typedef\n",
38207291
MKL
17742 btf_id);
17743 return -EINVAL;
17744 }
f1b9509c 17745 if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
efc68158 17746 bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
38207291
MKL
17747 btf_id, tname);
17748 return -EINVAL;
17749 }
17750 tname += sizeof(prefix) - 1;
5b92a28a 17751 t = btf_type_by_id(btf, t->type);
38207291
MKL
17752 if (!btf_type_is_ptr(t))
17753 /* should never happen in valid vmlinux build */
17754 return -EINVAL;
5b92a28a 17755 t = btf_type_by_id(btf, t->type);
38207291
MKL
17756 if (!btf_type_is_func_proto(t))
17757 /* should never happen in valid vmlinux build */
17758 return -EINVAL;
17759
f7b12b6f 17760 break;
15d83c4d
YS
17761 case BPF_TRACE_ITER:
17762 if (!btf_type_is_func(t)) {
efc68158 17763 bpf_log(log, "attach_btf_id %u is not a function\n",
15d83c4d
YS
17764 btf_id);
17765 return -EINVAL;
17766 }
17767 t = btf_type_by_id(btf, t->type);
17768 if (!btf_type_is_func_proto(t))
17769 return -EINVAL;
f7b12b6f
THJ
17770 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
17771 if (ret)
17772 return ret;
17773 break;
be8704ff
AS
17774 default:
17775 if (!prog_extension)
17776 return -EINVAL;
df561f66 17777 fallthrough;
ae240823 17778 case BPF_MODIFY_RETURN:
9e4e01df 17779 case BPF_LSM_MAC:
69fd337a 17780 case BPF_LSM_CGROUP:
fec56f58
AS
17781 case BPF_TRACE_FENTRY:
17782 case BPF_TRACE_FEXIT:
17783 if (!btf_type_is_func(t)) {
efc68158 17784 bpf_log(log, "attach_btf_id %u is not a function\n",
fec56f58
AS
17785 btf_id);
17786 return -EINVAL;
17787 }
be8704ff 17788 if (prog_extension &&
efc68158 17789 btf_check_type_match(log, prog, btf, t))
be8704ff 17790 return -EINVAL;
5b92a28a 17791 t = btf_type_by_id(btf, t->type);
fec56f58
AS
17792 if (!btf_type_is_func_proto(t))
17793 return -EINVAL;
f7b12b6f 17794
4a1e7c0c
THJ
17795 if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
17796 (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
17797 prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
17798 return -EINVAL;
17799
f7b12b6f 17800 if (tgt_prog && conservative)
5b92a28a 17801 t = NULL;
f7b12b6f
THJ
17802
17803 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
fec56f58 17804 if (ret < 0)
f7b12b6f
THJ
17805 return ret;
17806
5b92a28a 17807 if (tgt_prog) {
e9eeec58
YS
17808 if (subprog == 0)
17809 addr = (long) tgt_prog->bpf_func;
17810 else
17811 addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
5b92a28a
AS
17812 } else {
17813 addr = kallsyms_lookup_name(tname);
17814 if (!addr) {
efc68158 17815 bpf_log(log,
5b92a28a
AS
17816 "The address of function %s cannot be found\n",
17817 tname);
f7b12b6f 17818 return -ENOENT;
5b92a28a 17819 }
fec56f58 17820 }
18644cec 17821
1e6c62a8
AS
17822 if (prog->aux->sleepable) {
17823 ret = -EINVAL;
17824 switch (prog->type) {
17825 case BPF_PROG_TYPE_TRACING:
5b481aca
BT
17826
17827 /* fentry/fexit/fmod_ret progs can be sleepable if they are
1e6c62a8
AS
17828 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
17829 */
17830 if (!check_non_sleepable_error_inject(btf_id) &&
17831 within_error_injection_list(addr))
17832 ret = 0;
5b481aca
BT
17833 /* fentry/fexit/fmod_ret progs can also be sleepable if they are
17834 * in the fmodret id set with the KF_SLEEPABLE flag.
17835 */
17836 else {
17837 u32 *flags = btf_kfunc_is_modify_return(btf, btf_id);
17838
17839 if (flags && (*flags & KF_SLEEPABLE))
17840 ret = 0;
17841 }
1e6c62a8
AS
17842 break;
17843 case BPF_PROG_TYPE_LSM:
17844 /* LSM progs check that they are attached to bpf_lsm_*() funcs.
17845 * Only some of them are sleepable.
17846 */
423f1610 17847 if (bpf_lsm_is_sleepable_hook(btf_id))
1e6c62a8
AS
17848 ret = 0;
17849 break;
17850 default:
17851 break;
17852 }
f7b12b6f
THJ
17853 if (ret) {
17854 bpf_log(log, "%s is not sleepable\n", tname);
17855 return ret;
17856 }
1e6c62a8 17857 } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
1af9270e 17858 if (tgt_prog) {
efc68158 17859 bpf_log(log, "can't modify return codes of BPF programs\n");
f7b12b6f
THJ
17860 return -EINVAL;
17861 }
5b481aca
BT
17862 ret = -EINVAL;
17863 if (btf_kfunc_is_modify_return(btf, btf_id) ||
17864 !check_attach_modify_return(addr, tname))
17865 ret = 0;
f7b12b6f
THJ
17866 if (ret) {
17867 bpf_log(log, "%s() is not modifiable\n", tname);
17868 return ret;
1af9270e 17869 }
18644cec 17870 }
f7b12b6f
THJ
17871
17872 break;
17873 }
17874 tgt_info->tgt_addr = addr;
17875 tgt_info->tgt_name = tname;
17876 tgt_info->tgt_type = t;
17877 return 0;
17878}
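
/*
 * Illustrative sketch (not kernel code): for the BPF_TRACE_RAW_TP case
 * above, libbpf's "tp_btf/<name>" section makes attach_btf_id point at
 * the btf_trace_<name> typedef that is matched against the "btf_trace_"
 * prefix. The tracepoint chosen here is just an example.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

SEC("tp_btf/task_newtask")
int BPF_PROG(on_newtask, struct task_struct *task, u64 clone_flags)
{
	/* tracepoint arguments are accessed directly via their BTF types */
	bpf_printk("new task pid=%d", task->pid);
	return 0;
}
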
17879
35e3815f
JO
17880BTF_SET_START(btf_id_deny)
17881BTF_ID_UNUSED
17882#ifdef CONFIG_SMP
17883BTF_ID(func, migrate_disable)
17884BTF_ID(func, migrate_enable)
17885#endif
17886#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
17887BTF_ID(func, rcu_read_unlock_strict)
17888#endif
17889BTF_SET_END(btf_id_deny)
17890
700e6f85
JO
17891static bool can_be_sleepable(struct bpf_prog *prog)
17892{
17893 if (prog->type == BPF_PROG_TYPE_TRACING) {
17894 switch (prog->expected_attach_type) {
17895 case BPF_TRACE_FENTRY:
17896 case BPF_TRACE_FEXIT:
17897 case BPF_MODIFY_RETURN:
17898 case BPF_TRACE_ITER:
17899 return true;
17900 default:
17901 return false;
17902 }
17903 }
17904 return prog->type == BPF_PROG_TYPE_LSM ||
1e12d3ef
DV
17905 prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
17906 prog->type == BPF_PROG_TYPE_STRUCT_OPS;
700e6f85
JO
17907}
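
/*
 * Illustrative sketch (not kernel code): the program types accepted by
 * can_be_sleepable() are marked sleepable from BPF C with libbpf's ".s"
 * section suffix, as in the LSM selftests. The hook shown is an example;
 * sleepable tracing programs additionally need an attach target that
 * passes the error-injection checks in bpf_check_attach_target() above.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

SEC("lsm.s/bprm_committed_creds")	/* sleepable LSM program */
int BPF_PROG(sleepable_lsm, struct linux_binprm *bprm)
{
	/* being sleepable, this program may use helpers such as
	 * bpf_copy_from_user() that can fault and sleep
	 */
	return 0;
}
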
17908
f7b12b6f
THJ
17909static int check_attach_btf_id(struct bpf_verifier_env *env)
17910{
17911 struct bpf_prog *prog = env->prog;
3aac1ead 17912 struct bpf_prog *tgt_prog = prog->aux->dst_prog;
f7b12b6f
THJ
17913 struct bpf_attach_target_info tgt_info = {};
17914 u32 btf_id = prog->aux->attach_btf_id;
17915 struct bpf_trampoline *tr;
17916 int ret;
17917 u64 key;
17918
79a7f8bd
AS
17919 if (prog->type == BPF_PROG_TYPE_SYSCALL) {
17920 if (prog->aux->sleepable)
17921 /* attach_btf_id checked to be zero already */
17922 return 0;
17923 verbose(env, "Syscall programs can only be sleepable\n");
17924 return -EINVAL;
17925 }
17926
700e6f85 17927 if (prog->aux->sleepable && !can_be_sleepable(prog)) {
1e12d3ef 17928 verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
f7b12b6f
THJ
17929 return -EINVAL;
17930 }
17931
17932 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
17933 return check_struct_ops_btf_id(env);
17934
17935 if (prog->type != BPF_PROG_TYPE_TRACING &&
17936 prog->type != BPF_PROG_TYPE_LSM &&
17937 prog->type != BPF_PROG_TYPE_EXT)
17938 return 0;
17939
17940 ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
17941 if (ret)
fec56f58 17942 return ret;
f7b12b6f
THJ
17943
17944 if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
3aac1ead
THJ
 17945 /* to make freplace programs equivalent to their targets, they need
 17946 * to inherit env->ops and expected_attach_type for the rest of the
 17947 * verification
17948 */
f7b12b6f
THJ
17949 env->ops = bpf_verifier_ops[tgt_prog->type];
17950 prog->expected_attach_type = tgt_prog->expected_attach_type;
17951 }
17952
17953 /* store info about the attachment target that will be used later */
17954 prog->aux->attach_func_proto = tgt_info.tgt_type;
17955 prog->aux->attach_func_name = tgt_info.tgt_name;
17956
4a1e7c0c
THJ
17957 if (tgt_prog) {
17958 prog->aux->saved_dst_prog_type = tgt_prog->type;
17959 prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
17960 }
17961
f7b12b6f
THJ
17962 if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
17963 prog->aux->attach_btf_trace = true;
17964 return 0;
17965 } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
17966 if (!bpf_iter_prog_supported(prog))
17967 return -EINVAL;
17968 return 0;
17969 }
17970
17971 if (prog->type == BPF_PROG_TYPE_LSM) {
17972 ret = bpf_lsm_verify_prog(&env->log, prog);
17973 if (ret < 0)
17974 return ret;
35e3815f
JO
17975 } else if (prog->type == BPF_PROG_TYPE_TRACING &&
17976 btf_id_set_contains(&btf_id_deny, btf_id)) {
17977 return -EINVAL;
38207291 17978 }
f7b12b6f 17979
22dc4a0f 17980 key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
f7b12b6f
THJ
17981 tr = bpf_trampoline_get(key, &tgt_info);
17982 if (!tr)
17983 return -ENOMEM;
17984
3aac1ead 17985 prog->aux->dst_trampoline = tr;
f7b12b6f 17986 return 0;
38207291
MKL
17987}
17988
76654e67
AM
17989struct btf *bpf_get_btf_vmlinux(void)
17990{
17991 if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
17992 mutex_lock(&bpf_verifier_lock);
17993 if (!btf_vmlinux)
17994 btf_vmlinux = btf_parse_vmlinux();
17995 mutex_unlock(&bpf_verifier_lock);
17996 }
17997 return btf_vmlinux;
17998}
17999
af2ac3e1 18000int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
51580e79 18001{
06ee7115 18002 u64 start_time = ktime_get_ns();
58e2af8b 18003 struct bpf_verifier_env *env;
b9193c1b 18004 struct bpf_verifier_log *log;
9e4c24e7 18005 int i, len, ret = -EINVAL;
e2ae4ca2 18006 bool is_priv;
51580e79 18007
eba0c929
AB
18008 /* no program is valid */
18009 if (ARRAY_SIZE(bpf_verifier_ops) == 0)
18010 return -EINVAL;
18011
58e2af8b 18012 /* 'struct bpf_verifier_env' can be global, but since it's not small,
cbd35700
AS
18013 * allocate/free it every time bpf_check() is called
18014 */
58e2af8b 18015 env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
cbd35700
AS
18016 if (!env)
18017 return -ENOMEM;
61bd5218 18018 log = &env->log;
cbd35700 18019
9e4c24e7 18020 len = (*prog)->len;
fad953ce 18021 env->insn_aux_data =
9e4c24e7 18022 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
3df126f3
JK
18023 ret = -ENOMEM;
18024 if (!env->insn_aux_data)
18025 goto err_free_env;
9e4c24e7
JK
18026 for (i = 0; i < len; i++)
18027 env->insn_aux_data[i].orig_idx = i;
9bac3d6d 18028 env->prog = *prog;
00176a34 18029 env->ops = bpf_verifier_ops[env->prog->type];
387544bf 18030 env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
2c78ee89 18031 is_priv = bpf_capable();
0246e64d 18032
76654e67 18033 bpf_get_btf_vmlinux();
8580ac94 18034
cbd35700 18035 /* grab the mutex to protect few globals used by verifier */
45a73c17
AS
18036 if (!is_priv)
18037 mutex_lock(&bpf_verifier_lock);
cbd35700
AS
18038
18039 if (attr->log_level || attr->log_buf || attr->log_size) {
18040 /* user requested verbose verifier output
18041 * and supplied buffer to store the verification trace
18042 */
e7bf8249
JK
18043 log->level = attr->log_level;
18044 log->ubuf = (char __user *) (unsigned long) attr->log_buf;
18045 log->len_total = attr->log_size;
cbd35700 18046
e7bf8249 18047 /* log attributes have to be sane */
866de407
HT
18048 if (!bpf_verifier_log_attr_valid(log)) {
18049 ret = -EINVAL;
3df126f3 18050 goto err_unlock;
866de407 18051 }
cbd35700 18052 }
1ad2f583 18053
0f55f9ed
CL
18054 mark_verifier_state_clean(env);
18055
8580ac94
AS
18056 if (IS_ERR(btf_vmlinux)) {
 18057 /* Either gcc or pahole or the kernel is broken. */
18058 verbose(env, "in-kernel BTF is malformed\n");
18059 ret = PTR_ERR(btf_vmlinux);
38207291 18060 goto skip_full_check;
8580ac94
AS
18061 }
18062
1ad2f583
DB
18063 env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
18064 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
e07b98d9 18065 env->strict_alignment = true;
e9ee9efc
DM
18066 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
18067 env->strict_alignment = false;
cbd35700 18068
2c78ee89 18069 env->allow_ptr_leaks = bpf_allow_ptr_leaks();
01f810ac 18070 env->allow_uninit_stack = bpf_allow_uninit_stack();
2c78ee89
AS
18071 env->bypass_spec_v1 = bpf_bypass_spec_v1();
18072 env->bypass_spec_v4 = bpf_bypass_spec_v4();
18073 env->bpf_capable = bpf_capable();
e2ae4ca2 18074
10d274e8
AS
18075 if (is_priv)
18076 env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
18077
dc2a4ebc 18078 env->explored_states = kvcalloc(state_htab_size(env),
58e2af8b 18079 sizeof(struct bpf_verifier_state_list *),
f1bca824
AS
18080 GFP_USER);
18081 ret = -ENOMEM;
18082 if (!env->explored_states)
18083 goto skip_full_check;
18084
e6ac2450
MKL
18085 ret = add_subprog_and_kfunc(env);
18086 if (ret < 0)
18087 goto skip_full_check;
18088
d9762e84 18089 ret = check_subprogs(env);
475fb78f
AS
18090 if (ret < 0)
18091 goto skip_full_check;
18092
c454a46b 18093 ret = check_btf_info(env, attr, uattr);
838e9690
YS
18094 if (ret < 0)
18095 goto skip_full_check;
18096
be8704ff
AS
18097 ret = check_attach_btf_id(env);
18098 if (ret)
18099 goto skip_full_check;
18100
4976b718
HL
18101 ret = resolve_pseudo_ldimm64(env);
18102 if (ret < 0)
18103 goto skip_full_check;
18104
9d03ebc7 18105 if (bpf_prog_is_offloaded(env->prog->aux)) {
ceb11679
YZ
18106 ret = bpf_prog_offload_verifier_prep(env->prog);
18107 if (ret)
18108 goto skip_full_check;
18109 }
18110
d9762e84
MKL
18111 ret = check_cfg(env);
18112 if (ret < 0)
18113 goto skip_full_check;
18114
51c39bb1
AS
18115 ret = do_check_subprogs(env);
18116 ret = ret ?: do_check_main(env);
cbd35700 18117
9d03ebc7 18118 if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
c941ce9c
QM
18119 ret = bpf_prog_offload_finalize(env);
18120
0246e64d 18121skip_full_check:
51c39bb1 18122 kvfree(env->explored_states);
0246e64d 18123
c131187d 18124 if (ret == 0)
9b38c405 18125 ret = check_max_stack_depth(env);
c131187d 18126
9b38c405 18127 /* instruction rewrites happen after this point */
1ade2371
EZ
18128 if (ret == 0)
18129 ret = optimize_bpf_loop(env);
18130
e2ae4ca2
JK
18131 if (is_priv) {
18132 if (ret == 0)
18133 opt_hard_wire_dead_code_branches(env);
52875a04
JK
18134 if (ret == 0)
18135 ret = opt_remove_dead_code(env);
a1b14abc
JK
18136 if (ret == 0)
18137 ret = opt_remove_nops(env);
52875a04
JK
18138 } else {
18139 if (ret == 0)
18140 sanitize_dead_code(env);
e2ae4ca2
JK
18141 }
18142
9bac3d6d
AS
18143 if (ret == 0)
18144 /* program is valid, convert *(u32*)(ctx + off) accesses */
18145 ret = convert_ctx_accesses(env);
18146
e245c5c6 18147 if (ret == 0)
e6ac5933 18148 ret = do_misc_fixups(env);
e245c5c6 18149
a4b1d3c1
JW
 18150 /* do 32-bit optimization after insn patching is done so those patched
 18151 * insns can be handled correctly.
 18152 */
9d03ebc7 18153 if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
d6c2308c
JW
18154 ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
18155 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
18156 : false;
a4b1d3c1
JW
18157 }
18158
1ea47e01
AS
18159 if (ret == 0)
18160 ret = fixup_call_args(env);
18161
06ee7115
AS
18162 env->verification_time = ktime_get_ns() - start_time;
18163 print_verification_stats(env);
aba64c7d 18164 env->prog->aux->verified_insns = env->insn_processed;
06ee7115 18165
a2a7d570 18166 if (log->level && bpf_verifier_log_full(log))
cbd35700 18167 ret = -ENOSPC;
a2a7d570 18168 if (log->level && !log->ubuf) {
cbd35700 18169 ret = -EFAULT;
a2a7d570 18170 goto err_release_maps;
cbd35700
AS
18171 }
18172
541c3bad
AN
18173 if (ret)
18174 goto err_release_maps;
18175
18176 if (env->used_map_cnt) {
0246e64d 18177 /* if program passed verifier, update used_maps in bpf_prog_info */
9bac3d6d
AS
18178 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
18179 sizeof(env->used_maps[0]),
18180 GFP_KERNEL);
0246e64d 18181
9bac3d6d 18182 if (!env->prog->aux->used_maps) {
0246e64d 18183 ret = -ENOMEM;
a2a7d570 18184 goto err_release_maps;
0246e64d
AS
18185 }
18186
9bac3d6d 18187 memcpy(env->prog->aux->used_maps, env->used_maps,
0246e64d 18188 sizeof(env->used_maps[0]) * env->used_map_cnt);
9bac3d6d 18189 env->prog->aux->used_map_cnt = env->used_map_cnt;
541c3bad
AN
18190 }
18191 if (env->used_btf_cnt) {
18192 /* if program passed verifier, update used_btfs in bpf_prog_aux */
18193 env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
18194 sizeof(env->used_btfs[0]),
18195 GFP_KERNEL);
18196 if (!env->prog->aux->used_btfs) {
18197 ret = -ENOMEM;
18198 goto err_release_maps;
18199 }
0246e64d 18200
541c3bad
AN
18201 memcpy(env->prog->aux->used_btfs, env->used_btfs,
18202 sizeof(env->used_btfs[0]) * env->used_btf_cnt);
18203 env->prog->aux->used_btf_cnt = env->used_btf_cnt;
18204 }
18205 if (env->used_map_cnt || env->used_btf_cnt) {
0246e64d
AS
18206 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
18207 * bpf_ld_imm64 instructions
18208 */
18209 convert_pseudo_ld_imm64(env);
18210 }
cbd35700 18211
541c3bad 18212 adjust_btf_func(env);
ba64e7d8 18213
a2a7d570 18214err_release_maps:
9bac3d6d 18215 if (!env->prog->aux->used_maps)
0246e64d 18216 /* if we didn't copy map pointers into bpf_prog_info, release
ab7f5bf0 18217 * them now. Otherwise free_used_maps() will release them.
0246e64d
AS
18218 */
18219 release_maps(env);
541c3bad
AN
18220 if (!env->prog->aux->used_btfs)
18221 release_btfs(env);
03f87c0b
THJ
18222
 18223 /* extension progs temporarily inherit the attach_type of their targets
 18224 * for verification purposes, so set it back to zero before returning
 18225 */
18226 if (env->prog->type == BPF_PROG_TYPE_EXT)
18227 env->prog->expected_attach_type = 0;
18228
9bac3d6d 18229 *prog = env->prog;
3df126f3 18230err_unlock:
45a73c17
AS
18231 if (!is_priv)
18232 mutex_unlock(&bpf_verifier_lock);
3df126f3
JK
18233 vfree(env->insn_aux_data);
18234err_free_env:
18235 kfree(env);
51580e79
AS
18236 return ret;
18237}