// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
 */
#include <uapi/linux/btf.h>
#include <linux/bpf-cgroup.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <linux/stringify.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/perf_event.h>
#include <linux/ctype.h>
#include <linux/error-injection.h>
#include <linux/bpf_lsm.h>
#include <linux/btf_ids.h>

#include "disasm.h"

static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};

/* bpf_check() is a static code analyzer that walks eBPF program
 * instruction by instruction and updates register/stack state.
 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
 *
 * The first pass is depth-first-search to check that the program is a DAG.
 * It rejects the following programs:
 * - larger than BPF_MAXINSNS insns
 * - if loop is present (detected via back-edge)
 * - unreachable insns exist (shouldn't be a forest. program = one function)
 * - out of bounds or malformed jumps
 * The second pass is all possible path descent from the 1st insn.
 * Since it's analyzing all paths through the program, the length of the
 * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
 * Number of 'branches to be analyzed' is limited to 1k
 *
 * On entry to each instruction, each register has a type, and the instruction
 * changes the types of the registers depending on instruction semantics.
 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
 * copied to R1.
 *
 * All registers are 64-bit.
 * R0 - return register
 * R1-R5 argument passing registers
 * R6-R9 callee saved registers
 * R10 - frame pointer read-only
 *
 * At the start of BPF program the register R1 contains a pointer to bpf_context
 * and has type PTR_TO_CTX.
 *
 * Verifier tracks arithmetic operations on pointers in case:
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
 * 1st insn copies R10 (which has FRAME_PTR) type into R1
 * and 2nd arithmetic instruction is pattern matched to recognize
 * that it wants to construct a pointer to some element within stack.
 * So after 2nd insn, the register R1 has type PTR_TO_STACK
 * (and -20 constant is saved for further stack bounds checking).
 * Meaning that this reg is a pointer to stack plus known immediate constant.
 *
 * Most of the time the registers have SCALAR_VALUE type, which
 * means the register has some value, but it's not a valid pointer.
 * (like pointer plus pointer becomes SCALAR_VALUE type)
 *
 * When verifier sees load or store instructions the type of base register
 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
 * four pointer types recognized by check_mem_access() function.
 *
 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
 * and the range of [ptr, ptr + map's value_size) is accessible.
 *
 * registers used to pass values to function calls are checked against
 * function argument constraints.
 *
 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
 * It means that the register type passed to this function must be
 * PTR_TO_STACK and it will be used inside the function as
 * 'pointer to map element key'
 *
 * For example the argument constraints for bpf_map_lookup_elem():
 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 *   .arg1_type = ARG_CONST_MAP_PTR,
 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 *
 * ret_type says that this function returns 'pointer to map elem value or null'
 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 * 2nd argument should be a pointer to stack, which will be used inside
 * the helper function as a pointer to map element key.
 *
 * On the kernel side the helper function looks like:
 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 * {
 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 *    void *key = (void *) (unsigned long) r2;
 *    void *value;
 *
 *    here kernel can access 'key' and 'map' pointers safely, knowing that
 *    [key, key + map->key_size) bytes are valid and were initialized on
 *    the stack of eBPF program.
 * }
 *
 * Corresponding eBPF program may look like:
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 * here verifier looks at prototype of map_lookup_elem() and sees:
 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 *
 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 * and were initialized prior to this call.
 * If it's ok, then verifier allows this BPF_CALL insn and looks at
 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 * returns either pointer to map value or NULL.
 *
 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 * insn, the register holding that pointer in the true branch changes state to
 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 * branch. See check_cond_jmp_op().
 *
 * After the call R0 is set to return type of the function and registers R1-R5
 * are set to NOT_INIT to indicate that they are no longer readable.
 *
 * The following reference types represent a potential reference to a kernel
 * resource which, after first being allocated, must be checked and freed by
 * the BPF program:
 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 *
 * When the verifier sees a helper call return a reference type, it allocates a
 * pointer id for the reference and stores it in the current function state.
 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 * passes through a NULL-check conditional. For the branch wherein the state is
 * changed to CONST_IMM, the verifier releases the reference.
 *
 * For each helper function that allocates a reference, such as
 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 * bpf_sk_release(). When a reference type passes into the release function,
 * the verifier also releases the reference. If any unchecked or unreleased
 * reference remains at the end of the program, the verifier rejects it.
 */
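
/* To illustrate (a sketch added for clarity, not an exhaustive example, with
 * the argument setup for bpf_sk_lookup_tcp() omitted), an acquire/release
 * pair in an eBPF program might look like:
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_lookup_tcp),
 *                                    // R0 is PTR_TO_SOCKET_OR_NULL, ref id allocated
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),  // NULL branch: verifier drops the ref
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),    // non-NULL branch: R0/R1 are PTR_TO_SOCKET
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_release),
 *                                    // reference released, program may now exit
 */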

/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	struct bpf_verifier_stack_elem *next;
	/* length of verifier log at the time this state was pushed on stack */
	u32 log_pos;
};

#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
#define BPF_COMPLEXITY_LIMIT_STATES	64

#define BPF_MAP_KEY_POISON	(1ULL << 63)
#define BPF_MAP_KEY_SEEN	(1ULL << 62)

#define BPF_MAP_PTR_UNPRIV	1UL
#define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
					  POISON_POINTER_DELTA))
#define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))

static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
	return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
}

static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
{
	return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
}

static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
			      const struct bpf_map *map, bool unpriv)
{
	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
	unpriv |= bpf_map_ptr_unpriv(aux);
	aux->map_ptr_state = (unsigned long)map |
			     (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
}

static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
{
	return aux->map_key_state & BPF_MAP_KEY_POISON;
}

static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
{
	return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
}

static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
{
	return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
}

static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
{
	bool poisoned = bpf_map_key_poisoned(aux);

	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
}
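
/* Note: map_ptr_state packs the map pointer with the BPF_MAP_PTR_UNPRIV bit
 * (or holds the BPF_MAP_PTR_POISON sentinel), and map_key_state packs a
 * constant map key with the BPF_MAP_KEY_SEEN/POISON flags defined above.
 */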

static bool bpf_pseudo_call(const struct bpf_insn *insn)
{
	return insn->code == (BPF_JMP | BPF_CALL) &&
	       insn->src_reg == BPF_PSEUDO_CALL;
}

static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
{
	return insn->code == (BPF_JMP | BPF_CALL) &&
	       insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
}

struct bpf_call_arg_meta {
	struct bpf_map *map_ptr;
	bool raw_mode;
	bool pkt_access;
	u8 release_regno;
	int regno;
	int access_size;
	int mem_size;
	u64 msize_max_value;
	int ref_obj_id;
	int map_uid;
	int func_id;
	struct btf *btf;
	u32 btf_id;
	struct btf *ret_btf;
	u32 ret_btf_id;
	u32 subprogno;
	struct bpf_map_value_off_desc *kptr_off_desc;
};

struct btf *btf_vmlinux;

static DEFINE_MUTEX(bpf_verifier_lock);

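/* Find the bpf_line_info covering insn_off, i.e. the last entry whose
 * insn_off does not exceed the given instruction offset.
 */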
static const struct bpf_line_info *
find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
{
	const struct bpf_line_info *linfo;
	const struct bpf_prog *prog;
	u32 i, nr_linfo;

	prog = env->prog;
	nr_linfo = prog->aux->nr_linfo;

	if (!nr_linfo || insn_off >= prog->len)
		return NULL;

	linfo = prog->aux->linfo;
	for (i = 1; i < nr_linfo; i++)
		if (insn_off < linfo[i].insn_off)
			break;

	return &linfo[i - 1];
}

void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
		       va_list args)
{
	unsigned int n;

	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);

	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
		  "verifier log line truncated - local buffer too short\n");

	if (log->level == BPF_LOG_KERNEL) {
		bool newline = n > 0 && log->kbuf[n - 1] == '\n';

		pr_err("BPF: %s%s", log->kbuf, newline ? "" : "\n");
		return;
	}

	n = min(log->len_total - log->len_used - 1, n);
	log->kbuf[n] = '\0';
	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
		log->len_used += n;
	else
		log->ubuf = NULL;
}

static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
{
	char zero = 0;

	if (!bpf_verifier_log_needed(log))
		return;

	log->len_used = new_pos;
	if (put_user(zero, log->ubuf + new_pos))
		log->ubuf = NULL;
}

/* log_level controls verbosity level of eBPF verifier.
 * bpf_verifier_log_write() is used to dump the verification trace to the log,
 * so the user can figure out what's wrong with the program
 */
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
					   const char *fmt, ...)
{
	va_list args;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(&env->log, fmt, args);
	va_end(args);
}
EXPORT_SYMBOL_GPL(bpf_verifier_log_write);

__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
{
	struct bpf_verifier_env *env = private_data;
	va_list args;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(&env->log, fmt, args);
	va_end(args);
}

__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
			    const char *fmt, ...)
{
	va_list args;

	if (!bpf_verifier_log_needed(log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(log, fmt, args);
	va_end(args);
}

static const char *ltrim(const char *s)
{
	while (isspace(*s))
		s++;

	return s;
}

__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
					 u32 insn_off,
					 const char *prefix_fmt, ...)
{
	const struct bpf_line_info *linfo;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	linfo = find_linfo(env, insn_off);
	if (!linfo || linfo == env->prev_linfo)
		return;

	if (prefix_fmt) {
		va_list args;

		va_start(args, prefix_fmt);
		bpf_verifier_vlog(&env->log, prefix_fmt, args);
		va_end(args);
	}

	verbose(env, "%s\n",
		ltrim(btf_name_by_offset(env->prog->aux->btf,
					 linfo->line_off)));

	env->prev_linfo = linfo;
}

static void verbose_invalid_scalar(struct bpf_verifier_env *env,
				   struct bpf_reg_state *reg,
				   struct tnum *range, const char *ctx,
				   const char *reg_name)
{
	char tn_buf[48];

	verbose(env, "At %s the register %s ", ctx, reg_name);
	if (!tnum_is_unknown(reg->var_off)) {
		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "has value %s", tn_buf);
	} else {
		verbose(env, "has unknown scalar value");
	}
	tnum_strn(tn_buf, sizeof(tn_buf), *range);
	verbose(env, " should have been in %s\n", tn_buf);
}

static bool type_is_pkt_pointer(enum bpf_reg_type type)
{
	return type == PTR_TO_PACKET ||
	       type == PTR_TO_PACKET_META;
}

static bool type_is_sk_pointer(enum bpf_reg_type type)
{
	return type == PTR_TO_SOCKET ||
		type == PTR_TO_SOCK_COMMON ||
		type == PTR_TO_TCP_SOCK ||
		type == PTR_TO_XDP_SOCK;
}

static bool reg_type_not_null(enum bpf_reg_type type)
{
	return type == PTR_TO_SOCKET ||
		type == PTR_TO_TCP_SOCK ||
		type == PTR_TO_MAP_VALUE ||
		type == PTR_TO_MAP_KEY ||
		type == PTR_TO_SOCK_COMMON;
}

static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
{
	return reg->type == PTR_TO_MAP_VALUE &&
		map_value_has_spin_lock(reg->map_ptr);
}

static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
{
	return base_type(type) == PTR_TO_SOCKET ||
		base_type(type) == PTR_TO_TCP_SOCK ||
		base_type(type) == PTR_TO_MEM ||
		base_type(type) == PTR_TO_BTF_ID;
}

static bool type_is_rdonly_mem(u32 type)
{
	return type & MEM_RDONLY;
}

static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
{
	return type == ARG_PTR_TO_SOCK_COMMON;
}

static bool type_may_be_null(u32 type)
{
	return type & PTR_MAYBE_NULL;
}

static bool may_be_acquire_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_sk_lookup_tcp ||
		func_id == BPF_FUNC_sk_lookup_udp ||
		func_id == BPF_FUNC_skc_lookup_tcp ||
		func_id == BPF_FUNC_map_lookup_elem ||
		func_id == BPF_FUNC_ringbuf_reserve;
}

static bool is_acquire_function(enum bpf_func_id func_id,
				const struct bpf_map *map)
{
	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;

	if (func_id == BPF_FUNC_sk_lookup_tcp ||
	    func_id == BPF_FUNC_sk_lookup_udp ||
	    func_id == BPF_FUNC_skc_lookup_tcp ||
	    func_id == BPF_FUNC_ringbuf_reserve ||
	    func_id == BPF_FUNC_kptr_xchg)
		return true;

	if (func_id == BPF_FUNC_map_lookup_elem &&
	    (map_type == BPF_MAP_TYPE_SOCKMAP ||
	     map_type == BPF_MAP_TYPE_SOCKHASH))
		return true;

	return false;
}

static bool is_ptr_cast_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_tcp_sock ||
		func_id == BPF_FUNC_sk_fullsock ||
		func_id == BPF_FUNC_skc_to_tcp_sock ||
		func_id == BPF_FUNC_skc_to_tcp6_sock ||
		func_id == BPF_FUNC_skc_to_udp6_sock ||
		func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
		func_id == BPF_FUNC_skc_to_tcp_request_sock;
}

static bool is_cmpxchg_insn(const struct bpf_insn *insn)
{
	return BPF_CLASS(insn->code) == BPF_STX &&
	       BPF_MODE(insn->code) == BPF_ATOMIC &&
	       insn->imm == BPF_CMPXCHG;
}

/* string representation of 'enum bpf_reg_type'
 *
 * Note that reg_type_str() can not appear more than once in a single verbose()
 * statement.
 */
static const char *reg_type_str(struct bpf_verifier_env *env,
				enum bpf_reg_type type)
{
	char postfix[16] = {0}, prefix[32] = {0};
	static const char * const str[] = {
		[NOT_INIT]		= "?",
		[SCALAR_VALUE]		= "scalar",
		[PTR_TO_CTX]		= "ctx",
		[CONST_PTR_TO_MAP]	= "map_ptr",
		[PTR_TO_MAP_VALUE]	= "map_value",
		[PTR_TO_STACK]		= "fp",
		[PTR_TO_PACKET]		= "pkt",
		[PTR_TO_PACKET_META]	= "pkt_meta",
		[PTR_TO_PACKET_END]	= "pkt_end",
		[PTR_TO_FLOW_KEYS]	= "flow_keys",
		[PTR_TO_SOCKET]		= "sock",
		[PTR_TO_SOCK_COMMON]	= "sock_common",
		[PTR_TO_TCP_SOCK]	= "tcp_sock",
		[PTR_TO_TP_BUFFER]	= "tp_buffer",
		[PTR_TO_XDP_SOCK]	= "xdp_sock",
		[PTR_TO_BTF_ID]		= "ptr_",
		[PTR_TO_MEM]		= "mem",
		[PTR_TO_BUF]		= "buf",
		[PTR_TO_FUNC]		= "func",
		[PTR_TO_MAP_KEY]	= "map_key",
	};

	if (type & PTR_MAYBE_NULL) {
		if (base_type(type) == PTR_TO_BTF_ID)
			strncpy(postfix, "or_null_", 16);
		else
			strncpy(postfix, "_or_null", 16);
	}

	if (type & MEM_RDONLY)
		strncpy(prefix, "rdonly_", 32);
	if (type & MEM_ALLOC)
		strncpy(prefix, "alloc_", 32);
	if (type & MEM_USER)
		strncpy(prefix, "user_", 32);
	if (type & MEM_PERCPU)
		strncpy(prefix, "percpu_", 32);
	if (type & PTR_UNTRUSTED)
		strncpy(prefix, "untrusted_", 32);

	snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
		 prefix, str[base_type(type)], postfix);
	return env->type_str_buf;
}
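
/* For example, PTR_TO_MAP_VALUE | PTR_MAYBE_NULL is rendered as
 * "map_value_or_null" and PTR_TO_BUF | MEM_RDONLY as "rdonly_buf".
 */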

static char slot_type_char[] = {
	[STACK_INVALID]	= '?',
	[STACK_SPILL]	= 'r',
	[STACK_MISC]	= 'm',
	[STACK_ZERO]	= '0',
};

static void print_liveness(struct bpf_verifier_env *env,
			   enum bpf_reg_liveness live)
{
	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
		verbose(env, "_");
	if (live & REG_LIVE_READ)
		verbose(env, "r");
	if (live & REG_LIVE_WRITTEN)
		verbose(env, "w");
	if (live & REG_LIVE_DONE)
		verbose(env, "D");
}

static struct bpf_func_state *func(struct bpf_verifier_env *env,
				   const struct bpf_reg_state *reg)
{
	struct bpf_verifier_state *cur = env->cur_state;

	return cur->frame[reg->frameno];
}

static const char *kernel_type_name(const struct btf* btf, u32 id)
{
	return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
}

static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
{
	env->scratched_regs |= 1U << regno;
}

static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
{
	env->scratched_stack_slots |= 1ULL << spi;
}

static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
{
	return (env->scratched_regs >> regno) & 1;
}

static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno)
{
	return (env->scratched_stack_slots >> regno) & 1;
}

static bool verifier_state_scratched(const struct bpf_verifier_env *env)
{
	return env->scratched_regs || env->scratched_stack_slots;
}

static void mark_verifier_state_clean(struct bpf_verifier_env *env)
{
	env->scratched_regs = 0U;
	env->scratched_stack_slots = 0ULL;
}

/* Used for printing the entire verifier state. */
static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
{
	env->scratched_regs = ~0U;
	env->scratched_stack_slots = ~0ULL;
}

/* The reg state of a pointer or a bounded scalar was saved when
 * it was spilled to the stack.
 */
static bool is_spilled_reg(const struct bpf_stack_state *stack)
{
	return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
}

static void scrub_spilled_slot(u8 *stype)
{
	if (*stype != STACK_INVALID)
		*stype = STACK_MISC;
}

static void print_verifier_state(struct bpf_verifier_env *env,
				 const struct bpf_func_state *state,
				 bool print_all)
{
	const struct bpf_reg_state *reg;
	enum bpf_reg_type t;
	int i;

	if (state->frameno)
		verbose(env, " frame%d:", state->frameno);
	for (i = 0; i < MAX_BPF_REG; i++) {
		reg = &state->regs[i];
		t = reg->type;
		if (t == NOT_INIT)
			continue;
		if (!print_all && !reg_scratched(env, i))
			continue;
		verbose(env, " R%d", i);
		print_liveness(env, reg->live);
		verbose(env, "=");
		if (t == SCALAR_VALUE && reg->precise)
			verbose(env, "P");
		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
		    tnum_is_const(reg->var_off)) {
			/* reg->off should be 0 for SCALAR_VALUE */
			verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
			verbose(env, "%lld", reg->var_off.value + reg->off);
		} else {
			const char *sep = "";

			verbose(env, "%s", reg_type_str(env, t));
			if (base_type(t) == PTR_TO_BTF_ID)
				verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
			verbose(env, "(");
/*
 * _a stands for append, was shortened to avoid multiline statements below.
 * This macro is used to output a comma separated list of attributes.
 */
#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; })

			if (reg->id)
				verbose_a("id=%d", reg->id);
			if (reg_type_may_be_refcounted_or_null(t) && reg->ref_obj_id)
				verbose_a("ref_obj_id=%d", reg->ref_obj_id);
			if (t != SCALAR_VALUE)
				verbose_a("off=%d", reg->off);
			if (type_is_pkt_pointer(t))
				verbose_a("r=%d", reg->range);
			else if (base_type(t) == CONST_PTR_TO_MAP ||
				 base_type(t) == PTR_TO_MAP_KEY ||
				 base_type(t) == PTR_TO_MAP_VALUE)
				verbose_a("ks=%d,vs=%d",
					  reg->map_ptr->key_size,
					  reg->map_ptr->value_size);
			if (tnum_is_const(reg->var_off)) {
				/* Typically an immediate SCALAR_VALUE, but
				 * could be a pointer whose offset is too big
				 * for reg->off
				 */
				verbose_a("imm=%llx", reg->var_off.value);
			} else {
				if (reg->smin_value != reg->umin_value &&
				    reg->smin_value != S64_MIN)
					verbose_a("smin=%lld", (long long)reg->smin_value);
				if (reg->smax_value != reg->umax_value &&
				    reg->smax_value != S64_MAX)
					verbose_a("smax=%lld", (long long)reg->smax_value);
				if (reg->umin_value != 0)
					verbose_a("umin=%llu", (unsigned long long)reg->umin_value);
				if (reg->umax_value != U64_MAX)
					verbose_a("umax=%llu", (unsigned long long)reg->umax_value);
				if (!tnum_is_unknown(reg->var_off)) {
					char tn_buf[48];

					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
					verbose_a("var_off=%s", tn_buf);
				}
				if (reg->s32_min_value != reg->smin_value &&
				    reg->s32_min_value != S32_MIN)
					verbose_a("s32_min=%d", (int)(reg->s32_min_value));
				if (reg->s32_max_value != reg->smax_value &&
				    reg->s32_max_value != S32_MAX)
					verbose_a("s32_max=%d", (int)(reg->s32_max_value));
				if (reg->u32_min_value != reg->umin_value &&
				    reg->u32_min_value != U32_MIN)
					verbose_a("u32_min=%d", (int)(reg->u32_min_value));
				if (reg->u32_max_value != reg->umax_value &&
				    reg->u32_max_value != U32_MAX)
					verbose_a("u32_max=%d", (int)(reg->u32_max_value));
			}
#undef verbose_a

			verbose(env, ")");
		}
	}
	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
		char types_buf[BPF_REG_SIZE + 1];
		bool valid = false;
		int j;

		for (j = 0; j < BPF_REG_SIZE; j++) {
			if (state->stack[i].slot_type[j] != STACK_INVALID)
				valid = true;
			types_buf[j] = slot_type_char[
					state->stack[i].slot_type[j]];
		}
		types_buf[BPF_REG_SIZE] = 0;
		if (!valid)
			continue;
		if (!print_all && !stack_slot_scratched(env, i))
			continue;
		verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
		print_liveness(env, state->stack[i].spilled_ptr.live);
		if (is_spilled_reg(&state->stack[i])) {
			reg = &state->stack[i].spilled_ptr;
			t = reg->type;
			verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
			if (t == SCALAR_VALUE && reg->precise)
				verbose(env, "P");
			if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
				verbose(env, "%lld", reg->var_off.value + reg->off);
		} else {
			verbose(env, "=%s", types_buf);
		}
	}
	if (state->acquired_refs && state->refs[0].id) {
		verbose(env, " refs=%d", state->refs[0].id);
		for (i = 1; i < state->acquired_refs; i++)
			if (state->refs[i].id)
				verbose(env, ",%d", state->refs[i].id);
	}
	if (state->in_callback_fn)
		verbose(env, " cb");
	if (state->in_async_callback_fn)
		verbose(env, " async_cb");
	verbose(env, "\n");
	mark_verifier_state_clean(env);
}

static inline u32 vlog_alignment(u32 pos)
{
	return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT),
			BPF_LOG_MIN_ALIGNMENT) - pos - 1;
}

static void print_insn_state(struct bpf_verifier_env *env,
			     const struct bpf_func_state *state)
{
	if (env->prev_log_len && env->prev_log_len == env->log.len_used) {
		/* remove new line character */
		bpf_vlog_reset(&env->log, env->prev_log_len - 1);
		verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_len), ' ');
	} else {
		verbose(env, "%d:", env->insn_idx);
	}
	print_verifier_state(env, state, false);
}

/* copy array src of length n * size bytes to dst. dst is reallocated if it's too
 * small to hold src. This is different from krealloc since we don't want to preserve
 * the contents of dst.
 *
 * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
 * not be allocated.
 */
static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
{
	size_t bytes;

	if (ZERO_OR_NULL_PTR(src))
		goto out;

	if (unlikely(check_mul_overflow(n, size, &bytes)))
		return NULL;

	if (ksize(dst) < bytes) {
		kfree(dst);
		dst = kmalloc_track_caller(bytes, flags);
		if (!dst)
			return NULL;
	}

	memcpy(dst, src, bytes);
out:
	return dst ? dst : ZERO_SIZE_PTR;
}

/* resize an array from old_n items to new_n items. the array is reallocated if it's too
 * small to hold new_n items. new items are zeroed out if the array grows.
 *
 * Contrary to krealloc_array, does not free arr if new_n is zero.
 */
static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
{
	if (!new_n || old_n == new_n)
		goto out;

	arr = krealloc_array(arr, new_n, size, GFP_KERNEL);
	if (!arr)
		return NULL;

	if (new_n > old_n)
		memset(arr + old_n * size, 0, (new_n - old_n) * size);

out:
	return arr ? arr : ZERO_SIZE_PTR;
}

static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
{
	dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
			       sizeof(struct bpf_reference_state), GFP_KERNEL);
	if (!dst->refs)
		return -ENOMEM;

	dst->acquired_refs = src->acquired_refs;
	return 0;
}

static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
{
	size_t n = src->allocated_stack / BPF_REG_SIZE;

	dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
				GFP_KERNEL);
	if (!dst->stack)
		return -ENOMEM;

	dst->allocated_stack = src->allocated_stack;
	return 0;
}

static int resize_reference_state(struct bpf_func_state *state, size_t n)
{
	state->refs = realloc_array(state->refs, state->acquired_refs, n,
				    sizeof(struct bpf_reference_state));
	if (!state->refs)
		return -ENOMEM;

	state->acquired_refs = n;
	return 0;
}

static int grow_stack_state(struct bpf_func_state *state, int size)
{
	size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;

	if (old_n >= n)
		return 0;

	state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
	if (!state->stack)
		return -ENOMEM;

	state->allocated_stack = size;
	return 0;
}

/* Acquire a pointer id from the env and update the state->refs to include
 * this new pointer reference.
 * On success, returns a valid pointer id to associate with the register
 * On failure, returns a negative errno.
 */
static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_func_state *state = cur_func(env);
	int new_ofs = state->acquired_refs;
	int id, err;

	err = resize_reference_state(state, state->acquired_refs + 1);
	if (err)
		return err;
	id = ++env->id_gen;
	state->refs[new_ofs].id = id;
	state->refs[new_ofs].insn_idx = insn_idx;

	return id;
}

/* release function corresponding to acquire_reference_state(). Idempotent. */
static int release_reference_state(struct bpf_func_state *state, int ptr_id)
{
	int i, last_idx;

	last_idx = state->acquired_refs - 1;
	for (i = 0; i < state->acquired_refs; i++) {
		if (state->refs[i].id == ptr_id) {
			if (last_idx && i != last_idx)
				memcpy(&state->refs[i], &state->refs[last_idx],
				       sizeof(*state->refs));
			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
			state->acquired_refs--;
			return 0;
		}
	}
	return -EINVAL;
}

static void free_func_state(struct bpf_func_state *state)
{
	if (!state)
		return;
	kfree(state->refs);
	kfree(state->stack);
	kfree(state);
}

static void clear_jmp_history(struct bpf_verifier_state *state)
{
	kfree(state->jmp_history);
	state->jmp_history = NULL;
	state->jmp_history_cnt = 0;
}

static void free_verifier_state(struct bpf_verifier_state *state,
				bool free_self)
{
	int i;

	for (i = 0; i <= state->curframe; i++) {
		free_func_state(state->frame[i]);
		state->frame[i] = NULL;
	}
	clear_jmp_history(state);
	if (free_self)
		kfree(state);
}

/* copy verifier state from src to dst growing dst stack space
 * when necessary to accommodate larger src stack
 */
static int copy_func_state(struct bpf_func_state *dst,
			   const struct bpf_func_state *src)
{
	int err;

	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
	err = copy_reference_state(dst, src);
	if (err)
		return err;
	return copy_stack_state(dst, src);
}

static int copy_verifier_state(struct bpf_verifier_state *dst_state,
			       const struct bpf_verifier_state *src)
{
	struct bpf_func_state *dst;
	int i, err;

	dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
					    src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
					    GFP_USER);
	if (!dst_state->jmp_history)
		return -ENOMEM;
	dst_state->jmp_history_cnt = src->jmp_history_cnt;

	/* if dst has more stack frames than src frame, free them */
	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
		free_func_state(dst_state->frame[i]);
		dst_state->frame[i] = NULL;
	}
	dst_state->speculative = src->speculative;
	dst_state->curframe = src->curframe;
	dst_state->active_spin_lock = src->active_spin_lock;
	dst_state->branches = src->branches;
	dst_state->parent = src->parent;
	dst_state->first_insn_idx = src->first_insn_idx;
	dst_state->last_insn_idx = src->last_insn_idx;
	for (i = 0; i <= src->curframe; i++) {
		dst = dst_state->frame[i];
		if (!dst) {
			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
			if (!dst)
				return -ENOMEM;
			dst_state->frame[i] = dst;
		}
		err = copy_func_state(dst, src->frame[i]);
		if (err)
			return err;
	}
	return 0;
}

static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
	while (st) {
		u32 br = --st->branches;

		/* WARN_ON(br > 1) technically makes sense here,
		 * but see comment in push_stack(), hence:
		 */
		WARN_ONCE((int)br < 0,
			  "BUG update_branch_counts:branches_to_explore=%d\n",
			  br);
		if (br)
			break;
		st = st->parent;
	}
}

static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
		     int *insn_idx, bool pop_log)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem, *head = env->head;
	int err;

	if (env->head == NULL)
		return -ENOENT;

	if (cur) {
		err = copy_verifier_state(cur, &head->st);
		if (err)
			return err;
	}
	if (pop_log)
		bpf_vlog_reset(&env->log, head->log_pos);
	if (insn_idx)
		*insn_idx = head->insn_idx;
	if (prev_insn_idx)
		*prev_insn_idx = head->prev_insn_idx;
	elem = head->next;
	free_verifier_state(&head->st, false);
	kfree(head);
	env->head = elem;
	env->stack_size--;
	return 0;
}

static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
					     int insn_idx, int prev_insn_idx,
					     bool speculative)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem;
	int err;

	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
	if (!elem)
		goto err;

	elem->insn_idx = insn_idx;
	elem->prev_insn_idx = prev_insn_idx;
	elem->next = env->head;
	elem->log_pos = env->log.len_used;
	env->head = elem;
	env->stack_size++;
	err = copy_verifier_state(&elem->st, cur);
	if (err)
		goto err;
	elem->st.speculative |= speculative;
	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
		verbose(env, "The sequence of %d jumps is too complex.\n",
			env->stack_size);
		goto err;
	}
	if (elem->st.parent) {
		++elem->st.parent->branches;
		/* WARN_ON(branches > 2) technically makes sense here,
		 * but
		 * 1. speculative states will bump 'branches' for non-branch
		 *    instructions
		 * 2. is_state_visited() heuristics may decide not to create
		 *    a new state for a sequence of branches and all such current
		 *    and cloned states will be pointing to a single parent state
		 *    which might have large 'branches' count.
		 */
	}
	return &elem->st;
err:
	free_verifier_state(env->cur_state, true);
	env->cur_state = NULL;
	/* pop all elements and return */
	while (!pop_stack(env, NULL, NULL, false));
	return NULL;
}

#define CALLER_SAVED_REGS 6
static const int caller_saved[CALLER_SAVED_REGS] = {
	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};

static void __mark_reg_not_init(const struct bpf_verifier_env *env,
				struct bpf_reg_state *reg);

/* This helper doesn't clear reg->id */
static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	reg->var_off = tnum_const(imm);
	reg->smin_value = (s64)imm;
	reg->smax_value = (s64)imm;
	reg->umin_value = imm;
	reg->umax_value = imm;

	reg->s32_min_value = (s32)imm;
	reg->s32_max_value = (s32)imm;
	reg->u32_min_value = (u32)imm;
	reg->u32_max_value = (u32)imm;
}

/* Mark the unknown part of a register (variable offset or scalar value) as
 * known to have the value @imm.
 */
static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	/* Clear id, off, and union(map_ptr, range) */
	memset(((u8 *)reg) + sizeof(reg->type), 0,
	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
	___mark_reg_known(reg, imm);
}

static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
{
	reg->var_off = tnum_const_subreg(reg->var_off, imm);
	reg->s32_min_value = (s32)imm;
	reg->s32_max_value = (s32)imm;
	reg->u32_min_value = (u32)imm;
	reg->u32_max_value = (u32)imm;
}

/* Mark the 'variable offset' part of a register as zero. This should be
 * used only on registers holding a pointer type.
 */
static void __mark_reg_known_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
}

static void __mark_reg_const_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
	reg->type = SCALAR_VALUE;
}

static void mark_reg_known_zero(struct bpf_verifier_env *env,
				struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs */
		for (regno = 0; regno < MAX_BPF_REG; regno++)
			__mark_reg_not_init(env, regs + regno);
		return;
	}
	__mark_reg_known_zero(regs + regno);
}

static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
{
	if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
		const struct bpf_map *map = reg->map_ptr;

		if (map->inner_map_meta) {
			reg->type = CONST_PTR_TO_MAP;
			reg->map_ptr = map->inner_map_meta;
			/* transfer reg's id which is unique for every map_lookup_elem
			 * as UID of the inner map.
			 */
			if (map_value_has_timer(map->inner_map_meta))
				reg->map_uid = reg->id;
		} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
			reg->type = PTR_TO_XDP_SOCK;
		} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
			   map->map_type == BPF_MAP_TYPE_SOCKHASH) {
			reg->type = PTR_TO_SOCKET;
		} else {
			reg->type = PTR_TO_MAP_VALUE;
		}
		return;
	}

	reg->type &= ~PTR_MAYBE_NULL;
}

static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
{
	return type_is_pkt_pointer(reg->type);
}

static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
{
	return reg_is_pkt_pointer(reg) ||
	       reg->type == PTR_TO_PACKET_END;
}

/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
				    enum bpf_reg_type which)
{
	/* The register can already have a range from prior markings.
	 * This is fine as long as it hasn't been advanced from its
	 * origin.
	 */
	return reg->type == which &&
	       reg->id == 0 &&
	       reg->off == 0 &&
	       tnum_equals_const(reg->var_off, 0);
}

/* Reset the min/max bounds of a register */
static void __mark_reg_unbounded(struct bpf_reg_state *reg)
{
	reg->smin_value = S64_MIN;
	reg->smax_value = S64_MAX;
	reg->umin_value = 0;
	reg->umax_value = U64_MAX;

	reg->s32_min_value = S32_MIN;
	reg->s32_max_value = S32_MAX;
	reg->u32_min_value = 0;
	reg->u32_max_value = U32_MAX;
}

static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
{
	reg->smin_value = S64_MIN;
	reg->smax_value = S64_MAX;
	reg->umin_value = 0;
	reg->umax_value = U64_MAX;
}

static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
{
	reg->s32_min_value = S32_MIN;
	reg->s32_max_value = S32_MAX;
	reg->u32_min_value = 0;
	reg->u32_max_value = U32_MAX;
}

static void __update_reg32_bounds(struct bpf_reg_state *reg)
{
	struct tnum var32_off = tnum_subreg(reg->var_off);

	/* min signed is max(sign bit) | min(other bits) */
	reg->s32_min_value = max_t(s32, reg->s32_min_value,
				   var32_off.value | (var32_off.mask & S32_MIN));
	/* max signed is min(sign bit) | max(other bits) */
	reg->s32_max_value = min_t(s32, reg->s32_max_value,
				   var32_off.value | (var32_off.mask & S32_MAX));
	reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
	reg->u32_max_value = min(reg->u32_max_value,
				 (u32)(var32_off.value | var32_off.mask));
}

static void __update_reg64_bounds(struct bpf_reg_state *reg)
{
	/* min signed is max(sign bit) | min(other bits) */
	reg->smin_value = max_t(s64, reg->smin_value,
				reg->var_off.value | (reg->var_off.mask & S64_MIN));
	/* max signed is min(sign bit) | max(other bits) */
	reg->smax_value = min_t(s64, reg->smax_value,
				reg->var_off.value | (reg->var_off.mask & S64_MAX));
	reg->umin_value = max(reg->umin_value, reg->var_off.value);
	reg->umax_value = min(reg->umax_value,
			      reg->var_off.value | reg->var_off.mask);
}

static void __update_reg_bounds(struct bpf_reg_state *reg)
{
	__update_reg32_bounds(reg);
	__update_reg64_bounds(reg);
}

/* Uses signed min/max values to inform unsigned, and vice-versa */
static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
{
	/* Learn sign from signed bounds.
	 * If we cannot cross the sign boundary, then signed and unsigned bounds
	 * are the same, so combine. This works even in the negative case, e.g.
	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
	 */
	if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
		reg->s32_min_value = reg->u32_min_value =
			max_t(u32, reg->s32_min_value, reg->u32_min_value);
		reg->s32_max_value = reg->u32_max_value =
			min_t(u32, reg->s32_max_value, reg->u32_max_value);
		return;
	}
	/* Learn sign from unsigned bounds. Signed bounds cross the sign
	 * boundary, so we must be careful.
	 */
	if ((s32)reg->u32_max_value >= 0) {
		/* Positive. We can't learn anything from the smin, but smax
		 * is positive, hence safe.
		 */
		reg->s32_min_value = reg->u32_min_value;
		reg->s32_max_value = reg->u32_max_value =
			min_t(u32, reg->s32_max_value, reg->u32_max_value);
	} else if ((s32)reg->u32_min_value < 0) {
		/* Negative. We can't learn anything from the smax, but smin
		 * is negative, hence safe.
		 */
		reg->s32_min_value = reg->u32_min_value =
			max_t(u32, reg->s32_min_value, reg->u32_min_value);
		reg->s32_max_value = reg->u32_max_value;
	}
}

static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
{
	/* Learn sign from signed bounds.
	 * If we cannot cross the sign boundary, then signed and unsigned bounds
	 * are the same, so combine. This works even in the negative case, e.g.
	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
	 */
	if (reg->smin_value >= 0 || reg->smax_value < 0) {
		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
							  reg->umin_value);
		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
							  reg->umax_value);
		return;
	}
	/* Learn sign from unsigned bounds. Signed bounds cross the sign
	 * boundary, so we must be careful.
	 */
	if ((s64)reg->umax_value >= 0) {
		/* Positive. We can't learn anything from the smin, but smax
		 * is positive, hence safe.
		 */
		reg->smin_value = reg->umin_value;
		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
							  reg->umax_value);
	} else if ((s64)reg->umin_value < 0) {
		/* Negative. We can't learn anything from the smax, but smin
		 * is negative, hence safe.
		 */
		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
							  reg->umin_value);
		reg->smax_value = reg->umax_value;
	}
}

static void __reg_deduce_bounds(struct bpf_reg_state *reg)
{
	__reg32_deduce_bounds(reg);
	__reg64_deduce_bounds(reg);
}

/* Attempts to improve var_off based on unsigned min/max information */
static void __reg_bound_offset(struct bpf_reg_state *reg)
{
	struct tnum var64_off = tnum_intersect(reg->var_off,
					       tnum_range(reg->umin_value,
							  reg->umax_value));
	struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
					       tnum_range(reg->u32_min_value,
							  reg->u32_max_value));

	reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
}
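
/* For example, if var_off is fully unknown in the low byte (value 0, mask 0xff)
 * but umin_value == umax_value == 4, then tnum_range(4, 4) is the constant 4
 * and the intersection above narrows var_off down to that constant as well.
 */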

static bool __reg32_bound_s64(s32 a)
{
	return a >= 0 && a <= S32_MAX;
}

static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
{
	reg->umin_value = reg->u32_min_value;
	reg->umax_value = reg->u32_max_value;

	/* Attempt to pull 32-bit signed bounds into 64-bit bounds but must
	 * be positive otherwise set to worse case bounds and refine later
	 * from tnum.
	 */
	if (__reg32_bound_s64(reg->s32_min_value) &&
	    __reg32_bound_s64(reg->s32_max_value)) {
		reg->smin_value = reg->s32_min_value;
		reg->smax_value = reg->s32_max_value;
	} else {
		reg->smin_value = 0;
		reg->smax_value = U32_MAX;
	}
}

static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
{
	/* special case when 64-bit register has upper 32-bit register
	 * zeroed. Typically happens after zext or <<32, >>32 sequence
	 * allowing us to use 32-bit bounds directly,
	 */
	if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
		__reg_assign_32_into_64(reg);
	} else {
		/* Otherwise the best we can do is push lower 32bit known and
		 * unknown bits into register (var_off set from jmp logic)
		 * then learn as much as possible from the 64-bit tnum
		 * known and unknown bits. The previous smin/smax bounds are
		 * invalid here because of jmp32 compare so mark them unknown
		 * so they do not impact tnum bounds calculation.
		 */
		__mark_reg64_unbounded(reg);
		__update_reg_bounds(reg);
	}

	/* Intersecting with the old var_off might have improved our bounds
	 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
	 * then new var_off is (0; 0x7f...fc) which improves our umax.
	 */
	__reg_deduce_bounds(reg);
	__reg_bound_offset(reg);
	__update_reg_bounds(reg);
}

static bool __reg64_bound_s32(s64 a)
{
	return a >= S32_MIN && a <= S32_MAX;
}

static bool __reg64_bound_u32(u64 a)
{
	return a >= U32_MIN && a <= U32_MAX;
}

static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
{
	__mark_reg32_unbounded(reg);

	if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
		reg->s32_min_value = (s32)reg->smin_value;
		reg->s32_max_value = (s32)reg->smax_value;
	}
	if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
		reg->u32_min_value = (u32)reg->umin_value;
		reg->u32_max_value = (u32)reg->umax_value;
	}

	/* Intersecting with the old var_off might have improved our bounds
	 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
	 * then new var_off is (0; 0x7f...fc) which improves our umax.
	 */
	__reg_deduce_bounds(reg);
	__reg_bound_offset(reg);
	__update_reg_bounds(reg);
}

/* Mark a register as having a completely unknown (scalar) value. */
static void __mark_reg_unknown(const struct bpf_verifier_env *env,
			       struct bpf_reg_state *reg)
{
	/*
	 * Clear type, id, off, and union(map_ptr, range) and
	 * padding between 'type' and union
	 */
	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
	reg->type = SCALAR_VALUE;
	reg->var_off = tnum_unknown;
	reg->frameno = 0;
	reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
	__mark_reg_unbounded(reg);
}

static void mark_reg_unknown(struct bpf_verifier_env *env,
			     struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs except FP */
		for (regno = 0; regno < BPF_REG_FP; regno++)
			__mark_reg_not_init(env, regs + regno);
		return;
	}
	__mark_reg_unknown(env, regs + regno);
}

static void __mark_reg_not_init(const struct bpf_verifier_env *env,
				struct bpf_reg_state *reg)
{
	__mark_reg_unknown(env, reg);
	reg->type = NOT_INIT;
}

static void mark_reg_not_init(struct bpf_verifier_env *env,
			      struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs except FP */
		for (regno = 0; regno < BPF_REG_FP; regno++)
			__mark_reg_not_init(env, regs + regno);
		return;
	}
	__mark_reg_not_init(env, regs + regno);
}

static void mark_btf_ld_reg(struct bpf_verifier_env *env,
			    struct bpf_reg_state *regs, u32 regno,
			    enum bpf_reg_type reg_type,
			    struct btf *btf, u32 btf_id,
			    enum bpf_type_flag flag)
{
	if (reg_type == SCALAR_VALUE) {
		mark_reg_unknown(env, regs, regno);
		return;
	}
	mark_reg_known_zero(env, regs, regno);
	regs[regno].type = PTR_TO_BTF_ID | flag;
	regs[regno].btf = btf;
	regs[regno].btf_id = btf_id;
}

#define DEF_NOT_SUBREG	(0)
static void init_reg_state(struct bpf_verifier_env *env,
			   struct bpf_func_state *state)
{
	struct bpf_reg_state *regs = state->regs;
	int i;

	for (i = 0; i < MAX_BPF_REG; i++) {
		mark_reg_not_init(env, regs, i);
		regs[i].live = REG_LIVE_NONE;
		regs[i].parent = NULL;
		regs[i].subreg_def = DEF_NOT_SUBREG;
	}

	/* frame pointer */
	regs[BPF_REG_FP].type = PTR_TO_STACK;
	mark_reg_known_zero(env, regs, BPF_REG_FP);
	regs[BPF_REG_FP].frameno = state->frameno;
}

#define BPF_MAIN_FUNC (-1)
static void init_func_state(struct bpf_verifier_env *env,
			    struct bpf_func_state *state,
			    int callsite, int frameno, int subprogno)
{
	state->callsite = callsite;
	state->frameno = frameno;
	state->subprogno = subprogno;
	init_reg_state(env, state);
	mark_verifier_state_scratched(env);
}

bfc6bb74
AS
1596/* Similar to push_stack(), but for async callbacks */
1597static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
1598 int insn_idx, int prev_insn_idx,
1599 int subprog)
1600{
1601 struct bpf_verifier_stack_elem *elem;
1602 struct bpf_func_state *frame;
1603
1604 elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1605 if (!elem)
1606 goto err;
1607
1608 elem->insn_idx = insn_idx;
1609 elem->prev_insn_idx = prev_insn_idx;
1610 elem->next = env->head;
1611 elem->log_pos = env->log.len_used;
1612 env->head = elem;
1613 env->stack_size++;
1614 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1615 verbose(env,
1616 "The sequence of %d jumps is too complex for async cb.\n",
1617 env->stack_size);
1618 goto err;
1619 }
1620 /* Unlike push_stack() do not copy_verifier_state().
1621 * The caller state doesn't matter.
 1622 * This is an async callback. It starts with a fresh stack.
 1623 * Initialize it similarly to do_check_common().
1624 */
1625 elem->st.branches = 1;
1626 frame = kzalloc(sizeof(*frame), GFP_KERNEL);
1627 if (!frame)
1628 goto err;
1629 init_func_state(env, frame,
1630 BPF_MAIN_FUNC /* callsite */,
1631 0 /* frameno within this callchain */,
1632 subprog /* subprog number within this prog */);
1633 elem->st.frame[0] = frame;
1634 return &elem->st;
1635err:
1636 free_verifier_state(env->cur_state, true);
1637 env->cur_state = NULL;
1638 /* pop all elements and return */
1639 while (!pop_stack(env, NULL, NULL, false));
1640 return NULL;
1641}
1642
1643
17a52670
AS
1644enum reg_arg_type {
1645 SRC_OP, /* register is used as source operand */
1646 DST_OP, /* register is used as destination operand */
1647 DST_OP_NO_MARK /* same as above, check only, don't mark */
1648};
1649
cc8b0b92
AS
1650static int cmp_subprogs(const void *a, const void *b)
1651{
9c8105bd
JW
1652 return ((struct bpf_subprog_info *)a)->start -
1653 ((struct bpf_subprog_info *)b)->start;
cc8b0b92
AS
1654}
1655
1656static int find_subprog(struct bpf_verifier_env *env, int off)
1657{
9c8105bd 1658 struct bpf_subprog_info *p;
cc8b0b92 1659
9c8105bd
JW
1660 p = bsearch(&off, env->subprog_info, env->subprog_cnt,
1661 sizeof(env->subprog_info[0]), cmp_subprogs);
cc8b0b92
AS
1662 if (!p)
1663 return -ENOENT;
9c8105bd 1664 return p - env->subprog_info;
cc8b0b92
AS
1665
1666}
1667
1668static int add_subprog(struct bpf_verifier_env *env, int off)
1669{
1670 int insn_cnt = env->prog->len;
1671 int ret;
1672
1673 if (off >= insn_cnt || off < 0) {
1674 verbose(env, "call to invalid destination\n");
1675 return -EINVAL;
1676 }
1677 ret = find_subprog(env, off);
1678 if (ret >= 0)
282a0f46 1679 return ret;
4cb3d99c 1680 if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
cc8b0b92
AS
1681 verbose(env, "too many subprograms\n");
1682 return -E2BIG;
1683 }
e6ac2450 1684 /* determine subprog starts. The end is one before the next starts */
9c8105bd
JW
1685 env->subprog_info[env->subprog_cnt++].start = off;
1686 sort(env->subprog_info, env->subprog_cnt,
1687 sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
282a0f46 1688 return env->subprog_cnt - 1;
cc8b0b92
AS
1689}
1690
2357672c
KKD
1691#define MAX_KFUNC_DESCS 256
1692#define MAX_KFUNC_BTFS 256
1693
e6ac2450
MKL
1694struct bpf_kfunc_desc {
1695 struct btf_func_model func_model;
1696 u32 func_id;
1697 s32 imm;
2357672c
KKD
1698 u16 offset;
1699};
1700
1701struct bpf_kfunc_btf {
1702 struct btf *btf;
1703 struct module *module;
1704 u16 offset;
e6ac2450
MKL
1705};
1706
e6ac2450
MKL
1707struct bpf_kfunc_desc_tab {
1708 struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
1709 u32 nr_descs;
1710};
1711
2357672c
KKD
1712struct bpf_kfunc_btf_tab {
1713 struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
1714 u32 nr_descs;
1715};
1716
1717static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
e6ac2450
MKL
1718{
1719 const struct bpf_kfunc_desc *d0 = a;
1720 const struct bpf_kfunc_desc *d1 = b;
1721
1722 /* func_id is not greater than BTF_MAX_TYPE */
2357672c
KKD
1723 return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
1724}
1725
1726static int kfunc_btf_cmp_by_off(const void *a, const void *b)
1727{
1728 const struct bpf_kfunc_btf *d0 = a;
1729 const struct bpf_kfunc_btf *d1 = b;
1730
1731 return d0->offset - d1->offset;
e6ac2450
MKL
1732}
1733
1734static const struct bpf_kfunc_desc *
2357672c 1735find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
e6ac2450
MKL
1736{
1737 struct bpf_kfunc_desc desc = {
1738 .func_id = func_id,
2357672c 1739 .offset = offset,
e6ac2450
MKL
1740 };
1741 struct bpf_kfunc_desc_tab *tab;
1742
1743 tab = prog->aux->kfunc_tab;
1744 return bsearch(&desc, tab->descs, tab->nr_descs,
2357672c
KKD
1745 sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
1746}
1747
1748static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
b202d844 1749 s16 offset)
2357672c
KKD
1750{
1751 struct bpf_kfunc_btf kf_btf = { .offset = offset };
1752 struct bpf_kfunc_btf_tab *tab;
1753 struct bpf_kfunc_btf *b;
1754 struct module *mod;
1755 struct btf *btf;
1756 int btf_fd;
1757
1758 tab = env->prog->aux->kfunc_btf_tab;
1759 b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
1760 sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
1761 if (!b) {
1762 if (tab->nr_descs == MAX_KFUNC_BTFS) {
1763 verbose(env, "too many different module BTFs\n");
1764 return ERR_PTR(-E2BIG);
1765 }
1766
1767 if (bpfptr_is_null(env->fd_array)) {
1768 verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
1769 return ERR_PTR(-EPROTO);
1770 }
1771
1772 if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
1773 offset * sizeof(btf_fd),
1774 sizeof(btf_fd)))
1775 return ERR_PTR(-EFAULT);
1776
1777 btf = btf_get_by_fd(btf_fd);
588cd7ef
KKD
1778 if (IS_ERR(btf)) {
1779 verbose(env, "invalid module BTF fd specified\n");
2357672c 1780 return btf;
588cd7ef 1781 }
2357672c
KKD
1782
1783 if (!btf_is_module(btf)) {
1784 verbose(env, "BTF fd for kfunc is not a module BTF\n");
1785 btf_put(btf);
1786 return ERR_PTR(-EINVAL);
1787 }
1788
1789 mod = btf_try_get_module(btf);
1790 if (!mod) {
1791 btf_put(btf);
1792 return ERR_PTR(-ENXIO);
1793 }
1794
1795 b = &tab->descs[tab->nr_descs++];
1796 b->btf = btf;
1797 b->module = mod;
1798 b->offset = offset;
1799
1800 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
1801 kfunc_btf_cmp_by_off, NULL);
1802 }
2357672c 1803 return b->btf;
e6ac2450
MKL
1804}
1805
2357672c
KKD
1806void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
1807{
1808 if (!tab)
1809 return;
1810
1811 while (tab->nr_descs--) {
1812 module_put(tab->descs[tab->nr_descs].module);
1813 btf_put(tab->descs[tab->nr_descs].btf);
1814 }
1815 kfree(tab);
1816}
1817
43bf0878 1818static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
2357672c 1819{
2357672c
KKD
1820 if (offset) {
1821 if (offset < 0) {
 1822 /* In the future, this can be allowed to increase the limit
 1823 * of the fd index into fd_array, interpreted as u16.
1824 */
1825 verbose(env, "negative offset disallowed for kernel module function call\n");
1826 return ERR_PTR(-EINVAL);
1827 }
1828
b202d844 1829 return __find_kfunc_desc_btf(env, offset);
2357672c
KKD
1830 }
1831 return btf_vmlinux ?: ERR_PTR(-ENOENT);
e6ac2450
MKL
1832}
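/* How 'offset' is interpreted here (summary of the code above, not new
 * behaviour): offset 0 means the kfunc is resolved against vmlinux BTF; a
 * positive offset is used as an index into the fd_array supplied at load
 * time, and that fd must refer to a module BTF. Negative offsets are
 * rejected for now.
 */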
1833
2357672c 1834static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
e6ac2450
MKL
1835{
1836 const struct btf_type *func, *func_proto;
2357672c 1837 struct bpf_kfunc_btf_tab *btf_tab;
e6ac2450
MKL
1838 struct bpf_kfunc_desc_tab *tab;
1839 struct bpf_prog_aux *prog_aux;
1840 struct bpf_kfunc_desc *desc;
1841 const char *func_name;
2357672c 1842 struct btf *desc_btf;
8cbf062a 1843 unsigned long call_imm;
e6ac2450
MKL
1844 unsigned long addr;
1845 int err;
1846
1847 prog_aux = env->prog->aux;
1848 tab = prog_aux->kfunc_tab;
2357672c 1849 btf_tab = prog_aux->kfunc_btf_tab;
e6ac2450
MKL
1850 if (!tab) {
1851 if (!btf_vmlinux) {
1852 verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
1853 return -ENOTSUPP;
1854 }
1855
1856 if (!env->prog->jit_requested) {
1857 verbose(env, "JIT is required for calling kernel function\n");
1858 return -ENOTSUPP;
1859 }
1860
1861 if (!bpf_jit_supports_kfunc_call()) {
1862 verbose(env, "JIT does not support calling kernel function\n");
1863 return -ENOTSUPP;
1864 }
1865
1866 if (!env->prog->gpl_compatible) {
1867 verbose(env, "cannot call kernel function from non-GPL compatible program\n");
1868 return -EINVAL;
1869 }
1870
1871 tab = kzalloc(sizeof(*tab), GFP_KERNEL);
1872 if (!tab)
1873 return -ENOMEM;
1874 prog_aux->kfunc_tab = tab;
1875 }
1876
a5d82727
KKD
1877 /* func_id == 0 is always invalid, but instead of returning an error, be
 1878 * conservative and wait until the code elimination pass before returning an
 1879 * error, so that invalid calls that get pruned out can remain in BPF programs
1880 * loaded from userspace. It is also required that offset be untouched
1881 * for such calls.
1882 */
1883 if (!func_id && !offset)
1884 return 0;
1885
2357672c
KKD
1886 if (!btf_tab && offset) {
1887 btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
1888 if (!btf_tab)
1889 return -ENOMEM;
1890 prog_aux->kfunc_btf_tab = btf_tab;
1891 }
1892
43bf0878 1893 desc_btf = find_kfunc_desc_btf(env, offset);
2357672c
KKD
1894 if (IS_ERR(desc_btf)) {
1895 verbose(env, "failed to find BTF for kernel function\n");
1896 return PTR_ERR(desc_btf);
1897 }
1898
1899 if (find_kfunc_desc(env->prog, func_id, offset))
e6ac2450
MKL
1900 return 0;
1901
1902 if (tab->nr_descs == MAX_KFUNC_DESCS) {
1903 verbose(env, "too many different kernel function calls\n");
1904 return -E2BIG;
1905 }
1906
2357672c 1907 func = btf_type_by_id(desc_btf, func_id);
e6ac2450
MKL
1908 if (!func || !btf_type_is_func(func)) {
1909 verbose(env, "kernel btf_id %u is not a function\n",
1910 func_id);
1911 return -EINVAL;
1912 }
2357672c 1913 func_proto = btf_type_by_id(desc_btf, func->type);
e6ac2450
MKL
1914 if (!func_proto || !btf_type_is_func_proto(func_proto)) {
1915 verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
1916 func_id);
1917 return -EINVAL;
1918 }
1919
2357672c 1920 func_name = btf_name_by_offset(desc_btf, func->name_off);
e6ac2450
MKL
1921 addr = kallsyms_lookup_name(func_name);
1922 if (!addr) {
1923 verbose(env, "cannot find address for kernel function %s\n",
1924 func_name);
1925 return -EINVAL;
1926 }
1927
8cbf062a
HT
1928 call_imm = BPF_CALL_IMM(addr);
1929 /* Check whether or not the relative offset overflows desc->imm */
1930 if ((unsigned long)(s32)call_imm != call_imm) {
1931 verbose(env, "address of kernel function %s is out of range\n",
1932 func_name);
1933 return -EINVAL;
1934 }
1935
e6ac2450
MKL
1936 desc = &tab->descs[tab->nr_descs++];
1937 desc->func_id = func_id;
8cbf062a 1938 desc->imm = call_imm;
2357672c
KKD
1939 desc->offset = offset;
1940 err = btf_distill_func_proto(&env->log, desc_btf,
e6ac2450
MKL
1941 func_proto, func_name,
1942 &desc->func_model);
1943 if (!err)
1944 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2357672c 1945 kfunc_desc_cmp_by_id_off, NULL);
e6ac2450
MKL
1946 return err;
1947}
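/* A rough numeric sketch of the overflow check in add_kfunc_call(), using
 * assumed addresses (illustration only): if __bpf_call_base sits at
 * 0xffffffff81000000 and the kfunc at 0xffffffff8123abcd, BPF_CALL_IMM()
 * yields the delta 0x23abcd, which fits the s32 insn->imm. A kfunc placed
 * more than roughly +/-2 GiB away from __bpf_call_base would fail the
 * "(unsigned long)(s32)call_imm != call_imm" test and be rejected.
 */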
1948
1949static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
1950{
1951 const struct bpf_kfunc_desc *d0 = a;
1952 const struct bpf_kfunc_desc *d1 = b;
1953
1954 if (d0->imm > d1->imm)
1955 return 1;
1956 else if (d0->imm < d1->imm)
1957 return -1;
1958 return 0;
1959}
1960
1961static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
1962{
1963 struct bpf_kfunc_desc_tab *tab;
1964
1965 tab = prog->aux->kfunc_tab;
1966 if (!tab)
1967 return;
1968
1969 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
1970 kfunc_desc_cmp_by_imm, NULL);
1971}
1972
1973bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
1974{
1975 return !!prog->aux->kfunc_tab;
1976}
1977
1978const struct btf_func_model *
1979bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
1980 const struct bpf_insn *insn)
1981{
1982 const struct bpf_kfunc_desc desc = {
1983 .imm = insn->imm,
1984 };
1985 const struct bpf_kfunc_desc *res;
1986 struct bpf_kfunc_desc_tab *tab;
1987
1988 tab = prog->aux->kfunc_tab;
1989 res = bsearch(&desc, tab->descs, tab->nr_descs,
1990 sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm);
1991
1992 return res ? &res->func_model : NULL;
1993}
1994
1995static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
cc8b0b92 1996{
9c8105bd 1997 struct bpf_subprog_info *subprog = env->subprog_info;
cc8b0b92 1998 struct bpf_insn *insn = env->prog->insnsi;
e6ac2450 1999 int i, ret, insn_cnt = env->prog->len;
cc8b0b92 2000
f910cefa
JW
2001 /* Add entry function. */
2002 ret = add_subprog(env, 0);
e6ac2450 2003 if (ret)
f910cefa
JW
2004 return ret;
2005
e6ac2450
MKL
2006 for (i = 0; i < insn_cnt; i++, insn++) {
2007 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
2008 !bpf_pseudo_kfunc_call(insn))
cc8b0b92 2009 continue;
e6ac2450 2010
2c78ee89 2011 if (!env->bpf_capable) {
e6ac2450 2012 verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
cc8b0b92
AS
2013 return -EPERM;
2014 }
e6ac2450 2015
3990ed4c 2016 if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
e6ac2450 2017 ret = add_subprog(env, i + insn->imm + 1);
3990ed4c 2018 else
2357672c 2019 ret = add_kfunc_call(env, insn->imm, insn->off);
e6ac2450 2020
cc8b0b92
AS
2021 if (ret < 0)
2022 return ret;
2023 }
2024
4cb3d99c
JW
2025 /* Add a fake 'exit' subprog which could simplify subprog iteration
2026 * logic. 'subprog_cnt' should not be increased.
2027 */
2028 subprog[env->subprog_cnt].start = insn_cnt;
2029
06ee7115 2030 if (env->log.level & BPF_LOG_LEVEL2)
cc8b0b92 2031 for (i = 0; i < env->subprog_cnt; i++)
9c8105bd 2032 verbose(env, "func#%d @%d\n", i, subprog[i].start);
cc8b0b92 2033
e6ac2450
MKL
2034 return 0;
2035}
2036
2037static int check_subprogs(struct bpf_verifier_env *env)
2038{
2039 int i, subprog_start, subprog_end, off, cur_subprog = 0;
2040 struct bpf_subprog_info *subprog = env->subprog_info;
2041 struct bpf_insn *insn = env->prog->insnsi;
2042 int insn_cnt = env->prog->len;
2043
cc8b0b92 2044 /* now check that all jumps are within the same subprog */
4cb3d99c
JW
2045 subprog_start = subprog[cur_subprog].start;
2046 subprog_end = subprog[cur_subprog + 1].start;
cc8b0b92
AS
2047 for (i = 0; i < insn_cnt; i++) {
2048 u8 code = insn[i].code;
2049
7f6e4312
MF
2050 if (code == (BPF_JMP | BPF_CALL) &&
2051 insn[i].imm == BPF_FUNC_tail_call &&
2052 insn[i].src_reg != BPF_PSEUDO_CALL)
2053 subprog[cur_subprog].has_tail_call = true;
09b28d76
AS
2054 if (BPF_CLASS(code) == BPF_LD &&
2055 (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
2056 subprog[cur_subprog].has_ld_abs = true;
092ed096 2057 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
cc8b0b92
AS
2058 goto next;
2059 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
2060 goto next;
2061 off = i + insn[i].off + 1;
2062 if (off < subprog_start || off >= subprog_end) {
2063 verbose(env, "jump out of range from insn %d to %d\n", i, off);
2064 return -EINVAL;
2065 }
2066next:
2067 if (i == subprog_end - 1) {
2068 /* to avoid fall-through from one subprog into another
2069 * the last insn of the subprog should be either exit
2070 * or unconditional jump back
2071 */
2072 if (code != (BPF_JMP | BPF_EXIT) &&
2073 code != (BPF_JMP | BPF_JA)) {
2074 verbose(env, "last insn is not an exit or jmp\n");
2075 return -EINVAL;
2076 }
2077 subprog_start = subprog_end;
4cb3d99c
JW
2078 cur_subprog++;
2079 if (cur_subprog < env->subprog_cnt)
9c8105bd 2080 subprog_end = subprog[cur_subprog + 1].start;
cc8b0b92
AS
2081 }
2082 }
2083 return 0;
2084}
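/* Worked example (assumed program layout): if the only bpf-to-bpf call sits
 * at insn 5 with imm = 4, add_subprog_and_kfunc() records starts {0, 10}
 * plus the fake "exit" entry at insn_cnt, so insns 0..9 form subprog 0 and
 * insns 10..insn_cnt-1 form subprog 1. check_subprogs() then makes sure no
 * jump crosses the boundary at insn 10 and that insn 9 is an exit or an
 * unconditional jump.
 */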
2085
679c782d
EC
2086/* Parentage chain of this register (or stack slot) should take care of all
2087 * issues like callee-saved registers, stack slot allocation time, etc.
2088 */
f4d7e40a 2089static int mark_reg_read(struct bpf_verifier_env *env,
679c782d 2090 const struct bpf_reg_state *state,
5327ed3d 2091 struct bpf_reg_state *parent, u8 flag)
f4d7e40a
AS
2092{
2093 bool writes = parent == state->parent; /* Observe write marks */
06ee7115 2094 int cnt = 0;
dc503a8a
EC
2095
2096 while (parent) {
2097 /* if read wasn't screened by an earlier write ... */
679c782d 2098 if (writes && state->live & REG_LIVE_WRITTEN)
dc503a8a 2099 break;
9242b5f5
AS
2100 if (parent->live & REG_LIVE_DONE) {
2101 verbose(env, "verifier BUG type %s var_off %lld off %d\n",
c25b2ae1 2102 reg_type_str(env, parent->type),
9242b5f5
AS
2103 parent->var_off.value, parent->off);
2104 return -EFAULT;
2105 }
5327ed3d
JW
2106 /* The first condition is more likely to be true than the
 2107 * second, so check it first.
2108 */
2109 if ((parent->live & REG_LIVE_READ) == flag ||
2110 parent->live & REG_LIVE_READ64)
25af32da
AS
2111 /* The parentage chain never changes and
2112 * this parent was already marked as LIVE_READ.
2113 * There is no need to keep walking the chain again and
2114 * keep re-marking all parents as LIVE_READ.
2115 * This case happens when the same register is read
2116 * multiple times without writes into it in-between.
5327ed3d
JW
2117 * Also, if parent has the stronger REG_LIVE_READ64 set,
2118 * then no need to set the weak REG_LIVE_READ32.
25af32da
AS
2119 */
2120 break;
dc503a8a 2121 /* ... then we depend on parent's value */
5327ed3d
JW
2122 parent->live |= flag;
2123 /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
2124 if (flag == REG_LIVE_READ64)
2125 parent->live &= ~REG_LIVE_READ32;
dc503a8a
EC
2126 state = parent;
2127 parent = state->parent;
f4d7e40a 2128 writes = true;
06ee7115 2129 cnt++;
dc503a8a 2130 }
06ee7115
AS
2131
2132 if (env->longest_mark_read_walk < cnt)
2133 env->longest_mark_read_walk = cnt;
f4d7e40a 2134 return 0;
dc503a8a
EC
2135}
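/* A hedged walk-through of the loop above, in the macro style of the file's
 * header comment (illustration only): suppose a parent state executed
 *	BPF_MOV64_IMM(BPF_REG_6, 1),
 * and a child state later executes
 *	BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 1, 2),
 * The read of r6 in the child walks the parentage chain, ORing the
 * REG_LIVE_READ* flag into each ancestor's r6; the walk stops once it meets
 * a state where r6 was itself written (REG_LIVE_WRITTEN screens the read)
 * or an ancestor that already carries an equal-or-stronger read mark.
 */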
2136
5327ed3d
JW
2137/* This function is supposed to be used by the following 32-bit optimization
2138 * code only. It returns TRUE if the source or destination register operates
 2139 * on 64-bit data, otherwise it returns FALSE.
2140 */
2141static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
2142 u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
2143{
2144 u8 code, class, op;
2145
2146 code = insn->code;
2147 class = BPF_CLASS(code);
2148 op = BPF_OP(code);
2149 if (class == BPF_JMP) {
2150 /* BPF_EXIT for "main" will reach here. Return TRUE
2151 * conservatively.
2152 */
2153 if (op == BPF_EXIT)
2154 return true;
2155 if (op == BPF_CALL) {
2156 /* BPF to BPF call will reach here because of marking
 2157 * caller saved clobbers with DST_OP_NO_MARK, for which we
 2158 * don't care about the register def because they are
 2159 * already marked as NOT_INIT.
2160 */
2161 if (insn->src_reg == BPF_PSEUDO_CALL)
2162 return false;
2163 /* Helper call will reach here because of arg type
2164 * check, conservatively return TRUE.
2165 */
2166 if (t == SRC_OP)
2167 return true;
2168
2169 return false;
2170 }
2171 }
2172
2173 if (class == BPF_ALU64 || class == BPF_JMP ||
 2174 /* BPF_END always uses BPF_ALU class. */
2175 (class == BPF_ALU && op == BPF_END && insn->imm == 64))
2176 return true;
2177
2178 if (class == BPF_ALU || class == BPF_JMP32)
2179 return false;
2180
2181 if (class == BPF_LDX) {
2182 if (t != SRC_OP)
2183 return BPF_SIZE(code) == BPF_DW;
2184 /* LDX source must be ptr. */
2185 return true;
2186 }
2187
2188 if (class == BPF_STX) {
83a28819
IL
2189 /* BPF_STX (including atomic variants) has multiple source
2190 * operands, one of which is a ptr. Check whether the caller is
2191 * asking about it.
2192 */
2193 if (t == SRC_OP && reg->type != SCALAR_VALUE)
5327ed3d
JW
2194 return true;
2195 return BPF_SIZE(code) == BPF_DW;
2196 }
2197
2198 if (class == BPF_LD) {
2199 u8 mode = BPF_MODE(code);
2200
2201 /* LD_IMM64 */
2202 if (mode == BPF_IMM)
2203 return true;
2204
2205 /* Both LD_IND and LD_ABS return 32-bit data. */
2206 if (t != SRC_OP)
2207 return false;
2208
2209 /* Implicit ctx ptr. */
2210 if (regno == BPF_REG_6)
2211 return true;
2212
2213 /* Explicit source could be any width. */
2214 return true;
2215 }
2216
2217 if (class == BPF_ST)
2218 /* The only source register for BPF_ST is a ptr. */
2219 return true;
2220
 2221 /* Conservatively return true by default. */
2222 return true;
2223}
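/* An illustrative pair of insns, in the header-comment macro style (a
 * sketch, not quoted from elsewhere in this file), showing what the 32-bit
 * tracking is after:
 *	BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 5),
 *	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
 * The second insn is a 64-bit read of r0, so is_reg64() returns true and
 * mark_insn_zext() flags the first (defining) insn for an explicit zero
 * extension on architectures whose 32-bit ops don't implicitly clear the
 * upper half.
 */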
2224
83a28819
IL
2225/* Return the regno defined by the insn, or -1. */
2226static int insn_def_regno(const struct bpf_insn *insn)
b325fbca 2227{
83a28819
IL
2228 switch (BPF_CLASS(insn->code)) {
2229 case BPF_JMP:
2230 case BPF_JMP32:
2231 case BPF_ST:
2232 return -1;
2233 case BPF_STX:
2234 if (BPF_MODE(insn->code) == BPF_ATOMIC &&
2235 (insn->imm & BPF_FETCH)) {
2236 if (insn->imm == BPF_CMPXCHG)
2237 return BPF_REG_0;
2238 else
2239 return insn->src_reg;
2240 } else {
2241 return -1;
2242 }
2243 default:
2244 return insn->dst_reg;
2245 }
b325fbca
JW
2246}
2247
2248/* Return TRUE if INSN has defined any 32-bit value explicitly. */
2249static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
2250{
83a28819
IL
2251 int dst_reg = insn_def_regno(insn);
2252
2253 if (dst_reg == -1)
b325fbca
JW
2254 return false;
2255
83a28819 2256 return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
b325fbca
JW
2257}
2258
5327ed3d
JW
2259static void mark_insn_zext(struct bpf_verifier_env *env,
2260 struct bpf_reg_state *reg)
2261{
2262 s32 def_idx = reg->subreg_def;
2263
2264 if (def_idx == DEF_NOT_SUBREG)
2265 return;
2266
2267 env->insn_aux_data[def_idx - 1].zext_dst = true;
2268 /* The dst will be zero extended, so won't be sub-register anymore. */
2269 reg->subreg_def = DEF_NOT_SUBREG;
2270}
2271
dc503a8a 2272static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
17a52670
AS
2273 enum reg_arg_type t)
2274{
f4d7e40a
AS
2275 struct bpf_verifier_state *vstate = env->cur_state;
2276 struct bpf_func_state *state = vstate->frame[vstate->curframe];
5327ed3d 2277 struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
c342dc10 2278 struct bpf_reg_state *reg, *regs = state->regs;
5327ed3d 2279 bool rw64;
dc503a8a 2280
17a52670 2281 if (regno >= MAX_BPF_REG) {
61bd5218 2282 verbose(env, "R%d is invalid\n", regno);
17a52670
AS
2283 return -EINVAL;
2284 }
2285
0f55f9ed
CL
2286 mark_reg_scratched(env, regno);
2287
c342dc10 2288 reg = &regs[regno];
5327ed3d 2289 rw64 = is_reg64(env, insn, regno, reg, t);
17a52670
AS
2290 if (t == SRC_OP) {
2291 /* check whether register used as source operand can be read */
c342dc10 2292 if (reg->type == NOT_INIT) {
61bd5218 2293 verbose(env, "R%d !read_ok\n", regno);
17a52670
AS
2294 return -EACCES;
2295 }
679c782d 2296 /* We don't need to worry about FP liveness because it's read-only */
c342dc10
JW
2297 if (regno == BPF_REG_FP)
2298 return 0;
2299
5327ed3d
JW
2300 if (rw64)
2301 mark_insn_zext(env, reg);
2302
2303 return mark_reg_read(env, reg, reg->parent,
2304 rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
17a52670
AS
2305 } else {
2306 /* check whether register used as dest operand can be written to */
2307 if (regno == BPF_REG_FP) {
61bd5218 2308 verbose(env, "frame pointer is read only\n");
17a52670
AS
2309 return -EACCES;
2310 }
c342dc10 2311 reg->live |= REG_LIVE_WRITTEN;
5327ed3d 2312 reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
17a52670 2313 if (t == DST_OP)
61bd5218 2314 mark_reg_unknown(env, regs, regno);
17a52670
AS
2315 }
2316 return 0;
2317}
2318
b5dc0163
AS
2319/* for any branch, call, exit record the history of jmps in the given state */
2320static int push_jmp_history(struct bpf_verifier_env *env,
2321 struct bpf_verifier_state *cur)
2322{
2323 u32 cnt = cur->jmp_history_cnt;
2324 struct bpf_idx_pair *p;
2325
2326 cnt++;
2327 p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
2328 if (!p)
2329 return -ENOMEM;
2330 p[cnt - 1].idx = env->insn_idx;
2331 p[cnt - 1].prev_idx = env->prev_insn_idx;
2332 cur->jmp_history = p;
2333 cur->jmp_history_cnt = cnt;
2334 return 0;
2335}
2336
2337/* Backtrack one insn at a time. If idx is not at the top of recorded
2338 * history then previous instruction came from straight line execution.
2339 */
2340static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
2341 u32 *history)
2342{
2343 u32 cnt = *history;
2344
2345 if (cnt && st->jmp_history[cnt - 1].idx == i) {
2346 i = st->jmp_history[cnt - 1].prev_idx;
2347 (*history)--;
2348 } else {
2349 i--;
2350 }
2351 return i;
2352}
2353
e6ac2450
MKL
2354static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
2355{
2356 const struct btf_type *func;
2357672c 2357 struct btf *desc_btf;
e6ac2450
MKL
2358
2359 if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
2360 return NULL;
2361
43bf0878 2362 desc_btf = find_kfunc_desc_btf(data, insn->off);
2357672c
KKD
2363 if (IS_ERR(desc_btf))
2364 return "<error>";
2365
2366 func = btf_type_by_id(desc_btf, insn->imm);
2367 return btf_name_by_offset(desc_btf, func->name_off);
e6ac2450
MKL
2368}
2369
b5dc0163
AS
2370/* For a given verifier state, backtrack_insn() is called from the last insn to
2371 * the first insn. Its purpose is to compute a bitmask of registers and
2372 * stack slots that need precision in the parent verifier state.
2373 */
2374static int backtrack_insn(struct bpf_verifier_env *env, int idx,
2375 u32 *reg_mask, u64 *stack_mask)
2376{
2377 const struct bpf_insn_cbs cbs = {
e6ac2450 2378 .cb_call = disasm_kfunc_name,
b5dc0163
AS
2379 .cb_print = verbose,
2380 .private_data = env,
2381 };
2382 struct bpf_insn *insn = env->prog->insnsi + idx;
2383 u8 class = BPF_CLASS(insn->code);
2384 u8 opcode = BPF_OP(insn->code);
2385 u8 mode = BPF_MODE(insn->code);
2386 u32 dreg = 1u << insn->dst_reg;
2387 u32 sreg = 1u << insn->src_reg;
2388 u32 spi;
2389
2390 if (insn->code == 0)
2391 return 0;
496f3324 2392 if (env->log.level & BPF_LOG_LEVEL2) {
b5dc0163
AS
2393 verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
2394 verbose(env, "%d: ", idx);
2395 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
2396 }
2397
2398 if (class == BPF_ALU || class == BPF_ALU64) {
2399 if (!(*reg_mask & dreg))
2400 return 0;
2401 if (opcode == BPF_MOV) {
2402 if (BPF_SRC(insn->code) == BPF_X) {
2403 /* dreg = sreg
2404 * dreg needs precision after this insn
2405 * sreg needs precision before this insn
2406 */
2407 *reg_mask &= ~dreg;
2408 *reg_mask |= sreg;
2409 } else {
2410 /* dreg = K
2411 * dreg needs precision after this insn.
2412 * Corresponding register is already marked
2413 * as precise=true in this verifier state.
2414 * No further markings in parent are necessary
2415 */
2416 *reg_mask &= ~dreg;
2417 }
2418 } else {
2419 if (BPF_SRC(insn->code) == BPF_X) {
2420 /* dreg += sreg
2421 * both dreg and sreg need precision
2422 * before this insn
2423 */
2424 *reg_mask |= sreg;
2425 } /* else dreg += K
2426 * dreg still needs precision before this insn
2427 */
2428 }
2429 } else if (class == BPF_LDX) {
2430 if (!(*reg_mask & dreg))
2431 return 0;
2432 *reg_mask &= ~dreg;
2433
2434 /* scalars can only be spilled into stack w/o losing precision.
2435 * Load from any other memory can be zero extended.
2436 * The desire to keep that precision is already indicated
2437 * by 'precise' mark in corresponding register of this state.
2438 * No further tracking necessary.
2439 */
2440 if (insn->src_reg != BPF_REG_FP)
2441 return 0;
b5dc0163
AS
2442
2443 /* dreg = *(u64 *)[fp - off] was a fill from the stack.
2444 * that [fp - off] slot contains scalar that needs to be
2445 * tracked with precision
2446 */
2447 spi = (-insn->off - 1) / BPF_REG_SIZE;
2448 if (spi >= 64) {
2449 verbose(env, "BUG spi %d\n", spi);
2450 WARN_ONCE(1, "verifier backtracking bug");
2451 return -EFAULT;
2452 }
2453 *stack_mask |= 1ull << spi;
b3b50f05 2454 } else if (class == BPF_STX || class == BPF_ST) {
b5dc0163 2455 if (*reg_mask & dreg)
b3b50f05 2456 /* stx & st shouldn't be using _scalar_ dst_reg
b5dc0163
AS
2457 * to access memory. It means backtracking
2458 * encountered a case of pointer subtraction.
2459 */
2460 return -ENOTSUPP;
2461 /* scalars can only be spilled into stack */
2462 if (insn->dst_reg != BPF_REG_FP)
2463 return 0;
b5dc0163
AS
2464 spi = (-insn->off - 1) / BPF_REG_SIZE;
2465 if (spi >= 64) {
2466 verbose(env, "BUG spi %d\n", spi);
2467 WARN_ONCE(1, "verifier backtracking bug");
2468 return -EFAULT;
2469 }
2470 if (!(*stack_mask & (1ull << spi)))
2471 return 0;
2472 *stack_mask &= ~(1ull << spi);
b3b50f05
AN
2473 if (class == BPF_STX)
2474 *reg_mask |= sreg;
b5dc0163
AS
2475 } else if (class == BPF_JMP || class == BPF_JMP32) {
2476 if (opcode == BPF_CALL) {
2477 if (insn->src_reg == BPF_PSEUDO_CALL)
2478 return -ENOTSUPP;
2479 /* regular helper call sets R0 */
2480 *reg_mask &= ~1;
2481 if (*reg_mask & 0x3f) {
2482 /* if backtracing was looking for registers R1-R5
2483 * they should have been found already.
2484 */
2485 verbose(env, "BUG regs %x\n", *reg_mask);
2486 WARN_ONCE(1, "verifier backtracking bug");
2487 return -EFAULT;
2488 }
2489 } else if (opcode == BPF_EXIT) {
2490 return -ENOTSUPP;
2491 }
2492 } else if (class == BPF_LD) {
2493 if (!(*reg_mask & dreg))
2494 return 0;
2495 *reg_mask &= ~dreg;
2496 /* It's ld_imm64 or ld_abs or ld_ind.
2497 * For ld_imm64 no further tracking of precision
2498 * into parent is necessary
2499 */
2500 if (mode == BPF_IND || mode == BPF_ABS)
2501 /* to be analyzed */
2502 return -ENOTSUPP;
b5dc0163
AS
2503 }
2504 return 0;
2505}
2506
2507/* the scalar precision tracking algorithm:
2508 * . at the start all registers have precise=false.
2509 * . scalar ranges are tracked as normal through alu and jmp insns.
 2510 * . once the precise value of a scalar register is used in:
2511 * . ptr + scalar alu
2512 * . if (scalar cond K|scalar)
2513 * . helper_call(.., scalar, ...) where ARG_CONST is expected
2514 * backtrack through the verifier states and mark all registers and
 2515 * stack slots with spilled constants that these scalar registers
2516 * should be precise.
2517 * . during state pruning two registers (or spilled stack slots)
2518 * are equivalent if both are not precise.
2519 *
2520 * Note the verifier cannot simply walk register parentage chain,
2521 * since many different registers and stack slots could have been
2522 * used to compute single precise scalar.
2523 *
2524 * The approach of starting with precise=true for all registers and then
2525 * backtrack to mark a register as not precise when the verifier detects
 2526 * that the program doesn't care about a specific value (e.g., when a helper
 2527 * takes a register as an ARG_ANYTHING parameter) is not safe.
2528 *
2529 * It's ok to walk single parentage chain of the verifier states.
2530 * It's possible that this backtracking will go all the way till 1st insn.
2531 * All other branches will be explored for needing precision later.
2532 *
2533 * The backtracking needs to deal with cases like:
2534 * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
2535 * r9 -= r8
2536 * r5 = r9
2537 * if r5 > 0x79f goto pc+7
2538 * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
2539 * r5 += 1
2540 * ...
2541 * call bpf_perf_event_output#25
2542 * where .arg5_type = ARG_CONST_SIZE_OR_ZERO
2543 *
2544 * and this case:
2545 * r6 = 1
2546 * call foo // uses callee's r6 inside to compute r0
2547 * r0 += r6
2548 * if r0 == 0 goto
2549 *
2550 * to track above reg_mask/stack_mask needs to be independent for each frame.
2551 *
2552 * Also if parent's curframe > frame where backtracking started,
 2553 * the verifier needs to mark registers in both frames, otherwise callees
2554 * may incorrectly prune callers. This is similar to
2555 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
2556 *
2557 * For now backtracking falls back into conservative marking.
2558 */
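/* A small example of the above, in the macro style of the file's header
 * comment (hedged sketch, not taken from a real program):
 *	BPF_MOV64_IMM(BPF_REG_2, 16),
 *	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8),
 *	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
 *	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
 * The constant is spilled to fp-8, filled back into r2 and then added to a
 * pointer in r1. Once r2 feeds pointer arithmetic, backtracking marks r2 and
 * the fp-8 slot as precise, so state pruning cannot treat a state holding a
 * different spilled constant as equivalent.
 */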
2559static void mark_all_scalars_precise(struct bpf_verifier_env *env,
2560 struct bpf_verifier_state *st)
2561{
2562 struct bpf_func_state *func;
2563 struct bpf_reg_state *reg;
2564 int i, j;
2565
2566 /* big hammer: mark all scalars precise in this path.
2567 * pop_stack may still get !precise scalars.
2568 */
2569 for (; st; st = st->parent)
2570 for (i = 0; i <= st->curframe; i++) {
2571 func = st->frame[i];
2572 for (j = 0; j < BPF_REG_FP; j++) {
2573 reg = &func->regs[j];
2574 if (reg->type != SCALAR_VALUE)
2575 continue;
2576 reg->precise = true;
2577 }
2578 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
27113c59 2579 if (!is_spilled_reg(&func->stack[j]))
b5dc0163
AS
2580 continue;
2581 reg = &func->stack[j].spilled_ptr;
2582 if (reg->type != SCALAR_VALUE)
2583 continue;
2584 reg->precise = true;
2585 }
2586 }
2587}
2588
a3ce685d
AS
2589static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
2590 int spi)
b5dc0163
AS
2591{
2592 struct bpf_verifier_state *st = env->cur_state;
2593 int first_idx = st->first_insn_idx;
2594 int last_idx = env->insn_idx;
2595 struct bpf_func_state *func;
2596 struct bpf_reg_state *reg;
a3ce685d
AS
2597 u32 reg_mask = regno >= 0 ? 1u << regno : 0;
2598 u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
b5dc0163 2599 bool skip_first = true;
a3ce685d 2600 bool new_marks = false;
b5dc0163
AS
2601 int i, err;
2602
2c78ee89 2603 if (!env->bpf_capable)
b5dc0163
AS
2604 return 0;
2605
2606 func = st->frame[st->curframe];
a3ce685d
AS
2607 if (regno >= 0) {
2608 reg = &func->regs[regno];
2609 if (reg->type != SCALAR_VALUE) {
2610 WARN_ONCE(1, "backtracing misuse");
2611 return -EFAULT;
2612 }
2613 if (!reg->precise)
2614 new_marks = true;
2615 else
2616 reg_mask = 0;
2617 reg->precise = true;
b5dc0163 2618 }
b5dc0163 2619
a3ce685d 2620 while (spi >= 0) {
27113c59 2621 if (!is_spilled_reg(&func->stack[spi])) {
a3ce685d
AS
2622 stack_mask = 0;
2623 break;
2624 }
2625 reg = &func->stack[spi].spilled_ptr;
2626 if (reg->type != SCALAR_VALUE) {
2627 stack_mask = 0;
2628 break;
2629 }
2630 if (!reg->precise)
2631 new_marks = true;
2632 else
2633 stack_mask = 0;
2634 reg->precise = true;
2635 break;
2636 }
2637
2638 if (!new_marks)
2639 return 0;
2640 if (!reg_mask && !stack_mask)
2641 return 0;
b5dc0163
AS
2642 for (;;) {
2643 DECLARE_BITMAP(mask, 64);
b5dc0163
AS
2644 u32 history = st->jmp_history_cnt;
2645
496f3324 2646 if (env->log.level & BPF_LOG_LEVEL2)
b5dc0163
AS
2647 verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
2648 for (i = last_idx;;) {
2649 if (skip_first) {
2650 err = 0;
2651 skip_first = false;
2652 } else {
2653 err = backtrack_insn(env, i, &reg_mask, &stack_mask);
2654 }
2655 if (err == -ENOTSUPP) {
2656 mark_all_scalars_precise(env, st);
2657 return 0;
2658 } else if (err) {
2659 return err;
2660 }
2661 if (!reg_mask && !stack_mask)
2662 /* Found assignment(s) into tracked register in this state.
2663 * Since this state is already marked, just return.
2664 * Nothing to be tracked further in the parent state.
2665 */
2666 return 0;
2667 if (i == first_idx)
2668 break;
2669 i = get_prev_insn_idx(st, i, &history);
2670 if (i >= env->prog->len) {
2671 /* This can happen if backtracking reached insn 0
2672 * and there are still reg_mask or stack_mask
2673 * to backtrack.
2674 * It means the backtracking missed the spot where
2675 * particular register was initialized with a constant.
2676 */
2677 verbose(env, "BUG backtracking idx %d\n", i);
2678 WARN_ONCE(1, "verifier backtracking bug");
2679 return -EFAULT;
2680 }
2681 }
2682 st = st->parent;
2683 if (!st)
2684 break;
2685
a3ce685d 2686 new_marks = false;
b5dc0163
AS
2687 func = st->frame[st->curframe];
2688 bitmap_from_u64(mask, reg_mask);
2689 for_each_set_bit(i, mask, 32) {
2690 reg = &func->regs[i];
a3ce685d
AS
2691 if (reg->type != SCALAR_VALUE) {
2692 reg_mask &= ~(1u << i);
b5dc0163 2693 continue;
a3ce685d 2694 }
b5dc0163
AS
2695 if (!reg->precise)
2696 new_marks = true;
2697 reg->precise = true;
2698 }
2699
2700 bitmap_from_u64(mask, stack_mask);
2701 for_each_set_bit(i, mask, 64) {
2702 if (i >= func->allocated_stack / BPF_REG_SIZE) {
2339cd6c
AS
2703 /* the sequence of instructions:
2704 * 2: (bf) r3 = r10
2705 * 3: (7b) *(u64 *)(r3 -8) = r0
2706 * 4: (79) r4 = *(u64 *)(r10 -8)
2707 * doesn't contain jmps. It's backtracked
2708 * as a single block.
2709 * During backtracking insn 3 is not recognized as
2710 * stack access, so at the end of backtracking
2711 * stack slot fp-8 is still marked in stack_mask.
2712 * However the parent state may not have accessed
2713 * fp-8 and it's "unallocated" stack space.
2714 * In such case fallback to conservative.
b5dc0163 2715 */
2339cd6c
AS
2716 mark_all_scalars_precise(env, st);
2717 return 0;
b5dc0163
AS
2718 }
2719
27113c59 2720 if (!is_spilled_reg(&func->stack[i])) {
a3ce685d 2721 stack_mask &= ~(1ull << i);
b5dc0163 2722 continue;
a3ce685d 2723 }
b5dc0163 2724 reg = &func->stack[i].spilled_ptr;
a3ce685d
AS
2725 if (reg->type != SCALAR_VALUE) {
2726 stack_mask &= ~(1ull << i);
b5dc0163 2727 continue;
a3ce685d 2728 }
b5dc0163
AS
2729 if (!reg->precise)
2730 new_marks = true;
2731 reg->precise = true;
2732 }
496f3324 2733 if (env->log.level & BPF_LOG_LEVEL2) {
2e576648 2734 verbose(env, "parent %s regs=%x stack=%llx marks:",
b5dc0163
AS
2735 new_marks ? "didn't have" : "already had",
2736 reg_mask, stack_mask);
2e576648 2737 print_verifier_state(env, func, true);
b5dc0163
AS
2738 }
2739
a3ce685d
AS
2740 if (!reg_mask && !stack_mask)
2741 break;
b5dc0163
AS
2742 if (!new_marks)
2743 break;
2744
2745 last_idx = st->last_insn_idx;
2746 first_idx = st->first_insn_idx;
2747 }
2748 return 0;
2749}
2750
a3ce685d
AS
2751static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
2752{
2753 return __mark_chain_precision(env, regno, -1);
2754}
2755
2756static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
2757{
2758 return __mark_chain_precision(env, -1, spi);
2759}
b5dc0163 2760
1be7f75d
AS
2761static bool is_spillable_regtype(enum bpf_reg_type type)
2762{
c25b2ae1 2763 switch (base_type(type)) {
1be7f75d 2764 case PTR_TO_MAP_VALUE:
1be7f75d
AS
2765 case PTR_TO_STACK:
2766 case PTR_TO_CTX:
969bf05e 2767 case PTR_TO_PACKET:
de8f3a83 2768 case PTR_TO_PACKET_META:
969bf05e 2769 case PTR_TO_PACKET_END:
d58e468b 2770 case PTR_TO_FLOW_KEYS:
1be7f75d 2771 case CONST_PTR_TO_MAP:
c64b7983 2772 case PTR_TO_SOCKET:
46f8bc92 2773 case PTR_TO_SOCK_COMMON:
655a51e5 2774 case PTR_TO_TCP_SOCK:
fada7fdc 2775 case PTR_TO_XDP_SOCK:
65726b5b 2776 case PTR_TO_BTF_ID:
20b2aff4 2777 case PTR_TO_BUF:
744ea4e3 2778 case PTR_TO_MEM:
69c087ba
YS
2779 case PTR_TO_FUNC:
2780 case PTR_TO_MAP_KEY:
1be7f75d
AS
2781 return true;
2782 default:
2783 return false;
2784 }
2785}
2786
cc2b14d5
AS
2787/* Does this register contain a constant zero? */
2788static bool register_is_null(struct bpf_reg_state *reg)
2789{
2790 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
2791}
2792
f7cf25b2
AS
2793static bool register_is_const(struct bpf_reg_state *reg)
2794{
2795 return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
2796}
2797
5689d49b
YS
2798static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
2799{
2800 return tnum_is_unknown(reg->var_off) &&
2801 reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
2802 reg->umin_value == 0 && reg->umax_value == U64_MAX &&
2803 reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
2804 reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
2805}
2806
2807static bool register_is_bounded(struct bpf_reg_state *reg)
2808{
2809 return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
2810}
2811
6e7e63cb
JH
2812static bool __is_pointer_value(bool allow_ptr_leaks,
2813 const struct bpf_reg_state *reg)
2814{
2815 if (allow_ptr_leaks)
2816 return false;
2817
2818 return reg->type != SCALAR_VALUE;
2819}
2820
f7cf25b2 2821static void save_register_state(struct bpf_func_state *state,
354e8f19
MKL
2822 int spi, struct bpf_reg_state *reg,
2823 int size)
f7cf25b2
AS
2824{
2825 int i;
2826
2827 state->stack[spi].spilled_ptr = *reg;
354e8f19
MKL
2828 if (size == BPF_REG_SIZE)
2829 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
f7cf25b2 2830
354e8f19
MKL
2831 for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
2832 state->stack[spi].slot_type[i - 1] = STACK_SPILL;
f7cf25b2 2833
354e8f19
MKL
2834 /* size < 8 bytes spill */
2835 for (; i; i--)
2836 scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
f7cf25b2
AS
2837}
2838
01f810ac 2839/* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
17a52670
AS
2840 * stack boundary and alignment are checked in check_mem_access()
2841 */
01f810ac
AM
2842static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
2843 /* stack frame we're writing to */
2844 struct bpf_func_state *state,
2845 int off, int size, int value_regno,
2846 int insn_idx)
17a52670 2847{
f4d7e40a 2848 struct bpf_func_state *cur; /* state of the current function */
638f5b90 2849 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
b5dc0163 2850 u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
f7cf25b2 2851 struct bpf_reg_state *reg = NULL;
638f5b90 2852
c69431aa 2853 err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
638f5b90
AS
2854 if (err)
2855 return err;
9c399760
AS
2856 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
2857 * so it's aligned access and [off, off + size) are within stack limits
2858 */
638f5b90
AS
2859 if (!env->allow_ptr_leaks &&
2860 state->stack[spi].slot_type[0] == STACK_SPILL &&
2861 size != BPF_REG_SIZE) {
2862 verbose(env, "attempt to corrupt spilled pointer on stack\n");
2863 return -EACCES;
2864 }
17a52670 2865
f4d7e40a 2866 cur = env->cur_state->frame[env->cur_state->curframe];
f7cf25b2
AS
2867 if (value_regno >= 0)
2868 reg = &cur->regs[value_regno];
2039f26f
DB
2869 if (!env->bypass_spec_v4) {
2870 bool sanitize = reg && is_spillable_regtype(reg->type);
2871
2872 for (i = 0; i < size; i++) {
2873 if (state->stack[spi].slot_type[i] == STACK_INVALID) {
2874 sanitize = true;
2875 break;
2876 }
2877 }
2878
2879 if (sanitize)
2880 env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
2881 }
17a52670 2882
0f55f9ed 2883 mark_stack_slot_scratched(env, spi);
354e8f19 2884 if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
2c78ee89 2885 !register_is_null(reg) && env->bpf_capable) {
b5dc0163
AS
2886 if (dst_reg != BPF_REG_FP) {
2887 /* The backtracking logic can only recognize explicit
2888 * stack slot address like [fp - 8]. Other spill of
8fb33b60 2889 * scalar via different register has to be conservative.
b5dc0163
AS
2890 * Backtrack from here and mark all registers as precise
2891 * that contributed into 'reg' being a constant.
2892 */
2893 err = mark_chain_precision(env, value_regno);
2894 if (err)
2895 return err;
2896 }
354e8f19 2897 save_register_state(state, spi, reg, size);
f7cf25b2 2898 } else if (reg && is_spillable_regtype(reg->type)) {
17a52670 2899 /* register containing pointer is being spilled into stack */
9c399760 2900 if (size != BPF_REG_SIZE) {
f7cf25b2 2901 verbose_linfo(env, insn_idx, "; ");
61bd5218 2902 verbose(env, "invalid size of register spill\n");
17a52670
AS
2903 return -EACCES;
2904 }
f7cf25b2 2905 if (state != cur && reg->type == PTR_TO_STACK) {
f4d7e40a
AS
2906 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
2907 return -EINVAL;
2908 }
354e8f19 2909 save_register_state(state, spi, reg, size);
9c399760 2910 } else {
cc2b14d5
AS
2911 u8 type = STACK_MISC;
2912
679c782d
EC
2913 /* regular write of data into stack destroys any spilled ptr */
2914 state->stack[spi].spilled_ptr.type = NOT_INIT;
0bae2d4d 2915 /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
27113c59 2916 if (is_spilled_reg(&state->stack[spi]))
0bae2d4d 2917 for (i = 0; i < BPF_REG_SIZE; i++)
354e8f19 2918 scrub_spilled_slot(&state->stack[spi].slot_type[i]);
9c399760 2919
cc2b14d5
AS
 2920 /* only mark the slot as written if all 8 bytes were written,
 2921 * otherwise read propagation may incorrectly stop too soon
 2922 * when stack slots are partially written.
 2923 * This heuristic means that read propagation will be
 2924 * conservative, since it will add reg_live_read marks
 2925 * to stack slots all the way to the first state when a program
 2926 * writes+reads less than 8 bytes.
2927 */
2928 if (size == BPF_REG_SIZE)
2929 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2930
2931 /* when we zero initialize stack slots mark them as such */
b5dc0163
AS
2932 if (reg && register_is_null(reg)) {
2933 /* backtracking doesn't work for STACK_ZERO yet. */
2934 err = mark_chain_precision(env, value_regno);
2935 if (err)
2936 return err;
cc2b14d5 2937 type = STACK_ZERO;
b5dc0163 2938 }
cc2b14d5 2939
0bae2d4d 2940 /* Mark slots affected by this stack write. */
9c399760 2941 for (i = 0; i < size; i++)
638f5b90 2942 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
cc2b14d5 2943 type;
17a52670
AS
2944 }
2945 return 0;
2946}
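/* Three assumed writes (illustration only, header-comment macro style) and
 * the slot types the function above would record for them:
 *	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
 *	BPF_MOV64_IMM(BPF_REG_2, 0),
 *	BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -16),
 *	BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_3, -24),
 * The 8-byte spill of r1 becomes STACK_SPILL, the 4-byte store of the known
 * zero in r2 marks its bytes STACK_ZERO, and the 4-byte store of an unknown
 * r3 marks its bytes STACK_MISC. Only the full 8-byte spill sets
 * REG_LIVE_WRITTEN on the slot, which keeps read propagation conservative
 * for partial writes.
 */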
2947
01f810ac
AM
2948/* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
2949 * known to contain a variable offset.
2950 * This function checks whether the write is permitted and conservatively
2951 * tracks the effects of the write, considering that each stack slot in the
2952 * dynamic range is potentially written to.
2953 *
2954 * 'off' includes 'regno->off'.
2955 * 'value_regno' can be -1, meaning that an unknown value is being written to
2956 * the stack.
2957 *
2958 * Spilled pointers in range are not marked as written because we don't know
2959 * what's going to be actually written. This means that read propagation for
2960 * future reads cannot be terminated by this write.
2961 *
2962 * For privileged programs, uninitialized stack slots are considered
2963 * initialized by this write (even though we don't know exactly what offsets
2964 * are going to be written to). The idea is that we don't want the verifier to
2965 * reject future reads that access slots written to through variable offsets.
2966 */
2967static int check_stack_write_var_off(struct bpf_verifier_env *env,
2968 /* func where register points to */
2969 struct bpf_func_state *state,
2970 int ptr_regno, int off, int size,
2971 int value_regno, int insn_idx)
2972{
2973 struct bpf_func_state *cur; /* state of the current function */
2974 int min_off, max_off;
2975 int i, err;
2976 struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
2977 bool writing_zero = false;
2978 /* set if the fact that we're writing a zero is used to let any
2979 * stack slots remain STACK_ZERO
2980 */
2981 bool zero_used = false;
2982
2983 cur = env->cur_state->frame[env->cur_state->curframe];
2984 ptr_reg = &cur->regs[ptr_regno];
2985 min_off = ptr_reg->smin_value + off;
2986 max_off = ptr_reg->smax_value + off + size;
2987 if (value_regno >= 0)
2988 value_reg = &cur->regs[value_regno];
2989 if (value_reg && register_is_null(value_reg))
2990 writing_zero = true;
2991
c69431aa 2992 err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
01f810ac
AM
2993 if (err)
2994 return err;
2995
2996
2997 /* Variable offset writes destroy any spilled pointers in range. */
2998 for (i = min_off; i < max_off; i++) {
2999 u8 new_type, *stype;
3000 int slot, spi;
3001
3002 slot = -i - 1;
3003 spi = slot / BPF_REG_SIZE;
3004 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
0f55f9ed 3005 mark_stack_slot_scratched(env, spi);
01f810ac
AM
3006
3007 if (!env->allow_ptr_leaks
3008 && *stype != NOT_INIT
3009 && *stype != SCALAR_VALUE) {
 3010 /* Reject the write if there are spilled pointers in
3011 * range. If we didn't reject here, the ptr status
3012 * would be erased below (even though not all slots are
3013 * actually overwritten), possibly opening the door to
3014 * leaks.
3015 */
3016 verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
3017 insn_idx, i);
3018 return -EINVAL;
3019 }
3020
3021 /* Erase all spilled pointers. */
3022 state->stack[spi].spilled_ptr.type = NOT_INIT;
3023
3024 /* Update the slot type. */
3025 new_type = STACK_MISC;
3026 if (writing_zero && *stype == STACK_ZERO) {
3027 new_type = STACK_ZERO;
3028 zero_used = true;
3029 }
3030 /* If the slot is STACK_INVALID, we check whether it's OK to
3031 * pretend that it will be initialized by this write. The slot
3032 * might not actually be written to, and so if we mark it as
 3033 * initialized, future reads might leak uninitialized memory.
 3034 * For privileged programs, we will accept such reads to slots
 3035 * that may or may not be written because, if we rejected
 3036 * them, the error would be too confusing.
3037 */
3038 if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
3039 verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
3040 insn_idx, i);
3041 return -EINVAL;
3042 }
3043 *stype = new_type;
3044 }
3045 if (zero_used) {
3046 /* backtracking doesn't work for STACK_ZERO yet. */
3047 err = mark_chain_precision(env, value_regno);
3048 if (err)
3049 return err;
3050 }
3051 return 0;
3052}
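/* An assumed variable-offset write (sketch only): with r2 = r10 + off and
 * the verifier only knowing -24 <= off <= -16, an 8-byte store through r2
 * makes every slot in [fp-24, fp-8) drop any spilled-pointer state and
 * become STACK_MISC (or stay STACK_ZERO when a known-zero value is being
 * written), since any one of those slots might be the real target.
 */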
3053
3054/* When register 'dst_regno' is assigned some values from stack[min_off,
3055 * max_off), we set the register's type according to the types of the
3056 * respective stack slots. If all the stack values are known to be zeros, then
3057 * so is the destination reg. Otherwise, the register is considered to be
3058 * SCALAR. This function does not deal with register filling; the caller must
3059 * ensure that all spilled registers in the stack range have been marked as
3060 * read.
3061 */
3062static void mark_reg_stack_read(struct bpf_verifier_env *env,
3063 /* func where src register points to */
3064 struct bpf_func_state *ptr_state,
3065 int min_off, int max_off, int dst_regno)
3066{
3067 struct bpf_verifier_state *vstate = env->cur_state;
3068 struct bpf_func_state *state = vstate->frame[vstate->curframe];
3069 int i, slot, spi;
3070 u8 *stype;
3071 int zeros = 0;
3072
3073 for (i = min_off; i < max_off; i++) {
3074 slot = -i - 1;
3075 spi = slot / BPF_REG_SIZE;
3076 stype = ptr_state->stack[spi].slot_type;
3077 if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
3078 break;
3079 zeros++;
3080 }
3081 if (zeros == max_off - min_off) {
3082 /* any access_size read into register is zero extended,
3083 * so the whole register == const_zero
3084 */
3085 __mark_reg_const_zero(&state->regs[dst_regno]);
3086 /* backtracking doesn't support STACK_ZERO yet,
3087 * so mark it precise here, so that later
3088 * backtracking can stop here.
3089 * Backtracking may not need this if this register
3090 * doesn't participate in pointer adjustment.
3091 * Forward propagation of precise flag is not
3092 * necessary either. This mark is only to stop
3093 * backtracking. Any register that contributed
3094 * to const 0 was marked precise before spill.
3095 */
3096 state->regs[dst_regno].precise = true;
3097 } else {
3098 /* have read misc data from the stack */
3099 mark_reg_unknown(env, state->regs, dst_regno);
3100 }
3101 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3102}
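/* Hypothetical read (for illustration): if every byte of stack[-16, -8) is
 * already STACK_ZERO, a load of that range sets the destination register to
 * a known zero and marks it precise, since backtracking cannot follow
 * STACK_ZERO yet; if any byte is STACK_MISC, the destination becomes an
 * unknown scalar instead.
 */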
3103
3104/* Read the stack at 'off' and put the results into the register indicated by
3105 * 'dst_regno'. It handles reg filling if the addressed stack slot is a
3106 * spilled reg.
3107 *
3108 * 'dst_regno' can be -1, meaning that the read value is not going to a
3109 * register.
3110 *
3111 * The access is assumed to be within the current stack bounds.
3112 */
3113static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
3114 /* func where src register points to */
3115 struct bpf_func_state *reg_state,
3116 int off, int size, int dst_regno)
17a52670 3117{
f4d7e40a
AS
3118 struct bpf_verifier_state *vstate = env->cur_state;
3119 struct bpf_func_state *state = vstate->frame[vstate->curframe];
638f5b90 3120 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
f7cf25b2 3121 struct bpf_reg_state *reg;
354e8f19 3122 u8 *stype, type;
17a52670 3123
f4d7e40a 3124 stype = reg_state->stack[spi].slot_type;
f7cf25b2 3125 reg = &reg_state->stack[spi].spilled_ptr;
17a52670 3126
27113c59 3127 if (is_spilled_reg(&reg_state->stack[spi])) {
f30d4968
MKL
3128 u8 spill_size = 1;
3129
3130 for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
3131 spill_size++;
354e8f19 3132
f30d4968 3133 if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
f7cf25b2
AS
3134 if (reg->type != SCALAR_VALUE) {
3135 verbose_linfo(env, env->insn_idx, "; ");
3136 verbose(env, "invalid size of register fill\n");
3137 return -EACCES;
3138 }
354e8f19
MKL
3139
3140 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3141 if (dst_regno < 0)
3142 return 0;
3143
f30d4968 3144 if (!(off % BPF_REG_SIZE) && size == spill_size) {
354e8f19
MKL
3145 /* The earlier check_reg_arg() has decided the
3146 * subreg_def for this insn. Save it first.
3147 */
3148 s32 subreg_def = state->regs[dst_regno].subreg_def;
3149
3150 state->regs[dst_regno] = *reg;
3151 state->regs[dst_regno].subreg_def = subreg_def;
3152 } else {
3153 for (i = 0; i < size; i++) {
3154 type = stype[(slot - i) % BPF_REG_SIZE];
3155 if (type == STACK_SPILL)
3156 continue;
3157 if (type == STACK_MISC)
3158 continue;
3159 verbose(env, "invalid read from stack off %d+%d size %d\n",
3160 off, i, size);
3161 return -EACCES;
3162 }
01f810ac 3163 mark_reg_unknown(env, state->regs, dst_regno);
f7cf25b2 3164 }
354e8f19 3165 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
f7cf25b2 3166 return 0;
17a52670 3167 }
17a52670 3168
01f810ac 3169 if (dst_regno >= 0) {
17a52670 3170 /* restore register state from stack */
01f810ac 3171 state->regs[dst_regno] = *reg;
2f18f62e
AS
3172 /* mark reg as written since spilled pointer state likely
3173 * has its liveness marks cleared by is_state_visited()
3174 * which resets stack/reg liveness for state transitions
3175 */
01f810ac 3176 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
6e7e63cb 3177 } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
01f810ac 3178 /* If dst_regno==-1, the caller is asking us whether
6e7e63cb
JH
3179 * it is acceptable to use this value as a SCALAR_VALUE
3180 * (e.g. for XADD).
3181 * We must not allow unprivileged callers to do that
3182 * with spilled pointers.
3183 */
3184 verbose(env, "leaking pointer from stack off %d\n",
3185 off);
3186 return -EACCES;
dc503a8a 3187 }
f7cf25b2 3188 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
17a52670
AS
3189 } else {
3190 for (i = 0; i < size; i++) {
01f810ac
AM
3191 type = stype[(slot - i) % BPF_REG_SIZE];
3192 if (type == STACK_MISC)
cc2b14d5 3193 continue;
01f810ac 3194 if (type == STACK_ZERO)
cc2b14d5 3195 continue;
cc2b14d5
AS
3196 verbose(env, "invalid read from stack off %d+%d size %d\n",
3197 off, i, size);
3198 return -EACCES;
3199 }
f7cf25b2 3200 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
01f810ac
AM
3201 if (dst_regno >= 0)
3202 mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
17a52670 3203 }
f7cf25b2 3204 return 0;
17a52670
AS
3205}
3206
61df10c7 3207enum bpf_access_src {
01f810ac
AM
3208 ACCESS_DIRECT = 1, /* the access is performed by an instruction */
3209 ACCESS_HELPER = 2, /* the access is performed by a helper */
3210};
3211
3212static int check_stack_range_initialized(struct bpf_verifier_env *env,
3213 int regno, int off, int access_size,
3214 bool zero_size_allowed,
61df10c7 3215 enum bpf_access_src type,
01f810ac
AM
3216 struct bpf_call_arg_meta *meta);
3217
3218static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
3219{
3220 return cur_regs(env) + regno;
3221}
3222
3223/* Read the stack at 'ptr_regno + off' and put the result into the register
3224 * 'dst_regno'.
3225 * 'off' includes the pointer register's fixed offset(i.e. 'ptr_regno.off'),
3226 * but not its variable offset.
3227 * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
3228 *
3229 * As opposed to check_stack_read_fixed_off, this function doesn't deal with
3230 * filling registers (i.e. reads of spilled register cannot be detected when
3231 * the offset is not fixed). We conservatively mark 'dst_regno' as containing
3232 * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
3233 * offset; for a fixed offset check_stack_read_fixed_off should be used
3234 * instead.
3235 */
3236static int check_stack_read_var_off(struct bpf_verifier_env *env,
3237 int ptr_regno, int off, int size, int dst_regno)
e4298d25 3238{
01f810ac
AM
3239 /* The state of the source register. */
3240 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3241 struct bpf_func_state *ptr_state = func(env, reg);
3242 int err;
3243 int min_off, max_off;
3244
3245 /* Note that we pass a NULL meta, so raw access will not be permitted.
e4298d25 3246 */
01f810ac
AM
3247 err = check_stack_range_initialized(env, ptr_regno, off, size,
3248 false, ACCESS_DIRECT, NULL);
3249 if (err)
3250 return err;
3251
3252 min_off = reg->smin_value + off;
3253 max_off = reg->smax_value + off;
3254 mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
3255 return 0;
3256}
3257
3258/* check_stack_read dispatches to check_stack_read_fixed_off or
3259 * check_stack_read_var_off.
3260 *
3261 * The caller must ensure that the offset falls within the allocated stack
3262 * bounds.
3263 *
3264 * 'dst_regno' is a register which will receive the value from the stack. It
3265 * can be -1, meaning that the read value is not going to a register.
3266 */
3267static int check_stack_read(struct bpf_verifier_env *env,
3268 int ptr_regno, int off, int size,
3269 int dst_regno)
3270{
3271 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3272 struct bpf_func_state *state = func(env, reg);
3273 int err;
3274 /* Some accesses are only permitted with a static offset. */
3275 bool var_off = !tnum_is_const(reg->var_off);
3276
3277 /* The offset is required to be static when reads don't go to a
3278 * register, in order to not leak pointers (see
3279 * check_stack_read_fixed_off).
3280 */
3281 if (dst_regno < 0 && var_off) {
e4298d25
DB
3282 char tn_buf[48];
3283
3284 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
01f810ac 3285 verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
e4298d25
DB
3286 tn_buf, off, size);
3287 return -EACCES;
3288 }
01f810ac
AM
3289 /* Variable offset is prohibited for unprivileged mode for simplicity
3290 * since it requires corresponding support in Spectre masking for stack
3291 * ALU. See also retrieve_ptr_limit().
3292 */
3293 if (!env->bypass_spec_v1 && var_off) {
3294 char tn_buf[48];
e4298d25 3295
01f810ac
AM
3296 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3297 verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
3298 ptr_regno, tn_buf);
e4298d25
DB
3299 return -EACCES;
3300 }
3301
01f810ac
AM
3302 if (!var_off) {
3303 off += reg->var_off.value;
3304 err = check_stack_read_fixed_off(env, state, off, size,
3305 dst_regno);
3306 } else {
3307 /* Variable offset stack reads need more conservative handling
3308 * than fixed offset ones. Note that dst_regno >= 0 on this
3309 * branch.
3310 */
3311 err = check_stack_read_var_off(env, ptr_regno, off, size,
3312 dst_regno);
3313 }
3314 return err;
3315}
3316
3317
3318/* check_stack_write dispatches to check_stack_write_fixed_off or
3319 * check_stack_write_var_off.
3320 *
3321 * 'ptr_regno' is the register used as a pointer into the stack.
3322 * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
3323 * 'value_regno' is the register whose value we're writing to the stack. It can
3324 * be -1, meaning that we're not writing from a register.
3325 *
3326 * The caller must ensure that the offset falls within the maximum stack size.
3327 */
3328static int check_stack_write(struct bpf_verifier_env *env,
3329 int ptr_regno, int off, int size,
3330 int value_regno, int insn_idx)
3331{
3332 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3333 struct bpf_func_state *state = func(env, reg);
3334 int err;
3335
3336 if (tnum_is_const(reg->var_off)) {
3337 off += reg->var_off.value;
3338 err = check_stack_write_fixed_off(env, state, off, size,
3339 value_regno, insn_idx);
3340 } else {
3341 /* Variable offset stack writes need more conservative handling
 3342 * than fixed offset ones.
3343 */
3344 err = check_stack_write_var_off(env, state,
3345 ptr_regno, off, size,
3346 value_regno, insn_idx);
3347 }
3348 return err;
e4298d25
DB
3349}
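/* A minimal sketch of the two dispatch paths, assuming a hypothetical
 * bounded index 'idx':
 *
 *	char buf[8] = {};
 *	buf[2] = 1;		// constant var_off -> check_stack_write_fixed_off()
 *	buf[idx & 3] = 1;	// unknown var_off  -> check_stack_write_var_off()
 */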
3350
591fe988
DB
3351static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
3352 int off, int size, enum bpf_access_type type)
3353{
3354 struct bpf_reg_state *regs = cur_regs(env);
3355 struct bpf_map *map = regs[regno].map_ptr;
3356 u32 cap = bpf_map_flags_to_cap(map);
3357
3358 if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
3359 verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
3360 map->value_size, off, size);
3361 return -EACCES;
3362 }
3363
3364 if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
3365 verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
3366 map->value_size, off, size);
3367 return -EACCES;
3368 }
3369
3370 return 0;
3371}
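/* A minimal sketch of a write this check rejects, assuming a hypothetical
 * array map 'ro_map' created with BPF_F_RDONLY_PROG, so that
 * bpf_map_flags_to_cap() does not include BPF_MAP_CAN_WRITE:
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, __u32);
 *		__type(value, __u64);
 *		__uint(map_flags, BPF_F_RDONLY_PROG);
 *	} ro_map SEC(".maps");
 *
 *	__u32 k = 0;
 *	__u64 *v = bpf_map_lookup_elem(&ro_map, &k);
 *	if (v)
 *		*v = 1;		// rejected: "write into map forbidden"
 */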
3372
457f4436
AN
3373/* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
3374static int __check_mem_access(struct bpf_verifier_env *env, int regno,
3375 int off, int size, u32 mem_size,
3376 bool zero_size_allowed)
17a52670 3377{
457f4436
AN
3378 bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
3379 struct bpf_reg_state *reg;
3380
3381 if (off >= 0 && size_ok && (u64)off + size <= mem_size)
3382 return 0;
17a52670 3383
457f4436
AN
3384 reg = &cur_regs(env)[regno];
3385 switch (reg->type) {
69c087ba
YS
3386 case PTR_TO_MAP_KEY:
3387 verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
3388 mem_size, off, size);
3389 break;
457f4436 3390 case PTR_TO_MAP_VALUE:
61bd5218 3391 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
457f4436
AN
3392 mem_size, off, size);
3393 break;
3394 case PTR_TO_PACKET:
3395 case PTR_TO_PACKET_META:
3396 case PTR_TO_PACKET_END:
3397 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
3398 off, size, regno, reg->id, off, mem_size);
3399 break;
3400 case PTR_TO_MEM:
3401 default:
3402 verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
3403 mem_size, off, size);
17a52670 3404 }
457f4436
AN
3405
3406 return -EACCES;
17a52670
AS
3407}
3408
457f4436
AN
3409/* check read/write into a memory region with possible variable offset */
3410static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
3411 int off, int size, u32 mem_size,
3412 bool zero_size_allowed)
dbcfe5f7 3413{
f4d7e40a
AS
3414 struct bpf_verifier_state *vstate = env->cur_state;
3415 struct bpf_func_state *state = vstate->frame[vstate->curframe];
dbcfe5f7
GB
3416 struct bpf_reg_state *reg = &state->regs[regno];
3417 int err;
3418
457f4436 3419 /* We may have adjusted the register pointing to memory region, so we
f1174f77
EC
3420 * need to try adding each of min_value and max_value to off
3421 * to make sure our theoretical access will be safe.
2e576648
CL
3422 *
3423 * The minimum value is only important with signed
dbcfe5f7
GB
3424 * comparisons where we can't assume the floor of a
3425 * value is 0. If we are using signed variables for our
3426 * indexes we need to make sure that whatever we use
3427 * will have a set floor within our range.
3428 */
b7137c4e
DB
3429 if (reg->smin_value < 0 &&
3430 (reg->smin_value == S64_MIN ||
3431 (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
3432 reg->smin_value + off < 0)) {
61bd5218 3433 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
dbcfe5f7
GB
3434 regno);
3435 return -EACCES;
3436 }
457f4436
AN
3437 err = __check_mem_access(env, regno, reg->smin_value + off, size,
3438 mem_size, zero_size_allowed);
dbcfe5f7 3439 if (err) {
457f4436 3440 verbose(env, "R%d min value is outside of the allowed memory range\n",
61bd5218 3441 regno);
dbcfe5f7
GB
3442 return err;
3443 }
3444
b03c9f9f
EC
3445 /* If we haven't set a max value then we need to bail since we can't be
3446 * sure we won't do bad things.
3447 * If reg->umax_value + off could overflow, treat that as unbounded too.
dbcfe5f7 3448 */
b03c9f9f 3449 if (reg->umax_value >= BPF_MAX_VAR_OFF) {
457f4436 3450 verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
dbcfe5f7
GB
3451 regno);
3452 return -EACCES;
3453 }
457f4436
AN
3454 err = __check_mem_access(env, regno, reg->umax_value + off, size,
3455 mem_size, zero_size_allowed);
3456 if (err) {
3457 verbose(env, "R%d max value is outside of the allowed memory range\n",
61bd5218 3458 regno);
457f4436
AN
3459 return err;
3460 }
3461
3462 return 0;
3463}
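/* A worked example of the two probes above, assuming a map value of
 * mem_size = 64 bytes, a fixed insn offset off = 48, an access size of 8
 * and a register with smin_value = 0, umax_value = 16:
 *
 *	min side: 0 + 48 + 8 = 56 <= 64	-> ok
 *	max side: 16 + 48 + 8 = 72 > 64	-> "R%d max value is outside of the
 *					    allowed memory range"
 */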
d83525ca 3464
e9147b44
KKD
3465static int __check_ptr_off_reg(struct bpf_verifier_env *env,
3466 const struct bpf_reg_state *reg, int regno,
3467 bool fixed_off_ok)
3468{
3469 /* Access to this pointer-typed register or passing it to a helper
3470 * is only allowed in its original, unmodified form.
3471 */
3472
3473 if (reg->off < 0) {
3474 verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
3475 reg_type_str(env, reg->type), regno, reg->off);
3476 return -EACCES;
3477 }
3478
3479 if (!fixed_off_ok && reg->off) {
3480 verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
3481 reg_type_str(env, reg->type), regno, reg->off);
3482 return -EACCES;
3483 }
3484
3485 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3486 char tn_buf[48];
3487
3488 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3489 verbose(env, "variable %s access var_off=%s disallowed\n",
3490 reg_type_str(env, reg->type), tn_buf);
3491 return -EACCES;
3492 }
3493
3494 return 0;
3495}
3496
3497int check_ptr_off_reg(struct bpf_verifier_env *env,
3498 const struct bpf_reg_state *reg, int regno)
3499{
3500 return __check_ptr_off_reg(env, reg, regno, false);
3501}
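/* A minimal sketch of what __check_ptr_off_reg() rejects, assuming a tc
 * program whose context 'skb' is PTR_TO_CTX:
 *
 *	char buf[2];
 *	bpf_skb_load_bytes((void *)skb + 1, 0, buf, sizeof(buf));
 *		// rejected: "dereference of modified ctx ptr ... off=1 disallowed"
 *
 * Only the original, unmodified pointer may be dereferenced or passed to
 * helpers; check_ptr_off_reg() is the strict variant with fixed_off_ok
 * set to false.
 */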
3502
61df10c7
KKD
3503static int map_kptr_match_type(struct bpf_verifier_env *env,
3504 struct bpf_map_value_off_desc *off_desc,
3505 struct bpf_reg_state *reg, u32 regno)
3506{
3507 const char *targ_name = kernel_type_name(off_desc->kptr.btf, off_desc->kptr.btf_id);
6efe152d 3508 int perm_flags = PTR_MAYBE_NULL;
61df10c7
KKD
3509 const char *reg_name = "";
3510
6efe152d
KKD
3511 /* Only unreferenced case accepts untrusted pointers */
3512 if (off_desc->type == BPF_KPTR_UNREF)
3513 perm_flags |= PTR_UNTRUSTED;
3514
3515 if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
61df10c7
KKD
3516 goto bad_type;
3517
3518 if (!btf_is_kernel(reg->btf)) {
3519 verbose(env, "R%d must point to kernel BTF\n", regno);
3520 return -EINVAL;
3521 }
3522 /* We need to verify reg->type and reg->btf, before accessing reg->btf */
3523 reg_name = kernel_type_name(reg->btf, reg->btf_id);
3524
c0a5a21c
KKD
3525 /* For the ref_ptr case, the release function check should ensure we get one
3526 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
3527 * normal store of unreferenced kptr, we must ensure var_off is zero.
3528 * Since ref_ptr cannot be accessed directly by BPF insns, checks for
3529 * reg->off and reg->ref_obj_id are not needed here.
3530 */
61df10c7
KKD
3531 if (__check_ptr_off_reg(env, reg, regno, true))
3532 return -EACCES;
3533
3534 /* A full type match is needed, as BTF can be vmlinux or module BTF, and
3535 * we also need to take into account the reg->off.
3536 *
3537 * We want to support cases like:
3538 *
3539 * struct foo {
3540 * struct bar br;
3541 * struct baz bz;
3542 * };
3543 *
3544 * struct foo *v;
3545 * v = func(); // PTR_TO_BTF_ID
3546 * val->foo = v; // reg->off is zero, btf and btf_id match type
3547 * val->bar = &v->br; // reg->off is still zero, but we need to retry with
3548 * // first member type of struct after comparison fails
3549 * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
3550 * // to match type
3551 *
3552 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
2ab3b380
KKD
3553 * is zero. We must also ensure that btf_struct_ids_match does not walk
3554 * the struct to match type against first member of struct, i.e. reject
3555 * second case from above. Hence, when type is BPF_KPTR_REF, we set
3556 * strict mode to true for type match.
61df10c7
KKD
3557 */
3558 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
2ab3b380
KKD
3559 off_desc->kptr.btf, off_desc->kptr.btf_id,
3560 off_desc->type == BPF_KPTR_REF))
61df10c7
KKD
3561 goto bad_type;
3562 return 0;
3563bad_type:
3564 verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
3565 reg_type_str(env, reg->type), reg_name);
6efe152d
KKD
3566 verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
3567 if (off_desc->type == BPF_KPTR_UNREF)
3568 verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
3569 targ_name);
3570 else
3571 verbose(env, "\n");
61df10c7
KKD
3572 return -EINVAL;
3573}
3574
3575static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
3576 int value_regno, int insn_idx,
3577 struct bpf_map_value_off_desc *off_desc)
3578{
3579 struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
3580 int class = BPF_CLASS(insn->code);
3581 struct bpf_reg_state *val_reg;
3582
3583 /* Things we already checked for in check_map_access and caller:
3584 * - Reject cases where variable offset may touch kptr
3585 * - size of access (must be BPF_DW)
3586 * - tnum_is_const(reg->var_off)
3587 * - off_desc->offset == off + reg->var_off.value
3588 */
3589 /* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
3590 if (BPF_MODE(insn->code) != BPF_MEM) {
3591 verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
3592 return -EACCES;
3593 }
3594
6efe152d
KKD
3595 /* We only allow loading a referenced kptr, since it will be marked as
 3596 * untrusted, similar to an unreferenced kptr.
3597 */
3598 if (class != BPF_LDX && off_desc->type == BPF_KPTR_REF) {
3599 verbose(env, "store to referenced kptr disallowed\n");
c0a5a21c
KKD
3600 return -EACCES;
3601 }
3602
61df10c7
KKD
3603 if (class == BPF_LDX) {
3604 val_reg = reg_state(env, value_regno);
3605 /* We can simply mark the value_regno receiving the pointer
3606 * value from map as PTR_TO_BTF_ID, with the correct type.
3607 */
3608 mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, off_desc->kptr.btf,
6efe152d 3609 off_desc->kptr.btf_id, PTR_MAYBE_NULL | PTR_UNTRUSTED);
61df10c7
KKD
3610 /* For mark_ptr_or_null_reg */
3611 val_reg->id = ++env->id_gen;
3612 } else if (class == BPF_STX) {
3613 val_reg = reg_state(env, value_regno);
3614 if (!register_is_null(val_reg) &&
3615 map_kptr_match_type(env, off_desc, val_reg, value_regno))
3616 return -EACCES;
3617 } else if (class == BPF_ST) {
3618 if (insn->imm) {
3619 verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
3620 off_desc->offset);
3621 return -EACCES;
3622 }
3623 } else {
3624 verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
3625 return -EACCES;
3626 }
3627 return 0;
3628}
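/* A minimal sketch, assuming a hypothetical map 'm' and key 'key', where
 * the map value contains an unreferenced kptr field (the '__kptr' BTF tag
 * at this point in the series) pointing to 'struct task_struct':
 *
 *	struct map_value {
 *		struct task_struct __kptr *task;
 *	};
 *
 *	struct map_value *v = bpf_map_lookup_elem(&m, &key);
 *	if (v) {
 *		struct task_struct *t = v->task;	// BPF_LDX: 't' becomes
 *							// PTR_TO_BTF_ID | PTR_MAYBE_NULL
 *							// | PTR_UNTRUSTED
 *		v->task = NULL;				// store of NULL is allowed
 *	}
 *
 * A referenced kptr ('__kptr_ref') can only be replaced through
 * bpf_kptr_xchg(); a plain store to it is rejected above.
 */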
3629
457f4436
AN
3630/* check read/write into a map element with possible variable offset */
3631static int check_map_access(struct bpf_verifier_env *env, u32 regno,
61df10c7
KKD
3632 int off, int size, bool zero_size_allowed,
3633 enum bpf_access_src src)
457f4436
AN
3634{
3635 struct bpf_verifier_state *vstate = env->cur_state;
3636 struct bpf_func_state *state = vstate->frame[vstate->curframe];
3637 struct bpf_reg_state *reg = &state->regs[regno];
3638 struct bpf_map *map = reg->map_ptr;
3639 int err;
3640
3641 err = check_mem_region_access(env, regno, off, size, map->value_size,
3642 zero_size_allowed);
3643 if (err)
3644 return err;
3645
3646 if (map_value_has_spin_lock(map)) {
3647 u32 lock = map->spin_lock_off;
d83525ca
AS
3648
3649 /* if any part of struct bpf_spin_lock can be touched by
3650 * load/store reject this program.
3651 * To check that [x1, x2) overlaps with [y1, y2)
3652 * it is sufficient to check x1 < y2 && y1 < x2.
3653 */
3654 if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
3655 lock < reg->umax_value + off + size) {
3656 verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
3657 return -EACCES;
3658 }
3659 }
68134668
AS
3660 if (map_value_has_timer(map)) {
3661 u32 t = map->timer_off;
3662
3663 if (reg->smin_value + off < t + sizeof(struct bpf_timer) &&
3664 t < reg->umax_value + off + size) {
3665 verbose(env, "bpf_timer cannot be accessed directly by load/store\n");
3666 return -EACCES;
3667 }
3668 }
61df10c7
KKD
3669 if (map_value_has_kptrs(map)) {
3670 struct bpf_map_value_off *tab = map->kptr_off_tab;
3671 int i;
3672
3673 for (i = 0; i < tab->nr_off; i++) {
3674 u32 p = tab->off[i].offset;
3675
3676 if (reg->smin_value + off < p + sizeof(u64) &&
3677 p < reg->umax_value + off + size) {
3678 if (src != ACCESS_DIRECT) {
3679 verbose(env, "kptr cannot be accessed indirectly by helper\n");
3680 return -EACCES;
3681 }
3682 if (!tnum_is_const(reg->var_off)) {
3683 verbose(env, "kptr access cannot have variable offset\n");
3684 return -EACCES;
3685 }
3686 if (p != off + reg->var_off.value) {
3687 verbose(env, "kptr access misaligned expected=%u off=%llu\n",
3688 p, off + reg->var_off.value);
3689 return -EACCES;
3690 }
3691 if (size != bpf_size_to_bytes(BPF_DW)) {
3692 verbose(env, "kptr access size must be BPF_DW\n");
3693 return -EACCES;
3694 }
3695 break;
3696 }
3697 }
3698 }
f1174f77 3699 return err;
dbcfe5f7
GB
3700}
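/* A worked example of the overlap rule used above for all three special
 * fields: with a struct bpf_spin_lock at value offset 0 (4 bytes) and an
 * access with off = 0, size = 8, smin_value = umax_value = 0:
 *
 *	x1 = 0, x2 = 8	(the access)
 *	y1 = 0, y2 = 4	(the lock)
 *	x1 < y2 && y1 < x2  ->  0 < 4 && 0 < 8  ->  overlap, access rejected
 */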
3701
969bf05e
AS
3702#define MAX_PACKET_OFF 0xffff
3703
58e2af8b 3704static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
3a0af8fd
TG
3705 const struct bpf_call_arg_meta *meta,
3706 enum bpf_access_type t)
4acf6c0b 3707{
7e40781c
UP
3708 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
3709
3710 switch (prog_type) {
5d66fa7d 3711 /* Program types only with direct read access go here! */
3a0af8fd
TG
3712 case BPF_PROG_TYPE_LWT_IN:
3713 case BPF_PROG_TYPE_LWT_OUT:
004d4b27 3714 case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2dbb9b9e 3715 case BPF_PROG_TYPE_SK_REUSEPORT:
5d66fa7d 3716 case BPF_PROG_TYPE_FLOW_DISSECTOR:
d5563d36 3717 case BPF_PROG_TYPE_CGROUP_SKB:
3a0af8fd
TG
3718 if (t == BPF_WRITE)
3719 return false;
8731745e 3720 fallthrough;
5d66fa7d
DB
3721
3722 /* Program types with direct read + write access go here! */
36bbef52
DB
3723 case BPF_PROG_TYPE_SCHED_CLS:
3724 case BPF_PROG_TYPE_SCHED_ACT:
4acf6c0b 3725 case BPF_PROG_TYPE_XDP:
3a0af8fd 3726 case BPF_PROG_TYPE_LWT_XMIT:
8a31db56 3727 case BPF_PROG_TYPE_SK_SKB:
4f738adb 3728 case BPF_PROG_TYPE_SK_MSG:
36bbef52
DB
3729 if (meta)
3730 return meta->pkt_access;
3731
3732 env->seen_direct_write = true;
4acf6c0b 3733 return true;
0d01da6a
SF
3734
3735 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3736 if (t == BPF_WRITE)
3737 env->seen_direct_write = true;
3738
3739 return true;
3740
4acf6c0b
BB
3741 default:
3742 return false;
3743 }
3744}
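/* A minimal sketch of the read/write split above, for a
 * BPF_PROG_TYPE_CGROUP_SKB program (direct read only):
 *
 *	void *data = (void *)(long)skb->data;
 *	void *data_end = (void *)(long)skb->data_end;
 *
 *	if (data + 1 > data_end)
 *		return 1;
 *	__u8 b = *(__u8 *)data;		// allowed, t == BPF_READ
 *	*(__u8 *)data = 0;		// rejected, t == BPF_WRITE
 */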
3745
f1174f77 3746static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
9fd29c08 3747 int size, bool zero_size_allowed)
f1174f77 3748{
638f5b90 3749 struct bpf_reg_state *regs = cur_regs(env);
f1174f77
EC
3750 struct bpf_reg_state *reg = &regs[regno];
3751 int err;
3752
3753 /* We may have added a variable offset to the packet pointer; but any
3754 * reg->range we have comes after that. We are only checking the fixed
3755 * offset.
3756 */
3757
3758 /* We don't allow negative numbers, because we aren't tracking enough
3759 * detail to prove they're safe.
3760 */
b03c9f9f 3761 if (reg->smin_value < 0) {
61bd5218 3762 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
f1174f77
EC
3763 regno);
3764 return -EACCES;
3765 }
6d94e741
AS
3766
3767 err = reg->range < 0 ? -EINVAL :
3768 __check_mem_access(env, regno, off, size, reg->range,
457f4436 3769 zero_size_allowed);
f1174f77 3770 if (err) {
61bd5218 3771 verbose(env, "R%d offset is outside of the packet\n", regno);
f1174f77
EC
3772 return err;
3773 }
e647815a 3774
457f4436 3775 /* __check_mem_access has made sure "off + size - 1" is within u16.
e647815a
JW
3776 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
3777 * otherwise find_good_pkt_pointers would have refused to set range info
457f4436 3778 * that __check_mem_access would have rejected this pkt access.
e647815a
JW
3779 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
3780 */
3781 env->prog->aux->max_pkt_offset =
3782 max_t(u32, env->prog->aux->max_pkt_offset,
3783 off + reg->umax_value + size - 1);
3784
f1174f77
EC
3785 return err;
3786}
3787
3788/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
31fd8581 3789static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
9e15db66 3790 enum bpf_access_type t, enum bpf_reg_type *reg_type,
22dc4a0f 3791 struct btf **btf, u32 *btf_id)
17a52670 3792{
f96da094
DB
3793 struct bpf_insn_access_aux info = {
3794 .reg_type = *reg_type,
9e15db66 3795 .log = &env->log,
f96da094 3796 };
31fd8581 3797
4f9218aa 3798 if (env->ops->is_valid_access &&
5e43f899 3799 env->ops->is_valid_access(off, size, t, env->prog, &info)) {
f96da094
DB
3800 /* A non zero info.ctx_field_size indicates that this field is a
3801 * candidate for later verifier transformation to load the whole
3802 * field and then apply a mask when accessed with a narrower
3803 * access than actual ctx access size. A zero info.ctx_field_size
3804 * will only allow for whole field access and rejects any other
3805 * type of narrower access.
31fd8581 3806 */
23994631 3807 *reg_type = info.reg_type;
31fd8581 3808
c25b2ae1 3809 if (base_type(*reg_type) == PTR_TO_BTF_ID) {
22dc4a0f 3810 *btf = info.btf;
9e15db66 3811 *btf_id = info.btf_id;
22dc4a0f 3812 } else {
9e15db66 3813 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
22dc4a0f 3814 }
32bbe007
AS
3815 /* remember the offset of last byte accessed in ctx */
3816 if (env->prog->aux->max_ctx_offset < off + size)
3817 env->prog->aux->max_ctx_offset = off + size;
17a52670 3818 return 0;
32bbe007 3819 }
17a52670 3820
61bd5218 3821 verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
17a52670
AS
3822 return -EACCES;
3823}
3824
d58e468b
PP
3825static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
3826 int size)
3827{
3828 if (size < 0 || off < 0 ||
3829 (u64)off + size > sizeof(struct bpf_flow_keys)) {
3830 verbose(env, "invalid access to flow keys off=%d size=%d\n",
3831 off, size);
3832 return -EACCES;
3833 }
3834 return 0;
3835}
3836
5f456649
MKL
3837static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
3838 u32 regno, int off, int size,
3839 enum bpf_access_type t)
c64b7983
JS
3840{
3841 struct bpf_reg_state *regs = cur_regs(env);
3842 struct bpf_reg_state *reg = &regs[regno];
5f456649 3843 struct bpf_insn_access_aux info = {};
46f8bc92 3844 bool valid;
c64b7983
JS
3845
3846 if (reg->smin_value < 0) {
3847 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3848 regno);
3849 return -EACCES;
3850 }
3851
46f8bc92
MKL
3852 switch (reg->type) {
3853 case PTR_TO_SOCK_COMMON:
3854 valid = bpf_sock_common_is_valid_access(off, size, t, &info);
3855 break;
3856 case PTR_TO_SOCKET:
3857 valid = bpf_sock_is_valid_access(off, size, t, &info);
3858 break;
655a51e5
MKL
3859 case PTR_TO_TCP_SOCK:
3860 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
3861 break;
fada7fdc
JL
3862 case PTR_TO_XDP_SOCK:
3863 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
3864 break;
46f8bc92
MKL
3865 default:
3866 valid = false;
c64b7983
JS
3867 }
3868
5f456649 3869
46f8bc92
MKL
3870 if (valid) {
3871 env->insn_aux_data[insn_idx].ctx_field_size =
3872 info.ctx_field_size;
3873 return 0;
3874 }
3875
3876 verbose(env, "R%d invalid %s access off=%d size=%d\n",
c25b2ae1 3877 regno, reg_type_str(env, reg->type), off, size);
46f8bc92
MKL
3878
3879 return -EACCES;
c64b7983
JS
3880}
3881
4cabc5b1
DB
3882static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
3883{
2a159c6f 3884 return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
4cabc5b1
DB
3885}
3886
f37a8cb8
DB
3887static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
3888{
2a159c6f 3889 const struct bpf_reg_state *reg = reg_state(env, regno);
f37a8cb8 3890
46f8bc92
MKL
3891 return reg->type == PTR_TO_CTX;
3892}
3893
3894static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
3895{
3896 const struct bpf_reg_state *reg = reg_state(env, regno);
3897
3898 return type_is_sk_pointer(reg->type);
f37a8cb8
DB
3899}
3900
ca369602
DB
3901static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
3902{
2a159c6f 3903 const struct bpf_reg_state *reg = reg_state(env, regno);
ca369602
DB
3904
3905 return type_is_pkt_pointer(reg->type);
3906}
3907
4b5defde
DB
3908static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
3909{
3910 const struct bpf_reg_state *reg = reg_state(env, regno);
3911
3912 /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
3913 return reg->type == PTR_TO_FLOW_KEYS;
3914}
3915
61bd5218
JK
3916static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
3917 const struct bpf_reg_state *reg,
d1174416 3918 int off, int size, bool strict)
969bf05e 3919{
f1174f77 3920 struct tnum reg_off;
e07b98d9 3921 int ip_align;
d1174416
DM
3922
3923 /* Byte size accesses are always allowed. */
3924 if (!strict || size == 1)
3925 return 0;
3926
e4eda884
DM
3927 /* For platforms that do not have a Kconfig enabling
3928 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
3929 * NET_IP_ALIGN is universally set to '2'. And on platforms
3930 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
3931 * to this code only in strict mode where we want to emulate
3932 * the NET_IP_ALIGN==2 checking. Therefore use an
3933 * unconditional IP align value of '2'.
e07b98d9 3934 */
e4eda884 3935 ip_align = 2;
f1174f77
EC
3936
3937 reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
3938 if (!tnum_is_aligned(reg_off, size)) {
3939 char tn_buf[48];
3940
3941 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218
JK
3942 verbose(env,
3943 "misaligned packet access off %d+%s+%d+%d size %d\n",
f1174f77 3944 ip_align, tn_buf, reg->off, off, size);
969bf05e
AS
3945 return -EACCES;
3946 }
79adffcd 3947
969bf05e
AS
3948 return 0;
3949}
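/* A worked example of the check above, with the unconditional ip_align of
 * 2 and reg->off == 0: a 4-byte load at packet offset 14 (the start of an
 * IPv4 header behind a 14-byte Ethernet header) computes 2 + 0 + 14 = 16,
 * and 16 % 4 == 0, so it is accepted; the same load at offset 15 gives
 * 17 % 4 != 0 and fails in strict alignment mode.
 */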
3950
61bd5218
JK
3951static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
3952 const struct bpf_reg_state *reg,
f1174f77
EC
3953 const char *pointer_desc,
3954 int off, int size, bool strict)
79adffcd 3955{
f1174f77
EC
3956 struct tnum reg_off;
3957
3958 /* Byte size accesses are always allowed. */
3959 if (!strict || size == 1)
3960 return 0;
3961
3962 reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
3963 if (!tnum_is_aligned(reg_off, size)) {
3964 char tn_buf[48];
3965
3966 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 3967 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
f1174f77 3968 pointer_desc, tn_buf, reg->off, off, size);
79adffcd
DB
3969 return -EACCES;
3970 }
3971
969bf05e
AS
3972 return 0;
3973}
3974
e07b98d9 3975static int check_ptr_alignment(struct bpf_verifier_env *env,
ca369602
DB
3976 const struct bpf_reg_state *reg, int off,
3977 int size, bool strict_alignment_once)
79adffcd 3978{
ca369602 3979 bool strict = env->strict_alignment || strict_alignment_once;
f1174f77 3980 const char *pointer_desc = "";
d1174416 3981
79adffcd
DB
3982 switch (reg->type) {
3983 case PTR_TO_PACKET:
de8f3a83
DB
3984 case PTR_TO_PACKET_META:
3985 /* Special case, because of NET_IP_ALIGN. Given metadata sits
3986 * right in front, treat it the very same way.
3987 */
61bd5218 3988 return check_pkt_ptr_alignment(env, reg, off, size, strict);
d58e468b
PP
3989 case PTR_TO_FLOW_KEYS:
3990 pointer_desc = "flow keys ";
3991 break;
69c087ba
YS
3992 case PTR_TO_MAP_KEY:
3993 pointer_desc = "key ";
3994 break;
f1174f77
EC
3995 case PTR_TO_MAP_VALUE:
3996 pointer_desc = "value ";
3997 break;
3998 case PTR_TO_CTX:
3999 pointer_desc = "context ";
4000 break;
4001 case PTR_TO_STACK:
4002 pointer_desc = "stack ";
01f810ac
AM
4003 /* The stack spill tracking logic in check_stack_write_fixed_off()
4004 * and check_stack_read_fixed_off() relies on stack accesses being
a5ec6ae1
JH
4005 * aligned.
4006 */
4007 strict = true;
f1174f77 4008 break;
c64b7983
JS
4009 case PTR_TO_SOCKET:
4010 pointer_desc = "sock ";
4011 break;
46f8bc92
MKL
4012 case PTR_TO_SOCK_COMMON:
4013 pointer_desc = "sock_common ";
4014 break;
655a51e5
MKL
4015 case PTR_TO_TCP_SOCK:
4016 pointer_desc = "tcp_sock ";
4017 break;
fada7fdc
JL
4018 case PTR_TO_XDP_SOCK:
4019 pointer_desc = "xdp_sock ";
4020 break;
79adffcd 4021 default:
f1174f77 4022 break;
79adffcd 4023 }
61bd5218
JK
4024 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
4025 strict);
79adffcd
DB
4026}
4027
f4d7e40a
AS
4028static int update_stack_depth(struct bpf_verifier_env *env,
4029 const struct bpf_func_state *func,
4030 int off)
4031{
9c8105bd 4032 u16 stack = env->subprog_info[func->subprogno].stack_depth;
f4d7e40a
AS
4033
4034 if (stack >= -off)
4035 return 0;
4036
4037 /* update known max for given subprogram */
9c8105bd 4038 env->subprog_info[func->subprogno].stack_depth = -off;
70a87ffe
AS
4039 return 0;
4040}
f4d7e40a 4041
70a87ffe
AS
4042 /* Starting from the main bpf function, walk all instructions of the function
 4043 * and recursively walk all callees that the given function can call.
4044 * Ignore jump and exit insns.
4045 * Since recursion is prevented by check_cfg() this algorithm
4046 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
4047 */
4048static int check_max_stack_depth(struct bpf_verifier_env *env)
4049{
9c8105bd
JW
4050 int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
4051 struct bpf_subprog_info *subprog = env->subprog_info;
70a87ffe 4052 struct bpf_insn *insn = env->prog->insnsi;
ebf7d1f5 4053 bool tail_call_reachable = false;
70a87ffe
AS
4054 int ret_insn[MAX_CALL_FRAMES];
4055 int ret_prog[MAX_CALL_FRAMES];
ebf7d1f5 4056 int j;
f4d7e40a 4057
70a87ffe 4058process_func:
7f6e4312
MF
4059 /* protect against potential stack overflow that might happen when
4060 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
4061 * depth for such case down to 256 so that the worst case scenario
4062 * would result in 8k stack size (32 which is tailcall limit * 256 =
4063 * 8k).
4064 *
4065 * To get the idea what might happen, see an example:
4066 * func1 -> sub rsp, 128
4067 * subfunc1 -> sub rsp, 256
4068 * tailcall1 -> add rsp, 256
4069 * func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
4070 * subfunc2 -> sub rsp, 64
4071 * subfunc22 -> sub rsp, 128
4072 * tailcall2 -> add rsp, 128
4073 * func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
4074 *
4075 * A tail call will unwind the current stack frame but it will not get rid
 4076 * of the caller's stack, as shown in the example above.
4077 */
4078 if (idx && subprog[idx].has_tail_call && depth >= 256) {
4079 verbose(env,
4080 "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
4081 depth);
4082 return -EACCES;
4083 }
70a87ffe
AS
4084 /* round up to 32-bytes, since this is granularity
4085 * of interpreter stack size
4086 */
9c8105bd 4087 depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
70a87ffe 4088 if (depth > MAX_BPF_STACK) {
f4d7e40a 4089 verbose(env, "combined stack size of %d calls is %d. Too large\n",
70a87ffe 4090 frame + 1, depth);
f4d7e40a
AS
4091 return -EACCES;
4092 }
70a87ffe 4093continue_func:
4cb3d99c 4094 subprog_end = subprog[idx + 1].start;
70a87ffe 4095 for (; i < subprog_end; i++) {
7ddc80a4
AS
4096 int next_insn;
4097
69c087ba 4098 if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
70a87ffe
AS
4099 continue;
4100 /* remember insn and function to return to */
4101 ret_insn[frame] = i + 1;
9c8105bd 4102 ret_prog[frame] = idx;
70a87ffe
AS
4103
4104 /* find the callee */
7ddc80a4
AS
4105 next_insn = i + insn[i].imm + 1;
4106 idx = find_subprog(env, next_insn);
9c8105bd 4107 if (idx < 0) {
70a87ffe 4108 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
7ddc80a4 4109 next_insn);
70a87ffe
AS
4110 return -EFAULT;
4111 }
7ddc80a4
AS
4112 if (subprog[idx].is_async_cb) {
4113 if (subprog[idx].has_tail_call) {
4114 verbose(env, "verifier bug. subprog has tail_call and async cb\n");
4115 return -EFAULT;
4116 }
4117 /* async callbacks don't increase bpf prog stack size */
4118 continue;
4119 }
4120 i = next_insn;
ebf7d1f5
MF
4121
4122 if (subprog[idx].has_tail_call)
4123 tail_call_reachable = true;
4124
70a87ffe
AS
4125 frame++;
4126 if (frame >= MAX_CALL_FRAMES) {
927cb781
PC
4127 verbose(env, "the call stack of %d frames is too deep !\n",
4128 frame);
4129 return -E2BIG;
70a87ffe
AS
4130 }
4131 goto process_func;
4132 }
ebf7d1f5
MF
4133 /* if tail call got detected across bpf2bpf calls then mark each of the
4134 * currently present subprog frames as tail call reachable subprogs;
4135 * this info will be utilized by JIT so that we will be preserving the
4136 * tail call counter throughout bpf2bpf calls combined with tailcalls
4137 */
4138 if (tail_call_reachable)
4139 for (j = 0; j < frame; j++)
4140 subprog[ret_prog[j]].tail_call_reachable = true;
5dd0a6b8
DB
4141 if (subprog[0].tail_call_reachable)
4142 env->prog->aux->tail_call_reachable = true;
ebf7d1f5 4143
70a87ffe
AS
4144 /* end of for() loop means the last insn of the 'subprog'
4145 * was reached. Doesn't matter whether it was JA or EXIT
4146 */
4147 if (frame == 0)
4148 return 0;
9c8105bd 4149 depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
70a87ffe
AS
4150 frame--;
4151 i = ret_insn[frame];
9c8105bd 4152 idx = ret_prog[frame];
70a87ffe 4153 goto continue_func;
f4d7e40a
AS
4154}
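/* A worked example of the accumulation above, assuming a main function
 * with a 96 byte frame calling a subprog with a 448 byte frame:
 *
 *	depth = round_up(96, 32) + round_up(448, 32) = 96 + 448 = 544
 *
 * which exceeds MAX_BPF_STACK (512), so loading fails with
 * "combined stack size of 2 calls is 544. Too large".
 */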
4155
19d28fbd 4156#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1ea47e01
AS
4157static int get_callee_stack_depth(struct bpf_verifier_env *env,
4158 const struct bpf_insn *insn, int idx)
4159{
4160 int start = idx + insn->imm + 1, subprog;
4161
4162 subprog = find_subprog(env, start);
4163 if (subprog < 0) {
4164 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
4165 start);
4166 return -EFAULT;
4167 }
9c8105bd 4168 return env->subprog_info[subprog].stack_depth;
1ea47e01 4169}
19d28fbd 4170#endif
1ea47e01 4171
afbf21dc
YS
4172static int __check_buffer_access(struct bpf_verifier_env *env,
4173 const char *buf_info,
4174 const struct bpf_reg_state *reg,
4175 int regno, int off, int size)
9df1c28b
MM
4176{
4177 if (off < 0) {
4178 verbose(env,
4fc00b79 4179 "R%d invalid %s buffer access: off=%d, size=%d\n",
afbf21dc 4180 regno, buf_info, off, size);
9df1c28b
MM
4181 return -EACCES;
4182 }
4183 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4184 char tn_buf[48];
4185
4186 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4187 verbose(env,
4fc00b79 4188 "R%d invalid variable buffer offset: off=%d, var_off=%s\n",
9df1c28b
MM
4189 regno, off, tn_buf);
4190 return -EACCES;
4191 }
afbf21dc
YS
4192
4193 return 0;
4194}
4195
4196static int check_tp_buffer_access(struct bpf_verifier_env *env,
4197 const struct bpf_reg_state *reg,
4198 int regno, int off, int size)
4199{
4200 int err;
4201
4202 err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
4203 if (err)
4204 return err;
4205
9df1c28b
MM
4206 if (off + size > env->prog->aux->max_tp_access)
4207 env->prog->aux->max_tp_access = off + size;
4208
4209 return 0;
4210}
4211
afbf21dc
YS
4212static int check_buffer_access(struct bpf_verifier_env *env,
4213 const struct bpf_reg_state *reg,
4214 int regno, int off, int size,
4215 bool zero_size_allowed,
afbf21dc
YS
4216 u32 *max_access)
4217{
44e9a741 4218 const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
afbf21dc
YS
4219 int err;
4220
4221 err = __check_buffer_access(env, buf_info, reg, regno, off, size);
4222 if (err)
4223 return err;
4224
4225 if (off + size > *max_access)
4226 *max_access = off + size;
4227
4228 return 0;
4229}
4230
3f50f132
JF
4231 /* The BPF architecture zero-extends alu32 ops into 64-bit registers */
4232static void zext_32_to_64(struct bpf_reg_state *reg)
4233{
4234 reg->var_off = tnum_subreg(reg->var_off);
4235 __reg_assign_32_into_64(reg);
4236}
9df1c28b 4237
0c17d1d2
JH
4238/* truncate register to smaller size (in bytes)
4239 * must be called with size < BPF_REG_SIZE
4240 */
4241static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
4242{
4243 u64 mask;
4244
4245 /* clear high bits in bit representation */
4246 reg->var_off = tnum_cast(reg->var_off, size);
4247
4248 /* fix arithmetic bounds */
4249 mask = ((u64)1 << (size * 8)) - 1;
4250 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
4251 reg->umin_value &= mask;
4252 reg->umax_value &= mask;
4253 } else {
4254 reg->umin_value = 0;
4255 reg->umax_value = mask;
4256 }
4257 reg->smin_value = reg->umin_value;
4258 reg->smax_value = reg->umax_value;
3f50f132
JF
4259
4260 /* If the size is smaller than the 32-bit subregister, the 32-bit
 4261 * values are truncated as well, so push the 64-bit bounds into the
 4262 * 32-bit bounds. The bounds above were already truncated to < 32 bits.
4263 */
4264 if (size >= 4)
4265 return;
4266 __reg_combine_64_into_32(reg);
0c17d1d2
JH
4267}
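/* A worked example of the truncation above, for size == 1 and a register
 * known to be in [0x120, 0x17f]:
 *
 *	mask = 0xff
 *	0x120 & ~mask == 0x17f & ~mask == 0x100	-> high bits agree
 *	umin_value = 0x120 & mask = 0x20
 *	umax_value = 0x17f & mask = 0x7f
 *
 * Had the high bits differed (e.g. bounds of [0xf0, 0x110]), the bounds
 * would instead have been reset to the full [0, 0xff] range.
 */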
4268
a23740ec
AN
4269static bool bpf_map_is_rdonly(const struct bpf_map *map)
4270{
353050be
DB
4271 /* A map is considered read-only if the following conditions are true:
4272 *
4273 * 1) BPF program side cannot change any of the map content. The
4274 * BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
4275 * and was set at map creation time.
4276 * 2) The map value(s) have been initialized from user space by a
4277 * loader and then "frozen", such that no new map update/delete
4278 * operations from syscall side are possible for the rest of
4279 * the map's lifetime from that point onwards.
4280 * 3) Any parallel/pending map update/delete operations from syscall
4281 * side have been completed. Only after that point, it's safe to
4282 * assume that map value(s) are immutable.
4283 */
4284 return (map->map_flags & BPF_F_RDONLY_PROG) &&
4285 READ_ONCE(map->frozen) &&
4286 !bpf_map_write_active(map);
a23740ec
AN
4287}
4288
4289static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
4290{
4291 void *ptr;
4292 u64 addr;
4293 int err;
4294
4295 err = map->ops->map_direct_value_addr(map, &addr, off);
4296 if (err)
4297 return err;
2dedd7d2 4298 ptr = (void *)(long)addr + off;
a23740ec
AN
4299
4300 switch (size) {
4301 case sizeof(u8):
4302 *val = (u64)*(u8 *)ptr;
4303 break;
4304 case sizeof(u16):
4305 *val = (u64)*(u16 *)ptr;
4306 break;
4307 case sizeof(u32):
4308 *val = (u64)*(u32 *)ptr;
4309 break;
4310 case sizeof(u64):
4311 *val = *(u64 *)ptr;
4312 break;
4313 default:
4314 return -EINVAL;
4315 }
4316 return 0;
4317}
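/* A minimal sketch of how the two helpers above combine, assuming a
 * hypothetical array map 'vals' created with BPF_F_RDONLY_PROG, populated
 * and then frozen from user space with bpf_map_freeze():
 *
 *	__u32 k = 0;
 *	__u64 *v = bpf_map_lookup_elem(&vals, &k);
 *	if (v && *v == 42) {		// *v is resolved at verification time
 *		...			// via map_direct_value_addr() and
 *	}				// tracked as a known SCALAR_VALUE,
 *					// enabling dead code elimination
 */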
4318
9e15db66
AS
4319static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
4320 struct bpf_reg_state *regs,
4321 int regno, int off, int size,
4322 enum bpf_access_type atype,
4323 int value_regno)
4324{
4325 struct bpf_reg_state *reg = regs + regno;
22dc4a0f
AN
4326 const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
4327 const char *tname = btf_name_by_offset(reg->btf, t->name_off);
c6f1bfe8 4328 enum bpf_type_flag flag = 0;
9e15db66
AS
4329 u32 btf_id;
4330 int ret;
4331
9e15db66
AS
4332 if (off < 0) {
4333 verbose(env,
4334 "R%d is ptr_%s invalid negative access: off=%d\n",
4335 regno, tname, off);
4336 return -EACCES;
4337 }
4338 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4339 char tn_buf[48];
4340
4341 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4342 verbose(env,
4343 "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
4344 regno, tname, off, tn_buf);
4345 return -EACCES;
4346 }
4347
c6f1bfe8
YS
4348 if (reg->type & MEM_USER) {
4349 verbose(env,
4350 "R%d is ptr_%s access user memory: off=%d\n",
4351 regno, tname, off);
4352 return -EACCES;
4353 }
4354
5844101a
HL
4355 if (reg->type & MEM_PERCPU) {
4356 verbose(env,
4357 "R%d is ptr_%s access percpu memory: off=%d\n",
4358 regno, tname, off);
4359 return -EACCES;
4360 }
4361
27ae7997 4362 if (env->ops->btf_struct_access) {
22dc4a0f 4363 ret = env->ops->btf_struct_access(&env->log, reg->btf, t,
c6f1bfe8 4364 off, size, atype, &btf_id, &flag);
27ae7997
MKL
4365 } else {
4366 if (atype != BPF_READ) {
4367 verbose(env, "only read is supported\n");
4368 return -EACCES;
4369 }
4370
22dc4a0f 4371 ret = btf_struct_access(&env->log, reg->btf, t, off, size,
c6f1bfe8 4372 atype, &btf_id, &flag);
27ae7997
MKL
4373 }
4374
9e15db66
AS
4375 if (ret < 0)
4376 return ret;
4377
6efe152d
KKD
4378 /* If this is an untrusted pointer, all pointers formed by walking it
4379 * also inherit the untrusted flag.
4380 */
4381 if (type_flag(reg->type) & PTR_UNTRUSTED)
4382 flag |= PTR_UNTRUSTED;
4383
41c48f3a 4384 if (atype == BPF_READ && value_regno >= 0)
c6f1bfe8 4385 mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
41c48f3a
AI
4386
4387 return 0;
4388}
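/* A minimal sketch of a pointer walk handled above, for a tracing program
 * where 'task' is a PTR_TO_BTF_ID to 'struct task_struct':
 *
 *	struct mm_struct *mm = task->mm;	// btf_struct_access() resolves
 *						// the member; 'mm' becomes
 *						// PTR_TO_BTF_ID of struct mm_struct
 *
 * If 'task' itself carried PTR_UNTRUSTED (e.g. it was loaded from an
 * unreferenced kptr), 'mm' inherits PTR_UNTRUSTED as well, per the flag
 * propagation above.
 */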
4389
4390static int check_ptr_to_map_access(struct bpf_verifier_env *env,
4391 struct bpf_reg_state *regs,
4392 int regno, int off, int size,
4393 enum bpf_access_type atype,
4394 int value_regno)
4395{
4396 struct bpf_reg_state *reg = regs + regno;
4397 struct bpf_map *map = reg->map_ptr;
c6f1bfe8 4398 enum bpf_type_flag flag = 0;
41c48f3a
AI
4399 const struct btf_type *t;
4400 const char *tname;
4401 u32 btf_id;
4402 int ret;
4403
4404 if (!btf_vmlinux) {
4405 verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
4406 return -ENOTSUPP;
4407 }
4408
4409 if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
4410 verbose(env, "map_ptr access not supported for map type %d\n",
4411 map->map_type);
4412 return -ENOTSUPP;
4413 }
4414
4415 t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
4416 tname = btf_name_by_offset(btf_vmlinux, t->name_off);
4417
4418 if (!env->allow_ptr_to_map_access) {
4419 verbose(env,
4420 "%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
4421 tname);
4422 return -EPERM;
9e15db66 4423 }
27ae7997 4424
41c48f3a
AI
4425 if (off < 0) {
4426 verbose(env, "R%d is %s invalid negative access: off=%d\n",
4427 regno, tname, off);
4428 return -EACCES;
4429 }
4430
4431 if (atype != BPF_READ) {
4432 verbose(env, "only read from %s is supported\n", tname);
4433 return -EACCES;
4434 }
4435
c6f1bfe8 4436 ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id, &flag);
41c48f3a
AI
4437 if (ret < 0)
4438 return ret;
4439
4440 if (value_regno >= 0)
c6f1bfe8 4441 mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
41c48f3a 4442
9e15db66
AS
4443 return 0;
4444}
4445
01f810ac
AM
4446/* Check that the stack access at the given offset is within bounds. The
4447 * maximum valid offset is -1.
4448 *
4449 * The minimum valid offset is -MAX_BPF_STACK for writes, and
4450 * -state->allocated_stack for reads.
4451 */
4452static int check_stack_slot_within_bounds(int off,
4453 struct bpf_func_state *state,
4454 enum bpf_access_type t)
4455{
4456 int min_valid_off;
4457
4458 if (t == BPF_WRITE)
4459 min_valid_off = -MAX_BPF_STACK;
4460 else
4461 min_valid_off = -state->allocated_stack;
4462
4463 if (off < min_valid_off || off > -1)
4464 return -EACCES;
4465 return 0;
4466}
4467
4468/* Check that the stack access at 'regno + off' falls within the maximum stack
4469 * bounds.
4470 *
4471 * 'off' includes `regno->offset`, but not its dynamic part (if any).
4472 */
4473static int check_stack_access_within_bounds(
4474 struct bpf_verifier_env *env,
4475 int regno, int off, int access_size,
61df10c7 4476 enum bpf_access_src src, enum bpf_access_type type)
01f810ac
AM
4477{
4478 struct bpf_reg_state *regs = cur_regs(env);
4479 struct bpf_reg_state *reg = regs + regno;
4480 struct bpf_func_state *state = func(env, reg);
4481 int min_off, max_off;
4482 int err;
4483 char *err_extra;
4484
4485 if (src == ACCESS_HELPER)
4486 /* We don't know if helpers are reading or writing (or both). */
4487 err_extra = " indirect access to";
4488 else if (type == BPF_READ)
4489 err_extra = " read from";
4490 else
4491 err_extra = " write to";
4492
4493 if (tnum_is_const(reg->var_off)) {
4494 min_off = reg->var_off.value + off;
4495 if (access_size > 0)
4496 max_off = min_off + access_size - 1;
4497 else
4498 max_off = min_off;
4499 } else {
4500 if (reg->smax_value >= BPF_MAX_VAR_OFF ||
4501 reg->smin_value <= -BPF_MAX_VAR_OFF) {
4502 verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
4503 err_extra, regno);
4504 return -EACCES;
4505 }
4506 min_off = reg->smin_value + off;
4507 if (access_size > 0)
4508 max_off = reg->smax_value + off + access_size - 1;
4509 else
4510 max_off = min_off;
4511 }
4512
4513 err = check_stack_slot_within_bounds(min_off, state, type);
4514 if (!err)
4515 err = check_stack_slot_within_bounds(max_off, state, type);
4516
4517 if (err) {
4518 if (tnum_is_const(reg->var_off)) {
4519 verbose(env, "invalid%s stack R%d off=%d size=%d\n",
4520 err_extra, regno, off, access_size);
4521 } else {
4522 char tn_buf[48];
4523
4524 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4525 verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
4526 err_extra, regno, tn_buf, access_size);
4527 }
4528 }
4529 return err;
4530}
41c48f3a 4531
17a52670
AS
4532/* check whether memory at (regno + off) is accessible for t = (read | write)
4533 * if t==write, value_regno is a register which value is stored into memory
4534 * if t==read, value_regno is a register which will receive the value from memory
4535 * if t==write && value_regno==-1, some unknown value is stored into memory
4536 * if t==read && value_regno==-1, don't care what we read from memory
4537 */
ca369602
DB
4538static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
4539 int off, int bpf_size, enum bpf_access_type t,
4540 int value_regno, bool strict_alignment_once)
17a52670 4541{
638f5b90
AS
4542 struct bpf_reg_state *regs = cur_regs(env);
4543 struct bpf_reg_state *reg = regs + regno;
f4d7e40a 4544 struct bpf_func_state *state;
17a52670
AS
4545 int size, err = 0;
4546
4547 size = bpf_size_to_bytes(bpf_size);
4548 if (size < 0)
4549 return size;
4550
f1174f77 4551 /* alignment checks will add in reg->off themselves */
ca369602 4552 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
969bf05e
AS
4553 if (err)
4554 return err;
17a52670 4555
f1174f77
EC
4556 /* for access checks, reg->off is just part of off */
4557 off += reg->off;
4558
69c087ba
YS
4559 if (reg->type == PTR_TO_MAP_KEY) {
4560 if (t == BPF_WRITE) {
4561 verbose(env, "write to change key R%d not allowed\n", regno);
4562 return -EACCES;
4563 }
4564
4565 err = check_mem_region_access(env, regno, off, size,
4566 reg->map_ptr->key_size, false);
4567 if (err)
4568 return err;
4569 if (value_regno >= 0)
4570 mark_reg_unknown(env, regs, value_regno);
4571 } else if (reg->type == PTR_TO_MAP_VALUE) {
61df10c7
KKD
4572 struct bpf_map_value_off_desc *kptr_off_desc = NULL;
4573
1be7f75d
AS
4574 if (t == BPF_WRITE && value_regno >= 0 &&
4575 is_pointer_value(env, value_regno)) {
61bd5218 4576 verbose(env, "R%d leaks addr into map\n", value_regno);
1be7f75d
AS
4577 return -EACCES;
4578 }
591fe988
DB
4579 err = check_map_access_type(env, regno, off, size, t);
4580 if (err)
4581 return err;
61df10c7
KKD
4582 err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT);
4583 if (err)
4584 return err;
4585 if (tnum_is_const(reg->var_off))
4586 kptr_off_desc = bpf_map_kptr_off_contains(reg->map_ptr,
4587 off + reg->var_off.value);
4588 if (kptr_off_desc) {
4589 err = check_map_kptr_access(env, regno, value_regno, insn_idx,
4590 kptr_off_desc);
4591 } else if (t == BPF_READ && value_regno >= 0) {
a23740ec
AN
4592 struct bpf_map *map = reg->map_ptr;
4593
4594 /* if map is read-only, track its contents as scalars */
4595 if (tnum_is_const(reg->var_off) &&
4596 bpf_map_is_rdonly(map) &&
4597 map->ops->map_direct_value_addr) {
4598 int map_off = off + reg->var_off.value;
4599 u64 val = 0;
4600
4601 err = bpf_map_direct_read(map, map_off, size,
4602 &val);
4603 if (err)
4604 return err;
4605
4606 regs[value_regno].type = SCALAR_VALUE;
4607 __mark_reg_known(&regs[value_regno], val);
4608 } else {
4609 mark_reg_unknown(env, regs, value_regno);
4610 }
4611 }
34d3a78c
HL
4612 } else if (base_type(reg->type) == PTR_TO_MEM) {
4613 bool rdonly_mem = type_is_rdonly_mem(reg->type);
4614
4615 if (type_may_be_null(reg->type)) {
4616 verbose(env, "R%d invalid mem access '%s'\n", regno,
4617 reg_type_str(env, reg->type));
4618 return -EACCES;
4619 }
4620
4621 if (t == BPF_WRITE && rdonly_mem) {
4622 verbose(env, "R%d cannot write into %s\n",
4623 regno, reg_type_str(env, reg->type));
4624 return -EACCES;
4625 }
4626
457f4436
AN
4627 if (t == BPF_WRITE && value_regno >= 0 &&
4628 is_pointer_value(env, value_regno)) {
4629 verbose(env, "R%d leaks addr into mem\n", value_regno);
4630 return -EACCES;
4631 }
34d3a78c 4632
457f4436
AN
4633 err = check_mem_region_access(env, regno, off, size,
4634 reg->mem_size, false);
34d3a78c 4635 if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
457f4436 4636 mark_reg_unknown(env, regs, value_regno);
1a0dc1ac 4637 } else if (reg->type == PTR_TO_CTX) {
f1174f77 4638 enum bpf_reg_type reg_type = SCALAR_VALUE;
22dc4a0f 4639 struct btf *btf = NULL;
9e15db66 4640 u32 btf_id = 0;
19de99f7 4641
1be7f75d
AS
4642 if (t == BPF_WRITE && value_regno >= 0 &&
4643 is_pointer_value(env, value_regno)) {
61bd5218 4644 verbose(env, "R%d leaks addr into ctx\n", value_regno);
1be7f75d
AS
4645 return -EACCES;
4646 }
f1174f77 4647
be80a1d3 4648 err = check_ptr_off_reg(env, reg, regno);
58990d1f
DB
4649 if (err < 0)
4650 return err;
4651
c6f1bfe8
YS
4652 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
4653 &btf_id);
9e15db66
AS
4654 if (err)
4655 verbose_linfo(env, insn_idx, "; ");
969bf05e 4656 if (!err && t == BPF_READ && value_regno >= 0) {
f1174f77 4657 /* ctx access returns either a scalar, or a
de8f3a83
DB
4658 * PTR_TO_PACKET[_META,_END]. In the latter
4659 * case, we know the offset is zero.
f1174f77 4660 */
46f8bc92 4661 if (reg_type == SCALAR_VALUE) {
638f5b90 4662 mark_reg_unknown(env, regs, value_regno);
46f8bc92 4663 } else {
638f5b90 4664 mark_reg_known_zero(env, regs,
61bd5218 4665 value_regno);
c25b2ae1 4666 if (type_may_be_null(reg_type))
46f8bc92 4667 regs[value_regno].id = ++env->id_gen;
5327ed3d
JW
4668 /* A load of ctx field could have different
4669 * actual load size with the one encoded in the
4670 * insn. When the dst is PTR, it is for sure not
4671 * a sub-register.
4672 */
4673 regs[value_regno].subreg_def = DEF_NOT_SUBREG;
c25b2ae1 4674 if (base_type(reg_type) == PTR_TO_BTF_ID) {
22dc4a0f 4675 regs[value_regno].btf = btf;
9e15db66 4676 regs[value_regno].btf_id = btf_id;
22dc4a0f 4677 }
46f8bc92 4678 }
638f5b90 4679 regs[value_regno].type = reg_type;
969bf05e 4680 }
17a52670 4681
f1174f77 4682 } else if (reg->type == PTR_TO_STACK) {
01f810ac
AM
4683 /* Basic bounds checks. */
4684 err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
e4298d25
DB
4685 if (err)
4686 return err;
8726679a 4687
f4d7e40a
AS
4688 state = func(env, reg);
4689 err = update_stack_depth(env, state, off);
4690 if (err)
4691 return err;
8726679a 4692
01f810ac
AM
4693 if (t == BPF_READ)
4694 err = check_stack_read(env, regno, off, size,
61bd5218 4695 value_regno);
01f810ac
AM
4696 else
4697 err = check_stack_write(env, regno, off, size,
4698 value_regno, insn_idx);
de8f3a83 4699 } else if (reg_is_pkt_pointer(reg)) {
3a0af8fd 4700 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
61bd5218 4701 verbose(env, "cannot write into packet\n");
969bf05e
AS
4702 return -EACCES;
4703 }
4acf6c0b
BB
4704 if (t == BPF_WRITE && value_regno >= 0 &&
4705 is_pointer_value(env, value_regno)) {
61bd5218
JK
4706 verbose(env, "R%d leaks addr into packet\n",
4707 value_regno);
4acf6c0b
BB
4708 return -EACCES;
4709 }
9fd29c08 4710 err = check_packet_access(env, regno, off, size, false);
969bf05e 4711 if (!err && t == BPF_READ && value_regno >= 0)
638f5b90 4712 mark_reg_unknown(env, regs, value_regno);
d58e468b
PP
4713 } else if (reg->type == PTR_TO_FLOW_KEYS) {
4714 if (t == BPF_WRITE && value_regno >= 0 &&
4715 is_pointer_value(env, value_regno)) {
4716 verbose(env, "R%d leaks addr into flow keys\n",
4717 value_regno);
4718 return -EACCES;
4719 }
4720
4721 err = check_flow_keys_access(env, off, size);
4722 if (!err && t == BPF_READ && value_regno >= 0)
4723 mark_reg_unknown(env, regs, value_regno);
46f8bc92 4724 } else if (type_is_sk_pointer(reg->type)) {
c64b7983 4725 if (t == BPF_WRITE) {
46f8bc92 4726 verbose(env, "R%d cannot write into %s\n",
c25b2ae1 4727 regno, reg_type_str(env, reg->type));
c64b7983
JS
4728 return -EACCES;
4729 }
5f456649 4730 err = check_sock_access(env, insn_idx, regno, off, size, t);
c64b7983
JS
4731 if (!err && value_regno >= 0)
4732 mark_reg_unknown(env, regs, value_regno);
9df1c28b
MM
4733 } else if (reg->type == PTR_TO_TP_BUFFER) {
4734 err = check_tp_buffer_access(env, reg, regno, off, size);
4735 if (!err && t == BPF_READ && value_regno >= 0)
4736 mark_reg_unknown(env, regs, value_regno);
bff61f6f
HL
4737 } else if (base_type(reg->type) == PTR_TO_BTF_ID &&
4738 !type_may_be_null(reg->type)) {
9e15db66
AS
4739 err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
4740 value_regno);
41c48f3a
AI
4741 } else if (reg->type == CONST_PTR_TO_MAP) {
4742 err = check_ptr_to_map_access(env, regs, regno, off, size, t,
4743 value_regno);
20b2aff4
HL
4744 } else if (base_type(reg->type) == PTR_TO_BUF) {
4745 bool rdonly_mem = type_is_rdonly_mem(reg->type);
20b2aff4
HL
4746 u32 *max_access;
4747
4748 if (rdonly_mem) {
4749 if (t == BPF_WRITE) {
4750 verbose(env, "R%d cannot write into %s\n",
4751 regno, reg_type_str(env, reg->type));
4752 return -EACCES;
4753 }
20b2aff4
HL
4754 max_access = &env->prog->aux->max_rdonly_access;
4755 } else {
20b2aff4 4756 max_access = &env->prog->aux->max_rdwr_access;
afbf21dc 4757 }
20b2aff4 4758
f6dfbe31 4759 err = check_buffer_access(env, reg, regno, off, size, false,
44e9a741 4760 max_access);
20b2aff4
HL
4761
4762 if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
afbf21dc 4763 mark_reg_unknown(env, regs, value_regno);
17a52670 4764 } else {
61bd5218 4765 verbose(env, "R%d invalid mem access '%s'\n", regno,
c25b2ae1 4766 reg_type_str(env, reg->type));
17a52670
AS
4767 return -EACCES;
4768 }
969bf05e 4769
f1174f77 4770 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
638f5b90 4771 regs[value_regno].type == SCALAR_VALUE) {
f1174f77 4772 /* b/h/w load zero-extends, mark upper bits as known 0 */
0c17d1d2 4773 coerce_reg_to_size(&regs[value_regno], size);
969bf05e 4774 }
17a52670
AS
4775 return err;
4776}
4777
91c960b0 4778static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
17a52670 4779{
5ffa2550 4780 int load_reg;
17a52670
AS
4781 int err;
4782
5ca419f2
BJ
4783 switch (insn->imm) {
4784 case BPF_ADD:
4785 case BPF_ADD | BPF_FETCH:
981f94c3
BJ
4786 case BPF_AND:
4787 case BPF_AND | BPF_FETCH:
4788 case BPF_OR:
4789 case BPF_OR | BPF_FETCH:
4790 case BPF_XOR:
4791 case BPF_XOR | BPF_FETCH:
5ffa2550
BJ
4792 case BPF_XCHG:
4793 case BPF_CMPXCHG:
5ca419f2
BJ
4794 break;
4795 default:
91c960b0
BJ
4796 verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
4797 return -EINVAL;
4798 }
4799
4800 if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
4801 verbose(env, "invalid atomic operand size\n");
17a52670
AS
4802 return -EINVAL;
4803 }
4804
4805 /* check src1 operand */
dc503a8a 4806 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
4807 if (err)
4808 return err;
4809
4810 /* check src2 operand */
dc503a8a 4811 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
4812 if (err)
4813 return err;
4814
5ffa2550
BJ
4815 if (insn->imm == BPF_CMPXCHG) {
4816 /* Check comparison of R0 with memory location */
a82fe085
DB
4817 const u32 aux_reg = BPF_REG_0;
4818
4819 err = check_reg_arg(env, aux_reg, SRC_OP);
5ffa2550
BJ
4820 if (err)
4821 return err;
a82fe085
DB
4822
4823 if (is_pointer_value(env, aux_reg)) {
4824 verbose(env, "R%d leaks addr into mem\n", aux_reg);
4825 return -EACCES;
4826 }
5ffa2550
BJ
4827 }
4828
6bdf6abc 4829 if (is_pointer_value(env, insn->src_reg)) {
61bd5218 4830 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
6bdf6abc
DB
4831 return -EACCES;
4832 }
4833
ca369602 4834 if (is_ctx_reg(env, insn->dst_reg) ||
4b5defde 4835 is_pkt_reg(env, insn->dst_reg) ||
46f8bc92
MKL
4836 is_flow_key_reg(env, insn->dst_reg) ||
4837 is_sk_reg(env, insn->dst_reg)) {
91c960b0 4838 verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
2a159c6f 4839 insn->dst_reg,
c25b2ae1 4840 reg_type_str(env, reg_state(env, insn->dst_reg)->type));
f37a8cb8
DB
4841 return -EACCES;
4842 }
4843
37086bfd
BJ
4844 if (insn->imm & BPF_FETCH) {
4845 if (insn->imm == BPF_CMPXCHG)
4846 load_reg = BPF_REG_0;
4847 else
4848 load_reg = insn->src_reg;
4849
4850 /* check and record load of old value */
4851 err = check_reg_arg(env, load_reg, DST_OP);
4852 if (err)
4853 return err;
4854 } else {
4855 /* This instruction accesses a memory location but doesn't
4856 * actually load it into a register.
4857 */
4858 load_reg = -1;
4859 }
4860
7d3baf0a
DB
4861 /* Check whether we can read the memory, with second call for fetch
4862 * case to simulate the register fill.
4863 */
31fd8581 4864 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
7d3baf0a
DB
4865 BPF_SIZE(insn->code), BPF_READ, -1, true);
4866 if (!err && load_reg >= 0)
4867 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4868 BPF_SIZE(insn->code), BPF_READ, load_reg,
4869 true);
17a52670
AS
4870 if (err)
4871 return err;
4872
7d3baf0a 4873 /* Check whether we can write into the same memory. */
5ca419f2
BJ
4874 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4875 BPF_SIZE(insn->code), BPF_WRITE, -1, true);
4876 if (err)
4877 return err;
4878
5ca419f2 4879 return 0;
17a52670
AS
4880}
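/* A minimal sketch of an instruction that reaches this check, assuming 'v'
 * points into a writable map value and a clang target that supports the
 * atomic fetch instructions (e.g. -mcpu=v3):
 *
 *	__u64 old = __sync_fetch_and_add(v, 1);	// BPF_ATOMIC | BPF_ADD | BPF_FETCH
 *
 * The memory at 'v' is checked twice: once as a read (to simulate the
 * fetch of the old value into the register) and once as a write. Atomics
 * on ctx, packet, flow-keys or socket pointers are rejected outright.
 */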
4881
01f810ac
AM
4882/* When register 'regno' is used to read the stack (either directly or through
4883 * a helper function) make sure that it's within stack boundary and, depending
4884 * on the access type, that all elements of the stack are initialized.
4885 *
4886 * 'off' includes 'regno->off', but not its dynamic part (if any).
4887 *
4888 * All registers that have been spilled on the stack in the slots within the
4889 * read offsets are marked as read.
4890 */
4891static int check_stack_range_initialized(
4892 struct bpf_verifier_env *env, int regno, int off,
4893 int access_size, bool zero_size_allowed,
61df10c7 4894 enum bpf_access_src type, struct bpf_call_arg_meta *meta)
2011fccf
AI
4895{
4896 struct bpf_reg_state *reg = reg_state(env, regno);
01f810ac
AM
4897 struct bpf_func_state *state = func(env, reg);
4898 int err, min_off, max_off, i, j, slot, spi;
4899 char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
4900 enum bpf_access_type bounds_check_type;
4901 /* Some accesses can write anything into the stack, others are
4902 * read-only.
4903 */
4904 bool clobber = false;
2011fccf 4905
01f810ac
AM
4906 if (access_size == 0 && !zero_size_allowed) {
4907 verbose(env, "invalid zero-sized read\n");
2011fccf
AI
4908 return -EACCES;
4909 }
2011fccf 4910
01f810ac
AM
4911 if (type == ACCESS_HELPER) {
4912 /* The bounds checks for writes are more permissive than for
4913 * reads. However, if raw_mode is not set, we'll do extra
4914 * checks below.
4915 */
4916 bounds_check_type = BPF_WRITE;
4917 clobber = true;
4918 } else {
4919 bounds_check_type = BPF_READ;
4920 }
4921 err = check_stack_access_within_bounds(env, regno, off, access_size,
4922 type, bounds_check_type);
4923 if (err)
4924 return err;
4925
17a52670 4926
2011fccf 4927 if (tnum_is_const(reg->var_off)) {
01f810ac 4928 min_off = max_off = reg->var_off.value + off;
2011fccf 4929 } else {
088ec26d
AI
4930 /* Variable offset is prohibited for unprivileged mode for
4931 * simplicity since it requires corresponding support in
4932 * Spectre masking for stack ALU.
4933 * See also retrieve_ptr_limit().
4934 */
2c78ee89 4935 if (!env->bypass_spec_v1) {
088ec26d 4936 char tn_buf[48];
f1174f77 4937
088ec26d 4938 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
01f810ac
AM
4939 verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
4940 regno, err_extra, tn_buf);
088ec26d
AI
4941 return -EACCES;
4942 }
f2bcd05e
AI
4943 /* Only an initialized buffer on the stack may be accessed with a
4944 * variable offset. With an uninitialized buffer it's hard to
4945 * guarantee that the whole memory region is marked as initialized
4946 * on helper return, since the exact bounds are unknown, which could
4947 * leak uninitialized stack memory.
4948 */
4949 if (meta && meta->raw_mode)
4950 meta = NULL;
4951
01f810ac
AM
4952 min_off = reg->smin_value + off;
4953 max_off = reg->smax_value + off;
17a52670
AS
4954 }
4955
435faee1
DB
4956 if (meta && meta->raw_mode) {
4957 meta->access_size = access_size;
4958 meta->regno = regno;
4959 return 0;
4960 }
4961
2011fccf 4962 for (i = min_off; i < max_off + access_size; i++) {
cc2b14d5
AS
4963 u8 *stype;
4964
2011fccf 4965 slot = -i - 1;
638f5b90 4966 spi = slot / BPF_REG_SIZE;
cc2b14d5
AS
4967 if (state->allocated_stack <= slot)
4968 goto err;
4969 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
4970 if (*stype == STACK_MISC)
4971 goto mark;
4972 if (*stype == STACK_ZERO) {
01f810ac
AM
4973 if (clobber) {
4974 /* helper can write anything into the stack */
4975 *stype = STACK_MISC;
4976 }
cc2b14d5 4977 goto mark;
17a52670 4978 }
1d68f22b 4979
27113c59 4980 if (is_spilled_reg(&state->stack[spi]) &&
5844101a 4981 base_type(state->stack[spi].spilled_ptr.type) == PTR_TO_BTF_ID)
1d68f22b
YS
4982 goto mark;
4983
27113c59 4984 if (is_spilled_reg(&state->stack[spi]) &&
cd17d38f
YS
4985 (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
4986 env->allow_ptr_leaks)) {
01f810ac
AM
4987 if (clobber) {
4988 __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
4989 for (j = 0; j < BPF_REG_SIZE; j++)
354e8f19 4990 scrub_spilled_slot(&state->stack[spi].slot_type[j]);
01f810ac 4991 }
f7cf25b2
AS
4992 goto mark;
4993 }
4994
cc2b14d5 4995err:
2011fccf 4996 if (tnum_is_const(reg->var_off)) {
01f810ac
AM
4997 verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
4998 err_extra, regno, min_off, i - min_off, access_size);
2011fccf
AI
4999 } else {
5000 char tn_buf[48];
5001
5002 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
01f810ac
AM
5003 verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
5004 err_extra, regno, tn_buf, i - min_off, access_size);
2011fccf 5005 }
cc2b14d5
AS
5006 return -EACCES;
5007mark:
5008 /* reading any byte out of 8-byte 'spill_slot' will cause
5009 * the whole slot to be marked as 'read'
5010 */
679c782d 5011 mark_reg_read(env, &state->stack[spi].spilled_ptr,
5327ed3d
JW
5012 state->stack[spi].spilled_ptr.parent,
5013 REG_LIVE_READ64);
17a52670 5014 }
2011fccf 5015 return update_stack_depth(env, state, min_off);
17a52670
AS
5016}
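/* Illustrative sketch (not part of this file): how the check above surfaces
 * in BPF C. A helper argument declared as read-only memory plus size (e.g.
 * the data argument of bpf_ringbuf_output()) may only be given stack memory
 * that is fully initialized for the requested size; 'rb' below is an
 * assumed BPF_MAP_TYPE_RINGBUF map:
 *
 *	char buf[16] = {};			// zero-init marks the slots
 *	...
 *	bpf_ringbuf_output(&rb, buf, sizeof(buf), 0);
 *
 * Passing an uninitialized 'char buf[16];' instead is rejected with an
 * "invalid indirect read from stack" error, while raw-mode (uninitialized)
 * arguments skip this requirement.
 */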
5017
06c1c049
GB
5018static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
5019 int access_size, bool zero_size_allowed,
5020 struct bpf_call_arg_meta *meta)
5021{
638f5b90 5022 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
20b2aff4 5023 u32 *max_access;
06c1c049 5024
20b2aff4 5025 switch (base_type(reg->type)) {
06c1c049 5026 case PTR_TO_PACKET:
de8f3a83 5027 case PTR_TO_PACKET_META:
9fd29c08
YS
5028 return check_packet_access(env, regno, reg->off, access_size,
5029 zero_size_allowed);
69c087ba 5030 case PTR_TO_MAP_KEY:
7b3552d3
KKD
5031 if (meta && meta->raw_mode) {
5032 verbose(env, "R%d cannot write into %s\n", regno,
5033 reg_type_str(env, reg->type));
5034 return -EACCES;
5035 }
69c087ba
YS
5036 return check_mem_region_access(env, regno, reg->off, access_size,
5037 reg->map_ptr->key_size, false);
06c1c049 5038 case PTR_TO_MAP_VALUE:
591fe988
DB
5039 if (check_map_access_type(env, regno, reg->off, access_size,
5040 meta && meta->raw_mode ? BPF_WRITE :
5041 BPF_READ))
5042 return -EACCES;
9fd29c08 5043 return check_map_access(env, regno, reg->off, access_size,
61df10c7 5044 zero_size_allowed, ACCESS_HELPER);
457f4436 5045 case PTR_TO_MEM:
97e6d7da
KKD
5046 if (type_is_rdonly_mem(reg->type)) {
5047 if (meta && meta->raw_mode) {
5048 verbose(env, "R%d cannot write into %s\n", regno,
5049 reg_type_str(env, reg->type));
5050 return -EACCES;
5051 }
5052 }
457f4436
AN
5053 return check_mem_region_access(env, regno, reg->off,
5054 access_size, reg->mem_size,
5055 zero_size_allowed);
20b2aff4
HL
5056 case PTR_TO_BUF:
5057 if (type_is_rdonly_mem(reg->type)) {
97e6d7da
KKD
5058 if (meta && meta->raw_mode) {
5059 verbose(env, "R%d cannot write into %s\n", regno,
5060 reg_type_str(env, reg->type));
20b2aff4 5061 return -EACCES;
97e6d7da 5062 }
20b2aff4 5063
20b2aff4
HL
5064 max_access = &env->prog->aux->max_rdonly_access;
5065 } else {
20b2aff4
HL
5066 max_access = &env->prog->aux->max_rdwr_access;
5067 }
afbf21dc
YS
5068 return check_buffer_access(env, reg, regno, reg->off,
5069 access_size, zero_size_allowed,
44e9a741 5070 max_access);
0d004c02 5071 case PTR_TO_STACK:
01f810ac
AM
5072 return check_stack_range_initialized(
5073 env,
5074 regno, reg->off, access_size,
5075 zero_size_allowed, ACCESS_HELPER, meta);
0d004c02
LB
5076 default: /* scalar_value or invalid ptr */
5077 /* Allow zero-byte read from NULL, regardless of pointer type */
5078 if (zero_size_allowed && access_size == 0 &&
5079 register_is_null(reg))
5080 return 0;
5081
c25b2ae1
HL
5082 verbose(env, "R%d type=%s ", regno,
5083 reg_type_str(env, reg->type));
5084 verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
0d004c02 5085 return -EACCES;
06c1c049
GB
5086 }
5087}
5088
d583691c
KKD
5089static int check_mem_size_reg(struct bpf_verifier_env *env,
5090 struct bpf_reg_state *reg, u32 regno,
5091 bool zero_size_allowed,
5092 struct bpf_call_arg_meta *meta)
5093{
5094 int err;
5095
5096 /* This is used to refine r0 return value bounds for helpers
5097 * that enforce this value as an upper bound on return values.
5098 * See do_refine_retval_range() for helpers that can refine
5099 * the return value. The helper's size argument has C type u32, so we
5100 * pull the register bound from umax_value; if it can be negative, the
5101 * verifier errors out. Only upper bounds can be learned because retval is an
5102 * int type and negative retvals are allowed.
5103 */
be77354a 5104 meta->msize_max_value = reg->umax_value;
d583691c
KKD
5105
5106 /* The register is SCALAR_VALUE; the access check
5107 * happens using its boundaries.
5108 */
5109 if (!tnum_is_const(reg->var_off))
5110 /* For unprivileged variable accesses, disable raw
5111 * mode so that the program is required to
5112 * initialize all the memory that the helper could
5113 * just partially fill up.
5114 */
5115 meta = NULL;
5116
5117 if (reg->smin_value < 0) {
5118 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
5119 regno);
5120 return -EACCES;
5121 }
5122
5123 if (reg->umin_value == 0) {
5124 err = check_helper_mem_access(env, regno - 1, 0,
5125 zero_size_allowed,
5126 meta);
5127 if (err)
5128 return err;
5129 }
5130
5131 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
5132 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
5133 regno);
5134 return -EACCES;
5135 }
5136 err = check_helper_mem_access(env, regno - 1,
5137 reg->umax_value,
5138 zero_size_allowed, meta);
5139 if (!err)
5140 err = mark_chain_precision(env, regno);
5141 return err;
5142}
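/* Illustrative sketch (not part of this file): the BPF C pattern that
 * satisfies check_mem_size_reg() for a variable size. The size register
 * must have a non-negative minimum and a bounded maximum, typically
 * established with an explicit comparison; 'src' is an assumed kernel
 * pointer and 'len' an unsigned scalar of unknown value:
 *
 *	char buf[64];
 *	if (len > sizeof(buf))			// establishes umax_value
 *		return 0;
 *	bpf_probe_read_kernel(buf, len, src);
 *
 * Without the comparison the verifier reports "unbounded memory access,
 * use 'var &= const' or 'if (var < const)'".
 */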
5143
e5069b9c
DB
5144int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
5145 u32 regno, u32 mem_size)
5146{
be77354a
KKD
5147 bool may_be_null = type_may_be_null(reg->type);
5148 struct bpf_reg_state saved_reg;
5149 struct bpf_call_arg_meta meta;
5150 int err;
5151
e5069b9c
DB
5152 if (register_is_null(reg))
5153 return 0;
5154
be77354a
KKD
5155 memset(&meta, 0, sizeof(meta));
5156 /* Assuming that the register contains a value check if the memory
5157 * access is safe. Temporarily save and restore the register's state as
5158 * the conversion shouldn't be visible to a caller.
5159 */
5160 if (may_be_null) {
5161 saved_reg = *reg;
e5069b9c 5162 mark_ptr_not_null_reg(reg);
e5069b9c
DB
5163 }
5164
be77354a
KKD
5165 err = check_helper_mem_access(env, regno, mem_size, true, &meta);
5166 /* Check access for BPF_WRITE */
5167 meta.raw_mode = true;
5168 err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta);
5169
5170 if (may_be_null)
5171 *reg = saved_reg;
5172
5173 return err;
e5069b9c
DB
5174}
5175
d583691c
KKD
5176int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
5177 u32 regno)
5178{
5179 struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
5180 bool may_be_null = type_may_be_null(mem_reg->type);
5181 struct bpf_reg_state saved_reg;
be77354a 5182 struct bpf_call_arg_meta meta;
d583691c
KKD
5183 int err;
5184
5185 WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
5186
be77354a
KKD
5187 memset(&meta, 0, sizeof(meta));
5188
d583691c
KKD
5189 if (may_be_null) {
5190 saved_reg = *mem_reg;
5191 mark_ptr_not_null_reg(mem_reg);
5192 }
5193
be77354a
KKD
5194 err = check_mem_size_reg(env, reg, regno, true, &meta);
5195 /* Check access for BPF_WRITE */
5196 meta.raw_mode = true;
5197 err = err ?: check_mem_size_reg(env, reg, regno, true, &meta);
d583691c
KKD
5198
5199 if (may_be_null)
5200 *mem_reg = saved_reg;
5201 return err;
5202}
5203
d83525ca
AS
5204/* Implementation details:
5205 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
5206 * Two bpf_map_lookups (even with the same key) will have different reg->id.
5207 * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
5208 * value_or_null->value transition, since the verifier only cares about
5209 * the range of access to valid map value pointer and doesn't care about actual
5210 * address of the map element.
5211 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
5212 * reg->id > 0 after value_or_null->value transition. By doing so
5213 * two bpf_map_lookups will be considered two different pointers that
5214 * point to different bpf_spin_locks.
5215 * The verifier allows taking only one bpf_spin_lock at a time to avoid
5216 * dead-locks.
5217 * Since only one bpf_spin_lock is allowed the checks are simpler than
5218 * reg_is_refcounted() logic. The verifier needs to remember only
5219 * one spin_lock instead of array of acquired_refs.
5220 * cur_state->active_spin_lock remembers which map value element got locked
5221 * and clears it after bpf_spin_unlock.
5222 */
5223static int process_spin_lock(struct bpf_verifier_env *env, int regno,
5224 bool is_lock)
5225{
5226 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5227 struct bpf_verifier_state *cur = env->cur_state;
5228 bool is_const = tnum_is_const(reg->var_off);
5229 struct bpf_map *map = reg->map_ptr;
5230 u64 val = reg->var_off.value;
5231
d83525ca
AS
5232 if (!is_const) {
5233 verbose(env,
5234 "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
5235 regno);
5236 return -EINVAL;
5237 }
5238 if (!map->btf) {
5239 verbose(env,
5240 "map '%s' has to have BTF in order to use bpf_spin_lock\n",
5241 map->name);
5242 return -EINVAL;
5243 }
5244 if (!map_value_has_spin_lock(map)) {
5245 if (map->spin_lock_off == -E2BIG)
5246 verbose(env,
5247 "map '%s' has more than one 'struct bpf_spin_lock'\n",
5248 map->name);
5249 else if (map->spin_lock_off == -ENOENT)
5250 verbose(env,
5251 "map '%s' doesn't have 'struct bpf_spin_lock'\n",
5252 map->name);
5253 else
5254 verbose(env,
5255 "map '%s' is not a struct type or bpf_spin_lock is mangled\n",
5256 map->name);
5257 return -EINVAL;
5258 }
5259 if (map->spin_lock_off != val + reg->off) {
5260 verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
5261 val + reg->off);
5262 return -EINVAL;
5263 }
5264 if (is_lock) {
5265 if (cur->active_spin_lock) {
5266 verbose(env,
5267 "Locking two bpf_spin_locks are not allowed\n");
5268 return -EINVAL;
5269 }
5270 cur->active_spin_lock = reg->id;
5271 } else {
5272 if (!cur->active_spin_lock) {
5273 verbose(env, "bpf_spin_unlock without taking a lock\n");
5274 return -EINVAL;
5275 }
5276 if (cur->active_spin_lock != reg->id) {
5277 verbose(env, "bpf_spin_unlock of different lock\n");
5278 return -EINVAL;
5279 }
5280 cur->active_spin_lock = 0;
5281 }
5282 return 0;
5283}
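/* Illustrative sketch (not part of this file): the usage pattern that
 * process_spin_lock() validates. The lock must sit inside the value of a
 * BTF-described map, and lock/unlock must be paired on the same element;
 * 'vals' is an assumed BPF_MAP_TYPE_ARRAY with 'struct elem' values:
 *
 *	struct elem {
 *		int counter;
 *		struct bpf_spin_lock lock;
 *	};
 *
 *	struct elem *e = bpf_map_lookup_elem(&vals, &key);
 *	if (!e)
 *		return 0;
 *	bpf_spin_lock(&e->lock);
 *	e->counter++;
 *	bpf_spin_unlock(&e->lock);
 */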
5284
b00628b1
AS
5285static int process_timer_func(struct bpf_verifier_env *env, int regno,
5286 struct bpf_call_arg_meta *meta)
5287{
5288 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5289 bool is_const = tnum_is_const(reg->var_off);
5290 struct bpf_map *map = reg->map_ptr;
5291 u64 val = reg->var_off.value;
5292
5293 if (!is_const) {
5294 verbose(env,
5295 "R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
5296 regno);
5297 return -EINVAL;
5298 }
5299 if (!map->btf) {
5300 verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
5301 map->name);
5302 return -EINVAL;
5303 }
68134668
AS
5304 if (!map_value_has_timer(map)) {
5305 if (map->timer_off == -E2BIG)
5306 verbose(env,
5307 "map '%s' has more than one 'struct bpf_timer'\n",
5308 map->name);
5309 else if (map->timer_off == -ENOENT)
5310 verbose(env,
5311 "map '%s' doesn't have 'struct bpf_timer'\n",
5312 map->name);
5313 else
5314 verbose(env,
5315 "map '%s' is not a struct type or bpf_timer is mangled\n",
5316 map->name);
5317 return -EINVAL;
5318 }
5319 if (map->timer_off != val + reg->off) {
5320 verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
5321 val + reg->off, map->timer_off);
b00628b1
AS
5322 return -EINVAL;
5323 }
5324 if (meta->map_ptr) {
5325 verbose(env, "verifier bug. Two map pointers in a timer helper\n");
5326 return -EFAULT;
5327 }
3e8ce298 5328 meta->map_uid = reg->map_uid;
b00628b1
AS
5329 meta->map_ptr = map;
5330 return 0;
5331}
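/* Illustrative sketch (not part of this file): the layout and call sequence
 * that process_timer_func() expects. Exactly one 'struct bpf_timer' may be
 * embedded in the map value, and the helpers must be passed a pointer at
 * precisely map->timer_off; 'timers' is an assumed BPF_MAP_TYPE_HASH with
 * 'struct elem' values and 'timer_cb' a hypothetical callback:
 *
 *	struct elem {
 *		struct bpf_timer t;
 *		__u64 payload;
 *	};
 *
 *	struct elem *e = bpf_map_lookup_elem(&timers, &key);
 *	if (!e)
 *		return 0;
 *	bpf_timer_init(&e->t, &timers, CLOCK_MONOTONIC);
 *	bpf_timer_set_callback(&e->t, timer_cb);
 *	bpf_timer_start(&e->t, 0, 0);
 */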
5332
c0a5a21c
KKD
5333static int process_kptr_func(struct bpf_verifier_env *env, int regno,
5334 struct bpf_call_arg_meta *meta)
5335{
5336 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5337 struct bpf_map_value_off_desc *off_desc;
5338 struct bpf_map *map_ptr = reg->map_ptr;
5339 u32 kptr_off;
5340 int ret;
5341
5342 if (!tnum_is_const(reg->var_off)) {
5343 verbose(env,
5344 "R%d doesn't have constant offset. kptr has to be at the constant offset\n",
5345 regno);
5346 return -EINVAL;
5347 }
5348 if (!map_ptr->btf) {
5349 verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
5350 map_ptr->name);
5351 return -EINVAL;
5352 }
5353 if (!map_value_has_kptrs(map_ptr)) {
5354 ret = PTR_ERR(map_ptr->kptr_off_tab);
5355 if (ret == -E2BIG)
5356 verbose(env, "map '%s' has more than %d kptr\n", map_ptr->name,
5357 BPF_MAP_VALUE_OFF_MAX);
5358 else if (ret == -EEXIST)
5359 verbose(env, "map '%s' has repeating kptr BTF tags\n", map_ptr->name);
5360 else
5361 verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
5362 return -EINVAL;
5363 }
5364
5365 meta->map_ptr = map_ptr;
5366 kptr_off = reg->off + reg->var_off.value;
5367 off_desc = bpf_map_kptr_off_contains(map_ptr, kptr_off);
5368 if (!off_desc) {
5369 verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
5370 return -EACCES;
5371 }
5372 if (off_desc->type != BPF_KPTR_REF) {
5373 verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
5374 return -EACCES;
5375 }
5376 meta->kptr_off_desc = off_desc;
5377 return 0;
5378}
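/* Illustrative sketch (not part of this file): what process_kptr_func()
 * checks for bpf_kptr_xchg(). The destination must be a referenced kptr
 * field (BPF_KPTR_REF), declared in the map value through a BTF type tag;
 * '__kptr_ref' is the btf_type_tag("kptr_ref") wrapper used by the
 * selftests, 'struct prog_test_ref_kfunc' the selftest type, and 'm' an
 * assumed array map - all of them assumptions for the example:
 *
 *	struct elem {
 *		struct prog_test_ref_kfunc __kptr_ref *ptr;
 *	};
 *
 *	struct elem *e = bpf_map_lookup_elem(&m, &key);
 *	if (e)
 *		old = bpf_kptr_xchg(&e->ptr, new);	// 'new' must be referenced
 *
 * An offset that is not a kptr field, or an unreferenced (__kptr) field,
 * is rejected with the messages above.
 */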
5379
90133415
DB
5380static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
5381{
48946bd6
HL
5382 return base_type(type) == ARG_PTR_TO_MEM ||
5383 base_type(type) == ARG_PTR_TO_UNINIT_MEM;
90133415
DB
5384}
5385
5386static bool arg_type_is_mem_size(enum bpf_arg_type type)
5387{
5388 return type == ARG_CONST_SIZE ||
5389 type == ARG_CONST_SIZE_OR_ZERO;
5390}
5391
457f4436
AN
5392static bool arg_type_is_alloc_size(enum bpf_arg_type type)
5393{
5394 return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
5395}
5396
57c3bb72
AI
5397static bool arg_type_is_int_ptr(enum bpf_arg_type type)
5398{
5399 return type == ARG_PTR_TO_INT ||
5400 type == ARG_PTR_TO_LONG;
5401}
5402
8f14852e
KKD
5403static bool arg_type_is_release(enum bpf_arg_type type)
5404{
5405 return type & OBJ_RELEASE;
5406}
5407
57c3bb72
AI
5408static int int_ptr_type_to_size(enum bpf_arg_type type)
5409{
5410 if (type == ARG_PTR_TO_INT)
5411 return sizeof(u32);
5412 else if (type == ARG_PTR_TO_LONG)
5413 return sizeof(u64);
5414
5415 return -EINVAL;
5416}
5417
912f442c
LB
5418static int resolve_map_arg_type(struct bpf_verifier_env *env,
5419 const struct bpf_call_arg_meta *meta,
5420 enum bpf_arg_type *arg_type)
5421{
5422 if (!meta->map_ptr) {
5423 /* kernel subsystem misconfigured verifier */
5424 verbose(env, "invalid map_ptr to access map->type\n");
5425 return -EACCES;
5426 }
5427
5428 switch (meta->map_ptr->map_type) {
5429 case BPF_MAP_TYPE_SOCKMAP:
5430 case BPF_MAP_TYPE_SOCKHASH:
5431 if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
6550f2dd 5432 *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
912f442c
LB
5433 } else {
5434 verbose(env, "invalid arg_type for sockmap/sockhash\n");
5435 return -EINVAL;
5436 }
5437 break;
9330986c
JK
5438 case BPF_MAP_TYPE_BLOOM_FILTER:
5439 if (meta->func_id == BPF_FUNC_map_peek_elem)
5440 *arg_type = ARG_PTR_TO_MAP_VALUE;
5441 break;
912f442c
LB
5442 default:
5443 break;
5444 }
5445 return 0;
5446}
5447
f79e7ea5
LB
5448struct bpf_reg_types {
5449 const enum bpf_reg_type types[10];
1df8f55a 5450 u32 *btf_id;
f79e7ea5
LB
5451};
5452
5453static const struct bpf_reg_types map_key_value_types = {
5454 .types = {
5455 PTR_TO_STACK,
5456 PTR_TO_PACKET,
5457 PTR_TO_PACKET_META,
69c087ba 5458 PTR_TO_MAP_KEY,
f79e7ea5
LB
5459 PTR_TO_MAP_VALUE,
5460 },
5461};
5462
5463static const struct bpf_reg_types sock_types = {
5464 .types = {
5465 PTR_TO_SOCK_COMMON,
5466 PTR_TO_SOCKET,
5467 PTR_TO_TCP_SOCK,
5468 PTR_TO_XDP_SOCK,
5469 },
5470};
5471
49a2a4d4 5472#ifdef CONFIG_NET
1df8f55a
MKL
5473static const struct bpf_reg_types btf_id_sock_common_types = {
5474 .types = {
5475 PTR_TO_SOCK_COMMON,
5476 PTR_TO_SOCKET,
5477 PTR_TO_TCP_SOCK,
5478 PTR_TO_XDP_SOCK,
5479 PTR_TO_BTF_ID,
5480 },
5481 .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
5482};
49a2a4d4 5483#endif
1df8f55a 5484
f79e7ea5
LB
5485static const struct bpf_reg_types mem_types = {
5486 .types = {
5487 PTR_TO_STACK,
5488 PTR_TO_PACKET,
5489 PTR_TO_PACKET_META,
69c087ba 5490 PTR_TO_MAP_KEY,
f79e7ea5
LB
5491 PTR_TO_MAP_VALUE,
5492 PTR_TO_MEM,
a672b2e3 5493 PTR_TO_MEM | MEM_ALLOC,
20b2aff4 5494 PTR_TO_BUF,
f79e7ea5
LB
5495 },
5496};
5497
5498static const struct bpf_reg_types int_ptr_types = {
5499 .types = {
5500 PTR_TO_STACK,
5501 PTR_TO_PACKET,
5502 PTR_TO_PACKET_META,
69c087ba 5503 PTR_TO_MAP_KEY,
f79e7ea5
LB
5504 PTR_TO_MAP_VALUE,
5505 },
5506};
5507
5508static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
5509static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
5510static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
a672b2e3 5511static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } };
f79e7ea5
LB
5512static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
5513static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
5514static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
5844101a 5515static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_BTF_ID | MEM_PERCPU } };
69c087ba
YS
5516static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
5517static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
fff13c4b 5518static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
b00628b1 5519static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
c0a5a21c 5520static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
f79e7ea5 5521
0789e13b 5522static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
f79e7ea5
LB
5523 [ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
5524 [ARG_PTR_TO_MAP_VALUE] = &map_key_value_types,
5525 [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types,
f79e7ea5
LB
5526 [ARG_CONST_SIZE] = &scalar_types,
5527 [ARG_CONST_SIZE_OR_ZERO] = &scalar_types,
5528 [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types,
5529 [ARG_CONST_MAP_PTR] = &const_map_ptr_types,
5530 [ARG_PTR_TO_CTX] = &context_types,
f79e7ea5 5531 [ARG_PTR_TO_SOCK_COMMON] = &sock_types,
49a2a4d4 5532#ifdef CONFIG_NET
1df8f55a 5533 [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
49a2a4d4 5534#endif
f79e7ea5 5535 [ARG_PTR_TO_SOCKET] = &fullsock_types,
f79e7ea5
LB
5536 [ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
5537 [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
5538 [ARG_PTR_TO_MEM] = &mem_types,
f79e7ea5
LB
5539 [ARG_PTR_TO_UNINIT_MEM] = &mem_types,
5540 [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types,
f79e7ea5
LB
5541 [ARG_PTR_TO_INT] = &int_ptr_types,
5542 [ARG_PTR_TO_LONG] = &int_ptr_types,
eaa6bcb7 5543 [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
69c087ba 5544 [ARG_PTR_TO_FUNC] = &func_ptr_types,
48946bd6 5545 [ARG_PTR_TO_STACK] = &stack_ptr_types,
fff13c4b 5546 [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
b00628b1 5547 [ARG_PTR_TO_TIMER] = &timer_types,
c0a5a21c 5548 [ARG_PTR_TO_KPTR] = &kptr_types,
f79e7ea5
LB
5549};
5550
5551static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
a968d5e2 5552 enum bpf_arg_type arg_type,
c0a5a21c
KKD
5553 const u32 *arg_btf_id,
5554 struct bpf_call_arg_meta *meta)
f79e7ea5
LB
5555{
5556 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5557 enum bpf_reg_type expected, type = reg->type;
a968d5e2 5558 const struct bpf_reg_types *compatible;
f79e7ea5
LB
5559 int i, j;
5560
48946bd6 5561 compatible = compatible_reg_types[base_type(arg_type)];
a968d5e2
MKL
5562 if (!compatible) {
5563 verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
5564 return -EFAULT;
5565 }
5566
216e3cd2
HL
5567 /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
5568 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
5569 *
5570 * Same for MAYBE_NULL:
5571 *
5572 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
5573 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
5574 *
5575 * Therefore we fold these flags depending on the arg_type before comparison.
5576 */
5577 if (arg_type & MEM_RDONLY)
5578 type &= ~MEM_RDONLY;
5579 if (arg_type & PTR_MAYBE_NULL)
5580 type &= ~PTR_MAYBE_NULL;
5581
f79e7ea5
LB
5582 for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
5583 expected = compatible->types[i];
5584 if (expected == NOT_INIT)
5585 break;
5586
5587 if (type == expected)
a968d5e2 5588 goto found;
f79e7ea5
LB
5589 }
5590
216e3cd2 5591 verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
f79e7ea5 5592 for (j = 0; j + 1 < i; j++)
c25b2ae1
HL
5593 verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
5594 verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
f79e7ea5 5595 return -EACCES;
a968d5e2
MKL
5596
5597found:
216e3cd2 5598 if (reg->type == PTR_TO_BTF_ID) {
2ab3b380
KKD
5599 /* For bpf_sk_release, it needs to match against first member
5600 * 'struct sock_common', hence make an exception for it. This
5601 * allows bpf_sk_release to work for multiple socket types.
5602 */
5603 bool strict_type_match = arg_type_is_release(arg_type) &&
5604 meta->func_id != BPF_FUNC_sk_release;
5605
1df8f55a
MKL
5606 if (!arg_btf_id) {
5607 if (!compatible->btf_id) {
5608 verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
5609 return -EFAULT;
5610 }
5611 arg_btf_id = compatible->btf_id;
5612 }
5613
c0a5a21c
KKD
5614 if (meta->func_id == BPF_FUNC_kptr_xchg) {
5615 if (map_kptr_match_type(env, meta->kptr_off_desc, reg, regno))
5616 return -EACCES;
5617 } else if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
2ab3b380
KKD
5618 btf_vmlinux, *arg_btf_id,
5619 strict_type_match)) {
a968d5e2 5620 verbose(env, "R%d is of type %s but %s is expected\n",
22dc4a0f
AN
5621 regno, kernel_type_name(reg->btf, reg->btf_id),
5622 kernel_type_name(btf_vmlinux, *arg_btf_id));
a968d5e2
MKL
5623 return -EACCES;
5624 }
a968d5e2
MKL
5625 }
5626
5627 return 0;
f79e7ea5
LB
5628}
5629
25b35dd2
KKD
5630int check_func_arg_reg_off(struct bpf_verifier_env *env,
5631 const struct bpf_reg_state *reg, int regno,
8f14852e 5632 enum bpf_arg_type arg_type)
25b35dd2
KKD
5633{
5634 enum bpf_reg_type type = reg->type;
8f14852e 5635 bool fixed_off_ok = false;
25b35dd2
KKD
5636
5637 switch ((u32)type) {
5638 case SCALAR_VALUE:
5639 /* Pointer types where reg offset is explicitly allowed: */
5640 case PTR_TO_PACKET:
5641 case PTR_TO_PACKET_META:
5642 case PTR_TO_MAP_KEY:
5643 case PTR_TO_MAP_VALUE:
5644 case PTR_TO_MEM:
5645 case PTR_TO_MEM | MEM_RDONLY:
5646 case PTR_TO_MEM | MEM_ALLOC:
5647 case PTR_TO_BUF:
5648 case PTR_TO_BUF | MEM_RDONLY:
5649 case PTR_TO_STACK:
5650 /* Some of the argument types nevertheless require a
5651 * zero register offset.
5652 */
8f14852e 5653 if (base_type(arg_type) != ARG_PTR_TO_ALLOC_MEM)
25b35dd2
KKD
5654 return 0;
5655 break;
5656 /* All the rest must be rejected, except PTR_TO_BTF_ID which allows
5657 * fixed offset.
5658 */
5659 case PTR_TO_BTF_ID:
24d5bb80 5660 /* When referenced PTR_TO_BTF_ID is passed to release function,
8f14852e
KKD
5661 * its fixed offset must be 0. In the other cases, fixed offset
5662 * can be non-zero.
24d5bb80 5663 */
8f14852e 5664 if (arg_type_is_release(arg_type) && reg->off) {
24d5bb80
KKD
5665 verbose(env, "R%d must have zero offset when passed to release func\n",
5666 regno);
5667 return -EINVAL;
5668 }
8f14852e
KKD
5669 /* If the arg is a release pointer, fixed_off_ok must be false, but
5670 * we already checked and rejected reg->off != 0 above, so set
5671 * to true to allow fixed offset for all other cases.
24d5bb80 5672 */
25b35dd2
KKD
5673 fixed_off_ok = true;
5674 break;
5675 default:
5676 break;
5677 }
5678 return __check_ptr_off_reg(env, reg, regno, fixed_off_ok);
5679}
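/* Illustrative sketch (not part of this file): the offset rule enforced
 * above for release arguments. Fields of an acquired socket may be read
 * through the returned pointer, but the pointer handed to the release
 * helper must be exactly the one that was acquired, i.e. with a fixed
 * offset of 0; 'tuple' is an assumed struct bpf_sock_tuple on the stack:
 *
 *	struct bpf_sock *sk;
 *
 *	sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 *	if (!sk)
 *		return 0;
 *	// reading fields of *sk is fine
 *	bpf_sk_release(sk);		// R1 must have off == 0 here
 */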
5680
af7ec138
YS
5681static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
5682 struct bpf_call_arg_meta *meta,
5683 const struct bpf_func_proto *fn)
17a52670 5684{
af7ec138 5685 u32 regno = BPF_REG_1 + arg;
638f5b90 5686 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
af7ec138 5687 enum bpf_arg_type arg_type = fn->arg_type[arg];
f79e7ea5 5688 enum bpf_reg_type type = reg->type;
17a52670
AS
5689 int err = 0;
5690
80f1d68c 5691 if (arg_type == ARG_DONTCARE)
17a52670
AS
5692 return 0;
5693
dc503a8a
EC
5694 err = check_reg_arg(env, regno, SRC_OP);
5695 if (err)
5696 return err;
17a52670 5697
1be7f75d
AS
5698 if (arg_type == ARG_ANYTHING) {
5699 if (is_pointer_value(env, regno)) {
61bd5218
JK
5700 verbose(env, "R%d leaks addr into helper function\n",
5701 regno);
1be7f75d
AS
5702 return -EACCES;
5703 }
80f1d68c 5704 return 0;
1be7f75d 5705 }
80f1d68c 5706
de8f3a83 5707 if (type_is_pkt_pointer(type) &&
3a0af8fd 5708 !may_access_direct_pkt_data(env, meta, BPF_READ)) {
61bd5218 5709 verbose(env, "helper access to the packet is not allowed\n");
6841de8b
AS
5710 return -EACCES;
5711 }
5712
48946bd6
HL
5713 if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE ||
5714 base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) {
912f442c
LB
5715 err = resolve_map_arg_type(env, meta, &arg_type);
5716 if (err)
5717 return err;
5718 }
5719
48946bd6 5720 if (register_is_null(reg) && type_may_be_null(arg_type))
fd1b0d60
LB
5721 /* A NULL register has a SCALAR_VALUE type, so skip
5722 * type checking.
5723 */
5724 goto skip_type_check;
5725
c0a5a21c 5726 err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg], meta);
f79e7ea5
LB
5727 if (err)
5728 return err;
5729
8f14852e 5730 err = check_func_arg_reg_off(env, reg, regno, arg_type);
25b35dd2
KKD
5731 if (err)
5732 return err;
d7b9454a 5733
fd1b0d60 5734skip_type_check:
8f14852e
KKD
5735 if (arg_type_is_release(arg_type)) {
5736 if (!reg->ref_obj_id && !register_is_null(reg)) {
5737 verbose(env, "R%d must be referenced when passed to release function\n",
5738 regno);
5739 return -EINVAL;
5740 }
5741 if (meta->release_regno) {
5742 verbose(env, "verifier internal error: more than one release argument\n");
5743 return -EFAULT;
5744 }
5745 meta->release_regno = regno;
5746 }
5747
02f7c958 5748 if (reg->ref_obj_id) {
457f4436
AN
5749 if (meta->ref_obj_id) {
5750 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
5751 regno, reg->ref_obj_id,
5752 meta->ref_obj_id);
5753 return -EFAULT;
5754 }
5755 meta->ref_obj_id = reg->ref_obj_id;
17a52670
AS
5756 }
5757
17a52670
AS
5758 if (arg_type == ARG_CONST_MAP_PTR) {
5759 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
3e8ce298
AS
5760 if (meta->map_ptr) {
5761 /* Use map_uid (which is unique id of inner map) to reject:
5762 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
5763 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
5764 * if (inner_map1 && inner_map2) {
5765 * timer = bpf_map_lookup_elem(inner_map1);
5766 * if (timer)
5767 * // mismatch would have been allowed
5768 * bpf_timer_init(timer, inner_map2);
5769 * }
5770 *
5771 * Comparing map_ptr is enough to distinguish normal and outer maps.
5772 */
5773 if (meta->map_ptr != reg->map_ptr ||
5774 meta->map_uid != reg->map_uid) {
5775 verbose(env,
5776 "timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
5777 meta->map_uid, reg->map_uid);
5778 return -EINVAL;
5779 }
b00628b1 5780 }
33ff9823 5781 meta->map_ptr = reg->map_ptr;
3e8ce298 5782 meta->map_uid = reg->map_uid;
17a52670
AS
5783 } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
5784 /* bpf_map_xxx(..., map_ptr, ..., key) call:
5785 * check that [key, key + map->key_size) are within
5786 * stack limits and initialized
5787 */
33ff9823 5788 if (!meta->map_ptr) {
17a52670
AS
5789 /* in the function declaration map_ptr must come before
5790 * map_key, so that it's verified and known before
5791 * we have to check map_key here. Otherwise it means
5792 * that the kernel subsystem misconfigured the verifier
5793 */
61bd5218 5794 verbose(env, "invalid map_ptr to access map->key\n");
17a52670
AS
5795 return -EACCES;
5796 }
d71962f3
PC
5797 err = check_helper_mem_access(env, regno,
5798 meta->map_ptr->key_size, false,
5799 NULL);
48946bd6
HL
5800 } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE ||
5801 base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) {
5802 if (type_may_be_null(arg_type) && register_is_null(reg))
5803 return 0;
5804
17a52670
AS
5805 /* bpf_map_xxx(..., map_ptr, ..., value) call:
5806 * check [value, value + map->value_size) validity
5807 */
33ff9823 5808 if (!meta->map_ptr) {
17a52670 5809 /* kernel subsystem misconfigured verifier */
61bd5218 5810 verbose(env, "invalid map_ptr to access map->value\n");
17a52670
AS
5811 return -EACCES;
5812 }
2ea864c5 5813 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
d71962f3
PC
5814 err = check_helper_mem_access(env, regno,
5815 meta->map_ptr->value_size, false,
2ea864c5 5816 meta);
eaa6bcb7
HL
5817 } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
5818 if (!reg->btf_id) {
5819 verbose(env, "Helper has invalid btf_id in R%d\n", regno);
5820 return -EACCES;
5821 }
22dc4a0f 5822 meta->ret_btf = reg->btf;
eaa6bcb7 5823 meta->ret_btf_id = reg->btf_id;
c18f0b6a
LB
5824 } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
5825 if (meta->func_id == BPF_FUNC_spin_lock) {
5826 if (process_spin_lock(env, regno, true))
5827 return -EACCES;
5828 } else if (meta->func_id == BPF_FUNC_spin_unlock) {
5829 if (process_spin_lock(env, regno, false))
5830 return -EACCES;
5831 } else {
5832 verbose(env, "verifier internal error\n");
5833 return -EFAULT;
5834 }
b00628b1
AS
5835 } else if (arg_type == ARG_PTR_TO_TIMER) {
5836 if (process_timer_func(env, regno, meta))
5837 return -EACCES;
69c087ba
YS
5838 } else if (arg_type == ARG_PTR_TO_FUNC) {
5839 meta->subprogno = reg->subprogno;
a2bbe7cc
LB
5840 } else if (arg_type_is_mem_ptr(arg_type)) {
5841 /* The access to this pointer is only checked when we hit the
5842 * next is_mem_size argument below.
5843 */
5844 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM);
90133415 5845 } else if (arg_type_is_mem_size(arg_type)) {
39f19ebb 5846 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
17a52670 5847
d583691c 5848 err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta);
457f4436
AN
5849 } else if (arg_type_is_alloc_size(arg_type)) {
5850 if (!tnum_is_const(reg->var_off)) {
28a8add6 5851 verbose(env, "R%d is not a known constant\n",
457f4436
AN
5852 regno);
5853 return -EACCES;
5854 }
5855 meta->mem_size = reg->var_off.value;
57c3bb72
AI
5856 } else if (arg_type_is_int_ptr(arg_type)) {
5857 int size = int_ptr_type_to_size(arg_type);
5858
5859 err = check_helper_mem_access(env, regno, size, false, meta);
5860 if (err)
5861 return err;
5862 err = check_ptr_alignment(env, reg, 0, size, true);
fff13c4b
FR
5863 } else if (arg_type == ARG_PTR_TO_CONST_STR) {
5864 struct bpf_map *map = reg->map_ptr;
5865 int map_off;
5866 u64 map_addr;
5867 char *str_ptr;
5868
a8fad73e 5869 if (!bpf_map_is_rdonly(map)) {
fff13c4b
FR
5870 verbose(env, "R%d does not point to a readonly map\n", regno);
5871 return -EACCES;
5872 }
5873
5874 if (!tnum_is_const(reg->var_off)) {
5875 verbose(env, "R%d is not a constant address\n", regno);
5876 return -EACCES;
5877 }
5878
5879 if (!map->ops->map_direct_value_addr) {
5880 verbose(env, "no direct value access support for this map type\n");
5881 return -EACCES;
5882 }
5883
5884 err = check_map_access(env, regno, reg->off,
61df10c7
KKD
5885 map->value_size - reg->off, false,
5886 ACCESS_HELPER);
fff13c4b
FR
5887 if (err)
5888 return err;
5889
5890 map_off = reg->off + reg->var_off.value;
5891 err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
5892 if (err) {
5893 verbose(env, "direct value access on string failed\n");
5894 return err;
5895 }
5896
5897 str_ptr = (char *)(long)(map_addr);
5898 if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
5899 verbose(env, "string is not zero-terminated\n");
5900 return -EINVAL;
5901 }
c0a5a21c
KKD
5902 } else if (arg_type == ARG_PTR_TO_KPTR) {
5903 if (process_kptr_func(env, regno, meta))
5904 return -EACCES;
17a52670
AS
5905 }
5906
5907 return err;
5908}
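/* Illustrative sketch (not part of this file): the ARG_PTR_TO_CONST_STR
 * handling above in practice. The format string of bpf_snprintf() must
 * live in a read-only map (where the compiler places const globals) and be
 * NUL-terminated within the map value; 'pid' is an assumed local scalar:
 *
 *	static const char fmt[] = "pid=%d";	// ends up in .rodata
 *	char out[32];
 *	__u64 args[] = { pid };
 *
 *	bpf_snprintf(out, sizeof(out), fmt, args, sizeof(args));
 */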
5909
0126240f
LB
5910static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
5911{
5912 enum bpf_attach_type eatype = env->prog->expected_attach_type;
7e40781c 5913 enum bpf_prog_type type = resolve_prog_type(env->prog);
0126240f
LB
5914
5915 if (func_id != BPF_FUNC_map_update_elem)
5916 return false;
5917
5918 /* It's not possible to get access to a locked struct sock in these
5919 * contexts, so updating is safe.
5920 */
5921 switch (type) {
5922 case BPF_PROG_TYPE_TRACING:
5923 if (eatype == BPF_TRACE_ITER)
5924 return true;
5925 break;
5926 case BPF_PROG_TYPE_SOCKET_FILTER:
5927 case BPF_PROG_TYPE_SCHED_CLS:
5928 case BPF_PROG_TYPE_SCHED_ACT:
5929 case BPF_PROG_TYPE_XDP:
5930 case BPF_PROG_TYPE_SK_REUSEPORT:
5931 case BPF_PROG_TYPE_FLOW_DISSECTOR:
5932 case BPF_PROG_TYPE_SK_LOOKUP:
5933 return true;
5934 default:
5935 break;
5936 }
5937
5938 verbose(env, "cannot update sockmap in this context\n");
5939 return false;
5940}
5941
e411901c
MF
5942static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
5943{
5944 return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
5945}
5946
61bd5218
JK
5947static int check_map_func_compatibility(struct bpf_verifier_env *env,
5948 struct bpf_map *map, int func_id)
35578d79 5949{
35578d79
KX
5950 if (!map)
5951 return 0;
5952
6aff67c8
AS
5953 /* We need a two way check, first is from map perspective ... */
5954 switch (map->map_type) {
5955 case BPF_MAP_TYPE_PROG_ARRAY:
5956 if (func_id != BPF_FUNC_tail_call)
5957 goto error;
5958 break;
5959 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
5960 if (func_id != BPF_FUNC_perf_event_read &&
908432ca 5961 func_id != BPF_FUNC_perf_event_output &&
a7658e1a 5962 func_id != BPF_FUNC_skb_output &&
d831ee84
EC
5963 func_id != BPF_FUNC_perf_event_read_value &&
5964 func_id != BPF_FUNC_xdp_output)
6aff67c8
AS
5965 goto error;
5966 break;
457f4436
AN
5967 case BPF_MAP_TYPE_RINGBUF:
5968 if (func_id != BPF_FUNC_ringbuf_output &&
5969 func_id != BPF_FUNC_ringbuf_reserve &&
457f4436
AN
5970 func_id != BPF_FUNC_ringbuf_query)
5971 goto error;
5972 break;
6aff67c8
AS
5973 case BPF_MAP_TYPE_STACK_TRACE:
5974 if (func_id != BPF_FUNC_get_stackid)
5975 goto error;
5976 break;
4ed8ec52 5977 case BPF_MAP_TYPE_CGROUP_ARRAY:
60747ef4 5978 if (func_id != BPF_FUNC_skb_under_cgroup &&
60d20f91 5979 func_id != BPF_FUNC_current_task_under_cgroup)
4a482f34
MKL
5980 goto error;
5981 break;
cd339431 5982 case BPF_MAP_TYPE_CGROUP_STORAGE:
b741f163 5983 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
cd339431
RG
5984 if (func_id != BPF_FUNC_get_local_storage)
5985 goto error;
5986 break;
546ac1ff 5987 case BPF_MAP_TYPE_DEVMAP:
6f9d451a 5988 case BPF_MAP_TYPE_DEVMAP_HASH:
0cdbb4b0
THJ
5989 if (func_id != BPF_FUNC_redirect_map &&
5990 func_id != BPF_FUNC_map_lookup_elem)
546ac1ff
JF
5991 goto error;
5992 break;
fbfc504a
BT
5993 /* Restrict bpf side of cpumap and xskmap, open when use-cases
5994 * appear.
5995 */
6710e112
JDB
5996 case BPF_MAP_TYPE_CPUMAP:
5997 if (func_id != BPF_FUNC_redirect_map)
5998 goto error;
5999 break;
fada7fdc
JL
6000 case BPF_MAP_TYPE_XSKMAP:
6001 if (func_id != BPF_FUNC_redirect_map &&
6002 func_id != BPF_FUNC_map_lookup_elem)
6003 goto error;
6004 break;
56f668df 6005 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
bcc6b1b7 6006 case BPF_MAP_TYPE_HASH_OF_MAPS:
56f668df
MKL
6007 if (func_id != BPF_FUNC_map_lookup_elem)
6008 goto error;
16a43625 6009 break;
174a79ff
JF
6010 case BPF_MAP_TYPE_SOCKMAP:
6011 if (func_id != BPF_FUNC_sk_redirect_map &&
6012 func_id != BPF_FUNC_sock_map_update &&
4f738adb 6013 func_id != BPF_FUNC_map_delete_elem &&
9fed9000 6014 func_id != BPF_FUNC_msg_redirect_map &&
64d85290 6015 func_id != BPF_FUNC_sk_select_reuseport &&
0126240f
LB
6016 func_id != BPF_FUNC_map_lookup_elem &&
6017 !may_update_sockmap(env, func_id))
174a79ff
JF
6018 goto error;
6019 break;
81110384
JF
6020 case BPF_MAP_TYPE_SOCKHASH:
6021 if (func_id != BPF_FUNC_sk_redirect_hash &&
6022 func_id != BPF_FUNC_sock_hash_update &&
6023 func_id != BPF_FUNC_map_delete_elem &&
9fed9000 6024 func_id != BPF_FUNC_msg_redirect_hash &&
64d85290 6025 func_id != BPF_FUNC_sk_select_reuseport &&
0126240f
LB
6026 func_id != BPF_FUNC_map_lookup_elem &&
6027 !may_update_sockmap(env, func_id))
81110384
JF
6028 goto error;
6029 break;
2dbb9b9e
MKL
6030 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
6031 if (func_id != BPF_FUNC_sk_select_reuseport)
6032 goto error;
6033 break;
f1a2e44a
MV
6034 case BPF_MAP_TYPE_QUEUE:
6035 case BPF_MAP_TYPE_STACK:
6036 if (func_id != BPF_FUNC_map_peek_elem &&
6037 func_id != BPF_FUNC_map_pop_elem &&
6038 func_id != BPF_FUNC_map_push_elem)
6039 goto error;
6040 break;
6ac99e8f
MKL
6041 case BPF_MAP_TYPE_SK_STORAGE:
6042 if (func_id != BPF_FUNC_sk_storage_get &&
6043 func_id != BPF_FUNC_sk_storage_delete)
6044 goto error;
6045 break;
8ea63684
KS
6046 case BPF_MAP_TYPE_INODE_STORAGE:
6047 if (func_id != BPF_FUNC_inode_storage_get &&
6048 func_id != BPF_FUNC_inode_storage_delete)
6049 goto error;
6050 break;
4cf1bc1f
KS
6051 case BPF_MAP_TYPE_TASK_STORAGE:
6052 if (func_id != BPF_FUNC_task_storage_get &&
6053 func_id != BPF_FUNC_task_storage_delete)
6054 goto error;
6055 break;
9330986c
JK
6056 case BPF_MAP_TYPE_BLOOM_FILTER:
6057 if (func_id != BPF_FUNC_map_peek_elem &&
6058 func_id != BPF_FUNC_map_push_elem)
6059 goto error;
6060 break;
6aff67c8
AS
6061 default:
6062 break;
6063 }
6064
6065 /* ... and second from the function itself. */
6066 switch (func_id) {
6067 case BPF_FUNC_tail_call:
6068 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
6069 goto error;
e411901c
MF
6070 if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
6071 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
f4d7e40a
AS
6072 return -EINVAL;
6073 }
6aff67c8
AS
6074 break;
6075 case BPF_FUNC_perf_event_read:
6076 case BPF_FUNC_perf_event_output:
908432ca 6077 case BPF_FUNC_perf_event_read_value:
a7658e1a 6078 case BPF_FUNC_skb_output:
d831ee84 6079 case BPF_FUNC_xdp_output:
6aff67c8
AS
6080 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
6081 goto error;
6082 break;
5b029a32
DB
6083 case BPF_FUNC_ringbuf_output:
6084 case BPF_FUNC_ringbuf_reserve:
6085 case BPF_FUNC_ringbuf_query:
6086 if (map->map_type != BPF_MAP_TYPE_RINGBUF)
6087 goto error;
6088 break;
6aff67c8
AS
6089 case BPF_FUNC_get_stackid:
6090 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
6091 goto error;
6092 break;
60d20f91 6093 case BPF_FUNC_current_task_under_cgroup:
747ea55e 6094 case BPF_FUNC_skb_under_cgroup:
4a482f34
MKL
6095 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
6096 goto error;
6097 break;
97f91a7c 6098 case BPF_FUNC_redirect_map:
9c270af3 6099 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
6f9d451a 6100 map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
fbfc504a
BT
6101 map->map_type != BPF_MAP_TYPE_CPUMAP &&
6102 map->map_type != BPF_MAP_TYPE_XSKMAP)
97f91a7c
JF
6103 goto error;
6104 break;
174a79ff 6105 case BPF_FUNC_sk_redirect_map:
4f738adb 6106 case BPF_FUNC_msg_redirect_map:
81110384 6107 case BPF_FUNC_sock_map_update:
174a79ff
JF
6108 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
6109 goto error;
6110 break;
81110384
JF
6111 case BPF_FUNC_sk_redirect_hash:
6112 case BPF_FUNC_msg_redirect_hash:
6113 case BPF_FUNC_sock_hash_update:
6114 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
174a79ff
JF
6115 goto error;
6116 break;
cd339431 6117 case BPF_FUNC_get_local_storage:
b741f163
RG
6118 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
6119 map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
cd339431
RG
6120 goto error;
6121 break;
2dbb9b9e 6122 case BPF_FUNC_sk_select_reuseport:
9fed9000
JS
6123 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
6124 map->map_type != BPF_MAP_TYPE_SOCKMAP &&
6125 map->map_type != BPF_MAP_TYPE_SOCKHASH)
2dbb9b9e
MKL
6126 goto error;
6127 break;
f1a2e44a 6128 case BPF_FUNC_map_pop_elem:
f1a2e44a
MV
6129 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
6130 map->map_type != BPF_MAP_TYPE_STACK)
6131 goto error;
6132 break;
9330986c
JK
6133 case BPF_FUNC_map_peek_elem:
6134 case BPF_FUNC_map_push_elem:
6135 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
6136 map->map_type != BPF_MAP_TYPE_STACK &&
6137 map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
6138 goto error;
6139 break;
6ac99e8f
MKL
6140 case BPF_FUNC_sk_storage_get:
6141 case BPF_FUNC_sk_storage_delete:
6142 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
6143 goto error;
6144 break;
8ea63684
KS
6145 case BPF_FUNC_inode_storage_get:
6146 case BPF_FUNC_inode_storage_delete:
6147 if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
6148 goto error;
6149 break;
4cf1bc1f
KS
6150 case BPF_FUNC_task_storage_get:
6151 case BPF_FUNC_task_storage_delete:
6152 if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
6153 goto error;
6154 break;
6aff67c8
AS
6155 default:
6156 break;
35578d79
KX
6157 }
6158
6159 return 0;
6aff67c8 6160error:
61bd5218 6161 verbose(env, "cannot pass map_type %d into func %s#%d\n",
ebb676da 6162 map->map_type, func_id_name(func_id), func_id);
6aff67c8 6163 return -EINVAL;
35578d79
KX
6164}
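/* Illustrative sketch (not part of this file): the two-way check above in
 * action. A BPF_MAP_TYPE_PROG_ARRAY may only be used with bpf_tail_call(),
 * and bpf_tail_call() only accepts a prog array; 'jmp_table' is an assumed
 * BPF_MAP_TYPE_PROG_ARRAY and 'idx' a scalar:
 *
 *	bpf_tail_call(ctx, &jmp_table, idx);	// accepted
 *	bpf_map_lookup_elem(&jmp_table, &idx);	// rejected:
 *						// "cannot pass map_type 3 into func ..."
 */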
6165
90133415 6166static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
435faee1
DB
6167{
6168 int count = 0;
6169
39f19ebb 6170 if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 6171 count++;
39f19ebb 6172 if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 6173 count++;
39f19ebb 6174 if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 6175 count++;
39f19ebb 6176 if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 6177 count++;
39f19ebb 6178 if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
435faee1
DB
6179 count++;
6180
90133415
DB
6181 /* We only support one arg being in raw mode at the moment,
6182 * which is sufficient for the helper functions we have
6183 * right now.
6184 */
6185 return count <= 1;
6186}
6187
6188static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
6189 enum bpf_arg_type arg_next)
6190{
6191 return (arg_type_is_mem_ptr(arg_curr) &&
6192 !arg_type_is_mem_size(arg_next)) ||
6193 (!arg_type_is_mem_ptr(arg_curr) &&
6194 arg_type_is_mem_size(arg_next));
6195}
6196
6197static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
6198{
6199 /* bpf_xxx(..., buf, len) call will access 'len'
6200 * bytes from memory 'buf'. Both arg types need
6201 * to be paired, so make sure there's no buggy
6202 * helper function specification.
6203 */
6204 if (arg_type_is_mem_size(fn->arg1_type) ||
6205 arg_type_is_mem_ptr(fn->arg5_type) ||
6206 check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
6207 check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
6208 check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
6209 check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
6210 return false;
6211
6212 return true;
6213}
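/* Illustrative sketch (not part of this file): the pairing rule above as it
 * shows up in a helper proto. A memory pointer argument must be immediately
 * followed by its size argument; 'bpf_example' and its proto are
 * hypothetical:
 *
 *	static const struct bpf_func_proto bpf_example_proto = {
 *		.func		= bpf_example,
 *		.gpl_only	= false,
 *		.ret_type	= RET_INTEGER,
 *		.arg1_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
 *		.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
 *	};
 */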
6214
1b986589 6215static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
fd978bf7
JS
6216{
6217 int count = 0;
6218
1b986589 6219 if (arg_type_may_be_refcounted(fn->arg1_type))
fd978bf7 6220 count++;
1b986589 6221 if (arg_type_may_be_refcounted(fn->arg2_type))
fd978bf7 6222 count++;
1b986589 6223 if (arg_type_may_be_refcounted(fn->arg3_type))
fd978bf7 6224 count++;
1b986589 6225 if (arg_type_may_be_refcounted(fn->arg4_type))
fd978bf7 6226 count++;
1b986589 6227 if (arg_type_may_be_refcounted(fn->arg5_type))
fd978bf7
JS
6228 count++;
6229
1b986589
MKL
6230 /* A reference acquiring function cannot acquire
6231 * another refcounted ptr.
6232 */
64d85290 6233 if (may_be_acquire_function(func_id) && count)
1b986589
MKL
6234 return false;
6235
fd978bf7
JS
6236 /* We only support one arg being unreferenced at the moment,
6237 * which is sufficient for the helper functions we have right now.
6238 */
6239 return count <= 1;
6240}
6241
9436ef6e
LB
6242static bool check_btf_id_ok(const struct bpf_func_proto *fn)
6243{
6244 int i;
6245
1df8f55a 6246 for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
c0a5a21c 6247 if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
9436ef6e
LB
6248 return false;
6249
c0a5a21c 6250 if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
1df8f55a
MKL
6251 return false;
6252 }
6253
9436ef6e
LB
6254 return true;
6255}
6256
8f14852e
KKD
6257static int check_func_proto(const struct bpf_func_proto *fn, int func_id,
6258 struct bpf_call_arg_meta *meta)
90133415
DB
6259{
6260 return check_raw_mode_ok(fn) &&
fd978bf7 6261 check_arg_pair_ok(fn) &&
9436ef6e 6262 check_btf_id_ok(fn) &&
1b986589 6263 check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
435faee1
DB
6264}
6265
de8f3a83
DB
6266/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
6267 * are now invalid, so turn them into unknown SCALAR_VALUE.
f1174f77 6268 */
f4d7e40a
AS
6269static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
6270 struct bpf_func_state *state)
969bf05e 6271{
58e2af8b 6272 struct bpf_reg_state *regs = state->regs, *reg;
969bf05e
AS
6273 int i;
6274
6275 for (i = 0; i < MAX_BPF_REG; i++)
de8f3a83 6276 if (reg_is_pkt_pointer_any(&regs[i]))
61bd5218 6277 mark_reg_unknown(env, regs, i);
969bf05e 6278
f3709f69
JS
6279 bpf_for_each_spilled_reg(i, state, reg) {
6280 if (!reg)
969bf05e 6281 continue;
de8f3a83 6282 if (reg_is_pkt_pointer_any(reg))
f54c7898 6283 __mark_reg_unknown(env, reg);
969bf05e
AS
6284 }
6285}
6286
f4d7e40a
AS
6287static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
6288{
6289 struct bpf_verifier_state *vstate = env->cur_state;
6290 int i;
6291
6292 for (i = 0; i <= vstate->curframe; i++)
6293 __clear_all_pkt_pointers(env, vstate->frame[i]);
6294}
6295
6d94e741
AS
6296enum {
6297 AT_PKT_END = -1,
6298 BEYOND_PKT_END = -2,
6299};
6300
6301static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
6302{
6303 struct bpf_func_state *state = vstate->frame[vstate->curframe];
6304 struct bpf_reg_state *reg = &state->regs[regn];
6305
6306 if (reg->type != PTR_TO_PACKET)
6307 /* PTR_TO_PACKET_META is not supported yet */
6308 return;
6309
6310 /* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
6311 * How far beyond pkt_end it goes is unknown.
6312 * if (!range_open) it's the case of pkt >= pkt_end
6313 * if (range_open) it's the case of pkt > pkt_end
6314 * hence this pointer is at least 1 byte bigger than pkt_end
6315 */
6316 if (range_open)
6317 reg->range = BEYOND_PKT_END;
6318 else
6319 reg->range = AT_PKT_END;
6320}
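/* Illustrative sketch (not part of this file): the comparison pattern that
 * leads to mark_pkt_end(). In an XDP program, access beyond 'data' must be
 * proven in-bounds against 'data_end' before any dereference:
 *
 *	void *data = (void *)(long)ctx->data;
 *	void *data_end = (void *)(long)ctx->data_end;
 *	struct ethhdr *eth = data;
 *
 *	if (data + sizeof(*eth) > data_end)	// true branch: pkt > pkt_end
 *		return XDP_DROP;
 *	return eth->h_proto == bpf_htons(ETH_P_IP) ? XDP_PASS : XDP_DROP;
 */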
6321
fd978bf7 6322static void release_reg_references(struct bpf_verifier_env *env,
1b986589
MKL
6323 struct bpf_func_state *state,
6324 int ref_obj_id)
fd978bf7
JS
6325{
6326 struct bpf_reg_state *regs = state->regs, *reg;
6327 int i;
6328
6329 for (i = 0; i < MAX_BPF_REG; i++)
1b986589 6330 if (regs[i].ref_obj_id == ref_obj_id)
fd978bf7
JS
6331 mark_reg_unknown(env, regs, i);
6332
6333 bpf_for_each_spilled_reg(i, state, reg) {
6334 if (!reg)
6335 continue;
1b986589 6336 if (reg->ref_obj_id == ref_obj_id)
f54c7898 6337 __mark_reg_unknown(env, reg);
fd978bf7
JS
6338 }
6339}
6340
6341/* The pointer with the specified id has released its reference to kernel
6342 * resources. Identify all copies of the same pointer and clear the reference.
6343 */
6344static int release_reference(struct bpf_verifier_env *env,
1b986589 6345 int ref_obj_id)
fd978bf7
JS
6346{
6347 struct bpf_verifier_state *vstate = env->cur_state;
1b986589 6348 int err;
fd978bf7
JS
6349 int i;
6350
1b986589
MKL
6351 err = release_reference_state(cur_func(env), ref_obj_id);
6352 if (err)
6353 return err;
6354
fd978bf7 6355 for (i = 0; i <= vstate->curframe; i++)
1b986589 6356 release_reg_references(env, vstate->frame[i], ref_obj_id);
fd978bf7 6357
1b986589 6358 return 0;
fd978bf7
JS
6359}
6360
51c39bb1
AS
6361static void clear_caller_saved_regs(struct bpf_verifier_env *env,
6362 struct bpf_reg_state *regs)
6363{
6364 int i;
6365
6366 /* after the call registers r0 - r5 were scratched */
6367 for (i = 0; i < CALLER_SAVED_REGS; i++) {
6368 mark_reg_not_init(env, regs, caller_saved[i]);
6369 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
6370 }
6371}
6372
14351375
YS
6373typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
6374 struct bpf_func_state *caller,
6375 struct bpf_func_state *callee,
6376 int insn_idx);
6377
6378static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
6379 int *insn_idx, int subprog,
6380 set_callee_state_fn set_callee_state_cb)
f4d7e40a
AS
6381{
6382 struct bpf_verifier_state *state = env->cur_state;
51c39bb1 6383 struct bpf_func_info_aux *func_info_aux;
f4d7e40a 6384 struct bpf_func_state *caller, *callee;
14351375 6385 int err;
51c39bb1 6386 bool is_global = false;
f4d7e40a 6387
aada9ce6 6388 if (state->curframe + 1 >= MAX_CALL_FRAMES) {
f4d7e40a 6389 verbose(env, "the call stack of %d frames is too deep\n",
aada9ce6 6390 state->curframe + 2);
f4d7e40a
AS
6391 return -E2BIG;
6392 }
6393
f4d7e40a
AS
6394 caller = state->frame[state->curframe];
6395 if (state->frame[state->curframe + 1]) {
6396 verbose(env, "verifier bug. Frame %d already allocated\n",
6397 state->curframe + 1);
6398 return -EFAULT;
6399 }
6400
51c39bb1
AS
6401 func_info_aux = env->prog->aux->func_info_aux;
6402 if (func_info_aux)
6403 is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
34747c41 6404 err = btf_check_subprog_arg_match(env, subprog, caller->regs);
51c39bb1
AS
6405 if (err == -EFAULT)
6406 return err;
6407 if (is_global) {
6408 if (err) {
6409 verbose(env, "Caller passes invalid args into func#%d\n",
6410 subprog);
6411 return err;
6412 } else {
6413 if (env->log.level & BPF_LOG_LEVEL)
6414 verbose(env,
6415 "Func#%d is global and valid. Skipping.\n",
6416 subprog);
6417 clear_caller_saved_regs(env, caller->regs);
6418
45159b27 6419 /* All global functions return a 64-bit SCALAR_VALUE */
51c39bb1 6420 mark_reg_unknown(env, caller->regs, BPF_REG_0);
45159b27 6421 caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
51c39bb1
AS
6422
6423 /* continue with next insn after call */
6424 return 0;
6425 }
6426 }
6427
bfc6bb74 6428 if (insn->code == (BPF_JMP | BPF_CALL) &&
a5bebc4f 6429 insn->src_reg == 0 &&
bfc6bb74
AS
6430 insn->imm == BPF_FUNC_timer_set_callback) {
6431 struct bpf_verifier_state *async_cb;
6432
6433 /* there is no real recursion here. timer callbacks are async */
7ddc80a4 6434 env->subprog_info[subprog].is_async_cb = true;
bfc6bb74
AS
6435 async_cb = push_async_cb(env, env->subprog_info[subprog].start,
6436 *insn_idx, subprog);
6437 if (!async_cb)
6438 return -EFAULT;
6439 callee = async_cb->frame[0];
6440 callee->async_entry_cnt = caller->async_entry_cnt + 1;
6441
6442 /* Convert bpf_timer_set_callback() args into timer callback args */
6443 err = set_callee_state_cb(env, caller, callee, *insn_idx);
6444 if (err)
6445 return err;
6446
6447 clear_caller_saved_regs(env, caller->regs);
6448 mark_reg_unknown(env, caller->regs, BPF_REG_0);
6449 caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
6450 /* continue with next insn after call */
6451 return 0;
6452 }
6453
f4d7e40a
AS
6454 callee = kzalloc(sizeof(*callee), GFP_KERNEL);
6455 if (!callee)
6456 return -ENOMEM;
6457 state->frame[state->curframe + 1] = callee;
6458
6459 /* callee cannot access r0, r6 - r9 for reading and has to write
6460 * into its own stack before reading from it.
6461 * callee can read/write into caller's stack
6462 */
6463 init_func_state(env, callee,
6464 /* remember the callsite, it will be used by bpf_exit */
6465 *insn_idx /* callsite */,
6466 state->curframe + 1 /* frameno within this callchain */,
f910cefa 6467 subprog /* subprog number within this prog */);
f4d7e40a 6468
fd978bf7 6469 /* Transfer references to the callee */
c69431aa 6470 err = copy_reference_state(callee, caller);
fd978bf7
JS
6471 if (err)
6472 return err;
6473
14351375
YS
6474 err = set_callee_state_cb(env, caller, callee, *insn_idx);
6475 if (err)
6476 return err;
f4d7e40a 6477
51c39bb1 6478 clear_caller_saved_regs(env, caller->regs);
f4d7e40a
AS
6479
6480 /* only increment it after check_reg_arg() finished */
6481 state->curframe++;
6482
6483 /* and go analyze first insn of the callee */
14351375 6484 *insn_idx = env->subprog_info[subprog].start - 1;
f4d7e40a 6485
06ee7115 6486 if (env->log.level & BPF_LOG_LEVEL) {
f4d7e40a 6487 verbose(env, "caller:\n");
0f55f9ed 6488 print_verifier_state(env, caller, true);
f4d7e40a 6489 verbose(env, "callee:\n");
0f55f9ed 6490 print_verifier_state(env, callee, true);
f4d7e40a
AS
6491 }
6492 return 0;
6493}
6494
314ee05e
YS
6495int map_set_for_each_callback_args(struct bpf_verifier_env *env,
6496 struct bpf_func_state *caller,
6497 struct bpf_func_state *callee)
6498{
6499 /* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
6500 * void *callback_ctx, u64 flags);
6501 * callback_fn(struct bpf_map *map, void *key, void *value,
6502 * void *callback_ctx);
6503 */
6504 callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
6505
6506 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
6507 __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
6508 callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
6509
6510 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
6511 __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
6512 callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
6513
6514 /* pointer to stack or null */
6515 callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
6516
6517 /* unused */
6518 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6519 return 0;
6520}
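/* Illustrative usage sketch, not part of verifier.c: a libbpf-style BPF C
 * program calling bpf_for_each_map_elem() with a callback laid out as
 * map_set_for_each_callback_args() sets up above (r1 = map, r2 = key,
 * r3 = value, r4 = callback_ctx). Map, callback and section names
 * ("vals", "sum_cb", SEC("tc")) are hypothetical.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 16);
	__type(key, __u32);
	__type(value, __u64);
} vals SEC(".maps");

struct cb_ctx {
	__u64 sum;
};

static long sum_cb(struct bpf_map *map, __u32 *key, __u64 *val,
		   struct cb_ctx *ctx)
{
	ctx->sum += *val;
	return 0;	/* 0 = continue, 1 = stop iterating */
}

SEC("tc")
int sum_vals(struct __sk_buff *skb)
{
	struct cb_ctx ctx = { .sum = 0 };

	/* callback R0 is restricted to [0, 1] by prepare_func_exit() */
	bpf_for_each_map_elem(&vals, sum_cb, &ctx, 0);
	return ctx.sum != 0;
}

char LICENSE[] SEC("license") = "GPL";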
6521
14351375
YS
6522static int set_callee_state(struct bpf_verifier_env *env,
6523 struct bpf_func_state *caller,
6524 struct bpf_func_state *callee, int insn_idx)
6525{
6526 int i;
6527
6528 /* copy r1 - r5 args that callee can access. The copy includes parent
6529 * pointers, which connect us to the liveness chain
6530 */
6531 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
6532 callee->regs[i] = caller->regs[i];
6533 return 0;
6534}
6535
6536static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
6537 int *insn_idx)
6538{
6539 int subprog, target_insn;
6540
6541 target_insn = *insn_idx + insn->imm + 1;
6542 subprog = find_subprog(env, target_insn);
6543 if (subprog < 0) {
6544 verbose(env, "verifier bug. No program starts at insn %d\n",
6545 target_insn);
6546 return -EFAULT;
6547 }
6548
6549 return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
6550}
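/* Illustrative sketch, not part of verifier.c: the two call shapes handled
 * above. A static subprogram is re-verified for every caller, while a
 * non-static ("global") function is verified once against its BTF prototype
 * and its result is treated by the caller as an unknown 64-bit scalar (see
 * the BTF_FUNC_GLOBAL handling in __check_func_call()). Function and section
 * names are hypothetical.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

static __noinline int add_static(int a, int b)
{
	return a + b;
}

__noinline int add_global(int a, int b)
{
	return a + b;
}

SEC("xdp")
int caller(struct xdp_md *ctx)
{
	volatile int v = add_static(1, 2) + add_global(3, 4);

	return v ? XDP_PASS : XDP_DROP;
}

char LICENSE[] SEC("license") = "GPL";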
6551
69c087ba
YS
6552static int set_map_elem_callback_state(struct bpf_verifier_env *env,
6553 struct bpf_func_state *caller,
6554 struct bpf_func_state *callee,
6555 int insn_idx)
6556{
6557 struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
6558 struct bpf_map *map;
6559 int err;
6560
6561 if (bpf_map_ptr_poisoned(insn_aux)) {
6562 verbose(env, "tail_call abusing map_ptr\n");
6563 return -EINVAL;
6564 }
6565
6566 map = BPF_MAP_PTR(insn_aux->map_ptr_state);
6567 if (!map->ops->map_set_for_each_callback_args ||
6568 !map->ops->map_for_each_callback) {
6569 verbose(env, "callback function not allowed for map\n");
6570 return -ENOTSUPP;
6571 }
6572
6573 err = map->ops->map_set_for_each_callback_args(env, caller, callee);
6574 if (err)
6575 return err;
6576
6577 callee->in_callback_fn = true;
6578 return 0;
6579}
6580
e6f2dd0f
JK
6581static int set_loop_callback_state(struct bpf_verifier_env *env,
6582 struct bpf_func_state *caller,
6583 struct bpf_func_state *callee,
6584 int insn_idx)
6585{
6586 /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
6587 * u64 flags);
6588 * callback_fn(u32 index, void *callback_ctx);
6589 */
6590 callee->regs[BPF_REG_1].type = SCALAR_VALUE;
6591 callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
6592
6593 /* unused */
6594 __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
6595 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
6596 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6597
6598 callee->in_callback_fn = true;
6599 return 0;
6600}
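/* Illustrative sketch, not part of verifier.c: bpf_loop() usage matching the
 * callback layout above (r1 = loop index, r2 = callback_ctx). Section and
 * symbol names are hypothetical.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct loop_ctx {
	__u64 sum;
};

static long square_cb(__u32 index, struct loop_ctx *lctx)
{
	lctx->sum += (__u64)index * index;
	return 0;	/* 0 = next iteration, 1 = break out of the loop */
}

SEC("xdp")
int sum_squares(struct xdp_md *ctx)
{
	struct loop_ctx lctx = { .sum = 0 };

	bpf_loop(100, square_cb, &lctx, 0);
	return lctx.sum ? XDP_PASS : XDP_DROP;
}

char LICENSE[] SEC("license") = "GPL";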
6601
b00628b1
AS
6602static int set_timer_callback_state(struct bpf_verifier_env *env,
6603 struct bpf_func_state *caller,
6604 struct bpf_func_state *callee,
6605 int insn_idx)
6606{
6607 struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
6608
6609 /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
6610 * callback_fn(struct bpf_map *map, void *key, void *value);
6611 */
6612 callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
6613 __mark_reg_known_zero(&callee->regs[BPF_REG_1]);
6614 callee->regs[BPF_REG_1].map_ptr = map_ptr;
6615
6616 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
6617 __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
6618 callee->regs[BPF_REG_2].map_ptr = map_ptr;
6619
6620 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
6621 __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
6622 callee->regs[BPF_REG_3].map_ptr = map_ptr;
6623
6624 /* unused */
6625 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
6626 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
bfc6bb74 6627 callee->in_async_callback_fn = true;
b00628b1
AS
6628 return 0;
6629}
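/* Illustrative sketch, not part of verifier.c: bpf_timer_set_callback() with
 * a callback shaped as set_timer_callback_state() describes above
 * (r1 = map, r2 = key, r3 = value). Assumes a program type where the timer
 * helpers are available and that userspace pre-populates key 0; map, struct
 * and symbol names are hypothetical.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#define CLOCK_MONOTONIC 1

struct map_elem {
	struct bpf_timer timer;
	__u64 fired;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 8);
	__type(key, int);
	__type(value, struct map_elem);
} timers SEC(".maps");

static int timer_cb(void *map, int *key, struct map_elem *val)
{
	val->fired++;	/* runs asynchronously, in its own verifier frame */
	return 0;
}

SEC("tc")
int arm_timer(struct __sk_buff *skb)
{
	struct map_elem *elem;
	int key = 0;

	elem = bpf_map_lookup_elem(&timers, &key);
	if (!elem)
		return 0;

	bpf_timer_init(&elem->timer, &timers, CLOCK_MONOTONIC);
	bpf_timer_set_callback(&elem->timer, timer_cb);
	bpf_timer_start(&elem->timer, 1000000 /* ns */, 0);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";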
6630
7c7e3d31
SL
6631static int set_find_vma_callback_state(struct bpf_verifier_env *env,
6632 struct bpf_func_state *caller,
6633 struct bpf_func_state *callee,
6634 int insn_idx)
6635{
6636 /* bpf_find_vma(struct task_struct *task, u64 addr,
6637 * void *callback_fn, void *callback_ctx, u64 flags)
6638 * (callback_fn)(struct task_struct *task,
6639 * struct vm_area_struct *vma, void *callback_ctx);
6640 */
6641 callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
6642
6643 callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
6644 __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
6645 callee->regs[BPF_REG_2].btf = btf_vmlinux;
d19ddb47 6646 callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA],
7c7e3d31
SL
6647
6648 /* pointer to stack or null */
6649 callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
6650
6651 /* unused */
6652 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
6653 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6654 callee->in_callback_fn = true;
6655 return 0;
6656}
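/* Illustrative sketch, not part of verifier.c: bpf_find_vma() with a callback
 * shaped as set_find_vma_callback_state() describes above (r1 = task,
 * r2 = vma as PTR_TO_BTF_ID, r3 = callback_ctx). Assumes a vmlinux.h
 * generated by bpftool for the kernel BTF types; the probed symbol, the
 * looked-up address and all other names are hypothetical.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

struct find_ctx {
	unsigned long vm_flags;
};

static long vma_cb(struct task_struct *task, struct vm_area_struct *vma,
		   struct find_ctx *ctx)
{
	ctx->vm_flags = vma->vm_flags;	/* direct BTF pointer load */
	return 0;
}

SEC("kprobe/do_nanosleep")
int probe(void *regs)
{
	struct task_struct *task = bpf_get_current_task_btf();
	struct find_ctx ctx = {};

	bpf_find_vma(task, 0x400000 /* hypothetical address */, vma_cb, &ctx, 0);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";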
6657
f4d7e40a
AS
6658static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
6659{
6660 struct bpf_verifier_state *state = env->cur_state;
6661 struct bpf_func_state *caller, *callee;
6662 struct bpf_reg_state *r0;
fd978bf7 6663 int err;
f4d7e40a
AS
6664
6665 callee = state->frame[state->curframe];
6666 r0 = &callee->regs[BPF_REG_0];
6667 if (r0->type == PTR_TO_STACK) {
6668 /* technically it's ok to return the caller's stack pointer
6669 * (or the caller's caller's pointer) back to the caller,
6670 * since those pointers remain valid. Only the current frame's
6671 * stack pointer becomes invalid as soon as the function exits,
6672 * but let's be conservative.
6673 */
6674 verbose(env, "cannot return stack pointer to the caller\n");
6675 return -EINVAL;
6676 }
6677
6678 state->curframe--;
6679 caller = state->frame[state->curframe];
69c087ba
YS
6680 if (callee->in_callback_fn) {
6681 /* enforce R0 return value range [0, 1]. */
6682 struct tnum range = tnum_range(0, 1);
6683
6684 if (r0->type != SCALAR_VALUE) {
6685 verbose(env, "R0 not a scalar value\n");
6686 return -EACCES;
6687 }
6688 if (!tnum_in(range, r0->var_off)) {
6689 verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
6690 return -EINVAL;
6691 }
6692 } else {
6693 /* return to the caller whatever r0 had in the callee */
6694 caller->regs[BPF_REG_0] = *r0;
6695 }
f4d7e40a 6696
fd978bf7 6697 /* Transfer references to the caller */
c69431aa 6698 err = copy_reference_state(caller, callee);
fd978bf7
JS
6699 if (err)
6700 return err;
6701
f4d7e40a 6702 *insn_idx = callee->callsite + 1;
06ee7115 6703 if (env->log.level & BPF_LOG_LEVEL) {
f4d7e40a 6704 verbose(env, "returning from callee:\n");
0f55f9ed 6705 print_verifier_state(env, callee, true);
f4d7e40a 6706 verbose(env, "to caller at %d:\n", *insn_idx);
0f55f9ed 6707 print_verifier_state(env, caller, true);
f4d7e40a
AS
6708 }
6709 /* clear everything in the callee */
6710 free_func_state(callee);
6711 state->frame[state->curframe + 1] = NULL;
6712 return 0;
6713}
6714
849fa506
YS
6715static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
6716 int func_id,
6717 struct bpf_call_arg_meta *meta)
6718{
6719 struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
6720
6721 if (ret_type != RET_INTEGER ||
6722 (func_id != BPF_FUNC_get_stack &&
fd0b88f7 6723 func_id != BPF_FUNC_get_task_stack &&
47cc0ed5
DB
6724 func_id != BPF_FUNC_probe_read_str &&
6725 func_id != BPF_FUNC_probe_read_kernel_str &&
6726 func_id != BPF_FUNC_probe_read_user_str))
849fa506
YS
6727 return;
6728
10060503 6729 ret_reg->smax_value = meta->msize_max_value;
fa123ac0 6730 ret_reg->s32_max_value = meta->msize_max_value;
b0270958
AS
6731 ret_reg->smin_value = -MAX_ERRNO;
6732 ret_reg->s32_min_value = -MAX_ERRNO;
849fa506
YS
6733 __reg_deduce_bounds(ret_reg);
6734 __reg_bound_offset(ret_reg);
10060503 6735 __update_reg_bounds(ret_reg);
849fa506
YS
6736}
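/* Illustrative sketch, not part of verifier.c: what the refined return range
 * above buys a program. After bpf_probe_read_kernel_str() into a 64-byte
 * buffer, R0 is known to lie in [-MAX_ERRNO, 64], so a single sign check is
 * enough for the verifier to accept the bounded access below (a privileged
 * loader is assumed, since a variable stack offset is involved; "src" and
 * the wrapper function, meant to be called from a tracing program, are
 * hypothetical).
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

static __u64 last_byte_of(const void *src)
{
	char buf[64];
	long n;

	n = bpf_probe_read_kernel_str(buf, sizeof(buf), src);
	if (n <= 0)
		return 0;
	/* n is in (0, 64], so buf[n - 1] is provably in bounds */
	return buf[n - 1];
}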
6737
c93552c4
DB
6738static int
6739record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
6740 int func_id, int insn_idx)
6741{
6742 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
591fe988 6743 struct bpf_map *map = meta->map_ptr;
c93552c4
DB
6744
6745 if (func_id != BPF_FUNC_tail_call &&
09772d92
DB
6746 func_id != BPF_FUNC_map_lookup_elem &&
6747 func_id != BPF_FUNC_map_update_elem &&
f1a2e44a
MV
6748 func_id != BPF_FUNC_map_delete_elem &&
6749 func_id != BPF_FUNC_map_push_elem &&
6750 func_id != BPF_FUNC_map_pop_elem &&
69c087ba 6751 func_id != BPF_FUNC_map_peek_elem &&
e6a4750f
BT
6752 func_id != BPF_FUNC_for_each_map_elem &&
6753 func_id != BPF_FUNC_redirect_map)
c93552c4 6754 return 0;
09772d92 6755
591fe988 6756 if (map == NULL) {
c93552c4
DB
6757 verbose(env, "kernel subsystem misconfigured verifier\n");
6758 return -EINVAL;
6759 }
6760
591fe988
DB
6761 /* In case of read-only, some additional restrictions
6762 * need to be applied in order to prevent altering the
6763 * state of the map from program side.
6764 */
6765 if ((map->map_flags & BPF_F_RDONLY_PROG) &&
6766 (func_id == BPF_FUNC_map_delete_elem ||
6767 func_id == BPF_FUNC_map_update_elem ||
6768 func_id == BPF_FUNC_map_push_elem ||
6769 func_id == BPF_FUNC_map_pop_elem)) {
6770 verbose(env, "write into map forbidden\n");
6771 return -EACCES;
6772 }
6773
d2e4c1e6 6774 if (!BPF_MAP_PTR(aux->map_ptr_state))
c93552c4 6775 bpf_map_ptr_store(aux, meta->map_ptr,
2c78ee89 6776 !meta->map_ptr->bypass_spec_v1);
d2e4c1e6 6777 else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
c93552c4 6778 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
2c78ee89 6779 !meta->map_ptr->bypass_spec_v1);
c93552c4
DB
6780 return 0;
6781}
6782
d2e4c1e6
DB
6783static int
6784record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
6785 int func_id, int insn_idx)
6786{
6787 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
6788 struct bpf_reg_state *regs = cur_regs(env), *reg;
6789 struct bpf_map *map = meta->map_ptr;
6790 struct tnum range;
6791 u64 val;
cc52d914 6792 int err;
d2e4c1e6
DB
6793
6794 if (func_id != BPF_FUNC_tail_call)
6795 return 0;
6796 if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
6797 verbose(env, "kernel subsystem misconfigured verifier\n");
6798 return -EINVAL;
6799 }
6800
6801 range = tnum_range(0, map->max_entries - 1);
6802 reg = &regs[BPF_REG_3];
6803
6804 if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
6805 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
6806 return 0;
6807 }
6808
cc52d914
DB
6809 err = mark_chain_precision(env, BPF_REG_3);
6810 if (err)
6811 return err;
6812
d2e4c1e6
DB
6813 val = reg->var_off.value;
6814 if (bpf_map_key_unseen(aux))
6815 bpf_map_key_store(aux, val);
6816 else if (!bpf_map_key_poisoned(aux) &&
6817 bpf_map_key_immediate(aux) != val)
6818 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
6819 return 0;
6820}
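/* Illustrative sketch, not part of verifier.c: a tail call with a constant,
 * in-range index. record_func_key() above remembers the key so the JIT can
 * later turn the call into a direct jump; a non-constant or out-of-range key
 * would poison it instead. Map and program names are hypothetical.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 4);
	__type(key, __u32);
	__type(value, __u32);
} jmp_table SEC(".maps");

SEC("xdp")
int dispatcher(struct xdp_md *ctx)
{
	bpf_tail_call(ctx, &jmp_table, 2);
	/* only reached if slot 2 is empty */
	return XDP_PASS;
}

char LICENSE[] SEC("license") = "GPL";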
6821
fd978bf7
JS
6822static int check_reference_leak(struct bpf_verifier_env *env)
6823{
6824 struct bpf_func_state *state = cur_func(env);
6825 int i;
6826
6827 for (i = 0; i < state->acquired_refs; i++) {
6828 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
6829 state->refs[i].id, state->refs[i].insn_idx);
6830 }
6831 return state->acquired_refs ? -EINVAL : 0;
6832}
6833
7b15523a
FR
6834static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
6835 struct bpf_reg_state *regs)
6836{
6837 struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
6838 struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
6839 struct bpf_map *fmt_map = fmt_reg->map_ptr;
6840 int err, fmt_map_off, num_args;
6841 u64 fmt_addr;
6842 char *fmt;
6843
6844 /* data must be an array of u64 */
6845 if (data_len_reg->var_off.value % 8)
6846 return -EINVAL;
6847 num_args = data_len_reg->var_off.value / 8;
6848
6849 /* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
6850 * and map_direct_value_addr is set.
6851 */
6852 fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
6853 err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
6854 fmt_map_off);
8e8ee109
FR
6855 if (err) {
6856 verbose(env, "verifier bug\n");
6857 return -EFAULT;
6858 }
7b15523a
FR
6859 fmt = (char *)(long)fmt_addr + fmt_map_off;
6860
6861 /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
6862 * can focus on validating the format specifiers.
6863 */
48cac3f4 6864 err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args);
7b15523a
FR
6865 if (err < 0)
6866 verbose(env, "Invalid format string\n");
6867
6868 return err;
6869}
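/* Illustrative sketch, not part of verifier.c: a bpf_snprintf() call that
 * satisfies the checks above. The format string lives in a read-only map
 * (ARG_PTR_TO_CONST_STR) and the data length, passed in bytes, is a
 * multiple of 8. Section and symbol names are hypothetical.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("tracepoint/syscalls/sys_enter_nanosleep")
int log_entry(void *ctx)
{
	static const char fmt[] = "pid=%u ts=%llu";
	__u64 data[2];
	char out[64];

	data[0] = bpf_get_current_pid_tgid() >> 32;
	data[1] = bpf_ktime_get_ns();
	/* num_args = sizeof(data) / 8 = 2, matching the two specifiers */
	bpf_snprintf(out, sizeof(out), fmt, data, sizeof(data));
	return 0;
}

char LICENSE[] SEC("license") = "GPL";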
6870
9b99edca
JO
6871static int check_get_func_ip(struct bpf_verifier_env *env)
6872{
9b99edca
JO
6873 enum bpf_prog_type type = resolve_prog_type(env->prog);
6874 int func_id = BPF_FUNC_get_func_ip;
6875
6876 if (type == BPF_PROG_TYPE_TRACING) {
f92c1e18 6877 if (!bpf_prog_has_trampoline(env->prog)) {
9b99edca
JO
6878 verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
6879 func_id_name(func_id), func_id);
6880 return -ENOTSUPP;
6881 }
6882 return 0;
9ffd9f3f
JO
6883 } else if (type == BPF_PROG_TYPE_KPROBE) {
6884 return 0;
9b99edca
JO
6885 }
6886
6887 verbose(env, "func %s#%d not supported for program type %d\n",
6888 func_id_name(func_id), func_id, type);
6889 return -ENOTSUPP;
6890}
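/* Illustrative sketch, not part of verifier.c: bpf_get_func_ip() from a
 * kprobe program, one of the two program types accepted by
 * check_get_func_ip() above (the tracing flavour additionally needs a
 * trampoline-based attach such as fentry/fexit). The probed symbol is
 * hypothetical.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("kprobe/do_nanosleep")
int probe(void *ctx)
{
	__u64 ip = bpf_get_func_ip(ctx);	/* address the kprobe is attached to */

	bpf_printk("hit %llx", ip);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";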
6891
69c087ba
YS
6892static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
6893 int *insn_idx_p)
17a52670 6894{
17a52670 6895 const struct bpf_func_proto *fn = NULL;
3c480732 6896 enum bpf_return_type ret_type;
c25b2ae1 6897 enum bpf_type_flag ret_flag;
638f5b90 6898 struct bpf_reg_state *regs;
33ff9823 6899 struct bpf_call_arg_meta meta;
69c087ba 6900 int insn_idx = *insn_idx_p;
969bf05e 6901 bool changes_data;
69c087ba 6902 int i, err, func_id;
17a52670
AS
6903
6904 /* find function prototype */
69c087ba 6905 func_id = insn->imm;
17a52670 6906 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
61bd5218
JK
6907 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
6908 func_id);
17a52670
AS
6909 return -EINVAL;
6910 }
6911
00176a34 6912 if (env->ops->get_func_proto)
5e43f899 6913 fn = env->ops->get_func_proto(func_id, env->prog);
17a52670 6914 if (!fn) {
61bd5218
JK
6915 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
6916 func_id);
17a52670
AS
6917 return -EINVAL;
6918 }
6919
6920 /* eBPF programs must be GPL compatible to use GPL-ed functions */
24701ece 6921 if (!env->prog->gpl_compatible && fn->gpl_only) {
3fe2867c 6922 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
17a52670
AS
6923 return -EINVAL;
6924 }
6925
eae2e83e
JO
6926 if (fn->allowed && !fn->allowed(env->prog)) {
6927 verbose(env, "helper call is not allowed in probe\n");
6928 return -EINVAL;
6929 }
6930
04514d13 6931 /* With LD_ABS/IND some JITs save/restore skb from r1. */
17bedab2 6932 changes_data = bpf_helper_changes_pkt_data(fn->func);
04514d13
DB
6933 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
6934 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
6935 func_id_name(func_id), func_id);
6936 return -EINVAL;
6937 }
969bf05e 6938
33ff9823 6939 memset(&meta, 0, sizeof(meta));
36bbef52 6940 meta.pkt_access = fn->pkt_access;
33ff9823 6941
8f14852e 6942 err = check_func_proto(fn, func_id, &meta);
435faee1 6943 if (err) {
61bd5218 6944 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
ebb676da 6945 func_id_name(func_id), func_id);
435faee1
DB
6946 return err;
6947 }
6948
d83525ca 6949 meta.func_id = func_id;
17a52670 6950 /* check args */
523a4cf4 6951 for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
af7ec138 6952 err = check_func_arg(env, i, &meta, fn);
a7658e1a
AS
6953 if (err)
6954 return err;
6955 }
17a52670 6956
c93552c4
DB
6957 err = record_func_map(env, &meta, func_id, insn_idx);
6958 if (err)
6959 return err;
6960
d2e4c1e6
DB
6961 err = record_func_key(env, &meta, func_id, insn_idx);
6962 if (err)
6963 return err;
6964
435faee1
DB
6965 /* Mark slots with STACK_MISC in case of raw mode, stack offset
6966 * is inferred from register state.
6967 */
6968 for (i = 0; i < meta.access_size; i++) {
ca369602
DB
6969 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
6970 BPF_WRITE, -1, false);
435faee1
DB
6971 if (err)
6972 return err;
6973 }
6974
8f14852e
KKD
6975 regs = cur_regs(env);
6976
6977 if (meta.release_regno) {
6978 err = -EINVAL;
6979 if (meta.ref_obj_id)
6980 err = release_reference(env, meta.ref_obj_id);
6981 /* meta.ref_obj_id can only be 0 if register that is meant to be
6982 * released is NULL, which must be > R0.
6983 */
6984 else if (register_is_null(&regs[meta.release_regno]))
6985 err = 0;
46f8bc92
MKL
6986 if (err) {
6987 verbose(env, "func %s#%d reference has not been acquired before\n",
6988 func_id_name(func_id), func_id);
fd978bf7 6989 return err;
46f8bc92 6990 }
fd978bf7
JS
6991 }
6992
e6f2dd0f
JK
6993 switch (func_id) {
6994 case BPF_FUNC_tail_call:
6995 err = check_reference_leak(env);
6996 if (err) {
6997 verbose(env, "tail_call would lead to reference leak\n");
6998 return err;
6999 }
7000 break;
7001 case BPF_FUNC_get_local_storage:
7002 /* check that flags argument in get_local_storage(map, flags) is 0,
7003 * this is required because get_local_storage() can't return an error.
7004 */
7005 if (!register_is_null(&regs[BPF_REG_2])) {
7006 verbose(env, "get_local_storage() doesn't support non-zero flags\n");
7007 return -EINVAL;
7008 }
7009 break;
7010 case BPF_FUNC_for_each_map_elem:
69c087ba
YS
7011 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
7012 set_map_elem_callback_state);
e6f2dd0f
JK
7013 break;
7014 case BPF_FUNC_timer_set_callback:
b00628b1
AS
7015 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
7016 set_timer_callback_state);
e6f2dd0f
JK
7017 break;
7018 case BPF_FUNC_find_vma:
7c7e3d31
SL
7019 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
7020 set_find_vma_callback_state);
e6f2dd0f
JK
7021 break;
7022 case BPF_FUNC_snprintf:
7b15523a 7023 err = check_bpf_snprintf_call(env, regs);
e6f2dd0f
JK
7024 break;
7025 case BPF_FUNC_loop:
7026 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
7027 set_loop_callback_state);
7028 break;
7b15523a
FR
7029 }
7030
e6f2dd0f
JK
7031 if (err)
7032 return err;
7033
17a52670 7034 /* reset caller saved regs */
dc503a8a 7035 for (i = 0; i < CALLER_SAVED_REGS; i++) {
61bd5218 7036 mark_reg_not_init(env, regs, caller_saved[i]);
dc503a8a
EC
7037 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
7038 }
17a52670 7039
5327ed3d
JW
7040 /* helper call returns 64-bit value. */
7041 regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
7042
dc503a8a 7043 /* update return register (already marked as written above) */
3c480732 7044 ret_type = fn->ret_type;
c25b2ae1 7045 ret_flag = type_flag(fn->ret_type);
3c480732 7046 if (ret_type == RET_INTEGER) {
f1174f77 7047 /* sets type to SCALAR_VALUE */
61bd5218 7048 mark_reg_unknown(env, regs, BPF_REG_0);
3c480732 7049 } else if (ret_type == RET_VOID) {
17a52670 7050 regs[BPF_REG_0].type = NOT_INIT;
3c480732 7051 } else if (base_type(ret_type) == RET_PTR_TO_MAP_VALUE) {
f1174f77 7052 /* There is no offset yet applied, variable or fixed */
61bd5218 7053 mark_reg_known_zero(env, regs, BPF_REG_0);
17a52670
AS
7054 /* remember map_ptr, so that check_map_access()
7055 * can check 'value_size' boundary of memory access
7056 * to map element returned from bpf_map_lookup_elem()
7057 */
33ff9823 7058 if (meta.map_ptr == NULL) {
61bd5218
JK
7059 verbose(env,
7060 "kernel subsystem misconfigured verifier\n");
17a52670
AS
7061 return -EINVAL;
7062 }
33ff9823 7063 regs[BPF_REG_0].map_ptr = meta.map_ptr;
3e8ce298 7064 regs[BPF_REG_0].map_uid = meta.map_uid;
c25b2ae1
HL
7065 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
7066 if (!type_may_be_null(ret_type) &&
7067 map_value_has_spin_lock(meta.map_ptr)) {
7068 regs[BPF_REG_0].id = ++env->id_gen;
4d31f301 7069 }
3c480732 7070 } else if (base_type(ret_type) == RET_PTR_TO_SOCKET) {
c64b7983 7071 mark_reg_known_zero(env, regs, BPF_REG_0);
c25b2ae1 7072 regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
3c480732 7073 } else if (base_type(ret_type) == RET_PTR_TO_SOCK_COMMON) {
85a51f8c 7074 mark_reg_known_zero(env, regs, BPF_REG_0);
c25b2ae1 7075 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
3c480732 7076 } else if (base_type(ret_type) == RET_PTR_TO_TCP_SOCK) {
655a51e5 7077 mark_reg_known_zero(env, regs, BPF_REG_0);
c25b2ae1 7078 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
3c480732 7079 } else if (base_type(ret_type) == RET_PTR_TO_ALLOC_MEM) {
457f4436 7080 mark_reg_known_zero(env, regs, BPF_REG_0);
c25b2ae1 7081 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
457f4436 7082 regs[BPF_REG_0].mem_size = meta.mem_size;
3c480732 7083 } else if (base_type(ret_type) == RET_PTR_TO_MEM_OR_BTF_ID) {
eaa6bcb7
HL
7084 const struct btf_type *t;
7085
7086 mark_reg_known_zero(env, regs, BPF_REG_0);
22dc4a0f 7087 t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
eaa6bcb7
HL
7088 if (!btf_type_is_struct(t)) {
7089 u32 tsize;
7090 const struct btf_type *ret;
7091 const char *tname;
7092
7093 /* resolve the type size of ksym. */
22dc4a0f 7094 ret = btf_resolve_size(meta.ret_btf, t, &tsize);
eaa6bcb7 7095 if (IS_ERR(ret)) {
22dc4a0f 7096 tname = btf_name_by_offset(meta.ret_btf, t->name_off);
eaa6bcb7
HL
7097 verbose(env, "unable to resolve the size of type '%s': %ld\n",
7098 tname, PTR_ERR(ret));
7099 return -EINVAL;
7100 }
c25b2ae1 7101 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
eaa6bcb7
HL
7102 regs[BPF_REG_0].mem_size = tsize;
7103 } else {
34d3a78c
HL
7104 /* MEM_RDONLY may be carried from ret_flag, but it
7105 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
7106 * it will confuse the check of PTR_TO_BTF_ID in
7107 * check_mem_access().
7108 */
7109 ret_flag &= ~MEM_RDONLY;
7110
c25b2ae1 7111 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
22dc4a0f 7112 regs[BPF_REG_0].btf = meta.ret_btf;
eaa6bcb7
HL
7113 regs[BPF_REG_0].btf_id = meta.ret_btf_id;
7114 }
3c480732 7115 } else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) {
c0a5a21c 7116 struct btf *ret_btf;
af7ec138
YS
7117 int ret_btf_id;
7118
7119 mark_reg_known_zero(env, regs, BPF_REG_0);
c25b2ae1 7120 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
c0a5a21c
KKD
7121 if (func_id == BPF_FUNC_kptr_xchg) {
7122 ret_btf = meta.kptr_off_desc->kptr.btf;
7123 ret_btf_id = meta.kptr_off_desc->kptr.btf_id;
7124 } else {
7125 ret_btf = btf_vmlinux;
7126 ret_btf_id = *fn->ret_btf_id;
7127 }
af7ec138 7128 if (ret_btf_id == 0) {
3c480732
HL
7129 verbose(env, "invalid return type %u of func %s#%d\n",
7130 base_type(ret_type), func_id_name(func_id),
7131 func_id);
af7ec138
YS
7132 return -EINVAL;
7133 }
c0a5a21c 7134 regs[BPF_REG_0].btf = ret_btf;
af7ec138 7135 regs[BPF_REG_0].btf_id = ret_btf_id;
17a52670 7136 } else {
3c480732
HL
7137 verbose(env, "unknown return type %u of func %s#%d\n",
7138 base_type(ret_type), func_id_name(func_id), func_id);
17a52670
AS
7139 return -EINVAL;
7140 }
04fd61ab 7141
c25b2ae1 7142 if (type_may_be_null(regs[BPF_REG_0].type))
93c230e3
MKL
7143 regs[BPF_REG_0].id = ++env->id_gen;
7144
0f3adc28 7145 if (is_ptr_cast_function(func_id)) {
1b986589
MKL
7146 /* For release_reference() */
7147 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
64d85290 7148 } else if (is_acquire_function(func_id, meta.map_ptr)) {
0f3adc28
LB
7149 int id = acquire_reference_state(env, insn_idx);
7150
7151 if (id < 0)
7152 return id;
7153 /* For mark_ptr_or_null_reg() */
7154 regs[BPF_REG_0].id = id;
7155 /* For release_reference() */
7156 regs[BPF_REG_0].ref_obj_id = id;
7157 }
1b986589 7158
849fa506
YS
7159 do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
7160
61bd5218 7161 err = check_map_func_compatibility(env, meta.map_ptr, func_id);
35578d79
KX
7162 if (err)
7163 return err;
04fd61ab 7164
fa28dcb8
SL
7165 if ((func_id == BPF_FUNC_get_stack ||
7166 func_id == BPF_FUNC_get_task_stack) &&
7167 !env->prog->has_callchain_buf) {
c195651e
YS
7168 const char *err_str;
7169
7170#ifdef CONFIG_PERF_EVENTS
7171 err = get_callchain_buffers(sysctl_perf_event_max_stack);
7172 err_str = "cannot get callchain buffer for func %s#%d\n";
7173#else
7174 err = -ENOTSUPP;
7175 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
7176#endif
7177 if (err) {
7178 verbose(env, err_str, func_id_name(func_id), func_id);
7179 return err;
7180 }
7181
7182 env->prog->has_callchain_buf = true;
7183 }
7184
5d99cb2c
SL
7185 if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
7186 env->prog->call_get_stack = true;
7187
9b99edca
JO
7188 if (func_id == BPF_FUNC_get_func_ip) {
7189 if (check_get_func_ip(env))
7190 return -ENOTSUPP;
7191 env->prog->call_get_func_ip = true;
7192 }
7193
969bf05e
AS
7194 if (changes_data)
7195 clear_all_pkt_pointers(env);
7196 return 0;
7197}
7198
e6ac2450
MKL
7199/* mark_btf_func_reg_size() is used when the reg size is determined by
7200 * the BTF func_proto's return value size and argument.
7201 */
7202static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
7203 size_t reg_size)
7204{
7205 struct bpf_reg_state *reg = &cur_regs(env)[regno];
7206
7207 if (regno == BPF_REG_0) {
7208 /* Function return value */
7209 reg->live |= REG_LIVE_WRITTEN;
7210 reg->subreg_def = reg_size == sizeof(u64) ?
7211 DEF_NOT_SUBREG : env->insn_idx + 1;
7212 } else {
7213 /* Function argument */
7214 if (reg_size == sizeof(u64)) {
7215 mark_insn_zext(env, reg);
7216 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
7217 } else {
7218 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
7219 }
7220 }
7221}
7222
5c073f26
KKD
7223static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
7224 int *insn_idx_p)
e6ac2450
MKL
7225{
7226 const struct btf_type *t, *func, *func_proto, *ptr_type;
7227 struct bpf_reg_state *regs = cur_regs(env);
7228 const char *func_name, *ptr_type_name;
7229 u32 i, nargs, func_id, ptr_type_id;
5c073f26 7230 int err, insn_idx = *insn_idx_p;
e6ac2450 7231 const struct btf_param *args;
2357672c 7232 struct btf *desc_btf;
5c073f26 7233 bool acq;
e6ac2450 7234
a5d82727
KKD
7235 /* skip for now, but return error when we find this in fixup_kfunc_call */
7236 if (!insn->imm)
7237 return 0;
7238
43bf0878 7239 desc_btf = find_kfunc_desc_btf(env, insn->off);
2357672c
KKD
7240 if (IS_ERR(desc_btf))
7241 return PTR_ERR(desc_btf);
7242
e6ac2450 7243 func_id = insn->imm;
2357672c
KKD
7244 func = btf_type_by_id(desc_btf, func_id);
7245 func_name = btf_name_by_offset(desc_btf, func->name_off);
7246 func_proto = btf_type_by_id(desc_btf, func->type);
e6ac2450 7247
b202d844
KKD
7248 if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
7249 BTF_KFUNC_TYPE_CHECK, func_id)) {
e6ac2450
MKL
7250 verbose(env, "calling kernel function %s is not allowed\n",
7251 func_name);
7252 return -EACCES;
7253 }
7254
5c073f26
KKD
7255 acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
7256 BTF_KFUNC_TYPE_ACQUIRE, func_id);
7257
e6ac2450 7258 /* Check the arguments */
2357672c 7259 err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs);
5c073f26 7260 if (err < 0)
e6ac2450 7261 return err;
5c073f26
KKD
7262 /* In case of release function, we get register number of refcounted
7263 * PTR_TO_BTF_ID back from btf_check_kfunc_arg_match, do the release now
7264 */
7265 if (err) {
7266 err = release_reference(env, regs[err].ref_obj_id);
7267 if (err) {
7268 verbose(env, "kfunc %s#%d reference has not been acquired before\n",
7269 func_name, func_id);
7270 return err;
7271 }
7272 }
e6ac2450
MKL
7273
7274 for (i = 0; i < CALLER_SAVED_REGS; i++)
7275 mark_reg_not_init(env, regs, caller_saved[i]);
7276
7277 /* Check return type */
2357672c 7278 t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
5c073f26
KKD
7279
7280 if (acq && !btf_type_is_ptr(t)) {
7281 verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
7282 return -EINVAL;
7283 }
7284
e6ac2450
MKL
7285 if (btf_type_is_scalar(t)) {
7286 mark_reg_unknown(env, regs, BPF_REG_0);
7287 mark_btf_func_reg_size(env, BPF_REG_0, t->size);
7288 } else if (btf_type_is_ptr(t)) {
2357672c 7289 ptr_type = btf_type_skip_modifiers(desc_btf, t->type,
e6ac2450
MKL
7290 &ptr_type_id);
7291 if (!btf_type_is_struct(ptr_type)) {
2357672c 7292 ptr_type_name = btf_name_by_offset(desc_btf,
e6ac2450
MKL
7293 ptr_type->name_off);
7294 verbose(env, "kernel function %s returns pointer type %s %s is not supported\n",
7295 func_name, btf_type_str(ptr_type),
7296 ptr_type_name);
7297 return -EINVAL;
7298 }
7299 mark_reg_known_zero(env, regs, BPF_REG_0);
2357672c 7300 regs[BPF_REG_0].btf = desc_btf;
e6ac2450
MKL
7301 regs[BPF_REG_0].type = PTR_TO_BTF_ID;
7302 regs[BPF_REG_0].btf_id = ptr_type_id;
5c073f26
KKD
7303 if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
7304 BTF_KFUNC_TYPE_RET_NULL, func_id)) {
7305 regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
7306 /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
7307 regs[BPF_REG_0].id = ++env->id_gen;
7308 }
e6ac2450 7309 mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
5c073f26
KKD
7310 if (acq) {
7311 int id = acquire_reference_state(env, insn_idx);
7312
7313 if (id < 0)
7314 return id;
7315 regs[BPF_REG_0].id = id;
7316 regs[BPF_REG_0].ref_obj_id = id;
7317 }
e6ac2450
MKL
7318 } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
7319
7320 nargs = btf_type_vlen(func_proto);
7321 args = (const struct btf_param *)(func_proto + 1);
7322 for (i = 0; i < nargs; i++) {
7323 u32 regno = i + 1;
7324
2357672c 7325 t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
e6ac2450
MKL
7326 if (btf_type_is_ptr(t))
7327 mark_btf_func_reg_size(env, regno, sizeof(void *));
7328 else
7329 /* scalar. ensured by btf_check_kfunc_arg_match() */
7330 mark_btf_func_reg_size(env, regno, t->size);
7331 }
7332
7333 return 0;
7334}
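/* Illustrative sketch, not part of verifier.c: calling an acquire/release
 * kfunc pair from BPF C, which exercises the ref_obj_id handling above.
 * Assumes the selftest kfuncs exported by net/bpf/test_run.c
 * (bpf_kfunc_call_test_acquire/_release, registered for SCHED_CLS programs)
 * and a vmlinux.h providing struct prog_test_ref_kfunc; section and symbol
 * names are otherwise hypothetical.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

extern struct prog_test_ref_kfunc *
bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym;
extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;

SEC("tc")
int kfunc_user(struct __sk_buff *skb)
{
	unsigned long arg = 0;
	struct prog_test_ref_kfunc *p;

	p = bpf_kfunc_call_test_acquire(&arg);
	if (!p)		/* acquire kfuncs in the RET_NULL set may return NULL */
		return 0;
	bpf_kfunc_call_test_release(p);	/* drops the ref_obj_id taken above */
	return 0;
}

char LICENSE[] SEC("license") = "GPL";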
7335
b03c9f9f
EC
7336static bool signed_add_overflows(s64 a, s64 b)
7337{
7338 /* Do the add in u64, where overflow is well-defined */
7339 s64 res = (s64)((u64)a + (u64)b);
7340
7341 if (b < 0)
7342 return res > a;
7343 return res < a;
7344}
7345
bc895e8b 7346static bool signed_add32_overflows(s32 a, s32 b)
3f50f132
JF
7347{
7348 /* Do the add in u32, where overflow is well-defined */
7349 s32 res = (s32)((u32)a + (u32)b);
7350
7351 if (b < 0)
7352 return res > a;
7353 return res < a;
7354}
7355
bc895e8b 7356static bool signed_sub_overflows(s64 a, s64 b)
b03c9f9f
EC
7357{
7358 /* Do the sub in u64, where overflow is well-defined */
7359 s64 res = (s64)((u64)a - (u64)b);
7360
7361 if (b < 0)
7362 return res < a;
7363 return res > a;
969bf05e
AS
7364}
7365
3f50f132
JF
7366static bool signed_sub32_overflows(s32 a, s32 b)
7367{
bc895e8b 7368 /* Do the sub in u32, where overflow is well-defined */
3f50f132
JF
7369 s32 res = (s32)((u32)a - (u32)b);
7370
7371 if (b < 0)
7372 return res < a;
7373 return res > a;
7374}
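/* Standalone illustration, not kernel code: the overflow-detection idiom used
 * by the signed_*_overflows() helpers above. The arithmetic is done in the
 * unsigned type (well-defined wraparound) and the result is compared against
 * one operand depending on the sign of the other. Assumes the usual wrap on
 * conversion back to the signed type, as gcc and clang provide.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool s64_add_overflows(int64_t a, int64_t b)
{
	int64_t res = (int64_t)((uint64_t)a + (uint64_t)b);

	return b < 0 ? res > a : res < a;
}

int main(void)
{
	assert(!s64_add_overflows(1, 2));
	assert(s64_add_overflows(INT64_MAX, 1));	/* wraps to INT64_MIN */
	assert(s64_add_overflows(INT64_MIN, -1));	/* wraps to INT64_MAX */
	return 0;
}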
7375
bb7f0f98
AS
7376static bool check_reg_sane_offset(struct bpf_verifier_env *env,
7377 const struct bpf_reg_state *reg,
7378 enum bpf_reg_type type)
7379{
7380 bool known = tnum_is_const(reg->var_off);
7381 s64 val = reg->var_off.value;
7382 s64 smin = reg->smin_value;
7383
7384 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
7385 verbose(env, "math between %s pointer and %lld is not allowed\n",
c25b2ae1 7386 reg_type_str(env, type), val);
bb7f0f98
AS
7387 return false;
7388 }
7389
7390 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
7391 verbose(env, "%s pointer offset %d is not allowed\n",
c25b2ae1 7392 reg_type_str(env, type), reg->off);
bb7f0f98
AS
7393 return false;
7394 }
7395
7396 if (smin == S64_MIN) {
7397 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
c25b2ae1 7398 reg_type_str(env, type));
bb7f0f98
AS
7399 return false;
7400 }
7401
7402 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
7403 verbose(env, "value %lld makes %s pointer be out of bounds\n",
c25b2ae1 7404 smin, reg_type_str(env, type));
bb7f0f98
AS
7405 return false;
7406 }
7407
7408 return true;
7409}
7410
979d63d5
DB
7411static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
7412{
7413 return &env->insn_aux_data[env->insn_idx];
7414}
7415
a6aaece0
DB
7416enum {
7417 REASON_BOUNDS = -1,
7418 REASON_TYPE = -2,
7419 REASON_PATHS = -3,
7420 REASON_LIMIT = -4,
7421 REASON_STACK = -5,
7422};
7423
979d63d5 7424static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
bb01a1bb 7425 u32 *alu_limit, bool mask_to_left)
979d63d5 7426{
7fedb63a 7427 u32 max = 0, ptr_limit = 0;
979d63d5
DB
7428
7429 switch (ptr_reg->type) {
7430 case PTR_TO_STACK:
1b1597e6 7431 /* Offset 0 is out-of-bounds, but acceptable start for the
7fedb63a
DB
7432 * left direction, see BPF_REG_FP. Also, unknown scalar
7433 * offset where we would need to deal with min/max bounds is
7434 * currently prohibited for unprivileged.
1b1597e6
PK
7435 */
7436 max = MAX_BPF_STACK + mask_to_left;
7fedb63a 7437 ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
b658bbb8 7438 break;
979d63d5 7439 case PTR_TO_MAP_VALUE:
1b1597e6 7440 max = ptr_reg->map_ptr->value_size;
7fedb63a
DB
7441 ptr_limit = (mask_to_left ?
7442 ptr_reg->smin_value :
7443 ptr_reg->umax_value) + ptr_reg->off;
b658bbb8 7444 break;
979d63d5 7445 default:
a6aaece0 7446 return REASON_TYPE;
979d63d5 7447 }
b658bbb8
DB
7448
7449 if (ptr_limit >= max)
a6aaece0 7450 return REASON_LIMIT;
b658bbb8
DB
7451 *alu_limit = ptr_limit;
7452 return 0;
979d63d5
DB
7453}
7454
d3bd7413
DB
7455static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
7456 const struct bpf_insn *insn)
7457{
2c78ee89 7458 return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
d3bd7413
DB
7459}
7460
7461static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
7462 u32 alu_state, u32 alu_limit)
7463{
7464 /* If we arrived here from different branches with different
7465 * state or limits to sanitize, then this won't work.
7466 */
7467 if (aux->alu_state &&
7468 (aux->alu_state != alu_state ||
7469 aux->alu_limit != alu_limit))
a6aaece0 7470 return REASON_PATHS;
d3bd7413 7471
e6ac5933 7472 /* Corresponding fixup done in do_misc_fixups(). */
d3bd7413
DB
7473 aux->alu_state = alu_state;
7474 aux->alu_limit = alu_limit;
7475 return 0;
7476}
7477
7478static int sanitize_val_alu(struct bpf_verifier_env *env,
7479 struct bpf_insn *insn)
7480{
7481 struct bpf_insn_aux_data *aux = cur_aux(env);
7482
7483 if (can_skip_alu_sanitation(env, insn))
7484 return 0;
7485
7486 return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
7487}
7488
f5288193
DB
7489static bool sanitize_needed(u8 opcode)
7490{
7491 return opcode == BPF_ADD || opcode == BPF_SUB;
7492}
7493
3d0220f6
DB
7494struct bpf_sanitize_info {
7495 struct bpf_insn_aux_data aux;
bb01a1bb 7496 bool mask_to_left;
3d0220f6
DB
7497};
7498
9183671a
DB
7499static struct bpf_verifier_state *
7500sanitize_speculative_path(struct bpf_verifier_env *env,
7501 const struct bpf_insn *insn,
7502 u32 next_idx, u32 curr_idx)
7503{
7504 struct bpf_verifier_state *branch;
7505 struct bpf_reg_state *regs;
7506
7507 branch = push_stack(env, next_idx, curr_idx, true);
7508 if (branch && insn) {
7509 regs = branch->frame[branch->curframe]->regs;
7510 if (BPF_SRC(insn->code) == BPF_K) {
7511 mark_reg_unknown(env, regs, insn->dst_reg);
7512 } else if (BPF_SRC(insn->code) == BPF_X) {
7513 mark_reg_unknown(env, regs, insn->dst_reg);
7514 mark_reg_unknown(env, regs, insn->src_reg);
7515 }
7516 }
7517 return branch;
7518}
7519
979d63d5
DB
7520static int sanitize_ptr_alu(struct bpf_verifier_env *env,
7521 struct bpf_insn *insn,
7522 const struct bpf_reg_state *ptr_reg,
6f55b2f2 7523 const struct bpf_reg_state *off_reg,
979d63d5 7524 struct bpf_reg_state *dst_reg,
3d0220f6 7525 struct bpf_sanitize_info *info,
7fedb63a 7526 const bool commit_window)
979d63d5 7527{
3d0220f6 7528 struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
979d63d5 7529 struct bpf_verifier_state *vstate = env->cur_state;
801c6058 7530 bool off_is_imm = tnum_is_const(off_reg->var_off);
6f55b2f2 7531 bool off_is_neg = off_reg->smin_value < 0;
979d63d5
DB
7532 bool ptr_is_dst_reg = ptr_reg == dst_reg;
7533 u8 opcode = BPF_OP(insn->code);
7534 u32 alu_state, alu_limit;
7535 struct bpf_reg_state tmp;
7536 bool ret;
f232326f 7537 int err;
979d63d5 7538
d3bd7413 7539 if (can_skip_alu_sanitation(env, insn))
979d63d5
DB
7540 return 0;
7541
7542 /* We already marked aux for masking from non-speculative
7543 * paths, thus we got here in the first place. We only care
7544 * to explore bad access from here.
7545 */
7546 if (vstate->speculative)
7547 goto do_sim;
7548
bb01a1bb
DB
7549 if (!commit_window) {
7550 if (!tnum_is_const(off_reg->var_off) &&
7551 (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
7552 return REASON_BOUNDS;
7553
7554 info->mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
7555 (opcode == BPF_SUB && !off_is_neg);
7556 }
7557
7558 err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
f232326f
PK
7559 if (err < 0)
7560 return err;
7561
7fedb63a
DB
7562 if (commit_window) {
7563 /* In commit phase we narrow the masking window based on
7564 * the observed pointer move after the simulated operation.
7565 */
3d0220f6
DB
7566 alu_state = info->aux.alu_state;
7567 alu_limit = abs(info->aux.alu_limit - alu_limit);
7fedb63a
DB
7568 } else {
7569 alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
801c6058 7570 alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
7fedb63a
DB
7571 alu_state |= ptr_is_dst_reg ?
7572 BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
e042aa53
DB
7573
7574 /* Limit pruning on unknown scalars to enable deep search for
7575 * potential masking differences from other program paths.
7576 */
7577 if (!off_is_imm)
7578 env->explore_alu_limits = true;
7fedb63a
DB
7579 }
7580
f232326f
PK
7581 err = update_alu_sanitation_state(aux, alu_state, alu_limit);
7582 if (err < 0)
7583 return err;
979d63d5 7584do_sim:
7fedb63a
DB
7585 /* If we're in commit phase, we're done here given we already
7586 * pushed the truncated dst_reg into the speculative verification
7587 * stack.
a7036191
DB
7588 *
7589 * Also, when register is a known constant, we rewrite register-based
7590 * operation to immediate-based, and thus do not need masking (and as
7591 * a consequence, do not need to simulate the zero-truncation either).
7fedb63a 7592 */
a7036191 7593 if (commit_window || off_is_imm)
7fedb63a
DB
7594 return 0;
7595
979d63d5
DB
7596 /* Simulate and find potential out-of-bounds access under
7597 * speculative execution from truncation as a result of
7598 * masking when off was not within expected range. If off
7599 * sits in dst, then we temporarily need to move ptr there
7600 * to simulate dst (== 0) +/-= ptr. Needed, for example,
7601 * for cases where we use K-based arithmetic in one direction
7602 * and truncated reg-based in the other in order to explore
7603 * bad access.
7604 */
7605 if (!ptr_is_dst_reg) {
7606 tmp = *dst_reg;
7607 *dst_reg = *ptr_reg;
7608 }
9183671a
DB
7609 ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
7610 env->insn_idx);
0803278b 7611 if (!ptr_is_dst_reg && ret)
979d63d5 7612 *dst_reg = tmp;
a6aaece0
DB
7613 return !ret ? REASON_STACK : 0;
7614}
7615
fe9a5ca7
DB
7616static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
7617{
7618 struct bpf_verifier_state *vstate = env->cur_state;
7619
7620 /* If we simulate paths under speculation, we don't update the
7621 * insn as 'seen' such that when we verify unreachable paths in
7622 * the non-speculative domain, sanitize_dead_code() can still
7623 * rewrite/sanitize them.
7624 */
7625 if (!vstate->speculative)
7626 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
7627}
7628
a6aaece0
DB
7629static int sanitize_err(struct bpf_verifier_env *env,
7630 const struct bpf_insn *insn, int reason,
7631 const struct bpf_reg_state *off_reg,
7632 const struct bpf_reg_state *dst_reg)
7633{
7634 static const char *err = "pointer arithmetic with it prohibited for !root";
7635 const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
7636 u32 dst = insn->dst_reg, src = insn->src_reg;
7637
7638 switch (reason) {
7639 case REASON_BOUNDS:
7640 verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
7641 off_reg == dst_reg ? dst : src, err);
7642 break;
7643 case REASON_TYPE:
7644 verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
7645 off_reg == dst_reg ? src : dst, err);
7646 break;
7647 case REASON_PATHS:
7648 verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
7649 dst, op, err);
7650 break;
7651 case REASON_LIMIT:
7652 verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
7653 dst, op, err);
7654 break;
7655 case REASON_STACK:
7656 verbose(env, "R%d could not be pushed for speculative verification, %s\n",
7657 dst, err);
7658 break;
7659 default:
7660 verbose(env, "verifier internal error: unknown reason (%d)\n",
7661 reason);
7662 break;
7663 }
7664
7665 return -EACCES;
979d63d5
DB
7666}
7667
01f810ac
AM
7668/* check that stack access falls within stack limits and that 'reg' doesn't
7669 * have a variable offset.
7670 *
7671 * Variable offset is prohibited for unprivileged mode for simplicity since it
7672 * requires corresponding support in Spectre masking for stack ALU. See also
7673 * retrieve_ptr_limit().
7674 *
7675 *
7676 * 'off' includes 'reg->off'.
7677 */
7678static int check_stack_access_for_ptr_arithmetic(
7679 struct bpf_verifier_env *env,
7680 int regno,
7681 const struct bpf_reg_state *reg,
7682 int off)
7683{
7684 if (!tnum_is_const(reg->var_off)) {
7685 char tn_buf[48];
7686
7687 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
7688 verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
7689 regno, tn_buf, off);
7690 return -EACCES;
7691 }
7692
7693 if (off >= 0 || off < -MAX_BPF_STACK) {
7694 verbose(env, "R%d stack pointer arithmetic goes out of range, "
7695 "prohibited for !root; off=%d\n", regno, off);
7696 return -EACCES;
7697 }
7698
7699 return 0;
7700}
7701
073815b7
DB
7702static int sanitize_check_bounds(struct bpf_verifier_env *env,
7703 const struct bpf_insn *insn,
7704 const struct bpf_reg_state *dst_reg)
7705{
7706 u32 dst = insn->dst_reg;
7707
7708 /* For unprivileged we require that resulting offset must be in bounds
7709 * in order to be able to sanitize access later on.
7710 */
7711 if (env->bypass_spec_v1)
7712 return 0;
7713
7714 switch (dst_reg->type) {
7715 case PTR_TO_STACK:
7716 if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
7717 dst_reg->off + dst_reg->var_off.value))
7718 return -EACCES;
7719 break;
7720 case PTR_TO_MAP_VALUE:
61df10c7 7721 if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
073815b7
DB
7722 verbose(env, "R%d pointer arithmetic of map value goes out of range, "
7723 "prohibited for !root\n", dst);
7724 return -EACCES;
7725 }
7726 break;
7727 default:
7728 break;
7729 }
7730
7731 return 0;
7732}
01f810ac 7733
f1174f77 7734/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
f1174f77
EC
7735 * Caller should also handle BPF_MOV case separately.
7736 * If we return -EACCES, caller may want to try again treating pointer as a
7737 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
7738 */
7739static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
7740 struct bpf_insn *insn,
7741 const struct bpf_reg_state *ptr_reg,
7742 const struct bpf_reg_state *off_reg)
969bf05e 7743{
f4d7e40a
AS
7744 struct bpf_verifier_state *vstate = env->cur_state;
7745 struct bpf_func_state *state = vstate->frame[vstate->curframe];
7746 struct bpf_reg_state *regs = state->regs, *dst_reg;
f1174f77 7747 bool known = tnum_is_const(off_reg->var_off);
b03c9f9f
EC
7748 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
7749 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
7750 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
7751 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
3d0220f6 7752 struct bpf_sanitize_info info = {};
969bf05e 7753 u8 opcode = BPF_OP(insn->code);
24c109bb 7754 u32 dst = insn->dst_reg;
979d63d5 7755 int ret;
969bf05e 7756
f1174f77 7757 dst_reg = &regs[dst];
969bf05e 7758
6f16101e
DB
7759 if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
7760 smin_val > smax_val || umin_val > umax_val) {
7761 /* Taint dst register if offset had invalid bounds derived from
7762 * e.g. dead branches.
7763 */
f54c7898 7764 __mark_reg_unknown(env, dst_reg);
6f16101e 7765 return 0;
f1174f77
EC
7766 }
7767
7768 if (BPF_CLASS(insn->code) != BPF_ALU64) {
7769 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
6c693541
YS
7770 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
7771 __mark_reg_unknown(env, dst_reg);
7772 return 0;
7773 }
7774
82abbf8d
AS
7775 verbose(env,
7776 "R%d 32-bit pointer arithmetic prohibited\n",
7777 dst);
f1174f77 7778 return -EACCES;
969bf05e
AS
7779 }
7780
c25b2ae1 7781 if (ptr_reg->type & PTR_MAYBE_NULL) {
aad2eeaf 7782 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
c25b2ae1 7783 dst, reg_type_str(env, ptr_reg->type));
f1174f77 7784 return -EACCES;
c25b2ae1
HL
7785 }
7786
7787 switch (base_type(ptr_reg->type)) {
aad2eeaf 7788 case CONST_PTR_TO_MAP:
7c696732
YS
7789 /* smin_val represents the known value */
7790 if (known && smin_val == 0 && opcode == BPF_ADD)
7791 break;
8731745e 7792 fallthrough;
aad2eeaf 7793 case PTR_TO_PACKET_END:
c64b7983 7794 case PTR_TO_SOCKET:
46f8bc92 7795 case PTR_TO_SOCK_COMMON:
655a51e5 7796 case PTR_TO_TCP_SOCK:
fada7fdc 7797 case PTR_TO_XDP_SOCK:
aad2eeaf 7798 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
c25b2ae1 7799 dst, reg_type_str(env, ptr_reg->type));
f1174f77 7800 return -EACCES;
aad2eeaf
JS
7801 default:
7802 break;
f1174f77
EC
7803 }
7804
7805 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
7806 * The id may be overwritten later if we create a new variable offset.
969bf05e 7807 */
f1174f77
EC
7808 dst_reg->type = ptr_reg->type;
7809 dst_reg->id = ptr_reg->id;
969bf05e 7810
bb7f0f98
AS
7811 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
7812 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
7813 return -EINVAL;
7814
3f50f132
JF
7815 /* pointer types do not carry 32-bit bounds at the moment. */
7816 __mark_reg32_unbounded(dst_reg);
7817
7fedb63a
DB
7818 if (sanitize_needed(opcode)) {
7819 ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
3d0220f6 7820 &info, false);
a6aaece0
DB
7821 if (ret < 0)
7822 return sanitize_err(env, insn, ret, off_reg, dst_reg);
7fedb63a 7823 }
a6aaece0 7824
f1174f77
EC
7825 switch (opcode) {
7826 case BPF_ADD:
7827 /* We can take a fixed offset as long as it doesn't overflow
7828 * the s32 'off' field
969bf05e 7829 */
b03c9f9f
EC
7830 if (known && (ptr_reg->off + smin_val ==
7831 (s64)(s32)(ptr_reg->off + smin_val))) {
f1174f77 7832 /* pointer += K. Accumulate it into fixed offset */
b03c9f9f
EC
7833 dst_reg->smin_value = smin_ptr;
7834 dst_reg->smax_value = smax_ptr;
7835 dst_reg->umin_value = umin_ptr;
7836 dst_reg->umax_value = umax_ptr;
f1174f77 7837 dst_reg->var_off = ptr_reg->var_off;
b03c9f9f 7838 dst_reg->off = ptr_reg->off + smin_val;
0962590e 7839 dst_reg->raw = ptr_reg->raw;
f1174f77
EC
7840 break;
7841 }
f1174f77
EC
7842 /* A new variable offset is created. Note that off_reg->off
7843 * == 0, since it's a scalar.
7844 * dst_reg gets the pointer type and since some positive
7845 * integer value was added to the pointer, give it a new 'id'
7846 * if it's a PTR_TO_PACKET.
7847 * this creates a new 'base' pointer, off_reg (variable) gets
7848 * added into the variable offset, and we copy the fixed offset
7849 * from ptr_reg.
969bf05e 7850 */
b03c9f9f
EC
7851 if (signed_add_overflows(smin_ptr, smin_val) ||
7852 signed_add_overflows(smax_ptr, smax_val)) {
7853 dst_reg->smin_value = S64_MIN;
7854 dst_reg->smax_value = S64_MAX;
7855 } else {
7856 dst_reg->smin_value = smin_ptr + smin_val;
7857 dst_reg->smax_value = smax_ptr + smax_val;
7858 }
7859 if (umin_ptr + umin_val < umin_ptr ||
7860 umax_ptr + umax_val < umax_ptr) {
7861 dst_reg->umin_value = 0;
7862 dst_reg->umax_value = U64_MAX;
7863 } else {
7864 dst_reg->umin_value = umin_ptr + umin_val;
7865 dst_reg->umax_value = umax_ptr + umax_val;
7866 }
f1174f77
EC
7867 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
7868 dst_reg->off = ptr_reg->off;
0962590e 7869 dst_reg->raw = ptr_reg->raw;
de8f3a83 7870 if (reg_is_pkt_pointer(ptr_reg)) {
f1174f77
EC
7871 dst_reg->id = ++env->id_gen;
7872 /* something was added to pkt_ptr, set range to zero */
22dc4a0f 7873 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
f1174f77
EC
7874 }
7875 break;
7876 case BPF_SUB:
7877 if (dst_reg == off_reg) {
7878 /* scalar -= pointer. Creates an unknown scalar */
82abbf8d
AS
7879 verbose(env, "R%d tried to subtract pointer from scalar\n",
7880 dst);
f1174f77
EC
7881 return -EACCES;
7882 }
7883 /* We don't allow subtraction from FP, because (according to
7884 * test_verifier.c test "invalid fp arithmetic", JITs might not
7885 * be able to deal with it.
969bf05e 7886 */
f1174f77 7887 if (ptr_reg->type == PTR_TO_STACK) {
82abbf8d
AS
7888 verbose(env, "R%d subtraction from stack pointer prohibited\n",
7889 dst);
f1174f77
EC
7890 return -EACCES;
7891 }
b03c9f9f
EC
7892 if (known && (ptr_reg->off - smin_val ==
7893 (s64)(s32)(ptr_reg->off - smin_val))) {
f1174f77 7894 /* pointer -= K. Subtract it from fixed offset */
b03c9f9f
EC
7895 dst_reg->smin_value = smin_ptr;
7896 dst_reg->smax_value = smax_ptr;
7897 dst_reg->umin_value = umin_ptr;
7898 dst_reg->umax_value = umax_ptr;
f1174f77
EC
7899 dst_reg->var_off = ptr_reg->var_off;
7900 dst_reg->id = ptr_reg->id;
b03c9f9f 7901 dst_reg->off = ptr_reg->off - smin_val;
0962590e 7902 dst_reg->raw = ptr_reg->raw;
f1174f77
EC
7903 break;
7904 }
f1174f77
EC
7905 /* A new variable offset is created. If the subtrahend is known
7906 * nonnegative, then any reg->range we had before is still good.
969bf05e 7907 */
b03c9f9f
EC
7908 if (signed_sub_overflows(smin_ptr, smax_val) ||
7909 signed_sub_overflows(smax_ptr, smin_val)) {
7910 /* Overflow possible, we know nothing */
7911 dst_reg->smin_value = S64_MIN;
7912 dst_reg->smax_value = S64_MAX;
7913 } else {
7914 dst_reg->smin_value = smin_ptr - smax_val;
7915 dst_reg->smax_value = smax_ptr - smin_val;
7916 }
7917 if (umin_ptr < umax_val) {
7918 /* Overflow possible, we know nothing */
7919 dst_reg->umin_value = 0;
7920 dst_reg->umax_value = U64_MAX;
7921 } else {
7922 /* Cannot overflow (as long as bounds are consistent) */
7923 dst_reg->umin_value = umin_ptr - umax_val;
7924 dst_reg->umax_value = umax_ptr - umin_val;
7925 }
f1174f77
EC
7926 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
7927 dst_reg->off = ptr_reg->off;
0962590e 7928 dst_reg->raw = ptr_reg->raw;
de8f3a83 7929 if (reg_is_pkt_pointer(ptr_reg)) {
f1174f77
EC
7930 dst_reg->id = ++env->id_gen;
7931 /* a negative subtrahend may have added to pkt_ptr, so clear the range */
b03c9f9f 7932 if (smin_val < 0)
22dc4a0f 7933 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
43188702 7934 }
f1174f77
EC
7935 break;
7936 case BPF_AND:
7937 case BPF_OR:
7938 case BPF_XOR:
82abbf8d
AS
7939 /* bitwise ops on pointers are troublesome, prohibit. */
7940 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
7941 dst, bpf_alu_string[opcode >> 4]);
f1174f77
EC
7942 return -EACCES;
7943 default:
7944 /* other operators (e.g. MUL,LSH) produce non-pointer results */
82abbf8d
AS
7945 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
7946 dst, bpf_alu_string[opcode >> 4]);
f1174f77 7947 return -EACCES;
43188702
JF
7948 }
7949
bb7f0f98
AS
7950 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
7951 return -EINVAL;
7952
b03c9f9f
EC
7953 __update_reg_bounds(dst_reg);
7954 __reg_deduce_bounds(dst_reg);
7955 __reg_bound_offset(dst_reg);
0d6303db 7956
073815b7
DB
7957 if (sanitize_check_bounds(env, insn, dst_reg) < 0)
7958 return -EACCES;
7fedb63a
DB
7959 if (sanitize_needed(opcode)) {
7960 ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
3d0220f6 7961 &info, true);
7fedb63a
DB
7962 if (ret < 0)
7963 return sanitize_err(env, insn, ret, off_reg, dst_reg);
0d6303db
DB
7964 }
7965
43188702
JF
7966 return 0;
7967}
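/* Illustrative sketch, not part of verifier.c: packet-pointer arithmetic of
 * the kind adjust_ptr_min_max_vals() tracks. Adding the known constant
 * sizeof(*eth) accumulates into the pointer's fixed offset, and the
 * comparison against data_end is what later makes the dereference safe;
 * bitwise or 32-bit ALU on the pointer would be rejected by the same
 * function. Section and symbol names are hypothetical.
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int parse_eth(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct ethhdr *eth = data;

	if ((void *)(eth + 1) > data_end)	/* pointer + known constant */
		return XDP_DROP;
	return eth->h_proto ? XDP_PASS : XDP_DROP;
}

char LICENSE[] SEC("license") = "GPL";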
7968
3f50f132
JF
7969static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
7970 struct bpf_reg_state *src_reg)
7971{
7972 s32 smin_val = src_reg->s32_min_value;
7973 s32 smax_val = src_reg->s32_max_value;
7974 u32 umin_val = src_reg->u32_min_value;
7975 u32 umax_val = src_reg->u32_max_value;
7976
7977 if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
7978 signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
7979 dst_reg->s32_min_value = S32_MIN;
7980 dst_reg->s32_max_value = S32_MAX;
7981 } else {
7982 dst_reg->s32_min_value += smin_val;
7983 dst_reg->s32_max_value += smax_val;
7984 }
7985 if (dst_reg->u32_min_value + umin_val < umin_val ||
7986 dst_reg->u32_max_value + umax_val < umax_val) {
7987 dst_reg->u32_min_value = 0;
7988 dst_reg->u32_max_value = U32_MAX;
7989 } else {
7990 dst_reg->u32_min_value += umin_val;
7991 dst_reg->u32_max_value += umax_val;
7992 }
7993}
7994
07cd2631
JF
7995static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
7996 struct bpf_reg_state *src_reg)
7997{
7998 s64 smin_val = src_reg->smin_value;
7999 s64 smax_val = src_reg->smax_value;
8000 u64 umin_val = src_reg->umin_value;
8001 u64 umax_val = src_reg->umax_value;
8002
8003 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
8004 signed_add_overflows(dst_reg->smax_value, smax_val)) {
8005 dst_reg->smin_value = S64_MIN;
8006 dst_reg->smax_value = S64_MAX;
8007 } else {
8008 dst_reg->smin_value += smin_val;
8009 dst_reg->smax_value += smax_val;
8010 }
8011 if (dst_reg->umin_value + umin_val < umin_val ||
8012 dst_reg->umax_value + umax_val < umax_val) {
8013 dst_reg->umin_value = 0;
8014 dst_reg->umax_value = U64_MAX;
8015 } else {
8016 dst_reg->umin_value += umin_val;
8017 dst_reg->umax_value += umax_val;
8018 }
3f50f132
JF
8019}
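/* Standalone illustration, not kernel code: how the unsigned bounds combine
 * under BPF_ADD in scalar_min_max_add() above. When either bound addition
 * wraps, the destination is widened to the full u64 range; otherwise the
 * bounds simply add. The sample values are arbitrary.
 */
#include <assert.h>
#include <stdint.h>

struct ubounds { uint64_t umin, umax; };

static struct ubounds ubounds_add(struct ubounds dst, struct ubounds src)
{
	if (dst.umin + src.umin < src.umin || dst.umax + src.umax < src.umax) {
		dst.umin = 0;
		dst.umax = UINT64_MAX;
	} else {
		dst.umin += src.umin;
		dst.umax += src.umax;
	}
	return dst;
}

int main(void)
{
	struct ubounds dst = { 10, 20 };
	struct ubounds narrow = ubounds_add(dst, (struct ubounds){ 1, 5 });
	struct ubounds wide = ubounds_add(dst, (struct ubounds){ 1, UINT64_MAX - 5 });

	assert(narrow.umin == 11 && narrow.umax == 25);
	assert(wide.umin == 0 && wide.umax == UINT64_MAX);	/* umax would wrap */
	return 0;
}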
8020
8021static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
8022 struct bpf_reg_state *src_reg)
8023{
8024 s32 smin_val = src_reg->s32_min_value;
8025 s32 smax_val = src_reg->s32_max_value;
8026 u32 umin_val = src_reg->u32_min_value;
8027 u32 umax_val = src_reg->u32_max_value;
8028
8029 if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
8030 signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
8031 /* Overflow possible, we know nothing */
8032 dst_reg->s32_min_value = S32_MIN;
8033 dst_reg->s32_max_value = S32_MAX;
8034 } else {
8035 dst_reg->s32_min_value -= smax_val;
8036 dst_reg->s32_max_value -= smin_val;
8037 }
8038 if (dst_reg->u32_min_value < umax_val) {
8039 /* Overflow possible, we know nothing */
8040 dst_reg->u32_min_value = 0;
8041 dst_reg->u32_max_value = U32_MAX;
8042 } else {
8043 /* Cannot overflow (as long as bounds are consistent) */
8044 dst_reg->u32_min_value -= umax_val;
8045 dst_reg->u32_max_value -= umin_val;
8046 }
07cd2631
JF
8047}
8048
8049static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
8050 struct bpf_reg_state *src_reg)
8051{
8052 s64 smin_val = src_reg->smin_value;
8053 s64 smax_val = src_reg->smax_value;
8054 u64 umin_val = src_reg->umin_value;
8055 u64 umax_val = src_reg->umax_value;
8056
8057 if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
8058 signed_sub_overflows(dst_reg->smax_value, smin_val)) {
8059 /* Overflow possible, we know nothing */
8060 dst_reg->smin_value = S64_MIN;
8061 dst_reg->smax_value = S64_MAX;
8062 } else {
8063 dst_reg->smin_value -= smax_val;
8064 dst_reg->smax_value -= smin_val;
8065 }
8066 if (dst_reg->umin_value < umax_val) {
8067 /* Overflow possible, we know nothing */
8068 dst_reg->umin_value = 0;
8069 dst_reg->umax_value = U64_MAX;
8070 } else {
8071 /* Cannot overflow (as long as bounds are consistent) */
8072 dst_reg->umin_value -= umax_val;
8073 dst_reg->umax_value -= umin_val;
8074 }
3f50f132
JF
8075}
8076
8077static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
8078 struct bpf_reg_state *src_reg)
8079{
8080 s32 smin_val = src_reg->s32_min_value;
8081 u32 umin_val = src_reg->u32_min_value;
8082 u32 umax_val = src_reg->u32_max_value;
8083
8084 if (smin_val < 0 || dst_reg->s32_min_value < 0) {
8085 /* Ain't nobody got time to multiply that sign */
8086 __mark_reg32_unbounded(dst_reg);
8087 return;
8088 }
8089 /* Both values are positive, so we can work with unsigned and
8090 * copy the result to signed (unless it exceeds S32_MAX).
8091 */
8092 if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
8093 /* Potential overflow, we know nothing */
8094 __mark_reg32_unbounded(dst_reg);
8095 return;
8096 }
8097 dst_reg->u32_min_value *= umin_val;
8098 dst_reg->u32_max_value *= umax_val;
8099 if (dst_reg->u32_max_value > S32_MAX) {
8100 /* Overflow possible, we know nothing */
8101 dst_reg->s32_min_value = S32_MIN;
8102 dst_reg->s32_max_value = S32_MAX;
8103 } else {
8104 dst_reg->s32_min_value = dst_reg->u32_min_value;
8105 dst_reg->s32_max_value = dst_reg->u32_max_value;
8106 }
8107}
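/* The U16_MAX guard above is what keeps the 32-bit multiplication safe:
 * if both operands are at most U16_MAX, their product is at most
 * 0xfffe0001, which still fits in u32, so u32_min_value/u32_max_value
 * cannot wrap. The 64-bit variant below uses U32_MAX for the same
 * reason.
 */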
8108
8109static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
8110 struct bpf_reg_state *src_reg)
8111{
8112 s64 smin_val = src_reg->smin_value;
8113 u64 umin_val = src_reg->umin_value;
8114 u64 umax_val = src_reg->umax_value;
8115
8116 if (smin_val < 0 || dst_reg->smin_value < 0) {
8117 /* Ain't nobody got time to multiply that sign */
3f50f132 8118 __mark_reg64_unbounded(dst_reg);
8119 return;
8120 }
8121 /* Both values are positive, so we can work with unsigned and
8122 * copy the result to signed (unless it exceeds S64_MAX).
8123 */
8124 if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
8125 /* Potential overflow, we know nothing */
3f50f132 8126 __mark_reg64_unbounded(dst_reg);
8127 return;
8128 }
8129 dst_reg->umin_value *= umin_val;
8130 dst_reg->umax_value *= umax_val;
8131 if (dst_reg->umax_value > S64_MAX) {
8132 /* Overflow possible, we know nothing */
8133 dst_reg->smin_value = S64_MIN;
8134 dst_reg->smax_value = S64_MAX;
8135 } else {
8136 dst_reg->smin_value = dst_reg->umin_value;
8137 dst_reg->smax_value = dst_reg->umax_value;
8138 }
8139}
8140
8141static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
8142 struct bpf_reg_state *src_reg)
8143{
8144 bool src_known = tnum_subreg_is_const(src_reg->var_off);
8145 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
8146 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
8147 s32 smin_val = src_reg->s32_min_value;
8148 u32 umax_val = src_reg->u32_max_value;
8149
8150 if (src_known && dst_known) {
8151 __mark_reg32_known(dst_reg, var32_off.value);
3f50f132 8152 return;
049c4e13 8153 }
8154
8155 /* We get our minimum from the var_off, since that's inherently
8156 * bitwise. Our maximum is the minimum of the operands' maxima.
8157 */
8158 dst_reg->u32_min_value = var32_off.value;
8159 dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
8160 if (dst_reg->s32_min_value < 0 || smin_val < 0) {
8161 /* Lose signed bounds when ANDing negative numbers,
8162 * ain't nobody got time for that.
8163 */
8164 dst_reg->s32_min_value = S32_MIN;
8165 dst_reg->s32_max_value = S32_MAX;
8166 } else {
8167 /* ANDing two positives gives a positive, so safe to
8168 * cast result into s32.
8169 */
8170 dst_reg->s32_min_value = dst_reg->u32_min_value;
8171 dst_reg->s32_max_value = dst_reg->u32_max_value;
8172 }
8173}
8174
8175static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
8176 struct bpf_reg_state *src_reg)
8177{
8178 bool src_known = tnum_is_const(src_reg->var_off);
8179 bool dst_known = tnum_is_const(dst_reg->var_off);
8180 s64 smin_val = src_reg->smin_value;
8181 u64 umax_val = src_reg->umax_value;
8182
3f50f132 8183 if (src_known && dst_known) {
4fbb38a3 8184 __mark_reg_known(dst_reg, dst_reg->var_off.value);
8185 return;
8186 }
8187
8188 /* We get our minimum from the var_off, since that's inherently
8189 * bitwise. Our maximum is the minimum of the operands' maxima.
8190 */
8191 dst_reg->umin_value = dst_reg->var_off.value;
8192 dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
8193 if (dst_reg->smin_value < 0 || smin_val < 0) {
8194 /* Lose signed bounds when ANDing negative numbers,
8195 * ain't nobody got time for that.
8196 */
8197 dst_reg->smin_value = S64_MIN;
8198 dst_reg->smax_value = S64_MAX;
8199 } else {
8200 /* ANDing two positives gives a positive, so safe to
8201 * cast result into s64.
8202 */
8203 dst_reg->smin_value = dst_reg->umin_value;
8204 dst_reg->smax_value = dst_reg->umax_value;
8205 }
8206 /* We may learn something more from the var_off */
8207 __update_reg_bounds(dst_reg);
8208}
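/* Rationale for the AND bounds above: by the time these helpers run the
 * caller has already set dst_reg->var_off = tnum_and(dst, src), so the
 * known-one bits of the result (var_off.value) are a valid lower bound,
 * and since x & y can never exceed either operand the new maximum is
 * min(old umax, src umax).
 */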
8209
8210static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
8211 struct bpf_reg_state *src_reg)
8212{
8213 bool src_known = tnum_subreg_is_const(src_reg->var_off);
8214 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
8215 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
8216 s32 smin_val = src_reg->s32_min_value;
8217 u32 umin_val = src_reg->u32_min_value;
3f50f132 8218
8219 if (src_known && dst_known) {
8220 __mark_reg32_known(dst_reg, var32_off.value);
3f50f132 8221 return;
049c4e13 8222 }
8223
8224 /* We get our maximum from the var_off, and our minimum is the
8225 * maximum of the operands' minima
8226 */
8227 dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
8228 dst_reg->u32_max_value = var32_off.value | var32_off.mask;
8229 if (dst_reg->s32_min_value < 0 || smin_val < 0) {
8230 /* Lose signed bounds when ORing negative numbers,
8231 * ain't nobody got time for that.
8232 */
8233 dst_reg->s32_min_value = S32_MIN;
8234 dst_reg->s32_max_value = S32_MAX;
8235 } else {
8236 /* ORing two positives gives a positive, so safe to
8237 * cast result into s32.
8238 */
8239 dst_reg->s32_min_value = dst_reg->u32_min_value;
8240 dst_reg->s32_max_value = dst_reg->u32_max_value;
8241 }
8242}
8243
8244static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
8245 struct bpf_reg_state *src_reg)
8246{
8247 bool src_known = tnum_is_const(src_reg->var_off);
8248 bool dst_known = tnum_is_const(dst_reg->var_off);
8249 s64 smin_val = src_reg->smin_value;
8250 u64 umin_val = src_reg->umin_value;
8251
3f50f132 8252 if (src_known && dst_known) {
4fbb38a3 8253 __mark_reg_known(dst_reg, dst_reg->var_off.value);
8254 return;
8255 }
8256
8257 /* We get our maximum from the var_off, and our minimum is the
8258 * maximum of the operands' minima
8259 */
8260 dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
8261 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
8262 if (dst_reg->smin_value < 0 || smin_val < 0) {
8263 /* Lose signed bounds when ORing negative numbers,
8264 * ain't nobody got time for that.
8265 */
8266 dst_reg->smin_value = S64_MIN;
8267 dst_reg->smax_value = S64_MAX;
8268 } else {
8269 /* ORing two positives gives a positive, so safe to
8270 * cast result into s64.
8271 */
8272 dst_reg->smin_value = dst_reg->umin_value;
8273 dst_reg->smax_value = dst_reg->umax_value;
8274 }
8275 /* We may learn something more from the var_off */
8276 __update_reg_bounds(dst_reg);
8277}
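/* For OR the reasoning is mirrored: every bit that may be set in either
 * operand may be set in the result, so the maximum is
 * var_off.value | var_off.mask (all possibly-set bits), while the result
 * can never be smaller than either operand, hence
 * umin = max(old umin, src umin).
 */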
8278
8279static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
8280 struct bpf_reg_state *src_reg)
8281{
8282 bool src_known = tnum_subreg_is_const(src_reg->var_off);
8283 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
8284 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
8285 s32 smin_val = src_reg->s32_min_value;
8286
8287 if (src_known && dst_known) {
8288 __mark_reg32_known(dst_reg, var32_off.value);
2921c90d 8289 return;
049c4e13 8290 }
8291
8292 /* We get both minimum and maximum from the var32_off. */
8293 dst_reg->u32_min_value = var32_off.value;
8294 dst_reg->u32_max_value = var32_off.value | var32_off.mask;
8295
8296 if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
8297 /* XORing two positive sign numbers gives a positive,
8298 * so safe to cast u32 result into s32.
8299 */
8300 dst_reg->s32_min_value = dst_reg->u32_min_value;
8301 dst_reg->s32_max_value = dst_reg->u32_max_value;
8302 } else {
8303 dst_reg->s32_min_value = S32_MIN;
8304 dst_reg->s32_max_value = S32_MAX;
8305 }
8306}
8307
8308static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
8309 struct bpf_reg_state *src_reg)
8310{
8311 bool src_known = tnum_is_const(src_reg->var_off);
8312 bool dst_known = tnum_is_const(dst_reg->var_off);
8313 s64 smin_val = src_reg->smin_value;
8314
8315 if (src_known && dst_known) {
8316 /* dst_reg->var_off.value has been updated earlier */
8317 __mark_reg_known(dst_reg, dst_reg->var_off.value);
8318 return;
8319 }
8320
8321 /* We get both minimum and maximum from the var_off. */
8322 dst_reg->umin_value = dst_reg->var_off.value;
8323 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
8324
8325 if (dst_reg->smin_value >= 0 && smin_val >= 0) {
8326 /* XORing two positive sign numbers gives a positive,
8327 * so safe to cast u64 result into s64.
8328 */
8329 dst_reg->smin_value = dst_reg->umin_value;
8330 dst_reg->smax_value = dst_reg->umax_value;
8331 } else {
8332 dst_reg->smin_value = S64_MIN;
8333 dst_reg->smax_value = S64_MAX;
8334 }
8335
8336 __update_reg_bounds(dst_reg);
8337}
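/* XOR preserves no ordering relation between the operands and the
 * result, so both unsigned bounds come from the tnum alone:
 * var_off.value (bits known to be set) is the minimum and
 * var_off.value | var_off.mask (bits that may be set) is the maximum.
 */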
8338
8339static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
8340 u64 umin_val, u64 umax_val)
07cd2631 8341{
8342 /* We lose all sign bit information (except what we can pick
8343 * up from var_off)
8344 */
8345 dst_reg->s32_min_value = S32_MIN;
8346 dst_reg->s32_max_value = S32_MAX;
8347 /* If we might shift our top bit out, then we know nothing */
8348 if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
8349 dst_reg->u32_min_value = 0;
8350 dst_reg->u32_max_value = U32_MAX;
8351 } else {
8352 dst_reg->u32_min_value <<= umin_val;
8353 dst_reg->u32_max_value <<= umax_val;
8354 }
8355}
8356
8357static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
8358 struct bpf_reg_state *src_reg)
8359{
8360 u32 umax_val = src_reg->u32_max_value;
8361 u32 umin_val = src_reg->u32_min_value;
8362 /* u32 alu operation will zext upper bits */
8363 struct tnum subreg = tnum_subreg(dst_reg->var_off);
8364
8365 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
8366 dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
8367 /* Not strictly required, but to be careful mark the reg64 bounds as
8368 * unknown so that we are forced to pick them up from the tnum and zext
8369 * later; if some path skips this step we are still safe.
8370 */
8371 __mark_reg64_unbounded(dst_reg);
8372 __update_reg32_bounds(dst_reg);
8373}
8374
8375static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
8376 u64 umin_val, u64 umax_val)
8377{
8378 /* Special case <<32 because it is a common compiler pattern to sign
8379 * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
8380 * positive we know this shift will also be positive so we can track
8381 * bounds correctly. Otherwise we lose all sign bit information except
8382 * what we can pick up from var_off. Perhaps we can generalize this
8383 * later to shifts of any length.
8384 */
8385 if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
8386 dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
8387 else
8388 dst_reg->smax_value = S64_MAX;
8389
8390 if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
8391 dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
8392 else
8393 dst_reg->smin_value = S64_MIN;
8394
8395 /* If we might shift our top bit out, then we know nothing */
8396 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
8397 dst_reg->umin_value = 0;
8398 dst_reg->umax_value = U64_MAX;
8399 } else {
8400 dst_reg->umin_value <<= umin_val;
8401 dst_reg->umax_value <<= umax_val;
8402 }
8403}
8404
8405static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
8406 struct bpf_reg_state *src_reg)
8407{
8408 u64 umax_val = src_reg->umax_value;
8409 u64 umin_val = src_reg->umin_value;
8410
8411 /* scalar64 calc uses 32bit unshifted bounds so must be called first */
8412 __scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
8413 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
8414
8415 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
8416 /* We may learn something more from the var_off */
8417 __update_reg_bounds(dst_reg);
8418}
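/* Example for the <<32 special case in __scalar64_min_max_lsh(): a
 * common compiler sequence for sign extending a subregister is
 *   r1 <<= 32; r1 s>>= 32;
 * If the 32-bit bounds are known non-negative before the shift, e.g.
 * an s32 range of [0, 100], the signed 64-bit bounds after the left
 * shift can be kept as [(s64)0 << 32, (s64)100 << 32] instead of
 * falling back to [S64_MIN, S64_MAX].
 */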
8419
8420static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
8421 struct bpf_reg_state *src_reg)
8422{
8423 struct tnum subreg = tnum_subreg(dst_reg->var_off);
8424 u32 umax_val = src_reg->u32_max_value;
8425 u32 umin_val = src_reg->u32_min_value;
8426
8427 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
8428 * be negative, then either:
8429 * 1) src_reg might be zero, so the sign bit of the result is
8430 * unknown, so we lose our signed bounds
8431 * 2) it's known negative, thus the unsigned bounds capture the
8432 * signed bounds
8433 * 3) the signed bounds cross zero, so they tell us nothing
8434 * about the result
8435 * If the value in dst_reg is known nonnegative, then again the
18b24d78 8436 * unsigned bounds capture the signed bounds.
8437 * Thus, in all cases it suffices to blow away our signed bounds
8438 * and rely on inferring new ones from the unsigned bounds and
8439 * var_off of the result.
8440 */
8441 dst_reg->s32_min_value = S32_MIN;
8442 dst_reg->s32_max_value = S32_MAX;
8443
8444 dst_reg->var_off = tnum_rshift(subreg, umin_val);
8445 dst_reg->u32_min_value >>= umax_val;
8446 dst_reg->u32_max_value >>= umin_val;
8447
8448 __mark_reg64_unbounded(dst_reg);
8449 __update_reg32_bounds(dst_reg);
8450}
8451
8452static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
8453 struct bpf_reg_state *src_reg)
8454{
8455 u64 umax_val = src_reg->umax_value;
8456 u64 umin_val = src_reg->umin_value;
8457
8458 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
8459 * be negative, then either:
8460 * 1) src_reg might be zero, so the sign bit of the result is
8461 * unknown, so we lose our signed bounds
8462 * 2) it's known negative, thus the unsigned bounds capture the
8463 * signed bounds
8464 * 3) the signed bounds cross zero, so they tell us nothing
8465 * about the result
8466 * If the value in dst_reg is known nonnegative, then again the
18b24d78 8467 * unsigned bounds capture the signed bounds.
8468 * Thus, in all cases it suffices to blow away our signed bounds
8469 * and rely on inferring new ones from the unsigned bounds and
8470 * var_off of the result.
8471 */
8472 dst_reg->smin_value = S64_MIN;
8473 dst_reg->smax_value = S64_MAX;
8474 dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
8475 dst_reg->umin_value >>= umax_val;
8476 dst_reg->umax_value >>= umin_val;
8477
8478 /* It's not easy to operate on alu32 bounds here because it depends
8479 * on bits being shifted in. Take the easy way out and mark unbounded
8480 * so we can recalculate later from tnum.
8481 */
8482 __mark_reg32_unbounded(dst_reg);
8483 __update_reg_bounds(dst_reg);
8484}
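/* For an unsigned right shift the new unsigned bounds follow directly:
 * shifting by more positions can only make the value smaller, so the
 * minimum is shifted by the largest amount (umin >>= umax_val) and the
 * maximum by the smallest amount (umax >>= umin_val).
 */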
8485
8486static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
8487 struct bpf_reg_state *src_reg)
07cd2631 8488{
3f50f132 8489 u64 umin_val = src_reg->u32_min_value;
8490
8491 /* Upon reaching here, src_known is true and
8492 * umax_val is equal to umin_val.
8493 */
8494 dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
8495 dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
07cd2631 8496
8497 dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
8498
8499 /* blow away the dst_reg umin_value/umax_value and rely on
8500 * dst_reg var_off to refine the result.
8501 */
8502 dst_reg->u32_min_value = 0;
8503 dst_reg->u32_max_value = U32_MAX;
8504
8505 __mark_reg64_unbounded(dst_reg);
8506 __update_reg32_bounds(dst_reg);
8507}
8508
8509static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
8510 struct bpf_reg_state *src_reg)
8511{
8512 u64 umin_val = src_reg->umin_value;
8513
8514 /* Upon reaching here, src_known is true and umax_val is equal
8515 * to umin_val.
8516 */
8517 dst_reg->smin_value >>= umin_val;
8518 dst_reg->smax_value >>= umin_val;
8519
8520 dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
8521
8522 /* blow away the dst_reg umin_value/umax_value and rely on
8523 * dst_reg var_off to refine the result.
8524 */
8525 dst_reg->umin_value = 0;
8526 dst_reg->umax_value = U64_MAX;
8527
8528 /* It's not easy to operate on alu32 bounds here because it depends
8529 * on bits being shifted in from the upper 32 bits. Take the easy way out
8530 * and mark unbounded so we can recalculate later from tnum.
8531 */
8532 __mark_reg32_unbounded(dst_reg);
8533 __update_reg_bounds(dst_reg);
8534}
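/* The src_known precondition of the two arsh helpers above is
 * guaranteed by adjust_scalar_min_max_vals(): for any opcode other than
 * BPF_ADD, BPF_SUB and BPF_AND an unknown source register marks the
 * destination unknown early and returns, so shift amounts always reach
 * these helpers as constants.
 */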
8535
8536/* WARNING: This function does calculations on 64-bit values, but the actual
8537 * execution may occur on 32-bit values. Therefore, things like bitshifts
8538 * need extra checks in the 32-bit case.
8539 */
8540static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
8541 struct bpf_insn *insn,
8542 struct bpf_reg_state *dst_reg,
8543 struct bpf_reg_state src_reg)
969bf05e 8544{
638f5b90 8545 struct bpf_reg_state *regs = cur_regs(env);
48461135 8546 u8 opcode = BPF_OP(insn->code);
b0b3fb67 8547 bool src_known;
8548 s64 smin_val, smax_val;
8549 u64 umin_val, umax_val;
8550 s32 s32_min_val, s32_max_val;
8551 u32 u32_min_val, u32_max_val;
468f6eaf 8552 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
3f50f132 8553 bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
a6aaece0 8554 int ret;
b799207e 8555
8556 smin_val = src_reg.smin_value;
8557 smax_val = src_reg.smax_value;
8558 umin_val = src_reg.umin_value;
8559 umax_val = src_reg.umax_value;
f23cc643 8560
8561 s32_min_val = src_reg.s32_min_value;
8562 s32_max_val = src_reg.s32_max_value;
8563 u32_min_val = src_reg.u32_min_value;
8564 u32_max_val = src_reg.u32_max_value;
8565
8566 if (alu32) {
8567 src_known = tnum_subreg_is_const(src_reg.var_off);
8568 if ((src_known &&
8569 (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
8570 s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
8571 /* Taint dst register if offset had invalid bounds
8572 * derived from e.g. dead branches.
8573 */
8574 __mark_reg_unknown(env, dst_reg);
8575 return 0;
8576 }
8577 } else {
8578 src_known = tnum_is_const(src_reg.var_off);
8579 if ((src_known &&
8580 (smin_val != smax_val || umin_val != umax_val)) ||
8581 smin_val > smax_val || umin_val > umax_val) {
8582 /* Taint dst register if offset had invalid bounds
8583 * derived from e.g. dead branches.
8584 */
8585 __mark_reg_unknown(env, dst_reg);
8586 return 0;
8587 }
8588 }
8589
8590 if (!src_known &&
8591 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
f54c7898 8592 __mark_reg_unknown(env, dst_reg);
8593 return 0;
8594 }
8595
8596 if (sanitize_needed(opcode)) {
8597 ret = sanitize_val_alu(env, insn);
8598 if (ret < 0)
8599 return sanitize_err(env, insn, ret, NULL, NULL);
8600 }
8601
8602 /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
8603 * There are two classes of instructions: the first class we track both
8604 * alu32 and alu64 sign/unsigned bounds independently; this provides the
8605 * greatest amount of precision when alu operations are mixed with jmp32
8606 * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_AND,
8607 * BPF_OR and BPF_XOR. This is possible because these ops have fairly easy
8608 * to understand and calculate behavior in both 32-bit and 64-bit alu ops.
8609 * See alu32 verifier tests for examples. The second class of
8610 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however, are not so easy
8611 * with regards to tracking sign/unsigned bounds because the bits may
8612 * cross subreg boundaries in the alu64 case. When this happens we mark
8613 * the reg unbounded in the subreg bound space and use the resulting
8614 * tnum to calculate an approximation of the sign/unsigned bounds.
8615 */
8616 switch (opcode) {
8617 case BPF_ADD:
3f50f132 8618 scalar32_min_max_add(dst_reg, &src_reg);
07cd2631 8619 scalar_min_max_add(dst_reg, &src_reg);
3f50f132 8620 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
8621 break;
8622 case BPF_SUB:
3f50f132 8623 scalar32_min_max_sub(dst_reg, &src_reg);
07cd2631 8624 scalar_min_max_sub(dst_reg, &src_reg);
3f50f132 8625 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
8626 break;
8627 case BPF_MUL:
8628 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
8629 scalar32_min_max_mul(dst_reg, &src_reg);
07cd2631 8630 scalar_min_max_mul(dst_reg, &src_reg);
8631 break;
8632 case BPF_AND:
8633 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
8634 scalar32_min_max_and(dst_reg, &src_reg);
07cd2631 8635 scalar_min_max_and(dst_reg, &src_reg);
8636 break;
8637 case BPF_OR:
8638 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
8639 scalar32_min_max_or(dst_reg, &src_reg);
07cd2631 8640 scalar_min_max_or(dst_reg, &src_reg);
48461135 8641 break;
8642 case BPF_XOR:
8643 dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
8644 scalar32_min_max_xor(dst_reg, &src_reg);
8645 scalar_min_max_xor(dst_reg, &src_reg);
8646 break;
48461135 8647 case BPF_LSH:
8648 if (umax_val >= insn_bitness) {
8649 /* Shifts greater than 31 or 63 are undefined.
8650 * This includes shifts by a negative number.
b03c9f9f 8651 */
61bd5218 8652 mark_reg_unknown(env, regs, insn->dst_reg);
8653 break;
8654 }
8655 if (alu32)
8656 scalar32_min_max_lsh(dst_reg, &src_reg);
8657 else
8658 scalar_min_max_lsh(dst_reg, &src_reg);
8659 break;
8660 case BPF_RSH:
8661 if (umax_val >= insn_bitness) {
8662 /* Shifts greater than 31 or 63 are undefined.
8663 * This includes shifts by a negative number.
b03c9f9f 8664 */
61bd5218 8665 mark_reg_unknown(env, regs, insn->dst_reg);
8666 break;
8667 }
8668 if (alu32)
8669 scalar32_min_max_rsh(dst_reg, &src_reg);
8670 else
8671 scalar_min_max_rsh(dst_reg, &src_reg);
48461135 8672 break;
8673 case BPF_ARSH:
8674 if (umax_val >= insn_bitness) {
8675 /* Shifts greater than 31 or 63 are undefined.
8676 * This includes shifts by a negative number.
8677 */
8678 mark_reg_unknown(env, regs, insn->dst_reg);
8679 break;
8680 }
8681 if (alu32)
8682 scalar32_min_max_arsh(dst_reg, &src_reg);
8683 else
8684 scalar_min_max_arsh(dst_reg, &src_reg);
9cbe1f5a 8685 break;
48461135 8686 default:
61bd5218 8687 mark_reg_unknown(env, regs, insn->dst_reg);
8688 break;
8689 }
8690
8691 /* ALU32 ops are zero extended into 64bit register */
8692 if (alu32)
8693 zext_32_to_64(dst_reg);
468f6eaf 8694
294f2fc6 8695 __update_reg_bounds(dst_reg);
8696 __reg_deduce_bounds(dst_reg);
8697 __reg_bound_offset(dst_reg);
8698 return 0;
8699}
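/* Overall flow for a tracked ALU op such as BPF_ADD: the tnum is
 * updated with tnum_add(), the 32-bit and 64-bit min/max helpers run
 * independently, alu32 results are zero extended into the 64-bit view,
 * and finally __update_reg_bounds(), __reg_deduce_bounds() and
 * __reg_bound_offset() reconcile the tnum with the numeric bounds so
 * that each representation can tighten the other.
 */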
8700
8701/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
8702 * and var_off.
8703 */
8704static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
8705 struct bpf_insn *insn)
8706{
f4d7e40a
AS
8707 struct bpf_verifier_state *vstate = env->cur_state;
8708 struct bpf_func_state *state = vstate->frame[vstate->curframe];
8709 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
f1174f77
EC
8710 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
8711 u8 opcode = BPF_OP(insn->code);
b5dc0163 8712 int err;
f1174f77
EC
8713
8714 dst_reg = &regs[insn->dst_reg];
f1174f77
EC
8715 src_reg = NULL;
8716 if (dst_reg->type != SCALAR_VALUE)
8717 ptr_reg = dst_reg;
75748837
AS
8718 else
8719 /* Make sure ID is cleared otherwise dst_reg min/max could be
8720 * incorrectly propagated into other registers by find_equal_scalars()
8721 */
8722 dst_reg->id = 0;
f1174f77
EC
8723 if (BPF_SRC(insn->code) == BPF_X) {
8724 src_reg = &regs[insn->src_reg];
f1174f77
EC
8725 if (src_reg->type != SCALAR_VALUE) {
8726 if (dst_reg->type != SCALAR_VALUE) {
8727 /* Combining two pointers by any ALU op yields
82abbf8d
AS
8728 * an arbitrary scalar. Disallow all math except
8729 * pointer subtraction
f1174f77 8730 */
dd066823 8731 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
82abbf8d
AS
8732 mark_reg_unknown(env, regs, insn->dst_reg);
8733 return 0;
f1174f77 8734 }
82abbf8d
AS
8735 verbose(env, "R%d pointer %s pointer prohibited\n",
8736 insn->dst_reg,
8737 bpf_alu_string[opcode >> 4]);
8738 return -EACCES;
f1174f77
EC
8739 } else {
8740 /* scalar += pointer
8741 * This is legal, but we have to reverse our
8742 * src/dest handling in computing the range
8743 */
b5dc0163
AS
8744 err = mark_chain_precision(env, insn->dst_reg);
8745 if (err)
8746 return err;
82abbf8d
AS
8747 return adjust_ptr_min_max_vals(env, insn,
8748 src_reg, dst_reg);
f1174f77
EC
8749 }
8750 } else if (ptr_reg) {
8751 /* pointer += scalar */
b5dc0163
AS
8752 err = mark_chain_precision(env, insn->src_reg);
8753 if (err)
8754 return err;
82abbf8d
AS
8755 return adjust_ptr_min_max_vals(env, insn,
8756 dst_reg, src_reg);
f1174f77
EC
8757 }
8758 } else {
8759 /* Pretend the src is a reg with a known value, since we only
8760 * need to be able to read from this state.
8761 */
8762 off_reg.type = SCALAR_VALUE;
b03c9f9f 8763 __mark_reg_known(&off_reg, insn->imm);
f1174f77 8764 src_reg = &off_reg;
82abbf8d
AS
8765 if (ptr_reg) /* pointer += K */
8766 return adjust_ptr_min_max_vals(env, insn,
8767 ptr_reg, src_reg);
f1174f77
EC
8768 }
8769
8770 /* Got here implies adding two SCALAR_VALUEs */
8771 if (WARN_ON_ONCE(ptr_reg)) {
0f55f9ed 8772 print_verifier_state(env, state, true);
61bd5218 8773 verbose(env, "verifier internal error: unexpected ptr_reg\n");
f1174f77
EC
8774 return -EINVAL;
8775 }
8776 if (WARN_ON(!src_reg)) {
0f55f9ed 8777 print_verifier_state(env, state, true);
61bd5218 8778 verbose(env, "verifier internal error: no src_reg\n");
f1174f77
EC
8779 return -EINVAL;
8780 }
8781 return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
48461135
JB
8782}
8783
17a52670 8784/* check validity of 32-bit and 64-bit arithmetic operations */
58e2af8b 8785static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
17a52670 8786{
638f5b90 8787 struct bpf_reg_state *regs = cur_regs(env);
17a52670
AS
8788 u8 opcode = BPF_OP(insn->code);
8789 int err;
8790
8791 if (opcode == BPF_END || opcode == BPF_NEG) {
8792 if (opcode == BPF_NEG) {
8793 if (BPF_SRC(insn->code) != 0 ||
8794 insn->src_reg != BPF_REG_0 ||
8795 insn->off != 0 || insn->imm != 0) {
61bd5218 8796 verbose(env, "BPF_NEG uses reserved fields\n");
17a52670
AS
8797 return -EINVAL;
8798 }
8799 } else {
8800 if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
e67b8a68
EC
8801 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
8802 BPF_CLASS(insn->code) == BPF_ALU64) {
61bd5218 8803 verbose(env, "BPF_END uses reserved fields\n");
17a52670
AS
8804 return -EINVAL;
8805 }
8806 }
8807
8808 /* check src operand */
dc503a8a 8809 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
8810 if (err)
8811 return err;
8812
1be7f75d 8813 if (is_pointer_value(env, insn->dst_reg)) {
61bd5218 8814 verbose(env, "R%d pointer arithmetic prohibited\n",
1be7f75d
AS
8815 insn->dst_reg);
8816 return -EACCES;
8817 }
8818
17a52670 8819 /* check dest operand */
dc503a8a 8820 err = check_reg_arg(env, insn->dst_reg, DST_OP);
17a52670
AS
8821 if (err)
8822 return err;
8823
8824 } else if (opcode == BPF_MOV) {
8825
8826 if (BPF_SRC(insn->code) == BPF_X) {
8827 if (insn->imm != 0 || insn->off != 0) {
61bd5218 8828 verbose(env, "BPF_MOV uses reserved fields\n");
17a52670
AS
8829 return -EINVAL;
8830 }
8831
8832 /* check src operand */
dc503a8a 8833 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
8834 if (err)
8835 return err;
8836 } else {
8837 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
61bd5218 8838 verbose(env, "BPF_MOV uses reserved fields\n");
17a52670
AS
8839 return -EINVAL;
8840 }
8841 }
8842
fbeb1603
AF
8843 /* check dest operand, mark as required later */
8844 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
17a52670
AS
8845 if (err)
8846 return err;
8847
8848 if (BPF_SRC(insn->code) == BPF_X) {
e434b8cd
JW
8849 struct bpf_reg_state *src_reg = regs + insn->src_reg;
8850 struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
8851
17a52670
AS
8852 if (BPF_CLASS(insn->code) == BPF_ALU64) {
8853 /* case: R1 = R2
8854 * copy register state to dest reg
8855 */
75748837
AS
8856 if (src_reg->type == SCALAR_VALUE && !src_reg->id)
8857 /* Assign src and dst registers the same ID
8858 * that will be used by find_equal_scalars()
8859 * to propagate min/max range.
8860 */
8861 src_reg->id = ++env->id_gen;
e434b8cd
JW
8862 *dst_reg = *src_reg;
8863 dst_reg->live |= REG_LIVE_WRITTEN;
5327ed3d 8864 dst_reg->subreg_def = DEF_NOT_SUBREG;
17a52670 8865 } else {
f1174f77 8866 /* R1 = (u32) R2 */
1be7f75d 8867 if (is_pointer_value(env, insn->src_reg)) {
61bd5218
JK
8868 verbose(env,
8869 "R%d partial copy of pointer\n",
1be7f75d
AS
8870 insn->src_reg);
8871 return -EACCES;
e434b8cd
JW
8872 } else if (src_reg->type == SCALAR_VALUE) {
8873 *dst_reg = *src_reg;
75748837
AS
8874 /* Make sure ID is cleared otherwise
8875 * dst_reg min/max could be incorrectly
8876 * propagated into src_reg by find_equal_scalars()
8877 */
8878 dst_reg->id = 0;
e434b8cd 8879 dst_reg->live |= REG_LIVE_WRITTEN;
5327ed3d 8880 dst_reg->subreg_def = env->insn_idx + 1;
e434b8cd
JW
8881 } else {
8882 mark_reg_unknown(env, regs,
8883 insn->dst_reg);
1be7f75d 8884 }
3f50f132 8885 zext_32_to_64(dst_reg);
3cf2b61e
DB
8886
8887 __update_reg_bounds(dst_reg);
8888 __reg_deduce_bounds(dst_reg);
8889 __reg_bound_offset(dst_reg);
17a52670
AS
8890 }
8891 } else {
8892 /* case: R = imm
8893 * remember the value we stored into this reg
8894 */
fbeb1603
AF
8895 /* clear any state __mark_reg_known doesn't set */
8896 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77 8897 regs[insn->dst_reg].type = SCALAR_VALUE;
95a762e2
JH
8898 if (BPF_CLASS(insn->code) == BPF_ALU64) {
8899 __mark_reg_known(regs + insn->dst_reg,
8900 insn->imm);
8901 } else {
8902 __mark_reg_known(regs + insn->dst_reg,
8903 (u32)insn->imm);
8904 }
17a52670
AS
8905 }
8906
8907 } else if (opcode > BPF_END) {
61bd5218 8908 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
17a52670
AS
8909 return -EINVAL;
8910
8911 } else { /* all other ALU ops: and, sub, xor, add, ... */
8912
17a52670
AS
8913 if (BPF_SRC(insn->code) == BPF_X) {
8914 if (insn->imm != 0 || insn->off != 0) {
61bd5218 8915 verbose(env, "BPF_ALU uses reserved fields\n");
17a52670
AS
8916 return -EINVAL;
8917 }
8918 /* check src1 operand */
dc503a8a 8919 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
8920 if (err)
8921 return err;
8922 } else {
8923 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
61bd5218 8924 verbose(env, "BPF_ALU uses reserved fields\n");
17a52670
AS
8925 return -EINVAL;
8926 }
8927 }
8928
8929 /* check src2 operand */
dc503a8a 8930 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
8931 if (err)
8932 return err;
8933
8934 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
8935 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
61bd5218 8936 verbose(env, "div by zero\n");
17a52670
AS
8937 return -EINVAL;
8938 }
8939
229394e8
RV
8940 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
8941 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
8942 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
8943
8944 if (insn->imm < 0 || insn->imm >= size) {
61bd5218 8945 verbose(env, "invalid shift %d\n", insn->imm);
229394e8
RV
8946 return -EINVAL;
8947 }
8948 }
8949
1a0dc1ac 8950 /* check dest operand */
dc503a8a 8951 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
1a0dc1ac
AS
8952 if (err)
8953 return err;
8954
f1174f77 8955 return adjust_reg_min_max_vals(env, insn);
17a52670
AS
8956 }
8957
8958 return 0;
8959}
8960
c6a9efa1
PC
8961static void __find_good_pkt_pointers(struct bpf_func_state *state,
8962 struct bpf_reg_state *dst_reg,
6d94e741 8963 enum bpf_reg_type type, int new_range)
c6a9efa1
PC
8964{
8965 struct bpf_reg_state *reg;
8966 int i;
8967
8968 for (i = 0; i < MAX_BPF_REG; i++) {
8969 reg = &state->regs[i];
8970 if (reg->type == type && reg->id == dst_reg->id)
8971 /* keep the maximum range already checked */
8972 reg->range = max(reg->range, new_range);
8973 }
8974
8975 bpf_for_each_spilled_reg(i, state, reg) {
8976 if (!reg)
8977 continue;
8978 if (reg->type == type && reg->id == dst_reg->id)
8979 reg->range = max(reg->range, new_range);
8980 }
8981}
8982
f4d7e40a 8983static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
de8f3a83 8984 struct bpf_reg_state *dst_reg,
f8ddadc4 8985 enum bpf_reg_type type,
fb2a311a 8986 bool range_right_open)
969bf05e 8987{
6d94e741 8988 int new_range, i;
2d2be8ca 8989
fb2a311a
DB
8990 if (dst_reg->off < 0 ||
8991 (dst_reg->off == 0 && range_right_open))
f1174f77
EC
8992 /* This doesn't give us any range */
8993 return;
8994
b03c9f9f
EC
8995 if (dst_reg->umax_value > MAX_PACKET_OFF ||
8996 dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
f1174f77
EC
8997 /* Risk of overflow. For instance, ptr + (1<<63) may be less
8998 * than pkt_end, but that's because it's also less than pkt.
8999 */
9000 return;
9001
fb2a311a
DB
9002 new_range = dst_reg->off;
9003 if (range_right_open)
2fa7d94a 9004 new_range++;
fb2a311a
DB
9005
9006 /* Examples for register markings:
2d2be8ca 9007 *
fb2a311a 9008 * pkt_data in dst register:
2d2be8ca
DB
9009 *
9010 * r2 = r3;
9011 * r2 += 8;
9012 * if (r2 > pkt_end) goto <handle exception>
9013 * <access okay>
9014 *
b4e432f1
DB
9015 * r2 = r3;
9016 * r2 += 8;
9017 * if (r2 < pkt_end) goto <access okay>
9018 * <handle exception>
9019 *
2d2be8ca
DB
9020 * Where:
9021 * r2 == dst_reg, pkt_end == src_reg
9022 * r2=pkt(id=n,off=8,r=0)
9023 * r3=pkt(id=n,off=0,r=0)
9024 *
fb2a311a 9025 * pkt_data in src register:
2d2be8ca
DB
9026 *
9027 * r2 = r3;
9028 * r2 += 8;
9029 * if (pkt_end >= r2) goto <access okay>
9030 * <handle exception>
9031 *
b4e432f1
DB
9032 * r2 = r3;
9033 * r2 += 8;
9034 * if (pkt_end <= r2) goto <handle exception>
9035 * <access okay>
9036 *
2d2be8ca
DB
9037 * Where:
9038 * pkt_end == dst_reg, r2 == src_reg
9039 * r2=pkt(id=n,off=8,r=0)
9040 * r3=pkt(id=n,off=0,r=0)
9041 *
9042 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
fb2a311a
DB
9043 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
9044 * and [r3, r3 + 8-1) respectively is safe to access depending on
9045 * the check.
969bf05e 9046 */
2d2be8ca 9047
f1174f77
EC
9048 /* If our ids match, then we must have the same max_value. And we
9049 * don't care about the other reg's fixed offset, since if it's too big
9050 * the range won't allow anything.
9051 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
9052 */
c6a9efa1
PC
9053 for (i = 0; i <= vstate->curframe; i++)
9054 __find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
9055 new_range);
969bf05e
AS
9056}
9057
3f50f132 9058static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
4f7b3e82 9059{
3f50f132
JF
9060 struct tnum subreg = tnum_subreg(reg->var_off);
9061 s32 sval = (s32)val;
a72dafaf 9062
3f50f132
JF
9063 switch (opcode) {
9064 case BPF_JEQ:
9065 if (tnum_is_const(subreg))
9066 return !!tnum_equals_const(subreg, val);
9067 break;
9068 case BPF_JNE:
9069 if (tnum_is_const(subreg))
9070 return !tnum_equals_const(subreg, val);
9071 break;
9072 case BPF_JSET:
9073 if ((~subreg.mask & subreg.value) & val)
9074 return 1;
9075 if (!((subreg.mask | subreg.value) & val))
9076 return 0;
9077 break;
9078 case BPF_JGT:
9079 if (reg->u32_min_value > val)
9080 return 1;
9081 else if (reg->u32_max_value <= val)
9082 return 0;
9083 break;
9084 case BPF_JSGT:
9085 if (reg->s32_min_value > sval)
9086 return 1;
ee114dd6 9087 else if (reg->s32_max_value <= sval)
3f50f132
JF
9088 return 0;
9089 break;
9090 case BPF_JLT:
9091 if (reg->u32_max_value < val)
9092 return 1;
9093 else if (reg->u32_min_value >= val)
9094 return 0;
9095 break;
9096 case BPF_JSLT:
9097 if (reg->s32_max_value < sval)
9098 return 1;
9099 else if (reg->s32_min_value >= sval)
9100 return 0;
9101 break;
9102 case BPF_JGE:
9103 if (reg->u32_min_value >= val)
9104 return 1;
9105 else if (reg->u32_max_value < val)
9106 return 0;
9107 break;
9108 case BPF_JSGE:
9109 if (reg->s32_min_value >= sval)
9110 return 1;
9111 else if (reg->s32_max_value < sval)
9112 return 0;
9113 break;
9114 case BPF_JLE:
9115 if (reg->u32_max_value <= val)
9116 return 1;
9117 else if (reg->u32_min_value > val)
9118 return 0;
9119 break;
9120 case BPF_JSLE:
9121 if (reg->s32_max_value <= sval)
9122 return 1;
9123 else if (reg->s32_min_value > sval)
9124 return 0;
9125 break;
9126 }
4f7b3e82 9127
3f50f132
JF
9128 return -1;
9129}
092ed096 9130
3f50f132
JF
9131
9132static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
9133{
9134 s64 sval = (s64)val;
a72dafaf 9135
4f7b3e82
AS
9136 switch (opcode) {
9137 case BPF_JEQ:
9138 if (tnum_is_const(reg->var_off))
9139 return !!tnum_equals_const(reg->var_off, val);
9140 break;
9141 case BPF_JNE:
9142 if (tnum_is_const(reg->var_off))
9143 return !tnum_equals_const(reg->var_off, val);
9144 break;
960ea056
JK
9145 case BPF_JSET:
9146 if ((~reg->var_off.mask & reg->var_off.value) & val)
9147 return 1;
9148 if (!((reg->var_off.mask | reg->var_off.value) & val))
9149 return 0;
9150 break;
4f7b3e82
AS
9151 case BPF_JGT:
9152 if (reg->umin_value > val)
9153 return 1;
9154 else if (reg->umax_value <= val)
9155 return 0;
9156 break;
9157 case BPF_JSGT:
a72dafaf 9158 if (reg->smin_value > sval)
4f7b3e82 9159 return 1;
ee114dd6 9160 else if (reg->smax_value <= sval)
4f7b3e82
AS
9161 return 0;
9162 break;
9163 case BPF_JLT:
9164 if (reg->umax_value < val)
9165 return 1;
9166 else if (reg->umin_value >= val)
9167 return 0;
9168 break;
9169 case BPF_JSLT:
a72dafaf 9170 if (reg->smax_value < sval)
4f7b3e82 9171 return 1;
a72dafaf 9172 else if (reg->smin_value >= sval)
4f7b3e82
AS
9173 return 0;
9174 break;
9175 case BPF_JGE:
9176 if (reg->umin_value >= val)
9177 return 1;
9178 else if (reg->umax_value < val)
9179 return 0;
9180 break;
9181 case BPF_JSGE:
a72dafaf 9182 if (reg->smin_value >= sval)
4f7b3e82 9183 return 1;
a72dafaf 9184 else if (reg->smax_value < sval)
4f7b3e82
AS
9185 return 0;
9186 break;
9187 case BPF_JLE:
9188 if (reg->umax_value <= val)
9189 return 1;
9190 else if (reg->umin_value > val)
9191 return 0;
9192 break;
9193 case BPF_JSLE:
a72dafaf 9194 if (reg->smax_value <= sval)
4f7b3e82 9195 return 1;
a72dafaf 9196 else if (reg->smin_value > sval)
4f7b3e82
AS
9197 return 0;
9198 break;
9199 }
9200
9201 return -1;
9202}
9203
3f50f132
JF
9204/* compute branch direction of the expression "if (reg opcode val) goto target;"
9205 * and return:
9206 * 1 - branch will be taken and "goto target" will be executed
9207 * 0 - branch will not be taken and fall-through to next insn
9208 * -1 - unknown. Example: "if (reg < 5)" is unknown when the register's
9209 * value range is [0,10]
604dca5e 9210 */
3f50f132
JF
9211static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
9212 bool is_jmp32)
604dca5e 9213{
cac616db
JF
9214 if (__is_pointer_value(false, reg)) {
9215 if (!reg_type_not_null(reg->type))
9216 return -1;
9217
9218 /* If pointer is valid tests against zero will fail so we can
9219 * use this to direct branch taken.
9220 */
9221 if (val != 0)
9222 return -1;
9223
9224 switch (opcode) {
9225 case BPF_JEQ:
9226 return 0;
9227 case BPF_JNE:
9228 return 1;
9229 default:
9230 return -1;
9231 }
9232 }
604dca5e 9233
3f50f132
JF
9234 if (is_jmp32)
9235 return is_branch32_taken(reg, val, opcode);
9236 return is_branch64_taken(reg, val, opcode);
604dca5e
JH
9237}
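/* Example: for "if r1 > 10 goto l" with r1 known to be in [20, 30] the
 * helpers above return 1 (branch always taken); with r1 in [0, 5] they
 * return 0 (branch never taken); with r1 in [5, 15] the outcome depends
 * on the runtime value, so -1 is returned and both paths are explored.
 */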
9238
6d94e741
AS
9239static int flip_opcode(u32 opcode)
9240{
9241 /* How can we transform "a <op> b" into "b <op> a"? */
9242 static const u8 opcode_flip[16] = {
9243 /* these stay the same */
9244 [BPF_JEQ >> 4] = BPF_JEQ,
9245 [BPF_JNE >> 4] = BPF_JNE,
9246 [BPF_JSET >> 4] = BPF_JSET,
9247 /* these swap "lesser" and "greater" (L and G in the opcodes) */
9248 [BPF_JGE >> 4] = BPF_JLE,
9249 [BPF_JGT >> 4] = BPF_JLT,
9250 [BPF_JLE >> 4] = BPF_JGE,
9251 [BPF_JLT >> 4] = BPF_JGT,
9252 [BPF_JSGE >> 4] = BPF_JSLE,
9253 [BPF_JSGT >> 4] = BPF_JSLT,
9254 [BPF_JSLE >> 4] = BPF_JSGE,
9255 [BPF_JSLT >> 4] = BPF_JSGT
9256 };
9257 return opcode_flip[opcode >> 4];
9258}
9259
9260static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
9261 struct bpf_reg_state *src_reg,
9262 u8 opcode)
9263{
9264 struct bpf_reg_state *pkt;
9265
9266 if (src_reg->type == PTR_TO_PACKET_END) {
9267 pkt = dst_reg;
9268 } else if (dst_reg->type == PTR_TO_PACKET_END) {
9269 pkt = src_reg;
9270 opcode = flip_opcode(opcode);
9271 } else {
9272 return -1;
9273 }
9274
9275 if (pkt->range >= 0)
9276 return -1;
9277
9278 switch (opcode) {
9279 case BPF_JLE:
9280 /* pkt <= pkt_end */
9281 fallthrough;
9282 case BPF_JGT:
9283 /* pkt > pkt_end */
9284 if (pkt->range == BEYOND_PKT_END)
9285 /* pkt has at least one extra byte beyond pkt_end */
9286 return opcode == BPF_JGT;
9287 break;
9288 case BPF_JLT:
9289 /* pkt < pkt_end */
9290 fallthrough;
9291 case BPF_JGE:
9292 /* pkt >= pkt_end */
9293 if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
9294 return opcode == BPF_JGE;
9295 break;
9296 }
9297 return -1;
9298}
9299
48461135
JB
9300/* Adjusts the register min/max values in the case that the dst_reg is the
9301 * variable register that we are working on, and src_reg is a constant or we're
9302 * simply doing a BPF_K check.
f1174f77 9303 * In JEQ/JNE cases we also adjust the var_off values.
48461135
JB
9304 */
9305static void reg_set_min_max(struct bpf_reg_state *true_reg,
3f50f132
JF
9306 struct bpf_reg_state *false_reg,
9307 u64 val, u32 val32,
092ed096 9308 u8 opcode, bool is_jmp32)
48461135 9309{
3f50f132
JF
9310 struct tnum false_32off = tnum_subreg(false_reg->var_off);
9311 struct tnum false_64off = false_reg->var_off;
9312 struct tnum true_32off = tnum_subreg(true_reg->var_off);
9313 struct tnum true_64off = true_reg->var_off;
9314 s64 sval = (s64)val;
9315 s32 sval32 = (s32)val32;
a72dafaf 9316
f1174f77
EC
9317 /* If the dst_reg is a pointer, we can't learn anything about its
9318 * variable offset from the compare (unless src_reg were a pointer into
9319 * the same object, but we don't bother with that).
9320 * Since false_reg and true_reg have the same type by construction, we
9321 * only need to check one of them for pointerness.
9322 */
9323 if (__is_pointer_value(false, false_reg))
9324 return;
4cabc5b1 9325
48461135
JB
9326 switch (opcode) {
9327 case BPF_JEQ:
48461135 9328 case BPF_JNE:
a72dafaf
JW
9329 {
9330 struct bpf_reg_state *reg =
9331 opcode == BPF_JEQ ? true_reg : false_reg;
9332
e688c3db
AS
9333 /* JEQ/JNE comparison doesn't change the register equivalence.
9334 * r1 = r2;
9335 * if (r1 == 42) goto label;
9336 * ...
9337 * label: // here both r1 and r2 are known to be 42.
9338 *
9339 * Hence when marking register as known preserve it's ID.
48461135 9340 */
3f50f132
JF
9341 if (is_jmp32)
9342 __mark_reg32_known(reg, val32);
9343 else
e688c3db 9344 ___mark_reg_known(reg, val);
48461135 9345 break;
a72dafaf 9346 }
960ea056 9347 case BPF_JSET:
3f50f132
JF
9348 if (is_jmp32) {
9349 false_32off = tnum_and(false_32off, tnum_const(~val32));
9350 if (is_power_of_2(val32))
9351 true_32off = tnum_or(true_32off,
9352 tnum_const(val32));
9353 } else {
9354 false_64off = tnum_and(false_64off, tnum_const(~val));
9355 if (is_power_of_2(val))
9356 true_64off = tnum_or(true_64off,
9357 tnum_const(val));
9358 }
960ea056 9359 break;
48461135 9360 case BPF_JGE:
a72dafaf
JW
9361 case BPF_JGT:
9362 {
3f50f132
JF
9363 if (is_jmp32) {
9364 u32 false_umax = opcode == BPF_JGT ? val32 : val32 - 1;
9365 u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
9366
9367 false_reg->u32_max_value = min(false_reg->u32_max_value,
9368 false_umax);
9369 true_reg->u32_min_value = max(true_reg->u32_min_value,
9370 true_umin);
9371 } else {
9372 u64 false_umax = opcode == BPF_JGT ? val : val - 1;
9373 u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
9374
9375 false_reg->umax_value = min(false_reg->umax_value, false_umax);
9376 true_reg->umin_value = max(true_reg->umin_value, true_umin);
9377 }
b03c9f9f 9378 break;
a72dafaf 9379 }
48461135 9380 case BPF_JSGE:
a72dafaf
JW
9381 case BPF_JSGT:
9382 {
3f50f132
JF
9383 if (is_jmp32) {
9384 s32 false_smax = opcode == BPF_JSGT ? sval32 : sval32 - 1;
9385 s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
a72dafaf 9386
3f50f132
JF
9387 false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
9388 true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
9389 } else {
9390 s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1;
9391 s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
9392
9393 false_reg->smax_value = min(false_reg->smax_value, false_smax);
9394 true_reg->smin_value = max(true_reg->smin_value, true_smin);
9395 }
48461135 9396 break;
a72dafaf 9397 }
b4e432f1 9398 case BPF_JLE:
a72dafaf
JW
9399 case BPF_JLT:
9400 {
3f50f132
JF
9401 if (is_jmp32) {
9402 u32 false_umin = opcode == BPF_JLT ? val32 : val32 + 1;
9403 u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
9404
9405 false_reg->u32_min_value = max(false_reg->u32_min_value,
9406 false_umin);
9407 true_reg->u32_max_value = min(true_reg->u32_max_value,
9408 true_umax);
9409 } else {
9410 u64 false_umin = opcode == BPF_JLT ? val : val + 1;
9411 u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
9412
9413 false_reg->umin_value = max(false_reg->umin_value, false_umin);
9414 true_reg->umax_value = min(true_reg->umax_value, true_umax);
9415 }
b4e432f1 9416 break;
a72dafaf 9417 }
b4e432f1 9418 case BPF_JSLE:
a72dafaf
JW
9419 case BPF_JSLT:
9420 {
3f50f132
JF
9421 if (is_jmp32) {
9422 s32 false_smin = opcode == BPF_JSLT ? sval32 : sval32 + 1;
9423 s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
a72dafaf 9424
3f50f132
JF
9425 false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
9426 true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
9427 } else {
9428 s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1;
9429 s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
9430
9431 false_reg->smin_value = max(false_reg->smin_value, false_smin);
9432 true_reg->smax_value = min(true_reg->smax_value, true_smax);
9433 }
b4e432f1 9434 break;
a72dafaf 9435 }
48461135 9436 default:
0fc31b10 9437 return;
48461135
JB
9438 }
9439
3f50f132
JF
9440 if (is_jmp32) {
9441 false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
9442 tnum_subreg(false_32off));
9443 true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
9444 tnum_subreg(true_32off));
9445 __reg_combine_32_into_64(false_reg);
9446 __reg_combine_32_into_64(true_reg);
9447 } else {
9448 false_reg->var_off = false_64off;
9449 true_reg->var_off = true_64off;
9450 __reg_combine_64_into_32(false_reg);
9451 __reg_combine_64_into_32(true_reg);
9452 }
48461135
JB
9453}
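/* Example: for "if r1 > 5 goto l" with r1 initially in [0, 10], the
 * false-branch copy of r1 is narrowed to umax = min(10, 5), i.e.
 * [0, 5], and the true-branch copy to umin = max(0, 5 + 1), i.e.
 * [6, 10]; for JMP32 the same adjustment is applied to the u32 bounds.
 */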
9454
f1174f77
EC
9455/* Same as above, but for the case that dst_reg holds a constant and src_reg is
9456 * the variable reg.
48461135
JB
9457 */
9458static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
3f50f132
JF
9459 struct bpf_reg_state *false_reg,
9460 u64 val, u32 val32,
092ed096 9461 u8 opcode, bool is_jmp32)
48461135 9462{
6d94e741 9463 opcode = flip_opcode(opcode);
0fc31b10
JH
9464 /* This uses zero as "not present in table"; luckily the zero opcode,
9465 * BPF_JA, can't get here.
b03c9f9f 9466 */
0fc31b10 9467 if (opcode)
3f50f132 9468 reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
f1174f77
EC
9469}
9470
9471/* Regs are known to be equal, so intersect their min/max/var_off */
9472static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
9473 struct bpf_reg_state *dst_reg)
9474{
b03c9f9f
EC
9475 src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
9476 dst_reg->umin_value);
9477 src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
9478 dst_reg->umax_value);
9479 src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
9480 dst_reg->smin_value);
9481 src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
9482 dst_reg->smax_value);
f1174f77
EC
9483 src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
9484 dst_reg->var_off);
b03c9f9f
EC
9485 /* We might have learned new bounds from the var_off. */
9486 __update_reg_bounds(src_reg);
9487 __update_reg_bounds(dst_reg);
9488 /* We might have learned something about the sign bit. */
9489 __reg_deduce_bounds(src_reg);
9490 __reg_deduce_bounds(dst_reg);
9491 /* We might have learned some bits from the bounds. */
9492 __reg_bound_offset(src_reg);
9493 __reg_bound_offset(dst_reg);
9494 /* Intersecting with the old var_off might have improved our bounds
9495 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
9496 * then new var_off is (0; 0x7f...fc) which improves our umax.
9497 */
9498 __update_reg_bounds(src_reg);
9499 __update_reg_bounds(dst_reg);
f1174f77
EC
9500}
9501
9502static void reg_combine_min_max(struct bpf_reg_state *true_src,
9503 struct bpf_reg_state *true_dst,
9504 struct bpf_reg_state *false_src,
9505 struct bpf_reg_state *false_dst,
9506 u8 opcode)
9507{
9508 switch (opcode) {
9509 case BPF_JEQ:
9510 __reg_combine_min_max(true_src, true_dst);
9511 break;
9512 case BPF_JNE:
9513 __reg_combine_min_max(false_src, false_dst);
b03c9f9f 9514 break;
4cabc5b1 9515 }
48461135
JB
9516}
9517
fd978bf7
JS
9518static void mark_ptr_or_null_reg(struct bpf_func_state *state,
9519 struct bpf_reg_state *reg, u32 id,
840b9615 9520 bool is_null)
57a09bf0 9521{
c25b2ae1 9522 if (type_may_be_null(reg->type) && reg->id == id &&
93c230e3 9523 !WARN_ON_ONCE(!reg->id)) {
b03c9f9f
EC
9524 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
9525 !tnum_equals_const(reg->var_off, 0) ||
f1174f77 9526 reg->off)) {
e60b0d12
DB
9527 /* Old offset (both fixed and variable parts) should
9528 * have been known-zero, because we don't allow pointer
9529 * arithmetic on pointers that might be NULL. If we
9530 * see this happening, don't convert the register.
9531 */
9532 return;
f1174f77
EC
9533 }
9534 if (is_null) {
9535 reg->type = SCALAR_VALUE;
1b986589
MKL
9536 /* We don't need id and ref_obj_id from this point
9537 * onwards anymore, thus we should better reset it,
9538 * so that state pruning has chances to take effect.
9539 */
9540 reg->id = 0;
9541 reg->ref_obj_id = 0;
4ddb7416
DB
9542
9543 return;
9544 }
9545
9546 mark_ptr_not_null_reg(reg);
9547
9548 if (!reg_may_point_to_spin_lock(reg)) {
1b986589
MKL
9549 /* For not-NULL ptr, reg->ref_obj_id will be reset
9550 * in release_reg_references().
9551 *
9552 * reg->id is still used by spin_lock ptr. Other
9553 * than spin_lock ptr type, reg->id can be reset.
fd978bf7
JS
9554 */
9555 reg->id = 0;
56f668df 9556 }
57a09bf0
TG
9557 }
9558}
9559
c6a9efa1
PC
9560static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
9561 bool is_null)
9562{
9563 struct bpf_reg_state *reg;
9564 int i;
9565
9566 for (i = 0; i < MAX_BPF_REG; i++)
9567 mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
9568
9569 bpf_for_each_spilled_reg(i, state, reg) {
9570 if (!reg)
9571 continue;
9572 mark_ptr_or_null_reg(state, reg, id, is_null);
9573 }
9574}
9575
57a09bf0
TG
9576/* The logic is similar to find_good_pkt_pointers(), both could eventually
9577 * be folded together at some point.
9578 */
840b9615
JS
9579static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
9580 bool is_null)
57a09bf0 9581{
f4d7e40a 9582 struct bpf_func_state *state = vstate->frame[vstate->curframe];
c6a9efa1 9583 struct bpf_reg_state *regs = state->regs;
1b986589 9584 u32 ref_obj_id = regs[regno].ref_obj_id;
a08dd0da 9585 u32 id = regs[regno].id;
c6a9efa1 9586 int i;
57a09bf0 9587
1b986589
MKL
9588 if (ref_obj_id && ref_obj_id == id && is_null)
9589 /* regs[regno] is in the " == NULL" branch.
9590 * No one could have freed the reference state before
9591 * doing the NULL check.
9592 */
9593 WARN_ON_ONCE(release_reference_state(state, id));
fd978bf7 9594
c6a9efa1
PC
9595 for (i = 0; i <= vstate->curframe; i++)
9596 __mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
57a09bf0
TG
9597}
9598
5beca081
DB
9599static bool try_match_pkt_pointers(const struct bpf_insn *insn,
9600 struct bpf_reg_state *dst_reg,
9601 struct bpf_reg_state *src_reg,
9602 struct bpf_verifier_state *this_branch,
9603 struct bpf_verifier_state *other_branch)
9604{
9605 if (BPF_SRC(insn->code) != BPF_X)
9606 return false;
9607
092ed096
JW
9608 /* Pointers are always 64-bit. */
9609 if (BPF_CLASS(insn->code) == BPF_JMP32)
9610 return false;
9611
5beca081
DB
9612 switch (BPF_OP(insn->code)) {
9613 case BPF_JGT:
9614 if ((dst_reg->type == PTR_TO_PACKET &&
9615 src_reg->type == PTR_TO_PACKET_END) ||
9616 (dst_reg->type == PTR_TO_PACKET_META &&
9617 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9618 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
9619 find_good_pkt_pointers(this_branch, dst_reg,
9620 dst_reg->type, false);
6d94e741 9621 mark_pkt_end(other_branch, insn->dst_reg, true);
5beca081
DB
9622 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
9623 src_reg->type == PTR_TO_PACKET) ||
9624 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9625 src_reg->type == PTR_TO_PACKET_META)) {
9626 /* pkt_end > pkt_data', pkt_data > pkt_meta' */
9627 find_good_pkt_pointers(other_branch, src_reg,
9628 src_reg->type, true);
6d94e741 9629 mark_pkt_end(this_branch, insn->src_reg, false);
5beca081
DB
9630 } else {
9631 return false;
9632 }
9633 break;
9634 case BPF_JLT:
9635 if ((dst_reg->type == PTR_TO_PACKET &&
9636 src_reg->type == PTR_TO_PACKET_END) ||
9637 (dst_reg->type == PTR_TO_PACKET_META &&
9638 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9639 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
9640 find_good_pkt_pointers(other_branch, dst_reg,
9641 dst_reg->type, true);
6d94e741 9642 mark_pkt_end(this_branch, insn->dst_reg, false);
5beca081
DB
9643 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
9644 src_reg->type == PTR_TO_PACKET) ||
9645 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9646 src_reg->type == PTR_TO_PACKET_META)) {
9647 /* pkt_end < pkt_data', pkt_data > pkt_meta' */
9648 find_good_pkt_pointers(this_branch, src_reg,
9649 src_reg->type, false);
6d94e741 9650 mark_pkt_end(other_branch, insn->src_reg, true);
5beca081
DB
9651 } else {
9652 return false;
9653 }
9654 break;
9655 case BPF_JGE:
9656 if ((dst_reg->type == PTR_TO_PACKET &&
9657 src_reg->type == PTR_TO_PACKET_END) ||
9658 (dst_reg->type == PTR_TO_PACKET_META &&
9659 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9660 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
9661 find_good_pkt_pointers(this_branch, dst_reg,
9662 dst_reg->type, true);
6d94e741 9663 mark_pkt_end(other_branch, insn->dst_reg, false);
5beca081
DB
9664 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
9665 src_reg->type == PTR_TO_PACKET) ||
9666 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9667 src_reg->type == PTR_TO_PACKET_META)) {
9668 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
9669 find_good_pkt_pointers(other_branch, src_reg,
9670 src_reg->type, false);
6d94e741 9671 mark_pkt_end(this_branch, insn->src_reg, true);
5beca081
DB
9672 } else {
9673 return false;
9674 }
9675 break;
9676 case BPF_JLE:
9677 if ((dst_reg->type == PTR_TO_PACKET &&
9678 src_reg->type == PTR_TO_PACKET_END) ||
9679 (dst_reg->type == PTR_TO_PACKET_META &&
9680 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9681 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
9682 find_good_pkt_pointers(other_branch, dst_reg,
9683 dst_reg->type, false);
6d94e741 9684 mark_pkt_end(this_branch, insn->dst_reg, true);
5beca081
DB
9685 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
9686 src_reg->type == PTR_TO_PACKET) ||
9687 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9688 src_reg->type == PTR_TO_PACKET_META)) {
9689 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
9690 find_good_pkt_pointers(this_branch, src_reg,
9691 src_reg->type, true);
6d94e741 9692 mark_pkt_end(other_branch, insn->src_reg, false);
5beca081
DB
9693 } else {
9694 return false;
9695 }
9696 break;
9697 default:
9698 return false;
9699 }
9700
9701 return true;
9702}
9703
75748837
AS
9704static void find_equal_scalars(struct bpf_verifier_state *vstate,
9705 struct bpf_reg_state *known_reg)
9706{
9707 struct bpf_func_state *state;
9708 struct bpf_reg_state *reg;
9709 int i, j;
9710
9711 for (i = 0; i <= vstate->curframe; i++) {
9712 state = vstate->frame[i];
9713 for (j = 0; j < MAX_BPF_REG; j++) {
9714 reg = &state->regs[j];
9715 if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
9716 *reg = *known_reg;
9717 }
9718
9719 bpf_for_each_spilled_reg(j, state, reg) {
9720 if (!reg)
9721 continue;
9722 if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
9723 *reg = *known_reg;
9724 }
9725 }
9726}
9727
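/* Verify a conditional jump (BPF_JMP/BPF_JMP32). If the outcome of the branch
 * can be predicted, only the reachable side is followed (the dead side may
 * still be pushed for simulation under speculative execution). Otherwise both
 * sides are explored and the register bounds are refined on each of them.
 */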
58e2af8b 9728static int check_cond_jmp_op(struct bpf_verifier_env *env,
17a52670
AS
9729 struct bpf_insn *insn, int *insn_idx)
9730{
f4d7e40a
AS
9731 struct bpf_verifier_state *this_branch = env->cur_state;
9732 struct bpf_verifier_state *other_branch;
9733 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
fb8d251e 9734 struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
17a52670 9735 u8 opcode = BPF_OP(insn->code);
092ed096 9736 bool is_jmp32;
fb8d251e 9737 int pred = -1;
17a52670
AS
9738 int err;
9739
092ed096
JW
9740 /* Only conditional jumps are expected to reach here. */
9741 if (opcode == BPF_JA || opcode > BPF_JSLE) {
9742 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
17a52670
AS
9743 return -EINVAL;
9744 }
9745
9746 if (BPF_SRC(insn->code) == BPF_X) {
9747 if (insn->imm != 0) {
092ed096 9748 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
17a52670
AS
9749 return -EINVAL;
9750 }
9751
9752 /* check src1 operand */
dc503a8a 9753 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
9754 if (err)
9755 return err;
1be7f75d
AS
9756
9757 if (is_pointer_value(env, insn->src_reg)) {
61bd5218 9758 verbose(env, "R%d pointer comparison prohibited\n",
1be7f75d
AS
9759 insn->src_reg);
9760 return -EACCES;
9761 }
fb8d251e 9762 src_reg = &regs[insn->src_reg];
17a52670
AS
9763 } else {
9764 if (insn->src_reg != BPF_REG_0) {
092ed096 9765 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
17a52670
AS
9766 return -EINVAL;
9767 }
9768 }
9769
9770 /* check src2 operand */
dc503a8a 9771 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
9772 if (err)
9773 return err;
9774
1a0dc1ac 9775 dst_reg = &regs[insn->dst_reg];
092ed096 9776 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
1a0dc1ac 9777
3f50f132
JF
9778 if (BPF_SRC(insn->code) == BPF_K) {
9779 pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
9780 } else if (src_reg->type == SCALAR_VALUE &&
9781 is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
9782 pred = is_branch_taken(dst_reg,
9783 tnum_subreg(src_reg->var_off).value,
9784 opcode,
9785 is_jmp32);
9786 } else if (src_reg->type == SCALAR_VALUE &&
9787 !is_jmp32 && tnum_is_const(src_reg->var_off)) {
9788 pred = is_branch_taken(dst_reg,
9789 src_reg->var_off.value,
9790 opcode,
9791 is_jmp32);
6d94e741
AS
9792 } else if (reg_is_pkt_pointer_any(dst_reg) &&
9793 reg_is_pkt_pointer_any(src_reg) &&
9794 !is_jmp32) {
9795 pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode);
3f50f132
JF
9796 }
9797
b5dc0163 9798 if (pred >= 0) {
cac616db
JF
9799		/* If we get here with a dst_reg pointer type, it is because
9800		 * is_branch_taken() above special-cased the 0 comparison.
9801 */
9802 if (!__is_pointer_value(false, dst_reg))
9803 err = mark_chain_precision(env, insn->dst_reg);
6d94e741
AS
9804 if (BPF_SRC(insn->code) == BPF_X && !err &&
9805 !__is_pointer_value(false, src_reg))
b5dc0163
AS
9806 err = mark_chain_precision(env, insn->src_reg);
9807 if (err)
9808 return err;
9809 }
9183671a 9810
fb8d251e 9811 if (pred == 1) {
9183671a
DB
9812 /* Only follow the goto, ignore fall-through. If needed, push
9813 * the fall-through branch for simulation under speculative
9814 * execution.
9815 */
9816 if (!env->bypass_spec_v1 &&
9817 !sanitize_speculative_path(env, insn, *insn_idx + 1,
9818 *insn_idx))
9819 return -EFAULT;
fb8d251e
AS
9820 *insn_idx += insn->off;
9821 return 0;
9822 } else if (pred == 0) {
9183671a
DB
9823 /* Only follow the fall-through branch, since that's where the
9824 * program will go. If needed, push the goto branch for
9825 * simulation under speculative execution.
fb8d251e 9826 */
9183671a
DB
9827 if (!env->bypass_spec_v1 &&
9828 !sanitize_speculative_path(env, insn,
9829 *insn_idx + insn->off + 1,
9830 *insn_idx))
9831 return -EFAULT;
fb8d251e 9832 return 0;
17a52670
AS
9833 }
9834
979d63d5
DB
9835 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
9836 false);
17a52670
AS
9837 if (!other_branch)
9838 return -EFAULT;
f4d7e40a 9839 other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
17a52670 9840
48461135
JB
9841 /* detect if we are comparing against a constant value so we can adjust
9842 * our min/max values for our dst register.
f1174f77
EC
9843 * this is only legit if both are scalars (or pointers to the same
9844 * object, I suppose, but we don't support that right now), because
9845 * otherwise the different base pointers mean the offsets aren't
9846 * comparable.
48461135
JB
9847 */
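	/* Illustrative example: for "if r1 > 0x14 goto +N" with a scalar r1,
	 * the fall-through copy of r1 gets umax_value clamped to 0x14 while
	 * the branch-taken copy gets umin_value raised to 0x15.
	 */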
9848 if (BPF_SRC(insn->code) == BPF_X) {
092ed096 9849 struct bpf_reg_state *src_reg = &regs[insn->src_reg];
092ed096 9850
f1174f77 9851 if (dst_reg->type == SCALAR_VALUE &&
092ed096
JW
9852 src_reg->type == SCALAR_VALUE) {
9853 if (tnum_is_const(src_reg->var_off) ||
3f50f132
JF
9854 (is_jmp32 &&
9855 tnum_is_const(tnum_subreg(src_reg->var_off))))
f4d7e40a 9856 reg_set_min_max(&other_branch_regs[insn->dst_reg],
092ed096 9857 dst_reg,
3f50f132
JF
9858 src_reg->var_off.value,
9859 tnum_subreg(src_reg->var_off).value,
092ed096
JW
9860 opcode, is_jmp32);
9861 else if (tnum_is_const(dst_reg->var_off) ||
3f50f132
JF
9862 (is_jmp32 &&
9863 tnum_is_const(tnum_subreg(dst_reg->var_off))))
f4d7e40a 9864 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
092ed096 9865 src_reg,
3f50f132
JF
9866 dst_reg->var_off.value,
9867 tnum_subreg(dst_reg->var_off).value,
092ed096
JW
9868 opcode, is_jmp32);
9869 else if (!is_jmp32 &&
9870 (opcode == BPF_JEQ || opcode == BPF_JNE))
f1174f77 9871 /* Comparing for equality, we can combine knowledge */
f4d7e40a
AS
9872 reg_combine_min_max(&other_branch_regs[insn->src_reg],
9873 &other_branch_regs[insn->dst_reg],
092ed096 9874 src_reg, dst_reg, opcode);
e688c3db
AS
9875 if (src_reg->id &&
9876 !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
75748837
AS
9877 find_equal_scalars(this_branch, src_reg);
9878 find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
9879 }
9880
f1174f77
EC
9881 }
9882 } else if (dst_reg->type == SCALAR_VALUE) {
f4d7e40a 9883 reg_set_min_max(&other_branch_regs[insn->dst_reg],
3f50f132
JF
9884 dst_reg, insn->imm, (u32)insn->imm,
9885 opcode, is_jmp32);
48461135
JB
9886 }
9887
e688c3db
AS
9888 if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
9889 !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
75748837
AS
9890 find_equal_scalars(this_branch, dst_reg);
9891 find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
9892 }
9893
092ed096
JW
9894 /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
9895	 * NOTE: these optimizations below are related to pointer comparisons,
9896 * which will never be JMP32.
9897 */
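	/* e.g.:  r0 = bpf_map_lookup_elem(...)
	 *        if r0 == 0 goto +N
	 * marks r0 (and every register sharing its id) as PTR_TO_MAP_VALUE on
	 * the fall-through path and as a known-zero scalar on the taken path.
	 */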
9898 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
1a0dc1ac 9899 insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
c25b2ae1 9900 type_may_be_null(dst_reg->type)) {
840b9615 9901 /* Mark all identical registers in each branch as either
57a09bf0
TG
9902		 * safe or unknown, depending on whether the R == 0 or R != 0 branch is taken.
9903 */
840b9615
JS
9904 mark_ptr_or_null_regs(this_branch, insn->dst_reg,
9905 opcode == BPF_JNE);
9906 mark_ptr_or_null_regs(other_branch, insn->dst_reg,
9907 opcode == BPF_JEQ);
5beca081
DB
9908 } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
9909 this_branch, other_branch) &&
9910 is_pointer_value(env, insn->dst_reg)) {
61bd5218
JK
9911 verbose(env, "R%d pointer comparison prohibited\n",
9912 insn->dst_reg);
1be7f75d 9913 return -EACCES;
17a52670 9914 }
06ee7115 9915 if (env->log.level & BPF_LOG_LEVEL)
2e576648 9916 print_insn_state(env, this_branch->frame[this_branch->curframe]);
17a52670
AS
9917 return 0;
9918}
9919
17a52670 9920/* verify BPF_LD_IMM64 instruction */
58e2af8b 9921static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
17a52670 9922{
d8eca5bb 9923 struct bpf_insn_aux_data *aux = cur_aux(env);
638f5b90 9924 struct bpf_reg_state *regs = cur_regs(env);
4976b718 9925 struct bpf_reg_state *dst_reg;
d8eca5bb 9926 struct bpf_map *map;
17a52670
AS
9927 int err;
9928
9929 if (BPF_SIZE(insn->code) != BPF_DW) {
61bd5218 9930 verbose(env, "invalid BPF_LD_IMM insn\n");
17a52670
AS
9931 return -EINVAL;
9932 }
9933 if (insn->off != 0) {
61bd5218 9934 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
17a52670
AS
9935 return -EINVAL;
9936 }
9937
dc503a8a 9938 err = check_reg_arg(env, insn->dst_reg, DST_OP);
17a52670
AS
9939 if (err)
9940 return err;
9941
4976b718 9942 dst_reg = &regs[insn->dst_reg];
6b173873 9943 if (insn->src_reg == 0) {
6b173873
JK
9944 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
9945
4976b718 9946 dst_reg->type = SCALAR_VALUE;
b03c9f9f 9947 __mark_reg_known(&regs[insn->dst_reg], imm);
17a52670 9948 return 0;
6b173873 9949 }
17a52670 9950
d400a6cf
DB
9951 /* All special src_reg cases are listed below. From this point onwards
9952 * we either succeed and assign a corresponding dst_reg->type after
9953 * zeroing the offset, or fail and reject the program.
9954 */
9955 mark_reg_known_zero(env, regs, insn->dst_reg);
4976b718 9956
d400a6cf 9957 if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
4976b718 9958 dst_reg->type = aux->btf_var.reg_type;
34d3a78c 9959 switch (base_type(dst_reg->type)) {
4976b718
HL
9960 case PTR_TO_MEM:
9961 dst_reg->mem_size = aux->btf_var.mem_size;
9962 break;
9963 case PTR_TO_BTF_ID:
22dc4a0f 9964 dst_reg->btf = aux->btf_var.btf;
4976b718
HL
9965 dst_reg->btf_id = aux->btf_var.btf_id;
9966 break;
9967 default:
9968 verbose(env, "bpf verifier is misconfigured\n");
9969 return -EFAULT;
9970 }
9971 return 0;
9972 }
9973
69c087ba
YS
9974 if (insn->src_reg == BPF_PSEUDO_FUNC) {
9975 struct bpf_prog_aux *aux = env->prog->aux;
3990ed4c
MKL
9976 u32 subprogno = find_subprog(env,
9977 env->insn_idx + insn->imm + 1);
69c087ba
YS
9978
9979 if (!aux->func_info) {
9980 verbose(env, "missing btf func_info\n");
9981 return -EINVAL;
9982 }
9983 if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
9984 verbose(env, "callback function not static\n");
9985 return -EINVAL;
9986 }
9987
9988 dst_reg->type = PTR_TO_FUNC;
9989 dst_reg->subprogno = subprogno;
9990 return 0;
9991 }
9992
d8eca5bb 9993 map = env->used_maps[aux->map_index];
4976b718 9994 dst_reg->map_ptr = map;
d8eca5bb 9995
387544bf
AS
9996 if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
9997 insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
4976b718
HL
9998 dst_reg->type = PTR_TO_MAP_VALUE;
9999 dst_reg->off = aux->map_off;
d8eca5bb 10000 if (map_value_has_spin_lock(map))
4976b718 10001 dst_reg->id = ++env->id_gen;
387544bf
AS
10002 } else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
10003 insn->src_reg == BPF_PSEUDO_MAP_IDX) {
4976b718 10004 dst_reg->type = CONST_PTR_TO_MAP;
d8eca5bb
DB
10005 } else {
10006 verbose(env, "bpf verifier is misconfigured\n");
10007 return -EINVAL;
10008 }
17a52670 10009
17a52670
AS
10010 return 0;
10011}
10012
96be4325
DB
10013static bool may_access_skb(enum bpf_prog_type type)
10014{
10015 switch (type) {
10016 case BPF_PROG_TYPE_SOCKET_FILTER:
10017 case BPF_PROG_TYPE_SCHED_CLS:
94caee8c 10018 case BPF_PROG_TYPE_SCHED_ACT:
96be4325
DB
10019 return true;
10020 default:
10021 return false;
10022 }
10023}
10024
ddd872bc
AS
10025/* verify safety of LD_ABS|LD_IND instructions:
10026 * - they can only appear in the programs where ctx == skb
10027 * - since they are wrappers of function calls, they scratch R1-R5 registers,
10028 * preserve R6-R9, and store return value into R0
10029 *
10030 * Implicit input:
10031 * ctx == skb == R6 == CTX
10032 *
10033 * Explicit input:
10034 * SRC == any register
10035 * IMM == 32-bit immediate
10036 *
10037 * Output:
10038 * R0 - 8/16/32-bit skb data converted to cpu endianness
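 *
 * For example, BPF_LD_ABS | BPF_H with IMM == 12 loads the 16-bit EtherType
 * field of an Ethernet frame into R0, byte-swapped to CPU endianness.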
10039 */
58e2af8b 10040static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
ddd872bc 10041{
638f5b90 10042 struct bpf_reg_state *regs = cur_regs(env);
6d4f151a 10043 static const int ctx_reg = BPF_REG_6;
ddd872bc 10044 u8 mode = BPF_MODE(insn->code);
ddd872bc
AS
10045 int i, err;
10046
7e40781c 10047 if (!may_access_skb(resolve_prog_type(env->prog))) {
61bd5218 10048 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
ddd872bc
AS
10049 return -EINVAL;
10050 }
10051
e0cea7ce
DB
10052 if (!env->ops->gen_ld_abs) {
10053 verbose(env, "bpf verifier is misconfigured\n");
10054 return -EINVAL;
10055 }
10056
ddd872bc 10057 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
d82bccc6 10058 BPF_SIZE(insn->code) == BPF_DW ||
ddd872bc 10059 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
61bd5218 10060 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
ddd872bc
AS
10061 return -EINVAL;
10062 }
10063
10064 /* check whether implicit source operand (register R6) is readable */
6d4f151a 10065 err = check_reg_arg(env, ctx_reg, SRC_OP);
ddd872bc
AS
10066 if (err)
10067 return err;
10068
fd978bf7
JS
10069 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
10070 * gen_ld_abs() may terminate the program at runtime, leading to
10071	 * a reference leak.
10072 */
10073 err = check_reference_leak(env);
10074 if (err) {
10075 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
10076 return err;
10077 }
10078
d83525ca
AS
10079 if (env->cur_state->active_spin_lock) {
10080 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
10081 return -EINVAL;
10082 }
10083
6d4f151a 10084 if (regs[ctx_reg].type != PTR_TO_CTX) {
61bd5218
JK
10085 verbose(env,
10086 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
ddd872bc
AS
10087 return -EINVAL;
10088 }
10089
10090 if (mode == BPF_IND) {
10091 /* check explicit source operand */
dc503a8a 10092 err = check_reg_arg(env, insn->src_reg, SRC_OP);
ddd872bc
AS
10093 if (err)
10094 return err;
10095 }
10096
be80a1d3 10097 err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
6d4f151a
DB
10098 if (err < 0)
10099 return err;
10100
ddd872bc 10101 /* reset caller saved regs to unreadable */
dc503a8a 10102 for (i = 0; i < CALLER_SAVED_REGS; i++) {
61bd5218 10103 mark_reg_not_init(env, regs, caller_saved[i]);
dc503a8a
EC
10104 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
10105 }
ddd872bc
AS
10106
10107 /* mark destination R0 register as readable, since it contains
dc503a8a
EC
10108 * the value fetched from the packet.
10109 * Already marked as written above.
ddd872bc 10110 */
61bd5218 10111 mark_reg_unknown(env, regs, BPF_REG_0);
5327ed3d
JW
10112	/* ld_abs loads up to 32 bits of skb data. */
10113 regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
ddd872bc
AS
10114 return 0;
10115}
10116
390ee7e2
AS
10117static int check_return_code(struct bpf_verifier_env *env)
10118{
5cf1e914 10119 struct tnum enforce_attach_type_range = tnum_unknown;
27ae7997 10120 const struct bpf_prog *prog = env->prog;
390ee7e2
AS
10121 struct bpf_reg_state *reg;
10122 struct tnum range = tnum_range(0, 1);
7e40781c 10123 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
27ae7997 10124 int err;
bfc6bb74
AS
10125 struct bpf_func_state *frame = env->cur_state->frame[0];
10126 const bool is_subprog = frame->subprogno;
27ae7997 10127
9e4e01df 10128 /* LSM and struct_ops func-ptr's return type could be "void" */
f782e2c3
DB
10129 if (!is_subprog &&
10130 (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
7e40781c 10131 prog_type == BPF_PROG_TYPE_LSM) &&
27ae7997
MKL
10132 !prog->aux->attach_func_proto->type)
10133 return 0;
10134
8fb33b60 10135 /* eBPF calling convention is such that R0 is used
27ae7997
MKL
10136 * to return the value from eBPF program.
10137 * Make sure that it's readable at this time
10138	 * of bpf_exit, which means that the program wrote
10139 * something into it earlier
10140 */
10141 err = check_reg_arg(env, BPF_REG_0, SRC_OP);
10142 if (err)
10143 return err;
10144
10145 if (is_pointer_value(env, BPF_REG_0)) {
10146 verbose(env, "R0 leaks addr as return value\n");
10147 return -EACCES;
10148 }
390ee7e2 10149
f782e2c3 10150 reg = cur_regs(env) + BPF_REG_0;
bfc6bb74
AS
10151
10152 if (frame->in_async_callback_fn) {
10153 /* enforce return zero from async callbacks like timer */
10154 if (reg->type != SCALAR_VALUE) {
10155 verbose(env, "In async callback the register R0 is not a known value (%s)\n",
c25b2ae1 10156 reg_type_str(env, reg->type));
bfc6bb74
AS
10157 return -EINVAL;
10158 }
10159
10160 if (!tnum_in(tnum_const(0), reg->var_off)) {
10161 verbose_invalid_scalar(env, reg, &range, "async callback", "R0");
10162 return -EINVAL;
10163 }
10164 return 0;
10165 }
10166
f782e2c3
DB
10167 if (is_subprog) {
10168 if (reg->type != SCALAR_VALUE) {
10169 verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
c25b2ae1 10170 reg_type_str(env, reg->type));
f782e2c3
DB
10171 return -EINVAL;
10172 }
10173 return 0;
10174 }
10175
7e40781c 10176 switch (prog_type) {
983695fa
DB
10177 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
10178 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
1b66d253
DB
10179 env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
10180 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
10181 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
10182 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
10183 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
983695fa 10184 range = tnum_range(1, 1);
77241217
SF
10185 if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
10186 env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
10187 range = tnum_range(0, 3);
ed4ed404 10188 break;
390ee7e2 10189 case BPF_PROG_TYPE_CGROUP_SKB:
5cf1e914 10190 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
10191 range = tnum_range(0, 3);
10192 enforce_attach_type_range = tnum_range(2, 3);
10193 }
ed4ed404 10194 break;
390ee7e2
AS
10195 case BPF_PROG_TYPE_CGROUP_SOCK:
10196 case BPF_PROG_TYPE_SOCK_OPS:
ebc614f6 10197 case BPF_PROG_TYPE_CGROUP_DEVICE:
7b146ceb 10198 case BPF_PROG_TYPE_CGROUP_SYSCTL:
0d01da6a 10199 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
390ee7e2 10200 break;
15ab09bd
AS
10201 case BPF_PROG_TYPE_RAW_TRACEPOINT:
10202 if (!env->prog->aux->attach_btf_id)
10203 return 0;
10204 range = tnum_const(0);
10205 break;
15d83c4d 10206 case BPF_PROG_TYPE_TRACING:
e92888c7
YS
10207 switch (env->prog->expected_attach_type) {
10208 case BPF_TRACE_FENTRY:
10209 case BPF_TRACE_FEXIT:
10210 range = tnum_const(0);
10211 break;
10212 case BPF_TRACE_RAW_TP:
10213 case BPF_MODIFY_RETURN:
15d83c4d 10214 return 0;
2ec0616e
DB
10215 case BPF_TRACE_ITER:
10216 break;
e92888c7
YS
10217 default:
10218 return -ENOTSUPP;
10219 }
15d83c4d 10220 break;
e9ddbb77
JS
10221 case BPF_PROG_TYPE_SK_LOOKUP:
10222 range = tnum_range(SK_DROP, SK_PASS);
10223 break;
e92888c7
YS
10224 case BPF_PROG_TYPE_EXT:
10225 /* freplace program can return anything as its return value
10226 * depends on the to-be-replaced kernel func or bpf program.
10227 */
390ee7e2
AS
10228 default:
10229 return 0;
10230 }
10231
390ee7e2 10232 if (reg->type != SCALAR_VALUE) {
61bd5218 10233 verbose(env, "At program exit the register R0 is not a known value (%s)\n",
c25b2ae1 10234 reg_type_str(env, reg->type));
390ee7e2
AS
10235 return -EINVAL;
10236 }
10237
10238 if (!tnum_in(range, reg->var_off)) {
bc2591d6 10239 verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
390ee7e2
AS
10240 return -EINVAL;
10241 }
5cf1e914 10242
10243 if (!tnum_is_unknown(enforce_attach_type_range) &&
10244 tnum_in(enforce_attach_type_range, reg->var_off))
10245 env->prog->enforce_expected_attach_type = 1;
390ee7e2
AS
10246 return 0;
10247}
10248
475fb78f
AS
10249/* non-recursive DFS pseudo code
10250 * 1 procedure DFS-iterative(G,v):
10251 * 2 label v as discovered
10252 * 3 let S be a stack
10253 * 4 S.push(v)
10254 * 5 while S is not empty
10255 * 6 t <- S.pop()
10256 * 7 if t is what we're looking for:
10257 * 8 return t
10258 * 9 for all edges e in G.adjacentEdges(t) do
10259 * 10 if edge e is already labelled
10260 * 11 continue with the next edge
10261 * 12 w <- G.adjacentVertex(t,e)
10262 * 13 if vertex w is not discovered and not explored
10263 * 14 label e as tree-edge
10264 * 15 label w as discovered
10265 * 16 S.push(w)
10266 * 17 continue at 5
10267 * 18 else if vertex w is discovered
10268 * 19 label e as back-edge
10269 * 20 else
10270 * 21 // vertex w is explored
10271 * 22 label e as forward- or cross-edge
10272 * 23 label t as explored
10273 * 24 S.pop()
10274 *
10275 * convention:
10276 * 0x10 - discovered
10277 * 0x11 - discovered and fall-through edge labelled
10278 * 0x12 - discovered and fall-through and branch edges labelled
10279 * 0x20 - explored
10280 */
10281
10282enum {
10283 DISCOVERED = 0x10,
10284 EXPLORED = 0x20,
10285 FALLTHROUGH = 1,
10286 BRANCH = 2,
10287};
10288
dc2a4ebc
AS
10289static u32 state_htab_size(struct bpf_verifier_env *env)
10290{
10291 return env->prog->len;
10292}
10293
5d839021
AS
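/* Candidate states for pruning are kept in a hash table indexed by the insn
 * index xor-ed with the callsite of the innermost frame (modulo prog->len),
 * so states reached through different call chains tend to land in
 * different buckets.
 */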
10294static struct bpf_verifier_state_list **explored_state(
10295 struct bpf_verifier_env *env,
10296 int idx)
10297{
dc2a4ebc
AS
10298 struct bpf_verifier_state *cur = env->cur_state;
10299 struct bpf_func_state *state = cur->frame[cur->curframe];
10300
10301 return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
5d839021
AS
10302}
10303
10304static void init_explored_state(struct bpf_verifier_env *env, int idx)
10305{
a8f500af 10306 env->insn_aux_data[idx].prune_point = true;
5d839021 10307}
f1bca824 10308
59e2e27d
WAF
10309enum {
10310 DONE_EXPLORING = 0,
10311 KEEP_EXPLORING = 1,
10312};
10313
475fb78f
AS
10314/* t, w, e - match pseudo-code above:
10315 * t - index of current instruction
10316 * w - next instruction
10317 * e - edge
10318 */
2589726d
AS
10319static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
10320 bool loop_ok)
475fb78f 10321{
7df737e9
AS
10322 int *insn_stack = env->cfg.insn_stack;
10323 int *insn_state = env->cfg.insn_state;
10324
475fb78f 10325 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
59e2e27d 10326 return DONE_EXPLORING;
475fb78f
AS
10327
10328 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
59e2e27d 10329 return DONE_EXPLORING;
475fb78f
AS
10330
10331 if (w < 0 || w >= env->prog->len) {
d9762e84 10332 verbose_linfo(env, t, "%d: ", t);
61bd5218 10333 verbose(env, "jump out of range from insn %d to %d\n", t, w);
475fb78f
AS
10334 return -EINVAL;
10335 }
10336
f1bca824
AS
10337 if (e == BRANCH)
10338 /* mark branch target for state pruning */
5d839021 10339 init_explored_state(env, w);
f1bca824 10340
475fb78f
AS
10341 if (insn_state[w] == 0) {
10342 /* tree-edge */
10343 insn_state[t] = DISCOVERED | e;
10344 insn_state[w] = DISCOVERED;
7df737e9 10345 if (env->cfg.cur_stack >= env->prog->len)
475fb78f 10346 return -E2BIG;
7df737e9 10347 insn_stack[env->cfg.cur_stack++] = w;
59e2e27d 10348 return KEEP_EXPLORING;
475fb78f 10349 } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
2c78ee89 10350 if (loop_ok && env->bpf_capable)
59e2e27d 10351 return DONE_EXPLORING;
d9762e84
MKL
10352 verbose_linfo(env, t, "%d: ", t);
10353 verbose_linfo(env, w, "%d: ", w);
61bd5218 10354 verbose(env, "back-edge from insn %d to %d\n", t, w);
475fb78f
AS
10355 return -EINVAL;
10356 } else if (insn_state[w] == EXPLORED) {
10357 /* forward- or cross-edge */
10358 insn_state[t] = DISCOVERED | e;
10359 } else {
61bd5218 10360 verbose(env, "insn state internal bug\n");
475fb78f
AS
10361 return -EFAULT;
10362 }
59e2e27d
WAF
10363 return DONE_EXPLORING;
10364}
10365
efdb22de
YS
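/* CFG walk for a call insn: the fall-through edge to the next insn is always
 * followed and that insn is marked as a pruning point; for calls into a BPF
 * subprogram the branch edge into the callee is followed as well and the
 * call insn itself also becomes a pruning point.
 */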
10366static int visit_func_call_insn(int t, int insn_cnt,
10367 struct bpf_insn *insns,
10368 struct bpf_verifier_env *env,
10369 bool visit_callee)
10370{
10371 int ret;
10372
10373 ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
10374 if (ret)
10375 return ret;
10376
10377 if (t + 1 < insn_cnt)
10378 init_explored_state(env, t + 1);
10379 if (visit_callee) {
10380 init_explored_state(env, t);
86fc6ee6
AS
10381 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
10382 /* It's ok to allow recursion from CFG point of
10383 * view. __check_func_call() will do the actual
10384 * check.
10385 */
10386 bpf_pseudo_func(insns + t));
efdb22de
YS
10387 }
10388 return ret;
10389}
10390
59e2e27d
WAF
10391/* Visits the instruction at index t and returns one of the following:
10392 * < 0 - an error occurred
10393 * DONE_EXPLORING - the instruction was fully explored
10394 * KEEP_EXPLORING - there is still work to be done before it is fully explored
10395 */
10396static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
10397{
10398 struct bpf_insn *insns = env->prog->insnsi;
10399 int ret;
10400
69c087ba
YS
10401 if (bpf_pseudo_func(insns + t))
10402 return visit_func_call_insn(t, insn_cnt, insns, env, true);
10403
59e2e27d
WAF
10404 /* All non-branch instructions have a single fall-through edge. */
10405 if (BPF_CLASS(insns[t].code) != BPF_JMP &&
10406 BPF_CLASS(insns[t].code) != BPF_JMP32)
10407 return push_insn(t, t + 1, FALLTHROUGH, env, false);
10408
10409 switch (BPF_OP(insns[t].code)) {
10410 case BPF_EXIT:
10411 return DONE_EXPLORING;
10412
10413 case BPF_CALL:
bfc6bb74
AS
10414 if (insns[t].imm == BPF_FUNC_timer_set_callback)
10415 /* Mark this call insn to trigger is_state_visited() check
10416 * before call itself is processed by __check_func_call().
10417 * Otherwise new async state will be pushed for further
10418 * exploration.
10419 */
10420 init_explored_state(env, t);
efdb22de
YS
10421 return visit_func_call_insn(t, insn_cnt, insns, env,
10422 insns[t].src_reg == BPF_PSEUDO_CALL);
59e2e27d
WAF
10423
10424 case BPF_JA:
10425 if (BPF_SRC(insns[t].code) != BPF_K)
10426 return -EINVAL;
10427
10428 /* unconditional jump with single edge */
10429 ret = push_insn(t, t + insns[t].off + 1, FALLTHROUGH, env,
10430 true);
10431 if (ret)
10432 return ret;
10433
10434 /* unconditional jmp is not a good pruning point,
10435 * but it's marked, since backtracking needs
10436 * to record jmp history in is_state_visited().
10437 */
10438 init_explored_state(env, t + insns[t].off + 1);
10439 /* tell verifier to check for equivalent states
10440 * after every call and jump
10441 */
10442 if (t + 1 < insn_cnt)
10443 init_explored_state(env, t + 1);
10444
10445 return ret;
10446
10447 default:
10448 /* conditional jump with two edges */
10449 init_explored_state(env, t);
10450 ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
10451 if (ret)
10452 return ret;
10453
10454 return push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
10455 }
475fb78f
AS
10456}
10457
10458/* non-recursive depth-first-search to detect loops in BPF program
10459 * loop == back-edge in directed graph
10460 */
58e2af8b 10461static int check_cfg(struct bpf_verifier_env *env)
475fb78f 10462{
475fb78f 10463 int insn_cnt = env->prog->len;
7df737e9 10464 int *insn_stack, *insn_state;
475fb78f 10465 int ret = 0;
59e2e27d 10466 int i;
475fb78f 10467
7df737e9 10468 insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
475fb78f
AS
10469 if (!insn_state)
10470 return -ENOMEM;
10471
7df737e9 10472 insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
475fb78f 10473 if (!insn_stack) {
71dde681 10474 kvfree(insn_state);
475fb78f
AS
10475 return -ENOMEM;
10476 }
10477
10478 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
10479 insn_stack[0] = 0; /* 0 is the first instruction */
7df737e9 10480 env->cfg.cur_stack = 1;
475fb78f 10481
59e2e27d
WAF
10482 while (env->cfg.cur_stack > 0) {
10483 int t = insn_stack[env->cfg.cur_stack - 1];
475fb78f 10484
59e2e27d
WAF
10485 ret = visit_insn(t, insn_cnt, env);
10486 switch (ret) {
10487 case DONE_EXPLORING:
10488 insn_state[t] = EXPLORED;
10489 env->cfg.cur_stack--;
10490 break;
10491 case KEEP_EXPLORING:
10492 break;
10493 default:
10494 if (ret > 0) {
10495 verbose(env, "visit_insn internal bug\n");
10496 ret = -EFAULT;
475fb78f 10497 }
475fb78f 10498 goto err_free;
59e2e27d 10499 }
475fb78f
AS
10500 }
10501
59e2e27d 10502 if (env->cfg.cur_stack < 0) {
61bd5218 10503 verbose(env, "pop stack internal bug\n");
475fb78f
AS
10504 ret = -EFAULT;
10505 goto err_free;
10506 }
475fb78f 10507
475fb78f
AS
10508 for (i = 0; i < insn_cnt; i++) {
10509 if (insn_state[i] != EXPLORED) {
61bd5218 10510 verbose(env, "unreachable insn %d\n", i);
475fb78f
AS
10511 ret = -EINVAL;
10512 goto err_free;
10513 }
10514 }
10515 ret = 0; /* cfg looks good */
10516
10517err_free:
71dde681
AS
10518 kvfree(insn_state);
10519 kvfree(insn_stack);
7df737e9 10520 env->cfg.insn_state = env->cfg.insn_stack = NULL;
475fb78f
AS
10521 return ret;
10522}
10523
09b28d76
AS
10524static int check_abnormal_return(struct bpf_verifier_env *env)
10525{
10526 int i;
10527
10528 for (i = 1; i < env->subprog_cnt; i++) {
10529 if (env->subprog_info[i].has_ld_abs) {
10530 verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
10531 return -EINVAL;
10532 }
10533 if (env->subprog_info[i].has_tail_call) {
10534 verbose(env, "tail_call is not allowed in subprogs without BTF\n");
10535 return -EINVAL;
10536 }
10537 }
10538 return 0;
10539}
10540
838e9690
YS
10541/* The minimum supported BTF func info size */
10542#define MIN_BPF_FUNCINFO_SIZE 8
10543#define MAX_FUNCINFO_REC_SIZE 252
10544
c454a46b
MKL
10545static int check_btf_func(struct bpf_verifier_env *env,
10546 const union bpf_attr *attr,
af2ac3e1 10547 bpfptr_t uattr)
838e9690 10548{
09b28d76 10549 const struct btf_type *type, *func_proto, *ret_type;
d0b2818e 10550 u32 i, nfuncs, urec_size, min_size;
838e9690 10551 u32 krec_size = sizeof(struct bpf_func_info);
c454a46b 10552 struct bpf_func_info *krecord;
8c1b6e69 10553 struct bpf_func_info_aux *info_aux = NULL;
c454a46b
MKL
10554 struct bpf_prog *prog;
10555 const struct btf *btf;
af2ac3e1 10556 bpfptr_t urecord;
d0b2818e 10557 u32 prev_offset = 0;
09b28d76 10558 bool scalar_return;
e7ed83d6 10559 int ret = -ENOMEM;
838e9690
YS
10560
10561 nfuncs = attr->func_info_cnt;
09b28d76
AS
10562 if (!nfuncs) {
10563 if (check_abnormal_return(env))
10564 return -EINVAL;
838e9690 10565 return 0;
09b28d76 10566 }
838e9690
YS
10567
10568 if (nfuncs != env->subprog_cnt) {
10569 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
10570 return -EINVAL;
10571 }
10572
10573 urec_size = attr->func_info_rec_size;
10574 if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
10575 urec_size > MAX_FUNCINFO_REC_SIZE ||
10576 urec_size % sizeof(u32)) {
10577 verbose(env, "invalid func info rec size %u\n", urec_size);
10578 return -EINVAL;
10579 }
10580
c454a46b
MKL
10581 prog = env->prog;
10582 btf = prog->aux->btf;
838e9690 10583
af2ac3e1 10584 urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
838e9690
YS
10585 min_size = min_t(u32, krec_size, urec_size);
10586
ba64e7d8 10587 krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
c454a46b
MKL
10588 if (!krecord)
10589 return -ENOMEM;
8c1b6e69
AS
10590 info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
10591 if (!info_aux)
10592 goto err_free;
ba64e7d8 10593
838e9690
YS
10594 for (i = 0; i < nfuncs; i++) {
10595 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
10596 if (ret) {
10597 if (ret == -E2BIG) {
10598				verbose(env, "nonzero trailing record in func info");
10599 /* set the size kernel expects so loader can zero
10600 * out the rest of the record.
10601 */
af2ac3e1
AS
10602 if (copy_to_bpfptr_offset(uattr,
10603 offsetof(union bpf_attr, func_info_rec_size),
10604 &min_size, sizeof(min_size)))
838e9690
YS
10605 ret = -EFAULT;
10606 }
c454a46b 10607 goto err_free;
838e9690
YS
10608 }
10609
af2ac3e1 10610 if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
838e9690 10611 ret = -EFAULT;
c454a46b 10612 goto err_free;
838e9690
YS
10613 }
10614
d30d42e0 10615 /* check insn_off */
09b28d76 10616 ret = -EINVAL;
838e9690 10617 if (i == 0) {
d30d42e0 10618 if (krecord[i].insn_off) {
838e9690 10619 verbose(env,
d30d42e0
MKL
10620 "nonzero insn_off %u for the first func info record",
10621 krecord[i].insn_off);
c454a46b 10622 goto err_free;
838e9690 10623 }
d30d42e0 10624 } else if (krecord[i].insn_off <= prev_offset) {
838e9690
YS
10625 verbose(env,
10626 "same or smaller insn offset (%u) than previous func info record (%u)",
d30d42e0 10627 krecord[i].insn_off, prev_offset);
c454a46b 10628 goto err_free;
838e9690
YS
10629 }
10630
d30d42e0 10631 if (env->subprog_info[i].start != krecord[i].insn_off) {
838e9690 10632 verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
c454a46b 10633 goto err_free;
838e9690
YS
10634 }
10635
10636 /* check type_id */
ba64e7d8 10637 type = btf_type_by_id(btf, krecord[i].type_id);
51c39bb1 10638 if (!type || !btf_type_is_func(type)) {
838e9690 10639 verbose(env, "invalid type id %d in func info",
ba64e7d8 10640 krecord[i].type_id);
c454a46b 10641 goto err_free;
838e9690 10642 }
51c39bb1 10643 info_aux[i].linkage = BTF_INFO_VLEN(type->info);
09b28d76
AS
10644
10645 func_proto = btf_type_by_id(btf, type->type);
10646 if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
10647 /* btf_func_check() already verified it during BTF load */
10648 goto err_free;
10649 ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
10650 scalar_return =
10651 btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
10652 if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
10653 verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
10654 goto err_free;
10655 }
10656 if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
10657 verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
10658 goto err_free;
10659 }
10660
d30d42e0 10661 prev_offset = krecord[i].insn_off;
af2ac3e1 10662 bpfptr_add(&urecord, urec_size);
838e9690
YS
10663 }
10664
ba64e7d8
YS
10665 prog->aux->func_info = krecord;
10666 prog->aux->func_info_cnt = nfuncs;
8c1b6e69 10667 prog->aux->func_info_aux = info_aux;
838e9690
YS
10668 return 0;
10669
c454a46b 10670err_free:
ba64e7d8 10671 kvfree(krecord);
8c1b6e69 10672 kfree(info_aux);
838e9690
YS
10673 return ret;
10674}
10675
ba64e7d8
YS
10676static void adjust_btf_func(struct bpf_verifier_env *env)
10677{
8c1b6e69 10678 struct bpf_prog_aux *aux = env->prog->aux;
ba64e7d8
YS
10679 int i;
10680
8c1b6e69 10681 if (!aux->func_info)
ba64e7d8
YS
10682 return;
10683
10684 for (i = 0; i < env->subprog_cnt; i++)
8c1b6e69 10685 aux->func_info[i].insn_off = env->subprog_info[i].start;
ba64e7d8
YS
10686}
10687
1b773d00 10688#define MIN_BPF_LINEINFO_SIZE offsetofend(struct bpf_line_info, line_col)
c454a46b
MKL
10689#define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
10690
10691static int check_btf_line(struct bpf_verifier_env *env,
10692 const union bpf_attr *attr,
af2ac3e1 10693 bpfptr_t uattr)
c454a46b
MKL
10694{
10695 u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
10696 struct bpf_subprog_info *sub;
10697 struct bpf_line_info *linfo;
10698 struct bpf_prog *prog;
10699 const struct btf *btf;
af2ac3e1 10700 bpfptr_t ulinfo;
c454a46b
MKL
10701 int err;
10702
10703 nr_linfo = attr->line_info_cnt;
10704 if (!nr_linfo)
10705 return 0;
0e6491b5
BC
10706 if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
10707 return -EINVAL;
c454a46b
MKL
10708
10709 rec_size = attr->line_info_rec_size;
10710 if (rec_size < MIN_BPF_LINEINFO_SIZE ||
10711 rec_size > MAX_LINEINFO_REC_SIZE ||
10712 rec_size & (sizeof(u32) - 1))
10713 return -EINVAL;
10714
10715	/* Need to zero it in case userspace passes in
10716	 * a smaller bpf_line_info object.
10717 */
10718 linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
10719 GFP_KERNEL | __GFP_NOWARN);
10720 if (!linfo)
10721 return -ENOMEM;
10722
10723 prog = env->prog;
10724 btf = prog->aux->btf;
10725
10726 s = 0;
10727 sub = env->subprog_info;
af2ac3e1 10728 ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
c454a46b
MKL
10729 expected_size = sizeof(struct bpf_line_info);
10730 ncopy = min_t(u32, expected_size, rec_size);
10731 for (i = 0; i < nr_linfo; i++) {
10732 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
10733 if (err) {
10734 if (err == -E2BIG) {
10735				verbose(env, "nonzero trailing record in line_info");
af2ac3e1
AS
10736 if (copy_to_bpfptr_offset(uattr,
10737 offsetof(union bpf_attr, line_info_rec_size),
10738 &expected_size, sizeof(expected_size)))
c454a46b
MKL
10739 err = -EFAULT;
10740 }
10741 goto err_free;
10742 }
10743
af2ac3e1 10744 if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
c454a46b
MKL
10745 err = -EFAULT;
10746 goto err_free;
10747 }
10748
10749 /*
10750 * Check insn_off to ensure
10751 * 1) strictly increasing AND
10752 * 2) bounded by prog->len
10753 *
10754 * The linfo[0].insn_off == 0 check logically falls into
10755 * the later "missing bpf_line_info for func..." case
10756		 * because linfo[0].insn_off must also belong to
10757		 * the first sub, and the first sub must have
10758 * subprog_info[0].start == 0.
10759 */
10760 if ((i && linfo[i].insn_off <= prev_offset) ||
10761 linfo[i].insn_off >= prog->len) {
10762 verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
10763 i, linfo[i].insn_off, prev_offset,
10764 prog->len);
10765 err = -EINVAL;
10766 goto err_free;
10767 }
10768
fdbaa0be
MKL
10769 if (!prog->insnsi[linfo[i].insn_off].code) {
10770 verbose(env,
10771 "Invalid insn code at line_info[%u].insn_off\n",
10772 i);
10773 err = -EINVAL;
10774 goto err_free;
10775 }
10776
23127b33
MKL
10777 if (!btf_name_by_offset(btf, linfo[i].line_off) ||
10778 !btf_name_by_offset(btf, linfo[i].file_name_off)) {
c454a46b
MKL
10779 verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
10780 err = -EINVAL;
10781 goto err_free;
10782 }
10783
10784 if (s != env->subprog_cnt) {
10785 if (linfo[i].insn_off == sub[s].start) {
10786 sub[s].linfo_idx = i;
10787 s++;
10788 } else if (sub[s].start < linfo[i].insn_off) {
10789 verbose(env, "missing bpf_line_info for func#%u\n", s);
10790 err = -EINVAL;
10791 goto err_free;
10792 }
10793 }
10794
10795 prev_offset = linfo[i].insn_off;
af2ac3e1 10796 bpfptr_add(&ulinfo, rec_size);
c454a46b
MKL
10797 }
10798
10799 if (s != env->subprog_cnt) {
10800 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
10801 env->subprog_cnt - s, s);
10802 err = -EINVAL;
10803 goto err_free;
10804 }
10805
10806 prog->aux->linfo = linfo;
10807 prog->aux->nr_linfo = nr_linfo;
10808
10809 return 0;
10810
10811err_free:
10812 kvfree(linfo);
10813 return err;
10814}
10815
fbd94c7a
AS
10816#define MIN_CORE_RELO_SIZE sizeof(struct bpf_core_relo)
10817#define MAX_CORE_RELO_SIZE MAX_FUNCINFO_REC_SIZE
10818
10819static int check_core_relo(struct bpf_verifier_env *env,
10820 const union bpf_attr *attr,
10821 bpfptr_t uattr)
10822{
10823 u32 i, nr_core_relo, ncopy, expected_size, rec_size;
10824 struct bpf_core_relo core_relo = {};
10825 struct bpf_prog *prog = env->prog;
10826 const struct btf *btf = prog->aux->btf;
10827 struct bpf_core_ctx ctx = {
10828 .log = &env->log,
10829 .btf = btf,
10830 };
10831 bpfptr_t u_core_relo;
10832 int err;
10833
10834 nr_core_relo = attr->core_relo_cnt;
10835 if (!nr_core_relo)
10836 return 0;
10837 if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
10838 return -EINVAL;
10839
10840 rec_size = attr->core_relo_rec_size;
10841 if (rec_size < MIN_CORE_RELO_SIZE ||
10842 rec_size > MAX_CORE_RELO_SIZE ||
10843 rec_size % sizeof(u32))
10844 return -EINVAL;
10845
10846 u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
10847 expected_size = sizeof(struct bpf_core_relo);
10848 ncopy = min_t(u32, expected_size, rec_size);
10849
10850 /* Unlike func_info and line_info, copy and apply each CO-RE
10851 * relocation record one at a time.
10852 */
10853 for (i = 0; i < nr_core_relo; i++) {
10854 /* future proofing when sizeof(bpf_core_relo) changes */
10855 err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
10856 if (err) {
10857 if (err == -E2BIG) {
10858				verbose(env, "nonzero trailing record in core_relo");
10859 if (copy_to_bpfptr_offset(uattr,
10860 offsetof(union bpf_attr, core_relo_rec_size),
10861 &expected_size, sizeof(expected_size)))
10862 err = -EFAULT;
10863 }
10864 break;
10865 }
10866
10867 if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) {
10868 err = -EFAULT;
10869 break;
10870 }
10871
10872 if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
10873 verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
10874 i, core_relo.insn_off, prog->len);
10875 err = -EINVAL;
10876 break;
10877 }
10878
10879 err = bpf_core_apply(&ctx, &core_relo, i,
10880 &prog->insnsi[core_relo.insn_off / 8]);
10881 if (err)
10882 break;
10883 bpfptr_add(&u_core_relo, rec_size);
10884 }
10885 return err;
10886}
10887
c454a46b
MKL
10888static int check_btf_info(struct bpf_verifier_env *env,
10889 const union bpf_attr *attr,
af2ac3e1 10890 bpfptr_t uattr)
c454a46b
MKL
10891{
10892 struct btf *btf;
10893 int err;
10894
09b28d76
AS
10895 if (!attr->func_info_cnt && !attr->line_info_cnt) {
10896 if (check_abnormal_return(env))
10897 return -EINVAL;
c454a46b 10898 return 0;
09b28d76 10899 }
c454a46b
MKL
10900
10901 btf = btf_get_by_fd(attr->prog_btf_fd);
10902 if (IS_ERR(btf))
10903 return PTR_ERR(btf);
350a5c4d
AS
10904 if (btf_is_kernel(btf)) {
10905 btf_put(btf);
10906 return -EACCES;
10907 }
c454a46b
MKL
10908 env->prog->aux->btf = btf;
10909
10910 err = check_btf_func(env, attr, uattr);
10911 if (err)
10912 return err;
10913
10914 err = check_btf_line(env, attr, uattr);
10915 if (err)
10916 return err;
10917
fbd94c7a
AS
10918 err = check_core_relo(env, attr, uattr);
10919 if (err)
10920 return err;
10921
c454a46b 10922 return 0;
ba64e7d8
YS
10923}
10924
f1174f77
EC
10925/* check %cur's range satisfies %old's */
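/* e.g. an old umin/umax of [2, 10] is satisfied by a cur of [3, 8] but not
 * by [1, 8]; the same containment is required for the signed and the 32-bit
 * bounds.
 */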
10926static bool range_within(struct bpf_reg_state *old,
10927 struct bpf_reg_state *cur)
10928{
b03c9f9f
EC
10929 return old->umin_value <= cur->umin_value &&
10930 old->umax_value >= cur->umax_value &&
10931 old->smin_value <= cur->smin_value &&
fd675184
DB
10932 old->smax_value >= cur->smax_value &&
10933 old->u32_min_value <= cur->u32_min_value &&
10934 old->u32_max_value >= cur->u32_max_value &&
10935 old->s32_min_value <= cur->s32_min_value &&
10936 old->s32_max_value >= cur->s32_max_value;
f1174f77
EC
10937}
10938
f1174f77
EC
10939/* If in the old state two registers had the same id, then they need to have
10940 * the same id in the new state as well. But that id could be different from
10941 * the old state, so we need to track the mapping from old to new ids.
10942 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
10943 * regs with old id 5 must also have new id 9 for the new state to be safe. But
10944 * regs with a different old id could still have new id 9, we don't care about
10945 * that.
10946 * So we look through our idmap to see if this old id has been seen before. If
10947 * so, we require the new id to match; otherwise, we add the id pair to the map.
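 *
 * For example, if in the old state r1 and r2 both had id 5 and in the new
 * state r1 has id 9, then r2 must also have id 9 for the states to match.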
969bf05e 10948 */
c9e73e3d 10949static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
969bf05e 10950{
f1174f77 10951 unsigned int i;
969bf05e 10952
c9e73e3d 10953 for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
f1174f77
EC
10954 if (!idmap[i].old) {
10955 /* Reached an empty slot; haven't seen this id before */
10956 idmap[i].old = old_id;
10957 idmap[i].cur = cur_id;
10958 return true;
10959 }
10960 if (idmap[i].old == old_id)
10961 return idmap[i].cur == cur_id;
10962 }
10963 /* We ran out of idmap slots, which should be impossible */
10964 WARN_ON_ONCE(1);
10965 return false;
10966}
10967
9242b5f5
AS
10968static void clean_func_state(struct bpf_verifier_env *env,
10969 struct bpf_func_state *st)
10970{
10971 enum bpf_reg_liveness live;
10972 int i, j;
10973
10974 for (i = 0; i < BPF_REG_FP; i++) {
10975 live = st->regs[i].live;
10976 /* liveness must not touch this register anymore */
10977 st->regs[i].live |= REG_LIVE_DONE;
10978 if (!(live & REG_LIVE_READ))
10979 /* since the register is unused, clear its state
10980 * to make further comparison simpler
10981 */
f54c7898 10982 __mark_reg_not_init(env, &st->regs[i]);
9242b5f5
AS
10983 }
10984
10985 for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
10986 live = st->stack[i].spilled_ptr.live;
10987 /* liveness must not touch this stack slot anymore */
10988 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
10989 if (!(live & REG_LIVE_READ)) {
f54c7898 10990 __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
9242b5f5
AS
10991 for (j = 0; j < BPF_REG_SIZE; j++)
10992 st->stack[i].slot_type[j] = STACK_INVALID;
10993 }
10994 }
10995}
10996
10997static void clean_verifier_state(struct bpf_verifier_env *env,
10998 struct bpf_verifier_state *st)
10999{
11000 int i;
11001
11002 if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
11003 /* all regs in this state in all frames were already marked */
11004 return;
11005
11006 for (i = 0; i <= st->curframe; i++)
11007 clean_func_state(env, st->frame[i]);
11008}
11009
11010/* the parentage chains form a tree.
11011 * the verifier states are added to state lists at given insn and
11012 * pushed into state stack for future exploration.
11013 * when the verifier reaches the bpf_exit insn, some of the verifier states
11014 * stored in the state lists have their final liveness state already,
11015 * but a lot of states will get revised from liveness point of view when
11016 * the verifier explores other branches.
11017 * Example:
11018 * 1: r0 = 1
11019 * 2: if r1 == 100 goto pc+1
11020 * 3: r0 = 2
11021 * 4: exit
11022 * when the verifier reaches exit insn the register r0 in the state list of
11023 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
11024 * of insn 2 and goes exploring further. At insn 4 it will walk the
11025 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
11026 *
11027 * Since the verifier pushes the branch states as it sees them while exploring
11028 * the program, the fact that it walks a branch instruction for the second
11029 * time means that all states below this branch were already explored and
8fb33b60 11030 * their final liveness marks are already propagated.
9242b5f5
AS
11031 * Hence when the verifier completes the search of state list in is_state_visited()
11032 * we can call this clean_live_states() function to mark all liveness states
11033 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
11034 * will not be used.
11035 * This function also clears the registers and stack for states that !READ
11036 * to simplify state merging.
11037 *
11038 * An important note here is that walking the same branch instruction in the callee
11039 * doesn't mean that the states are DONE. The verifier has to compare
11040 * the callsites
11041 */
11042static void clean_live_states(struct bpf_verifier_env *env, int insn,
11043 struct bpf_verifier_state *cur)
11044{
11045 struct bpf_verifier_state_list *sl;
11046 int i;
11047
5d839021 11048 sl = *explored_state(env, insn);
a8f500af 11049 while (sl) {
2589726d
AS
11050 if (sl->state.branches)
11051 goto next;
dc2a4ebc
AS
11052 if (sl->state.insn_idx != insn ||
11053 sl->state.curframe != cur->curframe)
9242b5f5
AS
11054 goto next;
11055 for (i = 0; i <= cur->curframe; i++)
11056 if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
11057 goto next;
11058 clean_verifier_state(env, &sl->state);
11059next:
11060 sl = sl->next;
11061 }
11062}
11063
f1174f77 11064/* Returns true if (rold safe implies rcur safe) */
e042aa53
DB
11065static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
11066 struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
f1174f77 11067{
f4d7e40a
AS
11068 bool equal;
11069
dc503a8a
EC
11070 if (!(rold->live & REG_LIVE_READ))
11071 /* explored state didn't use this */
11072 return true;
11073
679c782d 11074 equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
f4d7e40a
AS
11075
11076 if (rold->type == PTR_TO_STACK)
11077 /* two stack pointers are equal only if they're pointing to
11078 * the same stack frame, since fp-8 in foo != fp-8 in bar
11079 */
11080 return equal && rold->frameno == rcur->frameno;
11081
11082 if (equal)
969bf05e
AS
11083 return true;
11084
f1174f77
EC
11085 if (rold->type == NOT_INIT)
11086 /* explored state can't have used this */
969bf05e 11087 return true;
f1174f77
EC
11088 if (rcur->type == NOT_INIT)
11089 return false;
c25b2ae1 11090 switch (base_type(rold->type)) {
f1174f77 11091 case SCALAR_VALUE:
e042aa53
DB
11092 if (env->explore_alu_limits)
11093 return false;
f1174f77 11094 if (rcur->type == SCALAR_VALUE) {
b5dc0163
AS
11095 if (!rold->precise && !rcur->precise)
11096 return true;
f1174f77
EC
11097 /* new val must satisfy old val knowledge */
11098 return range_within(rold, rcur) &&
11099 tnum_in(rold->var_off, rcur->var_off);
11100 } else {
179d1c56
JH
11101 /* We're trying to use a pointer in place of a scalar.
11102 * Even if the scalar was unbounded, this could lead to
11103 * pointer leaks because scalars are allowed to leak
11104 * while pointers are not. We could make this safe in
11105 * special cases if root is calling us, but it's
11106 * probably not worth the hassle.
f1174f77 11107 */
179d1c56 11108 return false;
f1174f77 11109 }
69c087ba 11110 case PTR_TO_MAP_KEY:
f1174f77 11111 case PTR_TO_MAP_VALUE:
c25b2ae1
HL
11112 /* a PTR_TO_MAP_VALUE could be safe to use as a
11113 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
11114 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
11115 * checked, doing so could have affected others with the same
11116 * id, and we can't check for that because we lost the id when
11117 * we converted to a PTR_TO_MAP_VALUE.
11118 */
11119 if (type_may_be_null(rold->type)) {
11120 if (!type_may_be_null(rcur->type))
11121 return false;
11122 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
11123 return false;
11124 /* Check our ids match any regs they're supposed to */
11125 return check_ids(rold->id, rcur->id, idmap);
11126 }
11127
1b688a19
EC
11128 /* If the new min/max/var_off satisfy the old ones and
11129 * everything else matches, we are OK.
d83525ca
AS
11130 * 'id' is not compared, since it's only used for maps with
11131 * bpf_spin_lock inside map element and in such cases if
11132 * the rest of the prog is valid for one map element then
11133 * it's valid for all map elements regardless of the key
11134 * used in bpf_map_lookup()
1b688a19
EC
11135 */
11136 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
11137 range_within(rold, rcur) &&
11138 tnum_in(rold->var_off, rcur->var_off);
de8f3a83 11139 case PTR_TO_PACKET_META:
f1174f77 11140 case PTR_TO_PACKET:
de8f3a83 11141 if (rcur->type != rold->type)
f1174f77
EC
11142 return false;
11143 /* We must have at least as much range as the old ptr
11144 * did, so that any accesses which were safe before are
11145 * still safe. This is true even if old range < old off,
11146 * since someone could have accessed through (ptr - k), or
11147 * even done ptr -= k in a register, to get a safe access.
11148 */
11149 if (rold->range > rcur->range)
11150 return false;
11151 /* If the offsets don't match, we can't trust our alignment;
11152 * nor can we be sure that we won't fall out of range.
11153 */
11154 if (rold->off != rcur->off)
11155 return false;
11156 /* id relations must be preserved */
11157 if (rold->id && !check_ids(rold->id, rcur->id, idmap))
11158 return false;
11159 /* new val must satisfy old val knowledge */
11160 return range_within(rold, rcur) &&
11161 tnum_in(rold->var_off, rcur->var_off);
11162 case PTR_TO_CTX:
11163 case CONST_PTR_TO_MAP:
f1174f77 11164 case PTR_TO_PACKET_END:
d58e468b 11165 case PTR_TO_FLOW_KEYS:
c64b7983 11166 case PTR_TO_SOCKET:
46f8bc92 11167 case PTR_TO_SOCK_COMMON:
655a51e5 11168 case PTR_TO_TCP_SOCK:
fada7fdc 11169 case PTR_TO_XDP_SOCK:
f1174f77
EC
11170 /* Only valid matches are exact, which memcmp() above
11171 * would have accepted
11172 */
11173 default:
11174 /* Don't know what's going on, just say it's not safe */
11175 return false;
11176 }
969bf05e 11177
f1174f77
EC
11178 /* Shouldn't get here; if we do, say it's not safe */
11179 WARN_ON_ONCE(1);
969bf05e
AS
11180 return false;
11181}
11182
e042aa53
DB
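/* Returns true if the explored (old) stack state is at least as conservative
 * as the current one: every slot the old state actually read must exist in
 * the current stack with a compatible slot type and, for spilled registers,
 * a register state that regsafe() accepts.
 */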
11183static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
11184 struct bpf_func_state *cur, struct bpf_id_pair *idmap)
638f5b90
AS
11185{
11186 int i, spi;
11187
638f5b90
AS
11188 /* walk slots of the explored stack and ignore any additional
11189	 * slots in the current stack, since the explored (safe) state
11190 * didn't use them
11191 */
11192 for (i = 0; i < old->allocated_stack; i++) {
11193 spi = i / BPF_REG_SIZE;
11194
b233920c
AS
11195 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
11196 i += BPF_REG_SIZE - 1;
cc2b14d5 11197 /* explored state didn't use this */
fd05e57b 11198 continue;
b233920c 11199 }
cc2b14d5 11200
638f5b90
AS
11201 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
11202 continue;
19e2dbb7
AS
11203
11204 /* explored stack has more populated slots than current stack
11205 * and these slots were used
11206 */
11207 if (i >= cur->allocated_stack)
11208 return false;
11209
cc2b14d5
AS
11210 /* if old state was safe with misc data in the stack
11211 * it will be safe with zero-initialized stack.
11212 * The opposite is not true
11213 */
11214 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
11215 cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
11216 continue;
638f5b90
AS
11217 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
11218 cur->stack[spi].slot_type[i % BPF_REG_SIZE])
11219 /* Ex: old explored (safe) state has STACK_SPILL in
b8c1a309 11220 * this stack slot, but current has STACK_MISC ->
638f5b90
AS
11221			 * these verifier states are not equivalent,
11222 * return false to continue verification of this path
11223 */
11224 return false;
27113c59 11225 if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
638f5b90 11226 continue;
27113c59 11227 if (!is_spilled_reg(&old->stack[spi]))
638f5b90 11228 continue;
e042aa53
DB
11229 if (!regsafe(env, &old->stack[spi].spilled_ptr,
11230 &cur->stack[spi].spilled_ptr, idmap))
638f5b90
AS
11231 /* when explored and current stack slot are both storing
11232			 * spilled registers, check that stored pointer types
11233 * are the same as well.
11234 * Ex: explored safe path could have stored
11235 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
11236 * but current path has stored:
11237 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
11238 * such verifier states are not equivalent.
11239 * return false to continue verification of this path
11240 */
11241 return false;
11242 }
11243 return true;
11244}
11245
fd978bf7
JS
11246static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
11247{
11248 if (old->acquired_refs != cur->acquired_refs)
11249 return false;
11250 return !memcmp(old->refs, cur->refs,
11251 sizeof(*old->refs) * old->acquired_refs);
11252}
11253
f1bca824
AS
11254/* compare two verifier states
11255 *
11256 * all states stored in state_list are known to be valid, since
11257 * verifier reached 'bpf_exit' instruction through them
11258 *
11259 * this function is called when the verifier explores different branches of
11260 * execution popped from the state stack. If it sees an old state that has
11261 * more strict register state and more strict stack state then this execution
11262 * branch doesn't need to be explored further, since verifier already
11263 * concluded that more strict state leads to valid finish.
11264 *
11265 * Therefore two states are equivalent if register state is more conservative
11266 * and explored stack state is more conservative than the current one.
11267 * Example:
11268 * explored current
11269 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
11270 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
11271 *
11272 * In other words if current stack state (one being explored) has more
11273 * valid slots than old one that already passed validation, it means
11274 * the verifier can stop exploring and conclude that current state is valid too
11275 *
11276 * Similarly with registers. If explored state has register type as invalid
11277 * whereas register type in current state is meaningful, it means that
11278 * the current state will reach 'bpf_exit' instruction safely
11279 */
c9e73e3d 11280static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
f4d7e40a 11281 struct bpf_func_state *cur)
f1bca824
AS
11282{
11283 int i;
11284
c9e73e3d
LB
11285 memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
11286 for (i = 0; i < MAX_BPF_REG; i++)
e042aa53
DB
11287 if (!regsafe(env, &old->regs[i], &cur->regs[i],
11288 env->idmap_scratch))
c9e73e3d 11289 return false;
f1bca824 11290
e042aa53 11291 if (!stacksafe(env, old, cur, env->idmap_scratch))
c9e73e3d 11292 return false;
fd978bf7
JS
11293
11294 if (!refsafe(old, cur))
c9e73e3d
LB
11295 return false;
11296
11297 return true;
f1bca824
AS
11298}
11299
f4d7e40a
AS
11300static bool states_equal(struct bpf_verifier_env *env,
11301 struct bpf_verifier_state *old,
11302 struct bpf_verifier_state *cur)
11303{
11304 int i;
11305
11306 if (old->curframe != cur->curframe)
11307 return false;
11308
979d63d5
DB
11309 /* Verification state from speculative execution simulation
11310 * must never prune a non-speculative execution one.
11311 */
11312 if (old->speculative && !cur->speculative)
11313 return false;
11314
d83525ca
AS
11315 if (old->active_spin_lock != cur->active_spin_lock)
11316 return false;
11317
f4d7e40a
AS
11318 /* for states to be equal callsites have to be the same
11319 * and all frame states need to be equivalent
11320 */
11321 for (i = 0; i <= old->curframe; i++) {
11322 if (old->frame[i]->callsite != cur->frame[i]->callsite)
11323 return false;
c9e73e3d 11324 if (!func_states_equal(env, old->frame[i], cur->frame[i]))
f4d7e40a
AS
11325 return false;
11326 }
11327 return true;
11328}
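/* For example (illustrative): two states that agree on every register and
 * stack slot are still not treated as equal above if they were reached
 * through different call sites (frame[i]->callsite differs), since returning
 * from the callee would continue at different instructions, if they disagree
 * on which spin lock (if any) is currently held, or if the old state comes
 * from speculative-execution simulation while the current one does not.
 */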
11329
5327ed3d
JW
11330/* Return 0 if no propagation happened. Return negative error code if error
11331 * happened. Otherwise, return the propagated bit.
11332 */
55e7f3b5
JW
11333static int propagate_liveness_reg(struct bpf_verifier_env *env,
11334 struct bpf_reg_state *reg,
11335 struct bpf_reg_state *parent_reg)
11336{
5327ed3d
JW
11337 u8 parent_flag = parent_reg->live & REG_LIVE_READ;
11338 u8 flag = reg->live & REG_LIVE_READ;
55e7f3b5
JW
11339 int err;
11340
5327ed3d
JW
11341	/* When we come here, the read flags of PARENT_REG or REG could be any of
11342	 * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
11343	 * for propagation if PARENT_REG already has the strongest REG_LIVE_READ64.
11344 */
11345 if (parent_flag == REG_LIVE_READ64 ||
11346 /* Or if there is no read flag from REG. */
11347 !flag ||
11348 /* Or if the read flag from REG is the same as PARENT_REG. */
11349 parent_flag == flag)
55e7f3b5
JW
11350 return 0;
11351
5327ed3d 11352 err = mark_reg_read(env, reg, parent_reg, flag);
55e7f3b5
JW
11353 if (err)
11354 return err;
11355
5327ed3d 11356 return flag;
55e7f3b5
JW
11357}
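/* A worked example (illustrative): if REG was read as a full 64-bit value
 * (flag == REG_LIVE_READ64) while PARENT_REG so far only carries
 * REG_LIVE_READ32, the stronger 64-bit read is propagated into the parent
 * chain via mark_reg_read() and REG_LIVE_READ64 is returned, which the
 * caller uses to mark the parent's 32-bit defining insn as needing an
 * explicit zero extension (mark_insn_zext()).
 */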
11358
8e9cd9ce 11359/* A write screens off any subsequent reads; but write marks come from the
f4d7e40a
AS
11360 * straight-line code between a state and its parent. When we arrive at an
11361 * equivalent state (jump target or such) we didn't arrive by the straight-line
11362 * code, so read marks in the state must propagate to the parent regardless
11363 * of the state's write marks. That's what 'parent == state->parent' comparison
679c782d 11364 * in mark_reg_read() is for.
8e9cd9ce 11365 */
f4d7e40a
AS
11366static int propagate_liveness(struct bpf_verifier_env *env,
11367 const struct bpf_verifier_state *vstate,
11368 struct bpf_verifier_state *vparent)
dc503a8a 11369{
3f8cafa4 11370 struct bpf_reg_state *state_reg, *parent_reg;
f4d7e40a 11371 struct bpf_func_state *state, *parent;
3f8cafa4 11372 int i, frame, err = 0;
dc503a8a 11373
f4d7e40a
AS
11374 if (vparent->curframe != vstate->curframe) {
11375 WARN(1, "propagate_live: parent frame %d current frame %d\n",
11376 vparent->curframe, vstate->curframe);
11377 return -EFAULT;
11378 }
dc503a8a
EC
11379 /* Propagate read liveness of registers... */
11380 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
83d16312 11381 for (frame = 0; frame <= vstate->curframe; frame++) {
3f8cafa4
JW
11382 parent = vparent->frame[frame];
11383 state = vstate->frame[frame];
11384 parent_reg = parent->regs;
11385 state_reg = state->regs;
83d16312
JK
11386 /* We don't need to worry about FP liveness, it's read-only */
11387 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
55e7f3b5
JW
11388 err = propagate_liveness_reg(env, &state_reg[i],
11389 &parent_reg[i]);
5327ed3d 11390 if (err < 0)
3f8cafa4 11391 return err;
5327ed3d
JW
11392 if (err == REG_LIVE_READ64)
11393 mark_insn_zext(env, &parent_reg[i]);
dc503a8a 11394 }
f4d7e40a 11395
1b04aee7 11396 /* Propagate stack slots. */
f4d7e40a
AS
11397 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
11398 i < parent->allocated_stack / BPF_REG_SIZE; i++) {
3f8cafa4
JW
11399 parent_reg = &parent->stack[i].spilled_ptr;
11400 state_reg = &state->stack[i].spilled_ptr;
55e7f3b5
JW
11401 err = propagate_liveness_reg(env, state_reg,
11402 parent_reg);
5327ed3d 11403 if (err < 0)
3f8cafa4 11404 return err;
dc503a8a
EC
11405 }
11406 }
5327ed3d 11407 return 0;
dc503a8a
EC
11408}
11409
a3ce685d
AS
11410/* find precise scalars in the previous equivalent state and
11411 * propagate them into the current state
11412 */
11413static int propagate_precision(struct bpf_verifier_env *env,
11414 const struct bpf_verifier_state *old)
11415{
11416 struct bpf_reg_state *state_reg;
11417 struct bpf_func_state *state;
11418 int i, err = 0;
11419
11420 state = old->frame[old->curframe];
11421 state_reg = state->regs;
11422 for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
11423 if (state_reg->type != SCALAR_VALUE ||
11424 !state_reg->precise)
11425 continue;
11426 if (env->log.level & BPF_LOG_LEVEL2)
11427 verbose(env, "propagating r%d\n", i);
11428 err = mark_chain_precision(env, i);
11429 if (err < 0)
11430 return err;
11431 }
11432
11433 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
27113c59 11434 if (!is_spilled_reg(&state->stack[i]))
a3ce685d
AS
11435 continue;
11436 state_reg = &state->stack[i].spilled_ptr;
11437 if (state_reg->type != SCALAR_VALUE ||
11438 !state_reg->precise)
11439 continue;
11440 if (env->log.level & BPF_LOG_LEVEL2)
11441 verbose(env, "propagating fp%d\n",
11442 (-i - 1) * BPF_REG_SIZE);
11443 err = mark_chain_precision_stack(env, i);
11444 if (err < 0)
11445 return err;
11446 }
11447 return 0;
11448}
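/* For example (illustrative): if the already-verified old state could only
 * prove a bounds check because r3 and one spilled scalar at fp-16 were
 * tracked precisely, an otherwise-equivalent current state must mark the
 * same register and stack slot precise as well, which is what the two
 * mark_chain_precision*() walks above do before the state is pruned.
 */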
11449
2589726d
AS
11450static bool states_maybe_looping(struct bpf_verifier_state *old,
11451 struct bpf_verifier_state *cur)
11452{
11453 struct bpf_func_state *fold, *fcur;
11454 int i, fr = cur->curframe;
11455
11456 if (old->curframe != fr)
11457 return false;
11458
11459 fold = old->frame[fr];
11460 fcur = cur->frame[fr];
11461 for (i = 0; i < MAX_BPF_REG; i++)
11462 if (memcmp(&fold->regs[i], &fcur->regs[i],
11463 offsetof(struct bpf_reg_state, parent)))
11464 return false;
11465 return true;
11466}
11467
11468
58e2af8b 11469static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
f1bca824 11470{
58e2af8b 11471 struct bpf_verifier_state_list *new_sl;
9f4686c4 11472 struct bpf_verifier_state_list *sl, **pprev;
679c782d 11473 struct bpf_verifier_state *cur = env->cur_state, *new;
ceefbc96 11474 int i, j, err, states_cnt = 0;
10d274e8 11475 bool add_new_state = env->test_state_freq ? true : false;
f1bca824 11476
b5dc0163 11477 cur->last_insn_idx = env->prev_insn_idx;
a8f500af 11478 if (!env->insn_aux_data[insn_idx].prune_point)
f1bca824
AS
11479 /* this 'insn_idx' instruction wasn't marked, so we will not
11480 * be doing state search here
11481 */
11482 return 0;
11483
2589726d
AS
11484	/* bpf progs typically have a pruning point every 4 instructions
11485 * http://vger.kernel.org/bpfconf2019.html#session-1
11486 * Do not add new state for future pruning if the verifier hasn't seen
11487 * at least 2 jumps and at least 8 instructions.
11488	 * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
11489	 * In tests that amounts to up to a 50% reduction in total verifier
11490 * memory consumption and 20% verifier time speedup.
11491 */
11492 if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
11493 env->insn_processed - env->prev_insn_processed >= 8)
11494 add_new_state = true;
11495
a8f500af
AS
11496 pprev = explored_state(env, insn_idx);
11497 sl = *pprev;
11498
9242b5f5
AS
11499 clean_live_states(env, insn_idx, cur);
11500
a8f500af 11501 while (sl) {
dc2a4ebc
AS
11502 states_cnt++;
11503 if (sl->state.insn_idx != insn_idx)
11504 goto next;
bfc6bb74 11505
2589726d 11506 if (sl->state.branches) {
bfc6bb74
AS
11507 struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
11508
11509 if (frame->in_async_callback_fn &&
11510 frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
11511 /* Different async_entry_cnt means that the verifier is
11512 * processing another entry into async callback.
11513 * Seeing the same state is not an indication of infinite
11514 * loop or infinite recursion.
11515 * But finding the same state doesn't mean that it's safe
11516 * to stop processing the current state. The previous state
11517 * hasn't yet reached bpf_exit, since state.branches > 0.
11518			 * Checking in_async_callback_fn alone is not enough either,
11519			 * since the verifier still needs to catch infinite loops
11520			 * inside async callbacks.
11521 */
11522 } else if (states_maybe_looping(&sl->state, cur) &&
11523 states_equal(env, &sl->state, cur)) {
2589726d
AS
11524 verbose_linfo(env, insn_idx, "; ");
11525 verbose(env, "infinite loop detected at insn %d\n", insn_idx);
11526 return -EINVAL;
11527 }
11528 /* if the verifier is processing a loop, avoid adding new state
11529 * too often, since different loop iterations have distinct
11530 * states and may not help future pruning.
11531 * This threshold shouldn't be too low to make sure that
11532 * a loop with large bound will be rejected quickly.
11533 * The most abusive loop will be:
11534 * r1 += 1
11535 * if r1 < 1000000 goto pc-2
11536		 * 1M insn_processed limit / 100 == 10k peak states.
11537 * This threshold shouldn't be too high either, since states
11538 * at the end of the loop are likely to be useful in pruning.
11539 */
11540 if (env->jmps_processed - env->prev_jmps_processed < 20 &&
11541 env->insn_processed - env->prev_insn_processed < 100)
11542 add_new_state = false;
11543 goto miss;
11544 }
638f5b90 11545 if (states_equal(env, &sl->state, cur)) {
9f4686c4 11546 sl->hit_cnt++;
f1bca824 11547 /* reached equivalent register/stack state,
dc503a8a
EC
11548 * prune the search.
11549 * Registers read by the continuation are read by us.
8e9cd9ce
EC
11550 * If we have any write marks in env->cur_state, they
11551 * will prevent corresponding reads in the continuation
11552 * from reaching our parent (an explored_state). Our
11553 * own state will get the read marks recorded, but
11554 * they'll be immediately forgotten as we're pruning
11555 * this state and will pop a new one.
f1bca824 11556 */
f4d7e40a 11557 err = propagate_liveness(env, &sl->state, cur);
a3ce685d
AS
11558
11559 /* if previous state reached the exit with precision and
11560			 * current state is equivalent to it (except precision marks)
11561 * the precision needs to be propagated back in
11562 * the current state.
11563 */
11564 err = err ? : push_jmp_history(env, cur);
11565 err = err ? : propagate_precision(env, &sl->state);
f4d7e40a
AS
11566 if (err)
11567 return err;
f1bca824 11568 return 1;
dc503a8a 11569 }
2589726d
AS
11570miss:
11571		/* when a new state is not going to be added, do not increase the miss count.
11572 * Otherwise several loop iterations will remove the state
11573 * recorded earlier. The goal of these heuristics is to have
11574 * states from some iterations of the loop (some in the beginning
11575 * and some at the end) to help pruning.
11576 */
11577 if (add_new_state)
11578 sl->miss_cnt++;
9f4686c4
AS
11579 /* heuristic to determine whether this state is beneficial
11580		 * to keep checking from a state equivalence point of view.
11581 * Higher numbers increase max_states_per_insn and verification time,
11582 * but do not meaningfully decrease insn_processed.
11583 */
11584 if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
11585 /* the state is unlikely to be useful. Remove it to
11586 * speed up verification
11587 */
11588 *pprev = sl->next;
11589 if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
2589726d
AS
11590 u32 br = sl->state.branches;
11591
11592 WARN_ONCE(br,
11593 "BUG live_done but branches_to_explore %d\n",
11594 br);
9f4686c4
AS
11595 free_verifier_state(&sl->state, false);
11596 kfree(sl);
11597 env->peak_states--;
11598 } else {
11599 /* cannot free this state, since parentage chain may
11600 * walk it later. Add it for free_list instead to
11601 * be freed at the end of verification
11602 */
11603 sl->next = env->free_list;
11604 env->free_list = sl;
11605 }
11606 sl = *pprev;
11607 continue;
11608 }
dc2a4ebc 11609next:
9f4686c4
AS
11610 pprev = &sl->next;
11611 sl = *pprev;
f1bca824
AS
11612 }
11613
06ee7115
AS
11614 if (env->max_states_per_insn < states_cnt)
11615 env->max_states_per_insn = states_cnt;
11616
2c78ee89 11617 if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
b5dc0163 11618 return push_jmp_history(env, cur);
ceefbc96 11619
2589726d 11620 if (!add_new_state)
b5dc0163 11621 return push_jmp_history(env, cur);
ceefbc96 11622
2589726d
AS
11623 /* There were no equivalent states, remember the current one.
11624 * Technically the current state is not proven to be safe yet,
f4d7e40a 11625	 * but it will either reach the outermost bpf_exit (which means it's safe)
2589726d 11626 * or it will be rejected. When there are no loops the verifier won't be
f4d7e40a 11627 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
2589726d
AS
11628 * again on the way to bpf_exit.
11629 * When looping the sl->state.branches will be > 0 and this state
11630 * will not be considered for equivalence until branches == 0.
f1bca824 11631 */
638f5b90 11632 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
f1bca824
AS
11633 if (!new_sl)
11634 return -ENOMEM;
06ee7115
AS
11635 env->total_states++;
11636 env->peak_states++;
2589726d
AS
11637 env->prev_jmps_processed = env->jmps_processed;
11638 env->prev_insn_processed = env->insn_processed;
f1bca824
AS
11639
11640 /* add new state to the head of linked list */
679c782d
EC
11641 new = &new_sl->state;
11642 err = copy_verifier_state(new, cur);
1969db47 11643 if (err) {
679c782d 11644 free_verifier_state(new, false);
1969db47
AS
11645 kfree(new_sl);
11646 return err;
11647 }
dc2a4ebc 11648 new->insn_idx = insn_idx;
2589726d
AS
11649 WARN_ONCE(new->branches != 1,
11650 "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
b5dc0163 11651
2589726d 11652 cur->parent = new;
b5dc0163
AS
11653 cur->first_insn_idx = insn_idx;
11654 clear_jmp_history(cur);
5d839021
AS
11655 new_sl->next = *explored_state(env, insn_idx);
11656 *explored_state(env, insn_idx) = new_sl;
7640ead9
JK
11657 /* connect new state to parentage chain. Current frame needs all
11658 * registers connected. Only r6 - r9 of the callers are alive (pushed
11659 * to the stack implicitly by JITs) so in callers' frames connect just
11660 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
11661 * the state of the call instruction (with WRITTEN set), and r0 comes
11662 * from callee with its full parentage chain, anyway.
11663 */
8e9cd9ce
EC
11664 /* clear write marks in current state: the writes we did are not writes
11665 * our child did, so they don't screen off its reads from us.
11666 * (There are no read marks in current state, because reads always mark
11667 * their parent and current state never has children yet. Only
11668 * explored_states can get read marks.)
11669 */
eea1c227
AS
11670 for (j = 0; j <= cur->curframe; j++) {
11671 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
11672 cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
11673 for (i = 0; i < BPF_REG_FP; i++)
11674 cur->frame[j]->regs[i].live = REG_LIVE_NONE;
11675 }
f4d7e40a
AS
11676
11677 /* all stack frames are accessible from callee, clear them all */
11678 for (j = 0; j <= cur->curframe; j++) {
11679 struct bpf_func_state *frame = cur->frame[j];
679c782d 11680 struct bpf_func_state *newframe = new->frame[j];
f4d7e40a 11681
679c782d 11682 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
cc2b14d5 11683 frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
679c782d
EC
11684 frame->stack[i].spilled_ptr.parent =
11685 &newframe->stack[i].spilled_ptr;
11686 }
f4d7e40a 11687 }
f1bca824
AS
11688 return 0;
11689}
11690
c64b7983
JS
11691/* Return true if it's OK to have the same insn return a different type. */
11692static bool reg_type_mismatch_ok(enum bpf_reg_type type)
11693{
c25b2ae1 11694 switch (base_type(type)) {
c64b7983
JS
11695 case PTR_TO_CTX:
11696 case PTR_TO_SOCKET:
46f8bc92 11697 case PTR_TO_SOCK_COMMON:
655a51e5 11698 case PTR_TO_TCP_SOCK:
fada7fdc 11699 case PTR_TO_XDP_SOCK:
2a02759e 11700 case PTR_TO_BTF_ID:
c64b7983
JS
11701 return false;
11702 default:
11703 return true;
11704 }
11705}
11706
11707/* If an instruction was previously used with particular pointer types, then we
11708 * need to be careful to avoid cases such as the one below, where the access
11709 * may be ok for one branch but not ok for the other branch:
11710 *
11711 * R1 = sock_ptr
11712 * goto X;
11713 * ...
11714 * R1 = some_other_valid_ptr;
11715 * goto X;
11716 * ...
11717 * R2 = *(u32 *)(R1 + 0);
11718 */
11719static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
11720{
11721 return src != prev && (!reg_type_mismatch_ok(src) ||
11722 !reg_type_mismatch_ok(prev));
11723}
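/* An illustrative (hypothetical) shape of a program this guards against,
 * with the map-lookup setup and NULL check elided:
 *
 *	r6 = r1			// r6 = ctx (PTR_TO_CTX)
 *	if r2 == 0 goto +N
 *	...			// bpf_map_lookup_elem(...), NULL check
 *	r6 = r0			// r6 = map value (PTR_TO_MAP_VALUE)
 *	r0 = *(u32 *)(r6 + 0)	// same insn, reached with both pointer types
 *
 * Both loads are individually valid at offset 0, but because PTR_TO_CTX is
 * not in reg_type_mismatch_ok(), the second visit of the load sees a
 * mismatching prev_src_type and do_check() rejects the program with
 * "same insn cannot be used with different pointers".
 */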
11724
58e2af8b 11725static int do_check(struct bpf_verifier_env *env)
17a52670 11726{
6f8a57cc 11727 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
51c39bb1 11728 struct bpf_verifier_state *state = env->cur_state;
17a52670 11729 struct bpf_insn *insns = env->prog->insnsi;
638f5b90 11730 struct bpf_reg_state *regs;
06ee7115 11731 int insn_cnt = env->prog->len;
17a52670 11732 bool do_print_state = false;
b5dc0163 11733 int prev_insn_idx = -1;
17a52670 11734
17a52670
AS
11735 for (;;) {
11736 struct bpf_insn *insn;
11737 u8 class;
11738 int err;
11739
b5dc0163 11740 env->prev_insn_idx = prev_insn_idx;
c08435ec 11741 if (env->insn_idx >= insn_cnt) {
61bd5218 11742 verbose(env, "invalid insn idx %d insn_cnt %d\n",
c08435ec 11743 env->insn_idx, insn_cnt);
17a52670
AS
11744 return -EFAULT;
11745 }
11746
c08435ec 11747 insn = &insns[env->insn_idx];
17a52670
AS
11748 class = BPF_CLASS(insn->code);
11749
06ee7115 11750 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
61bd5218
JK
11751 verbose(env,
11752 "BPF program is too large. Processed %d insn\n",
06ee7115 11753 env->insn_processed);
17a52670
AS
11754 return -E2BIG;
11755 }
11756
c08435ec 11757 err = is_state_visited(env, env->insn_idx);
f1bca824
AS
11758 if (err < 0)
11759 return err;
11760 if (err == 1) {
11761 /* found equivalent state, can prune the search */
06ee7115 11762 if (env->log.level & BPF_LOG_LEVEL) {
f1bca824 11763 if (do_print_state)
979d63d5
DB
11764 verbose(env, "\nfrom %d to %d%s: safe\n",
11765 env->prev_insn_idx, env->insn_idx,
11766 env->cur_state->speculative ?
11767 " (speculative execution)" : "");
f1bca824 11768 else
c08435ec 11769 verbose(env, "%d: safe\n", env->insn_idx);
f1bca824
AS
11770 }
11771 goto process_bpf_exit;
11772 }
11773
c3494801
AS
11774 if (signal_pending(current))
11775 return -EAGAIN;
11776
3c2ce60b
DB
11777 if (need_resched())
11778 cond_resched();
11779
2e576648
CL
11780 if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
11781 verbose(env, "\nfrom %d to %d%s:",
11782 env->prev_insn_idx, env->insn_idx,
11783 env->cur_state->speculative ?
11784 " (speculative execution)" : "");
11785 print_verifier_state(env, state->frame[state->curframe], true);
17a52670
AS
11786 do_print_state = false;
11787 }
11788
06ee7115 11789 if (env->log.level & BPF_LOG_LEVEL) {
7105e828 11790 const struct bpf_insn_cbs cbs = {
e6ac2450 11791 .cb_call = disasm_kfunc_name,
7105e828 11792 .cb_print = verbose,
abe08840 11793 .private_data = env,
7105e828
DB
11794 };
11795
2e576648
CL
11796 if (verifier_state_scratched(env))
11797 print_insn_state(env, state->frame[state->curframe]);
11798
c08435ec 11799 verbose_linfo(env, env->insn_idx, "; ");
2e576648 11800 env->prev_log_len = env->log.len_used;
c08435ec 11801 verbose(env, "%d: ", env->insn_idx);
abe08840 11802 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
2e576648
CL
11803 env->prev_insn_print_len = env->log.len_used - env->prev_log_len;
11804 env->prev_log_len = env->log.len_used;
17a52670
AS
11805 }
11806
cae1927c 11807 if (bpf_prog_is_dev_bound(env->prog->aux)) {
c08435ec
DB
11808 err = bpf_prog_offload_verify_insn(env, env->insn_idx,
11809 env->prev_insn_idx);
cae1927c
JK
11810 if (err)
11811 return err;
11812 }
13a27dfc 11813
638f5b90 11814 regs = cur_regs(env);
fe9a5ca7 11815 sanitize_mark_insn_seen(env);
b5dc0163 11816 prev_insn_idx = env->insn_idx;
fd978bf7 11817
17a52670 11818 if (class == BPF_ALU || class == BPF_ALU64) {
1be7f75d 11819 err = check_alu_op(env, insn);
17a52670
AS
11820 if (err)
11821 return err;
11822
11823 } else if (class == BPF_LDX) {
3df126f3 11824 enum bpf_reg_type *prev_src_type, src_reg_type;
9bac3d6d
AS
11825
11826 /* check for reserved fields is already done */
11827
17a52670 11828 /* check src operand */
dc503a8a 11829 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
11830 if (err)
11831 return err;
11832
dc503a8a 11833 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
17a52670
AS
11834 if (err)
11835 return err;
11836
725f9dcd
AS
11837 src_reg_type = regs[insn->src_reg].type;
11838
17a52670
AS
11839 /* check that memory (src_reg + off) is readable,
11840 * the state of dst_reg will be updated by this func
11841 */
c08435ec
DB
11842 err = check_mem_access(env, env->insn_idx, insn->src_reg,
11843 insn->off, BPF_SIZE(insn->code),
11844 BPF_READ, insn->dst_reg, false);
17a52670
AS
11845 if (err)
11846 return err;
11847
c08435ec 11848 prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
3df126f3
JK
11849
11850 if (*prev_src_type == NOT_INIT) {
9bac3d6d
AS
11851 /* saw a valid insn
11852 * dst_reg = *(u32 *)(src_reg + off)
3df126f3 11853 * save type to validate intersecting paths
9bac3d6d 11854 */
3df126f3 11855 *prev_src_type = src_reg_type;
9bac3d6d 11856
c64b7983 11857 } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
9bac3d6d
AS
11858				/* An abusive program is trying to use the same insn
11859 * dst_reg = *(u32*) (src_reg + off)
11860 * with different pointer types:
11861 * src_reg == ctx in one branch and
11862 * src_reg == stack|map in some other branch.
11863 * Reject it.
11864 */
61bd5218 11865 verbose(env, "same insn cannot be used with different pointers\n");
9bac3d6d
AS
11866 return -EINVAL;
11867 }
11868
17a52670 11869 } else if (class == BPF_STX) {
3df126f3 11870 enum bpf_reg_type *prev_dst_type, dst_reg_type;
d691f9e8 11871
91c960b0
BJ
11872 if (BPF_MODE(insn->code) == BPF_ATOMIC) {
11873 err = check_atomic(env, env->insn_idx, insn);
17a52670
AS
11874 if (err)
11875 return err;
c08435ec 11876 env->insn_idx++;
17a52670
AS
11877 continue;
11878 }
11879
5ca419f2
BJ
11880 if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
11881 verbose(env, "BPF_STX uses reserved fields\n");
11882 return -EINVAL;
11883 }
11884
17a52670 11885 /* check src1 operand */
dc503a8a 11886 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
11887 if (err)
11888 return err;
11889 /* check src2 operand */
dc503a8a 11890 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
11891 if (err)
11892 return err;
11893
d691f9e8
AS
11894 dst_reg_type = regs[insn->dst_reg].type;
11895
17a52670 11896 /* check that memory (dst_reg + off) is writeable */
c08435ec
DB
11897 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
11898 insn->off, BPF_SIZE(insn->code),
11899 BPF_WRITE, insn->src_reg, false);
17a52670
AS
11900 if (err)
11901 return err;
11902
c08435ec 11903 prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
3df126f3
JK
11904
11905 if (*prev_dst_type == NOT_INIT) {
11906 *prev_dst_type = dst_reg_type;
c64b7983 11907 } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
61bd5218 11908 verbose(env, "same insn cannot be used with different pointers\n");
d691f9e8
AS
11909 return -EINVAL;
11910 }
11911
17a52670
AS
11912 } else if (class == BPF_ST) {
11913 if (BPF_MODE(insn->code) != BPF_MEM ||
11914 insn->src_reg != BPF_REG_0) {
61bd5218 11915 verbose(env, "BPF_ST uses reserved fields\n");
17a52670
AS
11916 return -EINVAL;
11917 }
11918 /* check src operand */
dc503a8a 11919 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
11920 if (err)
11921 return err;
11922
f37a8cb8 11923 if (is_ctx_reg(env, insn->dst_reg)) {
9d2be44a 11924 verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
2a159c6f 11925 insn->dst_reg,
c25b2ae1 11926 reg_type_str(env, reg_state(env, insn->dst_reg)->type));
f37a8cb8
DB
11927 return -EACCES;
11928 }
11929
17a52670 11930 /* check that memory (dst_reg + off) is writeable */
c08435ec
DB
11931 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
11932 insn->off, BPF_SIZE(insn->code),
11933 BPF_WRITE, -1, false);
17a52670
AS
11934 if (err)
11935 return err;
11936
092ed096 11937 } else if (class == BPF_JMP || class == BPF_JMP32) {
17a52670
AS
11938 u8 opcode = BPF_OP(insn->code);
11939
2589726d 11940 env->jmps_processed++;
17a52670
AS
11941 if (opcode == BPF_CALL) {
11942 if (BPF_SRC(insn->code) != BPF_K ||
2357672c
KKD
11943 (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
11944 && insn->off != 0) ||
f4d7e40a 11945 (insn->src_reg != BPF_REG_0 &&
e6ac2450
MKL
11946 insn->src_reg != BPF_PSEUDO_CALL &&
11947 insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
092ed096
JW
11948 insn->dst_reg != BPF_REG_0 ||
11949 class == BPF_JMP32) {
61bd5218 11950 verbose(env, "BPF_CALL uses reserved fields\n");
17a52670
AS
11951 return -EINVAL;
11952 }
11953
d83525ca
AS
11954 if (env->cur_state->active_spin_lock &&
11955 (insn->src_reg == BPF_PSEUDO_CALL ||
11956 insn->imm != BPF_FUNC_spin_unlock)) {
11957 verbose(env, "function calls are not allowed while holding a lock\n");
11958 return -EINVAL;
11959 }
f4d7e40a 11960 if (insn->src_reg == BPF_PSEUDO_CALL)
c08435ec 11961 err = check_func_call(env, insn, &env->insn_idx);
e6ac2450 11962 else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
5c073f26 11963 err = check_kfunc_call(env, insn, &env->insn_idx);
f4d7e40a 11964 else
69c087ba 11965 err = check_helper_call(env, insn, &env->insn_idx);
17a52670
AS
11966 if (err)
11967 return err;
17a52670
AS
11968 } else if (opcode == BPF_JA) {
11969 if (BPF_SRC(insn->code) != BPF_K ||
11970 insn->imm != 0 ||
11971 insn->src_reg != BPF_REG_0 ||
092ed096
JW
11972 insn->dst_reg != BPF_REG_0 ||
11973 class == BPF_JMP32) {
61bd5218 11974 verbose(env, "BPF_JA uses reserved fields\n");
17a52670
AS
11975 return -EINVAL;
11976 }
11977
c08435ec 11978 env->insn_idx += insn->off + 1;
17a52670
AS
11979 continue;
11980
11981 } else if (opcode == BPF_EXIT) {
11982 if (BPF_SRC(insn->code) != BPF_K ||
11983 insn->imm != 0 ||
11984 insn->src_reg != BPF_REG_0 ||
092ed096
JW
11985 insn->dst_reg != BPF_REG_0 ||
11986 class == BPF_JMP32) {
61bd5218 11987 verbose(env, "BPF_EXIT uses reserved fields\n");
17a52670
AS
11988 return -EINVAL;
11989 }
11990
d83525ca
AS
11991 if (env->cur_state->active_spin_lock) {
11992 verbose(env, "bpf_spin_unlock is missing\n");
11993 return -EINVAL;
11994 }
11995
f4d7e40a
AS
11996 if (state->curframe) {
11997 /* exit from nested function */
c08435ec 11998 err = prepare_func_exit(env, &env->insn_idx);
f4d7e40a
AS
11999 if (err)
12000 return err;
12001 do_print_state = true;
12002 continue;
12003 }
12004
fd978bf7
JS
12005 err = check_reference_leak(env);
12006 if (err)
12007 return err;
12008
390ee7e2
AS
12009 err = check_return_code(env);
12010 if (err)
12011 return err;
f1bca824 12012process_bpf_exit:
0f55f9ed 12013 mark_verifier_state_scratched(env);
2589726d 12014 update_branch_counts(env, env->cur_state);
b5dc0163 12015 err = pop_stack(env, &prev_insn_idx,
6f8a57cc 12016 &env->insn_idx, pop_log);
638f5b90
AS
12017 if (err < 0) {
12018 if (err != -ENOENT)
12019 return err;
17a52670
AS
12020 break;
12021 } else {
12022 do_print_state = true;
12023 continue;
12024 }
12025 } else {
c08435ec 12026 err = check_cond_jmp_op(env, insn, &env->insn_idx);
17a52670
AS
12027 if (err)
12028 return err;
12029 }
12030 } else if (class == BPF_LD) {
12031 u8 mode = BPF_MODE(insn->code);
12032
12033 if (mode == BPF_ABS || mode == BPF_IND) {
ddd872bc
AS
12034 err = check_ld_abs(env, insn);
12035 if (err)
12036 return err;
12037
17a52670
AS
12038 } else if (mode == BPF_IMM) {
12039 err = check_ld_imm(env, insn);
12040 if (err)
12041 return err;
12042
c08435ec 12043 env->insn_idx++;
fe9a5ca7 12044 sanitize_mark_insn_seen(env);
17a52670 12045 } else {
61bd5218 12046 verbose(env, "invalid BPF_LD mode\n");
17a52670
AS
12047 return -EINVAL;
12048 }
12049 } else {
61bd5218 12050 verbose(env, "unknown insn class %d\n", class);
17a52670
AS
12051 return -EINVAL;
12052 }
12053
c08435ec 12054 env->insn_idx++;
17a52670
AS
12055 }
12056
12057 return 0;
12058}
12059
541c3bad
AN
12060static int find_btf_percpu_datasec(struct btf *btf)
12061{
12062 const struct btf_type *t;
12063 const char *tname;
12064 int i, n;
12065
12066 /*
12067	 * vmlinux and each module have their own ".data..percpu"
12068	 * DATASECs in BTF. So in the module case, we need to skip vmlinux BTF
12069	 * types and look only at the module's own BTF types.
12070 */
12071 n = btf_nr_types(btf);
12072 if (btf_is_module(btf))
12073 i = btf_nr_types(btf_vmlinux);
12074 else
12075 i = 1;
12076
12077	for (; i < n; i++) {
12078 t = btf_type_by_id(btf, i);
12079 if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
12080 continue;
12081
12082 tname = btf_name_by_offset(btf, t->name_off);
12083 if (!strcmp(tname, ".data..percpu"))
12084 return i;
12085 }
12086
12087 return -ENOENT;
12088}
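/* For illustration (example symbol, not required by the code): a program that
 * declares
 *
 *	extern const struct rq runqueues __ksym;
 *
 * in BPF C is compiled into a ld_imm64 with src_reg == BPF_PSEUDO_BTF_ID.
 * check_pseudo_btf_id() below then looks the VAR up in the ".data..percpu"
 * DATASEC found here; on a match the destination register is typed as
 * PTR_TO_BTF_ID | MEM_PERCPU instead of a plain PTR_TO_BTF_ID.
 */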
12089
4976b718
HL
12090/* replace pseudo btf_id with kernel symbol address */
12091static int check_pseudo_btf_id(struct bpf_verifier_env *env,
12092 struct bpf_insn *insn,
12093 struct bpf_insn_aux_data *aux)
12094{
eaa6bcb7
HL
12095 const struct btf_var_secinfo *vsi;
12096 const struct btf_type *datasec;
541c3bad 12097 struct btf_mod_pair *btf_mod;
4976b718
HL
12098 const struct btf_type *t;
12099 const char *sym_name;
eaa6bcb7 12100 bool percpu = false;
f16e6313 12101 u32 type, id = insn->imm;
541c3bad 12102 struct btf *btf;
f16e6313 12103 s32 datasec_id;
4976b718 12104 u64 addr;
541c3bad 12105 int i, btf_fd, err;
4976b718 12106
541c3bad
AN
12107 btf_fd = insn[1].imm;
12108 if (btf_fd) {
12109 btf = btf_get_by_fd(btf_fd);
12110 if (IS_ERR(btf)) {
12111 verbose(env, "invalid module BTF object FD specified.\n");
12112 return -EINVAL;
12113 }
12114 } else {
12115 if (!btf_vmlinux) {
12116 verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
12117 return -EINVAL;
12118 }
12119 btf = btf_vmlinux;
12120 btf_get(btf);
4976b718
HL
12121 }
12122
541c3bad 12123 t = btf_type_by_id(btf, id);
4976b718
HL
12124 if (!t) {
12125 verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
541c3bad
AN
12126 err = -ENOENT;
12127 goto err_put;
4976b718
HL
12128 }
12129
12130 if (!btf_type_is_var(t)) {
541c3bad
AN
12131 verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id);
12132 err = -EINVAL;
12133 goto err_put;
4976b718
HL
12134 }
12135
541c3bad 12136 sym_name = btf_name_by_offset(btf, t->name_off);
4976b718
HL
12137 addr = kallsyms_lookup_name(sym_name);
12138 if (!addr) {
12139 verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
12140 sym_name);
541c3bad
AN
12141 err = -ENOENT;
12142 goto err_put;
4976b718
HL
12143 }
12144
541c3bad 12145 datasec_id = find_btf_percpu_datasec(btf);
eaa6bcb7 12146 if (datasec_id > 0) {
541c3bad 12147 datasec = btf_type_by_id(btf, datasec_id);
eaa6bcb7
HL
12148 for_each_vsi(i, datasec, vsi) {
12149 if (vsi->type == id) {
12150 percpu = true;
12151 break;
12152 }
12153 }
12154 }
12155
4976b718
HL
12156 insn[0].imm = (u32)addr;
12157 insn[1].imm = addr >> 32;
12158
12159 type = t->type;
541c3bad 12160 t = btf_type_skip_modifiers(btf, type, NULL);
eaa6bcb7 12161 if (percpu) {
5844101a 12162 aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
541c3bad 12163 aux->btf_var.btf = btf;
eaa6bcb7
HL
12164 aux->btf_var.btf_id = type;
12165 } else if (!btf_type_is_struct(t)) {
4976b718
HL
12166 const struct btf_type *ret;
12167 const char *tname;
12168 u32 tsize;
12169
12170 /* resolve the type size of ksym. */
541c3bad 12171 ret = btf_resolve_size(btf, t, &tsize);
4976b718 12172 if (IS_ERR(ret)) {
541c3bad 12173 tname = btf_name_by_offset(btf, t->name_off);
4976b718
HL
12174 verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
12175 tname, PTR_ERR(ret));
541c3bad
AN
12176 err = -EINVAL;
12177 goto err_put;
4976b718 12178 }
34d3a78c 12179 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
4976b718
HL
12180 aux->btf_var.mem_size = tsize;
12181 } else {
12182 aux->btf_var.reg_type = PTR_TO_BTF_ID;
541c3bad 12183 aux->btf_var.btf = btf;
4976b718
HL
12184 aux->btf_var.btf_id = type;
12185 }
541c3bad
AN
12186
12187 /* check whether we recorded this BTF (and maybe module) already */
12188 for (i = 0; i < env->used_btf_cnt; i++) {
12189 if (env->used_btfs[i].btf == btf) {
12190 btf_put(btf);
12191 return 0;
12192 }
12193 }
12194
12195 if (env->used_btf_cnt >= MAX_USED_BTFS) {
12196 err = -E2BIG;
12197 goto err_put;
12198 }
12199
12200 btf_mod = &env->used_btfs[env->used_btf_cnt];
12201 btf_mod->btf = btf;
12202 btf_mod->module = NULL;
12203
12204 /* if we reference variables from kernel module, bump its refcount */
12205 if (btf_is_module(btf)) {
12206 btf_mod->module = btf_try_get_module(btf);
12207 if (!btf_mod->module) {
12208 err = -ENXIO;
12209 goto err_put;
12210 }
12211 }
12212
12213 env->used_btf_cnt++;
12214
4976b718 12215 return 0;
541c3bad
AN
12216err_put:
12217 btf_put(btf);
12218 return err;
4976b718
HL
12219}
12220
56f668df
MKL
12221static int check_map_prealloc(struct bpf_map *map)
12222{
12223 return (map->map_type != BPF_MAP_TYPE_HASH &&
bcc6b1b7
MKL
12224 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
12225 map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
56f668df
MKL
12226 !(map->map_flags & BPF_F_NO_PREALLOC);
12227}
12228
d83525ca
AS
12229static bool is_tracing_prog_type(enum bpf_prog_type type)
12230{
12231 switch (type) {
12232 case BPF_PROG_TYPE_KPROBE:
12233 case BPF_PROG_TYPE_TRACEPOINT:
12234 case BPF_PROG_TYPE_PERF_EVENT:
12235 case BPF_PROG_TYPE_RAW_TRACEPOINT:
12236 return true;
12237 default:
12238 return false;
12239 }
12240}
12241
94dacdbd
TG
12242static bool is_preallocated_map(struct bpf_map *map)
12243{
12244 if (!check_map_prealloc(map))
12245 return false;
12246 if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta))
12247 return false;
12248 return true;
12249}
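/* For example (illustrative libbpf-style definition): a kprobe program using
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_HASH);
 *		__uint(map_flags, BPF_F_NO_PREALLOC);
 *		__uint(max_entries, 128);
 *		__type(key, u32);
 *		__type(value, u64);
 *	} m SEC(".maps");
 *
 * fails is_preallocated_map() and is warned about (and rejected outright on
 * PREEMPT_RT kernels) by check_map_prog_compatibility() below, while the
 * same definition without BPF_F_NO_PREALLOC (the default, preallocated hash)
 * passes.
 */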
12250
61bd5218
JK
12251static int check_map_prog_compatibility(struct bpf_verifier_env *env,
12252 struct bpf_map *map,
fdc15d38
AS
12253 struct bpf_prog *prog)
12254
12255{
7e40781c 12256 enum bpf_prog_type prog_type = resolve_prog_type(prog);
94dacdbd
TG
12257 /*
12258 * Validate that trace type programs use preallocated hash maps.
12259 *
12260 * For programs attached to PERF events this is mandatory as the
12261 * perf NMI can hit any arbitrary code sequence.
12262 *
12263	 * All other trace types using non-preallocated hash maps are unsafe as
12264	 * well because tracepoints or kprobes can be inside locked regions
12265 * of the memory allocator or at a place where a recursion into the
12266 * memory allocator would see inconsistent state.
12267 *
2ed905c5
TG
12268 * On RT enabled kernels run-time allocation of all trace type
12269 * programs is strictly prohibited due to lock type constraints. On
12270 * !RT kernels it is allowed for backwards compatibility reasons for
12271 * now, but warnings are emitted so developers are made aware of
12272 * the unsafety and can fix their programs before this is enforced.
56f668df 12273 */
7e40781c
UP
12274 if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) {
12275 if (prog_type == BPF_PROG_TYPE_PERF_EVENT) {
61bd5218 12276 verbose(env, "perf_event programs can only use preallocated hash map\n");
56f668df
MKL
12277 return -EINVAL;
12278 }
2ed905c5
TG
12279 if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
12280 verbose(env, "trace type programs can only use preallocated hash map\n");
12281 return -EINVAL;
12282 }
94dacdbd
TG
12283 WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
12284 verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
fdc15d38 12285 }
a3884572 12286
9e7a4d98
KS
12287 if (map_value_has_spin_lock(map)) {
12288 if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
12289 verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
12290 return -EINVAL;
12291 }
12292
12293 if (is_tracing_prog_type(prog_type)) {
12294 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
12295 return -EINVAL;
12296 }
12297
12298 if (prog->aux->sleepable) {
12299 verbose(env, "sleepable progs cannot use bpf_spin_lock yet\n");
12300 return -EINVAL;
12301 }
d83525ca
AS
12302 }
12303
5e0bc308
DB
12304 if (map_value_has_timer(map)) {
12305 if (is_tracing_prog_type(prog_type)) {
12306 verbose(env, "tracing progs cannot use bpf_timer yet\n");
12307 return -EINVAL;
12308 }
12309 }
12310
a3884572 12311 if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
09728266 12312 !bpf_offload_prog_map_match(prog, map)) {
a3884572
JK
12313 verbose(env, "offload device mismatch between prog and map\n");
12314 return -EINVAL;
12315 }
12316
85d33df3
MKL
12317 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
12318 verbose(env, "bpf_struct_ops map cannot be used in prog\n");
12319 return -EINVAL;
12320 }
12321
1e6c62a8
AS
12322 if (prog->aux->sleepable)
12323 switch (map->map_type) {
12324 case BPF_MAP_TYPE_HASH:
12325 case BPF_MAP_TYPE_LRU_HASH:
12326 case BPF_MAP_TYPE_ARRAY:
638e4b82
AS
12327 case BPF_MAP_TYPE_PERCPU_HASH:
12328 case BPF_MAP_TYPE_PERCPU_ARRAY:
12329 case BPF_MAP_TYPE_LRU_PERCPU_HASH:
12330 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
12331 case BPF_MAP_TYPE_HASH_OF_MAPS:
1e6c62a8
AS
12332 if (!is_preallocated_map(map)) {
12333 verbose(env,
638e4b82 12334 "Sleepable programs can only use preallocated maps\n");
1e6c62a8
AS
12335 return -EINVAL;
12336 }
12337 break;
ba90c2cc 12338 case BPF_MAP_TYPE_RINGBUF:
0fe4b381
KS
12339 case BPF_MAP_TYPE_INODE_STORAGE:
12340 case BPF_MAP_TYPE_SK_STORAGE:
12341 case BPF_MAP_TYPE_TASK_STORAGE:
ba90c2cc 12342 break;
1e6c62a8
AS
12343 default:
12344 verbose(env,
ba90c2cc 12345 "Sleepable programs can only use array, hash, and ringbuf maps\n");
1e6c62a8
AS
12346 return -EINVAL;
12347 }
12348
fdc15d38
AS
12349 return 0;
12350}
12351
b741f163
RG
12352static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
12353{
12354 return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
12355 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
12356}
12357
4976b718
HL
12358/* find and rewrite pseudo imm in ld_imm64 instructions:
12359 *
12360 * 1. if it accesses map FD, replace it with actual map pointer.
12361 * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
12362 *
12363 * NOTE: btf_vmlinux is required for converting pseudo btf_id.
0246e64d 12364 */
4976b718 12365static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
0246e64d
AS
12366{
12367 struct bpf_insn *insn = env->prog->insnsi;
12368 int insn_cnt = env->prog->len;
fdc15d38 12369 int i, j, err;
0246e64d 12370
f1f7714e 12371 err = bpf_prog_calc_tag(env->prog);
aafe6ae9
DB
12372 if (err)
12373 return err;
12374
0246e64d 12375 for (i = 0; i < insn_cnt; i++, insn++) {
9bac3d6d 12376 if (BPF_CLASS(insn->code) == BPF_LDX &&
d691f9e8 12377 (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
61bd5218 12378 verbose(env, "BPF_LDX uses reserved fields\n");
d691f9e8
AS
12379 return -EINVAL;
12380 }
12381
0246e64d 12382 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
d8eca5bb 12383 struct bpf_insn_aux_data *aux;
0246e64d
AS
12384 struct bpf_map *map;
12385 struct fd f;
d8eca5bb 12386 u64 addr;
387544bf 12387 u32 fd;
0246e64d
AS
12388
12389 if (i == insn_cnt - 1 || insn[1].code != 0 ||
12390 insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
12391 insn[1].off != 0) {
61bd5218 12392 verbose(env, "invalid bpf_ld_imm64 insn\n");
0246e64d
AS
12393 return -EINVAL;
12394 }
12395
d8eca5bb 12396 if (insn[0].src_reg == 0)
0246e64d
AS
12397 /* valid generic load 64-bit imm */
12398 goto next_insn;
12399
4976b718
HL
12400 if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
12401 aux = &env->insn_aux_data[i];
12402 err = check_pseudo_btf_id(env, insn, aux);
12403 if (err)
12404 return err;
12405 goto next_insn;
12406 }
12407
69c087ba
YS
12408 if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
12409 aux = &env->insn_aux_data[i];
12410 aux->ptr_type = PTR_TO_FUNC;
12411 goto next_insn;
12412 }
12413
d8eca5bb
DB
12414 /* In final convert_pseudo_ld_imm64() step, this is
12415 * converted into regular 64-bit imm load insn.
12416 */
387544bf
AS
12417 switch (insn[0].src_reg) {
12418 case BPF_PSEUDO_MAP_VALUE:
12419 case BPF_PSEUDO_MAP_IDX_VALUE:
12420 break;
12421 case BPF_PSEUDO_MAP_FD:
12422 case BPF_PSEUDO_MAP_IDX:
12423 if (insn[1].imm == 0)
12424 break;
12425 fallthrough;
12426 default:
12427 verbose(env, "unrecognized bpf_ld_imm64 insn\n");
0246e64d
AS
12428 return -EINVAL;
12429 }
12430
387544bf
AS
12431 switch (insn[0].src_reg) {
12432 case BPF_PSEUDO_MAP_IDX_VALUE:
12433 case BPF_PSEUDO_MAP_IDX:
12434 if (bpfptr_is_null(env->fd_array)) {
12435 verbose(env, "fd_idx without fd_array is invalid\n");
12436 return -EPROTO;
12437 }
12438 if (copy_from_bpfptr_offset(&fd, env->fd_array,
12439 insn[0].imm * sizeof(fd),
12440 sizeof(fd)))
12441 return -EFAULT;
12442 break;
12443 default:
12444 fd = insn[0].imm;
12445 break;
12446 }
12447
12448 f = fdget(fd);
c2101297 12449 map = __bpf_map_get(f);
0246e64d 12450 if (IS_ERR(map)) {
61bd5218 12451 verbose(env, "fd %d is not pointing to valid bpf_map\n",
20182390 12452 insn[0].imm);
0246e64d
AS
12453 return PTR_ERR(map);
12454 }
12455
61bd5218 12456 err = check_map_prog_compatibility(env, map, env->prog);
fdc15d38
AS
12457 if (err) {
12458 fdput(f);
12459 return err;
12460 }
12461
d8eca5bb 12462 aux = &env->insn_aux_data[i];
387544bf
AS
12463 if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
12464 insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
d8eca5bb
DB
12465 addr = (unsigned long)map;
12466 } else {
12467 u32 off = insn[1].imm;
12468
12469 if (off >= BPF_MAX_VAR_OFF) {
12470 verbose(env, "direct value offset of %u is not allowed\n", off);
12471 fdput(f);
12472 return -EINVAL;
12473 }
12474
12475 if (!map->ops->map_direct_value_addr) {
12476 verbose(env, "no direct value access support for this map type\n");
12477 fdput(f);
12478 return -EINVAL;
12479 }
12480
12481 err = map->ops->map_direct_value_addr(map, &addr, off);
12482 if (err) {
12483 verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
12484 map->value_size, off);
12485 fdput(f);
12486 return err;
12487 }
12488
12489 aux->map_off = off;
12490 addr += off;
12491 }
12492
12493 insn[0].imm = (u32)addr;
12494 insn[1].imm = addr >> 32;
0246e64d
AS
12495
12496 /* check whether we recorded this map already */
d8eca5bb 12497 for (j = 0; j < env->used_map_cnt; j++) {
0246e64d 12498 if (env->used_maps[j] == map) {
d8eca5bb 12499 aux->map_index = j;
0246e64d
AS
12500 fdput(f);
12501 goto next_insn;
12502 }
d8eca5bb 12503 }
0246e64d
AS
12504
12505 if (env->used_map_cnt >= MAX_USED_MAPS) {
12506 fdput(f);
12507 return -E2BIG;
12508 }
12509
0246e64d
AS
12510 /* hold the map. If the program is rejected by verifier,
12511 * the map will be released by release_maps() or it
12512 * will be used by the valid program until it's unloaded
ab7f5bf0 12513 * and all maps are released in free_used_maps()
0246e64d 12514 */
1e0bd5a0 12515 bpf_map_inc(map);
d8eca5bb
DB
12516
12517 aux->map_index = env->used_map_cnt;
92117d84
AS
12518 env->used_maps[env->used_map_cnt++] = map;
12519
b741f163 12520 if (bpf_map_is_cgroup_storage(map) &&
e4730423 12521 bpf_cgroup_storage_assign(env->prog->aux, map)) {
b741f163 12522 verbose(env, "only one cgroup storage of each type is allowed\n");
de9cbbaa
RG
12523 fdput(f);
12524 return -EBUSY;
12525 }
12526
0246e64d
AS
12527 fdput(f);
12528next_insn:
12529 insn++;
12530 i++;
5e581dad
DB
12531 continue;
12532 }
12533
12534 /* Basic sanity check before we invest more work here. */
12535 if (!bpf_opcode_in_insntable(insn->code)) {
12536 verbose(env, "unknown opcode %02x\n", insn->code);
12537 return -EINVAL;
0246e64d
AS
12538 }
12539 }
12540
12541 /* now all pseudo BPF_LD_IMM64 instructions load valid
12542 * 'struct bpf_map *' into a register instead of user map_fd.
12543 * These pointers will be used later by verifier to validate map access.
12544 */
12545 return 0;
12546}
12547
12548/* drop refcnt of maps used by the rejected program */
58e2af8b 12549static void release_maps(struct bpf_verifier_env *env)
0246e64d 12550{
a2ea0746
DB
12551 __bpf_free_used_maps(env->prog->aux, env->used_maps,
12552 env->used_map_cnt);
0246e64d
AS
12553}
12554
541c3bad
AN
12555/* drop refcnt of maps used by the rejected program */
12556static void release_btfs(struct bpf_verifier_env *env)
12557{
12558 __bpf_free_used_btfs(env->prog->aux, env->used_btfs,
12559 env->used_btf_cnt);
12560}
12561
0246e64d 12562/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
58e2af8b 12563static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
0246e64d
AS
12564{
12565 struct bpf_insn *insn = env->prog->insnsi;
12566 int insn_cnt = env->prog->len;
12567 int i;
12568
69c087ba
YS
12569 for (i = 0; i < insn_cnt; i++, insn++) {
12570 if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
12571 continue;
12572 if (insn->src_reg == BPF_PSEUDO_FUNC)
12573 continue;
12574 insn->src_reg = 0;
12575 }
0246e64d
AS
12576}
12577
8041902d
AS
12578/* single env->prog->insni[off] instruction was replaced with the range
12579 * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
12580 * [0, off) and [off, end) to new locations, so the patched range stays zero
12581 */
75f0fc7b
HF
12582static void adjust_insn_aux_data(struct bpf_verifier_env *env,
12583 struct bpf_insn_aux_data *new_data,
12584 struct bpf_prog *new_prog, u32 off, u32 cnt)
8041902d 12585{
75f0fc7b 12586 struct bpf_insn_aux_data *old_data = env->insn_aux_data;
b325fbca 12587 struct bpf_insn *insn = new_prog->insnsi;
d203b0fd 12588 u32 old_seen = old_data[off].seen;
b325fbca 12589 u32 prog_len;
c131187d 12590 int i;
8041902d 12591
b325fbca
JW
12592	/* aux info at OFF always needs adjustment, no matter whether the fast path
12593	 * (cnt == 1) is taken or not. There is no guarantee that the INSN at OFF is the
12594	 * original insn of the old prog.
12595 */
12596 old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
12597
8041902d 12598 if (cnt == 1)
75f0fc7b 12599 return;
b325fbca 12600 prog_len = new_prog->len;
75f0fc7b 12601
8041902d
AS
12602 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
12603 memcpy(new_data + off + cnt - 1, old_data + off,
12604 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
b325fbca 12605 for (i = off; i < off + cnt - 1; i++) {
d203b0fd
DB
12606 /* Expand insni[off]'s seen count to the patched range. */
12607 new_data[i].seen = old_seen;
b325fbca
JW
12608 new_data[i].zext_dst = insn_has_def32(env, insn + i);
12609 }
8041902d
AS
12610 env->insn_aux_data = new_data;
12611 vfree(old_data);
8041902d
AS
12612}
12613
cc8b0b92
AS
12614static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
12615{
12616 int i;
12617
12618 if (len == 1)
12619 return;
4cb3d99c
JW
12620 /* NOTE: fake 'exit' subprog should be updated as well. */
12621 for (i = 0; i <= env->subprog_cnt; i++) {
afd59424 12622 if (env->subprog_info[i].start <= off)
cc8b0b92 12623 continue;
9c8105bd 12624 env->subprog_info[i].start += len - 1;
cc8b0b92
AS
12625 }
12626}
12627
7506d211 12628static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
a748c697
MF
12629{
12630 struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
12631 int i, sz = prog->aux->size_poke_tab;
12632 struct bpf_jit_poke_descriptor *desc;
12633
12634 for (i = 0; i < sz; i++) {
12635 desc = &tab[i];
7506d211
JF
12636 if (desc->insn_idx <= off)
12637 continue;
a748c697
MF
12638 desc->insn_idx += len - 1;
12639 }
12640}
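/* A worked example (illustrative): if the single insn at off == 10 is
 * patched with a 3-insn sequence (len == 3), every insn that used to live
 * after index 10 moves down by len - 1 == 2 slots. The two helpers above
 * therefore bump subprog starts and poke descriptor insn_idx values that
 * lie strictly after off by len - 1, while entries at or before off are
 * left untouched.
 */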
12641
8041902d
AS
12642static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
12643 const struct bpf_insn *patch, u32 len)
12644{
12645 struct bpf_prog *new_prog;
75f0fc7b
HF
12646 struct bpf_insn_aux_data *new_data = NULL;
12647
12648 if (len > 1) {
12649 new_data = vzalloc(array_size(env->prog->len + len - 1,
12650 sizeof(struct bpf_insn_aux_data)));
12651 if (!new_data)
12652 return NULL;
12653 }
8041902d
AS
12654
12655 new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
4f73379e
AS
12656 if (IS_ERR(new_prog)) {
12657 if (PTR_ERR(new_prog) == -ERANGE)
12658 verbose(env,
12659 "insn %d cannot be patched due to 16-bit range\n",
12660 env->insn_aux_data[off].orig_idx);
75f0fc7b 12661 vfree(new_data);
8041902d 12662 return NULL;
4f73379e 12663 }
75f0fc7b 12664 adjust_insn_aux_data(env, new_data, new_prog, off, len);
cc8b0b92 12665 adjust_subprog_starts(env, off, len);
7506d211 12666 adjust_poke_descs(new_prog, off, len);
8041902d
AS
12667 return new_prog;
12668}
12669
52875a04
JK
12670static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
12671 u32 off, u32 cnt)
12672{
12673 int i, j;
12674
12675 /* find first prog starting at or after off (first to remove) */
12676 for (i = 0; i < env->subprog_cnt; i++)
12677 if (env->subprog_info[i].start >= off)
12678 break;
12679 /* find first prog starting at or after off + cnt (first to stay) */
12680 for (j = i; j < env->subprog_cnt; j++)
12681 if (env->subprog_info[j].start >= off + cnt)
12682 break;
12683 /* if j doesn't start exactly at off + cnt, we are just removing
12684 * the front of previous prog
12685 */
12686 if (env->subprog_info[j].start != off + cnt)
12687 j--;
12688
12689 if (j > i) {
12690 struct bpf_prog_aux *aux = env->prog->aux;
12691 int move;
12692
12693 /* move fake 'exit' subprog as well */
12694 move = env->subprog_cnt + 1 - j;
12695
12696 memmove(env->subprog_info + i,
12697 env->subprog_info + j,
12698 sizeof(*env->subprog_info) * move);
12699 env->subprog_cnt -= j - i;
12700
12701 /* remove func_info */
12702 if (aux->func_info) {
12703 move = aux->func_info_cnt - j;
12704
12705 memmove(aux->func_info + i,
12706 aux->func_info + j,
12707 sizeof(*aux->func_info) * move);
12708 aux->func_info_cnt -= j - i;
12709 /* func_info->insn_off is set after all code rewrites,
12710 * in adjust_btf_func() - no need to adjust
12711 */
12712 }
12713 } else {
12714 /* convert i from "first prog to remove" to "first to adjust" */
12715 if (env->subprog_info[i].start == off)
12716 i++;
12717 }
12718
12719 /* update fake 'exit' subprog as well */
12720 for (; i <= env->subprog_cnt; i++)
12721 env->subprog_info[i].start -= cnt;
12722
12723 return 0;
12724}
12725
12726static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
12727 u32 cnt)
12728{
12729 struct bpf_prog *prog = env->prog;
12730 u32 i, l_off, l_cnt, nr_linfo;
12731 struct bpf_line_info *linfo;
12732
12733 nr_linfo = prog->aux->nr_linfo;
12734 if (!nr_linfo)
12735 return 0;
12736
12737 linfo = prog->aux->linfo;
12738
12739 /* find first line info to remove, count lines to be removed */
12740 for (i = 0; i < nr_linfo; i++)
12741 if (linfo[i].insn_off >= off)
12742 break;
12743
12744 l_off = i;
12745 l_cnt = 0;
12746 for (; i < nr_linfo; i++)
12747 if (linfo[i].insn_off < off + cnt)
12748 l_cnt++;
12749 else
12750 break;
12751
12752	/* If the first live insn doesn't match the first live linfo, it needs to "inherit"
12753	 * the last removed linfo. prog is already modified, so prog->len == off
12754	 * means there are no live instructions after it (the tail of the program was removed).
12755 */
12756 if (prog->len != off && l_cnt &&
12757 (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
12758 l_cnt--;
12759 linfo[--i].insn_off = off + cnt;
12760 }
12761
12762 /* remove the line info which refer to the removed instructions */
12763 if (l_cnt) {
12764 memmove(linfo + l_off, linfo + i,
12765 sizeof(*linfo) * (nr_linfo - i));
12766
12767 prog->aux->nr_linfo -= l_cnt;
12768 nr_linfo = prog->aux->nr_linfo;
12769 }
12770
12771 /* pull all linfo[i].insn_off >= off + cnt in by cnt */
12772 for (i = l_off; i < nr_linfo; i++)
12773 linfo[i].insn_off -= cnt;
12774
12775 /* fix up all subprogs (incl. 'exit') which start >= off */
12776 for (i = 0; i <= env->subprog_cnt; i++)
12777 if (env->subprog_info[i].linfo_idx > l_off) {
12778 /* program may have started in the removed region but
12779 * may not be fully removed
12780 */
12781 if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
12782 env->subprog_info[i].linfo_idx -= l_cnt;
12783 else
12784 env->subprog_info[i].linfo_idx = l_off;
12785 }
12786
12787 return 0;
12788}
12789
12790static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
12791{
12792 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12793 unsigned int orig_prog_len = env->prog->len;
12794 int err;
12795
08ca90af
JK
12796 if (bpf_prog_is_dev_bound(env->prog->aux))
12797 bpf_prog_offload_remove_insns(env, off, cnt);
12798
52875a04
JK
12799 err = bpf_remove_insns(env->prog, off, cnt);
12800 if (err)
12801 return err;
12802
12803 err = adjust_subprog_starts_after_remove(env, off, cnt);
12804 if (err)
12805 return err;
12806
12807 err = bpf_adj_linfo_after_remove(env, off, cnt);
12808 if (err)
12809 return err;
12810
12811 memmove(aux_data + off, aux_data + off + cnt,
12812 sizeof(*aux_data) * (orig_prog_len - off - cnt));
12813
12814 return 0;
12815}
12816
2a5418a1
DB
12817/* The verifier does more data flow analysis than llvm and will not
12818 * explore branches that are dead at run time. Malicious programs can
12819 * have dead code too. Therefore replace all dead at-run-time code
12820 * with 'ja -1'.
12821 *
12822 * Just nops would not be optimal, e.g. if they were to sit at the end of the
12823 * program and through another bug we managed to jump there, then
12824 * we'd execute beyond program memory. Returning an exception
12825 * code also wouldn't work since we can have subprogs where the dead
12826 * code could be located.
c131187d
AS
12827 */
12828static void sanitize_dead_code(struct bpf_verifier_env *env)
12829{
12830 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
2a5418a1 12831 struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
c131187d
AS
12832 struct bpf_insn *insn = env->prog->insnsi;
12833 const int insn_cnt = env->prog->len;
12834 int i;
12835
12836 for (i = 0; i < insn_cnt; i++) {
12837 if (aux_data[i].seen)
12838 continue;
2a5418a1 12839 memcpy(insn + i, &trap, sizeof(trap));
45c709f8 12840 aux_data[i].zext_dst = false;
c131187d
AS
12841 }
12842}
12843
e2ae4ca2
JK
12844static bool insn_is_cond_jump(u8 code)
12845{
12846 u8 op;
12847
092ed096
JW
12848 if (BPF_CLASS(code) == BPF_JMP32)
12849 return true;
12850
e2ae4ca2
JK
12851 if (BPF_CLASS(code) != BPF_JMP)
12852 return false;
12853
12854 op = BPF_OP(code);
12855 return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
12856}
12857
12858static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
12859{
12860 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12861 struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
12862 struct bpf_insn *insn = env->prog->insnsi;
12863 const int insn_cnt = env->prog->len;
12864 int i;
12865
12866 for (i = 0; i < insn_cnt; i++, insn++) {
12867 if (!insn_is_cond_jump(insn->code))
12868 continue;
12869
12870 if (!aux_data[i + 1].seen)
12871 ja.off = insn->off;
12872 else if (!aux_data[i + 1 + insn->off].seen)
12873 ja.off = 0;
12874 else
12875 continue;
12876
08ca90af
JK
12877 if (bpf_prog_is_dev_bound(env->prog->aux))
12878 bpf_prog_offload_replace_insn(env, i, &ja);
12879
e2ae4ca2
JK
12880 memcpy(insn, &ja, sizeof(ja));
12881 }
12882}
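/* For example (illustrative): given "if r1 > 7 goto +5" where the verifier
 * proved the fall-through insn is never reached, the conditional jump is
 * rewritten above into an unconditional "ja +5"; if instead the jump target
 * at +5 is the dead side, it becomes "ja +0", i.e. a plain fall-through,
 * which opt_remove_nops() can later delete.
 */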
12883
52875a04
JK
12884static int opt_remove_dead_code(struct bpf_verifier_env *env)
12885{
12886 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12887 int insn_cnt = env->prog->len;
12888 int i, err;
12889
12890 for (i = 0; i < insn_cnt; i++) {
12891 int j;
12892
12893 j = 0;
12894 while (i + j < insn_cnt && !aux_data[i + j].seen)
12895 j++;
12896 if (!j)
12897 continue;
12898
12899 err = verifier_remove_insns(env, i, j);
12900 if (err)
12901 return err;
12902 insn_cnt = env->prog->len;
12903 }
12904
12905 return 0;
12906}
12907
a1b14abc
JK
12908static int opt_remove_nops(struct bpf_verifier_env *env)
12909{
12910 const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
12911 struct bpf_insn *insn = env->prog->insnsi;
12912 int insn_cnt = env->prog->len;
12913 int i, err;
12914
12915 for (i = 0; i < insn_cnt; i++) {
12916 if (memcmp(&insn[i], &ja, sizeof(ja)))
12917 continue;
12918
12919 err = verifier_remove_insns(env, i, 1);
12920 if (err)
12921 return err;
12922 insn_cnt--;
12923 i--;
12924 }
12925
12926 return 0;
12927}
12928
d6c2308c
JW
12929static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
12930 const union bpf_attr *attr)
a4b1d3c1 12931{
d6c2308c 12932 struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
a4b1d3c1 12933 struct bpf_insn_aux_data *aux = env->insn_aux_data;
d6c2308c 12934 int i, patch_len, delta = 0, len = env->prog->len;
a4b1d3c1 12935 struct bpf_insn *insns = env->prog->insnsi;
a4b1d3c1 12936 struct bpf_prog *new_prog;
d6c2308c 12937 bool rnd_hi32;
a4b1d3c1 12938
d6c2308c 12939 rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
a4b1d3c1 12940 zext_patch[1] = BPF_ZEXT_REG(0);
d6c2308c
JW
12941 rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
12942 rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
12943 rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
a4b1d3c1
JW
12944 for (i = 0; i < len; i++) {
12945 int adj_idx = i + delta;
12946 struct bpf_insn insn;
83a28819 12947 int load_reg;
a4b1d3c1 12948
d6c2308c 12949 insn = insns[adj_idx];
83a28819 12950 load_reg = insn_def_regno(&insn);
d6c2308c
JW
12951 if (!aux[adj_idx].zext_dst) {
12952 u8 code, class;
12953 u32 imm_rnd;
12954
12955 if (!rnd_hi32)
12956 continue;
12957
12958 code = insn.code;
12959 class = BPF_CLASS(code);
83a28819 12960 if (load_reg == -1)
d6c2308c
JW
12961 continue;
12962
12963 /* NOTE: arg "reg" (the fourth one) is only used for
83a28819
IL
12964 * BPF_STX + SRC_OP, so it is safe to pass NULL
12965 * here.
d6c2308c 12966 */
83a28819 12967 if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
d6c2308c
JW
12968 if (class == BPF_LD &&
12969 BPF_MODE(code) == BPF_IMM)
12970 i++;
12971 continue;
12972 }
12973
12974 /* ctx load could be transformed into wider load. */
12975 if (class == BPF_LDX &&
12976 aux[adj_idx].ptr_type == PTR_TO_CTX)
12977 continue;
12978
12979 imm_rnd = get_random_int();
12980 rnd_hi32_patch[0] = insn;
12981 rnd_hi32_patch[1].imm = imm_rnd;
83a28819 12982 rnd_hi32_patch[3].dst_reg = load_reg;
d6c2308c
JW
12983 patch = rnd_hi32_patch;
12984 patch_len = 4;
12985 goto apply_patch_buffer;
12986 }
12987
39491867
BJ
12988 /* Add in a zero-extend instruction if a) the JIT has requested
12989 * it or b) it's a CMPXCHG.
12990 *
12991 * The latter is because: BPF_CMPXCHG always loads a value into
12992 * R0, therefore always zero-extends. However some archs'
12993 * equivalent instruction only does this load when the
12994 * comparison is successful. This detail of CMPXCHG is
12995 * orthogonal to the general zero-extension behaviour of the
12996 * CPU, so it's treated independently of bpf_jit_needs_zext.
12997 */
12998 if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
a4b1d3c1
JW
12999 continue;
13000
83a28819
IL
13001 if (WARN_ON(load_reg == -1)) {
13002 verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
13003 return -EFAULT;
b2e37a71
IL
13004 }
13005
a4b1d3c1 13006 zext_patch[0] = insn;
b2e37a71
IL
13007 zext_patch[1].dst_reg = load_reg;
13008 zext_patch[1].src_reg = load_reg;
d6c2308c
JW
13009 patch = zext_patch;
13010 patch_len = 2;
13011apply_patch_buffer:
13012 new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
a4b1d3c1
JW
13013 if (!new_prog)
13014 return -ENOMEM;
13015 env->prog = new_prog;
13016 insns = new_prog->insnsi;
13017 aux = env->insn_aux_data;
d6c2308c 13018 delta += patch_len - 1;
a4b1d3c1
JW
13019 }
13020
13021 return 0;
13022}
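/* Rough sketch (hypothetical registers) of the two patches built above.
 * When the JIT asked for explicit zero extension, a 32-bit def such as
 *
 *   w1 = w2
 * is patched into
 *   w1 = w2
 *   w1 = w1            // BPF_ZEXT_REG: clears the upper 32 bits of r1
 *
 * Under BPF_F_TEST_RND_HI32 the opposite is done for defs whose upper half
 * the verifier believes is dead: the upper 32 bits get OR'ed with a random
 * value (staged through BPF_REG_AX) so that any hidden dependence on them
 * shows up in tests.
 */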
13023
c64b7983
JS
13024/* convert load instructions that access fields of a context type into a
13025 * sequence of instructions that access fields of the underlying structure:
13026 * struct __sk_buff -> struct sk_buff
13027 * struct bpf_sock_ops -> struct sock
9bac3d6d 13028 */
58e2af8b 13029static int convert_ctx_accesses(struct bpf_verifier_env *env)
9bac3d6d 13030{
00176a34 13031 const struct bpf_verifier_ops *ops = env->ops;
f96da094 13032 int i, cnt, size, ctx_field_size, delta = 0;
3df126f3 13033 const int insn_cnt = env->prog->len;
36bbef52 13034 struct bpf_insn insn_buf[16], *insn;
46f53a65 13035 u32 target_size, size_default, off;
9bac3d6d 13036 struct bpf_prog *new_prog;
d691f9e8 13037 enum bpf_access_type type;
f96da094 13038 bool is_narrower_load;
9bac3d6d 13039
b09928b9
DB
13040 if (ops->gen_prologue || env->seen_direct_write) {
13041 if (!ops->gen_prologue) {
13042 verbose(env, "bpf verifier is misconfigured\n");
13043 return -EINVAL;
13044 }
36bbef52
DB
13045 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
13046 env->prog);
13047 if (cnt >= ARRAY_SIZE(insn_buf)) {
61bd5218 13048 verbose(env, "bpf verifier is misconfigured\n");
36bbef52
DB
13049 return -EINVAL;
13050 } else if (cnt) {
8041902d 13051 new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
36bbef52
DB
13052 if (!new_prog)
13053 return -ENOMEM;
8041902d 13054
36bbef52 13055 env->prog = new_prog;
3df126f3 13056 delta += cnt - 1;
36bbef52
DB
13057 }
13058 }
13059
c64b7983 13060 if (bpf_prog_is_dev_bound(env->prog->aux))
9bac3d6d
AS
13061 return 0;
13062
3df126f3 13063 insn = env->prog->insnsi + delta;
36bbef52 13064
9bac3d6d 13065 for (i = 0; i < insn_cnt; i++, insn++) {
c64b7983 13066 bpf_convert_ctx_access_t convert_ctx_access;
2039f26f 13067 bool ctx_access;
c64b7983 13068
62c7989b
DB
13069 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
13070 insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
13071 insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
2039f26f 13072 insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
d691f9e8 13073 type = BPF_READ;
2039f26f
DB
13074 ctx_access = true;
13075 } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
13076 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
13077 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
13078 insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
13079 insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
13080 insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
13081 insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
13082 insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
d691f9e8 13083 type = BPF_WRITE;
2039f26f
DB
13084 ctx_access = BPF_CLASS(insn->code) == BPF_STX;
13085 } else {
9bac3d6d 13086 continue;
2039f26f 13087 }
9bac3d6d 13088
af86ca4e 13089 if (type == BPF_WRITE &&
2039f26f 13090 env->insn_aux_data[i + delta].sanitize_stack_spill) {
af86ca4e 13091 struct bpf_insn patch[] = {
af86ca4e 13092 *insn,
2039f26f 13093 BPF_ST_NOSPEC(),
af86ca4e
AS
13094 };
13095
13096 cnt = ARRAY_SIZE(patch);
13097 new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
13098 if (!new_prog)
13099 return -ENOMEM;
13100
13101 delta += cnt - 1;
13102 env->prog = new_prog;
13103 insn = new_prog->insnsi + i + delta;
13104 continue;
13105 }
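/* Sketch of the patch above (hypothetical spill): a sanitized stack spill
 *
 *   *(u64 *)(r10 - 8) = r1
 *
 * is followed by BPF_ST_NOSPEC(), a speculation barrier, so that a
 * speculatively bypassed store (Spectre v4) cannot leave stale data in the
 * slot for a dependent load to observe.
 */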
13106
2039f26f
DB
13107 if (!ctx_access)
13108 continue;
13109
6efe152d 13110 switch ((int)env->insn_aux_data[i + delta].ptr_type) {
c64b7983
JS
13111 case PTR_TO_CTX:
13112 if (!ops->convert_ctx_access)
13113 continue;
13114 convert_ctx_access = ops->convert_ctx_access;
13115 break;
13116 case PTR_TO_SOCKET:
46f8bc92 13117 case PTR_TO_SOCK_COMMON:
c64b7983
JS
13118 convert_ctx_access = bpf_sock_convert_ctx_access;
13119 break;
655a51e5
MKL
13120 case PTR_TO_TCP_SOCK:
13121 convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
13122 break;
fada7fdc
JL
13123 case PTR_TO_XDP_SOCK:
13124 convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
13125 break;
2a02759e 13126 case PTR_TO_BTF_ID:
6efe152d 13127 case PTR_TO_BTF_ID | PTR_UNTRUSTED:
27ae7997
MKL
13128 if (type == BPF_READ) {
13129 insn->code = BPF_LDX | BPF_PROBE_MEM |
13130 BPF_SIZE((insn)->code);
13131 env->prog->aux->num_exentries++;
7e40781c 13132 } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
2a02759e
AS
13133 verbose(env, "Writes through BTF pointers are not allowed\n");
13134 return -EINVAL;
13135 }
2a02759e 13136 continue;
c64b7983 13137 default:
9bac3d6d 13138 continue;
c64b7983 13139 }
9bac3d6d 13140
31fd8581 13141 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
f96da094 13142 size = BPF_LDST_BYTES(insn);
31fd8581
YS
13143
13144 /* If the read access is a narrower load of the field,
13145 * convert to a 4/8-byte load, to minimize program type specific
13146 * convert_ctx_access changes. If conversion is successful,
13147 * we will apply proper mask to the result.
13148 */
f96da094 13149 is_narrower_load = size < ctx_field_size;
46f53a65
AI
13150 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
13151 off = insn->off;
31fd8581 13152 if (is_narrower_load) {
f96da094
DB
13153 u8 size_code;
13154
13155 if (type == BPF_WRITE) {
61bd5218 13156 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
f96da094
DB
13157 return -EINVAL;
13158 }
31fd8581 13159
f96da094 13160 size_code = BPF_H;
31fd8581
YS
13161 if (ctx_field_size == 4)
13162 size_code = BPF_W;
13163 else if (ctx_field_size == 8)
13164 size_code = BPF_DW;
f96da094 13165
bc23105c 13166 insn->off = off & ~(size_default - 1);
31fd8581
YS
13167 insn->code = BPF_LDX | BPF_MEM | size_code;
13168 }
f96da094
DB
13169
13170 target_size = 0;
c64b7983
JS
13171 cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
13172 &target_size);
f96da094
DB
13173 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
13174 (ctx_field_size && !target_size)) {
61bd5218 13175 verbose(env, "bpf verifier is misconfigured\n");
9bac3d6d
AS
13176 return -EINVAL;
13177 }
f96da094
DB
13178
13179 if (is_narrower_load && size < target_size) {
d895a0f1
IL
13180 u8 shift = bpf_ctx_narrow_access_offset(
13181 off, size, size_default) * 8;
d7af7e49
AI
13182 if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
13183 verbose(env, "bpf verifier narrow ctx load misconfigured\n");
13184 return -EINVAL;
13185 }
46f53a65
AI
13186 if (ctx_field_size <= 4) {
13187 if (shift)
13188 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
13189 insn->dst_reg,
13190 shift);
31fd8581 13191 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
f96da094 13192 (1 << size * 8) - 1);
46f53a65
AI
13193 } else {
13194 if (shift)
13195 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
13196 insn->dst_reg,
13197 shift);
31fd8581 13198 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
e2f7fc0a 13199 (1ULL << size * 8) - 1);
46f53a65 13200 }
31fd8581 13201 }
9bac3d6d 13202
8041902d 13203 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
9bac3d6d
AS
13204 if (!new_prog)
13205 return -ENOMEM;
13206
3df126f3 13207 delta += cnt - 1;
9bac3d6d
AS
13208
13209 /* keep walking new program and skip insns we just inserted */
13210 env->prog = new_prog;
3df126f3 13211 insn = new_prog->insnsi + i + delta;
9bac3d6d
AS
13212 }
13213
13214 return 0;
13215}
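/* A rough example (made-up offsets) of the narrow-load handling above.
 * A one-byte read of a 4-byte context field, e.g.
 *
 *   r0 = *(u8 *)(r1 + off + 1)
 *
 * is first widened to the 4-byte aligned load, handed to the program
 * type's convert_ctx_access() to target the real kernel struct, and then
 * trimmed back down:
 *
 *   r0 = *(u32 *)(skb + real_off)   // converted, widened load
 *   w0 >>= 8                        // shift the wanted byte down
 *   w0 &= 0xff                      // mask to the original 1-byte size
 */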
13216
1c2a088a
AS
13217static int jit_subprogs(struct bpf_verifier_env *env)
13218{
13219 struct bpf_prog *prog = env->prog, **func, *tmp;
13220 int i, j, subprog_start, subprog_end = 0, len, subprog;
a748c697 13221 struct bpf_map *map_ptr;
7105e828 13222 struct bpf_insn *insn;
1c2a088a 13223 void *old_bpf_func;
c4c0bdc0 13224 int err, num_exentries;
1c2a088a 13225
f910cefa 13226 if (env->subprog_cnt <= 1)
1c2a088a
AS
13227 return 0;
13228
7105e828 13229 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
3990ed4c 13230 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
69c087ba 13231 continue;
69c087ba 13232
c7a89784
DB
13233 /* Upon error here we cannot fall back to the interpreter but
13234 * need a hard reject of the program. Thus -EFAULT is
13235 * propagated in any case.
13236 */
1c2a088a
AS
13237 subprog = find_subprog(env, i + insn->imm + 1);
13238 if (subprog < 0) {
13239 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
13240 i + insn->imm + 1);
13241 return -EFAULT;
13242 }
13243 /* temporarily remember subprog id inside insn instead of
13244 * aux_data, since next loop will split up all insns into funcs
13245 */
f910cefa 13246 insn->off = subprog;
1c2a088a
AS
13247 /* remember original imm in case JIT fails and fallback
13248 * to interpreter will be needed
13249 */
13250 env->insn_aux_data[i].call_imm = insn->imm;
13251 /* point imm to __bpf_call_base+1 from JITs point of view */
13252 insn->imm = 1;
3990ed4c
MKL
13253 if (bpf_pseudo_func(insn))
13254 /* jit (e.g. x86_64) may emit fewer instructions
13255 * if it learns a u32 imm is the same as a u64 imm.
13256 * Force a non-zero value here.
13257 */
13258 insn[1].imm = 1;
1c2a088a
AS
13259 }
13260
c454a46b
MKL
13261 err = bpf_prog_alloc_jited_linfo(prog);
13262 if (err)
13263 goto out_undo_insn;
13264
13265 err = -ENOMEM;
6396bb22 13266 func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
1c2a088a 13267 if (!func)
c7a89784 13268 goto out_undo_insn;
1c2a088a 13269
f910cefa 13270 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a 13271 subprog_start = subprog_end;
4cb3d99c 13272 subprog_end = env->subprog_info[i + 1].start;
1c2a088a
AS
13273
13274 len = subprog_end - subprog_start;
fb7dd8bc 13275 /* bpf_prog_run() doesn't call subprogs directly,
492ecee8
AS
13276 * hence main prog stats include the runtime of subprogs.
13277 * subprogs don't have IDs and are not reachable via prog_get_next_id
700d4796 13278 * func[i]->stats will never be accessed and stays NULL
492ecee8
AS
13279 */
13280 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
1c2a088a
AS
13281 if (!func[i])
13282 goto out_free;
13283 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
13284 len * sizeof(struct bpf_insn));
4f74d809 13285 func[i]->type = prog->type;
1c2a088a 13286 func[i]->len = len;
4f74d809
DB
13287 if (bpf_prog_calc_tag(func[i]))
13288 goto out_free;
1c2a088a 13289 func[i]->is_func = 1;
ba64e7d8 13290 func[i]->aux->func_idx = i;
f263a814 13291 /* Below members will be freed only at prog->aux */
ba64e7d8
YS
13292 func[i]->aux->btf = prog->aux->btf;
13293 func[i]->aux->func_info = prog->aux->func_info;
f263a814
JF
13294 func[i]->aux->poke_tab = prog->aux->poke_tab;
13295 func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
ba64e7d8 13296
a748c697 13297 for (j = 0; j < prog->aux->size_poke_tab; j++) {
f263a814 13298 struct bpf_jit_poke_descriptor *poke;
a748c697 13299
f263a814
JF
13300 poke = &prog->aux->poke_tab[j];
13301 if (poke->insn_idx < subprog_end &&
13302 poke->insn_idx >= subprog_start)
13303 poke->aux = func[i]->aux;
a748c697
MF
13304 }
13305
1c2a088a
AS
13306 /* Use bpf_prog_F_tag to indicate functions in stack traces.
13307 * Long term would need debug info to populate names
13308 */
13309 func[i]->aux->name[0] = 'F';
9c8105bd 13310 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
1c2a088a 13311 func[i]->jit_requested = 1;
d2a3b7c5 13312 func[i]->blinding_requested = prog->blinding_requested;
e6ac2450 13313 func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
2357672c 13314 func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
c454a46b
MKL
13315 func[i]->aux->linfo = prog->aux->linfo;
13316 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
13317 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
13318 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
c4c0bdc0
YS
13319 num_exentries = 0;
13320 insn = func[i]->insnsi;
13321 for (j = 0; j < func[i]->len; j++, insn++) {
13322 if (BPF_CLASS(insn->code) == BPF_LDX &&
13323 BPF_MODE(insn->code) == BPF_PROBE_MEM)
13324 num_exentries++;
13325 }
13326 func[i]->aux->num_exentries = num_exentries;
ebf7d1f5 13327 func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
1c2a088a
AS
13328 func[i] = bpf_int_jit_compile(func[i]);
13329 if (!func[i]->jited) {
13330 err = -ENOTSUPP;
13331 goto out_free;
13332 }
13333 cond_resched();
13334 }
a748c697 13335
1c2a088a
AS
13336 /* at this point all bpf functions were successfully JITed
13337 * now populate all bpf_calls with correct addresses and
13338 * run last pass of JIT
13339 */
f910cefa 13340 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
13341 insn = func[i]->insnsi;
13342 for (j = 0; j < func[i]->len; j++, insn++) {
69c087ba 13343 if (bpf_pseudo_func(insn)) {
3990ed4c 13344 subprog = insn->off;
69c087ba
YS
13345 insn[0].imm = (u32)(long)func[subprog]->bpf_func;
13346 insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
13347 continue;
13348 }
23a2d70c 13349 if (!bpf_pseudo_call(insn))
1c2a088a
AS
13350 continue;
13351 subprog = insn->off;
3d717fad 13352 insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
1c2a088a 13353 }
2162fed4
SD
13354
13355 /* we use the aux data to keep a list of the start addresses
13356 * of the JITed images for each function in the program
13357 *
13358 * for some architectures, such as powerpc64, the imm field
13359 * might not be large enough to hold the offset of the start
13360 * address of the callee's JITed image from __bpf_call_base
13361 *
13362 * in such cases, we can lookup the start address of a callee
13363 * by using its subprog id, available from the off field of
13364 * the call instruction, as an index for this list
13365 */
13366 func[i]->aux->func = func;
13367 func[i]->aux->func_cnt = env->subprog_cnt;
1c2a088a 13368 }
f910cefa 13369 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
13370 old_bpf_func = func[i]->bpf_func;
13371 tmp = bpf_int_jit_compile(func[i]);
13372 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
13373 verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
c7a89784 13374 err = -ENOTSUPP;
1c2a088a
AS
13375 goto out_free;
13376 }
13377 cond_resched();
13378 }
13379
13380 /* finally lock prog and jit images for all functions and
13381 * populate kallsyms
13382 */
f910cefa 13383 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
13384 bpf_prog_lock_ro(func[i]);
13385 bpf_prog_kallsyms_add(func[i]);
13386 }
7105e828
DB
13387
13388 /* Last step: make now unused interpreter insns from main
13389 * prog consistent for later dump requests, so they can
13390 * later look the same as if they were interpreted only.
13391 */
13392 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
69c087ba
YS
13393 if (bpf_pseudo_func(insn)) {
13394 insn[0].imm = env->insn_aux_data[i].call_imm;
3990ed4c
MKL
13395 insn[1].imm = insn->off;
13396 insn->off = 0;
69c087ba
YS
13397 continue;
13398 }
23a2d70c 13399 if (!bpf_pseudo_call(insn))
7105e828
DB
13400 continue;
13401 insn->off = env->insn_aux_data[i].call_imm;
13402 subprog = find_subprog(env, i + insn->off + 1);
dbecd738 13403 insn->imm = subprog;
7105e828
DB
13404 }
13405
1c2a088a
AS
13406 prog->jited = 1;
13407 prog->bpf_func = func[0]->bpf_func;
d00c6473 13408 prog->jited_len = func[0]->jited_len;
1c2a088a 13409 prog->aux->func = func;
f910cefa 13410 prog->aux->func_cnt = env->subprog_cnt;
e16301fb 13411 bpf_prog_jit_attempt_done(prog);
1c2a088a
AS
13412 return 0;
13413out_free:
f263a814
JF
13414 /* We failed JIT'ing, so at this point we need to unregister poke
13415 * descriptors from subprogs, so that kernel is not attempting to
13416 * patch it anymore as we're freeing the subprog JIT memory.
13417 */
13418 for (i = 0; i < prog->aux->size_poke_tab; i++) {
13419 map_ptr = prog->aux->poke_tab[i].tail_call.map;
13420 map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
13421 }
13422 /* At this point we're guaranteed that poke descriptors are not
13423 * live anymore. We can just unlink its descriptor table as it's
13424 * released with the main prog.
13425 */
a748c697
MF
13426 for (i = 0; i < env->subprog_cnt; i++) {
13427 if (!func[i])
13428 continue;
f263a814 13429 func[i]->aux->poke_tab = NULL;
a748c697
MF
13430 bpf_jit_free(func[i]);
13431 }
1c2a088a 13432 kfree(func);
c7a89784 13433out_undo_insn:
1c2a088a
AS
13434 /* cleanup main prog to be interpreted */
13435 prog->jit_requested = 0;
d2a3b7c5 13436 prog->blinding_requested = 0;
1c2a088a 13437 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
23a2d70c 13438 if (!bpf_pseudo_call(insn))
1c2a088a
AS
13439 continue;
13440 insn->off = 0;
13441 insn->imm = env->insn_aux_data[i].call_imm;
13442 }
e16301fb 13443 bpf_prog_jit_attempt_done(prog);
1c2a088a
AS
13444 return err;
13445}
13446
1ea47e01
AS
13447static int fixup_call_args(struct bpf_verifier_env *env)
13448{
19d28fbd 13449#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1ea47e01
AS
13450 struct bpf_prog *prog = env->prog;
13451 struct bpf_insn *insn = prog->insnsi;
e6ac2450 13452 bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
1ea47e01 13453 int i, depth;
19d28fbd 13454#endif
e4052d06 13455 int err = 0;
1ea47e01 13456
e4052d06
QM
13457 if (env->prog->jit_requested &&
13458 !bpf_prog_is_dev_bound(env->prog->aux)) {
19d28fbd
DM
13459 err = jit_subprogs(env);
13460 if (err == 0)
1c2a088a 13461 return 0;
c7a89784
DB
13462 if (err == -EFAULT)
13463 return err;
19d28fbd
DM
13464 }
13465#ifndef CONFIG_BPF_JIT_ALWAYS_ON
e6ac2450
MKL
13466 if (has_kfunc_call) {
13467 verbose(env, "calling kernel functions is not allowed in non-JITed programs\n");
13468 return -EINVAL;
13469 }
e411901c
MF
13470 if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
13471 /* When JIT fails the progs with bpf2bpf calls and tail_calls
13472 * have to be rejected, since interpreter doesn't support them yet.
13473 */
13474 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
13475 return -EINVAL;
13476 }
1ea47e01 13477 for (i = 0; i < prog->len; i++, insn++) {
69c087ba
YS
13478 if (bpf_pseudo_func(insn)) {
13479 /* When JIT fails the progs with callback calls
13480 * have to be rejected, since interpreter doesn't support them yet.
13481 */
13482 verbose(env, "callbacks are not allowed in non-JITed programs\n");
13483 return -EINVAL;
13484 }
13485
23a2d70c 13486 if (!bpf_pseudo_call(insn))
1ea47e01
AS
13487 continue;
13488 depth = get_callee_stack_depth(env, insn, i);
13489 if (depth < 0)
13490 return depth;
13491 bpf_patch_call_args(insn, depth);
13492 }
19d28fbd
DM
13493 err = 0;
13494#endif
13495 return err;
1ea47e01
AS
13496}
13497
e6ac2450
MKL
13498static int fixup_kfunc_call(struct bpf_verifier_env *env,
13499 struct bpf_insn *insn)
13500{
13501 const struct bpf_kfunc_desc *desc;
13502
a5d82727
KKD
13503 if (!insn->imm) {
13504 verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
13505 return -EINVAL;
13506 }
13507
e6ac2450
MKL
13508 /* insn->imm has the btf func_id. Replace it with
13509 * an address (relative to __bpf_call_base).
13510 */
2357672c 13511 desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
e6ac2450
MKL
13512 if (!desc) {
13513 verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
13514 insn->imm);
13515 return -EFAULT;
13516 }
13517
13518 insn->imm = desc->imm;
13519
13520 return 0;
13521}
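/* Hedged sketch of the imm rewrite above, with a made-up id and address:
 *
 *   call  #42                        // imm = BTF func_id of the kfunc
 * becomes
 *   call  #(kfunc_addr - __bpf_call_base)
 *
 * i.e. after this point the instruction carries a call-base-relative
 * address, much like calls to ordinary helpers.
 */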
13522
e6ac5933
BJ
13523/* Do various post-verification rewrites in a single program pass.
13524 * These rewrites simplify JIT and interpreter implementations.
e245c5c6 13525 */
e6ac5933 13526static int do_misc_fixups(struct bpf_verifier_env *env)
e245c5c6 13527{
79741b3b 13528 struct bpf_prog *prog = env->prog;
f92c1e18 13529 enum bpf_attach_type eatype = prog->expected_attach_type;
9b99edca 13530 enum bpf_prog_type prog_type = resolve_prog_type(prog);
79741b3b 13531 struct bpf_insn *insn = prog->insnsi;
e245c5c6 13532 const struct bpf_func_proto *fn;
79741b3b 13533 const int insn_cnt = prog->len;
09772d92 13534 const struct bpf_map_ops *ops;
c93552c4 13535 struct bpf_insn_aux_data *aux;
81ed18ab
AS
13536 struct bpf_insn insn_buf[16];
13537 struct bpf_prog *new_prog;
13538 struct bpf_map *map_ptr;
d2e4c1e6 13539 int i, ret, cnt, delta = 0;
e245c5c6 13540
79741b3b 13541 for (i = 0; i < insn_cnt; i++, insn++) {
e6ac5933 13542 /* Make divide-by-zero exceptions impossible. */
f6b1b3bf
DB
13543 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
13544 insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
13545 insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
68fda450 13546 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
f6b1b3bf 13547 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
e88b2c6e
DB
13548 bool isdiv = BPF_OP(insn->code) == BPF_DIV;
13549 struct bpf_insn *patchlet;
13550 struct bpf_insn chk_and_div[] = {
9b00f1b7 13551 /* [R,W]x div 0 -> 0 */
e88b2c6e
DB
13552 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
13553 BPF_JNE | BPF_K, insn->src_reg,
13554 0, 2, 0),
f6b1b3bf
DB
13555 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
13556 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
13557 *insn,
13558 };
e88b2c6e 13559 struct bpf_insn chk_and_mod[] = {
9b00f1b7 13560 /* [R,W]x mod 0 -> [R,W]x */
e88b2c6e
DB
13561 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
13562 BPF_JEQ | BPF_K, insn->src_reg,
9b00f1b7 13563 0, 1 + (is64 ? 0 : 1), 0),
f6b1b3bf 13564 *insn,
9b00f1b7
DB
13565 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
13566 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
f6b1b3bf 13567 };
f6b1b3bf 13568
e88b2c6e
DB
13569 patchlet = isdiv ? chk_and_div : chk_and_mod;
13570 cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
9b00f1b7 13571 ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
f6b1b3bf
DB
13572
13573 new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
68fda450
AS
13574 if (!new_prog)
13575 return -ENOMEM;
13576
13577 delta += cnt - 1;
13578 env->prog = prog = new_prog;
13579 insn = new_prog->insnsi + i + delta;
13580 continue;
13581 }
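/* Sketch of the chk_and_div patchlet above for a hypothetical 64-bit
 * 'r2 /= r3':
 *
 *   if r3 != 0 goto +2   // non-zero divisor: run the real division
 *   w2 ^= w2             // div by zero is defined to yield 0
 *   goto +1              // skip the division
 *   r2 /= r3
 *
 * The mod variant instead jumps over the instruction on a zero divisor,
 * leaving the dividend untouched ([R,W]x mod 0 -> [R,W]x).
 */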
13582
e6ac5933 13583 /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
e0cea7ce
DB
13584 if (BPF_CLASS(insn->code) == BPF_LD &&
13585 (BPF_MODE(insn->code) == BPF_ABS ||
13586 BPF_MODE(insn->code) == BPF_IND)) {
13587 cnt = env->ops->gen_ld_abs(insn, insn_buf);
13588 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
13589 verbose(env, "bpf verifier is misconfigured\n");
13590 return -EINVAL;
13591 }
13592
13593 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13594 if (!new_prog)
13595 return -ENOMEM;
13596
13597 delta += cnt - 1;
13598 env->prog = prog = new_prog;
13599 insn = new_prog->insnsi + i + delta;
13600 continue;
13601 }
13602
e6ac5933 13603 /* Rewrite pointer arithmetic to mitigate speculation attacks. */
979d63d5
DB
13604 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
13605 insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
13606 const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
13607 const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
979d63d5 13608 struct bpf_insn *patch = &insn_buf[0];
801c6058 13609 bool issrc, isneg, isimm;
979d63d5
DB
13610 u32 off_reg;
13611
13612 aux = &env->insn_aux_data[i + delta];
3612af78
DB
13613 if (!aux->alu_state ||
13614 aux->alu_state == BPF_ALU_NON_POINTER)
979d63d5
DB
13615 continue;
13616
13617 isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
13618 issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
13619 BPF_ALU_SANITIZE_SRC;
801c6058 13620 isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
979d63d5
DB
13621
13622 off_reg = issrc ? insn->src_reg : insn->dst_reg;
801c6058
DB
13623 if (isimm) {
13624 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
13625 } else {
13626 if (isneg)
13627 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
13628 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
13629 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
13630 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
13631 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
13632 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
13633 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
13634 }
b9b34ddb
DB
13635 if (!issrc)
13636 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
13637 insn->src_reg = BPF_REG_AX;
979d63d5
DB
13638 if (isneg)
13639 insn->code = insn->code == code_add ?
13640 code_sub : code_add;
13641 *patch++ = *insn;
801c6058 13642 if (issrc && isneg && !isimm)
979d63d5
DB
13643 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
13644 cnt = patch - insn_buf;
13645
13646 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13647 if (!new_prog)
13648 return -ENOMEM;
13649
13650 delta += cnt - 1;
13651 env->prog = prog = new_prog;
13652 insn = new_prog->insnsi + i + delta;
13653 continue;
13654 }
13655
79741b3b
AS
13656 if (insn->code != (BPF_JMP | BPF_CALL))
13657 continue;
cc8b0b92
AS
13658 if (insn->src_reg == BPF_PSEUDO_CALL)
13659 continue;
e6ac2450
MKL
13660 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
13661 ret = fixup_kfunc_call(env, insn);
13662 if (ret)
13663 return ret;
13664 continue;
13665 }
e245c5c6 13666
79741b3b
AS
13667 if (insn->imm == BPF_FUNC_get_route_realm)
13668 prog->dst_needed = 1;
13669 if (insn->imm == BPF_FUNC_get_prandom_u32)
13670 bpf_user_rnd_init_once();
9802d865
JB
13671 if (insn->imm == BPF_FUNC_override_return)
13672 prog->kprobe_override = 1;
79741b3b 13673 if (insn->imm == BPF_FUNC_tail_call) {
7b9f6da1
DM
13674 /* If we tail call into other programs, we
13675 * cannot make any assumptions since they can
13676 * be replaced dynamically during runtime in
13677 * the program array.
13678 */
13679 prog->cb_access = 1;
e411901c
MF
13680 if (!allow_tail_call_in_subprogs(env))
13681 prog->aux->stack_depth = MAX_BPF_STACK;
13682 prog->aux->max_pkt_offset = MAX_PACKET_OFF;
7b9f6da1 13683
79741b3b 13684 /* mark bpf_tail_call as different opcode to avoid
8fb33b60 13685 * conditional branch in the interpreter for every normal
79741b3b
AS
13686 * call and to prevent accidental JITing by a JIT compiler
13687 * that doesn't support bpf_tail_call yet
e245c5c6 13688 */
79741b3b 13689 insn->imm = 0;
71189fa9 13690 insn->code = BPF_JMP | BPF_TAIL_CALL;
b2157399 13691
c93552c4 13692 aux = &env->insn_aux_data[i + delta];
d2a3b7c5 13693 if (env->bpf_capable && !prog->blinding_requested &&
cc52d914 13694 prog->jit_requested &&
d2e4c1e6
DB
13695 !bpf_map_key_poisoned(aux) &&
13696 !bpf_map_ptr_poisoned(aux) &&
13697 !bpf_map_ptr_unpriv(aux)) {
13698 struct bpf_jit_poke_descriptor desc = {
13699 .reason = BPF_POKE_REASON_TAIL_CALL,
13700 .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
13701 .tail_call.key = bpf_map_key_immediate(aux),
a748c697 13702 .insn_idx = i + delta,
d2e4c1e6
DB
13703 };
13704
13705 ret = bpf_jit_add_poke_descriptor(prog, &desc);
13706 if (ret < 0) {
13707 verbose(env, "adding tail call poke descriptor failed\n");
13708 return ret;
13709 }
13710
13711 insn->imm = ret + 1;
13712 continue;
13713 }
13714
c93552c4
DB
13715 if (!bpf_map_ptr_unpriv(aux))
13716 continue;
13717
b2157399
AS
13718 /* instead of changing every JIT dealing with tail_call
13719 * emit two extra insns:
13720 * if (index >= max_entries) goto out;
13721 * index &= array->index_mask;
13722 * to avoid out-of-bounds cpu speculation
13723 */
c93552c4 13724 if (bpf_map_ptr_poisoned(aux)) {
40950343 13725 verbose(env, "tail_call abusing map_ptr\n");
b2157399
AS
13726 return -EINVAL;
13727 }
c93552c4 13728
d2e4c1e6 13729 map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
b2157399
AS
13730 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
13731 map_ptr->max_entries, 2);
13732 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
13733 container_of(map_ptr,
13734 struct bpf_array,
13735 map)->index_mask);
13736 insn_buf[2] = *insn;
13737 cnt = 3;
13738 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13739 if (!new_prog)
13740 return -ENOMEM;
13741
13742 delta += cnt - 1;
13743 env->prog = prog = new_prog;
13744 insn = new_prog->insnsi + i + delta;
79741b3b
AS
13745 continue;
13746 }
e245c5c6 13747
b00628b1
AS
13748 if (insn->imm == BPF_FUNC_timer_set_callback) {
13749 /* The verifier will process callback_fn as many times as necessary
13750 * with different maps and the register states prepared by
13751 * set_timer_callback_state will be accurate.
13752 *
13753 * The following use case is valid:
13754 * map1 is shared by prog1, prog2, prog3.
13755 * prog1 calls bpf_timer_init for some map1 elements
13756 * prog2 calls bpf_timer_set_callback for some map1 elements.
13757 * Those that were not bpf_timer_init-ed will return -EINVAL.
13758 * prog3 calls bpf_timer_start for some map1 elements.
13759 * Those that were not both bpf_timer_init-ed and
13760 * bpf_timer_set_callback-ed will return -EINVAL.
13761 */
13762 struct bpf_insn ld_addrs[2] = {
13763 BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
13764 };
13765
13766 insn_buf[0] = ld_addrs[0];
13767 insn_buf[1] = ld_addrs[1];
13768 insn_buf[2] = *insn;
13769 cnt = 3;
13770
13771 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13772 if (!new_prog)
13773 return -ENOMEM;
13774
13775 delta += cnt - 1;
13776 env->prog = prog = new_prog;
13777 insn = new_prog->insnsi + i + delta;
13778 goto patch_call_imm;
13779 }
13780
b00fa38a
JK
13781 if (insn->imm == BPF_FUNC_task_storage_get ||
13782 insn->imm == BPF_FUNC_sk_storage_get ||
13783 insn->imm == BPF_FUNC_inode_storage_get) {
13784 if (env->prog->aux->sleepable)
d56c9fe6 13785 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
b00fa38a 13786 else
d56c9fe6 13787 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
b00fa38a
JK
13788 insn_buf[1] = *insn;
13789 cnt = 2;
13790
13791 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13792 if (!new_prog)
13793 return -ENOMEM;
13794
13795 delta += cnt - 1;
13796 env->prog = prog = new_prog;
13797 insn = new_prog->insnsi + i + delta;
13798 goto patch_call_imm;
13799 }
13800
89c63074 13801 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
09772d92
DB
13802 * and other inlining handlers are currently limited to 64 bit
13803 * only.
89c63074 13804 */
60b58afc 13805 if (prog->jit_requested && BITS_PER_LONG == 64 &&
09772d92
DB
13806 (insn->imm == BPF_FUNC_map_lookup_elem ||
13807 insn->imm == BPF_FUNC_map_update_elem ||
84430d42
DB
13808 insn->imm == BPF_FUNC_map_delete_elem ||
13809 insn->imm == BPF_FUNC_map_push_elem ||
13810 insn->imm == BPF_FUNC_map_pop_elem ||
e6a4750f 13811 insn->imm == BPF_FUNC_map_peek_elem ||
0640c77c
AI
13812 insn->imm == BPF_FUNC_redirect_map ||
13813 insn->imm == BPF_FUNC_for_each_map_elem)) {
c93552c4
DB
13814 aux = &env->insn_aux_data[i + delta];
13815 if (bpf_map_ptr_poisoned(aux))
13816 goto patch_call_imm;
13817
d2e4c1e6 13818 map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
09772d92
DB
13819 ops = map_ptr->ops;
13820 if (insn->imm == BPF_FUNC_map_lookup_elem &&
13821 ops->map_gen_lookup) {
13822 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
4a8f87e6
DB
13823 if (cnt == -EOPNOTSUPP)
13824 goto patch_map_ops_generic;
13825 if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
09772d92
DB
13826 verbose(env, "bpf verifier is misconfigured\n");
13827 return -EINVAL;
13828 }
81ed18ab 13829
09772d92
DB
13830 new_prog = bpf_patch_insn_data(env, i + delta,
13831 insn_buf, cnt);
13832 if (!new_prog)
13833 return -ENOMEM;
81ed18ab 13834
09772d92
DB
13835 delta += cnt - 1;
13836 env->prog = prog = new_prog;
13837 insn = new_prog->insnsi + i + delta;
13838 continue;
13839 }
81ed18ab 13840
09772d92
DB
13841 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
13842 (void *(*)(struct bpf_map *map, void *key))NULL));
13843 BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
13844 (int (*)(struct bpf_map *map, void *key))NULL));
13845 BUILD_BUG_ON(!__same_type(ops->map_update_elem,
13846 (int (*)(struct bpf_map *map, void *key, void *value,
13847 u64 flags))NULL));
84430d42
DB
13848 BUILD_BUG_ON(!__same_type(ops->map_push_elem,
13849 (int (*)(struct bpf_map *map, void *value,
13850 u64 flags))NULL));
13851 BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
13852 (int (*)(struct bpf_map *map, void *value))NULL));
13853 BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
13854 (int (*)(struct bpf_map *map, void *value))NULL));
e6a4750f
BT
13855 BUILD_BUG_ON(!__same_type(ops->map_redirect,
13856 (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL));
0640c77c
AI
13857 BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
13858 (int (*)(struct bpf_map *map,
13859 bpf_callback_t callback_fn,
13860 void *callback_ctx,
13861 u64 flags))NULL));
e6a4750f 13862
4a8f87e6 13863patch_map_ops_generic:
09772d92
DB
13864 switch (insn->imm) {
13865 case BPF_FUNC_map_lookup_elem:
3d717fad 13866 insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
09772d92
DB
13867 continue;
13868 case BPF_FUNC_map_update_elem:
3d717fad 13869 insn->imm = BPF_CALL_IMM(ops->map_update_elem);
09772d92
DB
13870 continue;
13871 case BPF_FUNC_map_delete_elem:
3d717fad 13872 insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
09772d92 13873 continue;
84430d42 13874 case BPF_FUNC_map_push_elem:
3d717fad 13875 insn->imm = BPF_CALL_IMM(ops->map_push_elem);
84430d42
DB
13876 continue;
13877 case BPF_FUNC_map_pop_elem:
3d717fad 13878 insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
84430d42
DB
13879 continue;
13880 case BPF_FUNC_map_peek_elem:
3d717fad 13881 insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
84430d42 13882 continue;
e6a4750f 13883 case BPF_FUNC_redirect_map:
3d717fad 13884 insn->imm = BPF_CALL_IMM(ops->map_redirect);
e6a4750f 13885 continue;
0640c77c
AI
13886 case BPF_FUNC_for_each_map_elem:
13887 insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
e6a4750f 13888 continue;
09772d92 13889 }
81ed18ab 13890
09772d92 13891 goto patch_call_imm;
81ed18ab
AS
13892 }
13893
e6ac5933 13894 /* Implement bpf_jiffies64 inline. */
5576b991
MKL
13895 if (prog->jit_requested && BITS_PER_LONG == 64 &&
13896 insn->imm == BPF_FUNC_jiffies64) {
13897 struct bpf_insn ld_jiffies_addr[2] = {
13898 BPF_LD_IMM64(BPF_REG_0,
13899 (unsigned long)&jiffies),
13900 };
13901
13902 insn_buf[0] = ld_jiffies_addr[0];
13903 insn_buf[1] = ld_jiffies_addr[1];
13904 insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
13905 BPF_REG_0, 0);
13906 cnt = 3;
13907
13908 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
13909 cnt);
13910 if (!new_prog)
13911 return -ENOMEM;
13912
13913 delta += cnt - 1;
13914 env->prog = prog = new_prog;
13915 insn = new_prog->insnsi + i + delta;
13916 continue;
13917 }
13918
f92c1e18
JO
13919 /* Implement bpf_get_func_arg inline. */
13920 if (prog_type == BPF_PROG_TYPE_TRACING &&
13921 insn->imm == BPF_FUNC_get_func_arg) {
13922 /* Load nr_args from ctx - 8 */
13923 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
13924 insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
13925 insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
13926 insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
13927 insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
13928 insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
13929 insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
13930 insn_buf[7] = BPF_JMP_A(1);
13931 insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
13932 cnt = 9;
13933
13934 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13935 if (!new_prog)
13936 return -ENOMEM;
13937
13938 delta += cnt - 1;
13939 env->prog = prog = new_prog;
13940 insn = new_prog->insnsi + i + delta;
13941 continue;
13942 }
13943
13944 /* Implement bpf_get_func_ret inline. */
13945 if (prog_type == BPF_PROG_TYPE_TRACING &&
13946 insn->imm == BPF_FUNC_get_func_ret) {
13947 if (eatype == BPF_TRACE_FEXIT ||
13948 eatype == BPF_MODIFY_RETURN) {
13949 /* Load nr_args from ctx - 8 */
13950 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
13951 insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
13952 insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
13953 insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
13954 insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
13955 insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0);
13956 cnt = 6;
13957 } else {
13958 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
13959 cnt = 1;
13960 }
13961
13962 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13963 if (!new_prog)
13964 return -ENOMEM;
13965
13966 delta += cnt - 1;
13967 env->prog = prog = new_prog;
13968 insn = new_prog->insnsi + i + delta;
13969 continue;
13970 }
13971
13972 /* Implement get_func_arg_cnt inline. */
13973 if (prog_type == BPF_PROG_TYPE_TRACING &&
13974 insn->imm == BPF_FUNC_get_func_arg_cnt) {
13975 /* Load nr_args from ctx - 8 */
13976 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
13977
13978 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
13979 if (!new_prog)
13980 return -ENOMEM;
13981
13982 env->prog = prog = new_prog;
13983 insn = new_prog->insnsi + i + delta;
13984 continue;
13985 }
13986
f705ec76 13987 /* Implement bpf_get_func_ip inline. */
9b99edca
JO
13988 if (prog_type == BPF_PROG_TYPE_TRACING &&
13989 insn->imm == BPF_FUNC_get_func_ip) {
f92c1e18
JO
13990 /* Load IP address from ctx - 16 */
13991 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
9b99edca
JO
13992
13993 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
13994 if (!new_prog)
13995 return -ENOMEM;
13996
13997 env->prog = prog = new_prog;
13998 insn = new_prog->insnsi + i + delta;
13999 continue;
14000 }
14001
81ed18ab 14002patch_call_imm:
5e43f899 14003 fn = env->ops->get_func_proto(insn->imm, env->prog);
79741b3b
AS
14004 /* all functions that have a prototype and that the verifier allowed
14005 * programs to call must be real in-kernel functions
14006 */
14007 if (!fn->func) {
61bd5218
JK
14008 verbose(env,
14009 "kernel subsystem misconfigured func %s#%d\n",
79741b3b
AS
14010 func_id_name(insn->imm), insn->imm);
14011 return -EFAULT;
e245c5c6 14012 }
79741b3b 14013 insn->imm = fn->func - __bpf_call_base;
e245c5c6 14014 }
e245c5c6 14015
d2e4c1e6
DB
14016 /* Since poke tab is now finalized, publish aux to tracker. */
14017 for (i = 0; i < prog->aux->size_poke_tab; i++) {
14018 map_ptr = prog->aux->poke_tab[i].tail_call.map;
14019 if (!map_ptr->ops->map_poke_track ||
14020 !map_ptr->ops->map_poke_untrack ||
14021 !map_ptr->ops->map_poke_run) {
14022 verbose(env, "bpf verifier is misconfigured\n");
14023 return -EINVAL;
14024 }
14025
14026 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
14027 if (ret < 0) {
14028 verbose(env, "tracking tail call prog failed\n");
14029 return ret;
14030 }
14031 }
14032
e6ac2450
MKL
14033 sort_kfunc_descs_by_imm(env->prog);
14034
79741b3b
AS
14035 return 0;
14036}
e245c5c6 14037
58e2af8b 14038static void free_states(struct bpf_verifier_env *env)
f1bca824 14039{
58e2af8b 14040 struct bpf_verifier_state_list *sl, *sln;
f1bca824
AS
14041 int i;
14042
9f4686c4
AS
14043 sl = env->free_list;
14044 while (sl) {
14045 sln = sl->next;
14046 free_verifier_state(&sl->state, false);
14047 kfree(sl);
14048 sl = sln;
14049 }
51c39bb1 14050 env->free_list = NULL;
9f4686c4 14051
f1bca824
AS
14052 if (!env->explored_states)
14053 return;
14054
dc2a4ebc 14055 for (i = 0; i < state_htab_size(env); i++) {
f1bca824
AS
14056 sl = env->explored_states[i];
14057
a8f500af
AS
14058 while (sl) {
14059 sln = sl->next;
14060 free_verifier_state(&sl->state, false);
14061 kfree(sl);
14062 sl = sln;
14063 }
51c39bb1 14064 env->explored_states[i] = NULL;
f1bca824 14065 }
51c39bb1 14066}
f1bca824 14067
51c39bb1
AS
14068static int do_check_common(struct bpf_verifier_env *env, int subprog)
14069{
6f8a57cc 14070 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
51c39bb1
AS
14071 struct bpf_verifier_state *state;
14072 struct bpf_reg_state *regs;
14073 int ret, i;
14074
14075 env->prev_linfo = NULL;
14076 env->pass_cnt++;
14077
14078 state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
14079 if (!state)
14080 return -ENOMEM;
14081 state->curframe = 0;
14082 state->speculative = false;
14083 state->branches = 1;
14084 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
14085 if (!state->frame[0]) {
14086 kfree(state);
14087 return -ENOMEM;
14088 }
14089 env->cur_state = state;
14090 init_func_state(env, state->frame[0],
14091 BPF_MAIN_FUNC /* callsite */,
14092 0 /* frameno */,
14093 subprog);
14094
14095 regs = state->frame[state->curframe]->regs;
be8704ff 14096 if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
51c39bb1
AS
14097 ret = btf_prepare_func_args(env, subprog, regs);
14098 if (ret)
14099 goto out;
14100 for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
14101 if (regs[i].type == PTR_TO_CTX)
14102 mark_reg_known_zero(env, regs, i);
14103 else if (regs[i].type == SCALAR_VALUE)
14104 mark_reg_unknown(env, regs, i);
cf9f2f8d 14105 else if (base_type(regs[i].type) == PTR_TO_MEM) {
e5069b9c
DB
14106 const u32 mem_size = regs[i].mem_size;
14107
14108 mark_reg_known_zero(env, regs, i);
14109 regs[i].mem_size = mem_size;
14110 regs[i].id = ++env->id_gen;
14111 }
51c39bb1
AS
14112 }
14113 } else {
14114 /* 1st arg to a function */
14115 regs[BPF_REG_1].type = PTR_TO_CTX;
14116 mark_reg_known_zero(env, regs, BPF_REG_1);
34747c41 14117 ret = btf_check_subprog_arg_match(env, subprog, regs);
51c39bb1
AS
14118 if (ret == -EFAULT)
14119 /* unlikely verifier bug. abort.
14120 * ret == 0 and ret < 0 are sadly acceptable for
14121 * main() function due to backward compatibility.
14122 * E.g. a socket filter program may be written as:
14123 * int bpf_prog(struct pt_regs *ctx)
14124 * and never dereference that ctx in the program.
14125 * 'struct pt_regs' is a type mismatch for socket
14126 * filter that should be using 'struct __sk_buff'.
14127 */
14128 goto out;
14129 }
14130
14131 ret = do_check(env);
14132out:
f59bbfc2
AS
14133 /* check for NULL is necessary, since cur_state can be freed inside
14134 * do_check() under memory pressure.
14135 */
14136 if (env->cur_state) {
14137 free_verifier_state(env->cur_state, true);
14138 env->cur_state = NULL;
14139 }
6f8a57cc
AN
14140 while (!pop_stack(env, NULL, NULL, false));
14141 if (!ret && pop_log)
14142 bpf_vlog_reset(&env->log, 0);
51c39bb1 14143 free_states(env);
51c39bb1
AS
14144 return ret;
14145}
14146
14147/* Verify all global functions in a BPF program one by one based on their BTF.
14148 * All global functions must pass verification. Otherwise the whole program is rejected.
14149 * Consider:
14150 * int bar(int);
14151 * int foo(int f)
14152 * {
14153 * return bar(f);
14154 * }
14155 * int bar(int b)
14156 * {
14157 * ...
14158 * }
14159 * foo() will be verified first for R1=any_scalar_value. During verification it
14160 * will be assumed that bar() has already been verified successfully and the call to bar()
14161 * from foo() will be checked for type match only. Later bar() will be verified
14162 * independently to check that it's safe for R1=any_scalar_value.
14163 */
14164static int do_check_subprogs(struct bpf_verifier_env *env)
14165{
14166 struct bpf_prog_aux *aux = env->prog->aux;
14167 int i, ret;
14168
14169 if (!aux->func_info)
14170 return 0;
14171
14172 for (i = 1; i < env->subprog_cnt; i++) {
14173 if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
14174 continue;
14175 env->insn_idx = env->subprog_info[i].start;
14176 WARN_ON_ONCE(env->insn_idx == 0);
14177 ret = do_check_common(env, i);
14178 if (ret) {
14179 return ret;
14180 } else if (env->log.level & BPF_LOG_LEVEL) {
14181 verbose(env,
14182 "Func#%d is safe for any args that match its prototype\n",
14183 i);
14184 }
14185 }
14186 return 0;
14187}
14188
14189static int do_check_main(struct bpf_verifier_env *env)
14190{
14191 int ret;
14192
14193 env->insn_idx = 0;
14194 ret = do_check_common(env, 0);
14195 if (!ret)
14196 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
14197 return ret;
14198}
14199
14200
06ee7115
AS
14201static void print_verification_stats(struct bpf_verifier_env *env)
14202{
14203 int i;
14204
14205 if (env->log.level & BPF_LOG_STATS) {
14206 verbose(env, "verification time %lld usec\n",
14207 div_u64(env->verification_time, 1000));
14208 verbose(env, "stack depth ");
14209 for (i = 0; i < env->subprog_cnt; i++) {
14210 u32 depth = env->subprog_info[i].stack_depth;
14211
14212 verbose(env, "%d", depth);
14213 if (i + 1 < env->subprog_cnt)
14214 verbose(env, "+");
14215 }
14216 verbose(env, "\n");
14217 }
14218 verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
14219 "total_states %d peak_states %d mark_read %d\n",
14220 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
14221 env->max_states_per_insn, env->total_states,
14222 env->peak_states, env->longest_mark_read_walk);
f1bca824
AS
14223}
14224
27ae7997
MKL
14225static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
14226{
14227 const struct btf_type *t, *func_proto;
14228 const struct bpf_struct_ops *st_ops;
14229 const struct btf_member *member;
14230 struct bpf_prog *prog = env->prog;
14231 u32 btf_id, member_idx;
14232 const char *mname;
14233
12aa8a94
THJ
14234 if (!prog->gpl_compatible) {
14235 verbose(env, "struct ops programs must have a GPL compatible license\n");
14236 return -EINVAL;
14237 }
14238
27ae7997
MKL
14239 btf_id = prog->aux->attach_btf_id;
14240 st_ops = bpf_struct_ops_find(btf_id);
14241 if (!st_ops) {
14242 verbose(env, "attach_btf_id %u is not a supported struct\n",
14243 btf_id);
14244 return -ENOTSUPP;
14245 }
14246
14247 t = st_ops->type;
14248 member_idx = prog->expected_attach_type;
14249 if (member_idx >= btf_type_vlen(t)) {
14250 verbose(env, "attach to invalid member idx %u of struct %s\n",
14251 member_idx, st_ops->name);
14252 return -EINVAL;
14253 }
14254
14255 member = &btf_type_member(t)[member_idx];
14256 mname = btf_name_by_offset(btf_vmlinux, member->name_off);
14257 func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
14258 NULL);
14259 if (!func_proto) {
14260 verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
14261 mname, member_idx, st_ops->name);
14262 return -EINVAL;
14263 }
14264
14265 if (st_ops->check_member) {
14266 int err = st_ops->check_member(t, member);
14267
14268 if (err) {
14269 verbose(env, "attach to unsupported member %s of struct %s\n",
14270 mname, st_ops->name);
14271 return err;
14272 }
14273 }
14274
14275 prog->aux->attach_func_proto = func_proto;
14276 prog->aux->attach_func_name = mname;
14277 env->ops = st_ops->verifier_ops;
14278
14279 return 0;
14280}
6ba43b76
KS
14281#define SECURITY_PREFIX "security_"
14282
f7b12b6f 14283static int check_attach_modify_return(unsigned long addr, const char *func_name)
6ba43b76 14284{
69191754 14285 if (within_error_injection_list(addr) ||
f7b12b6f 14286 !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
6ba43b76 14287 return 0;
6ba43b76 14288
6ba43b76
KS
14289 return -EINVAL;
14290}
27ae7997 14291
1e6c62a8
AS
14292/* list of non-sleepable functions that are otherwise on
14293 * ALLOW_ERROR_INJECTION list
14294 */
14295BTF_SET_START(btf_non_sleepable_error_inject)
14296/* Three functions below can be called from sleepable and non-sleepable context.
14297 * Assume non-sleepable from bpf safety point of view.
14298 */
9dd3d069 14299BTF_ID(func, __filemap_add_folio)
1e6c62a8
AS
14300BTF_ID(func, should_fail_alloc_page)
14301BTF_ID(func, should_failslab)
14302BTF_SET_END(btf_non_sleepable_error_inject)
14303
14304static int check_non_sleepable_error_inject(u32 btf_id)
14305{
14306 return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
14307}
14308
f7b12b6f
THJ
14309int bpf_check_attach_target(struct bpf_verifier_log *log,
14310 const struct bpf_prog *prog,
14311 const struct bpf_prog *tgt_prog,
14312 u32 btf_id,
14313 struct bpf_attach_target_info *tgt_info)
38207291 14314{
be8704ff 14315 bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
f1b9509c 14316 const char prefix[] = "btf_trace_";
5b92a28a 14317 int ret = 0, subprog = -1, i;
38207291 14318 const struct btf_type *t;
5b92a28a 14319 bool conservative = true;
38207291 14320 const char *tname;
5b92a28a 14321 struct btf *btf;
f7b12b6f 14322 long addr = 0;
38207291 14323
f1b9509c 14324 if (!btf_id) {
efc68158 14325 bpf_log(log, "Tracing programs must provide btf_id\n");
f1b9509c
AS
14326 return -EINVAL;
14327 }
22dc4a0f 14328 btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
5b92a28a 14329 if (!btf) {
efc68158 14330 bpf_log(log,
5b92a28a
AS
14331 "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
14332 return -EINVAL;
14333 }
14334 t = btf_type_by_id(btf, btf_id);
f1b9509c 14335 if (!t) {
efc68158 14336 bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
f1b9509c
AS
14337 return -EINVAL;
14338 }
5b92a28a 14339 tname = btf_name_by_offset(btf, t->name_off);
f1b9509c 14340 if (!tname) {
efc68158 14341 bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
f1b9509c
AS
14342 return -EINVAL;
14343 }
5b92a28a
AS
14344 if (tgt_prog) {
14345 struct bpf_prog_aux *aux = tgt_prog->aux;
14346
14347 for (i = 0; i < aux->func_info_cnt; i++)
14348 if (aux->func_info[i].type_id == btf_id) {
14349 subprog = i;
14350 break;
14351 }
14352 if (subprog == -1) {
efc68158 14353 bpf_log(log, "Subprog %s doesn't exist\n", tname);
5b92a28a
AS
14354 return -EINVAL;
14355 }
14356 conservative = aux->func_info_aux[subprog].unreliable;
be8704ff
AS
14357 if (prog_extension) {
14358 if (conservative) {
efc68158 14359 bpf_log(log,
be8704ff
AS
14360 "Cannot replace static functions\n");
14361 return -EINVAL;
14362 }
14363 if (!prog->jit_requested) {
efc68158 14364 bpf_log(log,
be8704ff
AS
14365 "Extension programs should be JITed\n");
14366 return -EINVAL;
14367 }
be8704ff
AS
14368 }
14369 if (!tgt_prog->jited) {
efc68158 14370 bpf_log(log, "Can attach to only JITed progs\n");
be8704ff
AS
14371 return -EINVAL;
14372 }
14373 if (tgt_prog->type == prog->type) {
14374 /* Cannot fentry/fexit another fentry/fexit program.
14375 * Cannot attach program extension to another extension.
14376 * It's ok to attach fentry/fexit to extension program.
14377 */
efc68158 14378 bpf_log(log, "Cannot recursively attach\n");
be8704ff
AS
14379 return -EINVAL;
14380 }
14381 if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
14382 prog_extension &&
14383 (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
14384 tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
14385 /* Program extensions can extend all program types
14386 * except fentry/fexit. The reason is the following.
14387 * The fentry/fexit programs are used for performance
14388 * analysis, stats and can be attached to any program
14389 * type except themselves. When extension program is
14390 * replacing XDP function it is necessary to allow
14391 * performance analysis of all functions. Both original
14392 * XDP program and its program extension. Hence
14393 * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
14394 * allowed. If extending of fentry/fexit was allowed it
14395 * would be possible to create long call chain
14396 * fentry->extension->fentry->extension beyond
14397 * reasonable stack size. Hence extending fentry is not
14398 * allowed.
14399 */
efc68158 14400 bpf_log(log, "Cannot extend fentry/fexit\n");
be8704ff
AS
14401 return -EINVAL;
14402 }
5b92a28a 14403 } else {
be8704ff 14404 if (prog_extension) {
efc68158 14405 bpf_log(log, "Cannot replace kernel functions\n");
be8704ff
AS
14406 return -EINVAL;
14407 }
5b92a28a 14408 }
f1b9509c
AS
14409
14410 switch (prog->expected_attach_type) {
14411 case BPF_TRACE_RAW_TP:
5b92a28a 14412 if (tgt_prog) {
efc68158 14413 bpf_log(log,
5b92a28a
AS
14414 "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
14415 return -EINVAL;
14416 }
38207291 14417 if (!btf_type_is_typedef(t)) {
efc68158 14418 bpf_log(log, "attach_btf_id %u is not a typedef\n",
38207291
MKL
14419 btf_id);
14420 return -EINVAL;
14421 }
f1b9509c 14422 if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
efc68158 14423 bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
38207291
MKL
14424 btf_id, tname);
14425 return -EINVAL;
14426 }
14427 tname += sizeof(prefix) - 1;
5b92a28a 14428 t = btf_type_by_id(btf, t->type);
38207291
MKL
14429 if (!btf_type_is_ptr(t))
14430 /* should never happen in valid vmlinux build */
14431 return -EINVAL;
5b92a28a 14432 t = btf_type_by_id(btf, t->type);
38207291
MKL
14433 if (!btf_type_is_func_proto(t))
14434 /* should never happen in valid vmlinux build */
14435 return -EINVAL;
14436
f7b12b6f 14437 break;
15d83c4d
YS
14438 case BPF_TRACE_ITER:
14439 if (!btf_type_is_func(t)) {
efc68158 14440 bpf_log(log, "attach_btf_id %u is not a function\n",
15d83c4d
YS
14441 btf_id);
14442 return -EINVAL;
14443 }
14444 t = btf_type_by_id(btf, t->type);
14445 if (!btf_type_is_func_proto(t))
14446 return -EINVAL;
f7b12b6f
THJ
14447 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
14448 if (ret)
14449 return ret;
14450 break;
be8704ff
AS
14451 default:
14452 if (!prog_extension)
14453 return -EINVAL;
df561f66 14454 fallthrough;
ae240823 14455 case BPF_MODIFY_RETURN:
9e4e01df 14456 case BPF_LSM_MAC:
fec56f58
AS
14457 case BPF_TRACE_FENTRY:
14458 case BPF_TRACE_FEXIT:
14459 if (!btf_type_is_func(t)) {
efc68158 14460 bpf_log(log, "attach_btf_id %u is not a function\n",
fec56f58
AS
14461 btf_id);
14462 return -EINVAL;
14463 }
be8704ff 14464 if (prog_extension &&
efc68158 14465 btf_check_type_match(log, prog, btf, t))
be8704ff 14466 return -EINVAL;
5b92a28a 14467 t = btf_type_by_id(btf, t->type);
fec56f58
AS
14468 if (!btf_type_is_func_proto(t))
14469 return -EINVAL;
f7b12b6f 14470
4a1e7c0c
THJ
14471 if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
14472 (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
14473 prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
14474 return -EINVAL;
14475
f7b12b6f 14476 if (tgt_prog && conservative)
5b92a28a 14477 t = NULL;
f7b12b6f
THJ
14478
14479 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
fec56f58 14480 if (ret < 0)
f7b12b6f
THJ
14481 return ret;
14482
5b92a28a 14483 if (tgt_prog) {
e9eeec58
YS
14484 if (subprog == 0)
14485 addr = (long) tgt_prog->bpf_func;
14486 else
14487 addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
5b92a28a
AS
14488 } else {
14489 addr = kallsyms_lookup_name(tname);
14490 if (!addr) {
efc68158 14491 bpf_log(log,
5b92a28a
AS
14492 "The address of function %s cannot be found\n",
14493 tname);
f7b12b6f 14494 return -ENOENT;
5b92a28a 14495 }
fec56f58 14496 }
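/* addr is now the attach address: either the target (sub)program's
 * JITed image or a kernel function resolved via kallsyms.
 */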
18644cec 14497
1e6c62a8
AS
14498 if (prog->aux->sleepable) {
14499 ret = -EINVAL;
14500 switch (prog->type) {
14501 case BPF_PROG_TYPE_TRACING:
14502 /* fentry/fexit/fmod_ret progs can be sleepable only if attached to
14503 * a function marked ALLOW_ERROR_INJECTION and not on the denylist.
14504 */
14505 if (!check_non_sleepable_error_inject(btf_id) &&
14506 within_error_injection_list(addr))
14507 ret = 0;
14508 break;
14509 case BPF_PROG_TYPE_LSM:
14510 /* LSM progs check that they are attached to bpf_lsm_*() funcs.
14511 * Only some of them are sleepable.
14512 */
423f1610 14513 if (bpf_lsm_is_sleepable_hook(btf_id))
1e6c62a8
AS
14514 ret = 0;
14515 break;
14516 default:
14517 break;
14518 }
f7b12b6f
THJ
14519 if (ret) {
14520 bpf_log(log, "%s is not sleepable\n", tname);
14521 return ret;
14522 }
1e6c62a8 14523 } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
1af9270e 14524 if (tgt_prog) {
efc68158 14525 bpf_log(log, "can't modify return codes of BPF programs\n");
f7b12b6f
THJ
14526 return -EINVAL;
14527 }
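/* fmod_ret changes the target's return value, so attaching is
 * limited to functions that check_attach_modify_return() deems
 * safe to modify.
 */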
14528 ret = check_attach_modify_return(addr, tname);
14529 if (ret) {
14530 bpf_log(log, "%s() is not modifiable\n", tname);
14531 return ret;
1af9270e 14532 }
18644cec 14533 }
f7b12b6f
THJ
14534
14535 break;
14536 }
14537 tgt_info->tgt_addr = addr;
14538 tgt_info->tgt_name = tname;
14539 tgt_info->tgt_type = t;
14540 return 0;
14541}
14542
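/* Functions that tracing programs are never allowed to attach to:
 * the BPF trampoline machinery itself relies on them, so attaching
 * could recurse.
 */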
35e3815f
JO
14543BTF_SET_START(btf_id_deny)
14544BTF_ID_UNUSED
14545#ifdef CONFIG_SMP
14546BTF_ID(func, migrate_disable)
14547BTF_ID(func, migrate_enable)
14548#endif
14549#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
14550BTF_ID(func, rcu_read_unlock_strict)
14551#endif
14552BTF_SET_END(btf_id_deny)
14553
f7b12b6f
THJ
14554static int check_attach_btf_id(struct bpf_verifier_env *env)
14555{
14556 struct bpf_prog *prog = env->prog;
3aac1ead 14557 struct bpf_prog *tgt_prog = prog->aux->dst_prog;
f7b12b6f
THJ
14558 struct bpf_attach_target_info tgt_info = {};
14559 u32 btf_id = prog->aux->attach_btf_id;
14560 struct bpf_trampoline *tr;
14561 int ret;
14562 u64 key;
14563
79a7f8bd
AS
14564 if (prog->type == BPF_PROG_TYPE_SYSCALL) {
14565 if (prog->aux->sleepable)
14566 /* attach_btf_id has already been checked to be zero */
14567 return 0;
14568 verbose(env, "Syscall programs can only be sleepable\n");
14569 return -EINVAL;
14570 }
14571
f7b12b6f
THJ
14572 if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
14573 prog->type != BPF_PROG_TYPE_LSM) {
14574 verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
14575 return -EINVAL;
14576 }
14577
14578 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
14579 return check_struct_ops_btf_id(env);
14580
14581 if (prog->type != BPF_PROG_TYPE_TRACING &&
14582 prog->type != BPF_PROG_TYPE_LSM &&
14583 prog->type != BPF_PROG_TYPE_EXT)
14584 return 0;
14585
14586 ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
14587 if (ret)
fec56f58 14588 return ret;
f7b12b6f
THJ
14589
14590 if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
3aac1ead
THJ
14591 /* to make freplace programs equivalent to their targets, they
14592 * need to inherit env->ops and expected_attach_type for the
14593 * rest of the verification
14594 */
f7b12b6f
THJ
14595 env->ops = bpf_verifier_ops[tgt_prog->type];
14596 prog->expected_attach_type = tgt_prog->expected_attach_type;
14597 }
14598
14599 /* store info about the attachment target that will be used later */
14600 prog->aux->attach_func_proto = tgt_info.tgt_type;
14601 prog->aux->attach_func_name = tgt_info.tgt_name;
14602
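/* remember the original target so that any later attachment of this
 * program is only accepted against a compatible target (enforced in
 * bpf_check_attach_target()).
 */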
4a1e7c0c
THJ
14603 if (tgt_prog) {
14604 prog->aux->saved_dst_prog_type = tgt_prog->type;
14605 prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
14606 }
14607
f7b12b6f
THJ
14608 if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
14609 prog->aux->attach_btf_trace = true;
14610 return 0;
14611 } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
14612 if (!bpf_iter_prog_supported(prog))
14613 return -EINVAL;
14614 return 0;
14615 }
14616
14617 if (prog->type == BPF_PROG_TYPE_LSM) {
14618 ret = bpf_lsm_verify_prog(&env->log, prog);
14619 if (ret < 0)
14620 return ret;
35e3815f
JO
14621 } else if (prog->type == BPF_PROG_TYPE_TRACING &&
14622 btf_id_set_contains(&btf_id_deny, btf_id)) {
14623 return -EINVAL;
38207291 14624 }
f7b12b6f 14625
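/* Programs attaching to the same target share one trampoline; the key
 * identifies the target by attach object (prog or BTF) and btf_id.
 */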
22dc4a0f 14626 key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
f7b12b6f
THJ
14627 tr = bpf_trampoline_get(key, &tgt_info);
14628 if (!tr)
14629 return -ENOMEM;
14630
3aac1ead 14631 prog->aux->dst_trampoline = tr;
f7b12b6f 14632 return 0;
38207291
MKL
14633}
14634
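/* Lazily parse the kernel's own BTF on first use and cache it in
 * btf_vmlinux; bpf_verifier_lock serializes the initial parse.
 */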
76654e67
AM
14635struct btf *bpf_get_btf_vmlinux(void)
14636{
14637 if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
14638 mutex_lock(&bpf_verifier_lock);
14639 if (!btf_vmlinux)
14640 btf_vmlinux = btf_parse_vmlinux();
14641 mutex_unlock(&bpf_verifier_lock);
14642 }
14643 return btf_vmlinux;
14644}
14645
af2ac3e1 14646int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
51580e79 14647{
06ee7115 14648 u64 start_time = ktime_get_ns();
58e2af8b 14649 struct bpf_verifier_env *env;
b9193c1b 14650 struct bpf_verifier_log *log;
9e4c24e7 14651 int i, len, ret = -EINVAL;
e2ae4ca2 14652 bool is_priv;
51580e79 14653
eba0c929
AB
14654 /* no program is valid */
14655 if (ARRAY_SIZE(bpf_verifier_ops) == 0)
14656 return -EINVAL;
14657
58e2af8b 14658 /* 'struct bpf_verifier_env' can be global, but since it's not small,
cbd35700
AS
14659 * allocate/free it every time bpf_check() is called
14660 */
58e2af8b 14661 env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
cbd35700
AS
14662 if (!env)
14663 return -ENOMEM;
61bd5218 14664 log = &env->log;
cbd35700 14665
9e4c24e7 14666 len = (*prog)->len;
fad953ce 14667 env->insn_aux_data =
9e4c24e7 14668 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
3df126f3
JK
14669 ret = -ENOMEM;
14670 if (!env->insn_aux_data)
14671 goto err_free_env;
9e4c24e7
JK
14672 for (i = 0; i < len; i++)
14673 env->insn_aux_data[i].orig_idx = i;
9bac3d6d 14674 env->prog = *prog;
00176a34 14675 env->ops = bpf_verifier_ops[env->prog->type];
387544bf 14676 env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
2c78ee89 14677 is_priv = bpf_capable();
0246e64d 14678
76654e67 14679 bpf_get_btf_vmlinux();
8580ac94 14680
cbd35700 14681 /* grab the mutex to protect few globals used by verifier */
45a73c17
AS
14682 if (!is_priv)
14683 mutex_lock(&bpf_verifier_lock);
cbd35700
AS
14684
14685 if (attr->log_level || attr->log_buf || attr->log_size) {
14686 /* user requested verbose verifier output
14687 * and supplied buffer to store the verification trace
14688 */
e7bf8249
JK
14689 log->level = attr->log_level;
14690 log->ubuf = (char __user *) (unsigned long) attr->log_buf;
14691 log->len_total = attr->log_size;
cbd35700 14692
e7bf8249 14693 /* log attributes have to be sane */
866de407
HT
14694 if (!bpf_verifier_log_attr_valid(log)) {
14695 ret = -EINVAL;
3df126f3 14696 goto err_unlock;
866de407 14697 }
cbd35700 14698 }
1ad2f583 14699
0f55f9ed
CL
14700 mark_verifier_state_clean(env);
14701
8580ac94
AS
14702 if (IS_ERR(btf_vmlinux)) {
14703 /* Either gcc, pahole, or the kernel is broken. */
14704 verbose(env, "in-kernel BTF is malformed\n");
14705 ret = PTR_ERR(btf_vmlinux);
38207291 14706 goto skip_full_check;
8580ac94
AS
14707 }
14708
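/* Alignment policy: BPF_F_STRICT_ALIGNMENT opts in to strict checks,
 * architectures without efficient unaligned access force them, and
 * BPF_F_ANY_ALIGNMENT (mainly for testing) waives them again.
 */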
1ad2f583
DB
14709 env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
14710 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
e07b98d9 14711 env->strict_alignment = true;
e9ee9efc
DM
14712 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
14713 env->strict_alignment = false;
cbd35700 14714
2c78ee89 14715 env->allow_ptr_leaks = bpf_allow_ptr_leaks();
01f810ac 14716 env->allow_uninit_stack = bpf_allow_uninit_stack();
41c48f3a 14717 env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
2c78ee89
AS
14718 env->bypass_spec_v1 = bpf_bypass_spec_v1();
14719 env->bypass_spec_v4 = bpf_bypass_spec_v4();
14720 env->bpf_capable = bpf_capable();
e2ae4ca2 14721
10d274e8
AS
14722 if (is_priv)
14723 env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
14724
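/* explored_states is a hash table, indexed by instruction, of
 * previously verified states used to prune equivalent search paths.
 */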
dc2a4ebc 14725 env->explored_states = kvcalloc(state_htab_size(env),
58e2af8b 14726 sizeof(struct bpf_verifier_state_list *),
f1bca824
AS
14727 GFP_USER);
14728 ret = -ENOMEM;
14729 if (!env->explored_states)
14730 goto skip_full_check;
14731
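/* Main verification pipeline: discover subprograms and kfunc calls,
 * validate subprog boundaries and BTF func/line info, resolve the
 * attach target, rewrite pseudo ld_imm64 map/btf references, run the
 * CFG checks (loops, unreachable insns), then simulate every path.
 */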
e6ac2450
MKL
14732 ret = add_subprog_and_kfunc(env);
14733 if (ret < 0)
14734 goto skip_full_check;
14735
d9762e84 14736 ret = check_subprogs(env);
475fb78f
AS
14737 if (ret < 0)
14738 goto skip_full_check;
14739
c454a46b 14740 ret = check_btf_info(env, attr, uattr);
838e9690
YS
14741 if (ret < 0)
14742 goto skip_full_check;
14743
be8704ff
AS
14744 ret = check_attach_btf_id(env);
14745 if (ret)
14746 goto skip_full_check;
14747
4976b718
HL
14748 ret = resolve_pseudo_ldimm64(env);
14749 if (ret < 0)
14750 goto skip_full_check;
14751
ceb11679
YZ
14752 if (bpf_prog_is_dev_bound(env->prog->aux)) {
14753 ret = bpf_prog_offload_verifier_prep(env->prog);
14754 if (ret)
14755 goto skip_full_check;
14756 }
14757
d9762e84
MKL
14758 ret = check_cfg(env);
14759 if (ret < 0)
14760 goto skip_full_check;
14761
51c39bb1
AS
14762 ret = do_check_subprogs(env);
14763 ret = ret ?: do_check_main(env);
cbd35700 14764
c941ce9c
QM
14765 if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
14766 ret = bpf_prog_offload_finalize(env);
14767
0246e64d 14768skip_full_check:
51c39bb1 14769 kvfree(env->explored_states);
0246e64d 14770
c131187d 14771 if (ret == 0)
9b38c405 14772 ret = check_max_stack_depth(env);
c131187d 14773
9b38c405 14774 /* instruction rewrites happen after this point */
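/* Only privileged loaders get dead code hard-wired and removed (and
 * NOPs dropped); for unprivileged loaders dead code is sanitized in
 * place rather than removed.
 */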
e2ae4ca2
JK
14775 if (is_priv) {
14776 if (ret == 0)
14777 opt_hard_wire_dead_code_branches(env);
52875a04
JK
14778 if (ret == 0)
14779 ret = opt_remove_dead_code(env);
a1b14abc
JK
14780 if (ret == 0)
14781 ret = opt_remove_nops(env);
52875a04
JK
14782 } else {
14783 if (ret == 0)
14784 sanitize_dead_code(env);
e2ae4ca2
JK
14785 }
14786
9bac3d6d
AS
14787 if (ret == 0)
14788 /* program is valid, convert *(u32*)(ctx + off) accesses */
14789 ret = convert_ctx_accesses(env);
14790
e245c5c6 14791 if (ret == 0)
e6ac5933 14792 ret = do_misc_fixups(env);
e245c5c6 14793
a4b1d3c1
JW
14794 /* do 32-bit optimization after insn patching has done so those patched
14795 * insns could be handled correctly.
14796 */
d6c2308c
JW
14797 if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
14798 ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
14799 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
14800 : false;
a4b1d3c1
JW
14801 }
14802
1ea47e01
AS
14803 if (ret == 0)
14804 ret = fixup_call_args(env);
14805
06ee7115
AS
14806 env->verification_time = ktime_get_ns() - start_time;
14807 print_verification_stats(env);
aba64c7d 14808 env->prog->aux->verified_insns = env->insn_processed;
06ee7115 14809
a2a7d570 14810 if (log->level && bpf_verifier_log_full(log))
cbd35700 14811 ret = -ENOSPC;
a2a7d570 14812 if (log->level && !log->ubuf) {
cbd35700 14813 ret = -EFAULT;
a2a7d570 14814 goto err_release_maps;
cbd35700
AS
14815 }
14816
541c3bad
AN
14817 if (ret)
14818 goto err_release_maps;
14819
14820 if (env->used_map_cnt) {
0246e64d 14821 /* if program passed verifier, update used_maps in bpf_prog_aux */
9bac3d6d
AS
14822 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
14823 sizeof(env->used_maps[0]),
14824 GFP_KERNEL);
0246e64d 14825
9bac3d6d 14826 if (!env->prog->aux->used_maps) {
0246e64d 14827 ret = -ENOMEM;
a2a7d570 14828 goto err_release_maps;
0246e64d
AS
14829 }
14830
9bac3d6d 14831 memcpy(env->prog->aux->used_maps, env->used_maps,
0246e64d 14832 sizeof(env->used_maps[0]) * env->used_map_cnt);
9bac3d6d 14833 env->prog->aux->used_map_cnt = env->used_map_cnt;
541c3bad
AN
14834 }
14835 if (env->used_btf_cnt) {
14836 /* if program passed verifier, update used_btfs in bpf_prog_aux */
14837 env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
14838 sizeof(env->used_btfs[0]),
14839 GFP_KERNEL);
14840 if (!env->prog->aux->used_btfs) {
14841 ret = -ENOMEM;
14842 goto err_release_maps;
14843 }
0246e64d 14844
541c3bad
AN
14845 memcpy(env->prog->aux->used_btfs, env->used_btfs,
14846 sizeof(env->used_btfs[0]) * env->used_btf_cnt);
14847 env->prog->aux->used_btf_cnt = env->used_btf_cnt;
14848 }
14849 if (env->used_map_cnt || env->used_btf_cnt) {
0246e64d
AS
14850 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
14851 * bpf_ld_imm64 instructions
14852 */
14853 convert_pseudo_ld_imm64(env);
14854 }
cbd35700 14855
541c3bad 14856 adjust_btf_func(env);
ba64e7d8 14857
a2a7d570 14858err_release_maps:
9bac3d6d 14859 if (!env->prog->aux->used_maps)
0246e64d 14860 /* if we didn't copy map pointers into bpf_prog_aux, release
ab7f5bf0 14861 * them now. Otherwise free_used_maps() will release them.
0246e64d
AS
14862 */
14863 release_maps(env);
541c3bad
AN
14864 if (!env->prog->aux->used_btfs)
14865 release_btfs(env);
03f87c0b
THJ
14866
14867 /* extension progs temporarily inherit the attach_type of their targets
14868 * for verification purposes, so set it back to zero before returning
14869 */
14870 if (env->prog->type == BPF_PROG_TYPE_EXT)
14871 env->prog->expected_attach_type = 0;
14872
9bac3d6d 14873 *prog = env->prog;
3df126f3 14874err_unlock:
45a73c17
AS
14875 if (!is_priv)
14876 mutex_unlock(&bpf_verifier_lock);
3df126f3
JK
14877 vfree(env->insn_aux_data);
14878err_free_env:
14879 kfree(env);
51580e79
AS
14880 return ret;
14881}