/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <uapi/linux/btf.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <linux/stringify.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/perf_event.h>

#include "disasm.h"

static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};

/* bpf_check() is a static code analyzer that walks eBPF program
 * instruction by instruction and updates register/stack state.
 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
 *
 * The first pass is depth-first-search to check that the program is a DAG.
 * It rejects the following programs:
 * - larger than BPF_MAXINSNS insns
 * - if loop is present (detected via back-edge)
 * - unreachable insns exist (shouldn't be a forest. program = one function)
 * - out of bounds or malformed jumps
 * The second pass is all possible path descent from the 1st insn.
 * Since it's analyzing all paths through the program, the length of the
 * analysis is limited to 64k insn, which may be hit even if the total number
 * of insn is less than 4K, but there are too many branches that change stack/regs.
 * Number of 'branches to be analyzed' is limited to 1k
 *
 * On entry to each instruction, each register has a type, and the instruction
 * changes the types of the registers depending on instruction semantics.
 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
 * copied to R1.
 *
 * All registers are 64-bit.
 * R0 - return register
 * R1-R5 argument passing registers
 * R6-R9 callee saved registers
 * R10 - frame pointer read-only
 *
 * At the start of BPF program the register R1 contains a pointer to bpf_context
 * and has type PTR_TO_CTX.
 *
 * Verifier tracks arithmetic operations on pointers in case:
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
 * 1st insn copies R10 (which has FRAME_PTR) type into R1
 * and 2nd arithmetic instruction is pattern matched to recognize
 * that it wants to construct a pointer to some element within stack.
 * So after 2nd insn, the register R1 has type PTR_TO_STACK
 * (and -20 constant is saved for further stack bounds checking).
 * Meaning that this reg is a pointer to stack plus known immediate constant.
 *
 * Most of the time the registers have SCALAR_VALUE type, which
 * means the register has some value, but it's not a valid pointer.
 * (like pointer plus pointer becomes SCALAR_VALUE type)
 *
 * When verifier sees load or store instructions the type of base register
 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
 * four pointer types recognized by check_mem_access() function.
 *
 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
 * and the range of [ptr, ptr + map's value_size) is accessible.
 *
 * registers used to pass values to function calls are checked against
 * function argument constraints.
 *
 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
 * It means that the register type passed to this function must be
 * PTR_TO_STACK and it will be used inside the function as
 * 'pointer to map element key'
 *
 * For example the argument constraints for bpf_map_lookup_elem():
 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 *   .arg1_type = ARG_CONST_MAP_PTR,
 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 *
 * ret_type says that this function returns 'pointer to map elem value or null'
 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 * 2nd argument should be a pointer to stack, which will be used inside
 * the helper function as a pointer to map element key.
 *
 * On the kernel side the helper function looks like:
 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 * {
 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 *    void *key = (void *) (unsigned long) r2;
 *    void *value;
 *
 *    here kernel can access 'key' and 'map' pointers safely, knowing that
 *    [key, key + map->key_size) bytes are valid and were initialized on
 *    the stack of eBPF program.
 * }
 *
 * Corresponding eBPF program may look like:
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 * here verifier looks at prototype of map_lookup_elem() and sees:
 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 *
 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 * and were initialized prior to this call.
 * If it's ok, then verifier allows this BPF_CALL insn and looks at
 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 * returns either a pointer to map value or NULL.
 *
 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 * insn, the register holding that pointer in the true branch changes state to
 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 * branch. See check_cond_jmp_op().
 *
 * After the call R0 is set to return type of the function and registers R1-R5
 * are set to NOT_INIT to indicate that they are no longer readable.
 *
 * The following reference types represent a potential reference to a kernel
 * resource which, after first being allocated, must be checked and freed by
 * the BPF program:
 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 *
 * When the verifier sees a helper call return a reference type, it allocates a
 * pointer id for the reference and stores it in the current function state.
 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 * passes through a NULL-check conditional. For the branch wherein the state is
 * changed to CONST_IMM, the verifier releases the reference.
 *
 * For each helper function that allocates a reference, such as
 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 * bpf_sk_release(). When a reference type passes into the release function,
 * the verifier also releases the reference. If any unchecked or unreleased
 * reference remains at the end of the program, the verifier rejects it.
 */
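
/* Purely illustrative sketch of the acquire/check/release pattern described
 * above, in the same BPF_* pseudo-instruction style as the examples in this
 * comment. The bpf_sk_lookup_tcp() argument setup (R1-R5) is omitted and the
 * jump offsets are only examples:
 *
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_lookup_tcp),
 *        // R0 is now PTR_TO_SOCKET_OR_NULL with a freshly allocated ref id
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
 *        // taken branch: R0 == NULL, verifier releases the reference itself
 *        // fall-through: R0 becomes PTR_TO_SOCKET and must be released
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_release),
 *    BPF_EXIT_INSN(),
 *
 * Dropping the bpf_sk_release() call would leave an unreleased reference at
 * BPF_EXIT and the program would be rejected.
 */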

/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	struct bpf_verifier_stack_elem *next;
};

#define BPF_COMPLEXITY_LIMIT_INSNS	131072
#define BPF_COMPLEXITY_LIMIT_STACK	1024
#define BPF_COMPLEXITY_LIMIT_STATES	64

#define BPF_MAP_PTR_UNPRIV	1UL
#define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
					  POISON_POINTER_DELTA))
#define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))

static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
	return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON;
}

static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
{
	return aux->map_state & BPF_MAP_PTR_UNPRIV;
}

static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
			      const struct bpf_map *map, bool unpriv)
{
	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
	unpriv |= bpf_map_ptr_unpriv(aux);
	aux->map_state = (unsigned long)map |
			 (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
}

struct bpf_call_arg_meta {
	struct bpf_map *map_ptr;
	bool raw_mode;
	bool pkt_access;
	int regno;
	int access_size;
	s64 msize_smax_value;
	u64 msize_umax_value;
	int ptr_id;
};

static DEFINE_MUTEX(bpf_verifier_lock);

void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
		       va_list args)
{
	unsigned int n;

	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);

	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
		  "verifier log line truncated - local buffer too short\n");

	n = min(log->len_total - log->len_used - 1, n);
	log->kbuf[n] = '\0';

	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
		log->len_used += n;
	else
		log->ubuf = NULL;
}

/* log_level controls verbosity level of eBPF verifier.
 * bpf_verifier_log_write() is used to dump the verification trace to the log,
 * so the user can figure out what's wrong with the program
 */
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
					   const char *fmt, ...)
{
	va_list args;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(&env->log, fmt, args);
	va_end(args);
}
EXPORT_SYMBOL_GPL(bpf_verifier_log_write);

__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
{
	struct bpf_verifier_env *env = private_data;
	va_list args;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(&env->log, fmt, args);
	va_end(args);
}

static bool type_is_pkt_pointer(enum bpf_reg_type type)
{
	return type == PTR_TO_PACKET ||
	       type == PTR_TO_PACKET_META;
}

static bool reg_type_may_be_null(enum bpf_reg_type type)
{
	return type == PTR_TO_MAP_VALUE_OR_NULL ||
	       type == PTR_TO_SOCKET_OR_NULL;
}

static bool type_is_refcounted(enum bpf_reg_type type)
{
	return type == PTR_TO_SOCKET;
}

static bool type_is_refcounted_or_null(enum bpf_reg_type type)
{
	return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL;
}

static bool reg_is_refcounted(const struct bpf_reg_state *reg)
{
	return type_is_refcounted(reg->type);
}

static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg)
{
	return type_is_refcounted_or_null(reg->type);
}

static bool arg_type_is_refcounted(enum bpf_arg_type type)
{
	return type == ARG_PTR_TO_SOCKET;
}

/* Determine whether the function releases some resources allocated by another
 * function call. The first reference type argument will be assumed to be
 * released by release_reference().
 */
static bool is_release_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_sk_release;
}

/* string representation of 'enum bpf_reg_type' */
static const char * const reg_type_str[] = {
	[NOT_INIT]		= "?",
	[SCALAR_VALUE]		= "inv",
	[PTR_TO_CTX]		= "ctx",
	[CONST_PTR_TO_MAP]	= "map_ptr",
	[PTR_TO_MAP_VALUE]	= "map_value",
	[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
	[PTR_TO_STACK]		= "fp",
	[PTR_TO_PACKET]		= "pkt",
	[PTR_TO_PACKET_META]	= "pkt_meta",
	[PTR_TO_PACKET_END]	= "pkt_end",
	[PTR_TO_FLOW_KEYS]	= "flow_keys",
	[PTR_TO_SOCKET]		= "sock",
	[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
};

static char slot_type_char[] = {
	[STACK_INVALID]	= '?',
	[STACK_SPILL]	= 'r',
	[STACK_MISC]	= 'm',
	[STACK_ZERO]	= '0',
};

static void print_liveness(struct bpf_verifier_env *env,
			   enum bpf_reg_liveness live)
{
	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN))
		verbose(env, "_");
	if (live & REG_LIVE_READ)
		verbose(env, "r");
	if (live & REG_LIVE_WRITTEN)
		verbose(env, "w");
}

static struct bpf_func_state *func(struct bpf_verifier_env *env,
				   const struct bpf_reg_state *reg)
{
	struct bpf_verifier_state *cur = env->cur_state;

	return cur->frame[reg->frameno];
}

static void print_verifier_state(struct bpf_verifier_env *env,
				 const struct bpf_func_state *state)
{
	const struct bpf_reg_state *reg;
	enum bpf_reg_type t;
	int i;

	if (state->frameno)
		verbose(env, " frame%d:", state->frameno);
	for (i = 0; i < MAX_BPF_REG; i++) {
		reg = &state->regs[i];
		t = reg->type;
		if (t == NOT_INIT)
			continue;
		verbose(env, " R%d", i);
		print_liveness(env, reg->live);
		verbose(env, "=%s", reg_type_str[t]);
		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
		    tnum_is_const(reg->var_off)) {
			/* reg->off should be 0 for SCALAR_VALUE */
			verbose(env, "%lld", reg->var_off.value + reg->off);
			if (t == PTR_TO_STACK)
				verbose(env, ",call_%d", func(env, reg)->callsite);
		} else {
			verbose(env, "(id=%d", reg->id);
			if (t != SCALAR_VALUE)
				verbose(env, ",off=%d", reg->off);
			if (type_is_pkt_pointer(t))
				verbose(env, ",r=%d", reg->range);
			else if (t == CONST_PTR_TO_MAP ||
				 t == PTR_TO_MAP_VALUE ||
				 t == PTR_TO_MAP_VALUE_OR_NULL)
				verbose(env, ",ks=%d,vs=%d",
					reg->map_ptr->key_size,
					reg->map_ptr->value_size);
			if (tnum_is_const(reg->var_off)) {
				/* Typically an immediate SCALAR_VALUE, but
				 * could be a pointer whose offset is too big
				 * for reg->off
				 */
				verbose(env, ",imm=%llx", reg->var_off.value);
			} else {
				if (reg->smin_value != reg->umin_value &&
				    reg->smin_value != S64_MIN)
					verbose(env, ",smin_value=%lld",
						(long long)reg->smin_value);
				if (reg->smax_value != reg->umax_value &&
				    reg->smax_value != S64_MAX)
					verbose(env, ",smax_value=%lld",
						(long long)reg->smax_value);
				if (reg->umin_value != 0)
					verbose(env, ",umin_value=%llu",
						(unsigned long long)reg->umin_value);
				if (reg->umax_value != U64_MAX)
					verbose(env, ",umax_value=%llu",
						(unsigned long long)reg->umax_value);
				if (!tnum_is_unknown(reg->var_off)) {
					char tn_buf[48];

					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
					verbose(env, ",var_off=%s", tn_buf);
				}
			}
			verbose(env, ")");
		}
	}
	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
		char types_buf[BPF_REG_SIZE + 1];
		bool valid = false;
		int j;

		for (j = 0; j < BPF_REG_SIZE; j++) {
			if (state->stack[i].slot_type[j] != STACK_INVALID)
				valid = true;
			types_buf[j] = slot_type_char[
					state->stack[i].slot_type[j]];
		}
		types_buf[BPF_REG_SIZE] = 0;
		if (!valid)
			continue;
		verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
		print_liveness(env, state->stack[i].spilled_ptr.live);
		if (state->stack[i].slot_type[0] == STACK_SPILL)
			verbose(env, "=%s",
				reg_type_str[state->stack[i].spilled_ptr.type]);
		else
			verbose(env, "=%s", types_buf);
	}
	if (state->acquired_refs && state->refs[0].id) {
		verbose(env, " refs=%d", state->refs[0].id);
		for (i = 1; i < state->acquired_refs; i++)
			if (state->refs[i].id)
				verbose(env, ",%d", state->refs[i].id);
	}
	verbose(env, "\n");
}

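/* Purely illustrative: a state line produced by print_verifier_state() looks
 * roughly like
 *
 *    R1=ctx(id=0,off=0,imm=0) R6_w=pkt(id=0,off=0,r=14,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm
 *
 * where "inv"/"ctx"/"pkt"/"fp" come from reg_type_str[], the "_w"/"_r"
 * suffixes come from print_liveness(), and "fp-8=..." shows the per-byte
 * slot_type characters from slot_type_char[]. The exact fields printed depend
 * on the register state, so treat the line above only as an approximation.
 */
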
#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE)				\
static int copy_##NAME##_state(struct bpf_func_state *dst,		\
			       const struct bpf_func_state *src)	\
{									\
	if (!src->FIELD)						\
		return 0;						\
	if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) {			\
		/* internal bug, make state invalid to reject the program */ \
		memset(dst, 0, sizeof(*dst));				\
		return -EFAULT;						\
	}								\
	memcpy(dst->FIELD, src->FIELD,					\
	       sizeof(*src->FIELD) * (src->COUNT / SIZE));		\
	return 0;							\
}
/* copy_reference_state() */
COPY_STATE_FN(reference, acquired_refs, refs, 1)
/* copy_stack_state() */
COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
#undef COPY_STATE_FN

#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE)			\
static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
				  bool copy_old)			\
{									\
	u32 old_size = state->COUNT;					\
	struct bpf_##NAME##_state *new_##FIELD;				\
	int slot = size / SIZE;						\
									\
	if (size <= old_size || !size) {				\
		if (copy_old)						\
			return 0;					\
		state->COUNT = slot * SIZE;				\
		if (!size && old_size) {				\
			kfree(state->FIELD);				\
			state->FIELD = NULL;				\
		}							\
		return 0;						\
	}								\
	new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
				    GFP_KERNEL);			\
	if (!new_##FIELD)						\
		return -ENOMEM;						\
	if (copy_old) {							\
		if (state->FIELD)					\
			memcpy(new_##FIELD, state->FIELD,		\
			       sizeof(*new_##FIELD) * (old_size / SIZE)); \
		memset(new_##FIELD + old_size / SIZE, 0,		\
		       sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
	}								\
	state->COUNT = slot * SIZE;					\
	kfree(state->FIELD);						\
	state->FIELD = new_##FIELD;					\
	return 0;							\
}
/* realloc_reference_state() */
REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
/* realloc_stack_state() */
REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
#undef REALLOC_STATE_FN
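
/* For illustration: COPY_STATE_FN() above is a small code generator. The
 * invocation COPY_STATE_FN(reference, acquired_refs, refs, 1) expands to
 * roughly
 *
 *	static int copy_reference_state(struct bpf_func_state *dst,
 *					const struct bpf_func_state *src)
 *	{
 *		if (!src->refs)
 *			return 0;
 *		...
 *		memcpy(dst->refs, src->refs,
 *		       sizeof(*src->refs) * (src->acquired_refs / 1));
 *		return 0;
 *	}
 *
 * and REALLOC_STATE_FN() generates realloc_reference_state() and
 * realloc_stack_state() the same way, with SIZE converting between a byte
 * count and a slot count.
 */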

/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
 * make it consume minimal amount of memory. check_stack_write() access from
 * the program calls into realloc_func_state() to grow the stack size.
 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
 * which realloc_stack_state() copies over. It points to previous
 * bpf_verifier_state which is never reallocated.
 */
static int realloc_func_state(struct bpf_func_state *state, int stack_size,
			      int refs_size, bool copy_old)
{
	int err = realloc_reference_state(state, refs_size, copy_old);
	if (err)
		return err;
	return realloc_stack_state(state, stack_size, copy_old);
}

/* Acquire a pointer id from the env and update the state->refs to include
 * this new pointer reference.
 * On success, returns a valid pointer id to associate with the register
 * On failure, returns a negative errno.
 */
static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_func_state *state = cur_func(env);
	int new_ofs = state->acquired_refs;
	int id, err;

	err = realloc_reference_state(state, state->acquired_refs + 1, true);
	if (err)
		return err;
	id = ++env->id_gen;
	state->refs[new_ofs].id = id;
	state->refs[new_ofs].insn_idx = insn_idx;

	return id;
}

/* release function corresponding to acquire_reference_state(). Idempotent. */
static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
{
	int i, last_idx;

	if (!ptr_id)
		return -EFAULT;

	last_idx = state->acquired_refs - 1;
	for (i = 0; i < state->acquired_refs; i++) {
		if (state->refs[i].id == ptr_id) {
			if (last_idx && i != last_idx)
				memcpy(&state->refs[i], &state->refs[last_idx],
				       sizeof(*state->refs));
			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
			state->acquired_refs--;
			return 0;
		}
	}
	return -EFAULT;
}

/* variation on the above for cases where we expect that there must be an
 * outstanding reference for the specified ptr_id.
 */
static int release_reference_state(struct bpf_verifier_env *env, int ptr_id)
{
	struct bpf_func_state *state = cur_func(env);
	int err;

	err = __release_reference_state(state, ptr_id);
	if (WARN_ON_ONCE(err != 0))
		verbose(env, "verifier internal error: can't release reference\n");
	return err;
}

static int transfer_reference_state(struct bpf_func_state *dst,
				    struct bpf_func_state *src)
{
	int err = realloc_reference_state(dst, src->acquired_refs, false);
	if (err)
		return err;
	err = copy_reference_state(dst, src);
	if (err)
		return err;
	return 0;
}

static void free_func_state(struct bpf_func_state *state)
{
	if (!state)
		return;
	kfree(state->refs);
	kfree(state->stack);
	kfree(state);
}

static void free_verifier_state(struct bpf_verifier_state *state,
				bool free_self)
{
	int i;

	for (i = 0; i <= state->curframe; i++) {
		free_func_state(state->frame[i]);
		state->frame[i] = NULL;
	}
	if (free_self)
		kfree(state);
}

/* copy verifier state from src to dst growing dst stack space
 * when necessary to accommodate larger src stack
 */
static int copy_func_state(struct bpf_func_state *dst,
			   const struct bpf_func_state *src)
{
	int err;

	err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
				 false);
	if (err)
		return err;
	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
	err = copy_reference_state(dst, src);
	if (err)
		return err;
	return copy_stack_state(dst, src);
}

static int copy_verifier_state(struct bpf_verifier_state *dst_state,
			       const struct bpf_verifier_state *src)
{
	struct bpf_func_state *dst;
	int i, err;

	/* if dst has more stack frames than src, free them */
	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
		free_func_state(dst_state->frame[i]);
		dst_state->frame[i] = NULL;
	}
	dst_state->curframe = src->curframe;
	for (i = 0; i <= src->curframe; i++) {
		dst = dst_state->frame[i];
		if (!dst) {
			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
			if (!dst)
				return -ENOMEM;
			dst_state->frame[i] = dst;
		}
		err = copy_func_state(dst, src->frame[i]);
		if (err)
			return err;
	}
	return 0;
}

static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
		     int *insn_idx)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem, *head = env->head;
	int err;

	if (env->head == NULL)
		return -ENOENT;

	if (cur) {
		err = copy_verifier_state(cur, &head->st);
		if (err)
			return err;
	}
	if (insn_idx)
		*insn_idx = head->insn_idx;
	if (prev_insn_idx)
		*prev_insn_idx = head->prev_insn_idx;
	elem = head->next;
	free_verifier_state(&head->st, false);
	kfree(head);
	env->head = elem;
	env->stack_size--;
	return 0;
}

static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
					     int insn_idx, int prev_insn_idx)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem;
	int err;

	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
	if (!elem)
		goto err;

	elem->insn_idx = insn_idx;
	elem->prev_insn_idx = prev_insn_idx;
	elem->next = env->head;
	env->head = elem;
	env->stack_size++;
	err = copy_verifier_state(&elem->st, cur);
	if (err)
		goto err;
	if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
		verbose(env, "BPF program is too complex\n");
		goto err;
	}
	return &elem->st;
err:
	free_verifier_state(env->cur_state, true);
	env->cur_state = NULL;
	/* pop all elements and return */
	while (!pop_stack(env, NULL, NULL));
	return NULL;
}

#define CALLER_SAVED_REGS 6
static const int caller_saved[CALLER_SAVED_REGS] = {
	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};

static void __mark_reg_not_init(struct bpf_reg_state *reg);

/* Mark the unknown part of a register (variable offset or scalar value) as
 * known to have the value @imm.
 */
static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	/* Clear id, off, and union(map_ptr, range) */
	memset(((u8 *)reg) + sizeof(reg->type), 0,
	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
	reg->var_off = tnum_const(imm);
	reg->smin_value = (s64)imm;
	reg->smax_value = (s64)imm;
	reg->umin_value = imm;
	reg->umax_value = imm;
}

/* Mark the 'variable offset' part of a register as zero. This should be
 * used only on registers holding a pointer type.
 */
static void __mark_reg_known_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
}

static void __mark_reg_const_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
	reg->type = SCALAR_VALUE;
}

static void mark_reg_known_zero(struct bpf_verifier_env *env,
				struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs */
		for (regno = 0; regno < MAX_BPF_REG; regno++)
			__mark_reg_not_init(regs + regno);
		return;
	}
	__mark_reg_known_zero(regs + regno);
}

static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
{
	return type_is_pkt_pointer(reg->type);
}

static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
{
	return reg_is_pkt_pointer(reg) ||
	       reg->type == PTR_TO_PACKET_END;
}

/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
				    enum bpf_reg_type which)
{
	/* The register can already have a range from prior markings.
	 * This is fine as long as it hasn't been advanced from its
	 * origin.
	 */
	return reg->type == which &&
	       reg->id == 0 &&
	       reg->off == 0 &&
	       tnum_equals_const(reg->var_off, 0);
}

/* Attempts to improve min/max values based on var_off information */
static void __update_reg_bounds(struct bpf_reg_state *reg)
{
	/* min signed is max(sign bit) | min(other bits) */
	reg->smin_value = max_t(s64, reg->smin_value,
				reg->var_off.value | (reg->var_off.mask & S64_MIN));
	/* max signed is min(sign bit) | max(other bits) */
	reg->smax_value = min_t(s64, reg->smax_value,
				reg->var_off.value | (reg->var_off.mask & S64_MAX));
	reg->umin_value = max(reg->umin_value, reg->var_off.value);
	reg->umax_value = min(reg->umax_value,
			      reg->var_off.value | reg->var_off.mask);
}

/* Uses signed min/max values to inform unsigned, and vice-versa */
static void __reg_deduce_bounds(struct bpf_reg_state *reg)
{
	/* Learn sign from signed bounds.
	 * If we cannot cross the sign boundary, then signed and unsigned bounds
	 * are the same, so combine.  This works even in the negative case, e.g.
	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
	 */
	if (reg->smin_value >= 0 || reg->smax_value < 0) {
		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
							  reg->umin_value);
		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
							  reg->umax_value);
		return;
	}
	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
	 * boundary, so we must be careful.
	 */
	if ((s64)reg->umax_value >= 0) {
		/* Positive.  We can't learn anything from the smin, but smax
		 * is positive, hence safe.
		 */
		reg->smin_value = reg->umin_value;
		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
							  reg->umax_value);
	} else if ((s64)reg->umin_value < 0) {
		/* Negative.  We can't learn anything from the smax, but smin
		 * is negative, hence safe.
		 */
		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
							  reg->umin_value);
		reg->smax_value = reg->umax_value;
	}
}

/* Attempts to improve var_off based on unsigned min/max information */
static void __reg_bound_offset(struct bpf_reg_state *reg)
{
	reg->var_off = tnum_intersect(reg->var_off,
				      tnum_range(reg->umin_value,
						 reg->umax_value));
}
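
/* Purely illustrative worked example of how var_off (a tnum) and the min/max
 * bounds reinforce each other; the numbers are only examples. Suppose a
 * register is known to be 'x & 0x6', i.e. var_off = {value = 0x0, mask = 0x6}:
 * __update_reg_bounds() can then tighten umin_value to 0x0 (value) and
 * umax_value to 0x6 (value | mask). Conversely, if umin_value..umax_value is
 * later narrowed to [0, 3], __reg_bound_offset() intersects var_off with
 * tnum_range(0, 3), after which bit 2 is known to be zero and only the values
 * {0, 2} remain possible. See tnum.c for the exact semantics.
 */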

/* Reset the min/max bounds of a register */
static void __mark_reg_unbounded(struct bpf_reg_state *reg)
{
	reg->smin_value = S64_MIN;
	reg->smax_value = S64_MAX;
	reg->umin_value = 0;
	reg->umax_value = U64_MAX;
}

/* Mark a register as having a completely unknown (scalar) value. */
static void __mark_reg_unknown(struct bpf_reg_state *reg)
{
	/*
	 * Clear type, id, off, and union(map_ptr, range) and
	 * padding between 'type' and union
	 */
	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
	reg->type = SCALAR_VALUE;
	reg->var_off = tnum_unknown;
	reg->frameno = 0;
	__mark_reg_unbounded(reg);
}

static void mark_reg_unknown(struct bpf_verifier_env *env,
			     struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs except FP */
		for (regno = 0; regno < BPF_REG_FP; regno++)
			__mark_reg_not_init(regs + regno);
		return;
	}
	__mark_reg_unknown(regs + regno);
}

static void __mark_reg_not_init(struct bpf_reg_state *reg)
{
	__mark_reg_unknown(reg);
	reg->type = NOT_INIT;
}

static void mark_reg_not_init(struct bpf_verifier_env *env,
			      struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs except FP */
		for (regno = 0; regno < BPF_REG_FP; regno++)
			__mark_reg_not_init(regs + regno);
		return;
	}
	__mark_reg_not_init(regs + regno);
}

static void init_reg_state(struct bpf_verifier_env *env,
			   struct bpf_func_state *state)
{
	struct bpf_reg_state *regs = state->regs;
	int i;

	for (i = 0; i < MAX_BPF_REG; i++) {
		mark_reg_not_init(env, regs, i);
		regs[i].live = REG_LIVE_NONE;
		regs[i].parent = NULL;
	}

	/* frame pointer */
	regs[BPF_REG_FP].type = PTR_TO_STACK;
	mark_reg_known_zero(env, regs, BPF_REG_FP);
	regs[BPF_REG_FP].frameno = state->frameno;

	/* 1st arg to a function */
	regs[BPF_REG_1].type = PTR_TO_CTX;
	mark_reg_known_zero(env, regs, BPF_REG_1);
}

#define BPF_MAIN_FUNC (-1)
static void init_func_state(struct bpf_verifier_env *env,
			    struct bpf_func_state *state,
			    int callsite, int frameno, int subprogno)
{
	state->callsite = callsite;
	state->frameno = frameno;
	state->subprogno = subprogno;
	init_reg_state(env, state);
}

enum reg_arg_type {
	SRC_OP,		/* register is used as source operand */
	DST_OP,		/* register is used as destination operand */
	DST_OP_NO_MARK	/* same as above, check only, don't mark */
};

static int cmp_subprogs(const void *a, const void *b)
{
	return ((struct bpf_subprog_info *)a)->start -
	       ((struct bpf_subprog_info *)b)->start;
}

static int find_subprog(struct bpf_verifier_env *env, int off)
{
	struct bpf_subprog_info *p;

	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
		    sizeof(env->subprog_info[0]), cmp_subprogs);
	if (!p)
		return -ENOENT;
	return p - env->subprog_info;

}

static int add_subprog(struct bpf_verifier_env *env, int off)
{
	int insn_cnt = env->prog->len;
	int ret;

	if (off >= insn_cnt || off < 0) {
		verbose(env, "call to invalid destination\n");
		return -EINVAL;
	}
	ret = find_subprog(env, off);
	if (ret >= 0)
		return 0;
	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
		verbose(env, "too many subprograms\n");
		return -E2BIG;
	}
	env->subprog_info[env->subprog_cnt++].start = off;
	sort(env->subprog_info, env->subprog_cnt,
	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
	return 0;
}

static int check_subprogs(struct bpf_verifier_env *env)
{
	int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
	struct bpf_subprog_info *subprog = env->subprog_info;
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;

	/* Add entry function. */
	ret = add_subprog(env, 0);
	if (ret < 0)
		return ret;

	/* determine subprog starts. The end is one before the next starts */
	for (i = 0; i < insn_cnt; i++) {
		if (insn[i].code != (BPF_JMP | BPF_CALL))
			continue;
		if (insn[i].src_reg != BPF_PSEUDO_CALL)
			continue;
		if (!env->allow_ptr_leaks) {
			verbose(env, "function calls to other bpf functions are allowed for root only\n");
			return -EPERM;
		}
		ret = add_subprog(env, i + insn[i].imm + 1);
		if (ret < 0)
			return ret;
	}

	/* Add a fake 'exit' subprog which could simplify subprog iteration
	 * logic. 'subprog_cnt' should not be increased.
	 */
	subprog[env->subprog_cnt].start = insn_cnt;

	if (env->log.level > 1)
		for (i = 0; i < env->subprog_cnt; i++)
			verbose(env, "func#%d @%d\n", i, subprog[i].start);

	/* now check that all jumps are within the same subprog */
	subprog_start = subprog[cur_subprog].start;
	subprog_end = subprog[cur_subprog + 1].start;
	for (i = 0; i < insn_cnt; i++) {
		u8 code = insn[i].code;

		if (BPF_CLASS(code) != BPF_JMP)
			goto next;
		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
			goto next;
		off = i + insn[i].off + 1;
		if (off < subprog_start || off >= subprog_end) {
			verbose(env, "jump out of range from insn %d to %d\n", i, off);
			return -EINVAL;
		}
next:
		if (i == subprog_end - 1) {
			/* to avoid fall-through from one subprog into another
			 * the last insn of the subprog should be either exit
			 * or unconditional jump back
			 */
			if (code != (BPF_JMP | BPF_EXIT) &&
			    code != (BPF_JMP | BPF_JA)) {
				verbose(env, "last insn is not an exit or jmp\n");
				return -EINVAL;
			}
			subprog_start = subprog_end;
			cur_subprog++;
			if (cur_subprog < env->subprog_cnt)
				subprog_end = subprog[cur_subprog + 1].start;
		}
	}
	return 0;
}

/* Parentage chain of this register (or stack slot) should take care of all
 * issues like callee-saved registers, stack slot allocation time, etc.
 */
static int mark_reg_read(struct bpf_verifier_env *env,
			 const struct bpf_reg_state *state,
			 struct bpf_reg_state *parent)
{
	bool writes = parent == state->parent; /* Observe write marks */

	while (parent) {
		/* if read wasn't screened by an earlier write ... */
		if (writes && state->live & REG_LIVE_WRITTEN)
			break;
		/* ... then we depend on parent's value */
		parent->live |= REG_LIVE_READ;
		state = parent;
		parent = state->parent;
		writes = true;
	}
	return 0;
}

static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
			 enum reg_arg_type t)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *regs = state->regs;

	if (regno >= MAX_BPF_REG) {
		verbose(env, "R%d is invalid\n", regno);
		return -EINVAL;
	}

	if (t == SRC_OP) {
		/* check whether register used as source operand can be read */
		if (regs[regno].type == NOT_INIT) {
			verbose(env, "R%d !read_ok\n", regno);
			return -EACCES;
		}
		/* We don't need to worry about FP liveness because it's read-only */
		if (regno != BPF_REG_FP)
			return mark_reg_read(env, &regs[regno],
					     regs[regno].parent);
	} else {
		/* check whether register used as dest operand can be written to */
		if (regno == BPF_REG_FP) {
			verbose(env, "frame pointer is read only\n");
			return -EACCES;
		}
		regs[regno].live |= REG_LIVE_WRITTEN;
		if (t == DST_OP)
			mark_reg_unknown(env, regs, regno);
	}
	return 0;
}

static bool is_spillable_regtype(enum bpf_reg_type type)
{
	switch (type) {
	case PTR_TO_MAP_VALUE:
	case PTR_TO_MAP_VALUE_OR_NULL:
	case PTR_TO_STACK:
	case PTR_TO_CTX:
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET_END:
	case PTR_TO_FLOW_KEYS:
	case CONST_PTR_TO_MAP:
	case PTR_TO_SOCKET:
	case PTR_TO_SOCKET_OR_NULL:
		return true;
	default:
		return false;
	}
}

/* Does this register contain a constant zero? */
static bool register_is_null(struct bpf_reg_state *reg)
{
	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
}

/* check_stack_read/write functions track spill/fill of registers,
 * stack boundary and alignment are checked in check_mem_access()
 */
static int check_stack_write(struct bpf_verifier_env *env,
			     struct bpf_func_state *state, /* func where register points to */
			     int off, int size, int value_regno, int insn_idx)
{
	struct bpf_func_state *cur; /* state of the current function */
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
	enum bpf_reg_type type;

	err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
				 state->acquired_refs, true);
	if (err)
		return err;
	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
	 * so it's aligned access and [off, off + size) are within stack limits
	 */
	if (!env->allow_ptr_leaks &&
	    state->stack[spi].slot_type[0] == STACK_SPILL &&
	    size != BPF_REG_SIZE) {
		verbose(env, "attempt to corrupt spilled pointer on stack\n");
		return -EACCES;
	}

	cur = env->cur_state->frame[env->cur_state->curframe];
	if (value_regno >= 0 &&
	    is_spillable_regtype((type = cur->regs[value_regno].type))) {

		/* register containing pointer is being spilled into stack */
		if (size != BPF_REG_SIZE) {
			verbose(env, "invalid size of register spill\n");
			return -EACCES;
		}

		if (state != cur && type == PTR_TO_STACK) {
			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
			return -EINVAL;
		}

		/* save register state */
		state->stack[spi].spilled_ptr = cur->regs[value_regno];
		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;

		for (i = 0; i < BPF_REG_SIZE; i++) {
			if (state->stack[spi].slot_type[i] == STACK_MISC &&
			    !env->allow_ptr_leaks) {
				int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
				int soff = (-spi - 1) * BPF_REG_SIZE;

				/* detected reuse of integer stack slot with a pointer
				 * which means either llvm is reusing stack slot or
				 * an attacker is trying to exploit CVE-2018-3639
				 * (speculative store bypass)
				 * Have to sanitize that slot with preemptive
				 * store of zero.
				 */
				if (*poff && *poff != soff) {
					/* disallow programs where single insn stores
					 * into two different stack slots, since verifier
					 * cannot sanitize them
					 */
					verbose(env,
						"insn %d cannot access two stack slots fp%d and fp%d",
						insn_idx, *poff, soff);
					return -EINVAL;
				}
				*poff = soff;
			}
			state->stack[spi].slot_type[i] = STACK_SPILL;
		}
	} else {
		u8 type = STACK_MISC;

		/* regular write of data into stack destroys any spilled ptr */
		state->stack[spi].spilled_ptr.type = NOT_INIT;

		/* only mark the slot as written if all 8 bytes were written
		 * otherwise read propagation may incorrectly stop too soon
		 * when stack slots are partially written.
		 * This heuristic means that read propagation will be
		 * conservative, since it will add reg_live_read marks
		 * to stack slots all the way to first state when programs
		 * writes+reads less than 8 bytes
		 */
		if (size == BPF_REG_SIZE)
			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;

		/* when we zero initialize stack slots mark them as such */
		if (value_regno >= 0 &&
		    register_is_null(&cur->regs[value_regno]))
			type = STACK_ZERO;

		for (i = 0; i < size; i++)
			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
				type;
	}
	return 0;
}

static int check_stack_read(struct bpf_verifier_env *env,
			    struct bpf_func_state *reg_state /* func where register points to */,
			    int off, int size, int value_regno)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
	u8 *stype;

	if (reg_state->allocated_stack <= slot) {
		verbose(env, "invalid read from stack off %d+0 size %d\n",
			off, size);
		return -EACCES;
	}
	stype = reg_state->stack[spi].slot_type;

	if (stype[0] == STACK_SPILL) {
		if (size != BPF_REG_SIZE) {
			verbose(env, "invalid size of register spill\n");
			return -EACCES;
		}
		for (i = 1; i < BPF_REG_SIZE; i++) {
			if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
				verbose(env, "corrupted spill memory\n");
				return -EACCES;
			}
		}

		if (value_regno >= 0) {
			/* restore register state from stack */
			state->regs[value_regno] = reg_state->stack[spi].spilled_ptr;
			/* mark reg as written since spilled pointer state likely
			 * has its liveness marks cleared by is_state_visited()
			 * which resets stack/reg liveness for state transitions
			 */
			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
		}
		mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
			      reg_state->stack[spi].spilled_ptr.parent);
		return 0;
	} else {
		int zeros = 0;

		for (i = 0; i < size; i++) {
			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
				continue;
			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
				zeros++;
				continue;
			}
			verbose(env, "invalid read from stack off %d+%d size %d\n",
				off, i, size);
			return -EACCES;
		}
		mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
			      reg_state->stack[spi].spilled_ptr.parent);
		if (value_regno >= 0) {
			if (zeros == size) {
				/* any size read into register is zero extended,
				 * so the whole register == const_zero
				 */
				__mark_reg_const_zero(&state->regs[value_regno]);
			} else {
				/* have read misc data from the stack */
				mark_reg_unknown(env, state->regs, value_regno);
			}
			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
		}
		return 0;
	}
}
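
/* Purely illustrative: the spill/fill tracking above is what lets a program
 * park a pointer in the stack and get the same pointer type back, e.g.
 * (schematically, with example offsets):
 *
 *    BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
 *        // 8-byte store of R1 (PTR_TO_CTX); slots fp-8..fp-1 become STACK_SPILL
 *    BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
 *        // 8-byte load restores the saved register state, R2 is PTR_TO_CTX again
 *
 * A narrower store or load over the same slot is instead treated as STACK_MISC
 * data (or rejected for spilled pointers when !allow_ptr_leaks).
 */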

/* check read/write into map element returned by bpf_map_lookup_elem() */
static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
			      int size, bool zero_size_allowed)
{
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_map *map = regs[regno].map_ptr;

	if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
	    off + size > map->value_size) {
		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
			map->value_size, off, size);
		return -EACCES;
	}
	return 0;
}

/* check read/write into a map element with possible variable offset */
static int check_map_access(struct bpf_verifier_env *env, u32 regno,
			    int off, int size, bool zero_size_allowed)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *reg = &state->regs[regno];
	int err;

	/* We may have adjusted the register to this map value, so we
	 * need to try adding each of min_value and max_value to off
	 * to make sure our theoretical access will be safe.
	 */
	if (env->log.level)
		print_verifier_state(env, state);
	/* The minimum value is only important with signed
	 * comparisons where we can't assume the floor of a
	 * value is 0.  If we are using signed variables for our
	 * indexes we need to make sure that whatever we use
	 * will have a set floor within our range.
	 */
	if (reg->smin_value < 0) {
		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
			regno);
		return -EACCES;
	}
	err = __check_map_access(env, regno, reg->smin_value + off, size,
				 zero_size_allowed);
	if (err) {
		verbose(env, "R%d min value is outside of the array range\n",
			regno);
		return err;
	}

	/* If we haven't set a max value then we need to bail since we can't be
	 * sure we won't do bad things.
	 * If reg->umax_value + off could overflow, treat that as unbounded too.
	 */
	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
		verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
			regno);
		return -EACCES;
	}
	err = __check_map_access(env, regno, reg->umax_value + off, size,
				 zero_size_allowed);
	if (err)
		verbose(env, "R%d max value is outside of the array range\n",
			regno);
	return err;
}

#define MAX_PACKET_OFF 0xffff

static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
				       const struct bpf_call_arg_meta *meta,
				       enum bpf_access_type t)
{
	switch (env->prog->type) {
	/* Program types only with direct read access go here! */
	case BPF_PROG_TYPE_LWT_IN:
	case BPF_PROG_TYPE_LWT_OUT:
	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
	case BPF_PROG_TYPE_SK_REUSEPORT:
	case BPF_PROG_TYPE_FLOW_DISSECTOR:
	case BPF_PROG_TYPE_CGROUP_SKB:
		if (t == BPF_WRITE)
			return false;
		/* fallthrough */

	/* Program types with direct read + write access go here! */
	case BPF_PROG_TYPE_SCHED_CLS:
	case BPF_PROG_TYPE_SCHED_ACT:
	case BPF_PROG_TYPE_XDP:
	case BPF_PROG_TYPE_LWT_XMIT:
	case BPF_PROG_TYPE_SK_SKB:
	case BPF_PROG_TYPE_SK_MSG:
		if (meta)
			return meta->pkt_access;

		env->seen_direct_write = true;
		return true;
	default:
		return false;
	}
}

static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
				 int off, int size, bool zero_size_allowed)
{
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_reg_state *reg = &regs[regno];

	if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
	    (u64)off + size > reg->range) {
		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
			off, size, regno, reg->id, reg->off, reg->range);
		return -EACCES;
	}
	return 0;
}

static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
			       int size, bool zero_size_allowed)
{
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_reg_state *reg = &regs[regno];
	int err;

	/* We may have added a variable offset to the packet pointer; but any
	 * reg->range we have comes after that.  We are only checking the fixed
	 * offset.
	 */

	/* We don't allow negative numbers, because we aren't tracking enough
	 * detail to prove they're safe.
	 */
	if (reg->smin_value < 0) {
		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
			regno);
		return -EACCES;
	}
	err = __check_packet_access(env, regno, off, size, zero_size_allowed);
	if (err) {
		verbose(env, "R%d offset is outside of the packet\n", regno);
		return err;
	}

	/* __check_packet_access has made sure "off + size - 1" is within u16.
	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
	 * otherwise find_good_pkt_pointers would have refused to set range info
	 * that __check_packet_access would have rejected this pkt access.
	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
	 */
	env->prog->aux->max_pkt_offset =
		max_t(u32, env->prog->aux->max_pkt_offset,
		      off + reg->umax_value + size - 1);

	return err;
}

/* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
{
	struct bpf_insn_access_aux info = {
		.reg_type = *reg_type,
	};

	if (env->ops->is_valid_access &&
	    env->ops->is_valid_access(off, size, t, env->prog, &info)) {
		/* A non zero info.ctx_field_size indicates that this field is a
		 * candidate for later verifier transformation to load the whole
		 * field and then apply a mask when accessed with a narrower
		 * access than actual ctx access size. A zero info.ctx_field_size
		 * will only allow for whole field access and rejects any other
		 * type of narrower access.
		 */
		*reg_type = info.reg_type;

		env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
		/* remember the offset of last byte accessed in ctx */
		if (env->prog->aux->max_ctx_offset < off + size)
			env->prog->aux->max_ctx_offset = off + size;
		return 0;
	}

	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
	return -EACCES;
}
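
/* Purely illustrative: the ctx_field_size mechanism above is what makes
 * narrow context loads work. For example, when a program does a 1-byte load
 * from a 4-byte __sk_buff field such as 'len', the program type's
 * is_valid_access() callback can report the full field size here, and a later
 * rewrite pass may then load the whole field and mask/shift out the requested
 * byte instead of rejecting the access. The field name is only an example of
 * a multi-byte ctx field.
 */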

static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
				  int size)
{
	if (size < 0 || off < 0 ||
	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
		verbose(env, "invalid access to flow keys off=%d size=%d\n",
			off, size);
		return -EACCES;
	}
	return 0;
}

static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off,
			     int size, enum bpf_access_type t)
{
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_reg_state *reg = &regs[regno];
	struct bpf_insn_access_aux info;

	if (reg->smin_value < 0) {
		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
			regno);
		return -EACCES;
	}

	if (!bpf_sock_is_valid_access(off, size, t, &info)) {
		verbose(env, "invalid bpf_sock access off=%d size=%d\n",
			off, size);
		return -EACCES;
	}

	return 0;
}

4cabc5b1
DB
1539static bool __is_pointer_value(bool allow_ptr_leaks,
1540 const struct bpf_reg_state *reg)
1be7f75d 1541{
4cabc5b1 1542 if (allow_ptr_leaks)
1be7f75d
AS
1543 return false;
1544
f1174f77 1545 return reg->type != SCALAR_VALUE;
1be7f75d
AS
1546}
1547
2a159c6f
DB
1548static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
1549{
1550 return cur_regs(env) + regno;
1551}
1552
4cabc5b1
DB
1553static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
1554{
2a159c6f 1555 return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
4cabc5b1
DB
1556}
1557
f37a8cb8
DB
1558static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
1559{
2a159c6f 1560 const struct bpf_reg_state *reg = reg_state(env, regno);
f37a8cb8 1561
fd978bf7
JS
1562 return reg->type == PTR_TO_CTX ||
1563 reg->type == PTR_TO_SOCKET;
f37a8cb8
DB
1564}
1565
ca369602
DB
1566static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
1567{
2a159c6f 1568 const struct bpf_reg_state *reg = reg_state(env, regno);
ca369602
DB
1569
1570 return type_is_pkt_pointer(reg->type);
1571}
1572
4b5defde
DB
1573static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
1574{
1575 const struct bpf_reg_state *reg = reg_state(env, regno);
1576
 1577 	/* Separate from is_ctx_reg() since we still want to allow BPF_ST here. */
1578 return reg->type == PTR_TO_FLOW_KEYS;
1579}
1580
61bd5218
JK
1581static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
1582 const struct bpf_reg_state *reg,
d1174416 1583 int off, int size, bool strict)
969bf05e 1584{
f1174f77 1585 struct tnum reg_off;
e07b98d9 1586 int ip_align;
d1174416
DM
1587
1588 /* Byte size accesses are always allowed. */
1589 if (!strict || size == 1)
1590 return 0;
1591
e4eda884
DM
1592 /* For platforms that do not have a Kconfig enabling
1593 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
1594 * NET_IP_ALIGN is universally set to '2'. And on platforms
1595 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
1596 * to this code only in strict mode where we want to emulate
1597 * the NET_IP_ALIGN==2 checking. Therefore use an
1598 * unconditional IP align value of '2'.
e07b98d9 1599 */
e4eda884 1600 ip_align = 2;
f1174f77
EC
1601
1602 reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
1603 if (!tnum_is_aligned(reg_off, size)) {
1604 char tn_buf[48];
1605
1606 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218
JK
1607 verbose(env,
1608 "misaligned packet access off %d+%s+%d+%d size %d\n",
f1174f77 1609 ip_align, tn_buf, reg->off, off, size);
969bf05e
AS
1610 return -EACCES;
1611 }
79adffcd 1612
969bf05e
AS
1613 return 0;
1614}
1615
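For intuition, here is a minimal user-space sketch of the packet-alignment rule enforced above for the fully-known-offset case (where the tnum collapses to a plain integer); the function name and stand-alone setting are illustrative only, not kernel API.

#include <stdbool.h>
#include <stdio.h>

/* Stand-alone model of the constant-offset case of the check above:
 * NET_IP_ALIGN is emulated as 2 and the access is allowed only when the
 * total offset is a multiple of the access size.
 */
static bool pkt_access_aligned(int reg_off, int off, int size)
{
	int ip_align = 2;	/* emulated NET_IP_ALIGN */

	return ((ip_align + reg_off + off) % size) == 0;
}

int main(void)
{
	printf("%d\n", pkt_access_aligned(0, 2, 4));	/* 2+2=4: aligned, ok */
	printf("%d\n", pkt_access_aligned(0, 1, 4));	/* 2+1=3: misaligned  */
	return 0;
}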
61bd5218
JK
1616static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
1617 const struct bpf_reg_state *reg,
f1174f77
EC
1618 const char *pointer_desc,
1619 int off, int size, bool strict)
79adffcd 1620{
f1174f77
EC
1621 struct tnum reg_off;
1622
1623 /* Byte size accesses are always allowed. */
1624 if (!strict || size == 1)
1625 return 0;
1626
1627 reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
1628 if (!tnum_is_aligned(reg_off, size)) {
1629 char tn_buf[48];
1630
1631 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 1632 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
f1174f77 1633 pointer_desc, tn_buf, reg->off, off, size);
79adffcd
DB
1634 return -EACCES;
1635 }
1636
969bf05e
AS
1637 return 0;
1638}
1639
e07b98d9 1640static int check_ptr_alignment(struct bpf_verifier_env *env,
ca369602
DB
1641 const struct bpf_reg_state *reg, int off,
1642 int size, bool strict_alignment_once)
79adffcd 1643{
ca369602 1644 bool strict = env->strict_alignment || strict_alignment_once;
f1174f77 1645 const char *pointer_desc = "";
d1174416 1646
79adffcd
DB
1647 switch (reg->type) {
1648 case PTR_TO_PACKET:
de8f3a83
DB
1649 case PTR_TO_PACKET_META:
1650 /* Special case, because of NET_IP_ALIGN. Given metadata sits
1651 * right in front, treat it the very same way.
1652 */
61bd5218 1653 return check_pkt_ptr_alignment(env, reg, off, size, strict);
d58e468b
PP
1654 case PTR_TO_FLOW_KEYS:
1655 pointer_desc = "flow keys ";
1656 break;
f1174f77
EC
1657 case PTR_TO_MAP_VALUE:
1658 pointer_desc = "value ";
1659 break;
1660 case PTR_TO_CTX:
1661 pointer_desc = "context ";
1662 break;
1663 case PTR_TO_STACK:
1664 pointer_desc = "stack ";
a5ec6ae1
JH
1665 /* The stack spill tracking logic in check_stack_write()
1666 * and check_stack_read() relies on stack accesses being
1667 * aligned.
1668 */
1669 strict = true;
f1174f77 1670 break;
c64b7983
JS
1671 case PTR_TO_SOCKET:
1672 pointer_desc = "sock ";
1673 break;
79adffcd 1674 default:
f1174f77 1675 break;
79adffcd 1676 }
61bd5218
JK
1677 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
1678 strict);
79adffcd
DB
1679}
1680
f4d7e40a
AS
1681static int update_stack_depth(struct bpf_verifier_env *env,
1682 const struct bpf_func_state *func,
1683 int off)
1684{
9c8105bd 1685 u16 stack = env->subprog_info[func->subprogno].stack_depth;
f4d7e40a
AS
1686
1687 if (stack >= -off)
1688 return 0;
1689
1690 /* update known max for given subprogram */
9c8105bd 1691 env->subprog_info[func->subprogno].stack_depth = -off;
70a87ffe
AS
1692 return 0;
1693}
f4d7e40a 1694
70a87ffe
AS
1695/* starting from main bpf function walk all instructions of the function
1696 * and recursively walk all callees that given function can call.
1697 * Ignore jump and exit insns.
1698 * Since recursion is prevented by check_cfg() this algorithm
1699 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
1700 */
1701static int check_max_stack_depth(struct bpf_verifier_env *env)
1702{
9c8105bd
JW
1703 int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
1704 struct bpf_subprog_info *subprog = env->subprog_info;
70a87ffe 1705 struct bpf_insn *insn = env->prog->insnsi;
70a87ffe
AS
1706 int ret_insn[MAX_CALL_FRAMES];
1707 int ret_prog[MAX_CALL_FRAMES];
f4d7e40a 1708
70a87ffe
AS
1709process_func:
 1710 	/* round up to 32 bytes, since this is the granularity
 1711 	 * of the interpreter stack size
1712 */
9c8105bd 1713 depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
70a87ffe 1714 if (depth > MAX_BPF_STACK) {
f4d7e40a 1715 verbose(env, "combined stack size of %d calls is %d. Too large\n",
70a87ffe 1716 frame + 1, depth);
f4d7e40a
AS
1717 return -EACCES;
1718 }
70a87ffe 1719continue_func:
4cb3d99c 1720 subprog_end = subprog[idx + 1].start;
70a87ffe
AS
1721 for (; i < subprog_end; i++) {
1722 if (insn[i].code != (BPF_JMP | BPF_CALL))
1723 continue;
1724 if (insn[i].src_reg != BPF_PSEUDO_CALL)
1725 continue;
1726 /* remember insn and function to return to */
1727 ret_insn[frame] = i + 1;
9c8105bd 1728 ret_prog[frame] = idx;
70a87ffe
AS
1729
1730 /* find the callee */
1731 i = i + insn[i].imm + 1;
9c8105bd
JW
1732 idx = find_subprog(env, i);
1733 if (idx < 0) {
70a87ffe
AS
1734 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1735 i);
1736 return -EFAULT;
1737 }
70a87ffe
AS
1738 frame++;
1739 if (frame >= MAX_CALL_FRAMES) {
1740 WARN_ONCE(1, "verifier bug. Call stack is too deep\n");
1741 return -EFAULT;
1742 }
1743 goto process_func;
1744 }
1745 /* end of for() loop means the last insn of the 'subprog'
1746 * was reached. Doesn't matter whether it was JA or EXIT
1747 */
1748 if (frame == 0)
1749 return 0;
9c8105bd 1750 depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
70a87ffe
AS
1751 frame--;
1752 i = ret_insn[frame];
9c8105bd 1753 idx = ret_prog[frame];
70a87ffe 1754 goto continue_func;
f4d7e40a
AS
1755}
1756
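To see what this walk guards against, here is a hypothetical BPF C fragment (not part of verifier.c, assuming a clang BPF target recent enough to emit a bpf-to-bpf call for the noinline function): each frame is rounded up to a multiple of 32 bytes, and 480 + 480 exceeds the 512-byte MAX_BPF_STACK budget for the call chain, so loading it should fail with the "combined stack size ... Too large" message.

/* Hypothetical program, compiled with clang -target bpf; the volatile
 * buffers keep the compiler from eliding the stack usage.
 */
static __attribute__((noinline)) int callee(volatile char *in)
{
	volatile char buf[480];

	buf[0] = in[0];
	return buf[0];
}

__attribute__((section("socket"), used))
int big_stack_prog(void *ctx)
{
	volatile char buf[480];

	buf[0] = 1;
	return callee(buf);
}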
19d28fbd 1757#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1ea47e01
AS
1758static int get_callee_stack_depth(struct bpf_verifier_env *env,
1759 const struct bpf_insn *insn, int idx)
1760{
1761 int start = idx + insn->imm + 1, subprog;
1762
1763 subprog = find_subprog(env, start);
1764 if (subprog < 0) {
1765 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1766 start);
1767 return -EFAULT;
1768 }
9c8105bd 1769 return env->subprog_info[subprog].stack_depth;
1ea47e01 1770}
19d28fbd 1771#endif
1ea47e01 1772
58990d1f
DB
1773static int check_ctx_reg(struct bpf_verifier_env *env,
1774 const struct bpf_reg_state *reg, int regno)
1775{
1776 /* Access to ctx or passing it to a helper is only allowed in
1777 * its original, unmodified form.
1778 */
1779
1780 if (reg->off) {
1781 verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
1782 regno, reg->off);
1783 return -EACCES;
1784 }
1785
1786 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
1787 char tn_buf[48];
1788
1789 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1790 verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
1791 return -EACCES;
1792 }
1793
1794 return 0;
1795}
1796
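A hypothetical instruction sequence in the style of tools/testing/selftests/bpf/test_verifier.c (insn macros as in include/linux/filter.h), not part of verifier.c, showing what this check rejects:

/* R1 (the context pointer) is advanced by 8 before being dereferenced,
 * so check_ctx_reg() fails the load with
 * "dereference of modified ctx ptr R1 off=8 disallowed".
 */
struct bpf_insn modified_ctx_deref[] = {
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),		/* r1 += 8           */
	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),	/* r0 = *(u32 *)(r1) */
	BPF_EXIT_INSN(),
};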
0c17d1d2
JH
1797/* truncate register to smaller size (in bytes)
1798 * must be called with size < BPF_REG_SIZE
1799 */
1800static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
1801{
1802 u64 mask;
1803
1804 /* clear high bits in bit representation */
1805 reg->var_off = tnum_cast(reg->var_off, size);
1806
1807 /* fix arithmetic bounds */
1808 mask = ((u64)1 << (size * 8)) - 1;
1809 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
1810 reg->umin_value &= mask;
1811 reg->umax_value &= mask;
1812 } else {
1813 reg->umin_value = 0;
1814 reg->umax_value = mask;
1815 }
1816 reg->smin_value = reg->umin_value;
1817 reg->smax_value = reg->umax_value;
1818}
1819
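A small stand-alone sketch (plain user-space C, kernel types swapped for stdint) of how the unsigned bounds behave under this truncation: the range stays tight only when umin and umax already agree in the bits being cleared.

#include <stdint.h>
#include <stdio.h>

/* Stand-alone model of the bounds handling in coerce_reg_to_size(). */
static void coerce_bounds(uint64_t *umin, uint64_t *umax, int size)
{
	uint64_t mask = ((uint64_t)1 << (size * 8)) - 1;

	if ((*umin & ~mask) == (*umax & ~mask)) {
		*umin &= mask;		/* high bits agree: keep tight range */
		*umax &= mask;
	} else {
		*umin = 0;		/* high bits differ: know nothing    */
		*umax = mask;
	}
}

int main(void)
{
	uint64_t lo = 0x100000010ULL, hi = 0x100000020ULL;

	coerce_bounds(&lo, &hi, 4);	/* -> [0x10, 0x20]    */
	printf("%#llx %#llx\n", (unsigned long long)lo, (unsigned long long)hi);

	lo = 0xfffffff0ULL; hi = 0x100000020ULL;
	coerce_bounds(&lo, &hi, 4);	/* -> [0, 0xffffffff] */
	printf("%#llx %#llx\n", (unsigned long long)lo, (unsigned long long)hi);
	return 0;
}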
17a52670
AS
1820/* check whether memory at (regno + off) is accessible for t = (read | write)
 1821 * if t==write, value_regno is a register whose value is stored into memory
1822 * if t==read, value_regno is a register which will receive the value from memory
1823 * if t==write && value_regno==-1, some unknown value is stored into memory
1824 * if t==read && value_regno==-1, don't care what we read from memory
1825 */
ca369602
DB
1826static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
1827 int off, int bpf_size, enum bpf_access_type t,
1828 int value_regno, bool strict_alignment_once)
17a52670 1829{
638f5b90
AS
1830 struct bpf_reg_state *regs = cur_regs(env);
1831 struct bpf_reg_state *reg = regs + regno;
f4d7e40a 1832 struct bpf_func_state *state;
17a52670
AS
1833 int size, err = 0;
1834
1835 size = bpf_size_to_bytes(bpf_size);
1836 if (size < 0)
1837 return size;
1838
f1174f77 1839 /* alignment checks will add in reg->off themselves */
ca369602 1840 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
969bf05e
AS
1841 if (err)
1842 return err;
17a52670 1843
f1174f77
EC
1844 /* for access checks, reg->off is just part of off */
1845 off += reg->off;
1846
1847 if (reg->type == PTR_TO_MAP_VALUE) {
1be7f75d
AS
1848 if (t == BPF_WRITE && value_regno >= 0 &&
1849 is_pointer_value(env, value_regno)) {
61bd5218 1850 verbose(env, "R%d leaks addr into map\n", value_regno);
1be7f75d
AS
1851 return -EACCES;
1852 }
48461135 1853
9fd29c08 1854 err = check_map_access(env, regno, off, size, false);
17a52670 1855 if (!err && t == BPF_READ && value_regno >= 0)
638f5b90 1856 mark_reg_unknown(env, regs, value_regno);
17a52670 1857
1a0dc1ac 1858 } else if (reg->type == PTR_TO_CTX) {
f1174f77 1859 enum bpf_reg_type reg_type = SCALAR_VALUE;
19de99f7 1860
1be7f75d
AS
1861 if (t == BPF_WRITE && value_regno >= 0 &&
1862 is_pointer_value(env, value_regno)) {
61bd5218 1863 verbose(env, "R%d leaks addr into ctx\n", value_regno);
1be7f75d
AS
1864 return -EACCES;
1865 }
f1174f77 1866
58990d1f
DB
1867 err = check_ctx_reg(env, reg, regno);
1868 if (err < 0)
1869 return err;
1870
31fd8581 1871 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
969bf05e 1872 if (!err && t == BPF_READ && value_regno >= 0) {
f1174f77 1873 /* ctx access returns either a scalar, or a
de8f3a83
DB
1874 * PTR_TO_PACKET[_META,_END]. In the latter
1875 * case, we know the offset is zero.
f1174f77
EC
1876 */
1877 if (reg_type == SCALAR_VALUE)
638f5b90 1878 mark_reg_unknown(env, regs, value_regno);
f1174f77 1879 else
638f5b90 1880 mark_reg_known_zero(env, regs,
61bd5218 1881 value_regno);
638f5b90 1882 regs[value_regno].type = reg_type;
969bf05e 1883 }
17a52670 1884
f1174f77
EC
1885 } else if (reg->type == PTR_TO_STACK) {
1886 /* stack accesses must be at a fixed offset, so that we can
1887 * determine what type of data were returned.
1888 * See check_stack_read().
1889 */
1890 if (!tnum_is_const(reg->var_off)) {
1891 char tn_buf[48];
1892
1893 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 1894 verbose(env, "variable stack access var_off=%s off=%d size=%d",
f1174f77
EC
1895 tn_buf, off, size);
1896 return -EACCES;
1897 }
1898 off += reg->var_off.value;
17a52670 1899 if (off >= 0 || off < -MAX_BPF_STACK) {
61bd5218
JK
1900 verbose(env, "invalid stack off=%d size=%d\n", off,
1901 size);
17a52670
AS
1902 return -EACCES;
1903 }
8726679a 1904
f4d7e40a
AS
1905 state = func(env, reg);
1906 err = update_stack_depth(env, state, off);
1907 if (err)
1908 return err;
8726679a 1909
638f5b90 1910 if (t == BPF_WRITE)
61bd5218 1911 err = check_stack_write(env, state, off, size,
af86ca4e 1912 value_regno, insn_idx);
638f5b90 1913 else
61bd5218
JK
1914 err = check_stack_read(env, state, off, size,
1915 value_regno);
de8f3a83 1916 } else if (reg_is_pkt_pointer(reg)) {
3a0af8fd 1917 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
61bd5218 1918 verbose(env, "cannot write into packet\n");
969bf05e
AS
1919 return -EACCES;
1920 }
4acf6c0b
BB
1921 if (t == BPF_WRITE && value_regno >= 0 &&
1922 is_pointer_value(env, value_regno)) {
61bd5218
JK
1923 verbose(env, "R%d leaks addr into packet\n",
1924 value_regno);
4acf6c0b
BB
1925 return -EACCES;
1926 }
9fd29c08 1927 err = check_packet_access(env, regno, off, size, false);
969bf05e 1928 if (!err && t == BPF_READ && value_regno >= 0)
638f5b90 1929 mark_reg_unknown(env, regs, value_regno);
d58e468b
PP
1930 } else if (reg->type == PTR_TO_FLOW_KEYS) {
1931 if (t == BPF_WRITE && value_regno >= 0 &&
1932 is_pointer_value(env, value_regno)) {
1933 verbose(env, "R%d leaks addr into flow keys\n",
1934 value_regno);
1935 return -EACCES;
1936 }
1937
1938 err = check_flow_keys_access(env, off, size);
1939 if (!err && t == BPF_READ && value_regno >= 0)
1940 mark_reg_unknown(env, regs, value_regno);
c64b7983
JS
1941 } else if (reg->type == PTR_TO_SOCKET) {
1942 if (t == BPF_WRITE) {
1943 verbose(env, "cannot write into socket\n");
1944 return -EACCES;
1945 }
1946 err = check_sock_access(env, regno, off, size, t);
1947 if (!err && value_regno >= 0)
1948 mark_reg_unknown(env, regs, value_regno);
17a52670 1949 } else {
61bd5218
JK
1950 verbose(env, "R%d invalid mem access '%s'\n", regno,
1951 reg_type_str[reg->type]);
17a52670
AS
1952 return -EACCES;
1953 }
969bf05e 1954
f1174f77 1955 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
638f5b90 1956 regs[value_regno].type == SCALAR_VALUE) {
f1174f77 1957 /* b/h/w load zero-extends, mark upper bits as known 0 */
0c17d1d2 1958 coerce_reg_to_size(&regs[value_regno], size);
969bf05e 1959 }
17a52670
AS
1960 return err;
1961}
1962
31fd8581 1963static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
17a52670 1964{
17a52670
AS
1965 int err;
1966
1967 if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
1968 insn->imm != 0) {
61bd5218 1969 verbose(env, "BPF_XADD uses reserved fields\n");
17a52670
AS
1970 return -EINVAL;
1971 }
1972
1973 /* check src1 operand */
dc503a8a 1974 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
1975 if (err)
1976 return err;
1977
1978 /* check src2 operand */
dc503a8a 1979 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
1980 if (err)
1981 return err;
1982
6bdf6abc 1983 if (is_pointer_value(env, insn->src_reg)) {
61bd5218 1984 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
6bdf6abc
DB
1985 return -EACCES;
1986 }
1987
ca369602 1988 if (is_ctx_reg(env, insn->dst_reg) ||
4b5defde
DB
1989 is_pkt_reg(env, insn->dst_reg) ||
1990 is_flow_key_reg(env, insn->dst_reg)) {
ca369602 1991 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
2a159c6f
DB
1992 insn->dst_reg,
1993 reg_type_str[reg_state(env, insn->dst_reg)->type]);
f37a8cb8
DB
1994 return -EACCES;
1995 }
1996
17a52670 1997 /* check whether atomic_add can read the memory */
31fd8581 1998 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
ca369602 1999 BPF_SIZE(insn->code), BPF_READ, -1, true);
17a52670
AS
2000 if (err)
2001 return err;
2002
2003 /* check whether atomic_add can write into the same memory */
31fd8581 2004 return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
ca369602 2005 BPF_SIZE(insn->code), BPF_WRITE, -1, true);
17a52670
AS
2006}
2007
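Two hypothetical test_verifier.c-style sequences (insn macros as in include/linux/filter.h), not part of verifier.c, illustrating what check_xadd() accepts and rejects:

/* The first sequence is accepted: the atomic add targets an initialized
 * stack slot. The second targets the context pointer, so check_xadd()
 * rejects it with "BPF_XADD stores into R1 ctx is not allowed".
 */
struct bpf_insn xadd_ok[] = {
	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),			/* *(u64 *)(fp-8) = 0 */
	BPF_MOV64_IMM(BPF_REG_2, 1),				/* r2 = 1             */
	BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_2, -8),	/* lock *(fp-8) += r2 */
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_EXIT_INSN(),
};

struct bpf_insn xadd_on_ctx[] = {
	BPF_MOV64_IMM(BPF_REG_2, 1),
	BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2, 0),		/* rejected: dst is ctx */
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_EXIT_INSN(),
};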
 2008/* when register 'regno' is passed into a function that will read 'access_size'
 2009 * bytes from that pointer, make sure that it's within the stack boundary
f1174f77
EC
2010 * and all elements of stack are initialized.
2011 * Unlike most pointer bounds-checking functions, this one doesn't take an
2012 * 'off' argument, so it has to add in reg->off itself.
17a52670 2013 */
58e2af8b 2014static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
435faee1
DB
2015 int access_size, bool zero_size_allowed,
2016 struct bpf_call_arg_meta *meta)
17a52670 2017{
2a159c6f 2018 struct bpf_reg_state *reg = reg_state(env, regno);
f4d7e40a 2019 struct bpf_func_state *state = func(env, reg);
638f5b90 2020 int off, i, slot, spi;
17a52670 2021
914cb781 2022 if (reg->type != PTR_TO_STACK) {
f1174f77 2023 /* Allow zero-byte read from NULL, regardless of pointer type */
8e2fe1d9 2024 if (zero_size_allowed && access_size == 0 &&
914cb781 2025 register_is_null(reg))
8e2fe1d9
DB
2026 return 0;
2027
61bd5218 2028 verbose(env, "R%d type=%s expected=%s\n", regno,
914cb781 2029 reg_type_str[reg->type],
8e2fe1d9 2030 reg_type_str[PTR_TO_STACK]);
17a52670 2031 return -EACCES;
8e2fe1d9 2032 }
17a52670 2033
f1174f77 2034 /* Only allow fixed-offset stack reads */
914cb781 2035 if (!tnum_is_const(reg->var_off)) {
f1174f77
EC
2036 char tn_buf[48];
2037
914cb781 2038 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 2039 verbose(env, "invalid variable stack read R%d var_off=%s\n",
f1174f77 2040 regno, tn_buf);
ea25f914 2041 return -EACCES;
f1174f77 2042 }
914cb781 2043 off = reg->off + reg->var_off.value;
17a52670 2044 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
9fd29c08 2045 access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
61bd5218 2046 verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
17a52670
AS
2047 regno, off, access_size);
2048 return -EACCES;
2049 }
2050
435faee1
DB
2051 if (meta && meta->raw_mode) {
2052 meta->access_size = access_size;
2053 meta->regno = regno;
2054 return 0;
2055 }
2056
17a52670 2057 for (i = 0; i < access_size; i++) {
cc2b14d5
AS
2058 u8 *stype;
2059
638f5b90
AS
2060 slot = -(off + i) - 1;
2061 spi = slot / BPF_REG_SIZE;
cc2b14d5
AS
2062 if (state->allocated_stack <= slot)
2063 goto err;
2064 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
2065 if (*stype == STACK_MISC)
2066 goto mark;
2067 if (*stype == STACK_ZERO) {
2068 /* helper can write anything into the stack */
2069 *stype = STACK_MISC;
2070 goto mark;
17a52670 2071 }
cc2b14d5
AS
2072err:
2073 verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
2074 off, i, access_size);
2075 return -EACCES;
2076mark:
2077 /* reading any byte out of 8-byte 'spill_slot' will cause
2078 * the whole slot to be marked as 'read'
2079 */
679c782d
EC
2080 mark_reg_read(env, &state->stack[spi].spilled_ptr,
2081 state->stack[spi].spilled_ptr.parent);
17a52670 2082 }
f4d7e40a 2083 return update_stack_depth(env, state, off);
17a52670
AS
2084}
2085
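A stand-alone sketch of the slot indexing used above: a byte at stack offset 'off + i' (off is negative, relative to the frame pointer) maps to slot = -(off + i) - 1 and to the 8-byte spill slot spi = slot / BPF_REG_SIZE. The tiny harness below is user-space C for illustration only.

#include <stdio.h>

#define BPF_REG_SIZE 8

int main(void)
{
	int off = -16, access_size = 8;

	for (int i = 0; i < access_size; i++) {
		int slot = -(off + i) - 1;
		int spi = slot / BPF_REG_SIZE;

		/* all 8 bytes of fp[-16..-9] land in spill slot spi == 1 */
		printf("byte fp%d -> slot %d, spi %d\n", off + i, slot, spi);
	}
	return 0;
}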
06c1c049
GB
2086static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
2087 int access_size, bool zero_size_allowed,
2088 struct bpf_call_arg_meta *meta)
2089{
638f5b90 2090 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
06c1c049 2091
f1174f77 2092 switch (reg->type) {
06c1c049 2093 case PTR_TO_PACKET:
de8f3a83 2094 case PTR_TO_PACKET_META:
9fd29c08
YS
2095 return check_packet_access(env, regno, reg->off, access_size,
2096 zero_size_allowed);
06c1c049 2097 case PTR_TO_MAP_VALUE:
9fd29c08
YS
2098 return check_map_access(env, regno, reg->off, access_size,
2099 zero_size_allowed);
f1174f77 2100 default: /* scalar_value|ptr_to_stack or invalid ptr */
06c1c049
GB
2101 return check_stack_boundary(env, regno, access_size,
2102 zero_size_allowed, meta);
2103 }
2104}
2105
90133415
DB
2106static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
2107{
2108 return type == ARG_PTR_TO_MEM ||
2109 type == ARG_PTR_TO_MEM_OR_NULL ||
2110 type == ARG_PTR_TO_UNINIT_MEM;
2111}
2112
2113static bool arg_type_is_mem_size(enum bpf_arg_type type)
2114{
2115 return type == ARG_CONST_SIZE ||
2116 type == ARG_CONST_SIZE_OR_ZERO;
2117}
2118
58e2af8b 2119static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
33ff9823
DB
2120 enum bpf_arg_type arg_type,
2121 struct bpf_call_arg_meta *meta)
17a52670 2122{
638f5b90 2123 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6841de8b 2124 enum bpf_reg_type expected_type, type = reg->type;
17a52670
AS
2125 int err = 0;
2126
80f1d68c 2127 if (arg_type == ARG_DONTCARE)
17a52670
AS
2128 return 0;
2129
dc503a8a
EC
2130 err = check_reg_arg(env, regno, SRC_OP);
2131 if (err)
2132 return err;
17a52670 2133
1be7f75d
AS
2134 if (arg_type == ARG_ANYTHING) {
2135 if (is_pointer_value(env, regno)) {
61bd5218
JK
2136 verbose(env, "R%d leaks addr into helper function\n",
2137 regno);
1be7f75d
AS
2138 return -EACCES;
2139 }
80f1d68c 2140 return 0;
1be7f75d 2141 }
80f1d68c 2142
de8f3a83 2143 if (type_is_pkt_pointer(type) &&
3a0af8fd 2144 !may_access_direct_pkt_data(env, meta, BPF_READ)) {
61bd5218 2145 verbose(env, "helper access to the packet is not allowed\n");
6841de8b
AS
2146 return -EACCES;
2147 }
2148
8e2fe1d9 2149 if (arg_type == ARG_PTR_TO_MAP_KEY ||
2ea864c5
MV
2150 arg_type == ARG_PTR_TO_MAP_VALUE ||
2151 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
17a52670 2152 expected_type = PTR_TO_STACK;
d71962f3 2153 if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
de8f3a83 2154 type != expected_type)
6841de8b 2155 goto err_type;
39f19ebb
AS
2156 } else if (arg_type == ARG_CONST_SIZE ||
2157 arg_type == ARG_CONST_SIZE_OR_ZERO) {
f1174f77
EC
2158 expected_type = SCALAR_VALUE;
2159 if (type != expected_type)
6841de8b 2160 goto err_type;
17a52670
AS
2161 } else if (arg_type == ARG_CONST_MAP_PTR) {
2162 expected_type = CONST_PTR_TO_MAP;
6841de8b
AS
2163 if (type != expected_type)
2164 goto err_type;
608cd71a
AS
2165 } else if (arg_type == ARG_PTR_TO_CTX) {
2166 expected_type = PTR_TO_CTX;
6841de8b
AS
2167 if (type != expected_type)
2168 goto err_type;
58990d1f
DB
2169 err = check_ctx_reg(env, reg, regno);
2170 if (err < 0)
2171 return err;
c64b7983
JS
2172 } else if (arg_type == ARG_PTR_TO_SOCKET) {
2173 expected_type = PTR_TO_SOCKET;
2174 if (type != expected_type)
2175 goto err_type;
fd978bf7
JS
2176 if (meta->ptr_id || !reg->id) {
2177 verbose(env, "verifier internal error: mismatched references meta=%d, reg=%d\n",
2178 meta->ptr_id, reg->id);
2179 return -EFAULT;
2180 }
2181 meta->ptr_id = reg->id;
90133415 2182 } else if (arg_type_is_mem_ptr(arg_type)) {
8e2fe1d9
DB
2183 expected_type = PTR_TO_STACK;
2184 /* One exception here. In case function allows for NULL to be
f1174f77 2185 * passed in as argument, it's a SCALAR_VALUE type. Final test
8e2fe1d9
DB
2186 * happens during stack boundary checking.
2187 */
914cb781 2188 if (register_is_null(reg) &&
db1ac496 2189 arg_type == ARG_PTR_TO_MEM_OR_NULL)
6841de8b 2190 /* final test in check_stack_boundary() */;
de8f3a83
DB
2191 else if (!type_is_pkt_pointer(type) &&
2192 type != PTR_TO_MAP_VALUE &&
f1174f77 2193 type != expected_type)
6841de8b 2194 goto err_type;
39f19ebb 2195 meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
17a52670 2196 } else {
61bd5218 2197 verbose(env, "unsupported arg_type %d\n", arg_type);
17a52670
AS
2198 return -EFAULT;
2199 }
2200
17a52670
AS
2201 if (arg_type == ARG_CONST_MAP_PTR) {
2202 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
33ff9823 2203 meta->map_ptr = reg->map_ptr;
17a52670
AS
2204 } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
2205 /* bpf_map_xxx(..., map_ptr, ..., key) call:
2206 * check that [key, key + map->key_size) are within
2207 * stack limits and initialized
2208 */
33ff9823 2209 if (!meta->map_ptr) {
17a52670
AS
2210 /* in function declaration map_ptr must come before
2211 * map_key, so that it's verified and known before
2212 * we have to check map_key here. Otherwise it means
2213 * that kernel subsystem misconfigured verifier
2214 */
61bd5218 2215 verbose(env, "invalid map_ptr to access map->key\n");
17a52670
AS
2216 return -EACCES;
2217 }
d71962f3
PC
2218 err = check_helper_mem_access(env, regno,
2219 meta->map_ptr->key_size, false,
2220 NULL);
2ea864c5
MV
2221 } else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
2222 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
17a52670
AS
2223 /* bpf_map_xxx(..., map_ptr, ..., value) call:
2224 * check [value, value + map->value_size) validity
2225 */
33ff9823 2226 if (!meta->map_ptr) {
17a52670 2227 /* kernel subsystem misconfigured verifier */
61bd5218 2228 verbose(env, "invalid map_ptr to access map->value\n");
17a52670
AS
2229 return -EACCES;
2230 }
2ea864c5 2231 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
d71962f3
PC
2232 err = check_helper_mem_access(env, regno,
2233 meta->map_ptr->value_size, false,
2ea864c5 2234 meta);
90133415 2235 } else if (arg_type_is_mem_size(arg_type)) {
39f19ebb 2236 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
17a52670 2237
849fa506
YS
2238 /* remember the mem_size which may be used later
2239 * to refine return values.
2240 */
2241 meta->msize_smax_value = reg->smax_value;
2242 meta->msize_umax_value = reg->umax_value;
2243
f1174f77
EC
2244 /* The register is SCALAR_VALUE; the access check
2245 * happens using its boundaries.
06c1c049 2246 */
f1174f77 2247 if (!tnum_is_const(reg->var_off))
06c1c049
GB
2248 /* For unprivileged variable accesses, disable raw
2249 * mode so that the program is required to
2250 * initialize all the memory that the helper could
2251 * just partially fill up.
2252 */
2253 meta = NULL;
2254
b03c9f9f 2255 if (reg->smin_value < 0) {
61bd5218 2256 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
f1174f77
EC
2257 regno);
2258 return -EACCES;
2259 }
06c1c049 2260
b03c9f9f 2261 if (reg->umin_value == 0) {
f1174f77
EC
2262 err = check_helper_mem_access(env, regno - 1, 0,
2263 zero_size_allowed,
2264 meta);
06c1c049
GB
2265 if (err)
2266 return err;
06c1c049 2267 }
f1174f77 2268
b03c9f9f 2269 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
61bd5218 2270 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
f1174f77
EC
2271 regno);
2272 return -EACCES;
2273 }
2274 err = check_helper_mem_access(env, regno - 1,
b03c9f9f 2275 reg->umax_value,
f1174f77 2276 zero_size_allowed, meta);
17a52670
AS
2277 }
2278
2279 return err;
6841de8b 2280err_type:
61bd5218 2281 verbose(env, "R%d type=%s expected=%s\n", regno,
6841de8b
AS
2282 reg_type_str[type], reg_type_str[expected_type]);
2283 return -EACCES;
17a52670
AS
2284}
2285
61bd5218
JK
2286static int check_map_func_compatibility(struct bpf_verifier_env *env,
2287 struct bpf_map *map, int func_id)
35578d79 2288{
35578d79
KX
2289 if (!map)
2290 return 0;
2291
6aff67c8
AS
2292 /* We need a two way check, first is from map perspective ... */
2293 switch (map->map_type) {
2294 case BPF_MAP_TYPE_PROG_ARRAY:
2295 if (func_id != BPF_FUNC_tail_call)
2296 goto error;
2297 break;
2298 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
2299 if (func_id != BPF_FUNC_perf_event_read &&
908432ca
YS
2300 func_id != BPF_FUNC_perf_event_output &&
2301 func_id != BPF_FUNC_perf_event_read_value)
6aff67c8
AS
2302 goto error;
2303 break;
2304 case BPF_MAP_TYPE_STACK_TRACE:
2305 if (func_id != BPF_FUNC_get_stackid)
2306 goto error;
2307 break;
4ed8ec52 2308 case BPF_MAP_TYPE_CGROUP_ARRAY:
60747ef4 2309 if (func_id != BPF_FUNC_skb_under_cgroup &&
60d20f91 2310 func_id != BPF_FUNC_current_task_under_cgroup)
4a482f34
MKL
2311 goto error;
2312 break;
cd339431 2313 case BPF_MAP_TYPE_CGROUP_STORAGE:
b741f163 2314 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
cd339431
RG
2315 if (func_id != BPF_FUNC_get_local_storage)
2316 goto error;
2317 break;
546ac1ff
JF
2318 /* devmap returns a pointer to a live net_device ifindex that we cannot
 2319 	 * allow to be modified from the bpf side. So do not allow element
 2320 	 * lookups for now.
2321 */
2322 case BPF_MAP_TYPE_DEVMAP:
2ddf71e2 2323 if (func_id != BPF_FUNC_redirect_map)
546ac1ff
JF
2324 goto error;
2325 break;
fbfc504a
BT
2326 /* Restrict bpf side of cpumap and xskmap, open when use-cases
2327 * appear.
2328 */
6710e112 2329 case BPF_MAP_TYPE_CPUMAP:
fbfc504a 2330 case BPF_MAP_TYPE_XSKMAP:
6710e112
JDB
2331 if (func_id != BPF_FUNC_redirect_map)
2332 goto error;
2333 break;
56f668df 2334 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
bcc6b1b7 2335 case BPF_MAP_TYPE_HASH_OF_MAPS:
56f668df
MKL
2336 if (func_id != BPF_FUNC_map_lookup_elem)
2337 goto error;
16a43625 2338 break;
174a79ff
JF
2339 case BPF_MAP_TYPE_SOCKMAP:
2340 if (func_id != BPF_FUNC_sk_redirect_map &&
2341 func_id != BPF_FUNC_sock_map_update &&
4f738adb
JF
2342 func_id != BPF_FUNC_map_delete_elem &&
2343 func_id != BPF_FUNC_msg_redirect_map)
174a79ff
JF
2344 goto error;
2345 break;
81110384
JF
2346 case BPF_MAP_TYPE_SOCKHASH:
2347 if (func_id != BPF_FUNC_sk_redirect_hash &&
2348 func_id != BPF_FUNC_sock_hash_update &&
2349 func_id != BPF_FUNC_map_delete_elem &&
2350 func_id != BPF_FUNC_msg_redirect_hash)
2351 goto error;
2352 break;
2dbb9b9e
MKL
2353 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
2354 if (func_id != BPF_FUNC_sk_select_reuseport)
2355 goto error;
2356 break;
f1a2e44a
MV
2357 case BPF_MAP_TYPE_QUEUE:
2358 case BPF_MAP_TYPE_STACK:
2359 if (func_id != BPF_FUNC_map_peek_elem &&
2360 func_id != BPF_FUNC_map_pop_elem &&
2361 func_id != BPF_FUNC_map_push_elem)
2362 goto error;
2363 break;
6aff67c8
AS
2364 default:
2365 break;
2366 }
2367
2368 /* ... and second from the function itself. */
2369 switch (func_id) {
2370 case BPF_FUNC_tail_call:
2371 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
2372 goto error;
f910cefa 2373 if (env->subprog_cnt > 1) {
f4d7e40a
AS
2374 verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
2375 return -EINVAL;
2376 }
6aff67c8
AS
2377 break;
2378 case BPF_FUNC_perf_event_read:
2379 case BPF_FUNC_perf_event_output:
908432ca 2380 case BPF_FUNC_perf_event_read_value:
6aff67c8
AS
2381 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
2382 goto error;
2383 break;
2384 case BPF_FUNC_get_stackid:
2385 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
2386 goto error;
2387 break;
60d20f91 2388 case BPF_FUNC_current_task_under_cgroup:
747ea55e 2389 case BPF_FUNC_skb_under_cgroup:
4a482f34
MKL
2390 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
2391 goto error;
2392 break;
97f91a7c 2393 case BPF_FUNC_redirect_map:
9c270af3 2394 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
fbfc504a
BT
2395 map->map_type != BPF_MAP_TYPE_CPUMAP &&
2396 map->map_type != BPF_MAP_TYPE_XSKMAP)
97f91a7c
JF
2397 goto error;
2398 break;
174a79ff 2399 case BPF_FUNC_sk_redirect_map:
4f738adb 2400 case BPF_FUNC_msg_redirect_map:
81110384 2401 case BPF_FUNC_sock_map_update:
174a79ff
JF
2402 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
2403 goto error;
2404 break;
81110384
JF
2405 case BPF_FUNC_sk_redirect_hash:
2406 case BPF_FUNC_msg_redirect_hash:
2407 case BPF_FUNC_sock_hash_update:
2408 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
174a79ff
JF
2409 goto error;
2410 break;
cd339431 2411 case BPF_FUNC_get_local_storage:
b741f163
RG
2412 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
2413 map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
cd339431
RG
2414 goto error;
2415 break;
2dbb9b9e
MKL
2416 case BPF_FUNC_sk_select_reuseport:
2417 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
2418 goto error;
2419 break;
f1a2e44a
MV
2420 case BPF_FUNC_map_peek_elem:
2421 case BPF_FUNC_map_pop_elem:
2422 case BPF_FUNC_map_push_elem:
2423 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
2424 map->map_type != BPF_MAP_TYPE_STACK)
2425 goto error;
2426 break;
6aff67c8
AS
2427 default:
2428 break;
35578d79
KX
2429 }
2430
2431 return 0;
6aff67c8 2432error:
61bd5218 2433 verbose(env, "cannot pass map_type %d into func %s#%d\n",
ebb676da 2434 map->map_type, func_id_name(func_id), func_id);
6aff67c8 2435 return -EINVAL;
35578d79
KX
2436}
2437
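As a concrete illustration of the two-way check above, a hypothetical BPF C fragment (not part of verifier.c; the SEC() macro, struct bpf_map_def and the bpf_tail_call() declaration are assumed to come from the selftests' bpf_helpers.h): the map is a hash map, but bpf_tail_call() only works with BPF_MAP_TYPE_PROG_ARRAY, so the load fails with a "cannot pass map_type ... into func bpf_tail_call ..." error.

/* Hypothetical fragment, relying on the selftests' helper declarations. */
struct bpf_map_def SEC("maps") not_a_prog_array = {
	.type		= BPF_MAP_TYPE_HASH,	/* wrong type for tail calls */
	.key_size	= sizeof(int),
	.value_size	= sizeof(int),
	.max_entries	= 1,
};

SEC("socket")
int tail_call_misuse(void *ctx)
{
	bpf_tail_call(ctx, &not_a_prog_array, 0);
	return 0;
}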
90133415 2438static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
435faee1
DB
2439{
2440 int count = 0;
2441
39f19ebb 2442 if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 2443 count++;
39f19ebb 2444 if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 2445 count++;
39f19ebb 2446 if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 2447 count++;
39f19ebb 2448 if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 2449 count++;
39f19ebb 2450 if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
435faee1
DB
2451 count++;
2452
90133415
DB
2453 /* We only support one arg being in raw mode at the moment,
2454 * which is sufficient for the helper functions we have
2455 * right now.
2456 */
2457 return count <= 1;
2458}
2459
2460static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
2461 enum bpf_arg_type arg_next)
2462{
2463 return (arg_type_is_mem_ptr(arg_curr) &&
2464 !arg_type_is_mem_size(arg_next)) ||
2465 (!arg_type_is_mem_ptr(arg_curr) &&
2466 arg_type_is_mem_size(arg_next));
2467}
2468
2469static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
2470{
2471 /* bpf_xxx(..., buf, len) call will access 'len'
2472 * bytes from memory 'buf'. Both arg types need
2473 * to be paired, so make sure there's no buggy
2474 * helper function specification.
2475 */
2476 if (arg_type_is_mem_size(fn->arg1_type) ||
2477 arg_type_is_mem_ptr(fn->arg5_type) ||
2478 check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
2479 check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
2480 check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
2481 check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
2482 return false;
2483
2484 return true;
2485}
2486
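For reference, a hypothetical helper prototype (not an existing kernel helper) laid out the way the two checks above expect: the memory pointer in arg1 is immediately followed by its size in arg2, and only one argument uses raw (uninitialized-memory) mode.

/* Illustrative prototype only; .func is left NULL on purpose. */
static const struct bpf_func_proto bpf_example_fill_proto = {
	.func		= NULL,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,	/* buffer ...             */
	.arg2_type	= ARG_CONST_SIZE,		/* ... paired with length */
	.arg3_type	= ARG_ANYTHING,
};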
fd978bf7
JS
2487static bool check_refcount_ok(const struct bpf_func_proto *fn)
2488{
2489 int count = 0;
2490
2491 if (arg_type_is_refcounted(fn->arg1_type))
2492 count++;
2493 if (arg_type_is_refcounted(fn->arg2_type))
2494 count++;
2495 if (arg_type_is_refcounted(fn->arg3_type))
2496 count++;
2497 if (arg_type_is_refcounted(fn->arg4_type))
2498 count++;
2499 if (arg_type_is_refcounted(fn->arg5_type))
2500 count++;
2501
 2502 	/* We only support one arg being refcounted at the moment,
2503 * which is sufficient for the helper functions we have right now.
2504 */
2505 return count <= 1;
2506}
2507
90133415
DB
2508static int check_func_proto(const struct bpf_func_proto *fn)
2509{
2510 return check_raw_mode_ok(fn) &&
fd978bf7
JS
2511 check_arg_pair_ok(fn) &&
2512 check_refcount_ok(fn) ? 0 : -EINVAL;
435faee1
DB
2513}
2514
de8f3a83
DB
2515/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
2516 * are now invalid, so turn them into unknown SCALAR_VALUE.
f1174f77 2517 */
f4d7e40a
AS
2518static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
2519 struct bpf_func_state *state)
969bf05e 2520{
58e2af8b 2521 struct bpf_reg_state *regs = state->regs, *reg;
969bf05e
AS
2522 int i;
2523
2524 for (i = 0; i < MAX_BPF_REG; i++)
de8f3a83 2525 if (reg_is_pkt_pointer_any(&regs[i]))
61bd5218 2526 mark_reg_unknown(env, regs, i);
969bf05e 2527
f3709f69
JS
2528 bpf_for_each_spilled_reg(i, state, reg) {
2529 if (!reg)
969bf05e 2530 continue;
de8f3a83
DB
2531 if (reg_is_pkt_pointer_any(reg))
2532 __mark_reg_unknown(reg);
969bf05e
AS
2533 }
2534}
2535
f4d7e40a
AS
2536static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
2537{
2538 struct bpf_verifier_state *vstate = env->cur_state;
2539 int i;
2540
2541 for (i = 0; i <= vstate->curframe; i++)
2542 __clear_all_pkt_pointers(env, vstate->frame[i]);
2543}
2544
fd978bf7
JS
2545static void release_reg_references(struct bpf_verifier_env *env,
2546 struct bpf_func_state *state, int id)
2547{
2548 struct bpf_reg_state *regs = state->regs, *reg;
2549 int i;
2550
2551 for (i = 0; i < MAX_BPF_REG; i++)
2552 if (regs[i].id == id)
2553 mark_reg_unknown(env, regs, i);
2554
2555 bpf_for_each_spilled_reg(i, state, reg) {
2556 if (!reg)
2557 continue;
2558 if (reg_is_refcounted(reg) && reg->id == id)
2559 __mark_reg_unknown(reg);
2560 }
2561}
2562
2563/* The pointer with the specified id has released its reference to kernel
2564 * resources. Identify all copies of the same pointer and clear the reference.
2565 */
2566static int release_reference(struct bpf_verifier_env *env,
2567 struct bpf_call_arg_meta *meta)
2568{
2569 struct bpf_verifier_state *vstate = env->cur_state;
2570 int i;
2571
2572 for (i = 0; i <= vstate->curframe; i++)
2573 release_reg_references(env, vstate->frame[i], meta->ptr_id);
2574
2575 return release_reference_state(env, meta->ptr_id);
2576}
2577
f4d7e40a
AS
2578static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
2579 int *insn_idx)
2580{
2581 struct bpf_verifier_state *state = env->cur_state;
2582 struct bpf_func_state *caller, *callee;
fd978bf7 2583 int i, err, subprog, target_insn;
f4d7e40a 2584
aada9ce6 2585 if (state->curframe + 1 >= MAX_CALL_FRAMES) {
f4d7e40a 2586 verbose(env, "the call stack of %d frames is too deep\n",
aada9ce6 2587 state->curframe + 2);
f4d7e40a
AS
2588 return -E2BIG;
2589 }
2590
2591 target_insn = *insn_idx + insn->imm;
2592 subprog = find_subprog(env, target_insn + 1);
2593 if (subprog < 0) {
2594 verbose(env, "verifier bug. No program starts at insn %d\n",
2595 target_insn + 1);
2596 return -EFAULT;
2597 }
2598
2599 caller = state->frame[state->curframe];
2600 if (state->frame[state->curframe + 1]) {
2601 verbose(env, "verifier bug. Frame %d already allocated\n",
2602 state->curframe + 1);
2603 return -EFAULT;
2604 }
2605
2606 callee = kzalloc(sizeof(*callee), GFP_KERNEL);
2607 if (!callee)
2608 return -ENOMEM;
2609 state->frame[state->curframe + 1] = callee;
2610
2611 /* callee cannot access r0, r6 - r9 for reading and has to write
2612 * into its own stack before reading from it.
2613 * callee can read/write into caller's stack
2614 */
2615 init_func_state(env, callee,
2616 /* remember the callsite, it will be used by bpf_exit */
2617 *insn_idx /* callsite */,
2618 state->curframe + 1 /* frameno within this callchain */,
f910cefa 2619 subprog /* subprog number within this prog */);
f4d7e40a 2620
fd978bf7
JS
2621 /* Transfer references to the callee */
2622 err = transfer_reference_state(callee, caller);
2623 if (err)
2624 return err;
2625
679c782d
EC
2626 /* copy r1 - r5 args that callee can access. The copy includes parent
2627 * pointers, which connects us up to the liveness chain
2628 */
f4d7e40a
AS
2629 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
2630 callee->regs[i] = caller->regs[i];
2631
679c782d 2632 /* after the call registers r0 - r5 were scratched */
f4d7e40a
AS
2633 for (i = 0; i < CALLER_SAVED_REGS; i++) {
2634 mark_reg_not_init(env, caller->regs, caller_saved[i]);
2635 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
2636 }
2637
2638 /* only increment it after check_reg_arg() finished */
2639 state->curframe++;
2640
2641 /* and go analyze first insn of the callee */
2642 *insn_idx = target_insn;
2643
2644 if (env->log.level) {
2645 verbose(env, "caller:\n");
2646 print_verifier_state(env, caller);
2647 verbose(env, "callee:\n");
2648 print_verifier_state(env, callee);
2649 }
2650 return 0;
2651}
2652
2653static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
2654{
2655 struct bpf_verifier_state *state = env->cur_state;
2656 struct bpf_func_state *caller, *callee;
2657 struct bpf_reg_state *r0;
fd978bf7 2658 int err;
f4d7e40a
AS
2659
2660 callee = state->frame[state->curframe];
2661 r0 = &callee->regs[BPF_REG_0];
2662 if (r0->type == PTR_TO_STACK) {
2663 /* technically it's ok to return caller's stack pointer
2664 * (or caller's caller's pointer) back to the caller,
2665 * since these pointers are valid. Only current stack
2666 * pointer will be invalid as soon as function exits,
2667 * but let's be conservative
2668 */
2669 verbose(env, "cannot return stack pointer to the caller\n");
2670 return -EINVAL;
2671 }
2672
2673 state->curframe--;
2674 caller = state->frame[state->curframe];
2675 /* return to the caller whatever r0 had in the callee */
2676 caller->regs[BPF_REG_0] = *r0;
2677
fd978bf7
JS
2678 /* Transfer references to the caller */
2679 err = transfer_reference_state(caller, callee);
2680 if (err)
2681 return err;
2682
f4d7e40a
AS
2683 *insn_idx = callee->callsite + 1;
2684 if (env->log.level) {
2685 verbose(env, "returning from callee:\n");
2686 print_verifier_state(env, callee);
2687 verbose(env, "to caller at %d:\n", *insn_idx);
2688 print_verifier_state(env, caller);
2689 }
2690 /* clear everything in the callee */
2691 free_func_state(callee);
2692 state->frame[state->curframe + 1] = NULL;
2693 return 0;
2694}
2695
849fa506
YS
2696static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
2697 int func_id,
2698 struct bpf_call_arg_meta *meta)
2699{
2700 struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
2701
2702 if (ret_type != RET_INTEGER ||
2703 (func_id != BPF_FUNC_get_stack &&
2704 func_id != BPF_FUNC_probe_read_str))
2705 return;
2706
2707 ret_reg->smax_value = meta->msize_smax_value;
2708 ret_reg->umax_value = meta->msize_umax_value;
2709 __reg_deduce_bounds(ret_reg);
2710 __reg_bound_offset(ret_reg);
2711}
2712
c93552c4
DB
2713static int
2714record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
2715 int func_id, int insn_idx)
2716{
2717 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
2718
2719 if (func_id != BPF_FUNC_tail_call &&
09772d92
DB
2720 func_id != BPF_FUNC_map_lookup_elem &&
2721 func_id != BPF_FUNC_map_update_elem &&
f1a2e44a
MV
2722 func_id != BPF_FUNC_map_delete_elem &&
2723 func_id != BPF_FUNC_map_push_elem &&
2724 func_id != BPF_FUNC_map_pop_elem &&
2725 func_id != BPF_FUNC_map_peek_elem)
c93552c4 2726 return 0;
09772d92 2727
c93552c4
DB
2728 if (meta->map_ptr == NULL) {
2729 verbose(env, "kernel subsystem misconfigured verifier\n");
2730 return -EINVAL;
2731 }
2732
2733 if (!BPF_MAP_PTR(aux->map_state))
2734 bpf_map_ptr_store(aux, meta->map_ptr,
2735 meta->map_ptr->unpriv_array);
2736 else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr)
2737 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
2738 meta->map_ptr->unpriv_array);
2739 return 0;
2740}
2741
fd978bf7
JS
2742static int check_reference_leak(struct bpf_verifier_env *env)
2743{
2744 struct bpf_func_state *state = cur_func(env);
2745 int i;
2746
2747 for (i = 0; i < state->acquired_refs; i++) {
2748 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
2749 state->refs[i].id, state->refs[i].insn_idx);
2750 }
2751 return state->acquired_refs ? -EINVAL : 0;
2752}
2753
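A hypothetical BPF C fragment (not part of verifier.c; <linux/bpf.h> plus the selftests' bpf_helpers.h declarations for bpf_sk_lookup_tcp()/bpf_sk_release() are assumed) showing the kind of program this check catches: the socket returned by bpf_sk_lookup_tcp() is a referenced pointer, and exiting without releasing it makes check_reference_leak() fail the load with "Unreleased reference id=N alloc_insn=M".

int leaky_lookup(struct __sk_buff *skb)
{
	struct bpf_sock_tuple tuple = {};
	struct bpf_sock *sk;

	/* argument values are illustrative; the verifier error does not
	 * depend on them
	 */
	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4), -1, 0);
	if (!sk)
		return 0;
	/* missing bpf_sk_release(sk) here: the reference leaks */
	return 1;
}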
f4d7e40a 2754static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
17a52670 2755{
17a52670 2756 const struct bpf_func_proto *fn = NULL;
638f5b90 2757 struct bpf_reg_state *regs;
33ff9823 2758 struct bpf_call_arg_meta meta;
969bf05e 2759 bool changes_data;
17a52670
AS
2760 int i, err;
2761
2762 /* find function prototype */
2763 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
61bd5218
JK
2764 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
2765 func_id);
17a52670
AS
2766 return -EINVAL;
2767 }
2768
00176a34 2769 if (env->ops->get_func_proto)
5e43f899 2770 fn = env->ops->get_func_proto(func_id, env->prog);
17a52670 2771 if (!fn) {
61bd5218
JK
2772 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
2773 func_id);
17a52670
AS
2774 return -EINVAL;
2775 }
2776
2777 /* eBPF programs must be GPL compatible to use GPL-ed functions */
24701ece 2778 if (!env->prog->gpl_compatible && fn->gpl_only) {
3fe2867c 2779 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
17a52670
AS
2780 return -EINVAL;
2781 }
2782
04514d13 2783 /* With LD_ABS/IND some JITs save/restore skb from r1. */
17bedab2 2784 changes_data = bpf_helper_changes_pkt_data(fn->func);
04514d13
DB
2785 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
2786 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
2787 func_id_name(func_id), func_id);
2788 return -EINVAL;
2789 }
969bf05e 2790
33ff9823 2791 memset(&meta, 0, sizeof(meta));
36bbef52 2792 meta.pkt_access = fn->pkt_access;
33ff9823 2793
90133415 2794 err = check_func_proto(fn);
435faee1 2795 if (err) {
61bd5218 2796 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
ebb676da 2797 func_id_name(func_id), func_id);
435faee1
DB
2798 return err;
2799 }
2800
17a52670 2801 /* check args */
33ff9823 2802 err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
17a52670
AS
2803 if (err)
2804 return err;
33ff9823 2805 err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
17a52670
AS
2806 if (err)
2807 return err;
33ff9823 2808 err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
17a52670
AS
2809 if (err)
2810 return err;
33ff9823 2811 err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
17a52670
AS
2812 if (err)
2813 return err;
33ff9823 2814 err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
17a52670
AS
2815 if (err)
2816 return err;
2817
c93552c4
DB
2818 err = record_func_map(env, &meta, func_id, insn_idx);
2819 if (err)
2820 return err;
2821
435faee1
DB
2822 /* Mark slots with STACK_MISC in case of raw mode, stack offset
2823 * is inferred from register state.
2824 */
2825 for (i = 0; i < meta.access_size; i++) {
ca369602
DB
2826 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
2827 BPF_WRITE, -1, false);
435faee1
DB
2828 if (err)
2829 return err;
2830 }
2831
fd978bf7
JS
2832 if (func_id == BPF_FUNC_tail_call) {
2833 err = check_reference_leak(env);
2834 if (err) {
2835 verbose(env, "tail_call would lead to reference leak\n");
2836 return err;
2837 }
2838 } else if (is_release_function(func_id)) {
2839 err = release_reference(env, &meta);
2840 if (err)
2841 return err;
2842 }
2843
638f5b90 2844 regs = cur_regs(env);
cd339431
RG
2845
2846 /* check that flags argument in get_local_storage(map, flags) is 0,
2847 * this is required because get_local_storage() can't return an error.
2848 */
2849 if (func_id == BPF_FUNC_get_local_storage &&
2850 !register_is_null(&regs[BPF_REG_2])) {
2851 verbose(env, "get_local_storage() doesn't support non-zero flags\n");
2852 return -EINVAL;
2853 }
2854
17a52670 2855 /* reset caller saved regs */
dc503a8a 2856 for (i = 0; i < CALLER_SAVED_REGS; i++) {
61bd5218 2857 mark_reg_not_init(env, regs, caller_saved[i]);
dc503a8a
EC
2858 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
2859 }
17a52670 2860
dc503a8a 2861 /* update return register (already marked as written above) */
17a52670 2862 if (fn->ret_type == RET_INTEGER) {
f1174f77 2863 /* sets type to SCALAR_VALUE */
61bd5218 2864 mark_reg_unknown(env, regs, BPF_REG_0);
17a52670
AS
2865 } else if (fn->ret_type == RET_VOID) {
2866 regs[BPF_REG_0].type = NOT_INIT;
3e6a4b3e
RG
2867 } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
2868 fn->ret_type == RET_PTR_TO_MAP_VALUE) {
f1174f77 2869 /* There is no offset yet applied, variable or fixed */
61bd5218 2870 mark_reg_known_zero(env, regs, BPF_REG_0);
17a52670
AS
2871 /* remember map_ptr, so that check_map_access()
2872 * can check 'value_size' boundary of memory access
2873 * to map element returned from bpf_map_lookup_elem()
2874 */
33ff9823 2875 if (meta.map_ptr == NULL) {
61bd5218
JK
2876 verbose(env,
2877 "kernel subsystem misconfigured verifier\n");
17a52670
AS
2878 return -EINVAL;
2879 }
33ff9823 2880 regs[BPF_REG_0].map_ptr = meta.map_ptr;
4d31f301
DB
2881 if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
2882 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
2883 } else {
2884 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
2885 regs[BPF_REG_0].id = ++env->id_gen;
2886 }
c64b7983 2887 } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
fd978bf7
JS
2888 int id = acquire_reference_state(env, insn_idx);
2889 if (id < 0)
2890 return id;
c64b7983
JS
2891 mark_reg_known_zero(env, regs, BPF_REG_0);
2892 regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
fd978bf7 2893 regs[BPF_REG_0].id = id;
17a52670 2894 } else {
61bd5218 2895 verbose(env, "unknown return type %d of func %s#%d\n",
ebb676da 2896 fn->ret_type, func_id_name(func_id), func_id);
17a52670
AS
2897 return -EINVAL;
2898 }
04fd61ab 2899
849fa506
YS
2900 do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
2901
61bd5218 2902 err = check_map_func_compatibility(env, meta.map_ptr, func_id);
35578d79
KX
2903 if (err)
2904 return err;
04fd61ab 2905
c195651e
YS
2906 if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
2907 const char *err_str;
2908
2909#ifdef CONFIG_PERF_EVENTS
2910 err = get_callchain_buffers(sysctl_perf_event_max_stack);
2911 err_str = "cannot get callchain buffer for func %s#%d\n";
2912#else
2913 err = -ENOTSUPP;
2914 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
2915#endif
2916 if (err) {
2917 verbose(env, err_str, func_id_name(func_id), func_id);
2918 return err;
2919 }
2920
2921 env->prog->has_callchain_buf = true;
2922 }
2923
969bf05e
AS
2924 if (changes_data)
2925 clear_all_pkt_pointers(env);
2926 return 0;
2927}
2928
b03c9f9f
EC
2929static bool signed_add_overflows(s64 a, s64 b)
2930{
2931 /* Do the add in u64, where overflow is well-defined */
2932 s64 res = (s64)((u64)a + (u64)b);
2933
2934 if (b < 0)
2935 return res > a;
2936 return res < a;
2937}
2938
2939static bool signed_sub_overflows(s64 a, s64 b)
2940{
2941 /* Do the sub in u64, where overflow is well-defined */
2942 s64 res = (s64)((u64)a - (u64)b);
2943
2944 if (b < 0)
2945 return res < a;
2946 return res > a;
969bf05e
AS
2947}
2948
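A stand-alone user-space probe of the addition predicate above (s64/u64 replaced by stdint types), checking the two wrap-around directions at the 64-bit boundaries:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-alone copy of signed_add_overflows() above. */
static bool sadd_overflows(int64_t a, int64_t b)
{
	int64_t res = (int64_t)((uint64_t)a + (uint64_t)b);

	return b < 0 ? res > a : res < a;
}

int main(void)
{
	printf("%d\n", sadd_overflows(INT64_MAX, 1));	/* 1: wraps negative */
	printf("%d\n", sadd_overflows(INT64_MAX, 0));	/* 0: fits           */
	printf("%d\n", sadd_overflows(INT64_MIN, -1));	/* 1: wraps positive */
	return 0;
}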
bb7f0f98
AS
2949static bool check_reg_sane_offset(struct bpf_verifier_env *env,
2950 const struct bpf_reg_state *reg,
2951 enum bpf_reg_type type)
2952{
2953 bool known = tnum_is_const(reg->var_off);
2954 s64 val = reg->var_off.value;
2955 s64 smin = reg->smin_value;
2956
2957 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
2958 verbose(env, "math between %s pointer and %lld is not allowed\n",
2959 reg_type_str[type], val);
2960 return false;
2961 }
2962
2963 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
2964 verbose(env, "%s pointer offset %d is not allowed\n",
2965 reg_type_str[type], reg->off);
2966 return false;
2967 }
2968
2969 if (smin == S64_MIN) {
2970 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
2971 reg_type_str[type]);
2972 return false;
2973 }
2974
2975 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
2976 verbose(env, "value %lld makes %s pointer be out of bounds\n",
2977 smin, reg_type_str[type]);
2978 return false;
2979 }
2980
2981 return true;
2982}
2983
f1174f77 2984/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
f1174f77
EC
2985 * Caller should also handle BPF_MOV case separately.
2986 * If we return -EACCES, caller may want to try again treating pointer as a
2987 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
2988 */
2989static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
2990 struct bpf_insn *insn,
2991 const struct bpf_reg_state *ptr_reg,
2992 const struct bpf_reg_state *off_reg)
969bf05e 2993{
f4d7e40a
AS
2994 struct bpf_verifier_state *vstate = env->cur_state;
2995 struct bpf_func_state *state = vstate->frame[vstate->curframe];
2996 struct bpf_reg_state *regs = state->regs, *dst_reg;
f1174f77 2997 bool known = tnum_is_const(off_reg->var_off);
b03c9f9f
EC
2998 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
2999 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
3000 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
3001 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
969bf05e 3002 u8 opcode = BPF_OP(insn->code);
f1174f77 3003 u32 dst = insn->dst_reg;
969bf05e 3004
f1174f77 3005 dst_reg = &regs[dst];
969bf05e 3006
6f16101e
DB
3007 if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
3008 smin_val > smax_val || umin_val > umax_val) {
3009 /* Taint dst register if offset had invalid bounds derived from
3010 * e.g. dead branches.
3011 */
3012 __mark_reg_unknown(dst_reg);
3013 return 0;
f1174f77
EC
3014 }
3015
3016 if (BPF_CLASS(insn->code) != BPF_ALU64) {
3017 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
82abbf8d
AS
3018 verbose(env,
3019 "R%d 32-bit pointer arithmetic prohibited\n",
3020 dst);
f1174f77 3021 return -EACCES;
969bf05e
AS
3022 }
3023
aad2eeaf
JS
3024 switch (ptr_reg->type) {
3025 case PTR_TO_MAP_VALUE_OR_NULL:
3026 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
3027 dst, reg_type_str[ptr_reg->type]);
f1174f77 3028 return -EACCES;
aad2eeaf
JS
3029 case CONST_PTR_TO_MAP:
3030 case PTR_TO_PACKET_END:
c64b7983
JS
3031 case PTR_TO_SOCKET:
3032 case PTR_TO_SOCKET_OR_NULL:
aad2eeaf
JS
3033 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
3034 dst, reg_type_str[ptr_reg->type]);
f1174f77 3035 return -EACCES;
aad2eeaf
JS
3036 default:
3037 break;
f1174f77
EC
3038 }
3039
3040 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
3041 * The id may be overwritten later if we create a new variable offset.
969bf05e 3042 */
f1174f77
EC
3043 dst_reg->type = ptr_reg->type;
3044 dst_reg->id = ptr_reg->id;
969bf05e 3045
bb7f0f98
AS
3046 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
3047 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
3048 return -EINVAL;
3049
f1174f77
EC
3050 switch (opcode) {
3051 case BPF_ADD:
3052 /* We can take a fixed offset as long as it doesn't overflow
3053 * the s32 'off' field
969bf05e 3054 */
b03c9f9f
EC
3055 if (known && (ptr_reg->off + smin_val ==
3056 (s64)(s32)(ptr_reg->off + smin_val))) {
f1174f77 3057 /* pointer += K. Accumulate it into fixed offset */
b03c9f9f
EC
3058 dst_reg->smin_value = smin_ptr;
3059 dst_reg->smax_value = smax_ptr;
3060 dst_reg->umin_value = umin_ptr;
3061 dst_reg->umax_value = umax_ptr;
f1174f77 3062 dst_reg->var_off = ptr_reg->var_off;
b03c9f9f 3063 dst_reg->off = ptr_reg->off + smin_val;
0962590e 3064 dst_reg->raw = ptr_reg->raw;
f1174f77
EC
3065 break;
3066 }
f1174f77
EC
3067 /* A new variable offset is created. Note that off_reg->off
3068 * == 0, since it's a scalar.
3069 * dst_reg gets the pointer type and since some positive
3070 * integer value was added to the pointer, give it a new 'id'
3071 * if it's a PTR_TO_PACKET.
3072 * this creates a new 'base' pointer, off_reg (variable) gets
3073 * added into the variable offset, and we copy the fixed offset
3074 * from ptr_reg.
969bf05e 3075 */
b03c9f9f
EC
3076 if (signed_add_overflows(smin_ptr, smin_val) ||
3077 signed_add_overflows(smax_ptr, smax_val)) {
3078 dst_reg->smin_value = S64_MIN;
3079 dst_reg->smax_value = S64_MAX;
3080 } else {
3081 dst_reg->smin_value = smin_ptr + smin_val;
3082 dst_reg->smax_value = smax_ptr + smax_val;
3083 }
3084 if (umin_ptr + umin_val < umin_ptr ||
3085 umax_ptr + umax_val < umax_ptr) {
3086 dst_reg->umin_value = 0;
3087 dst_reg->umax_value = U64_MAX;
3088 } else {
3089 dst_reg->umin_value = umin_ptr + umin_val;
3090 dst_reg->umax_value = umax_ptr + umax_val;
3091 }
f1174f77
EC
3092 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
3093 dst_reg->off = ptr_reg->off;
0962590e 3094 dst_reg->raw = ptr_reg->raw;
de8f3a83 3095 if (reg_is_pkt_pointer(ptr_reg)) {
f1174f77
EC
3096 dst_reg->id = ++env->id_gen;
3097 /* something was added to pkt_ptr, set range to zero */
0962590e 3098 dst_reg->raw = 0;
f1174f77
EC
3099 }
3100 break;
3101 case BPF_SUB:
3102 if (dst_reg == off_reg) {
3103 /* scalar -= pointer. Creates an unknown scalar */
82abbf8d
AS
3104 verbose(env, "R%d tried to subtract pointer from scalar\n",
3105 dst);
f1174f77
EC
3106 return -EACCES;
3107 }
3108 /* We don't allow subtraction from FP, because (according to
3109 * test_verifier.c test "invalid fp arithmetic", JITs might not
3110 * be able to deal with it.
969bf05e 3111 */
f1174f77 3112 if (ptr_reg->type == PTR_TO_STACK) {
82abbf8d
AS
3113 verbose(env, "R%d subtraction from stack pointer prohibited\n",
3114 dst);
f1174f77
EC
3115 return -EACCES;
3116 }
b03c9f9f
EC
3117 if (known && (ptr_reg->off - smin_val ==
3118 (s64)(s32)(ptr_reg->off - smin_val))) {
f1174f77 3119 /* pointer -= K. Subtract it from fixed offset */
b03c9f9f
EC
3120 dst_reg->smin_value = smin_ptr;
3121 dst_reg->smax_value = smax_ptr;
3122 dst_reg->umin_value = umin_ptr;
3123 dst_reg->umax_value = umax_ptr;
f1174f77
EC
3124 dst_reg->var_off = ptr_reg->var_off;
3125 dst_reg->id = ptr_reg->id;
b03c9f9f 3126 dst_reg->off = ptr_reg->off - smin_val;
0962590e 3127 dst_reg->raw = ptr_reg->raw;
f1174f77
EC
3128 break;
3129 }
f1174f77
EC
3130 /* A new variable offset is created. If the subtrahend is known
3131 * nonnegative, then any reg->range we had before is still good.
969bf05e 3132 */
b03c9f9f
EC
3133 if (signed_sub_overflows(smin_ptr, smax_val) ||
3134 signed_sub_overflows(smax_ptr, smin_val)) {
3135 /* Overflow possible, we know nothing */
3136 dst_reg->smin_value = S64_MIN;
3137 dst_reg->smax_value = S64_MAX;
3138 } else {
3139 dst_reg->smin_value = smin_ptr - smax_val;
3140 dst_reg->smax_value = smax_ptr - smin_val;
3141 }
3142 if (umin_ptr < umax_val) {
3143 /* Overflow possible, we know nothing */
3144 dst_reg->umin_value = 0;
3145 dst_reg->umax_value = U64_MAX;
3146 } else {
3147 /* Cannot overflow (as long as bounds are consistent) */
3148 dst_reg->umin_value = umin_ptr - umax_val;
3149 dst_reg->umax_value = umax_ptr - umin_val;
3150 }
f1174f77
EC
3151 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
3152 dst_reg->off = ptr_reg->off;
0962590e 3153 dst_reg->raw = ptr_reg->raw;
de8f3a83 3154 if (reg_is_pkt_pointer(ptr_reg)) {
f1174f77
EC
3155 dst_reg->id = ++env->id_gen;
3156 /* something was subtracted from pkt_ptr; if it might be negative, reset the range to zero */
b03c9f9f 3157 if (smin_val < 0)
0962590e 3158 dst_reg->raw = 0;
43188702 3159 }
f1174f77
EC
3160 break;
3161 case BPF_AND:
3162 case BPF_OR:
3163 case BPF_XOR:
82abbf8d
AS
3164 /* bitwise ops on pointers are troublesome, prohibit. */
3165 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
3166 dst, bpf_alu_string[opcode >> 4]);
f1174f77
EC
3167 return -EACCES;
3168 default:
3169 /* other operators (e.g. MUL,LSH) produce non-pointer results */
82abbf8d
AS
3170 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
3171 dst, bpf_alu_string[opcode >> 4]);
f1174f77 3172 return -EACCES;
43188702
JF
3173 }
3174
bb7f0f98
AS
3175 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
3176 return -EINVAL;
3177
b03c9f9f
EC
3178 __update_reg_bounds(dst_reg);
3179 __reg_deduce_bounds(dst_reg);
3180 __reg_bound_offset(dst_reg);
43188702
JF
3181 return 0;
3182}
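/* Illustrative example of the two BPF_ADD cases handled above (register
 * values chosen for exposition, not taken from a real program):
 *
 *   r2 = r3;        r3 = pkt(id=1, off=0, r=0)
 *   r2 += 14;       known constant: folded into the fixed offset,
 *                   r2 = pkt(id=1, off=14), bounds and var_off unchanged
 *   r2 += r4;       r4 = scalar in [0, 64]: a variable offset is created,
 *                   off stays 14, smin/smax and umin/umax gain [0, 64],
 *                   var_off = tnum_add(r2->var_off, r4->var_off), and since
 *                   r2 is a packet pointer it gets a fresh id and its
 *                   range is reset to zero
 */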
3183
468f6eaf
JH
3184/* WARNING: This function does calculations on 64-bit values, but the actual
3185 * execution may occur on 32-bit values. Therefore, things like bitshifts
3186 * need extra checks in the 32-bit case.
3187 */
f1174f77
EC
3188static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
3189 struct bpf_insn *insn,
3190 struct bpf_reg_state *dst_reg,
3191 struct bpf_reg_state src_reg)
969bf05e 3192{
638f5b90 3193 struct bpf_reg_state *regs = cur_regs(env);
48461135 3194 u8 opcode = BPF_OP(insn->code);
f1174f77 3195 bool src_known, dst_known;
b03c9f9f
EC
3196 s64 smin_val, smax_val;
3197 u64 umin_val, umax_val;
468f6eaf 3198 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
48461135 3199
b799207e
JH
3200 if (insn_bitness == 32) {
3201 /* Relevant for 32-bit RSH: Information can propagate towards
3202 * LSB, so it isn't sufficient to only truncate the output to
3203 * 32 bits.
3204 */
3205 coerce_reg_to_size(dst_reg, 4);
3206 coerce_reg_to_size(&src_reg, 4);
3207 }
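/* Worked example of why the inputs (not just the output) must be
 * truncated here (values chosen for illustration):
 *
 *   dst_reg is known to hold 0x100000002; as a 32-bit value this is 2,
 *   so a 32-bit "w0 >>= 1" must yield 1.  Shifting the full 64-bit value
 *   first gives 0x80000001, and truncating that to 32 bits keeps the
 *   bogus high bit that leaked towards the LSB.  Truncating dst_reg and
 *   src_reg to 4 bytes up front avoids this.
 */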
3208
b03c9f9f
EC
3209 smin_val = src_reg.smin_value;
3210 smax_val = src_reg.smax_value;
3211 umin_val = src_reg.umin_value;
3212 umax_val = src_reg.umax_value;
f1174f77
EC
3213 src_known = tnum_is_const(src_reg.var_off);
3214 dst_known = tnum_is_const(dst_reg->var_off);
f23cc643 3215
6f16101e
DB
3216 if ((src_known && (smin_val != smax_val || umin_val != umax_val)) ||
3217 smin_val > smax_val || umin_val > umax_val) {
3218 /* Taint dst register if offset had invalid bounds derived from
3219 * e.g. dead branches.
3220 */
3221 __mark_reg_unknown(dst_reg);
3222 return 0;
3223 }
3224
bb7f0f98
AS
3225 if (!src_known &&
3226 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
3227 __mark_reg_unknown(dst_reg);
3228 return 0;
3229 }
3230
48461135
JB
3231 switch (opcode) {
3232 case BPF_ADD:
b03c9f9f
EC
3233 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
3234 signed_add_overflows(dst_reg->smax_value, smax_val)) {
3235 dst_reg->smin_value = S64_MIN;
3236 dst_reg->smax_value = S64_MAX;
3237 } else {
3238 dst_reg->smin_value += smin_val;
3239 dst_reg->smax_value += smax_val;
3240 }
3241 if (dst_reg->umin_value + umin_val < umin_val ||
3242 dst_reg->umax_value + umax_val < umax_val) {
3243 dst_reg->umin_value = 0;
3244 dst_reg->umax_value = U64_MAX;
3245 } else {
3246 dst_reg->umin_value += umin_val;
3247 dst_reg->umax_value += umax_val;
3248 }
f1174f77 3249 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
48461135
JB
3250 break;
3251 case BPF_SUB:
b03c9f9f
EC
3252 if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
3253 signed_sub_overflows(dst_reg->smax_value, smin_val)) {
3254 /* Overflow possible, we know nothing */
3255 dst_reg->smin_value = S64_MIN;
3256 dst_reg->smax_value = S64_MAX;
3257 } else {
3258 dst_reg->smin_value -= smax_val;
3259 dst_reg->smax_value -= smin_val;
3260 }
3261 if (dst_reg->umin_value < umax_val) {
3262 /* Overflow possible, we know nothing */
3263 dst_reg->umin_value = 0;
3264 dst_reg->umax_value = U64_MAX;
3265 } else {
3266 /* Cannot overflow (as long as bounds are consistent) */
3267 dst_reg->umin_value -= umax_val;
3268 dst_reg->umax_value -= umin_val;
3269 }
f1174f77 3270 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
48461135
JB
3271 break;
3272 case BPF_MUL:
b03c9f9f
EC
3273 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
3274 if (smin_val < 0 || dst_reg->smin_value < 0) {
f1174f77 3275 /* Ain't nobody got time to multiply that sign */
b03c9f9f
EC
3276 __mark_reg_unbounded(dst_reg);
3277 __update_reg_bounds(dst_reg);
f1174f77
EC
3278 break;
3279 }
b03c9f9f
EC
3280 /* Both values are positive, so we can work with unsigned and
3281 * copy the result to signed (unless it exceeds S64_MAX).
f1174f77 3282 */
b03c9f9f
EC
3283 if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
3284 /* Potential overflow, we know nothing */
3285 __mark_reg_unbounded(dst_reg);
3286 /* (except what we can learn from the var_off) */
3287 __update_reg_bounds(dst_reg);
3288 break;
3289 }
3290 dst_reg->umin_value *= umin_val;
3291 dst_reg->umax_value *= umax_val;
3292 if (dst_reg->umax_value > S64_MAX) {
3293 /* Overflow possible, we know nothing */
3294 dst_reg->smin_value = S64_MIN;
3295 dst_reg->smax_value = S64_MAX;
3296 } else {
3297 dst_reg->smin_value = dst_reg->umin_value;
3298 dst_reg->smax_value = dst_reg->umax_value;
3299 }
48461135
JB
3300 break;
3301 case BPF_AND:
f1174f77 3302 if (src_known && dst_known) {
b03c9f9f
EC
3303 __mark_reg_known(dst_reg, dst_reg->var_off.value &
3304 src_reg.var_off.value);
f1174f77
EC
3305 break;
3306 }
b03c9f9f
EC
3307 /* We get our minimum from the var_off, since that's inherently
3308 * bitwise. Our maximum is the minimum of the operands' maxima.
f23cc643 3309 */
f1174f77 3310 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
b03c9f9f
EC
3311 dst_reg->umin_value = dst_reg->var_off.value;
3312 dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
3313 if (dst_reg->smin_value < 0 || smin_val < 0) {
3314 /* Lose signed bounds when ANDing negative numbers,
3315 * ain't nobody got time for that.
3316 */
3317 dst_reg->smin_value = S64_MIN;
3318 dst_reg->smax_value = S64_MAX;
3319 } else {
3320 /* ANDing two positives gives a positive, so safe to
3321 * cast result into s64.
3322 */
3323 dst_reg->smin_value = dst_reg->umin_value;
3324 dst_reg->smax_value = dst_reg->umax_value;
3325 }
3326 /* We may learn something more from the var_off */
3327 __update_reg_bounds(dst_reg);
f1174f77
EC
3328 break;
3329 case BPF_OR:
3330 if (src_known && dst_known) {
b03c9f9f
EC
3331 __mark_reg_known(dst_reg, dst_reg->var_off.value |
3332 src_reg.var_off.value);
f1174f77
EC
3333 break;
3334 }
b03c9f9f
EC
3335 /* We get our maximum from the var_off, and our minimum is the
3336 * maximum of the operands' minima
f1174f77
EC
3337 */
3338 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
b03c9f9f
EC
3339 dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
3340 dst_reg->umax_value = dst_reg->var_off.value |
3341 dst_reg->var_off.mask;
3342 if (dst_reg->smin_value < 0 || smin_val < 0) {
3343 /* Lose signed bounds when ORing negative numbers,
3344 * ain't nobody got time for that.
3345 */
3346 dst_reg->smin_value = S64_MIN;
3347 dst_reg->smax_value = S64_MAX;
f1174f77 3348 } else {
b03c9f9f
EC
3349 /* ORing two positives gives a positive, so safe to
3350 * cast result into s64.
3351 */
3352 dst_reg->smin_value = dst_reg->umin_value;
3353 dst_reg->smax_value = dst_reg->umax_value;
f1174f77 3354 }
b03c9f9f
EC
3355 /* We may learn something more from the var_off */
3356 __update_reg_bounds(dst_reg);
48461135
JB
3357 break;
3358 case BPF_LSH:
468f6eaf
JH
3359 if (umax_val >= insn_bitness) {
3360 /* Shifts greater than 31 or 63 are undefined.
3361 * This includes shifts by a negative number.
b03c9f9f 3362 */
61bd5218 3363 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77
EC
3364 break;
3365 }
b03c9f9f
EC
3366 /* We lose all sign bit information (except what we can pick
3367 * up from var_off)
48461135 3368 */
b03c9f9f
EC
3369 dst_reg->smin_value = S64_MIN;
3370 dst_reg->smax_value = S64_MAX;
3371 /* If we might shift our top bit out, then we know nothing */
3372 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
3373 dst_reg->umin_value = 0;
3374 dst_reg->umax_value = U64_MAX;
d1174416 3375 } else {
b03c9f9f
EC
3376 dst_reg->umin_value <<= umin_val;
3377 dst_reg->umax_value <<= umax_val;
d1174416 3378 }
afbe1a5b 3379 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
b03c9f9f
EC
3380 /* We may learn something more from the var_off */
3381 __update_reg_bounds(dst_reg);
48461135
JB
3382 break;
3383 case BPF_RSH:
468f6eaf
JH
3384 if (umax_val >= insn_bitness) {
3385 /* Shifts greater than 31 or 63 are undefined.
3386 * This includes shifts by a negative number.
b03c9f9f 3387 */
61bd5218 3388 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77
EC
3389 break;
3390 }
4374f256
EC
3391 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
3392 * be negative, then either:
3393 * 1) src_reg might be zero, so the sign bit of the result is
3394 * unknown, so we lose our signed bounds
3395 * 2) it's known negative, thus the unsigned bounds capture the
3396 * signed bounds
3397 * 3) the signed bounds cross zero, so they tell us nothing
3398 * about the result
3399 * If the value in dst_reg is known nonnegative, then again the
3400 * unsigned bounds capture the signed bounds.
3401 * Thus, in all cases it suffices to blow away our signed bounds
3402 * and rely on inferring new ones from the unsigned bounds and
3403 * var_off of the result.
3404 */
3405 dst_reg->smin_value = S64_MIN;
3406 dst_reg->smax_value = S64_MAX;
afbe1a5b 3407 dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
b03c9f9f
EC
3408 dst_reg->umin_value >>= umax_val;
3409 dst_reg->umax_value >>= umin_val;
3410 /* We may learn something more from the var_off */
3411 __update_reg_bounds(dst_reg);
48461135 3412 break;
9cbe1f5a
YS
3413 case BPF_ARSH:
3414 if (umax_val >= insn_bitness) {
3415 /* Shifts greater than 31 or 63 are undefined.
3416 * This includes shifts by a negative number.
3417 */
3418 mark_reg_unknown(env, regs, insn->dst_reg);
3419 break;
3420 }
3421
3422 /* Upon reaching here, src_known is true and
3423 * umax_val is equal to umin_val.
3424 */
3425 dst_reg->smin_value >>= umin_val;
3426 dst_reg->smax_value >>= umin_val;
3427 dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val);
3428
3429 /* blow away the dst_reg umin_value/umax_value and rely on
3430 * dst_reg var_off to refine the result.
3431 */
3432 dst_reg->umin_value = 0;
3433 dst_reg->umax_value = U64_MAX;
3434 __update_reg_bounds(dst_reg);
3435 break;
48461135 3436 default:
61bd5218 3437 mark_reg_unknown(env, regs, insn->dst_reg);
48461135
JB
3438 break;
3439 }
3440
468f6eaf
JH
3441 if (BPF_CLASS(insn->code) != BPF_ALU64) {
3442 /* 32-bit ALU ops are (32,32)->32 */
3443 coerce_reg_to_size(dst_reg, 4);
468f6eaf
JH
3444 }
3445
b03c9f9f
EC
3446 __reg_deduce_bounds(dst_reg);
3447 __reg_bound_offset(dst_reg);
f1174f77
EC
3448 return 0;
3449}
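/* Illustrative example for the BPF_AND case above (values chosen for
 * exposition):
 *
 *   dst_reg: unknown scalar with umin=0, umax=1000, var_off=(0; ~0)
 *   src_reg: constant 0xff
 *
 * tnum_and() yields var_off=(0; 0xff), so umin_value becomes 0 and
 * umax_value becomes min(1000, 0xff) = 255.  Both operands are
 * non-negative, so the signed bounds become [0, 255] as well.
 */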
3450
3451/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
3452 * and var_off.
3453 */
3454static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
3455 struct bpf_insn *insn)
3456{
f4d7e40a
AS
3457 struct bpf_verifier_state *vstate = env->cur_state;
3458 struct bpf_func_state *state = vstate->frame[vstate->curframe];
3459 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
f1174f77
EC
3460 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
3461 u8 opcode = BPF_OP(insn->code);
f1174f77
EC
3462
3463 dst_reg = &regs[insn->dst_reg];
f1174f77
EC
3464 src_reg = NULL;
3465 if (dst_reg->type != SCALAR_VALUE)
3466 ptr_reg = dst_reg;
3467 if (BPF_SRC(insn->code) == BPF_X) {
3468 src_reg = &regs[insn->src_reg];
f1174f77
EC
3469 if (src_reg->type != SCALAR_VALUE) {
3470 if (dst_reg->type != SCALAR_VALUE) {
3471 /* Combining two pointers by any ALU op yields
82abbf8d
AS
3472 * an arbitrary scalar. Disallow all math except
3473 * pointer subtraction
f1174f77 3474 */
dd066823 3475 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
82abbf8d
AS
3476 mark_reg_unknown(env, regs, insn->dst_reg);
3477 return 0;
f1174f77 3478 }
82abbf8d
AS
3479 verbose(env, "R%d pointer %s pointer prohibited\n",
3480 insn->dst_reg,
3481 bpf_alu_string[opcode >> 4]);
3482 return -EACCES;
f1174f77
EC
3483 } else {
3484 /* scalar += pointer
3485 * This is legal, but we have to reverse our
3486 * src/dest handling in computing the range
3487 */
82abbf8d
AS
3488 return adjust_ptr_min_max_vals(env, insn,
3489 src_reg, dst_reg);
f1174f77
EC
3490 }
3491 } else if (ptr_reg) {
3492 /* pointer += scalar */
82abbf8d
AS
3493 return adjust_ptr_min_max_vals(env, insn,
3494 dst_reg, src_reg);
f1174f77
EC
3495 }
3496 } else {
3497 /* Pretend the src is a reg with a known value, since we only
3498 * need to be able to read from this state.
3499 */
3500 off_reg.type = SCALAR_VALUE;
b03c9f9f 3501 __mark_reg_known(&off_reg, insn->imm);
f1174f77 3502 src_reg = &off_reg;
82abbf8d
AS
3503 if (ptr_reg) /* pointer += K */
3504 return adjust_ptr_min_max_vals(env, insn,
3505 ptr_reg, src_reg);
f1174f77
EC
3506 }
3507
3508 /* Got here implies adding two SCALAR_VALUEs */
3509 if (WARN_ON_ONCE(ptr_reg)) {
f4d7e40a 3510 print_verifier_state(env, state);
61bd5218 3511 verbose(env, "verifier internal error: unexpected ptr_reg\n");
f1174f77
EC
3512 return -EINVAL;
3513 }
3514 if (WARN_ON(!src_reg)) {
f4d7e40a 3515 print_verifier_state(env, state);
61bd5218 3516 verbose(env, "verifier internal error: no src_reg\n");
f1174f77
EC
3517 return -EINVAL;
3518 }
3519 return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
48461135
JB
3520}
3521
17a52670 3522/* check validity of 32-bit and 64-bit arithmetic operations */
58e2af8b 3523static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
17a52670 3524{
638f5b90 3525 struct bpf_reg_state *regs = cur_regs(env);
17a52670
AS
3526 u8 opcode = BPF_OP(insn->code);
3527 int err;
3528
3529 if (opcode == BPF_END || opcode == BPF_NEG) {
3530 if (opcode == BPF_NEG) {
3531 if (BPF_SRC(insn->code) != 0 ||
3532 insn->src_reg != BPF_REG_0 ||
3533 insn->off != 0 || insn->imm != 0) {
61bd5218 3534 verbose(env, "BPF_NEG uses reserved fields\n");
17a52670
AS
3535 return -EINVAL;
3536 }
3537 } else {
3538 if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
e67b8a68
EC
3539 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
3540 BPF_CLASS(insn->code) == BPF_ALU64) {
61bd5218 3541 verbose(env, "BPF_END uses reserved fields\n");
17a52670
AS
3542 return -EINVAL;
3543 }
3544 }
3545
3546 /* check src operand */
dc503a8a 3547 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
3548 if (err)
3549 return err;
3550
1be7f75d 3551 if (is_pointer_value(env, insn->dst_reg)) {
61bd5218 3552 verbose(env, "R%d pointer arithmetic prohibited\n",
1be7f75d
AS
3553 insn->dst_reg);
3554 return -EACCES;
3555 }
3556
17a52670 3557 /* check dest operand */
dc503a8a 3558 err = check_reg_arg(env, insn->dst_reg, DST_OP);
17a52670
AS
3559 if (err)
3560 return err;
3561
3562 } else if (opcode == BPF_MOV) {
3563
3564 if (BPF_SRC(insn->code) == BPF_X) {
3565 if (insn->imm != 0 || insn->off != 0) {
61bd5218 3566 verbose(env, "BPF_MOV uses reserved fields\n");
17a52670
AS
3567 return -EINVAL;
3568 }
3569
3570 /* check src operand */
dc503a8a 3571 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
3572 if (err)
3573 return err;
3574 } else {
3575 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
61bd5218 3576 verbose(env, "BPF_MOV uses reserved fields\n");
17a52670
AS
3577 return -EINVAL;
3578 }
3579 }
3580
fbeb1603
AF
3581 /* check dest operand, mark as required later */
3582 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
17a52670
AS
3583 if (err)
3584 return err;
3585
3586 if (BPF_SRC(insn->code) == BPF_X) {
e434b8cd
JW
3587 struct bpf_reg_state *src_reg = regs + insn->src_reg;
3588 struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
3589
17a52670
AS
3590 if (BPF_CLASS(insn->code) == BPF_ALU64) {
3591 /* case: R1 = R2
3592 * copy register state to dest reg
3593 */
e434b8cd
JW
3594 *dst_reg = *src_reg;
3595 dst_reg->live |= REG_LIVE_WRITTEN;
17a52670 3596 } else {
f1174f77 3597 /* R1 = (u32) R2 */
1be7f75d 3598 if (is_pointer_value(env, insn->src_reg)) {
61bd5218
JK
3599 verbose(env,
3600 "R%d partial copy of pointer\n",
1be7f75d
AS
3601 insn->src_reg);
3602 return -EACCES;
e434b8cd
JW
3603 } else if (src_reg->type == SCALAR_VALUE) {
3604 *dst_reg = *src_reg;
3605 dst_reg->live |= REG_LIVE_WRITTEN;
3606 } else {
3607 mark_reg_unknown(env, regs,
3608 insn->dst_reg);
1be7f75d 3609 }
e434b8cd 3610 coerce_reg_to_size(dst_reg, 4);
17a52670
AS
3611 }
3612 } else {
3613 /* case: R = imm
3614 * remember the value we stored into this reg
3615 */
fbeb1603
AF
3616 /* clear any state __mark_reg_known doesn't set */
3617 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77 3618 regs[insn->dst_reg].type = SCALAR_VALUE;
95a762e2
JH
3619 if (BPF_CLASS(insn->code) == BPF_ALU64) {
3620 __mark_reg_known(regs + insn->dst_reg,
3621 insn->imm);
3622 } else {
3623 __mark_reg_known(regs + insn->dst_reg,
3624 (u32)insn->imm);
3625 }
17a52670
AS
3626 }
3627
3628 } else if (opcode > BPF_END) {
61bd5218 3629 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
17a52670
AS
3630 return -EINVAL;
3631
3632 } else { /* all other ALU ops: and, sub, xor, add, ... */
3633
17a52670
AS
3634 if (BPF_SRC(insn->code) == BPF_X) {
3635 if (insn->imm != 0 || insn->off != 0) {
61bd5218 3636 verbose(env, "BPF_ALU uses reserved fields\n");
17a52670
AS
3637 return -EINVAL;
3638 }
3639 /* check src1 operand */
dc503a8a 3640 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
3641 if (err)
3642 return err;
3643 } else {
3644 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
61bd5218 3645 verbose(env, "BPF_ALU uses reserved fields\n");
17a52670
AS
3646 return -EINVAL;
3647 }
3648 }
3649
3650 /* check src2 operand */
dc503a8a 3651 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
3652 if (err)
3653 return err;
3654
3655 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
3656 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
61bd5218 3657 verbose(env, "div by zero\n");
17a52670
AS
3658 return -EINVAL;
3659 }
3660
229394e8
RV
3661 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
3662 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
3663 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
3664
3665 if (insn->imm < 0 || insn->imm >= size) {
61bd5218 3666 verbose(env, "invalid shift %d\n", insn->imm);
229394e8
RV
3667 return -EINVAL;
3668 }
3669 }
3670
1a0dc1ac 3671 /* check dest operand */
dc503a8a 3672 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
1a0dc1ac
AS
3673 if (err)
3674 return err;
3675
f1174f77 3676 return adjust_reg_min_max_vals(env, insn);
17a52670
AS
3677 }
3678
3679 return 0;
3680}
3681
f4d7e40a 3682static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
de8f3a83 3683 struct bpf_reg_state *dst_reg,
f8ddadc4 3684 enum bpf_reg_type type,
fb2a311a 3685 bool range_right_open)
969bf05e 3686{
f4d7e40a 3687 struct bpf_func_state *state = vstate->frame[vstate->curframe];
58e2af8b 3688 struct bpf_reg_state *regs = state->regs, *reg;
fb2a311a 3689 u16 new_range;
f4d7e40a 3690 int i, j;
2d2be8ca 3691
fb2a311a
DB
3692 if (dst_reg->off < 0 ||
3693 (dst_reg->off == 0 && range_right_open))
f1174f77
EC
3694 /* This doesn't give us any range */
3695 return;
3696
b03c9f9f
EC
3697 if (dst_reg->umax_value > MAX_PACKET_OFF ||
3698 dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
f1174f77
EC
3699 /* Risk of overflow. For instance, ptr + (1<<63) may be less
3700 * than pkt_end, but that's because it's also less than pkt.
3701 */
3702 return;
3703
fb2a311a
DB
3704 new_range = dst_reg->off;
3705 if (range_right_open)
3706 new_range--;
3707
3708 /* Examples for register markings:
2d2be8ca 3709 *
fb2a311a 3710 * pkt_data in dst register:
2d2be8ca
DB
3711 *
3712 * r2 = r3;
3713 * r2 += 8;
3714 * if (r2 > pkt_end) goto <handle exception>
3715 * <access okay>
3716 *
b4e432f1
DB
3717 * r2 = r3;
3718 * r2 += 8;
3719 * if (r2 < pkt_end) goto <access okay>
3720 * <handle exception>
3721 *
2d2be8ca
DB
3722 * Where:
3723 * r2 == dst_reg, pkt_end == src_reg
3724 * r2=pkt(id=n,off=8,r=0)
3725 * r3=pkt(id=n,off=0,r=0)
3726 *
fb2a311a 3727 * pkt_data in src register:
2d2be8ca
DB
3728 *
3729 * r2 = r3;
3730 * r2 += 8;
3731 * if (pkt_end >= r2) goto <access okay>
3732 * <handle exception>
3733 *
b4e432f1
DB
3734 * r2 = r3;
3735 * r2 += 8;
3736 * if (pkt_end <= r2) goto <handle exception>
3737 * <access okay>
3738 *
2d2be8ca
DB
3739 * Where:
3740 * pkt_end == dst_reg, r2 == src_reg
3741 * r2=pkt(id=n,off=8,r=0)
3742 * r3=pkt(id=n,off=0,r=0)
3743 *
3744 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
fb2a311a
DB
3745 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
3746 * and [r3, r3 + 8-1) respectively is safe to access depending on
3747 * the check.
969bf05e 3748 */
2d2be8ca 3749
f1174f77
EC
3750 /* If our ids match, then we must have the same max_value. And we
3751 * don't care about the other reg's fixed offset, since if it's too big
3752 * the range won't allow anything.
3753 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
3754 */
969bf05e 3755 for (i = 0; i < MAX_BPF_REG; i++)
de8f3a83 3756 if (regs[i].type == type && regs[i].id == dst_reg->id)
b1977682 3757 /* keep the maximum range already checked */
fb2a311a 3758 regs[i].range = max(regs[i].range, new_range);
969bf05e 3759
f4d7e40a
AS
3760 for (j = 0; j <= vstate->curframe; j++) {
3761 state = vstate->frame[j];
f3709f69
JS
3762 bpf_for_each_spilled_reg(i, state, reg) {
3763 if (!reg)
f4d7e40a 3764 continue;
f4d7e40a
AS
3765 if (reg->type == type && reg->id == dst_reg->id)
3766 reg->range = max(reg->range, new_range);
3767 }
969bf05e
AS
3768 }
3769}
3770
4f7b3e82
AS
3771/* compute branch direction of the expression "if (reg opcode val) goto target;"
3772 * and return:
3773 * 1 - branch will be taken and "goto target" will be executed
3774 * 0 - branch will not be taken and fall-through to next insn
3775 * -1 - unknown. Example: "if (reg < 5)" is unknown when the register's value range is [0,10]
3776 */
3777static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
3778{
3779 if (__is_pointer_value(false, reg))
3780 return -1;
3781
3782 switch (opcode) {
3783 case BPF_JEQ:
3784 if (tnum_is_const(reg->var_off))
3785 return !!tnum_equals_const(reg->var_off, val);
3786 break;
3787 case BPF_JNE:
3788 if (tnum_is_const(reg->var_off))
3789 return !tnum_equals_const(reg->var_off, val);
3790 break;
3791 case BPF_JGT:
3792 if (reg->umin_value > val)
3793 return 1;
3794 else if (reg->umax_value <= val)
3795 return 0;
3796 break;
3797 case BPF_JSGT:
3798 if (reg->smin_value > (s64)val)
3799 return 1;
3800 else if (reg->smax_value < (s64)val)
3801 return 0;
3802 break;
3803 case BPF_JLT:
3804 if (reg->umax_value < val)
3805 return 1;
3806 else if (reg->umin_value >= val)
3807 return 0;
3808 break;
3809 case BPF_JSLT:
3810 if (reg->smax_value < (s64)val)
3811 return 1;
3812 else if (reg->smin_value >= (s64)val)
3813 return 0;
3814 break;
3815 case BPF_JGE:
3816 if (reg->umin_value >= val)
3817 return 1;
3818 else if (reg->umax_value < val)
3819 return 0;
3820 break;
3821 case BPF_JSGE:
3822 if (reg->smin_value >= (s64)val)
3823 return 1;
3824 else if (reg->smax_value < (s64)val)
3825 return 0;
3826 break;
3827 case BPF_JLE:
3828 if (reg->umax_value <= val)
3829 return 1;
3830 else if (reg->umin_value > val)
3831 return 0;
3832 break;
3833 case BPF_JSLE:
3834 if (reg->smax_value <= (s64)val)
3835 return 1;
3836 else if (reg->smin_value > (s64)val)
3837 return 0;
3838 break;
3839 }
3840
3841 return -1;
3842}
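/* Example of how the helper above decides a branch (illustrative values):
 *
 *   r1 has umin_value=8, umax_value=15 and the insn is "if r1 > 5 goto pc+4".
 *   BPF_JGT with val=5: umin_value (8) > 5, so 1 is returned and only the
 *   branch target needs to be explored.  With r1 in [0, 10] instead,
 *   neither bound decides the comparison and -1 (unknown) is returned,
 *   so both paths are analyzed.
 */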
3843
48461135
JB
3844/* Adjusts the register min/max values in the case that the dst_reg is the
3845 * variable register that we are working on, and src_reg is a constant or we're
3846 * simply doing a BPF_K check.
f1174f77 3847 * In JEQ/JNE cases we also adjust the var_off values.
48461135
JB
3848 */
3849static void reg_set_min_max(struct bpf_reg_state *true_reg,
3850 struct bpf_reg_state *false_reg, u64 val,
3851 u8 opcode)
3852{
f1174f77
EC
3853 /* If the dst_reg is a pointer, we can't learn anything about its
3854 * variable offset from the compare (unless src_reg were a pointer into
3855 * the same object, but we don't bother with that).
3856 * Since false_reg and true_reg have the same type by construction, we
3857 * only need to check one of them for pointerness.
3858 */
3859 if (__is_pointer_value(false, false_reg))
3860 return;
4cabc5b1 3861
48461135
JB
3862 switch (opcode) {
3863 case BPF_JEQ:
3864 /* If this is false then we know nothing Jon Snow, but if it is
3865 * true then we know for sure.
3866 */
b03c9f9f 3867 __mark_reg_known(true_reg, val);
48461135
JB
3868 break;
3869 case BPF_JNE:
3870 /* If this is true we know nothing Jon Snow, but if it is false
3871 * we know the value for sure;
3872 */
b03c9f9f 3873 __mark_reg_known(false_reg, val);
48461135
JB
3874 break;
3875 case BPF_JGT:
b03c9f9f
EC
3876 false_reg->umax_value = min(false_reg->umax_value, val);
3877 true_reg->umin_value = max(true_reg->umin_value, val + 1);
3878 break;
48461135 3879 case BPF_JSGT:
b03c9f9f
EC
3880 false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
3881 true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
48461135 3882 break;
b4e432f1
DB
3883 case BPF_JLT:
3884 false_reg->umin_value = max(false_reg->umin_value, val);
3885 true_reg->umax_value = min(true_reg->umax_value, val - 1);
3886 break;
3887 case BPF_JSLT:
3888 false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
3889 true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
3890 break;
48461135 3891 case BPF_JGE:
b03c9f9f
EC
3892 false_reg->umax_value = min(false_reg->umax_value, val - 1);
3893 true_reg->umin_value = max(true_reg->umin_value, val);
3894 break;
48461135 3895 case BPF_JSGE:
b03c9f9f
EC
3896 false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
3897 true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
48461135 3898 break;
b4e432f1
DB
3899 case BPF_JLE:
3900 false_reg->umin_value = max(false_reg->umin_value, val + 1);
3901 true_reg->umax_value = min(true_reg->umax_value, val);
3902 break;
3903 case BPF_JSLE:
3904 false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
3905 true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
3906 break;
48461135
JB
3907 default:
3908 break;
3909 }
3910
b03c9f9f
EC
3911 __reg_deduce_bounds(false_reg);
3912 __reg_deduce_bounds(true_reg);
3913 /* We might have learned some bits from the bounds. */
3914 __reg_bound_offset(false_reg);
3915 __reg_bound_offset(true_reg);
3916 /* Intersecting with the old var_off might have improved our bounds
3917 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
3918 * then new var_off is (0; 0x7f...fc) which improves our umax.
3919 */
3920 __update_reg_bounds(false_reg);
3921 __update_reg_bounds(true_reg);
48461135
JB
3922}
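/* Illustrative example for the BPF_JGT case above:
 *
 *   "if r1 > 7 goto ..." with r1 an unknown scalar in [0, 255].
 *   false branch: umax_value = min(255, 7)    -> r1 in [0, 7]
 *   true branch:  umin_value = max(0, 7 + 1)  -> r1 in [8, 255]
 *
 * The __reg_deduce_bounds/__reg_bound_offset/__update_reg_bounds calls at
 * the end of the function then refine the signed bounds and var_off from
 * these new unsigned bounds.
 */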
3923
f1174f77
EC
3924/* Same as above, but for the case that dst_reg holds a constant and src_reg is
3925 * the variable reg.
48461135
JB
3926 */
3927static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
3928 struct bpf_reg_state *false_reg, u64 val,
3929 u8 opcode)
3930{
f1174f77
EC
3931 if (__is_pointer_value(false, false_reg))
3932 return;
4cabc5b1 3933
48461135
JB
3934 switch (opcode) {
3935 case BPF_JEQ:
3936 /* If this is false then we know nothing Jon Snow, but if it is
3937 * true then we know for sure.
3938 */
b03c9f9f 3939 __mark_reg_known(true_reg, val);
48461135
JB
3940 break;
3941 case BPF_JNE:
3942 /* If this is true we know nothing Jon Snow, but if it is false
3943 * we know the value for sure;
3944 */
b03c9f9f 3945 __mark_reg_known(false_reg, val);
48461135
JB
3946 break;
3947 case BPF_JGT:
b03c9f9f
EC
3948 true_reg->umax_value = min(true_reg->umax_value, val - 1);
3949 false_reg->umin_value = max(false_reg->umin_value, val);
3950 break;
48461135 3951 case BPF_JSGT:
b03c9f9f
EC
3952 true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
3953 false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
48461135 3954 break;
b4e432f1
DB
3955 case BPF_JLT:
3956 true_reg->umin_value = max(true_reg->umin_value, val + 1);
3957 false_reg->umax_value = min(false_reg->umax_value, val);
3958 break;
3959 case BPF_JSLT:
3960 true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
3961 false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
3962 break;
48461135 3963 case BPF_JGE:
b03c9f9f
EC
3964 true_reg->umax_value = min(true_reg->umax_value, val);
3965 false_reg->umin_value = max(false_reg->umin_value, val + 1);
3966 break;
48461135 3967 case BPF_JSGE:
b03c9f9f
EC
3968 true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
3969 false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
48461135 3970 break;
b4e432f1
DB
3971 case BPF_JLE:
3972 true_reg->umin_value = max(true_reg->umin_value, val);
3973 false_reg->umax_value = min(false_reg->umax_value, val - 1);
3974 break;
3975 case BPF_JSLE:
3976 true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
3977 false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
3978 break;
48461135
JB
3979 default:
3980 break;
3981 }
3982
b03c9f9f
EC
3983 __reg_deduce_bounds(false_reg);
3984 __reg_deduce_bounds(true_reg);
3985 /* We might have learned some bits from the bounds. */
3986 __reg_bound_offset(false_reg);
3987 __reg_bound_offset(true_reg);
3988 /* Intersecting with the old var_off might have improved our bounds
3989 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
3990 * then new var_off is (0; 0x7f...fc) which improves our umax.
3991 */
3992 __update_reg_bounds(false_reg);
3993 __update_reg_bounds(true_reg);
f1174f77
EC
3994}
3995
3996/* Regs are known to be equal, so intersect their min/max/var_off */
3997static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
3998 struct bpf_reg_state *dst_reg)
3999{
b03c9f9f
EC
4000 src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
4001 dst_reg->umin_value);
4002 src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
4003 dst_reg->umax_value);
4004 src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
4005 dst_reg->smin_value);
4006 src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
4007 dst_reg->smax_value);
f1174f77
EC
4008 src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
4009 dst_reg->var_off);
b03c9f9f
EC
4010 /* We might have learned new bounds from the var_off. */
4011 __update_reg_bounds(src_reg);
4012 __update_reg_bounds(dst_reg);
4013 /* We might have learned something about the sign bit. */
4014 __reg_deduce_bounds(src_reg);
4015 __reg_deduce_bounds(dst_reg);
4016 /* We might have learned some bits from the bounds. */
4017 __reg_bound_offset(src_reg);
4018 __reg_bound_offset(dst_reg);
4019 /* Intersecting with the old var_off might have improved our bounds
4020 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
4021 * then new var_off is (0; 0x7f...fc) which improves our umax.
4022 */
4023 __update_reg_bounds(src_reg);
4024 __update_reg_bounds(dst_reg);
f1174f77
EC
4025}
4026
4027static void reg_combine_min_max(struct bpf_reg_state *true_src,
4028 struct bpf_reg_state *true_dst,
4029 struct bpf_reg_state *false_src,
4030 struct bpf_reg_state *false_dst,
4031 u8 opcode)
4032{
4033 switch (opcode) {
4034 case BPF_JEQ:
4035 __reg_combine_min_max(true_src, true_dst);
4036 break;
4037 case BPF_JNE:
4038 __reg_combine_min_max(false_src, false_dst);
b03c9f9f 4039 break;
4cabc5b1 4040 }
48461135
JB
4041}
4042
fd978bf7
JS
4043static void mark_ptr_or_null_reg(struct bpf_func_state *state,
4044 struct bpf_reg_state *reg, u32 id,
840b9615 4045 bool is_null)
57a09bf0 4046{
840b9615 4047 if (reg_type_may_be_null(reg->type) && reg->id == id) {
f1174f77
EC
4048 /* Old offset (both fixed and variable parts) should
4049 * have been known-zero, because we don't allow pointer
4050 * arithmetic on pointers that might be NULL.
4051 */
b03c9f9f
EC
4052 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
4053 !tnum_equals_const(reg->var_off, 0) ||
f1174f77 4054 reg->off)) {
b03c9f9f
EC
4055 __mark_reg_known_zero(reg);
4056 reg->off = 0;
f1174f77
EC
4057 }
4058 if (is_null) {
4059 reg->type = SCALAR_VALUE;
840b9615
JS
4060 } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
4061 if (reg->map_ptr->inner_map_meta) {
4062 reg->type = CONST_PTR_TO_MAP;
4063 reg->map_ptr = reg->map_ptr->inner_map_meta;
4064 } else {
4065 reg->type = PTR_TO_MAP_VALUE;
4066 }
c64b7983
JS
4067 } else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
4068 reg->type = PTR_TO_SOCKET;
56f668df 4069 }
fd978bf7
JS
4070 if (is_null || !reg_is_refcounted(reg)) {
4071 /* We don't need id from this point onwards anymore,
4072 * thus we should better reset it, so that state
4073 * pruning has chances to take effect.
4074 */
4075 reg->id = 0;
56f668df 4076 }
57a09bf0
TG
4077 }
4078}
4079
4080/* The logic is similar to find_good_pkt_pointers(), both could eventually
4081 * be folded together at some point.
4082 */
840b9615
JS
4083static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
4084 bool is_null)
57a09bf0 4085{
f4d7e40a 4086 struct bpf_func_state *state = vstate->frame[vstate->curframe];
f3709f69 4087 struct bpf_reg_state *reg, *regs = state->regs;
a08dd0da 4088 u32 id = regs[regno].id;
f4d7e40a 4089 int i, j;
57a09bf0 4090
fd978bf7
JS
4091 if (reg_is_refcounted_or_null(&regs[regno]) && is_null)
4092 __release_reference_state(state, id);
4093
57a09bf0 4094 for (i = 0; i < MAX_BPF_REG; i++)
fd978bf7 4095 mark_ptr_or_null_reg(state, &regs[i], id, is_null);
57a09bf0 4096
f4d7e40a
AS
4097 for (j = 0; j <= vstate->curframe; j++) {
4098 state = vstate->frame[j];
f3709f69
JS
4099 bpf_for_each_spilled_reg(i, state, reg) {
4100 if (!reg)
f4d7e40a 4101 continue;
fd978bf7 4102 mark_ptr_or_null_reg(state, reg, id, is_null);
f4d7e40a 4103 }
57a09bf0
TG
4104 }
4105}
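/* Example of the NULL-check propagation performed above (illustrative):
 *
 *   r0 = bpf_map_lookup_elem(r1, r2)   r0 = map_value_or_null(id=N)
 *   r6 = r0                            r6 shares id=N
 *   if (r0 == 0) goto err
 *
 * In the fall-through branch both r0 and r6 become PTR_TO_MAP_VALUE;
 * in the 'err' branch both are marked as scalars.  The id is reset
 * afterwards so that state pruning has a chance to match such states.
 */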
4106
5beca081
DB
4107static bool try_match_pkt_pointers(const struct bpf_insn *insn,
4108 struct bpf_reg_state *dst_reg,
4109 struct bpf_reg_state *src_reg,
4110 struct bpf_verifier_state *this_branch,
4111 struct bpf_verifier_state *other_branch)
4112{
4113 if (BPF_SRC(insn->code) != BPF_X)
4114 return false;
4115
4116 switch (BPF_OP(insn->code)) {
4117 case BPF_JGT:
4118 if ((dst_reg->type == PTR_TO_PACKET &&
4119 src_reg->type == PTR_TO_PACKET_END) ||
4120 (dst_reg->type == PTR_TO_PACKET_META &&
4121 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4122 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
4123 find_good_pkt_pointers(this_branch, dst_reg,
4124 dst_reg->type, false);
4125 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4126 src_reg->type == PTR_TO_PACKET) ||
4127 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4128 src_reg->type == PTR_TO_PACKET_META)) {
4129 /* pkt_end > pkt_data', pkt_data > pkt_meta' */
4130 find_good_pkt_pointers(other_branch, src_reg,
4131 src_reg->type, true);
4132 } else {
4133 return false;
4134 }
4135 break;
4136 case BPF_JLT:
4137 if ((dst_reg->type == PTR_TO_PACKET &&
4138 src_reg->type == PTR_TO_PACKET_END) ||
4139 (dst_reg->type == PTR_TO_PACKET_META &&
4140 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4141 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
4142 find_good_pkt_pointers(other_branch, dst_reg,
4143 dst_reg->type, true);
4144 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4145 src_reg->type == PTR_TO_PACKET) ||
4146 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4147 src_reg->type == PTR_TO_PACKET_META)) {
4148 /* pkt_end < pkt_data', pkt_data > pkt_meta' */
4149 find_good_pkt_pointers(this_branch, src_reg,
4150 src_reg->type, false);
4151 } else {
4152 return false;
4153 }
4154 break;
4155 case BPF_JGE:
4156 if ((dst_reg->type == PTR_TO_PACKET &&
4157 src_reg->type == PTR_TO_PACKET_END) ||
4158 (dst_reg->type == PTR_TO_PACKET_META &&
4159 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4160 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
4161 find_good_pkt_pointers(this_branch, dst_reg,
4162 dst_reg->type, true);
4163 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4164 src_reg->type == PTR_TO_PACKET) ||
4165 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4166 src_reg->type == PTR_TO_PACKET_META)) {
4167 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
4168 find_good_pkt_pointers(other_branch, src_reg,
4169 src_reg->type, false);
4170 } else {
4171 return false;
4172 }
4173 break;
4174 case BPF_JLE:
4175 if ((dst_reg->type == PTR_TO_PACKET &&
4176 src_reg->type == PTR_TO_PACKET_END) ||
4177 (dst_reg->type == PTR_TO_PACKET_META &&
4178 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4179 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
4180 find_good_pkt_pointers(other_branch, dst_reg,
4181 dst_reg->type, false);
4182 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4183 src_reg->type == PTR_TO_PACKET) ||
4184 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4185 src_reg->type == PTR_TO_PACKET_META)) {
4186 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
4187 find_good_pkt_pointers(this_branch, src_reg,
4188 src_reg->type, true);
4189 } else {
4190 return false;
4191 }
4192 break;
4193 default:
4194 return false;
4195 }
4196
4197 return true;
4198}
4199
58e2af8b 4200static int check_cond_jmp_op(struct bpf_verifier_env *env,
17a52670
AS
4201 struct bpf_insn *insn, int *insn_idx)
4202{
f4d7e40a
AS
4203 struct bpf_verifier_state *this_branch = env->cur_state;
4204 struct bpf_verifier_state *other_branch;
4205 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
4206 struct bpf_reg_state *dst_reg, *other_branch_regs;
17a52670
AS
4207 u8 opcode = BPF_OP(insn->code);
4208 int err;
4209
b4e432f1 4210 if (opcode > BPF_JSLE) {
61bd5218 4211 verbose(env, "invalid BPF_JMP opcode %x\n", opcode);
17a52670
AS
4212 return -EINVAL;
4213 }
4214
4215 if (BPF_SRC(insn->code) == BPF_X) {
4216 if (insn->imm != 0) {
61bd5218 4217 verbose(env, "BPF_JMP uses reserved fields\n");
17a52670
AS
4218 return -EINVAL;
4219 }
4220
4221 /* check src1 operand */
dc503a8a 4222 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
4223 if (err)
4224 return err;
1be7f75d
AS
4225
4226 if (is_pointer_value(env, insn->src_reg)) {
61bd5218 4227 verbose(env, "R%d pointer comparison prohibited\n",
1be7f75d
AS
4228 insn->src_reg);
4229 return -EACCES;
4230 }
17a52670
AS
4231 } else {
4232 if (insn->src_reg != BPF_REG_0) {
61bd5218 4233 verbose(env, "BPF_JMP uses reserved fields\n");
17a52670
AS
4234 return -EINVAL;
4235 }
4236 }
4237
4238 /* check src2 operand */
dc503a8a 4239 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
4240 if (err)
4241 return err;
4242
1a0dc1ac
AS
4243 dst_reg = &regs[insn->dst_reg];
4244
4f7b3e82
AS
4245 if (BPF_SRC(insn->code) == BPF_K) {
4246 int pred = is_branch_taken(dst_reg, insn->imm, opcode);
4247
4248 if (pred == 1) {
4249 /* only follow the goto, ignore fall-through */
17a52670
AS
4250 *insn_idx += insn->off;
4251 return 0;
4f7b3e82
AS
4252 } else if (pred == 0) {
4253 /* only follow fall-through branch, since
17a52670
AS
4254 * that's where the program will go
4255 */
4256 return 0;
4257 }
4258 }
4259
4260 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
4261 if (!other_branch)
4262 return -EFAULT;
f4d7e40a 4263 other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
17a52670 4264
48461135
JB
4265 /* detect if we are comparing against a constant value so we can adjust
4266 * our min/max values for our dst register.
f1174f77
EC
4267 * this is only legit if both are scalars (or pointers to the same
4268 * object, I suppose, but we don't support that right now), because
4269 * otherwise the different base pointers mean the offsets aren't
4270 * comparable.
48461135
JB
4271 */
4272 if (BPF_SRC(insn->code) == BPF_X) {
f1174f77
EC
4273 if (dst_reg->type == SCALAR_VALUE &&
4274 regs[insn->src_reg].type == SCALAR_VALUE) {
4275 if (tnum_is_const(regs[insn->src_reg].var_off))
f4d7e40a 4276 reg_set_min_max(&other_branch_regs[insn->dst_reg],
f1174f77
EC
4277 dst_reg, regs[insn->src_reg].var_off.value,
4278 opcode);
4279 else if (tnum_is_const(dst_reg->var_off))
f4d7e40a 4280 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
f1174f77
EC
4281 &regs[insn->src_reg],
4282 dst_reg->var_off.value, opcode);
4283 else if (opcode == BPF_JEQ || opcode == BPF_JNE)
4284 /* Comparing for equality, we can combine knowledge */
f4d7e40a
AS
4285 reg_combine_min_max(&other_branch_regs[insn->src_reg],
4286 &other_branch_regs[insn->dst_reg],
f1174f77
EC
4287 &regs[insn->src_reg],
4288 &regs[insn->dst_reg], opcode);
4289 }
4290 } else if (dst_reg->type == SCALAR_VALUE) {
f4d7e40a 4291 reg_set_min_max(&other_branch_regs[insn->dst_reg],
48461135
JB
4292 dst_reg, insn->imm, opcode);
4293 }
4294
58e2af8b 4295 /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
17a52670 4296 if (BPF_SRC(insn->code) == BPF_K &&
1a0dc1ac 4297 insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
840b9615
JS
4298 reg_type_may_be_null(dst_reg->type)) {
4299 /* Mark all identical registers in each branch as either
57a09bf0
TG
4300 * safe or unknown depending R == 0 or R != 0 conditional.
4301 */
840b9615
JS
4302 mark_ptr_or_null_regs(this_branch, insn->dst_reg,
4303 opcode == BPF_JNE);
4304 mark_ptr_or_null_regs(other_branch, insn->dst_reg,
4305 opcode == BPF_JEQ);
5beca081
DB
4306 } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
4307 this_branch, other_branch) &&
4308 is_pointer_value(env, insn->dst_reg)) {
61bd5218
JK
4309 verbose(env, "R%d pointer comparison prohibited\n",
4310 insn->dst_reg);
1be7f75d 4311 return -EACCES;
17a52670 4312 }
61bd5218 4313 if (env->log.level)
f4d7e40a 4314 print_verifier_state(env, this_branch->frame[this_branch->curframe]);
17a52670
AS
4315 return 0;
4316}
4317
0246e64d
AS
4318/* return the map pointer stored inside BPF_LD_IMM64 instruction */
4319static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
4320{
4321 u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
4322
4323 return (struct bpf_map *) (unsigned long) imm64;
4324}
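/* Reminder of the BPF_LD_IMM64 encoding assumed above (sketch):
 *
 *   insn[0]: code = BPF_LD | BPF_DW | BPF_IMM, src_reg = BPF_PSEUDO_MAP_FD,
 *            imm  = lower 32 bits
 *   insn[1]: all fields zero except imm = upper 32 bits
 *
 * replace_map_fd_with_map_ptr() stores the kernel address of the map in
 * those two imm halves, which ld_imm64_to_map_ptr() reassembles.
 */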
4325
17a52670 4326/* verify BPF_LD_IMM64 instruction */
58e2af8b 4327static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
17a52670 4328{
638f5b90 4329 struct bpf_reg_state *regs = cur_regs(env);
17a52670
AS
4330 int err;
4331
4332 if (BPF_SIZE(insn->code) != BPF_DW) {
61bd5218 4333 verbose(env, "invalid BPF_LD_IMM insn\n");
17a52670
AS
4334 return -EINVAL;
4335 }
4336 if (insn->off != 0) {
61bd5218 4337 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
17a52670
AS
4338 return -EINVAL;
4339 }
4340
dc503a8a 4341 err = check_reg_arg(env, insn->dst_reg, DST_OP);
17a52670
AS
4342 if (err)
4343 return err;
4344
6b173873 4345 if (insn->src_reg == 0) {
6b173873
JK
4346 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
4347
f1174f77 4348 regs[insn->dst_reg].type = SCALAR_VALUE;
b03c9f9f 4349 __mark_reg_known(&regs[insn->dst_reg], imm);
17a52670 4350 return 0;
6b173873 4351 }
17a52670
AS
4352
4353 /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
4354 BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
4355
4356 regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
4357 regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
4358 return 0;
4359}
4360
96be4325
DB
4361static bool may_access_skb(enum bpf_prog_type type)
4362{
4363 switch (type) {
4364 case BPF_PROG_TYPE_SOCKET_FILTER:
4365 case BPF_PROG_TYPE_SCHED_CLS:
94caee8c 4366 case BPF_PROG_TYPE_SCHED_ACT:
96be4325
DB
4367 return true;
4368 default:
4369 return false;
4370 }
4371}
4372
ddd872bc
AS
4373/* verify safety of LD_ABS|LD_IND instructions:
4374 * - they can only appear in the programs where ctx == skb
4375 * - since they are wrappers of function calls, they scratch R1-R5 registers,
4376 * preserve R6-R9, and store return value into R0
4377 *
4378 * Implicit input:
4379 * ctx == skb == R6 == CTX
4380 *
4381 * Explicit input:
4382 * SRC == any register
4383 * IMM == 32-bit immediate
4384 *
4385 * Output:
4386 * R0 - 8/16/32-bit skb data converted to cpu endianness
4387 */
58e2af8b 4388static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
ddd872bc 4389{
638f5b90 4390 struct bpf_reg_state *regs = cur_regs(env);
ddd872bc 4391 u8 mode = BPF_MODE(insn->code);
ddd872bc
AS
4392 int i, err;
4393
24701ece 4394 if (!may_access_skb(env->prog->type)) {
61bd5218 4395 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
ddd872bc
AS
4396 return -EINVAL;
4397 }
4398
e0cea7ce
DB
4399 if (!env->ops->gen_ld_abs) {
4400 verbose(env, "bpf verifier is misconfigured\n");
4401 return -EINVAL;
4402 }
4403
f910cefa 4404 if (env->subprog_cnt > 1) {
f4d7e40a
AS
4405 /* when program has LD_ABS insn JITs and interpreter assume
4406 * that r1 == ctx == skb which is not the case for callees
4407 * that can have arbitrary arguments. It's problematic
4408 * for main prog as well since JITs would need to analyze
4409 * all functions in order to make proper register save/restore
4410 * decisions in the main prog. Hence disallow LD_ABS with calls
4411 */
4412 verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
4413 return -EINVAL;
4414 }
4415
ddd872bc 4416 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
d82bccc6 4417 BPF_SIZE(insn->code) == BPF_DW ||
ddd872bc 4418 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
61bd5218 4419 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
ddd872bc
AS
4420 return -EINVAL;
4421 }
4422
4423 /* check whether implicit source operand (register R6) is readable */
dc503a8a 4424 err = check_reg_arg(env, BPF_REG_6, SRC_OP);
ddd872bc
AS
4425 if (err)
4426 return err;
4427
fd978bf7
JS
4428 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
4429 * gen_ld_abs() may terminate the program at runtime, leading to
4430 * reference leak.
4431 */
4432 err = check_reference_leak(env);
4433 if (err) {
4434 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
4435 return err;
4436 }
4437
ddd872bc 4438 if (regs[BPF_REG_6].type != PTR_TO_CTX) {
61bd5218
JK
4439 verbose(env,
4440 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
ddd872bc
AS
4441 return -EINVAL;
4442 }
4443
4444 if (mode == BPF_IND) {
4445 /* check explicit source operand */
dc503a8a 4446 err = check_reg_arg(env, insn->src_reg, SRC_OP);
ddd872bc
AS
4447 if (err)
4448 return err;
4449 }
4450
4451 /* reset caller saved regs to unreadable */
dc503a8a 4452 for (i = 0; i < CALLER_SAVED_REGS; i++) {
61bd5218 4453 mark_reg_not_init(env, regs, caller_saved[i]);
dc503a8a
EC
4454 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
4455 }
ddd872bc
AS
4456
4457 /* mark destination R0 register as readable, since it contains
dc503a8a
EC
4458 * the value fetched from the packet.
4459 * Already marked as written above.
ddd872bc 4460 */
61bd5218 4461 mark_reg_unknown(env, regs, BPF_REG_0);
ddd872bc
AS
4462 return 0;
4463}
4464
390ee7e2
AS
4465static int check_return_code(struct bpf_verifier_env *env)
4466{
4467 struct bpf_reg_state *reg;
4468 struct tnum range = tnum_range(0, 1);
4469
4470 switch (env->prog->type) {
4471 case BPF_PROG_TYPE_CGROUP_SKB:
4472 case BPF_PROG_TYPE_CGROUP_SOCK:
4fbac77d 4473 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
390ee7e2 4474 case BPF_PROG_TYPE_SOCK_OPS:
ebc614f6 4475 case BPF_PROG_TYPE_CGROUP_DEVICE:
390ee7e2
AS
4476 break;
4477 default:
4478 return 0;
4479 }
4480
638f5b90 4481 reg = cur_regs(env) + BPF_REG_0;
390ee7e2 4482 if (reg->type != SCALAR_VALUE) {
61bd5218 4483 verbose(env, "At program exit the register R0 is not a known value (%s)\n",
390ee7e2
AS
4484 reg_type_str[reg->type]);
4485 return -EINVAL;
4486 }
4487
4488 if (!tnum_in(range, reg->var_off)) {
61bd5218 4489 verbose(env, "At program exit the register R0 ");
390ee7e2
AS
4490 if (!tnum_is_unknown(reg->var_off)) {
4491 char tn_buf[48];
4492
4493 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 4494 verbose(env, "has value %s", tn_buf);
390ee7e2 4495 } else {
61bd5218 4496 verbose(env, "has unknown scalar value");
390ee7e2 4497 }
61bd5218 4498 verbose(env, " should have been 0 or 1\n");
390ee7e2
AS
4499 return -EINVAL;
4500 }
4501 return 0;
4502}
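/* Example (illustrative): for BPF_PROG_TYPE_CGROUP_SKB a program ending in
 *
 *   r0 = 2
 *   exit
 *
 * is rejected, since the known value 2 is not contained in tnum_range(0, 1).
 */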
4503
475fb78f
AS
4504/* non-recursive DFS pseudo code
4505 * 1 procedure DFS-iterative(G,v):
4506 * 2 label v as discovered
4507 * 3 let S be a stack
4508 * 4 S.push(v)
4509 * 5 while S is not empty
4510 * 6 t <- S.pop()
4511 * 7 if t is what we're looking for:
4512 * 8 return t
4513 * 9 for all edges e in G.adjacentEdges(t) do
4514 * 10 if edge e is already labelled
4515 * 11 continue with the next edge
4516 * 12 w <- G.adjacentVertex(t,e)
4517 * 13 if vertex w is not discovered and not explored
4518 * 14 label e as tree-edge
4519 * 15 label w as discovered
4520 * 16 S.push(w)
4521 * 17 continue at 5
4522 * 18 else if vertex w is discovered
4523 * 19 label e as back-edge
4524 * 20 else
4525 * 21 // vertex w is explored
4526 * 22 label e as forward- or cross-edge
4527 * 23 label t as explored
4528 * 24 S.pop()
4529 *
4530 * convention:
4531 * 0x10 - discovered
4532 * 0x11 - discovered and fall-through edge labelled
4533 * 0x12 - discovered and fall-through and branch edges labelled
4534 * 0x20 - explored
4535 */
4536
4537enum {
4538 DISCOVERED = 0x10,
4539 EXPLORED = 0x20,
4540 FALLTHROUGH = 1,
4541 BRANCH = 2,
4542};
4543
58e2af8b 4544#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
f1bca824 4545
475fb78f
AS
4546static int *insn_stack; /* stack of insns to process */
4547static int cur_stack; /* current stack index */
4548static int *insn_state;
4549
4550/* t, w, e - match pseudo-code above:
4551 * t - index of current instruction
4552 * w - next instruction
4553 * e - edge
4554 */
58e2af8b 4555static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
475fb78f
AS
4556{
4557 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
4558 return 0;
4559
4560 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
4561 return 0;
4562
4563 if (w < 0 || w >= env->prog->len) {
61bd5218 4564 verbose(env, "jump out of range from insn %d to %d\n", t, w);
475fb78f
AS
4565 return -EINVAL;
4566 }
4567
f1bca824
AS
4568 if (e == BRANCH)
4569 /* mark branch target for state pruning */
4570 env->explored_states[w] = STATE_LIST_MARK;
4571
475fb78f
AS
4572 if (insn_state[w] == 0) {
4573 /* tree-edge */
4574 insn_state[t] = DISCOVERED | e;
4575 insn_state[w] = DISCOVERED;
4576 if (cur_stack >= env->prog->len)
4577 return -E2BIG;
4578 insn_stack[cur_stack++] = w;
4579 return 1;
4580 } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
61bd5218 4581 verbose(env, "back-edge from insn %d to %d\n", t, w);
475fb78f
AS
4582 return -EINVAL;
4583 } else if (insn_state[w] == EXPLORED) {
4584 /* forward- or cross-edge */
4585 insn_state[t] = DISCOVERED | e;
4586 } else {
61bd5218 4587 verbose(env, "insn state internal bug\n");
475fb78f
AS
4588 return -EFAULT;
4589 }
4590 return 0;
4591}
4592
4593/* non-recursive depth-first-search to detect loops in BPF program
4594 * loop == back-edge in directed graph
4595 */
58e2af8b 4596static int check_cfg(struct bpf_verifier_env *env)
475fb78f
AS
4597{
4598 struct bpf_insn *insns = env->prog->insnsi;
4599 int insn_cnt = env->prog->len;
4600 int ret = 0;
4601 int i, t;
4602
cc8b0b92
AS
4603 ret = check_subprogs(env);
4604 if (ret < 0)
4605 return ret;
4606
475fb78f
AS
4607 insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
4608 if (!insn_state)
4609 return -ENOMEM;
4610
4611 insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
4612 if (!insn_stack) {
4613 kfree(insn_state);
4614 return -ENOMEM;
4615 }
4616
4617 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
4618 insn_stack[0] = 0; /* 0 is the first instruction */
4619 cur_stack = 1;
4620
4621peek_stack:
4622 if (cur_stack == 0)
4623 goto check_state;
4624 t = insn_stack[cur_stack - 1];
4625
4626 if (BPF_CLASS(insns[t].code) == BPF_JMP) {
4627 u8 opcode = BPF_OP(insns[t].code);
4628
4629 if (opcode == BPF_EXIT) {
4630 goto mark_explored;
4631 } else if (opcode == BPF_CALL) {
4632 ret = push_insn(t, t + 1, FALLTHROUGH, env);
4633 if (ret == 1)
4634 goto peek_stack;
4635 else if (ret < 0)
4636 goto err_free;
07016151
DB
4637 if (t + 1 < insn_cnt)
4638 env->explored_states[t + 1] = STATE_LIST_MARK;
cc8b0b92
AS
4639 if (insns[t].src_reg == BPF_PSEUDO_CALL) {
4640 env->explored_states[t] = STATE_LIST_MARK;
4641 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
4642 if (ret == 1)
4643 goto peek_stack;
4644 else if (ret < 0)
4645 goto err_free;
4646 }
475fb78f
AS
4647 } else if (opcode == BPF_JA) {
4648 if (BPF_SRC(insns[t].code) != BPF_K) {
4649 ret = -EINVAL;
4650 goto err_free;
4651 }
4652 /* unconditional jump with single edge */
4653 ret = push_insn(t, t + insns[t].off + 1,
4654 FALLTHROUGH, env);
4655 if (ret == 1)
4656 goto peek_stack;
4657 else if (ret < 0)
4658 goto err_free;
f1bca824
AS
4659 /* tell verifier to check for equivalent states
4660 * after every call and jump
4661 */
c3de6317
AS
4662 if (t + 1 < insn_cnt)
4663 env->explored_states[t + 1] = STATE_LIST_MARK;
475fb78f
AS
4664 } else {
4665 /* conditional jump with two edges */
3c2ce60b 4666 env->explored_states[t] = STATE_LIST_MARK;
475fb78f
AS
4667 ret = push_insn(t, t + 1, FALLTHROUGH, env);
4668 if (ret == 1)
4669 goto peek_stack;
4670 else if (ret < 0)
4671 goto err_free;
4672
4673 ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
4674 if (ret == 1)
4675 goto peek_stack;
4676 else if (ret < 0)
4677 goto err_free;
4678 }
4679 } else {
4680 /* all other non-branch instructions with single
4681 * fall-through edge
4682 */
4683 ret = push_insn(t, t + 1, FALLTHROUGH, env);
4684 if (ret == 1)
4685 goto peek_stack;
4686 else if (ret < 0)
4687 goto err_free;
4688 }
4689
4690mark_explored:
4691 insn_state[t] = EXPLORED;
4692 if (cur_stack-- <= 0) {
61bd5218 4693 verbose(env, "pop stack internal bug\n");
475fb78f
AS
4694 ret = -EFAULT;
4695 goto err_free;
4696 }
4697 goto peek_stack;
4698
4699check_state:
4700 for (i = 0; i < insn_cnt; i++) {
4701 if (insn_state[i] != EXPLORED) {
61bd5218 4702 verbose(env, "unreachable insn %d\n", i);
475fb78f
AS
4703 ret = -EINVAL;
4704 goto err_free;
4705 }
4706 }
4707 ret = 0; /* cfg looks good */
4708
4709err_free:
4710 kfree(insn_state);
4711 kfree(insn_stack);
4712 return ret;
4713}
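/* Example of a program rejected by the DFS above (illustrative):
 *
 *   0: r0 = 0
 *   1: if r0 == 0 goto 0
 *   2: exit
 *
 * While processing insn 1, push_insn(1, 0, BRANCH, env) finds insn 0
 * still in the DISCOVERED state and reports "back-edge from insn 1 to 0".
 */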
4714
838e9690
YS
4715/* The minimum supported BTF func info size */
4716#define MIN_BPF_FUNCINFO_SIZE 8
4717#define MAX_FUNCINFO_REC_SIZE 252
4718
c454a46b
MKL
4719static int check_btf_func(struct bpf_verifier_env *env,
4720 const union bpf_attr *attr,
4721 union bpf_attr __user *uattr)
838e9690
YS
4722{
4723 u32 i, nfuncs, urec_size, min_size, prev_offset;
4724 u32 krec_size = sizeof(struct bpf_func_info);
c454a46b 4725 struct bpf_func_info *krecord;
838e9690 4726 const struct btf_type *type;
c454a46b
MKL
4727 struct bpf_prog *prog;
4728 const struct btf *btf;
838e9690 4729 void __user *urecord;
838e9690
YS
4730 int ret = 0;
4731
4732 nfuncs = attr->func_info_cnt;
4733 if (!nfuncs)
4734 return 0;
4735
4736 if (nfuncs != env->subprog_cnt) {
4737 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
4738 return -EINVAL;
4739 }
4740
4741 urec_size = attr->func_info_rec_size;
4742 if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
4743 urec_size > MAX_FUNCINFO_REC_SIZE ||
4744 urec_size % sizeof(u32)) {
4745 verbose(env, "invalid func info rec size %u\n", urec_size);
4746 return -EINVAL;
4747 }
4748
c454a46b
MKL
4749 prog = env->prog;
4750 btf = prog->aux->btf;
838e9690
YS
4751
4752 urecord = u64_to_user_ptr(attr->func_info);
4753 min_size = min_t(u32, krec_size, urec_size);
4754
ba64e7d8 4755 krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
c454a46b
MKL
4756 if (!krecord)
4757 return -ENOMEM;
ba64e7d8 4758
838e9690
YS
4759 for (i = 0; i < nfuncs; i++) {
4760 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
4761 if (ret) {
4762 if (ret == -E2BIG) {
4763 verbose(env, "nonzero tailing record in func info");
4764 /* set the size kernel expects so loader can zero
4765 * out the rest of the record.
4766 */
4767 if (put_user(min_size, &uattr->func_info_rec_size))
4768 ret = -EFAULT;
4769 }
c454a46b 4770 goto err_free;
838e9690
YS
4771 }
4772
ba64e7d8 4773 if (copy_from_user(&krecord[i], urecord, min_size)) {
838e9690 4774 ret = -EFAULT;
c454a46b 4775 goto err_free;
838e9690
YS
4776 }
4777
d30d42e0 4778 /* check insn_off */
838e9690 4779 if (i == 0) {
d30d42e0 4780 if (krecord[i].insn_off) {
838e9690 4781 verbose(env,
d30d42e0
MKL
4782 "nonzero insn_off %u for the first func info record",
4783 krecord[i].insn_off);
838e9690 4784 ret = -EINVAL;
c454a46b 4785 goto err_free;
838e9690 4786 }
d30d42e0 4787 } else if (krecord[i].insn_off <= prev_offset) {
838e9690
YS
4788 verbose(env,
4789 "same or smaller insn offset (%u) than previous func info record (%u)",
d30d42e0 4790 krecord[i].insn_off, prev_offset);
838e9690 4791 ret = -EINVAL;
c454a46b 4792 goto err_free;
838e9690
YS
4793 }
4794
d30d42e0 4795 if (env->subprog_info[i].start != krecord[i].insn_off) {
838e9690
YS
4796 verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
4797 ret = -EINVAL;
c454a46b 4798 goto err_free;
838e9690
YS
4799 }
4800
4801 /* check type_id */
ba64e7d8 4802 type = btf_type_by_id(btf, krecord[i].type_id);
838e9690
YS
4803 if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) {
4804 verbose(env, "invalid type id %d in func info",
ba64e7d8 4805 krecord[i].type_id);
838e9690 4806 ret = -EINVAL;
c454a46b 4807 goto err_free;
838e9690
YS
4808 }
4809
d30d42e0 4810 prev_offset = krecord[i].insn_off;
838e9690
YS
4811 urecord += urec_size;
4812 }
4813
ba64e7d8
YS
4814 prog->aux->func_info = krecord;
4815 prog->aux->func_info_cnt = nfuncs;
838e9690
YS
4816 return 0;
4817
c454a46b 4818err_free:
ba64e7d8 4819 kvfree(krecord);
838e9690
YS
4820 return ret;
4821}
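/* Example (hypothetical layout): for a program with two subprogs
 * starting at insn 0 and insn 12, a loader would pass
 * func_info_cnt == 2, a func_info_rec_size of at least 8 bytes that
 * is a multiple of 4, and records ordered by insn_off:
 *
 *   { .insn_off = 0,  .type_id = <BTF_KIND_FUNC id of the main func> }
 *   { .insn_off = 12, .type_id = <BTF_KIND_FUNC id of the subprog> }
 *
 * A record whose insn_off does not equal the corresponding
 * subprog_info[i].start, or whose type_id does not resolve to a
 * BTF_KIND_FUNC, is rejected by the checks above.
 */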
4822
ba64e7d8
YS
4823static void adjust_btf_func(struct bpf_verifier_env *env)
4824{
4825 int i;
4826
4827 if (!env->prog->aux->func_info)
4828 return;
4829
4830 for (i = 0; i < env->subprog_cnt; i++)
d30d42e0 4831 env->prog->aux->func_info[i].insn_off = env->subprog_info[i].start;
ba64e7d8
YS
4832}
4833
c454a46b
MKL
4834#define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \
4835 sizeof(((struct bpf_line_info *)(0))->line_col))
4836#define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
4837
4838static int check_btf_line(struct bpf_verifier_env *env,
4839 const union bpf_attr *attr,
4840 union bpf_attr __user *uattr)
4841{
4842 u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
4843 struct bpf_subprog_info *sub;
4844 struct bpf_line_info *linfo;
4845 struct bpf_prog *prog;
4846 const struct btf *btf;
4847 void __user *ulinfo;
4848 int err;
4849
4850 nr_linfo = attr->line_info_cnt;
4851 if (!nr_linfo)
4852 return 0;
4853
4854 rec_size = attr->line_info_rec_size;
4855 if (rec_size < MIN_BPF_LINEINFO_SIZE ||
4856 rec_size > MAX_LINEINFO_REC_SIZE ||
4857 rec_size & (sizeof(u32) - 1))
4858 return -EINVAL;
4859
4860 /* Need to zero it in case userspace passes in
4861 * a smaller bpf_line_info object.
4862 */
4863 linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
4864 GFP_KERNEL | __GFP_NOWARN);
4865 if (!linfo)
4866 return -ENOMEM;
4867
4868 prog = env->prog;
4869 btf = prog->aux->btf;
4870
4871 s = 0;
4872 sub = env->subprog_info;
4873 ulinfo = u64_to_user_ptr(attr->line_info);
4874 expected_size = sizeof(struct bpf_line_info);
4875 ncopy = min_t(u32, expected_size, rec_size);
4876 for (i = 0; i < nr_linfo; i++) {
4877 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
4878 if (err) {
4879 if (err == -E2BIG) {
4880 verbose(env, "nonzero tailing record in line_info");
4881 if (put_user(expected_size,
4882 &uattr->line_info_rec_size))
4883 err = -EFAULT;
4884 }
4885 goto err_free;
4886 }
4887
4888 if (copy_from_user(&linfo[i], ulinfo, ncopy)) {
4889 err = -EFAULT;
4890 goto err_free;
4891 }
4892
4893 /*
4894 * Check insn_off to ensure
4895 * 1) strictly increasing AND
4896 * 2) bounded by prog->len
4897 *
4898 * The linfo[0].insn_off == 0 check logically falls into
4899 * the later "missing bpf_line_info for func..." case
4900 * because the first linfo[0].insn_off must belong to the
4901 * first subprog, and the first subprog must have
4902 * subprog_info[0].start == 0.
4903 */
4904 if ((i && linfo[i].insn_off <= prev_offset) ||
4905 linfo[i].insn_off >= prog->len) {
4906 verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
4907 i, linfo[i].insn_off, prev_offset,
4908 prog->len);
4909 err = -EINVAL;
4910 goto err_free;
4911 }
4912
4913 if (!btf_name_offset_valid(btf, linfo[i].line_off) ||
4914 !btf_name_offset_valid(btf, linfo[i].file_name_off)) {
4915 verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
4916 err = -EINVAL;
4917 goto err_free;
4918 }
4919
4920 if (s != env->subprog_cnt) {
4921 if (linfo[i].insn_off == sub[s].start) {
4922 sub[s].linfo_idx = i;
4923 s++;
4924 } else if (sub[s].start < linfo[i].insn_off) {
4925 verbose(env, "missing bpf_line_info for func#%u\n", s);
4926 err = -EINVAL;
4927 goto err_free;
4928 }
4929 }
4930
4931 prev_offset = linfo[i].insn_off;
4932 ulinfo += rec_size;
4933 }
4934
4935 if (s != env->subprog_cnt) {
4936 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
4937 env->subprog_cnt - s, s);
4938 err = -EINVAL;
4939 goto err_free;
4940 }
4941
4942 prog->aux->linfo = linfo;
4943 prog->aux->nr_linfo = nr_linfo;
4944
4945 return 0;
4946
4947err_free:
4948 kvfree(linfo);
4949 return err;
4950}
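/* Example (hypothetical program with subprogs at insn 0 and insn 12):
 * line_info records must have strictly increasing insn_off values
 * below prog->len, and every subprog start must have a record:
 *
 *   linfo[0].insn_off = 0   -> sub[0].linfo_idx = 0
 *   linfo[1].insn_off = 5
 *   linfo[2].insn_off = 12  -> sub[1].linfo_idx = 2
 *
 * Omitting the record at insn 12 would trigger the
 * "missing bpf_line_info for func#1" error above.
 */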
4951
4952static int check_btf_info(struct bpf_verifier_env *env,
4953 const union bpf_attr *attr,
4954 union bpf_attr __user *uattr)
4955{
4956 struct btf *btf;
4957 int err;
4958
4959 if (!attr->func_info_cnt && !attr->line_info_cnt)
4960 return 0;
4961
4962 btf = btf_get_by_fd(attr->prog_btf_fd);
4963 if (IS_ERR(btf))
4964 return PTR_ERR(btf);
4965 env->prog->aux->btf = btf;
4966
4967 err = check_btf_func(env, attr, uattr);
4968 if (err)
4969 return err;
4970
4971 err = check_btf_line(env, attr, uattr);
4972 if (err)
4973 return err;
4974
4975 return 0;
ba64e7d8
YS
4976}
4977
f1174f77
EC
4978/* check %cur's range satisfies %old's */
4979static bool range_within(struct bpf_reg_state *old,
4980 struct bpf_reg_state *cur)
4981{
b03c9f9f
EC
4982 return old->umin_value <= cur->umin_value &&
4983 old->umax_value >= cur->umax_value &&
4984 old->smin_value <= cur->smin_value &&
4985 old->smax_value >= cur->smax_value;
f1174f77
EC
4986}
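/* Example: an old (already verified) scalar with umin=0, umax=64,
 * smin=0, smax=64 is satisfied by a current scalar with umin=4,
 * umax=16, smin=4, smax=16, since every value the current register
 * may hold was already proven safe. A current umax of 128 would fall
 * outside the old bounds and range_within() would return false.
 */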
4987
4988/* Maximum number of register states that can exist at once */
4989#define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
4990struct idpair {
4991 u32 old;
4992 u32 cur;
4993};
4994
4995/* If in the old state two registers had the same id, then they need to have
4996 * the same id in the new state as well. But that id could be different from
4997 * the old state, so we need to track the mapping from old to new ids.
4998 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
4999 * regs with old id 5 must also have new id 9 for the new state to be safe. But
5000 * regs with a different old id could still have new id 9, we don't care about
5001 * that.
5002 * So we look through our idmap to see if this old id has been seen before. If
5003 * so, we require the new id to match; otherwise, we add the id pair to the map.
969bf05e 5004 */
f1174f77 5005static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
969bf05e 5006{
f1174f77 5007 unsigned int i;
969bf05e 5008
f1174f77
EC
5009 for (i = 0; i < ID_MAP_SIZE; i++) {
5010 if (!idmap[i].old) {
5011 /* Reached an empty slot; haven't seen this id before */
5012 idmap[i].old = old_id;
5013 idmap[i].cur = cur_id;
5014 return true;
5015 }
5016 if (idmap[i].old == old_id)
5017 return idmap[i].cur == cur_id;
5018 }
5019 /* We ran out of idmap slots, which should be impossible */
5020 WARN_ON_ONCE(1);
5021 return false;
5022}
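/* Example: if two registers shared id 5 in the old state (say a
 * PTR_TO_MAP_VALUE_OR_NULL and its copy), and the first of them shows
 * up with id 9 in the current state, check_ids() records the mapping
 * 5 -> 9; the second register must then also carry id 9 for the
 * states to be treated as equivalent.
 */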
5023
5024/* Returns true if (rold safe implies rcur safe) */
1b688a19
EC
5025static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
5026 struct idpair *idmap)
f1174f77 5027{
f4d7e40a
AS
5028 bool equal;
5029
dc503a8a
EC
5030 if (!(rold->live & REG_LIVE_READ))
5031 /* explored state didn't use this */
5032 return true;
5033
679c782d 5034 equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
f4d7e40a
AS
5035
5036 if (rold->type == PTR_TO_STACK)
5037 /* two stack pointers are equal only if they're pointing to
5038 * the same stack frame, since fp-8 in foo != fp-8 in bar
5039 */
5040 return equal && rold->frameno == rcur->frameno;
5041
5042 if (equal)
969bf05e
AS
5043 return true;
5044
f1174f77
EC
5045 if (rold->type == NOT_INIT)
5046 /* explored state can't have used this */
969bf05e 5047 return true;
f1174f77
EC
5048 if (rcur->type == NOT_INIT)
5049 return false;
5050 switch (rold->type) {
5051 case SCALAR_VALUE:
5052 if (rcur->type == SCALAR_VALUE) {
5053 /* new val must satisfy old val knowledge */
5054 return range_within(rold, rcur) &&
5055 tnum_in(rold->var_off, rcur->var_off);
5056 } else {
179d1c56
JH
5057 /* We're trying to use a pointer in place of a scalar.
5058 * Even if the scalar was unbounded, this could lead to
5059 * pointer leaks because scalars are allowed to leak
5060 * while pointers are not. We could make this safe in
5061 * special cases if root is calling us, but it's
5062 * probably not worth the hassle.
f1174f77 5063 */
179d1c56 5064 return false;
f1174f77
EC
5065 }
5066 case PTR_TO_MAP_VALUE:
1b688a19
EC
5067 /* If the new min/max/var_off satisfy the old ones and
5068 * everything else matches, we are OK.
5069 * We don't care about the 'id' value, because nothing
5070 * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL)
5071 */
5072 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
5073 range_within(rold, rcur) &&
5074 tnum_in(rold->var_off, rcur->var_off);
f1174f77
EC
5075 case PTR_TO_MAP_VALUE_OR_NULL:
5076 /* a PTR_TO_MAP_VALUE could be safe to use as a
5077 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
5078 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
5079 * checked, doing so could have affected others with the same
5080 * id, and we can't check for that because we lost the id when
5081 * we converted to a PTR_TO_MAP_VALUE.
5082 */
5083 if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
5084 return false;
5085 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
5086 return false;
5087 /* Check our ids match any regs they're supposed to */
5088 return check_ids(rold->id, rcur->id, idmap);
de8f3a83 5089 case PTR_TO_PACKET_META:
f1174f77 5090 case PTR_TO_PACKET:
de8f3a83 5091 if (rcur->type != rold->type)
f1174f77
EC
5092 return false;
5093 /* We must have at least as much range as the old ptr
5094 * did, so that any accesses which were safe before are
5095 * still safe. This is true even if old range < old off,
5096 * since someone could have accessed through (ptr - k), or
5097 * even done ptr -= k in a register, to get a safe access.
5098 */
5099 if (rold->range > rcur->range)
5100 return false;
5101 /* If the offsets don't match, we can't trust our alignment;
5102 * nor can we be sure that we won't fall out of range.
5103 */
5104 if (rold->off != rcur->off)
5105 return false;
5106 /* id relations must be preserved */
5107 if (rold->id && !check_ids(rold->id, rcur->id, idmap))
5108 return false;
5109 /* new val must satisfy old val knowledge */
5110 return range_within(rold, rcur) &&
5111 tnum_in(rold->var_off, rcur->var_off);
5112 case PTR_TO_CTX:
5113 case CONST_PTR_TO_MAP:
f1174f77 5114 case PTR_TO_PACKET_END:
d58e468b 5115 case PTR_TO_FLOW_KEYS:
c64b7983
JS
5116 case PTR_TO_SOCKET:
5117 case PTR_TO_SOCKET_OR_NULL:
f1174f77
EC
5118 /* Only valid matches are exact, which memcmp() above
5119 * would have accepted
5120 */
5121 default:
5122 /* Don't know what's going on, just say it's not safe */
5123 return false;
5124 }
969bf05e 5125
f1174f77
EC
5126 /* Shouldn't get here; if we do, say it's not safe */
5127 WARN_ON_ONCE(1);
969bf05e
AS
5128 return false;
5129}
5130
f4d7e40a
AS
5131static bool stacksafe(struct bpf_func_state *old,
5132 struct bpf_func_state *cur,
638f5b90
AS
5133 struct idpair *idmap)
5134{
5135 int i, spi;
5136
5137 /* if explored stack has more populated slots than current stack
5138 * such stacks are not equivalent
5139 */
5140 if (old->allocated_stack > cur->allocated_stack)
5141 return false;
5142
5143 /* walk slots of the explored stack and ignore any additional
5144 * slots in the current stack, since explored(safe) state
5145 * didn't use them
5146 */
5147 for (i = 0; i < old->allocated_stack; i++) {
5148 spi = i / BPF_REG_SIZE;
5149
cc2b14d5
AS
5150 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ))
5151 /* explored state didn't use this */
fd05e57b 5152 continue;
cc2b14d5 5153
638f5b90
AS
5154 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
5155 continue;
cc2b14d5
AS
5156 /* if old state was safe with misc data in the stack
5157 * it will be safe with zero-initialized stack.
5158 * The opposite is not true
5159 */
5160 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
5161 cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
5162 continue;
638f5b90
AS
5163 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
5164 cur->stack[spi].slot_type[i % BPF_REG_SIZE])
5165 /* Ex: old explored (safe) state has STACK_SPILL in
5166 * this stack slot, but current has STACK_MISC ->
5167 * these verifier states are not equivalent,
5168 * return false to continue verification of this path
5169 */
5170 return false;
5171 if (i % BPF_REG_SIZE)
5172 continue;
5173 if (old->stack[spi].slot_type[0] != STACK_SPILL)
5174 continue;
5175 if (!regsafe(&old->stack[spi].spilled_ptr,
5176 &cur->stack[spi].spilled_ptr,
5177 idmap))
5178 /* when explored and current stack slot are both storing
5179 * spilled registers, check that stored pointers types
5180 * are the same as well.
5181 * Ex: explored safe path could have stored
5182 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
5183 * but current path has stored:
5184 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
5185 * such verifier states are not equivalent.
5186 * return false to continue verification of this path
5187 */
5188 return false;
5189 }
5190 return true;
5191}
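/* Example: an explored state whose fp-8 slot is STACK_MISC is still
 * satisfied by a current state whose fp-8 slot is STACK_ZERO, since a
 * zeroed slot is just a particular case of "some data"; the reverse
 * is not true. Likewise, a spilled {.type = PTR_TO_STACK, .off = -8}
 * in the explored state is not satisfied by a current spill of
 * {.type = PTR_TO_STACK, .off = -16}.
 */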
5192
fd978bf7
JS
5193static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
5194{
5195 if (old->acquired_refs != cur->acquired_refs)
5196 return false;
5197 return !memcmp(old->refs, cur->refs,
5198 sizeof(*old->refs) * old->acquired_refs);
5199}
5200
f1bca824
AS
5201/* compare two verifier states
5202 *
5203 * all states stored in state_list are known to be valid, since
5204 * verifier reached 'bpf_exit' instruction through them
5205 *
5206 * this function is called when the verifier explores different branches of
5207 * execution popped from the state stack. If it sees an old state that has
5208 * more strict register state and more strict stack state, then this execution
5209 * branch doesn't need to be explored further, since verifier already
5210 * concluded that more strict state leads to valid finish.
5211 *
5212 * Therefore two states are equivalent if register state is more conservative
5213 * and explored stack state is more conservative than the current one.
5214 * Example:
5215 * explored current
5216 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
5217 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
5218 *
5219 * In other words if current stack state (one being explored) has more
5220 * valid slots than old one that already passed validation, it means
5221 * the verifier can stop exploring and conclude that current state is valid too
5222 *
5223 * Similarly with registers. If explored state has register type as invalid
5224 * whereas register type in current state is meaningful, it means that
5225 * the current state will reach 'bpf_exit' instruction safely
5226 */
f4d7e40a
AS
5227static bool func_states_equal(struct bpf_func_state *old,
5228 struct bpf_func_state *cur)
f1bca824 5229{
f1174f77
EC
5230 struct idpair *idmap;
5231 bool ret = false;
f1bca824
AS
5232 int i;
5233
f1174f77
EC
5234 idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
5235 /* If we failed to allocate the idmap, just say it's not safe */
5236 if (!idmap)
1a0dc1ac 5237 return false;
f1174f77
EC
5238
5239 for (i = 0; i < MAX_BPF_REG; i++) {
1b688a19 5240 if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
f1174f77 5241 goto out_free;
f1bca824
AS
5242 }
5243
638f5b90
AS
5244 if (!stacksafe(old, cur, idmap))
5245 goto out_free;
fd978bf7
JS
5246
5247 if (!refsafe(old, cur))
5248 goto out_free;
f1174f77
EC
5249 ret = true;
5250out_free:
5251 kfree(idmap);
5252 return ret;
f1bca824
AS
5253}
5254
f4d7e40a
AS
5255static bool states_equal(struct bpf_verifier_env *env,
5256 struct bpf_verifier_state *old,
5257 struct bpf_verifier_state *cur)
5258{
5259 int i;
5260
5261 if (old->curframe != cur->curframe)
5262 return false;
5263
5264 /* for states to be equal callsites have to be the same
5265 * and all frame states need to be equivalent
5266 */
5267 for (i = 0; i <= old->curframe; i++) {
5268 if (old->frame[i]->callsite != cur->frame[i]->callsite)
5269 return false;
5270 if (!func_states_equal(old->frame[i], cur->frame[i]))
5271 return false;
5272 }
5273 return true;
5274}
5275
8e9cd9ce 5276/* A write screens off any subsequent reads; but write marks come from the
f4d7e40a
AS
5277 * straight-line code between a state and its parent. When we arrive at an
5278 * equivalent state (jump target or such) we didn't arrive by the straight-line
5279 * code, so read marks in the state must propagate to the parent regardless
5280 * of the state's write marks. That's what 'parent == state->parent' comparison
679c782d 5281 * in mark_reg_read() is for.
8e9cd9ce 5282 */
f4d7e40a
AS
5283static int propagate_liveness(struct bpf_verifier_env *env,
5284 const struct bpf_verifier_state *vstate,
5285 struct bpf_verifier_state *vparent)
dc503a8a 5286{
f4d7e40a
AS
5287 int i, frame, err = 0;
5288 struct bpf_func_state *state, *parent;
dc503a8a 5289
f4d7e40a
AS
5290 if (vparent->curframe != vstate->curframe) {
5291 WARN(1, "propagate_live: parent frame %d current frame %d\n",
5292 vparent->curframe, vstate->curframe);
5293 return -EFAULT;
5294 }
dc503a8a
EC
5295 /* Propagate read liveness of registers... */
5296 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
5297 /* We don't need to worry about FP liveness because it's read-only */
5298 for (i = 0; i < BPF_REG_FP; i++) {
f4d7e40a 5299 if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
63f45f84 5300 continue;
f4d7e40a 5301 if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
679c782d
EC
5302 err = mark_reg_read(env, &vstate->frame[vstate->curframe]->regs[i],
5303 &vparent->frame[vstate->curframe]->regs[i]);
f4d7e40a
AS
5304 if (err)
5305 return err;
dc503a8a
EC
5306 }
5307 }
f4d7e40a 5308
dc503a8a 5309 /* ... and stack slots */
f4d7e40a
AS
5310 for (frame = 0; frame <= vstate->curframe; frame++) {
5311 state = vstate->frame[frame];
5312 parent = vparent->frame[frame];
5313 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
5314 i < parent->allocated_stack / BPF_REG_SIZE; i++) {
f4d7e40a
AS
5315 if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
5316 continue;
5317 if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
679c782d
EC
5318 mark_reg_read(env, &state->stack[i].spilled_ptr,
5319 &parent->stack[i].spilled_ptr);
dc503a8a
EC
5320 }
5321 }
f4d7e40a 5322 return err;
dc503a8a
EC
5323}
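/* Example: if the explored state at a pruning point has REG_LIVE_READ
 * set on r6 (its continuation reads r6 before writing it), the loop
 * above marks r6 as read in the current state and up its parentage
 * chain, so the fact that r6's contents still matter past this point
 * is not lost when this branch stops being explored.
 */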
5324
58e2af8b 5325static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
f1bca824 5326{
58e2af8b
JK
5327 struct bpf_verifier_state_list *new_sl;
5328 struct bpf_verifier_state_list *sl;
679c782d 5329 struct bpf_verifier_state *cur = env->cur_state, *new;
ceefbc96 5330 int i, j, err, states_cnt = 0;
f1bca824
AS
5331
5332 sl = env->explored_states[insn_idx];
5333 if (!sl)
5334 /* this 'insn_idx' instruction wasn't marked, so we will not
5335 * be doing state search here
5336 */
5337 return 0;
5338
5339 while (sl != STATE_LIST_MARK) {
638f5b90 5340 if (states_equal(env, &sl->state, cur)) {
f1bca824 5341 /* reached equivalent register/stack state,
dc503a8a
EC
5342 * prune the search.
5343 * Registers read by the continuation are read by us.
8e9cd9ce
EC
5344 * If we have any write marks in env->cur_state, they
5345 * will prevent corresponding reads in the continuation
5346 * from reaching our parent (an explored_state). Our
5347 * own state will get the read marks recorded, but
5348 * they'll be immediately forgotten as we're pruning
5349 * this state and will pop a new one.
f1bca824 5350 */
f4d7e40a
AS
5351 err = propagate_liveness(env, &sl->state, cur);
5352 if (err)
5353 return err;
f1bca824 5354 return 1;
dc503a8a 5355 }
f1bca824 5356 sl = sl->next;
ceefbc96 5357 states_cnt++;
f1bca824
AS
5358 }
5359
ceefbc96
AS
5360 if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
5361 return 0;
5362
f1bca824
AS
5363 /* there were no equivalent states, remember current one.
5364 * technically the current state is not proven to be safe yet,
f4d7e40a
AS
5365 * but it will either reach the outermost bpf_exit (which means it's safe) or
5366 * or it will be rejected. Since there are no loops, we won't be
5367 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
5368 * again on the way to bpf_exit
f1bca824 5369 */
638f5b90 5370 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
f1bca824
AS
5371 if (!new_sl)
5372 return -ENOMEM;
5373
5374 /* add new state to the head of linked list */
679c782d
EC
5375 new = &new_sl->state;
5376 err = copy_verifier_state(new, cur);
1969db47 5377 if (err) {
679c782d 5378 free_verifier_state(new, false);
1969db47
AS
5379 kfree(new_sl);
5380 return err;
5381 }
f1bca824
AS
5382 new_sl->next = env->explored_states[insn_idx];
5383 env->explored_states[insn_idx] = new_sl;
dc503a8a 5384 /* connect new state to parentage chain */
679c782d
EC
5385 for (i = 0; i < BPF_REG_FP; i++)
5386 cur_regs(env)[i].parent = &new->frame[new->curframe]->regs[i];
8e9cd9ce
EC
5387 /* clear write marks in current state: the writes we did are not writes
5388 * our child did, so they don't screen off its reads from us.
5389 * (There are no read marks in current state, because reads always mark
5390 * their parent and current state never has children yet. Only
5391 * explored_states can get read marks.)
5392 */
dc503a8a 5393 for (i = 0; i < BPF_REG_FP; i++)
f4d7e40a
AS
5394 cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE;
5395
5396 /* all stack frames are accessible from callee, clear them all */
5397 for (j = 0; j <= cur->curframe; j++) {
5398 struct bpf_func_state *frame = cur->frame[j];
679c782d 5399 struct bpf_func_state *newframe = new->frame[j];
f4d7e40a 5400
679c782d 5401 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
cc2b14d5 5402 frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
679c782d
EC
5403 frame->stack[i].spilled_ptr.parent =
5404 &newframe->stack[i].spilled_ptr;
5405 }
f4d7e40a 5406 }
f1bca824
AS
5407 return 0;
5408}
5409
c64b7983
JS
5410/* Return true if it's OK to have the same insn return a different type. */
5411static bool reg_type_mismatch_ok(enum bpf_reg_type type)
5412{
5413 switch (type) {
5414 case PTR_TO_CTX:
5415 case PTR_TO_SOCKET:
5416 case PTR_TO_SOCKET_OR_NULL:
5417 return false;
5418 default:
5419 return true;
5420 }
5421}
5422
5423/* If an instruction was previously used with particular pointer types, then we
5424 * need to be careful to avoid cases such as the below, where it may be ok
5425 * for one branch accessing the pointer, but not ok for the other branch:
5426 *
5427 * R1 = sock_ptr
5428 * goto X;
5429 * ...
5430 * R1 = some_other_valid_ptr;
5431 * goto X;
5432 * ...
5433 * R2 = *(u32 *)(R1 + 0);
5434 */
5435static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
5436{
5437 return src != prev && (!reg_type_mismatch_ok(src) ||
5438 !reg_type_mismatch_ok(prev));
5439}
5440
58e2af8b 5441static int do_check(struct bpf_verifier_env *env)
17a52670 5442{
638f5b90 5443 struct bpf_verifier_state *state;
17a52670 5444 struct bpf_insn *insns = env->prog->insnsi;
638f5b90 5445 struct bpf_reg_state *regs;
f4d7e40a 5446 int insn_cnt = env->prog->len, i;
17a52670
AS
5447 int insn_idx, prev_insn_idx = 0;
5448 int insn_processed = 0;
5449 bool do_print_state = false;
5450
638f5b90
AS
5451 state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
5452 if (!state)
5453 return -ENOMEM;
f4d7e40a 5454 state->curframe = 0;
f4d7e40a
AS
5455 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
5456 if (!state->frame[0]) {
5457 kfree(state);
5458 return -ENOMEM;
5459 }
5460 env->cur_state = state;
5461 init_func_state(env, state->frame[0],
5462 BPF_MAIN_FUNC /* callsite */,
5463 0 /* frameno */,
5464 0 /* subprogno, zero == main subprog */);
17a52670
AS
5465 insn_idx = 0;
5466 for (;;) {
5467 struct bpf_insn *insn;
5468 u8 class;
5469 int err;
5470
5471 if (insn_idx >= insn_cnt) {
61bd5218 5472 verbose(env, "invalid insn idx %d insn_cnt %d\n",
17a52670
AS
5473 insn_idx, insn_cnt);
5474 return -EFAULT;
5475 }
5476
5477 insn = &insns[insn_idx];
5478 class = BPF_CLASS(insn->code);
5479
07016151 5480 if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
61bd5218
JK
5481 verbose(env,
5482 "BPF program is too large. Processed %d insn\n",
17a52670
AS
5483 insn_processed);
5484 return -E2BIG;
5485 }
5486
f1bca824
AS
5487 err = is_state_visited(env, insn_idx);
5488 if (err < 0)
5489 return err;
5490 if (err == 1) {
5491 /* found equivalent state, can prune the search */
61bd5218 5492 if (env->log.level) {
f1bca824 5493 if (do_print_state)
61bd5218 5494 verbose(env, "\nfrom %d to %d: safe\n",
f1bca824
AS
5495 prev_insn_idx, insn_idx);
5496 else
61bd5218 5497 verbose(env, "%d: safe\n", insn_idx);
f1bca824
AS
5498 }
5499 goto process_bpf_exit;
5500 }
5501
c3494801
AS
5502 if (signal_pending(current))
5503 return -EAGAIN;
5504
3c2ce60b
DB
5505 if (need_resched())
5506 cond_resched();
5507
61bd5218
JK
5508 if (env->log.level > 1 || (env->log.level && do_print_state)) {
5509 if (env->log.level > 1)
5510 verbose(env, "%d:", insn_idx);
c5fc9692 5511 else
61bd5218 5512 verbose(env, "\nfrom %d to %d:",
c5fc9692 5513 prev_insn_idx, insn_idx);
f4d7e40a 5514 print_verifier_state(env, state->frame[state->curframe]);
17a52670
AS
5515 do_print_state = false;
5516 }
5517
61bd5218 5518 if (env->log.level) {
7105e828
DB
5519 const struct bpf_insn_cbs cbs = {
5520 .cb_print = verbose,
abe08840 5521 .private_data = env,
7105e828
DB
5522 };
5523
61bd5218 5524 verbose(env, "%d: ", insn_idx);
abe08840 5525 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
17a52670
AS
5526 }
5527
cae1927c
JK
5528 if (bpf_prog_is_dev_bound(env->prog->aux)) {
5529 err = bpf_prog_offload_verify_insn(env, insn_idx,
5530 prev_insn_idx);
5531 if (err)
5532 return err;
5533 }
13a27dfc 5534
638f5b90 5535 regs = cur_regs(env);
c131187d 5536 env->insn_aux_data[insn_idx].seen = true;
fd978bf7 5537
17a52670 5538 if (class == BPF_ALU || class == BPF_ALU64) {
1be7f75d 5539 err = check_alu_op(env, insn);
17a52670
AS
5540 if (err)
5541 return err;
5542
5543 } else if (class == BPF_LDX) {
3df126f3 5544 enum bpf_reg_type *prev_src_type, src_reg_type;
9bac3d6d
AS
5545
5546 /* check for reserved fields is already done */
5547
17a52670 5548 /* check src operand */
dc503a8a 5549 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
5550 if (err)
5551 return err;
5552
dc503a8a 5553 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
17a52670
AS
5554 if (err)
5555 return err;
5556
725f9dcd
AS
5557 src_reg_type = regs[insn->src_reg].type;
5558
17a52670
AS
5559 /* check that memory (src_reg + off) is readable,
5560 * the state of dst_reg will be updated by this func
5561 */
31fd8581 5562 err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
17a52670 5563 BPF_SIZE(insn->code), BPF_READ,
ca369602 5564 insn->dst_reg, false);
17a52670
AS
5565 if (err)
5566 return err;
5567
3df126f3
JK
5568 prev_src_type = &env->insn_aux_data[insn_idx].ptr_type;
5569
5570 if (*prev_src_type == NOT_INIT) {
9bac3d6d
AS
5571 /* saw a valid insn
5572 * dst_reg = *(u32 *)(src_reg + off)
3df126f3 5573 * save type to validate intersecting paths
9bac3d6d 5574 */
3df126f3 5575 *prev_src_type = src_reg_type;
9bac3d6d 5576
c64b7983 5577 } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
9bac3d6d
AS
5578 /* A user program is trying to use the same insn
5579 * dst_reg = *(u32*) (src_reg + off)
5580 * with different pointer types:
5581 * src_reg == ctx in one branch and
5582 * src_reg == stack|map in some other branch.
5583 * Reject it.
5584 */
61bd5218 5585 verbose(env, "same insn cannot be used with different pointers\n");
9bac3d6d
AS
5586 return -EINVAL;
5587 }
5588
17a52670 5589 } else if (class == BPF_STX) {
3df126f3 5590 enum bpf_reg_type *prev_dst_type, dst_reg_type;
d691f9e8 5591
17a52670 5592 if (BPF_MODE(insn->code) == BPF_XADD) {
31fd8581 5593 err = check_xadd(env, insn_idx, insn);
17a52670
AS
5594 if (err)
5595 return err;
5596 insn_idx++;
5597 continue;
5598 }
5599
17a52670 5600 /* check src1 operand */
dc503a8a 5601 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
5602 if (err)
5603 return err;
5604 /* check src2 operand */
dc503a8a 5605 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
5606 if (err)
5607 return err;
5608
d691f9e8
AS
5609 dst_reg_type = regs[insn->dst_reg].type;
5610
17a52670 5611 /* check that memory (dst_reg + off) is writeable */
31fd8581 5612 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
17a52670 5613 BPF_SIZE(insn->code), BPF_WRITE,
ca369602 5614 insn->src_reg, false);
17a52670
AS
5615 if (err)
5616 return err;
5617
3df126f3
JK
5618 prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type;
5619
5620 if (*prev_dst_type == NOT_INIT) {
5621 *prev_dst_type = dst_reg_type;
c64b7983 5622 } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
61bd5218 5623 verbose(env, "same insn cannot be used with different pointers\n");
d691f9e8
AS
5624 return -EINVAL;
5625 }
5626
17a52670
AS
5627 } else if (class == BPF_ST) {
5628 if (BPF_MODE(insn->code) != BPF_MEM ||
5629 insn->src_reg != BPF_REG_0) {
61bd5218 5630 verbose(env, "BPF_ST uses reserved fields\n");
17a52670
AS
5631 return -EINVAL;
5632 }
5633 /* check src operand */
dc503a8a 5634 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
5635 if (err)
5636 return err;
5637
f37a8cb8 5638 if (is_ctx_reg(env, insn->dst_reg)) {
9d2be44a 5639 verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
2a159c6f
DB
5640 insn->dst_reg,
5641 reg_type_str[reg_state(env, insn->dst_reg)->type]);
f37a8cb8
DB
5642 return -EACCES;
5643 }
5644
17a52670 5645 /* check that memory (dst_reg + off) is writeable */
31fd8581 5646 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
17a52670 5647 BPF_SIZE(insn->code), BPF_WRITE,
ca369602 5648 -1, false);
17a52670
AS
5649 if (err)
5650 return err;
5651
5652 } else if (class == BPF_JMP) {
5653 u8 opcode = BPF_OP(insn->code);
5654
5655 if (opcode == BPF_CALL) {
5656 if (BPF_SRC(insn->code) != BPF_K ||
5657 insn->off != 0 ||
f4d7e40a
AS
5658 (insn->src_reg != BPF_REG_0 &&
5659 insn->src_reg != BPF_PSEUDO_CALL) ||
17a52670 5660 insn->dst_reg != BPF_REG_0) {
61bd5218 5661 verbose(env, "BPF_CALL uses reserved fields\n");
17a52670
AS
5662 return -EINVAL;
5663 }
5664
f4d7e40a
AS
5665 if (insn->src_reg == BPF_PSEUDO_CALL)
5666 err = check_func_call(env, insn, &insn_idx);
5667 else
5668 err = check_helper_call(env, insn->imm, insn_idx);
17a52670
AS
5669 if (err)
5670 return err;
5671
5672 } else if (opcode == BPF_JA) {
5673 if (BPF_SRC(insn->code) != BPF_K ||
5674 insn->imm != 0 ||
5675 insn->src_reg != BPF_REG_0 ||
5676 insn->dst_reg != BPF_REG_0) {
61bd5218 5677 verbose(env, "BPF_JA uses reserved fields\n");
17a52670
AS
5678 return -EINVAL;
5679 }
5680
5681 insn_idx += insn->off + 1;
5682 continue;
5683
5684 } else if (opcode == BPF_EXIT) {
5685 if (BPF_SRC(insn->code) != BPF_K ||
5686 insn->imm != 0 ||
5687 insn->src_reg != BPF_REG_0 ||
5688 insn->dst_reg != BPF_REG_0) {
61bd5218 5689 verbose(env, "BPF_EXIT uses reserved fields\n");
17a52670
AS
5690 return -EINVAL;
5691 }
5692
f4d7e40a
AS
5693 if (state->curframe) {
5694 /* exit from nested function */
5695 prev_insn_idx = insn_idx;
5696 err = prepare_func_exit(env, &insn_idx);
5697 if (err)
5698 return err;
5699 do_print_state = true;
5700 continue;
5701 }
5702
fd978bf7
JS
5703 err = check_reference_leak(env);
5704 if (err)
5705 return err;
5706
17a52670
AS
5707 /* eBPF calling convention is such that R0 is used
5708 * to return the value from eBPF program.
5709 * Make sure that it's readable at this time
5710 * of bpf_exit, which means that program wrote
5711 * something into it earlier
5712 */
dc503a8a 5713 err = check_reg_arg(env, BPF_REG_0, SRC_OP);
17a52670
AS
5714 if (err)
5715 return err;
5716
1be7f75d 5717 if (is_pointer_value(env, BPF_REG_0)) {
61bd5218 5718 verbose(env, "R0 leaks addr as return value\n");
1be7f75d
AS
5719 return -EACCES;
5720 }
5721
390ee7e2
AS
5722 err = check_return_code(env);
5723 if (err)
5724 return err;
f1bca824 5725process_bpf_exit:
638f5b90
AS
5726 err = pop_stack(env, &prev_insn_idx, &insn_idx);
5727 if (err < 0) {
5728 if (err != -ENOENT)
5729 return err;
17a52670
AS
5730 break;
5731 } else {
5732 do_print_state = true;
5733 continue;
5734 }
5735 } else {
5736 err = check_cond_jmp_op(env, insn, &insn_idx);
5737 if (err)
5738 return err;
5739 }
5740 } else if (class == BPF_LD) {
5741 u8 mode = BPF_MODE(insn->code);
5742
5743 if (mode == BPF_ABS || mode == BPF_IND) {
ddd872bc
AS
5744 err = check_ld_abs(env, insn);
5745 if (err)
5746 return err;
5747
17a52670
AS
5748 } else if (mode == BPF_IMM) {
5749 err = check_ld_imm(env, insn);
5750 if (err)
5751 return err;
5752
5753 insn_idx++;
c131187d 5754 env->insn_aux_data[insn_idx].seen = true;
17a52670 5755 } else {
61bd5218 5756 verbose(env, "invalid BPF_LD mode\n");
17a52670
AS
5757 return -EINVAL;
5758 }
5759 } else {
61bd5218 5760 verbose(env, "unknown insn class %d\n", class);
17a52670
AS
5761 return -EINVAL;
5762 }
5763
5764 insn_idx++;
5765 }
5766
4bd95f4b
DB
5767 verbose(env, "processed %d insns (limit %d), stack depth ",
5768 insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
f910cefa 5769 for (i = 0; i < env->subprog_cnt; i++) {
9c8105bd 5770 u32 depth = env->subprog_info[i].stack_depth;
f4d7e40a
AS
5771
5772 verbose(env, "%d", depth);
f910cefa 5773 if (i + 1 < env->subprog_cnt)
f4d7e40a
AS
5774 verbose(env, "+");
5775 }
5776 verbose(env, "\n");
9c8105bd 5777 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
17a52670
AS
5778 return 0;
5779}
5780
56f668df
MKL
5781static int check_map_prealloc(struct bpf_map *map)
5782{
5783 return (map->map_type != BPF_MAP_TYPE_HASH &&
bcc6b1b7
MKL
5784 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
5785 map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
56f668df
MKL
5786 !(map->map_flags & BPF_F_NO_PREALLOC);
5787}
5788
61bd5218
JK
5789static int check_map_prog_compatibility(struct bpf_verifier_env *env,
5790 struct bpf_map *map,
fdc15d38
AS
5791 struct bpf_prog *prog)
5792
5793{
56f668df
MKL
5794 /* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
5795 * preallocated hash maps, since doing memory allocation
5796 * in overflow_handler can crash depending on where nmi got
5797 * triggered.
5798 */
5799 if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
5800 if (!check_map_prealloc(map)) {
61bd5218 5801 verbose(env, "perf_event programs can only use preallocated hash map\n");
56f668df
MKL
5802 return -EINVAL;
5803 }
5804 if (map->inner_map_meta &&
5805 !check_map_prealloc(map->inner_map_meta)) {
61bd5218 5806 verbose(env, "perf_event programs can only use preallocated inner hash map\n");
56f668df
MKL
5807 return -EINVAL;
5808 }
fdc15d38 5809 }
a3884572
JK
5810
5811 if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
09728266 5812 !bpf_offload_prog_map_match(prog, map)) {
a3884572
JK
5813 verbose(env, "offload device mismatch between prog and map\n");
5814 return -EINVAL;
5815 }
5816
fdc15d38
AS
5817 return 0;
5818}
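/* Example: loading a BPF_PROG_TYPE_PERF_EVENT program that references
 * a BPF_MAP_TYPE_HASH created with BPF_F_NO_PREALLOC is rejected here,
 * since elements would otherwise have to be allocated from the
 * NMI-driven overflow_handler path; creating the hash map without
 * BPF_F_NO_PREALLOC (i.e. preallocated) avoids the error.
 */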
5819
b741f163
RG
5820static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
5821{
5822 return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
5823 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
5824}
5825
0246e64d
AS
5826/* look for pseudo eBPF instructions that access map FDs and
5827 * replace them with actual map pointers
5828 */
58e2af8b 5829static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
0246e64d
AS
5830{
5831 struct bpf_insn *insn = env->prog->insnsi;
5832 int insn_cnt = env->prog->len;
fdc15d38 5833 int i, j, err;
0246e64d 5834
f1f7714e 5835 err = bpf_prog_calc_tag(env->prog);
aafe6ae9
DB
5836 if (err)
5837 return err;
5838
0246e64d 5839 for (i = 0; i < insn_cnt; i++, insn++) {
9bac3d6d 5840 if (BPF_CLASS(insn->code) == BPF_LDX &&
d691f9e8 5841 (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
61bd5218 5842 verbose(env, "BPF_LDX uses reserved fields\n");
9bac3d6d
AS
5843 return -EINVAL;
5844 }
5845
d691f9e8
AS
5846 if (BPF_CLASS(insn->code) == BPF_STX &&
5847 ((BPF_MODE(insn->code) != BPF_MEM &&
5848 BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
61bd5218 5849 verbose(env, "BPF_STX uses reserved fields\n");
d691f9e8
AS
5850 return -EINVAL;
5851 }
5852
0246e64d
AS
5853 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
5854 struct bpf_map *map;
5855 struct fd f;
5856
5857 if (i == insn_cnt - 1 || insn[1].code != 0 ||
5858 insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
5859 insn[1].off != 0) {
61bd5218 5860 verbose(env, "invalid bpf_ld_imm64 insn\n");
0246e64d
AS
5861 return -EINVAL;
5862 }
5863
5864 if (insn->src_reg == 0)
5865 /* valid generic load 64-bit imm */
5866 goto next_insn;
5867
5868 if (insn->src_reg != BPF_PSEUDO_MAP_FD) {
61bd5218
JK
5869 verbose(env,
5870 "unrecognized bpf_ld_imm64 insn\n");
0246e64d
AS
5871 return -EINVAL;
5872 }
5873
5874 f = fdget(insn->imm);
c2101297 5875 map = __bpf_map_get(f);
0246e64d 5876 if (IS_ERR(map)) {
61bd5218 5877 verbose(env, "fd %d is not pointing to valid bpf_map\n",
0246e64d 5878 insn->imm);
0246e64d
AS
5879 return PTR_ERR(map);
5880 }
5881
61bd5218 5882 err = check_map_prog_compatibility(env, map, env->prog);
fdc15d38
AS
5883 if (err) {
5884 fdput(f);
5885 return err;
5886 }
5887
0246e64d
AS
5888 /* store map pointer inside BPF_LD_IMM64 instruction */
5889 insn[0].imm = (u32) (unsigned long) map;
5890 insn[1].imm = ((u64) (unsigned long) map) >> 32;
5891
5892 /* check whether we recorded this map already */
5893 for (j = 0; j < env->used_map_cnt; j++)
5894 if (env->used_maps[j] == map) {
5895 fdput(f);
5896 goto next_insn;
5897 }
5898
5899 if (env->used_map_cnt >= MAX_USED_MAPS) {
5900 fdput(f);
5901 return -E2BIG;
5902 }
5903
0246e64d
AS
5904 /* hold the map. If the program is rejected by verifier,
5905 * the map will be released by release_maps() or it
5906 * will be used by the valid program until it's unloaded
ab7f5bf0 5907 * and all maps are released in free_used_maps()
0246e64d 5908 */
92117d84
AS
5909 map = bpf_map_inc(map, false);
5910 if (IS_ERR(map)) {
5911 fdput(f);
5912 return PTR_ERR(map);
5913 }
5914 env->used_maps[env->used_map_cnt++] = map;
5915
b741f163 5916 if (bpf_map_is_cgroup_storage(map) &&
de9cbbaa 5917 bpf_cgroup_storage_assign(env->prog, map)) {
b741f163 5918 verbose(env, "only one cgroup storage of each type is allowed\n");
de9cbbaa
RG
5919 fdput(f);
5920 return -EBUSY;
5921 }
5922
0246e64d
AS
5923 fdput(f);
5924next_insn:
5925 insn++;
5926 i++;
5e581dad
DB
5927 continue;
5928 }
5929
5930 /* Basic sanity check before we invest more work here. */
5931 if (!bpf_opcode_in_insntable(insn->code)) {
5932 verbose(env, "unknown opcode %02x\n", insn->code);
5933 return -EINVAL;
0246e64d
AS
5934 }
5935 }
5936
5937 /* now all pseudo BPF_LD_IMM64 instructions load valid
5938 * 'struct bpf_map *' into a register instead of user map_fd.
5939 * These pointers will be used later by verifier to validate map access.
5940 */
5941 return 0;
5942}
5943
5944/* drop refcnt of maps used by the rejected program */
58e2af8b 5945static void release_maps(struct bpf_verifier_env *env)
0246e64d 5946{
8bad74f9 5947 enum bpf_cgroup_storage_type stype;
0246e64d
AS
5948 int i;
5949
8bad74f9
RG
5950 for_each_cgroup_storage_type(stype) {
5951 if (!env->prog->aux->cgroup_storage[stype])
5952 continue;
de9cbbaa 5953 bpf_cgroup_storage_release(env->prog,
8bad74f9
RG
5954 env->prog->aux->cgroup_storage[stype]);
5955 }
de9cbbaa 5956
0246e64d
AS
5957 for (i = 0; i < env->used_map_cnt; i++)
5958 bpf_map_put(env->used_maps[i]);
5959}
5960
5961/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
58e2af8b 5962static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
0246e64d
AS
5963{
5964 struct bpf_insn *insn = env->prog->insnsi;
5965 int insn_cnt = env->prog->len;
5966 int i;
5967
5968 for (i = 0; i < insn_cnt; i++, insn++)
5969 if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
5970 insn->src_reg = 0;
5971}
5972
8041902d
AS
5973/* single env->prog->insnsi[off] instruction was replaced with the range
5974 * insnsi[off, off + cnt). Adjust corresponding insn_aux_data by copying
5975 * [0, off) and [off, end) to new locations, so the patched range stays zero
5976 */
5977static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
5978 u32 off, u32 cnt)
5979{
5980 struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
c131187d 5981 int i;
8041902d
AS
5982
5983 if (cnt == 1)
5984 return 0;
fad953ce
KC
5985 new_data = vzalloc(array_size(prog_len,
5986 sizeof(struct bpf_insn_aux_data)));
8041902d
AS
5987 if (!new_data)
5988 return -ENOMEM;
5989 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
5990 memcpy(new_data + off + cnt - 1, old_data + off,
5991 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
c131187d
AS
5992 for (i = off; i < off + cnt - 1; i++)
5993 new_data[i].seen = true;
8041902d
AS
5994 env->insn_aux_data = new_data;
5995 vfree(old_data);
5996 return 0;
5997}
5998
cc8b0b92
AS
5999static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
6000{
6001 int i;
6002
6003 if (len == 1)
6004 return;
4cb3d99c
JW
6005 /* NOTE: fake 'exit' subprog should be updated as well. */
6006 for (i = 0; i <= env->subprog_cnt; i++) {
afd59424 6007 if (env->subprog_info[i].start <= off)
cc8b0b92 6008 continue;
9c8105bd 6009 env->subprog_info[i].start += len - 1;
cc8b0b92
AS
6010 }
6011}
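/* Example: if the single insn at offset 10 is patched into a 3-insn
 * sequence (len == 3), every subprog whose recorded start is greater
 * than 10 is shifted forward by len - 1 == 2, while subprogs starting
 * at or before offset 10 are left untouched.
 */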
6012
8041902d
AS
6013static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
6014 const struct bpf_insn *patch, u32 len)
6015{
6016 struct bpf_prog *new_prog;
6017
6018 new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
6019 if (!new_prog)
6020 return NULL;
6021 if (adjust_insn_aux_data(env, new_prog->len, off, len))
6022 return NULL;
cc8b0b92 6023 adjust_subprog_starts(env, off, len);
8041902d
AS
6024 return new_prog;
6025}
6026
2a5418a1
DB
6027/* The verifier does more data flow analysis than llvm and will not
6028 * explore branches that are dead at run time. Malicious programs can
6029 * have dead code too. Therefore replace all dead at-run-time code
6030 * with 'ja -1'.
6031 *
6032 * Just nops are not optimal, e.g. if they would sit at the end of the
6033 * program and through another bug we would manage to jump there, then
6034 * we'd execute beyond program memory otherwise. Returning exception
6035 * code also wouldn't work since we can have subprogs where the dead
6036 * code could be located.
c131187d
AS
6037 */
6038static void sanitize_dead_code(struct bpf_verifier_env *env)
6039{
6040 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
2a5418a1 6041 struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
c131187d
AS
6042 struct bpf_insn *insn = env->prog->insnsi;
6043 const int insn_cnt = env->prog->len;
6044 int i;
6045
6046 for (i = 0; i < insn_cnt; i++) {
6047 if (aux_data[i].seen)
6048 continue;
2a5418a1 6049 memcpy(insn + i, &trap, sizeof(trap));
c131187d
AS
6050 }
6051}
6052
c64b7983
JS
6053/* convert load instructions that access fields of a context type into a
6054 * sequence of instructions that access fields of the underlying structure:
6055 * struct __sk_buff -> struct sk_buff
6056 * struct bpf_sock_ops -> struct sock
9bac3d6d 6057 */
58e2af8b 6058static int convert_ctx_accesses(struct bpf_verifier_env *env)
9bac3d6d 6059{
00176a34 6060 const struct bpf_verifier_ops *ops = env->ops;
f96da094 6061 int i, cnt, size, ctx_field_size, delta = 0;
3df126f3 6062 const int insn_cnt = env->prog->len;
36bbef52 6063 struct bpf_insn insn_buf[16], *insn;
46f53a65 6064 u32 target_size, size_default, off;
9bac3d6d 6065 struct bpf_prog *new_prog;
d691f9e8 6066 enum bpf_access_type type;
f96da094 6067 bool is_narrower_load;
9bac3d6d 6068
b09928b9
DB
6069 if (ops->gen_prologue || env->seen_direct_write) {
6070 if (!ops->gen_prologue) {
6071 verbose(env, "bpf verifier is misconfigured\n");
6072 return -EINVAL;
6073 }
36bbef52
DB
6074 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
6075 env->prog);
6076 if (cnt >= ARRAY_SIZE(insn_buf)) {
61bd5218 6077 verbose(env, "bpf verifier is misconfigured\n");
36bbef52
DB
6078 return -EINVAL;
6079 } else if (cnt) {
8041902d 6080 new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
36bbef52
DB
6081 if (!new_prog)
6082 return -ENOMEM;
8041902d 6083
36bbef52 6084 env->prog = new_prog;
3df126f3 6085 delta += cnt - 1;
36bbef52
DB
6086 }
6087 }
6088
c64b7983 6089 if (bpf_prog_is_dev_bound(env->prog->aux))
9bac3d6d
AS
6090 return 0;
6091
3df126f3 6092 insn = env->prog->insnsi + delta;
36bbef52 6093
9bac3d6d 6094 for (i = 0; i < insn_cnt; i++, insn++) {
c64b7983
JS
6095 bpf_convert_ctx_access_t convert_ctx_access;
6096
62c7989b
DB
6097 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
6098 insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
6099 insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
ea2e7ce5 6100 insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
d691f9e8 6101 type = BPF_READ;
62c7989b
DB
6102 else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
6103 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
6104 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
ea2e7ce5 6105 insn->code == (BPF_STX | BPF_MEM | BPF_DW))
d691f9e8
AS
6106 type = BPF_WRITE;
6107 else
9bac3d6d
AS
6108 continue;
6109
af86ca4e
AS
6110 if (type == BPF_WRITE &&
6111 env->insn_aux_data[i + delta].sanitize_stack_off) {
6112 struct bpf_insn patch[] = {
6113 /* Sanitize suspicious stack slot with zero.
6114 * There are no memory dependencies for this store,
6115 * since it's only using frame pointer and immediate
6116 * constant of zero
6117 */
6118 BPF_ST_MEM(BPF_DW, BPF_REG_FP,
6119 env->insn_aux_data[i + delta].sanitize_stack_off,
6120 0),
6121 /* the original STX instruction will immediately
6122 * overwrite the same stack slot with appropriate value
6123 */
6124 *insn,
6125 };
6126
6127 cnt = ARRAY_SIZE(patch);
6128 new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
6129 if (!new_prog)
6130 return -ENOMEM;
6131
6132 delta += cnt - 1;
6133 env->prog = new_prog;
6134 insn = new_prog->insnsi + i + delta;
6135 continue;
6136 }
6137
c64b7983
JS
6138 switch (env->insn_aux_data[i + delta].ptr_type) {
6139 case PTR_TO_CTX:
6140 if (!ops->convert_ctx_access)
6141 continue;
6142 convert_ctx_access = ops->convert_ctx_access;
6143 break;
6144 case PTR_TO_SOCKET:
6145 convert_ctx_access = bpf_sock_convert_ctx_access;
6146 break;
6147 default:
9bac3d6d 6148 continue;
c64b7983 6149 }
9bac3d6d 6150
31fd8581 6151 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
f96da094 6152 size = BPF_LDST_BYTES(insn);
31fd8581
YS
6153
6154 /* If the read access is a narrower load of the field,
6155 * convert to a 4/8-byte load, to minimize program type specific
6156 * convert_ctx_access changes. If conversion is successful,
6157 * we will apply proper mask to the result.
6158 */
f96da094 6159 is_narrower_load = size < ctx_field_size;
46f53a65
AI
6160 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
6161 off = insn->off;
31fd8581 6162 if (is_narrower_load) {
f96da094
DB
6163 u8 size_code;
6164
6165 if (type == BPF_WRITE) {
61bd5218 6166 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
f96da094
DB
6167 return -EINVAL;
6168 }
31fd8581 6169
f96da094 6170 size_code = BPF_H;
31fd8581
YS
6171 if (ctx_field_size == 4)
6172 size_code = BPF_W;
6173 else if (ctx_field_size == 8)
6174 size_code = BPF_DW;
f96da094 6175
bc23105c 6176 insn->off = off & ~(size_default - 1);
31fd8581
YS
6177 insn->code = BPF_LDX | BPF_MEM | size_code;
6178 }
f96da094
DB
6179
6180 target_size = 0;
c64b7983
JS
6181 cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
6182 &target_size);
f96da094
DB
6183 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
6184 (ctx_field_size && !target_size)) {
61bd5218 6185 verbose(env, "bpf verifier is misconfigured\n");
9bac3d6d
AS
6186 return -EINVAL;
6187 }
f96da094
DB
6188
6189 if (is_narrower_load && size < target_size) {
46f53a65
AI
6190 u8 shift = (off & (size_default - 1)) * 8;
6191
6192 if (ctx_field_size <= 4) {
6193 if (shift)
6194 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
6195 insn->dst_reg,
6196 shift);
31fd8581 6197 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
f96da094 6198 (1 << size * 8) - 1);
46f53a65
AI
6199 } else {
6200 if (shift)
6201 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
6202 insn->dst_reg,
6203 shift);
31fd8581 6204 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
f96da094 6205 (1 << size * 8) - 1);
46f53a65 6206 }
31fd8581 6207 }
9bac3d6d 6208
8041902d 6209 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
9bac3d6d
AS
6210 if (!new_prog)
6211 return -ENOMEM;
6212
3df126f3 6213 delta += cnt - 1;
9bac3d6d
AS
6214
6215 /* keep walking new program and skip insns we just inserted */
6216 env->prog = new_prog;
3df126f3 6217 insn = new_prog->insnsi + i + delta;
9bac3d6d
AS
6218 }
6219
6220 return 0;
6221}
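/* Example of the narrow-load handling above: a hypothetical 1-byte
 * context read at byte 1 of a 4-byte context field (size == 1,
 * ctx_field_size == 4, size_default == 4) is first widened to a
 * 4-byte load at the aligned offset; since size < target_size, the
 * result is then shifted right by (1 & 3) * 8 == 8 bits and masked
 * with (1 << 8) - 1, so the destination register ends up holding only
 * the requested byte.
 */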
6222
1c2a088a
AS
6223static int jit_subprogs(struct bpf_verifier_env *env)
6224{
6225 struct bpf_prog *prog = env->prog, **func, *tmp;
6226 int i, j, subprog_start, subprog_end = 0, len, subprog;
7105e828 6227 struct bpf_insn *insn;
1c2a088a 6228 void *old_bpf_func;
c454a46b 6229 int err;
1c2a088a 6230
f910cefa 6231 if (env->subprog_cnt <= 1)
1c2a088a
AS
6232 return 0;
6233
7105e828 6234 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
1c2a088a
AS
6235 if (insn->code != (BPF_JMP | BPF_CALL) ||
6236 insn->src_reg != BPF_PSEUDO_CALL)
6237 continue;
c7a89784
DB
6238 /* Upon error here we cannot fall back to interpreter but
6239 * need a hard reject of the program. Thus -EFAULT is
6240 * propagated in any case.
6241 */
1c2a088a
AS
6242 subprog = find_subprog(env, i + insn->imm + 1);
6243 if (subprog < 0) {
6244 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
6245 i + insn->imm + 1);
6246 return -EFAULT;
6247 }
6248 /* temporarily remember subprog id inside insn instead of
6249 * aux_data, since next loop will split up all insns into funcs
6250 */
f910cefa 6251 insn->off = subprog;
1c2a088a
AS
6252 /* remember original imm in case JIT fails and fallback
6253 * to interpreter will be needed
6254 */
6255 env->insn_aux_data[i].call_imm = insn->imm;
6256 /* point imm to __bpf_call_base+1 from JITs point of view */
6257 insn->imm = 1;
6258 }
6259
c454a46b
MKL
6260 err = bpf_prog_alloc_jited_linfo(prog);
6261 if (err)
6262 goto out_undo_insn;
6263
6264 err = -ENOMEM;
6396bb22 6265 func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
1c2a088a 6266 if (!func)
c7a89784 6267 goto out_undo_insn;
1c2a088a 6268
f910cefa 6269 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a 6270 subprog_start = subprog_end;
4cb3d99c 6271 subprog_end = env->subprog_info[i + 1].start;
1c2a088a
AS
6272
6273 len = subprog_end - subprog_start;
6274 func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
6275 if (!func[i])
6276 goto out_free;
6277 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
6278 len * sizeof(struct bpf_insn));
4f74d809 6279 func[i]->type = prog->type;
1c2a088a 6280 func[i]->len = len;
4f74d809
DB
6281 if (bpf_prog_calc_tag(func[i]))
6282 goto out_free;
1c2a088a 6283 func[i]->is_func = 1;
ba64e7d8
YS
6284 func[i]->aux->func_idx = i;
6285 /* the btf and func_info will be freed only at prog->aux */
6286 func[i]->aux->btf = prog->aux->btf;
6287 func[i]->aux->func_info = prog->aux->func_info;
6288
1c2a088a
AS
6289 /* Use bpf_prog_F_tag to indicate functions in stack traces.
6290 * Long term would need debug info to populate names
6291 */
6292 func[i]->aux->name[0] = 'F';
9c8105bd 6293 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
1c2a088a 6294 func[i]->jit_requested = 1;
c454a46b
MKL
6295 func[i]->aux->linfo = prog->aux->linfo;
6296 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
6297 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
6298 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
1c2a088a
AS
6299 func[i] = bpf_int_jit_compile(func[i]);
6300 if (!func[i]->jited) {
6301 err = -ENOTSUPP;
6302 goto out_free;
6303 }
6304 cond_resched();
6305 }
6306 /* at this point all bpf functions were successfully JITed
6307 * now populate all bpf_calls with correct addresses and
6308 * run last pass of JIT
6309 */
f910cefa 6310 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
6311 insn = func[i]->insnsi;
6312 for (j = 0; j < func[i]->len; j++, insn++) {
6313 if (insn->code != (BPF_JMP | BPF_CALL) ||
6314 insn->src_reg != BPF_PSEUDO_CALL)
6315 continue;
6316 subprog = insn->off;
1c2a088a
AS
6317 insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
6318 func[subprog]->bpf_func -
6319 __bpf_call_base;
6320 }
2162fed4
SD
6321
6322 /* we use the aux data to keep a list of the start addresses
6323 * of the JITed images for each function in the program
6324 *
6325 * for some architectures, such as powerpc64, the imm field
6326 * might not be large enough to hold the offset of the start
6327 * address of the callee's JITed image from __bpf_call_base
6328 *
6329 * in such cases, we can lookup the start address of a callee
6330 * by using its subprog id, available from the off field of
6331 * the call instruction, as an index for this list
6332 */
6333 func[i]->aux->func = func;
6334 func[i]->aux->func_cnt = env->subprog_cnt;
1c2a088a 6335 }
f910cefa 6336 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
6337 old_bpf_func = func[i]->bpf_func;
6338 tmp = bpf_int_jit_compile(func[i]);
6339 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
6340 verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
c7a89784 6341 err = -ENOTSUPP;
1c2a088a
AS
6342 goto out_free;
6343 }
6344 cond_resched();
6345 }
6346
6347 /* finally lock prog and jit images for all functions and
6348 * populate kallsyms
6349 */
f910cefa 6350 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
6351 bpf_prog_lock_ro(func[i]);
6352 bpf_prog_kallsyms_add(func[i]);
6353 }
7105e828
DB
6354
6355 /* Last step: make now unused interpreter insns from main
6356 * prog consistent for later dump requests, so they can
6357 * later look the same as if they were interpreted only.
6358 */
6359 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
7105e828
DB
6360 if (insn->code != (BPF_JMP | BPF_CALL) ||
6361 insn->src_reg != BPF_PSEUDO_CALL)
6362 continue;
6363 insn->off = env->insn_aux_data[i].call_imm;
6364 subprog = find_subprog(env, i + insn->off + 1);
dbecd738 6365 insn->imm = subprog;
7105e828
DB
6366 }
6367
1c2a088a
AS
6368 prog->jited = 1;
6369 prog->bpf_func = func[0]->bpf_func;
6370 prog->aux->func = func;
f910cefa 6371 prog->aux->func_cnt = env->subprog_cnt;
c454a46b 6372 bpf_prog_free_unused_jited_linfo(prog);
1c2a088a
AS
6373 return 0;
6374out_free:
f910cefa 6375 for (i = 0; i < env->subprog_cnt; i++)
1c2a088a
AS
6376 if (func[i])
6377 bpf_jit_free(func[i]);
6378 kfree(func);
c7a89784 6379out_undo_insn:
1c2a088a
AS
6380 /* cleanup main prog to be interpreted */
6381 prog->jit_requested = 0;
6382 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
6383 if (insn->code != (BPF_JMP | BPF_CALL) ||
6384 insn->src_reg != BPF_PSEUDO_CALL)
6385 continue;
6386 insn->off = 0;
6387 insn->imm = env->insn_aux_data[i].call_imm;
6388 }
c454a46b 6389 bpf_prog_free_jited_linfo(prog);
1c2a088a
AS
6390 return err;
6391}
6392
static int fixup_call_args(struct bpf_verifier_env *env)
{
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
	struct bpf_prog *prog = env->prog;
	struct bpf_insn *insn = prog->insnsi;
	int i, depth;
#endif
	int err = 0;

	if (env->prog->jit_requested &&
	    !bpf_prog_is_dev_bound(env->prog->aux)) {
		err = jit_subprogs(env);
		if (err == 0)
			return 0;
		if (err == -EFAULT)
			return err;
	}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
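	/* no JIT (or the JIT bailed): fall back to the interpreter and
	 * patch each pseudo call with the callee's stack depth, which the
	 * interpreter uses to set up the callee's frame
	 */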
	for (i = 0; i < prog->len; i++, insn++) {
		if (insn->code != (BPF_JMP | BPF_CALL) ||
		    insn->src_reg != BPF_PSEUDO_CALL)
			continue;
		depth = get_callee_stack_depth(env, insn, i);
		if (depth < 0)
			return depth;
		bpf_patch_call_args(insn, depth);
	}
	err = 0;
#endif
	return err;
}

/* fixup insn->imm field of bpf_call instructions
 * and inline eligible helpers as an explicit sequence of BPF instructions
 *
 * this function is called after the eBPF program has passed verification
 */
static int fixup_bpf_calls(struct bpf_verifier_env *env)
{
	struct bpf_prog *prog = env->prog;
	struct bpf_insn *insn = prog->insnsi;
	const struct bpf_func_proto *fn;
	const int insn_cnt = prog->len;
	const struct bpf_map_ops *ops;
	struct bpf_insn_aux_data *aux;
	struct bpf_insn insn_buf[16];
	struct bpf_prog *new_prog;
	struct bpf_map *map_ptr;
	int i, cnt, delta = 0;

	for (i = 0; i < insn_cnt; i++, insn++) {
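		/* rewrite divide/modulo by a register into a patchlet that
		 * checks the divisor first: div by 0 yields 0 and mod by 0
		 * leaves the dst register unchanged (see the patchlets
		 * below), instead of faulting at runtime
		 */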
		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
			struct bpf_insn mask_and_div[] = {
				BPF_MOV32_REG(insn->src_reg, insn->src_reg),
				/* Rx div 0 -> 0 */
				BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
				BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
				*insn,
			};
			struct bpf_insn mask_and_mod[] = {
				BPF_MOV32_REG(insn->src_reg, insn->src_reg),
				/* Rx mod 0 -> Rx */
				BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
				*insn,
			};
			struct bpf_insn *patchlet;

			if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
			    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
				patchlet = mask_and_div + (is64 ? 1 : 0);
				cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
			} else {
				patchlet = mask_and_mod + (is64 ? 1 : 0);
				cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
			}

			new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta += cnt - 1;
			env->prog = prog = new_prog;
			insn = new_prog->insnsi + i + delta;
			continue;
		}

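		/* LD_ABS/LD_IND packet loads are rewritten into a native
		 * instruction sequence supplied by the program type's
		 * gen_ld_abs() callback
		 */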
		if (BPF_CLASS(insn->code) == BPF_LD &&
		    (BPF_MODE(insn->code) == BPF_ABS ||
		     BPF_MODE(insn->code) == BPF_IND)) {
			cnt = env->ops->gen_ld_abs(insn, insn_buf);
			if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
				verbose(env, "bpf verifier is misconfigured\n");
				return -EINVAL;
			}

			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta += cnt - 1;
			env->prog = prog = new_prog;
			insn = new_prog->insnsi + i + delta;
			continue;
		}

		if (insn->code != (BPF_JMP | BPF_CALL))
			continue;
		if (insn->src_reg == BPF_PSEUDO_CALL)
			continue;

		if (insn->imm == BPF_FUNC_get_route_realm)
			prog->dst_needed = 1;
		if (insn->imm == BPF_FUNC_get_prandom_u32)
			bpf_user_rnd_init_once();
		if (insn->imm == BPF_FUNC_override_return)
			prog->kprobe_override = 1;
		if (insn->imm == BPF_FUNC_tail_call) {
			/* If we tail call into other programs, we
			 * cannot make any assumptions since they can
			 * be replaced dynamically during runtime in
			 * the program array.
			 */
			prog->cb_access = 1;
			env->prog->aux->stack_depth = MAX_BPF_STACK;
			env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;

			/* mark bpf_tail_call as different opcode to avoid
			 * conditional branch in the interpreter for every normal
			 * call and to prevent accidental JITing by JIT compiler
			 * that doesn't support bpf_tail_call yet
			 */
			insn->imm = 0;
			insn->code = BPF_JMP | BPF_TAIL_CALL;

			aux = &env->insn_aux_data[i + delta];
			if (!bpf_map_ptr_unpriv(aux))
				continue;

			/* instead of changing every JIT dealing with tail_call
			 * emit two extra insns:
			 * if (index >= max_entries) goto out;
			 * index &= array->index_mask;
			 * to avoid out-of-bounds cpu speculation
			 */
			if (bpf_map_ptr_poisoned(aux)) {
				verbose(env, "tail_call abusing map_ptr\n");
				return -EINVAL;
			}

			map_ptr = BPF_MAP_PTR(aux->map_state);
			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
						  map_ptr->max_entries, 2);
			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
						    container_of(map_ptr,
								 struct bpf_array,
								 map)->index_mask);
			insn_buf[2] = *insn;
			cnt = 3;
			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta += cnt - 1;
			env->prog = prog = new_prog;
			insn = new_prog->insnsi + i + delta;
			continue;
		}

		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
		 * and other inlining handlers are currently limited to 64 bit
		 * only.
		 */
		if (prog->jit_requested && BITS_PER_LONG == 64 &&
		    (insn->imm == BPF_FUNC_map_lookup_elem ||
		     insn->imm == BPF_FUNC_map_update_elem ||
		     insn->imm == BPF_FUNC_map_delete_elem ||
		     insn->imm == BPF_FUNC_map_push_elem ||
		     insn->imm == BPF_FUNC_map_pop_elem ||
		     insn->imm == BPF_FUNC_map_peek_elem)) {
			aux = &env->insn_aux_data[i + delta];
			if (bpf_map_ptr_poisoned(aux))
				goto patch_call_imm;

			map_ptr = BPF_MAP_PTR(aux->map_state);
			ops = map_ptr->ops;
			if (insn->imm == BPF_FUNC_map_lookup_elem &&
			    ops->map_gen_lookup) {
				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
				if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
					verbose(env, "bpf verifier is misconfigured\n");
					return -EINVAL;
				}

				new_prog = bpf_patch_insn_data(env, i + delta,
							       insn_buf, cnt);
				if (!new_prog)
					return -ENOMEM;

				delta += cnt - 1;
				env->prog = prog = new_prog;
				insn = new_prog->insnsi + i + delta;
				continue;
			}

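			/* for the remaining map helpers, check at build time
			 * that the map ops match the helper signatures, then
			 * patch the call to go straight to the map
			 * implementation instead of the generic helper
			 */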
			BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
				     (void *(*)(struct bpf_map *map, void *key))NULL));
			BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
				     (int (*)(struct bpf_map *map, void *key))NULL));
			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
				     (int (*)(struct bpf_map *map, void *key, void *value,
					      u64 flags))NULL));
			BUILD_BUG_ON(!__same_type(ops->map_push_elem,
				     (int (*)(struct bpf_map *map, void *value,
					      u64 flags))NULL));
			BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
				     (int (*)(struct bpf_map *map, void *value))NULL));
			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
				     (int (*)(struct bpf_map *map, void *value))NULL));

			switch (insn->imm) {
			case BPF_FUNC_map_lookup_elem:
				insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
					    __bpf_call_base;
				continue;
			case BPF_FUNC_map_update_elem:
				insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
					    __bpf_call_base;
				continue;
			case BPF_FUNC_map_delete_elem:
				insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
					    __bpf_call_base;
				continue;
			case BPF_FUNC_map_push_elem:
				insn->imm = BPF_CAST_CALL(ops->map_push_elem) -
					    __bpf_call_base;
				continue;
			case BPF_FUNC_map_pop_elem:
				insn->imm = BPF_CAST_CALL(ops->map_pop_elem) -
					    __bpf_call_base;
				continue;
			case BPF_FUNC_map_peek_elem:
				insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
					    __bpf_call_base;
				continue;
			}

			goto patch_call_imm;
		}

patch_call_imm:
		fn = env->ops->get_func_proto(insn->imm, env->prog);
		/* all functions that have a prototype and that the verifier
		 * allowed programs to call must be real in-kernel functions
		 */
		if (!fn->func) {
			verbose(env,
				"kernel subsystem misconfigured func %s#%d\n",
				func_id_name(insn->imm), insn->imm);
			return -EFAULT;
		}
		insn->imm = fn->func - __bpf_call_base;
	}

	return 0;
}

static void free_states(struct bpf_verifier_env *env)
{
	struct bpf_verifier_state_list *sl, *sln;
	int i;

	if (!env->explored_states)
		return;

	for (i = 0; i < env->prog->len; i++) {
		sl = env->explored_states[i];

		if (sl)
			while (sl != STATE_LIST_MARK) {
				sln = sl->next;
				free_verifier_state(&sl->state, false);
				kfree(sl);
				sl = sln;
			}
	}

	kfree(env->explored_states);
}

int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
	      union bpf_attr __user *uattr)
{
	struct bpf_verifier_env *env;
	struct bpf_verifier_log *log;
	int ret = -EINVAL;

	/* no program is valid */
	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
		return -EINVAL;

	/* 'struct bpf_verifier_env' can be global, but since it's not small,
	 * allocate/free it every time bpf_check() is called
	 */
	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
	if (!env)
		return -ENOMEM;
	log = &env->log;

	env->insn_aux_data =
		vzalloc(array_size(sizeof(struct bpf_insn_aux_data),
				   (*prog)->len));
	ret = -ENOMEM;
	if (!env->insn_aux_data)
		goto err_free_env;
	env->prog = *prog;
	env->ops = bpf_verifier_ops[env->prog->type];

	/* grab the mutex to protect a few globals used by the verifier */
	mutex_lock(&bpf_verifier_lock);

	if (attr->log_level || attr->log_buf || attr->log_size) {
		/* user requested verbose verifier output
		 * and supplied buffer to store the verification trace
		 */
		log->level = attr->log_level;
		log->ubuf = (char __user *) (unsigned long) attr->log_buf;
		log->len_total = attr->log_size;

		ret = -EINVAL;
		/* log attributes have to be sane */
		if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
		    !log->level || !log->ubuf)
			goto err_unlock;
	}

	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
		env->strict_alignment = true;
	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
		env->strict_alignment = false;

	ret = replace_map_fd_with_map_ptr(env);
	if (ret < 0)
		goto skip_full_check;

	if (bpf_prog_is_dev_bound(env->prog->aux)) {
		ret = bpf_prog_offload_verifier_prep(env->prog);
		if (ret)
			goto skip_full_check;
	}

	env->explored_states = kcalloc(env->prog->len,
				       sizeof(struct bpf_verifier_state_list *),
				       GFP_USER);
	ret = -ENOMEM;
	if (!env->explored_states)
		goto skip_full_check;

	env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);

	ret = check_cfg(env);
	if (ret < 0)
		goto skip_full_check;

	ret = check_btf_info(env, attr, uattr);
	if (ret < 0)
		goto skip_full_check;

	ret = do_check(env);
	if (env->cur_state) {
		free_verifier_state(env->cur_state, true);
		env->cur_state = NULL;
	}

	if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
		ret = bpf_prog_offload_finalize(env);

skip_full_check:
	while (!pop_stack(env, NULL, NULL));
	free_states(env);

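	/* the passes below run only if verification succeeded: dead code
	 * sanitizing, stack depth check, ctx access conversion, helper
	 * call fixup/inlining and bpf-to-bpf call fixup
	 */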
	if (ret == 0)
		sanitize_dead_code(env);

	if (ret == 0)
		ret = check_max_stack_depth(env);

	if (ret == 0)
		/* program is valid, convert *(u32*)(ctx + off) accesses */
		ret = convert_ctx_accesses(env);

	if (ret == 0)
		ret = fixup_bpf_calls(env);

	if (ret == 0)
		ret = fixup_call_args(env);

	if (log->level && bpf_verifier_log_full(log))
		ret = -ENOSPC;
	if (log->level && !log->ubuf) {
		ret = -EFAULT;
		goto err_release_maps;
	}

	if (ret == 0 && env->used_map_cnt) {
		/* if program passed verifier, update used_maps in bpf_prog_info */
		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
							  sizeof(env->used_maps[0]),
							  GFP_KERNEL);

		if (!env->prog->aux->used_maps) {
			ret = -ENOMEM;
			goto err_release_maps;
		}

		memcpy(env->prog->aux->used_maps, env->used_maps,
		       sizeof(env->used_maps[0]) * env->used_map_cnt);
		env->prog->aux->used_map_cnt = env->used_map_cnt;

		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
		 * bpf_ld_imm64 instructions
		 */
		convert_pseudo_ld_imm64(env);
	}

	if (ret == 0)
		adjust_btf_func(env);

err_release_maps:
	if (!env->prog->aux->used_maps)
		/* if we didn't copy map pointers into bpf_prog_info, release
		 * them now. Otherwise free_used_maps() will release them.
		 */
		release_maps(env);
	*prog = env->prog;
err_unlock:
	mutex_unlock(&bpf_verifier_lock);
	vfree(env->insn_aux_data);
err_free_env:
	kfree(env);
	return ret;
}