media: bpf: add bpf function to report mouse movement
[linux-block.git] / kernel / bpf / verifier.c
CommitLineData
51580e79 1/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
969bf05e 2 * Copyright (c) 2016 Facebook
fd978bf7 3 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
51580e79
AS
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 */
838e9690 14#include <uapi/linux/btf.h>
51580e79
AS
15#include <linux/kernel.h>
16#include <linux/types.h>
17#include <linux/slab.h>
18#include <linux/bpf.h>
838e9690 19#include <linux/btf.h>
58e2af8b 20#include <linux/bpf_verifier.h>
51580e79
AS
21#include <linux/filter.h>
22#include <net/netlink.h>
23#include <linux/file.h>
24#include <linux/vmalloc.h>
ebb676da 25#include <linux/stringify.h>
cc8b0b92
AS
26#include <linux/bsearch.h>
27#include <linux/sort.h>
c195651e 28#include <linux/perf_event.h>
51580e79 29
f4ac7e0b
JK
30#include "disasm.h"
31
00176a34
JK
32static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
33#define BPF_PROG_TYPE(_id, _name) \
34 [_id] = & _name ## _verifier_ops,
35#define BPF_MAP_TYPE(_id, _ops)
36#include <linux/bpf_types.h>
37#undef BPF_PROG_TYPE
38#undef BPF_MAP_TYPE
39};
40
51580e79
AS
41/* bpf_check() is a static code analyzer that walks eBPF program
42 * instruction by instruction and updates register/stack state.
43 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
44 *
45 * The first pass is depth-first-search to check that the program is a DAG.
46 * It rejects the following programs:
47 * - larger than BPF_MAXINSNS insns
48 * - if loop is present (detected via back-edge)
49 * - unreachable insns exist (shouldn't be a forest. program = one function)
50 * - out of bounds or malformed jumps
51 * The second pass is all possible path descent from the 1st insn.
52 * Since it's analyzing all pathes through the program, the length of the
eba38a96 53 * analysis is limited to 64k insn, which may be hit even if total number of
51580e79
AS
54 * insn is less then 4K, but there are too many branches that change stack/regs.
55 * Number of 'branches to be analyzed' is limited to 1k
56 *
57 * On entry to each instruction, each register has a type, and the instruction
58 * changes the types of the registers depending on instruction semantics.
59 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
60 * copied to R1.
61 *
62 * All registers are 64-bit.
63 * R0 - return register
64 * R1-R5 argument passing registers
65 * R6-R9 callee saved registers
66 * R10 - frame pointer read-only
67 *
68 * At the start of BPF program the register R1 contains a pointer to bpf_context
69 * and has type PTR_TO_CTX.
70 *
71 * Verifier tracks arithmetic operations on pointers in case:
72 * BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
73 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
74 * 1st insn copies R10 (which has FRAME_PTR) type into R1
75 * and 2nd arithmetic instruction is pattern matched to recognize
76 * that it wants to construct a pointer to some element within stack.
77 * So after 2nd insn, the register R1 has type PTR_TO_STACK
78 * (and -20 constant is saved for further stack bounds checking).
79 * Meaning that this reg is a pointer to stack plus known immediate constant.
80 *
f1174f77 81 * Most of the time the registers have SCALAR_VALUE type, which
51580e79 82 * means the register has some value, but it's not a valid pointer.
f1174f77 83 * (like pointer plus pointer becomes SCALAR_VALUE type)
51580e79
AS
84 *
85 * When verifier sees load or store instructions the type of base register
c64b7983
JS
86 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
87 * four pointer types recognized by check_mem_access() function.
51580e79
AS
88 *
89 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
90 * and the range of [ptr, ptr + map's value_size) is accessible.
91 *
92 * registers used to pass values to function calls are checked against
93 * function argument constraints.
94 *
95 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
96 * It means that the register type passed to this function must be
97 * PTR_TO_STACK and it will be used inside the function as
98 * 'pointer to map element key'
99 *
100 * For example the argument constraints for bpf_map_lookup_elem():
101 * .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
102 * .arg1_type = ARG_CONST_MAP_PTR,
103 * .arg2_type = ARG_PTR_TO_MAP_KEY,
104 *
105 * ret_type says that this function returns 'pointer to map elem value or null'
106 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
107 * 2nd argument should be a pointer to stack, which will be used inside
108 * the helper function as a pointer to map element key.
109 *
110 * On the kernel side the helper function looks like:
111 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
112 * {
113 * struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
114 * void *key = (void *) (unsigned long) r2;
115 * void *value;
116 *
117 * here kernel can access 'key' and 'map' pointers safely, knowing that
118 * [key, key + map->key_size) bytes are valid and were initialized on
119 * the stack of eBPF program.
120 * }
121 *
122 * Corresponding eBPF program may look like:
123 * BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // after this insn R2 type is FRAME_PTR
124 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
125 * BPF_LD_MAP_FD(BPF_REG_1, map_fd), // after this insn R1 type is CONST_PTR_TO_MAP
126 * BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
127 * here verifier looks at prototype of map_lookup_elem() and sees:
128 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
129 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
130 *
131 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
132 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
133 * and were initialized prior to this call.
134 * If it's ok, then verifier allows this BPF_CALL insn and looks at
135 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
136 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
137 * returns ether pointer to map value or NULL.
138 *
139 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
140 * insn, the register holding that pointer in the true branch changes state to
141 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
142 * branch. See check_cond_jmp_op().
143 *
144 * After the call R0 is set to return type of the function and registers R1-R5
145 * are set to NOT_INIT to indicate that they are no longer readable.
fd978bf7
JS
146 *
147 * The following reference types represent a potential reference to a kernel
148 * resource which, after first being allocated, must be checked and freed by
149 * the BPF program:
150 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
151 *
152 * When the verifier sees a helper call return a reference type, it allocates a
153 * pointer id for the reference and stores it in the current function state.
154 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
155 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
156 * passes through a NULL-check conditional. For the branch wherein the state is
157 * changed to CONST_IMM, the verifier releases the reference.
6acc9b43
JS
158 *
159 * For each helper function that allocates a reference, such as
160 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
161 * bpf_sk_release(). When a reference type passes into the release function,
162 * the verifier also releases the reference. If any unchecked or unreleased
163 * reference remains at the end of the program, the verifier rejects it.
51580e79
AS
164 */
165
17a52670 166/* verifier_state + insn_idx are pushed to stack when branch is encountered */
58e2af8b 167struct bpf_verifier_stack_elem {
17a52670
AS
168 /* verifer state is 'st'
169 * before processing instruction 'insn_idx'
170 * and after processing instruction 'prev_insn_idx'
171 */
58e2af8b 172 struct bpf_verifier_state st;
17a52670
AS
173 int insn_idx;
174 int prev_insn_idx;
58e2af8b 175 struct bpf_verifier_stack_elem *next;
cbd35700
AS
176};
177
8e17c1b1 178#define BPF_COMPLEXITY_LIMIT_INSNS 131072
07016151
DB
179#define BPF_COMPLEXITY_LIMIT_STACK 1024
180
c93552c4
DB
181#define BPF_MAP_PTR_UNPRIV 1UL
182#define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \
183 POISON_POINTER_DELTA))
184#define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
185
186static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
187{
188 return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON;
189}
190
191static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
192{
193 return aux->map_state & BPF_MAP_PTR_UNPRIV;
194}
195
196static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
197 const struct bpf_map *map, bool unpriv)
198{
199 BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
200 unpriv |= bpf_map_ptr_unpriv(aux);
201 aux->map_state = (unsigned long)map |
202 (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
203}
fad73a1a 204
33ff9823
DB
205struct bpf_call_arg_meta {
206 struct bpf_map *map_ptr;
435faee1 207 bool raw_mode;
36bbef52 208 bool pkt_access;
435faee1
DB
209 int regno;
210 int access_size;
849fa506
YS
211 s64 msize_smax_value;
212 u64 msize_umax_value;
fd978bf7 213 int ptr_id;
33ff9823
DB
214};
215
cbd35700
AS
216static DEFINE_MUTEX(bpf_verifier_lock);
217
77d2e05a
MKL
218void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
219 va_list args)
cbd35700 220{
a2a7d570 221 unsigned int n;
cbd35700 222
a2a7d570 223 n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
a2a7d570
JK
224
225 WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
226 "verifier log line truncated - local buffer too short\n");
227
228 n = min(log->len_total - log->len_used - 1, n);
229 log->kbuf[n] = '\0';
230
231 if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
232 log->len_used += n;
233 else
234 log->ubuf = NULL;
cbd35700 235}
abe08840
JO
236
237/* log_level controls verbosity level of eBPF verifier.
238 * bpf_verifier_log_write() is used to dump the verification trace to the log,
239 * so the user can figure out what's wrong with the program
430e68d1 240 */
abe08840
JO
241__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
242 const char *fmt, ...)
243{
244 va_list args;
245
77d2e05a
MKL
246 if (!bpf_verifier_log_needed(&env->log))
247 return;
248
abe08840 249 va_start(args, fmt);
77d2e05a 250 bpf_verifier_vlog(&env->log, fmt, args);
abe08840
JO
251 va_end(args);
252}
253EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
254
255__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
256{
77d2e05a 257 struct bpf_verifier_env *env = private_data;
abe08840
JO
258 va_list args;
259
77d2e05a
MKL
260 if (!bpf_verifier_log_needed(&env->log))
261 return;
262
abe08840 263 va_start(args, fmt);
77d2e05a 264 bpf_verifier_vlog(&env->log, fmt, args);
abe08840
JO
265 va_end(args);
266}
cbd35700 267
de8f3a83
DB
268static bool type_is_pkt_pointer(enum bpf_reg_type type)
269{
270 return type == PTR_TO_PACKET ||
271 type == PTR_TO_PACKET_META;
272}
273
840b9615
JS
274static bool reg_type_may_be_null(enum bpf_reg_type type)
275{
fd978bf7
JS
276 return type == PTR_TO_MAP_VALUE_OR_NULL ||
277 type == PTR_TO_SOCKET_OR_NULL;
278}
279
280static bool type_is_refcounted(enum bpf_reg_type type)
281{
282 return type == PTR_TO_SOCKET;
283}
284
285static bool type_is_refcounted_or_null(enum bpf_reg_type type)
286{
287 return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL;
288}
289
290static bool reg_is_refcounted(const struct bpf_reg_state *reg)
291{
292 return type_is_refcounted(reg->type);
293}
294
295static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg)
296{
297 return type_is_refcounted_or_null(reg->type);
298}
299
300static bool arg_type_is_refcounted(enum bpf_arg_type type)
301{
302 return type == ARG_PTR_TO_SOCKET;
303}
304
305/* Determine whether the function releases some resources allocated by another
306 * function call. The first reference type argument will be assumed to be
307 * released by release_reference().
308 */
309static bool is_release_function(enum bpf_func_id func_id)
310{
6acc9b43 311 return func_id == BPF_FUNC_sk_release;
840b9615
JS
312}
313
17a52670
AS
314/* string representation of 'enum bpf_reg_type' */
315static const char * const reg_type_str[] = {
316 [NOT_INIT] = "?",
f1174f77 317 [SCALAR_VALUE] = "inv",
17a52670
AS
318 [PTR_TO_CTX] = "ctx",
319 [CONST_PTR_TO_MAP] = "map_ptr",
320 [PTR_TO_MAP_VALUE] = "map_value",
321 [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
17a52670 322 [PTR_TO_STACK] = "fp",
969bf05e 323 [PTR_TO_PACKET] = "pkt",
de8f3a83 324 [PTR_TO_PACKET_META] = "pkt_meta",
969bf05e 325 [PTR_TO_PACKET_END] = "pkt_end",
d58e468b 326 [PTR_TO_FLOW_KEYS] = "flow_keys",
c64b7983
JS
327 [PTR_TO_SOCKET] = "sock",
328 [PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
17a52670
AS
329};
330
8efea21d
EC
331static char slot_type_char[] = {
332 [STACK_INVALID] = '?',
333 [STACK_SPILL] = 'r',
334 [STACK_MISC] = 'm',
335 [STACK_ZERO] = '0',
336};
337
4e92024a
AS
338static void print_liveness(struct bpf_verifier_env *env,
339 enum bpf_reg_liveness live)
340{
341 if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN))
342 verbose(env, "_");
343 if (live & REG_LIVE_READ)
344 verbose(env, "r");
345 if (live & REG_LIVE_WRITTEN)
346 verbose(env, "w");
347}
348
f4d7e40a
AS
349static struct bpf_func_state *func(struct bpf_verifier_env *env,
350 const struct bpf_reg_state *reg)
351{
352 struct bpf_verifier_state *cur = env->cur_state;
353
354 return cur->frame[reg->frameno];
355}
356
61bd5218 357static void print_verifier_state(struct bpf_verifier_env *env,
f4d7e40a 358 const struct bpf_func_state *state)
17a52670 359{
f4d7e40a 360 const struct bpf_reg_state *reg;
17a52670
AS
361 enum bpf_reg_type t;
362 int i;
363
f4d7e40a
AS
364 if (state->frameno)
365 verbose(env, " frame%d:", state->frameno);
17a52670 366 for (i = 0; i < MAX_BPF_REG; i++) {
1a0dc1ac
AS
367 reg = &state->regs[i];
368 t = reg->type;
17a52670
AS
369 if (t == NOT_INIT)
370 continue;
4e92024a
AS
371 verbose(env, " R%d", i);
372 print_liveness(env, reg->live);
373 verbose(env, "=%s", reg_type_str[t]);
f1174f77
EC
374 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
375 tnum_is_const(reg->var_off)) {
376 /* reg->off should be 0 for SCALAR_VALUE */
61bd5218 377 verbose(env, "%lld", reg->var_off.value + reg->off);
f4d7e40a
AS
378 if (t == PTR_TO_STACK)
379 verbose(env, ",call_%d", func(env, reg)->callsite);
f1174f77 380 } else {
61bd5218 381 verbose(env, "(id=%d", reg->id);
f1174f77 382 if (t != SCALAR_VALUE)
61bd5218 383 verbose(env, ",off=%d", reg->off);
de8f3a83 384 if (type_is_pkt_pointer(t))
61bd5218 385 verbose(env, ",r=%d", reg->range);
f1174f77
EC
386 else if (t == CONST_PTR_TO_MAP ||
387 t == PTR_TO_MAP_VALUE ||
388 t == PTR_TO_MAP_VALUE_OR_NULL)
61bd5218 389 verbose(env, ",ks=%d,vs=%d",
f1174f77
EC
390 reg->map_ptr->key_size,
391 reg->map_ptr->value_size);
7d1238f2
EC
392 if (tnum_is_const(reg->var_off)) {
393 /* Typically an immediate SCALAR_VALUE, but
394 * could be a pointer whose offset is too big
395 * for reg->off
396 */
61bd5218 397 verbose(env, ",imm=%llx", reg->var_off.value);
7d1238f2
EC
398 } else {
399 if (reg->smin_value != reg->umin_value &&
400 reg->smin_value != S64_MIN)
61bd5218 401 verbose(env, ",smin_value=%lld",
7d1238f2
EC
402 (long long)reg->smin_value);
403 if (reg->smax_value != reg->umax_value &&
404 reg->smax_value != S64_MAX)
61bd5218 405 verbose(env, ",smax_value=%lld",
7d1238f2
EC
406 (long long)reg->smax_value);
407 if (reg->umin_value != 0)
61bd5218 408 verbose(env, ",umin_value=%llu",
7d1238f2
EC
409 (unsigned long long)reg->umin_value);
410 if (reg->umax_value != U64_MAX)
61bd5218 411 verbose(env, ",umax_value=%llu",
7d1238f2
EC
412 (unsigned long long)reg->umax_value);
413 if (!tnum_is_unknown(reg->var_off)) {
414 char tn_buf[48];
f1174f77 415
7d1238f2 416 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 417 verbose(env, ",var_off=%s", tn_buf);
7d1238f2 418 }
f1174f77 419 }
61bd5218 420 verbose(env, ")");
f1174f77 421 }
17a52670 422 }
638f5b90 423 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
8efea21d
EC
424 char types_buf[BPF_REG_SIZE + 1];
425 bool valid = false;
426 int j;
427
428 for (j = 0; j < BPF_REG_SIZE; j++) {
429 if (state->stack[i].slot_type[j] != STACK_INVALID)
430 valid = true;
431 types_buf[j] = slot_type_char[
432 state->stack[i].slot_type[j]];
433 }
434 types_buf[BPF_REG_SIZE] = 0;
435 if (!valid)
436 continue;
437 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
438 print_liveness(env, state->stack[i].spilled_ptr.live);
439 if (state->stack[i].slot_type[0] == STACK_SPILL)
4e92024a 440 verbose(env, "=%s",
638f5b90 441 reg_type_str[state->stack[i].spilled_ptr.type]);
8efea21d
EC
442 else
443 verbose(env, "=%s", types_buf);
17a52670 444 }
fd978bf7
JS
445 if (state->acquired_refs && state->refs[0].id) {
446 verbose(env, " refs=%d", state->refs[0].id);
447 for (i = 1; i < state->acquired_refs; i++)
448 if (state->refs[i].id)
449 verbose(env, ",%d", state->refs[i].id);
450 }
61bd5218 451 verbose(env, "\n");
17a52670
AS
452}
453
84dbf350
JS
454#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE) \
455static int copy_##NAME##_state(struct bpf_func_state *dst, \
456 const struct bpf_func_state *src) \
457{ \
458 if (!src->FIELD) \
459 return 0; \
460 if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) { \
461 /* internal bug, make state invalid to reject the program */ \
462 memset(dst, 0, sizeof(*dst)); \
463 return -EFAULT; \
464 } \
465 memcpy(dst->FIELD, src->FIELD, \
466 sizeof(*src->FIELD) * (src->COUNT / SIZE)); \
467 return 0; \
638f5b90 468}
fd978bf7
JS
469/* copy_reference_state() */
470COPY_STATE_FN(reference, acquired_refs, refs, 1)
84dbf350
JS
471/* copy_stack_state() */
472COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
473#undef COPY_STATE_FN
474
475#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE) \
476static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
477 bool copy_old) \
478{ \
479 u32 old_size = state->COUNT; \
480 struct bpf_##NAME##_state *new_##FIELD; \
481 int slot = size / SIZE; \
482 \
483 if (size <= old_size || !size) { \
484 if (copy_old) \
485 return 0; \
486 state->COUNT = slot * SIZE; \
487 if (!size && old_size) { \
488 kfree(state->FIELD); \
489 state->FIELD = NULL; \
490 } \
491 return 0; \
492 } \
493 new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
494 GFP_KERNEL); \
495 if (!new_##FIELD) \
496 return -ENOMEM; \
497 if (copy_old) { \
498 if (state->FIELD) \
499 memcpy(new_##FIELD, state->FIELD, \
500 sizeof(*new_##FIELD) * (old_size / SIZE)); \
501 memset(new_##FIELD + old_size / SIZE, 0, \
502 sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
503 } \
504 state->COUNT = slot * SIZE; \
505 kfree(state->FIELD); \
506 state->FIELD = new_##FIELD; \
507 return 0; \
508}
fd978bf7
JS
509/* realloc_reference_state() */
510REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
84dbf350
JS
511/* realloc_stack_state() */
512REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
513#undef REALLOC_STATE_FN
638f5b90
AS
514
515/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
516 * make it consume minimal amount of memory. check_stack_write() access from
f4d7e40a 517 * the program calls into realloc_func_state() to grow the stack size.
84dbf350
JS
518 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
519 * which realloc_stack_state() copies over. It points to previous
520 * bpf_verifier_state which is never reallocated.
638f5b90 521 */
fd978bf7
JS
522static int realloc_func_state(struct bpf_func_state *state, int stack_size,
523 int refs_size, bool copy_old)
638f5b90 524{
fd978bf7
JS
525 int err = realloc_reference_state(state, refs_size, copy_old);
526 if (err)
527 return err;
528 return realloc_stack_state(state, stack_size, copy_old);
529}
530
531/* Acquire a pointer id from the env and update the state->refs to include
532 * this new pointer reference.
533 * On success, returns a valid pointer id to associate with the register
534 * On failure, returns a negative errno.
638f5b90 535 */
fd978bf7 536static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
638f5b90 537{
fd978bf7
JS
538 struct bpf_func_state *state = cur_func(env);
539 int new_ofs = state->acquired_refs;
540 int id, err;
541
542 err = realloc_reference_state(state, state->acquired_refs + 1, true);
543 if (err)
544 return err;
545 id = ++env->id_gen;
546 state->refs[new_ofs].id = id;
547 state->refs[new_ofs].insn_idx = insn_idx;
638f5b90 548
fd978bf7
JS
549 return id;
550}
551
552/* release function corresponding to acquire_reference_state(). Idempotent. */
553static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
554{
555 int i, last_idx;
556
557 if (!ptr_id)
558 return -EFAULT;
559
560 last_idx = state->acquired_refs - 1;
561 for (i = 0; i < state->acquired_refs; i++) {
562 if (state->refs[i].id == ptr_id) {
563 if (last_idx && i != last_idx)
564 memcpy(&state->refs[i], &state->refs[last_idx],
565 sizeof(*state->refs));
566 memset(&state->refs[last_idx], 0, sizeof(*state->refs));
567 state->acquired_refs--;
638f5b90 568 return 0;
638f5b90 569 }
638f5b90 570 }
fd978bf7
JS
571 return -EFAULT;
572}
573
574/* variation on the above for cases where we expect that there must be an
575 * outstanding reference for the specified ptr_id.
576 */
577static int release_reference_state(struct bpf_verifier_env *env, int ptr_id)
578{
579 struct bpf_func_state *state = cur_func(env);
580 int err;
581
582 err = __release_reference_state(state, ptr_id);
583 if (WARN_ON_ONCE(err != 0))
584 verbose(env, "verifier internal error: can't release reference\n");
585 return err;
586}
587
588static int transfer_reference_state(struct bpf_func_state *dst,
589 struct bpf_func_state *src)
590{
591 int err = realloc_reference_state(dst, src->acquired_refs, false);
592 if (err)
593 return err;
594 err = copy_reference_state(dst, src);
595 if (err)
596 return err;
638f5b90
AS
597 return 0;
598}
599
f4d7e40a
AS
600static void free_func_state(struct bpf_func_state *state)
601{
5896351e
AS
602 if (!state)
603 return;
fd978bf7 604 kfree(state->refs);
f4d7e40a
AS
605 kfree(state->stack);
606 kfree(state);
607}
608
1969db47
AS
609static void free_verifier_state(struct bpf_verifier_state *state,
610 bool free_self)
638f5b90 611{
f4d7e40a
AS
612 int i;
613
614 for (i = 0; i <= state->curframe; i++) {
615 free_func_state(state->frame[i]);
616 state->frame[i] = NULL;
617 }
1969db47
AS
618 if (free_self)
619 kfree(state);
638f5b90
AS
620}
621
622/* copy verifier state from src to dst growing dst stack space
623 * when necessary to accommodate larger src stack
624 */
f4d7e40a
AS
625static int copy_func_state(struct bpf_func_state *dst,
626 const struct bpf_func_state *src)
638f5b90
AS
627{
628 int err;
629
fd978bf7
JS
630 err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
631 false);
632 if (err)
633 return err;
634 memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
635 err = copy_reference_state(dst, src);
638f5b90
AS
636 if (err)
637 return err;
638f5b90
AS
638 return copy_stack_state(dst, src);
639}
640
f4d7e40a
AS
641static int copy_verifier_state(struct bpf_verifier_state *dst_state,
642 const struct bpf_verifier_state *src)
643{
644 struct bpf_func_state *dst;
645 int i, err;
646
647 /* if dst has more stack frames then src frame, free them */
648 for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
649 free_func_state(dst_state->frame[i]);
650 dst_state->frame[i] = NULL;
651 }
652 dst_state->curframe = src->curframe;
f4d7e40a
AS
653 for (i = 0; i <= src->curframe; i++) {
654 dst = dst_state->frame[i];
655 if (!dst) {
656 dst = kzalloc(sizeof(*dst), GFP_KERNEL);
657 if (!dst)
658 return -ENOMEM;
659 dst_state->frame[i] = dst;
660 }
661 err = copy_func_state(dst, src->frame[i]);
662 if (err)
663 return err;
664 }
665 return 0;
666}
667
638f5b90
AS
668static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
669 int *insn_idx)
670{
671 struct bpf_verifier_state *cur = env->cur_state;
672 struct bpf_verifier_stack_elem *elem, *head = env->head;
673 int err;
17a52670
AS
674
675 if (env->head == NULL)
638f5b90 676 return -ENOENT;
17a52670 677
638f5b90
AS
678 if (cur) {
679 err = copy_verifier_state(cur, &head->st);
680 if (err)
681 return err;
682 }
683 if (insn_idx)
684 *insn_idx = head->insn_idx;
17a52670 685 if (prev_insn_idx)
638f5b90
AS
686 *prev_insn_idx = head->prev_insn_idx;
687 elem = head->next;
1969db47 688 free_verifier_state(&head->st, false);
638f5b90 689 kfree(head);
17a52670
AS
690 env->head = elem;
691 env->stack_size--;
638f5b90 692 return 0;
17a52670
AS
693}
694
58e2af8b
JK
695static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
696 int insn_idx, int prev_insn_idx)
17a52670 697{
638f5b90 698 struct bpf_verifier_state *cur = env->cur_state;
58e2af8b 699 struct bpf_verifier_stack_elem *elem;
638f5b90 700 int err;
17a52670 701
638f5b90 702 elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
17a52670
AS
703 if (!elem)
704 goto err;
705
17a52670
AS
706 elem->insn_idx = insn_idx;
707 elem->prev_insn_idx = prev_insn_idx;
708 elem->next = env->head;
709 env->head = elem;
710 env->stack_size++;
1969db47
AS
711 err = copy_verifier_state(&elem->st, cur);
712 if (err)
713 goto err;
07016151 714 if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
61bd5218 715 verbose(env, "BPF program is too complex\n");
17a52670
AS
716 goto err;
717 }
718 return &elem->st;
719err:
5896351e
AS
720 free_verifier_state(env->cur_state, true);
721 env->cur_state = NULL;
17a52670 722 /* pop all elements and return */
638f5b90 723 while (!pop_stack(env, NULL, NULL));
17a52670
AS
724 return NULL;
725}
726
727#define CALLER_SAVED_REGS 6
728static const int caller_saved[CALLER_SAVED_REGS] = {
729 BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
730};
731
f1174f77
EC
732static void __mark_reg_not_init(struct bpf_reg_state *reg);
733
b03c9f9f
EC
734/* Mark the unknown part of a register (variable offset or scalar value) as
735 * known to have the value @imm.
736 */
737static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
738{
a9c676bc
AS
739 /* Clear id, off, and union(map_ptr, range) */
740 memset(((u8 *)reg) + sizeof(reg->type), 0,
741 offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
b03c9f9f
EC
742 reg->var_off = tnum_const(imm);
743 reg->smin_value = (s64)imm;
744 reg->smax_value = (s64)imm;
745 reg->umin_value = imm;
746 reg->umax_value = imm;
747}
748
f1174f77
EC
749/* Mark the 'variable offset' part of a register as zero. This should be
750 * used only on registers holding a pointer type.
751 */
752static void __mark_reg_known_zero(struct bpf_reg_state *reg)
a9789ef9 753{
b03c9f9f 754 __mark_reg_known(reg, 0);
f1174f77 755}
a9789ef9 756
cc2b14d5
AS
757static void __mark_reg_const_zero(struct bpf_reg_state *reg)
758{
759 __mark_reg_known(reg, 0);
cc2b14d5
AS
760 reg->type = SCALAR_VALUE;
761}
762
61bd5218
JK
763static void mark_reg_known_zero(struct bpf_verifier_env *env,
764 struct bpf_reg_state *regs, u32 regno)
f1174f77
EC
765{
766 if (WARN_ON(regno >= MAX_BPF_REG)) {
61bd5218 767 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
f1174f77
EC
768 /* Something bad happened, let's kill all regs */
769 for (regno = 0; regno < MAX_BPF_REG; regno++)
770 __mark_reg_not_init(regs + regno);
771 return;
772 }
773 __mark_reg_known_zero(regs + regno);
774}
775
de8f3a83
DB
776static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
777{
778 return type_is_pkt_pointer(reg->type);
779}
780
781static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
782{
783 return reg_is_pkt_pointer(reg) ||
784 reg->type == PTR_TO_PACKET_END;
785}
786
787/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
788static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
789 enum bpf_reg_type which)
790{
791 /* The register can already have a range from prior markings.
792 * This is fine as long as it hasn't been advanced from its
793 * origin.
794 */
795 return reg->type == which &&
796 reg->id == 0 &&
797 reg->off == 0 &&
798 tnum_equals_const(reg->var_off, 0);
799}
800
b03c9f9f
EC
801/* Attempts to improve min/max values based on var_off information */
802static void __update_reg_bounds(struct bpf_reg_state *reg)
803{
804 /* min signed is max(sign bit) | min(other bits) */
805 reg->smin_value = max_t(s64, reg->smin_value,
806 reg->var_off.value | (reg->var_off.mask & S64_MIN));
807 /* max signed is min(sign bit) | max(other bits) */
808 reg->smax_value = min_t(s64, reg->smax_value,
809 reg->var_off.value | (reg->var_off.mask & S64_MAX));
810 reg->umin_value = max(reg->umin_value, reg->var_off.value);
811 reg->umax_value = min(reg->umax_value,
812 reg->var_off.value | reg->var_off.mask);
813}
814
815/* Uses signed min/max values to inform unsigned, and vice-versa */
816static void __reg_deduce_bounds(struct bpf_reg_state *reg)
817{
818 /* Learn sign from signed bounds.
819 * If we cannot cross the sign boundary, then signed and unsigned bounds
820 * are the same, so combine. This works even in the negative case, e.g.
821 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
822 */
823 if (reg->smin_value >= 0 || reg->smax_value < 0) {
824 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
825 reg->umin_value);
826 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
827 reg->umax_value);
828 return;
829 }
830 /* Learn sign from unsigned bounds. Signed bounds cross the sign
831 * boundary, so we must be careful.
832 */
833 if ((s64)reg->umax_value >= 0) {
834 /* Positive. We can't learn anything from the smin, but smax
835 * is positive, hence safe.
836 */
837 reg->smin_value = reg->umin_value;
838 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
839 reg->umax_value);
840 } else if ((s64)reg->umin_value < 0) {
841 /* Negative. We can't learn anything from the smax, but smin
842 * is negative, hence safe.
843 */
844 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
845 reg->umin_value);
846 reg->smax_value = reg->umax_value;
847 }
848}
849
850/* Attempts to improve var_off based on unsigned min/max information */
851static void __reg_bound_offset(struct bpf_reg_state *reg)
852{
853 reg->var_off = tnum_intersect(reg->var_off,
854 tnum_range(reg->umin_value,
855 reg->umax_value));
856}
857
858/* Reset the min/max bounds of a register */
859static void __mark_reg_unbounded(struct bpf_reg_state *reg)
860{
861 reg->smin_value = S64_MIN;
862 reg->smax_value = S64_MAX;
863 reg->umin_value = 0;
864 reg->umax_value = U64_MAX;
865}
866
f1174f77
EC
867/* Mark a register as having a completely unknown (scalar) value. */
868static void __mark_reg_unknown(struct bpf_reg_state *reg)
869{
a9c676bc
AS
870 /*
871 * Clear type, id, off, and union(map_ptr, range) and
872 * padding between 'type' and union
873 */
874 memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
f1174f77 875 reg->type = SCALAR_VALUE;
f1174f77 876 reg->var_off = tnum_unknown;
f4d7e40a 877 reg->frameno = 0;
b03c9f9f 878 __mark_reg_unbounded(reg);
f1174f77
EC
879}
880
61bd5218
JK
881static void mark_reg_unknown(struct bpf_verifier_env *env,
882 struct bpf_reg_state *regs, u32 regno)
f1174f77
EC
883{
884 if (WARN_ON(regno >= MAX_BPF_REG)) {
61bd5218 885 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
19ceb417
AS
886 /* Something bad happened, let's kill all regs except FP */
887 for (regno = 0; regno < BPF_REG_FP; regno++)
f1174f77
EC
888 __mark_reg_not_init(regs + regno);
889 return;
890 }
891 __mark_reg_unknown(regs + regno);
892}
893
894static void __mark_reg_not_init(struct bpf_reg_state *reg)
895{
896 __mark_reg_unknown(reg);
897 reg->type = NOT_INIT;
898}
899
61bd5218
JK
900static void mark_reg_not_init(struct bpf_verifier_env *env,
901 struct bpf_reg_state *regs, u32 regno)
f1174f77
EC
902{
903 if (WARN_ON(regno >= MAX_BPF_REG)) {
61bd5218 904 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
19ceb417
AS
905 /* Something bad happened, let's kill all regs except FP */
906 for (regno = 0; regno < BPF_REG_FP; regno++)
f1174f77
EC
907 __mark_reg_not_init(regs + regno);
908 return;
909 }
910 __mark_reg_not_init(regs + regno);
a9789ef9
DB
911}
912
61bd5218 913static void init_reg_state(struct bpf_verifier_env *env,
f4d7e40a 914 struct bpf_func_state *state)
17a52670 915{
f4d7e40a 916 struct bpf_reg_state *regs = state->regs;
17a52670
AS
917 int i;
918
dc503a8a 919 for (i = 0; i < MAX_BPF_REG; i++) {
61bd5218 920 mark_reg_not_init(env, regs, i);
dc503a8a 921 regs[i].live = REG_LIVE_NONE;
679c782d 922 regs[i].parent = NULL;
dc503a8a 923 }
17a52670
AS
924
925 /* frame pointer */
f1174f77 926 regs[BPF_REG_FP].type = PTR_TO_STACK;
61bd5218 927 mark_reg_known_zero(env, regs, BPF_REG_FP);
f4d7e40a 928 regs[BPF_REG_FP].frameno = state->frameno;
17a52670
AS
929
930 /* 1st arg to a function */
931 regs[BPF_REG_1].type = PTR_TO_CTX;
61bd5218 932 mark_reg_known_zero(env, regs, BPF_REG_1);
6760bf2d
DB
933}
934
f4d7e40a
AS
935#define BPF_MAIN_FUNC (-1)
936static void init_func_state(struct bpf_verifier_env *env,
937 struct bpf_func_state *state,
938 int callsite, int frameno, int subprogno)
939{
940 state->callsite = callsite;
941 state->frameno = frameno;
942 state->subprogno = subprogno;
943 init_reg_state(env, state);
944}
945
17a52670
AS
946enum reg_arg_type {
947 SRC_OP, /* register is used as source operand */
948 DST_OP, /* register is used as destination operand */
949 DST_OP_NO_MARK /* same as above, check only, don't mark */
950};
951
cc8b0b92
AS
952static int cmp_subprogs(const void *a, const void *b)
953{
9c8105bd
JW
954 return ((struct bpf_subprog_info *)a)->start -
955 ((struct bpf_subprog_info *)b)->start;
cc8b0b92
AS
956}
957
958static int find_subprog(struct bpf_verifier_env *env, int off)
959{
9c8105bd 960 struct bpf_subprog_info *p;
cc8b0b92 961
9c8105bd
JW
962 p = bsearch(&off, env->subprog_info, env->subprog_cnt,
963 sizeof(env->subprog_info[0]), cmp_subprogs);
cc8b0b92
AS
964 if (!p)
965 return -ENOENT;
9c8105bd 966 return p - env->subprog_info;
cc8b0b92
AS
967
968}
969
970static int add_subprog(struct bpf_verifier_env *env, int off)
971{
972 int insn_cnt = env->prog->len;
973 int ret;
974
975 if (off >= insn_cnt || off < 0) {
976 verbose(env, "call to invalid destination\n");
977 return -EINVAL;
978 }
979 ret = find_subprog(env, off);
980 if (ret >= 0)
981 return 0;
4cb3d99c 982 if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
cc8b0b92
AS
983 verbose(env, "too many subprograms\n");
984 return -E2BIG;
985 }
9c8105bd
JW
986 env->subprog_info[env->subprog_cnt++].start = off;
987 sort(env->subprog_info, env->subprog_cnt,
988 sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
cc8b0b92
AS
989 return 0;
990}
991
992static int check_subprogs(struct bpf_verifier_env *env)
993{
994 int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
9c8105bd 995 struct bpf_subprog_info *subprog = env->subprog_info;
cc8b0b92
AS
996 struct bpf_insn *insn = env->prog->insnsi;
997 int insn_cnt = env->prog->len;
998
f910cefa
JW
999 /* Add entry function. */
1000 ret = add_subprog(env, 0);
1001 if (ret < 0)
1002 return ret;
1003
cc8b0b92
AS
1004 /* determine subprog starts. The end is one before the next starts */
1005 for (i = 0; i < insn_cnt; i++) {
1006 if (insn[i].code != (BPF_JMP | BPF_CALL))
1007 continue;
1008 if (insn[i].src_reg != BPF_PSEUDO_CALL)
1009 continue;
1010 if (!env->allow_ptr_leaks) {
1011 verbose(env, "function calls to other bpf functions are allowed for root only\n");
1012 return -EPERM;
1013 }
cc8b0b92
AS
1014 ret = add_subprog(env, i + insn[i].imm + 1);
1015 if (ret < 0)
1016 return ret;
1017 }
1018
4cb3d99c
JW
1019 /* Add a fake 'exit' subprog which could simplify subprog iteration
1020 * logic. 'subprog_cnt' should not be increased.
1021 */
1022 subprog[env->subprog_cnt].start = insn_cnt;
1023
cc8b0b92
AS
1024 if (env->log.level > 1)
1025 for (i = 0; i < env->subprog_cnt; i++)
9c8105bd 1026 verbose(env, "func#%d @%d\n", i, subprog[i].start);
cc8b0b92
AS
1027
1028 /* now check that all jumps are within the same subprog */
4cb3d99c
JW
1029 subprog_start = subprog[cur_subprog].start;
1030 subprog_end = subprog[cur_subprog + 1].start;
cc8b0b92
AS
1031 for (i = 0; i < insn_cnt; i++) {
1032 u8 code = insn[i].code;
1033
1034 if (BPF_CLASS(code) != BPF_JMP)
1035 goto next;
1036 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
1037 goto next;
1038 off = i + insn[i].off + 1;
1039 if (off < subprog_start || off >= subprog_end) {
1040 verbose(env, "jump out of range from insn %d to %d\n", i, off);
1041 return -EINVAL;
1042 }
1043next:
1044 if (i == subprog_end - 1) {
1045 /* to avoid fall-through from one subprog into another
1046 * the last insn of the subprog should be either exit
1047 * or unconditional jump back
1048 */
1049 if (code != (BPF_JMP | BPF_EXIT) &&
1050 code != (BPF_JMP | BPF_JA)) {
1051 verbose(env, "last insn is not an exit or jmp\n");
1052 return -EINVAL;
1053 }
1054 subprog_start = subprog_end;
4cb3d99c
JW
1055 cur_subprog++;
1056 if (cur_subprog < env->subprog_cnt)
9c8105bd 1057 subprog_end = subprog[cur_subprog + 1].start;
cc8b0b92
AS
1058 }
1059 }
1060 return 0;
1061}
1062
679c782d
EC
1063/* Parentage chain of this register (or stack slot) should take care of all
1064 * issues like callee-saved registers, stack slot allocation time, etc.
1065 */
f4d7e40a 1066static int mark_reg_read(struct bpf_verifier_env *env,
679c782d
EC
1067 const struct bpf_reg_state *state,
1068 struct bpf_reg_state *parent)
f4d7e40a
AS
1069{
1070 bool writes = parent == state->parent; /* Observe write marks */
dc503a8a
EC
1071
1072 while (parent) {
1073 /* if read wasn't screened by an earlier write ... */
679c782d 1074 if (writes && state->live & REG_LIVE_WRITTEN)
dc503a8a
EC
1075 break;
1076 /* ... then we depend on parent's value */
679c782d 1077 parent->live |= REG_LIVE_READ;
dc503a8a
EC
1078 state = parent;
1079 parent = state->parent;
f4d7e40a 1080 writes = true;
dc503a8a 1081 }
f4d7e40a 1082 return 0;
dc503a8a
EC
1083}
1084
1085static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
17a52670
AS
1086 enum reg_arg_type t)
1087{
f4d7e40a
AS
1088 struct bpf_verifier_state *vstate = env->cur_state;
1089 struct bpf_func_state *state = vstate->frame[vstate->curframe];
1090 struct bpf_reg_state *regs = state->regs;
dc503a8a 1091
17a52670 1092 if (regno >= MAX_BPF_REG) {
61bd5218 1093 verbose(env, "R%d is invalid\n", regno);
17a52670
AS
1094 return -EINVAL;
1095 }
1096
1097 if (t == SRC_OP) {
1098 /* check whether register used as source operand can be read */
1099 if (regs[regno].type == NOT_INIT) {
61bd5218 1100 verbose(env, "R%d !read_ok\n", regno);
17a52670
AS
1101 return -EACCES;
1102 }
679c782d
EC
1103 /* We don't need to worry about FP liveness because it's read-only */
1104 if (regno != BPF_REG_FP)
1105 return mark_reg_read(env, &regs[regno],
1106 regs[regno].parent);
17a52670
AS
1107 } else {
1108 /* check whether register used as dest operand can be written to */
1109 if (regno == BPF_REG_FP) {
61bd5218 1110 verbose(env, "frame pointer is read only\n");
17a52670
AS
1111 return -EACCES;
1112 }
dc503a8a 1113 regs[regno].live |= REG_LIVE_WRITTEN;
17a52670 1114 if (t == DST_OP)
61bd5218 1115 mark_reg_unknown(env, regs, regno);
17a52670
AS
1116 }
1117 return 0;
1118}
1119
1be7f75d
AS
1120static bool is_spillable_regtype(enum bpf_reg_type type)
1121{
1122 switch (type) {
1123 case PTR_TO_MAP_VALUE:
1124 case PTR_TO_MAP_VALUE_OR_NULL:
1125 case PTR_TO_STACK:
1126 case PTR_TO_CTX:
969bf05e 1127 case PTR_TO_PACKET:
de8f3a83 1128 case PTR_TO_PACKET_META:
969bf05e 1129 case PTR_TO_PACKET_END:
d58e468b 1130 case PTR_TO_FLOW_KEYS:
1be7f75d 1131 case CONST_PTR_TO_MAP:
c64b7983
JS
1132 case PTR_TO_SOCKET:
1133 case PTR_TO_SOCKET_OR_NULL:
1be7f75d
AS
1134 return true;
1135 default:
1136 return false;
1137 }
1138}
1139
cc2b14d5
AS
1140/* Does this register contain a constant zero? */
1141static bool register_is_null(struct bpf_reg_state *reg)
1142{
1143 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
1144}
1145
17a52670
AS
1146/* check_stack_read/write functions track spill/fill of registers,
1147 * stack boundary and alignment are checked in check_mem_access()
1148 */
61bd5218 1149static int check_stack_write(struct bpf_verifier_env *env,
f4d7e40a 1150 struct bpf_func_state *state, /* func where register points to */
af86ca4e 1151 int off, int size, int value_regno, int insn_idx)
17a52670 1152{
f4d7e40a 1153 struct bpf_func_state *cur; /* state of the current function */
638f5b90 1154 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
f4d7e40a 1155 enum bpf_reg_type type;
638f5b90 1156
f4d7e40a 1157 err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
fd978bf7 1158 state->acquired_refs, true);
638f5b90
AS
1159 if (err)
1160 return err;
9c399760
AS
1161 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
1162 * so it's aligned access and [off, off + size) are within stack limits
1163 */
638f5b90
AS
1164 if (!env->allow_ptr_leaks &&
1165 state->stack[spi].slot_type[0] == STACK_SPILL &&
1166 size != BPF_REG_SIZE) {
1167 verbose(env, "attempt to corrupt spilled pointer on stack\n");
1168 return -EACCES;
1169 }
17a52670 1170
f4d7e40a 1171 cur = env->cur_state->frame[env->cur_state->curframe];
17a52670 1172 if (value_regno >= 0 &&
f4d7e40a 1173 is_spillable_regtype((type = cur->regs[value_regno].type))) {
17a52670
AS
1174
1175 /* register containing pointer is being spilled into stack */
9c399760 1176 if (size != BPF_REG_SIZE) {
61bd5218 1177 verbose(env, "invalid size of register spill\n");
17a52670
AS
1178 return -EACCES;
1179 }
1180
f4d7e40a
AS
1181 if (state != cur && type == PTR_TO_STACK) {
1182 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
1183 return -EINVAL;
1184 }
1185
17a52670 1186 /* save register state */
f4d7e40a 1187 state->stack[spi].spilled_ptr = cur->regs[value_regno];
638f5b90 1188 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
17a52670 1189
af86ca4e
AS
1190 for (i = 0; i < BPF_REG_SIZE; i++) {
1191 if (state->stack[spi].slot_type[i] == STACK_MISC &&
1192 !env->allow_ptr_leaks) {
1193 int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
1194 int soff = (-spi - 1) * BPF_REG_SIZE;
1195
1196 /* detected reuse of integer stack slot with a pointer
1197 * which means either llvm is reusing stack slot or
1198 * an attacker is trying to exploit CVE-2018-3639
1199 * (speculative store bypass)
1200 * Have to sanitize that slot with preemptive
1201 * store of zero.
1202 */
1203 if (*poff && *poff != soff) {
1204 /* disallow programs where single insn stores
1205 * into two different stack slots, since verifier
1206 * cannot sanitize them
1207 */
1208 verbose(env,
1209 "insn %d cannot access two stack slots fp%d and fp%d",
1210 insn_idx, *poff, soff);
1211 return -EINVAL;
1212 }
1213 *poff = soff;
1214 }
638f5b90 1215 state->stack[spi].slot_type[i] = STACK_SPILL;
af86ca4e 1216 }
9c399760 1217 } else {
cc2b14d5
AS
1218 u8 type = STACK_MISC;
1219
679c782d
EC
1220 /* regular write of data into stack destroys any spilled ptr */
1221 state->stack[spi].spilled_ptr.type = NOT_INIT;
9c399760 1222
cc2b14d5
AS
1223 /* only mark the slot as written if all 8 bytes were written
1224 * otherwise read propagation may incorrectly stop too soon
1225 * when stack slots are partially written.
1226 * This heuristic means that read propagation will be
1227 * conservative, since it will add reg_live_read marks
1228 * to stack slots all the way to first state when programs
1229 * writes+reads less than 8 bytes
1230 */
1231 if (size == BPF_REG_SIZE)
1232 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1233
1234 /* when we zero initialize stack slots mark them as such */
1235 if (value_regno >= 0 &&
1236 register_is_null(&cur->regs[value_regno]))
1237 type = STACK_ZERO;
1238
9c399760 1239 for (i = 0; i < size; i++)
638f5b90 1240 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
cc2b14d5 1241 type;
17a52670
AS
1242 }
1243 return 0;
1244}
1245
61bd5218 1246static int check_stack_read(struct bpf_verifier_env *env,
f4d7e40a
AS
1247 struct bpf_func_state *reg_state /* func where register points to */,
1248 int off, int size, int value_regno)
17a52670 1249{
f4d7e40a
AS
1250 struct bpf_verifier_state *vstate = env->cur_state;
1251 struct bpf_func_state *state = vstate->frame[vstate->curframe];
638f5b90
AS
1252 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
1253 u8 *stype;
17a52670 1254
f4d7e40a 1255 if (reg_state->allocated_stack <= slot) {
638f5b90
AS
1256 verbose(env, "invalid read from stack off %d+0 size %d\n",
1257 off, size);
1258 return -EACCES;
1259 }
f4d7e40a 1260 stype = reg_state->stack[spi].slot_type;
17a52670 1261
638f5b90 1262 if (stype[0] == STACK_SPILL) {
9c399760 1263 if (size != BPF_REG_SIZE) {
61bd5218 1264 verbose(env, "invalid size of register spill\n");
17a52670
AS
1265 return -EACCES;
1266 }
9c399760 1267 for (i = 1; i < BPF_REG_SIZE; i++) {
638f5b90 1268 if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
61bd5218 1269 verbose(env, "corrupted spill memory\n");
17a52670
AS
1270 return -EACCES;
1271 }
1272 }
1273
dc503a8a 1274 if (value_regno >= 0) {
17a52670 1275 /* restore register state from stack */
f4d7e40a 1276 state->regs[value_regno] = reg_state->stack[spi].spilled_ptr;
2f18f62e
AS
1277 /* mark reg as written since spilled pointer state likely
1278 * has its liveness marks cleared by is_state_visited()
1279 * which resets stack/reg liveness for state transitions
1280 */
1281 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
dc503a8a 1282 }
679c782d
EC
1283 mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
1284 reg_state->stack[spi].spilled_ptr.parent);
17a52670
AS
1285 return 0;
1286 } else {
cc2b14d5
AS
1287 int zeros = 0;
1288
17a52670 1289 for (i = 0; i < size; i++) {
cc2b14d5
AS
1290 if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
1291 continue;
1292 if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
1293 zeros++;
1294 continue;
17a52670 1295 }
cc2b14d5
AS
1296 verbose(env, "invalid read from stack off %d+%d size %d\n",
1297 off, i, size);
1298 return -EACCES;
1299 }
679c782d
EC
1300 mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
1301 reg_state->stack[spi].spilled_ptr.parent);
cc2b14d5
AS
1302 if (value_regno >= 0) {
1303 if (zeros == size) {
1304 /* any size read into register is zero extended,
1305 * so the whole register == const_zero
1306 */
1307 __mark_reg_const_zero(&state->regs[value_regno]);
1308 } else {
1309 /* have read misc data from the stack */
1310 mark_reg_unknown(env, state->regs, value_regno);
1311 }
1312 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
17a52670 1313 }
17a52670
AS
1314 return 0;
1315 }
1316}
1317
1318/* check read/write into map element returned by bpf_map_lookup_elem() */
f1174f77 1319static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
9fd29c08 1320 int size, bool zero_size_allowed)
17a52670 1321{
638f5b90
AS
1322 struct bpf_reg_state *regs = cur_regs(env);
1323 struct bpf_map *map = regs[regno].map_ptr;
17a52670 1324
9fd29c08
YS
1325 if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1326 off + size > map->value_size) {
61bd5218 1327 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
17a52670
AS
1328 map->value_size, off, size);
1329 return -EACCES;
1330 }
1331 return 0;
1332}
1333
f1174f77
EC
1334/* check read/write into a map element with possible variable offset */
1335static int check_map_access(struct bpf_verifier_env *env, u32 regno,
9fd29c08 1336 int off, int size, bool zero_size_allowed)
dbcfe5f7 1337{
f4d7e40a
AS
1338 struct bpf_verifier_state *vstate = env->cur_state;
1339 struct bpf_func_state *state = vstate->frame[vstate->curframe];
dbcfe5f7
GB
1340 struct bpf_reg_state *reg = &state->regs[regno];
1341 int err;
1342
f1174f77
EC
1343 /* We may have adjusted the register to this map value, so we
1344 * need to try adding each of min_value and max_value to off
1345 * to make sure our theoretical access will be safe.
dbcfe5f7 1346 */
61bd5218
JK
1347 if (env->log.level)
1348 print_verifier_state(env, state);
dbcfe5f7
GB
1349 /* The minimum value is only important with signed
1350 * comparisons where we can't assume the floor of a
1351 * value is 0. If we are using signed variables for our
1352 * index'es we need to make sure that whatever we use
1353 * will have a set floor within our range.
1354 */
b03c9f9f 1355 if (reg->smin_value < 0) {
61bd5218 1356 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
dbcfe5f7
GB
1357 regno);
1358 return -EACCES;
1359 }
9fd29c08
YS
1360 err = __check_map_access(env, regno, reg->smin_value + off, size,
1361 zero_size_allowed);
dbcfe5f7 1362 if (err) {
61bd5218
JK
1363 verbose(env, "R%d min value is outside of the array range\n",
1364 regno);
dbcfe5f7
GB
1365 return err;
1366 }
1367
b03c9f9f
EC
1368 /* If we haven't set a max value then we need to bail since we can't be
1369 * sure we won't do bad things.
1370 * If reg->umax_value + off could overflow, treat that as unbounded too.
dbcfe5f7 1371 */
b03c9f9f 1372 if (reg->umax_value >= BPF_MAX_VAR_OFF) {
61bd5218 1373 verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
dbcfe5f7
GB
1374 regno);
1375 return -EACCES;
1376 }
9fd29c08
YS
1377 err = __check_map_access(env, regno, reg->umax_value + off, size,
1378 zero_size_allowed);
f1174f77 1379 if (err)
61bd5218
JK
1380 verbose(env, "R%d max value is outside of the array range\n",
1381 regno);
f1174f77 1382 return err;
dbcfe5f7
GB
1383}
1384
969bf05e
AS
1385#define MAX_PACKET_OFF 0xffff
1386
58e2af8b 1387static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
3a0af8fd
TG
1388 const struct bpf_call_arg_meta *meta,
1389 enum bpf_access_type t)
4acf6c0b 1390{
36bbef52 1391 switch (env->prog->type) {
5d66fa7d 1392 /* Program types only with direct read access go here! */
3a0af8fd
TG
1393 case BPF_PROG_TYPE_LWT_IN:
1394 case BPF_PROG_TYPE_LWT_OUT:
004d4b27 1395 case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2dbb9b9e 1396 case BPF_PROG_TYPE_SK_REUSEPORT:
5d66fa7d 1397 case BPF_PROG_TYPE_FLOW_DISSECTOR:
d5563d36 1398 case BPF_PROG_TYPE_CGROUP_SKB:
3a0af8fd
TG
1399 if (t == BPF_WRITE)
1400 return false;
7e57fbb2 1401 /* fallthrough */
5d66fa7d
DB
1402
1403 /* Program types with direct read + write access go here! */
36bbef52
DB
1404 case BPF_PROG_TYPE_SCHED_CLS:
1405 case BPF_PROG_TYPE_SCHED_ACT:
4acf6c0b 1406 case BPF_PROG_TYPE_XDP:
3a0af8fd 1407 case BPF_PROG_TYPE_LWT_XMIT:
8a31db56 1408 case BPF_PROG_TYPE_SK_SKB:
4f738adb 1409 case BPF_PROG_TYPE_SK_MSG:
36bbef52
DB
1410 if (meta)
1411 return meta->pkt_access;
1412
1413 env->seen_direct_write = true;
4acf6c0b
BB
1414 return true;
1415 default:
1416 return false;
1417 }
1418}
1419
f1174f77 1420static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
9fd29c08 1421 int off, int size, bool zero_size_allowed)
969bf05e 1422{
638f5b90 1423 struct bpf_reg_state *regs = cur_regs(env);
58e2af8b 1424 struct bpf_reg_state *reg = &regs[regno];
969bf05e 1425
9fd29c08
YS
1426 if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1427 (u64)off + size > reg->range) {
61bd5218 1428 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
d91b28ed 1429 off, size, regno, reg->id, reg->off, reg->range);
969bf05e
AS
1430 return -EACCES;
1431 }
1432 return 0;
1433}
1434
f1174f77 1435static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
9fd29c08 1436 int size, bool zero_size_allowed)
f1174f77 1437{
638f5b90 1438 struct bpf_reg_state *regs = cur_regs(env);
f1174f77
EC
1439 struct bpf_reg_state *reg = &regs[regno];
1440 int err;
1441
1442 /* We may have added a variable offset to the packet pointer; but any
1443 * reg->range we have comes after that. We are only checking the fixed
1444 * offset.
1445 */
1446
1447 /* We don't allow negative numbers, because we aren't tracking enough
1448 * detail to prove they're safe.
1449 */
b03c9f9f 1450 if (reg->smin_value < 0) {
61bd5218 1451 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
f1174f77
EC
1452 regno);
1453 return -EACCES;
1454 }
9fd29c08 1455 err = __check_packet_access(env, regno, off, size, zero_size_allowed);
f1174f77 1456 if (err) {
61bd5218 1457 verbose(env, "R%d offset is outside of the packet\n", regno);
f1174f77
EC
1458 return err;
1459 }
e647815a
JW
1460
1461 /* __check_packet_access has made sure "off + size - 1" is within u16.
1462 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
1463 * otherwise find_good_pkt_pointers would have refused to set range info
1464 * that __check_packet_access would have rejected this pkt access.
1465 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
1466 */
1467 env->prog->aux->max_pkt_offset =
1468 max_t(u32, env->prog->aux->max_pkt_offset,
1469 off + reg->umax_value + size - 1);
1470
f1174f77
EC
1471 return err;
1472}
1473
1474/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
31fd8581 1475static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
19de99f7 1476 enum bpf_access_type t, enum bpf_reg_type *reg_type)
17a52670 1477{
f96da094
DB
1478 struct bpf_insn_access_aux info = {
1479 .reg_type = *reg_type,
1480 };
31fd8581 1481
4f9218aa 1482 if (env->ops->is_valid_access &&
5e43f899 1483 env->ops->is_valid_access(off, size, t, env->prog, &info)) {
f96da094
DB
1484 /* A non zero info.ctx_field_size indicates that this field is a
1485 * candidate for later verifier transformation to load the whole
1486 * field and then apply a mask when accessed with a narrower
1487 * access than actual ctx access size. A zero info.ctx_field_size
1488 * will only allow for whole field access and rejects any other
1489 * type of narrower access.
31fd8581 1490 */
23994631 1491 *reg_type = info.reg_type;
31fd8581 1492
4f9218aa 1493 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
32bbe007
AS
1494 /* remember the offset of last byte accessed in ctx */
1495 if (env->prog->aux->max_ctx_offset < off + size)
1496 env->prog->aux->max_ctx_offset = off + size;
17a52670 1497 return 0;
32bbe007 1498 }
17a52670 1499
61bd5218 1500 verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
17a52670
AS
1501 return -EACCES;
1502}
1503
d58e468b
PP
1504static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
1505 int size)
1506{
1507 if (size < 0 || off < 0 ||
1508 (u64)off + size > sizeof(struct bpf_flow_keys)) {
1509 verbose(env, "invalid access to flow keys off=%d size=%d\n",
1510 off, size);
1511 return -EACCES;
1512 }
1513 return 0;
1514}
1515
c64b7983
JS
1516static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off,
1517 int size, enum bpf_access_type t)
1518{
1519 struct bpf_reg_state *regs = cur_regs(env);
1520 struct bpf_reg_state *reg = &regs[regno];
1521 struct bpf_insn_access_aux info;
1522
1523 if (reg->smin_value < 0) {
1524 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
1525 regno);
1526 return -EACCES;
1527 }
1528
1529 if (!bpf_sock_is_valid_access(off, size, t, &info)) {
1530 verbose(env, "invalid bpf_sock access off=%d size=%d\n",
1531 off, size);
1532 return -EACCES;
1533 }
1534
1535 return 0;
1536}
1537
4cabc5b1
DB
1538static bool __is_pointer_value(bool allow_ptr_leaks,
1539 const struct bpf_reg_state *reg)
1be7f75d 1540{
4cabc5b1 1541 if (allow_ptr_leaks)
1be7f75d
AS
1542 return false;
1543
f1174f77 1544 return reg->type != SCALAR_VALUE;
1be7f75d
AS
1545}
1546
2a159c6f
DB
1547static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
1548{
1549 return cur_regs(env) + regno;
1550}
1551
4cabc5b1
DB
1552static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
1553{
2a159c6f 1554 return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
4cabc5b1
DB
1555}
1556
f37a8cb8
DB
1557static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
1558{
2a159c6f 1559 const struct bpf_reg_state *reg = reg_state(env, regno);
f37a8cb8 1560
fd978bf7
JS
1561 return reg->type == PTR_TO_CTX ||
1562 reg->type == PTR_TO_SOCKET;
f37a8cb8
DB
1563}
1564
ca369602
DB
1565static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
1566{
2a159c6f 1567 const struct bpf_reg_state *reg = reg_state(env, regno);
ca369602
DB
1568
1569 return type_is_pkt_pointer(reg->type);
1570}
1571
4b5defde
DB
1572static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
1573{
1574 const struct bpf_reg_state *reg = reg_state(env, regno);
1575
1576 /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
1577 return reg->type == PTR_TO_FLOW_KEYS;
1578}
1579
61bd5218
JK
1580static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
1581 const struct bpf_reg_state *reg,
d1174416 1582 int off, int size, bool strict)
969bf05e 1583{
f1174f77 1584 struct tnum reg_off;
e07b98d9 1585 int ip_align;
d1174416
DM
1586
1587 /* Byte size accesses are always allowed. */
1588 if (!strict || size == 1)
1589 return 0;
1590
e4eda884
DM
1591 /* For platforms that do not have a Kconfig enabling
1592 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
1593 * NET_IP_ALIGN is universally set to '2'. And on platforms
1594 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
1595 * to this code only in strict mode where we want to emulate
1596 * the NET_IP_ALIGN==2 checking. Therefore use an
1597 * unconditional IP align value of '2'.
e07b98d9 1598 */
e4eda884 1599 ip_align = 2;
f1174f77
EC
1600
1601 reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
1602 if (!tnum_is_aligned(reg_off, size)) {
1603 char tn_buf[48];
1604
1605 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218
JK
1606 verbose(env,
1607 "misaligned packet access off %d+%s+%d+%d size %d\n",
f1174f77 1608 ip_align, tn_buf, reg->off, off, size);
969bf05e
AS
1609 return -EACCES;
1610 }
79adffcd 1611
969bf05e
AS
1612 return 0;
1613}
1614
61bd5218
JK
1615static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
1616 const struct bpf_reg_state *reg,
f1174f77
EC
1617 const char *pointer_desc,
1618 int off, int size, bool strict)
79adffcd 1619{
f1174f77
EC
1620 struct tnum reg_off;
1621
1622 /* Byte size accesses are always allowed. */
1623 if (!strict || size == 1)
1624 return 0;
1625
1626 reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
1627 if (!tnum_is_aligned(reg_off, size)) {
1628 char tn_buf[48];
1629
1630 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 1631 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
f1174f77 1632 pointer_desc, tn_buf, reg->off, off, size);
79adffcd
DB
1633 return -EACCES;
1634 }
1635
969bf05e
AS
1636 return 0;
1637}
1638
e07b98d9 1639static int check_ptr_alignment(struct bpf_verifier_env *env,
ca369602
DB
1640 const struct bpf_reg_state *reg, int off,
1641 int size, bool strict_alignment_once)
79adffcd 1642{
ca369602 1643 bool strict = env->strict_alignment || strict_alignment_once;
f1174f77 1644 const char *pointer_desc = "";
d1174416 1645
79adffcd
DB
1646 switch (reg->type) {
1647 case PTR_TO_PACKET:
de8f3a83
DB
1648 case PTR_TO_PACKET_META:
1649 /* Special case, because of NET_IP_ALIGN. Given metadata sits
1650 * right in front, treat it the very same way.
1651 */
61bd5218 1652 return check_pkt_ptr_alignment(env, reg, off, size, strict);
d58e468b
PP
1653 case PTR_TO_FLOW_KEYS:
1654 pointer_desc = "flow keys ";
1655 break;
f1174f77
EC
1656 case PTR_TO_MAP_VALUE:
1657 pointer_desc = "value ";
1658 break;
1659 case PTR_TO_CTX:
1660 pointer_desc = "context ";
1661 break;
1662 case PTR_TO_STACK:
1663 pointer_desc = "stack ";
a5ec6ae1
JH
1664 /* The stack spill tracking logic in check_stack_write()
1665 * and check_stack_read() relies on stack accesses being
1666 * aligned.
1667 */
1668 strict = true;
f1174f77 1669 break;
c64b7983
JS
1670 case PTR_TO_SOCKET:
1671 pointer_desc = "sock ";
1672 break;
79adffcd 1673 default:
f1174f77 1674 break;
79adffcd 1675 }
61bd5218
JK
1676 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
1677 strict);
79adffcd
DB
1678}
1679
f4d7e40a
AS
1680static int update_stack_depth(struct bpf_verifier_env *env,
1681 const struct bpf_func_state *func,
1682 int off)
1683{
9c8105bd 1684 u16 stack = env->subprog_info[func->subprogno].stack_depth;
f4d7e40a
AS
1685
1686 if (stack >= -off)
1687 return 0;
1688
1689 /* update known max for given subprogram */
9c8105bd 1690 env->subprog_info[func->subprogno].stack_depth = -off;
70a87ffe
AS
1691 return 0;
1692}
f4d7e40a 1693
70a87ffe
AS
1694/* starting from main bpf function walk all instructions of the function
1695 * and recursively walk all callees that given function can call.
1696 * Ignore jump and exit insns.
1697 * Since recursion is prevented by check_cfg() this algorithm
1698 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
1699 */
1700static int check_max_stack_depth(struct bpf_verifier_env *env)
1701{
9c8105bd
JW
1702 int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
1703 struct bpf_subprog_info *subprog = env->subprog_info;
70a87ffe 1704 struct bpf_insn *insn = env->prog->insnsi;
70a87ffe
AS
1705 int ret_insn[MAX_CALL_FRAMES];
1706 int ret_prog[MAX_CALL_FRAMES];
f4d7e40a 1707
70a87ffe
AS
1708process_func:
1709 /* round up to 32-bytes, since this is granularity
1710 * of interpreter stack size
1711 */
9c8105bd 1712 depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
70a87ffe 1713 if (depth > MAX_BPF_STACK) {
f4d7e40a 1714 verbose(env, "combined stack size of %d calls is %d. Too large\n",
70a87ffe 1715 frame + 1, depth);
f4d7e40a
AS
1716 return -EACCES;
1717 }
70a87ffe 1718continue_func:
4cb3d99c 1719 subprog_end = subprog[idx + 1].start;
70a87ffe
AS
1720 for (; i < subprog_end; i++) {
1721 if (insn[i].code != (BPF_JMP | BPF_CALL))
1722 continue;
1723 if (insn[i].src_reg != BPF_PSEUDO_CALL)
1724 continue;
1725 /* remember insn and function to return to */
1726 ret_insn[frame] = i + 1;
9c8105bd 1727 ret_prog[frame] = idx;
70a87ffe
AS
1728
1729 /* find the callee */
1730 i = i + insn[i].imm + 1;
9c8105bd
JW
1731 idx = find_subprog(env, i);
1732 if (idx < 0) {
70a87ffe
AS
1733 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1734 i);
1735 return -EFAULT;
1736 }
70a87ffe
AS
1737 frame++;
1738 if (frame >= MAX_CALL_FRAMES) {
1739 WARN_ONCE(1, "verifier bug. Call stack is too deep\n");
1740 return -EFAULT;
1741 }
1742 goto process_func;
1743 }
1744 /* end of for() loop means the last insn of the 'subprog'
1745 * was reached. Doesn't matter whether it was JA or EXIT
1746 */
1747 if (frame == 0)
1748 return 0;
9c8105bd 1749 depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
70a87ffe
AS
1750 frame--;
1751 i = ret_insn[frame];
9c8105bd 1752 idx = ret_prog[frame];
70a87ffe 1753 goto continue_func;
f4d7e40a
AS
1754}
1755
19d28fbd 1756#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1ea47e01
AS
1757static int get_callee_stack_depth(struct bpf_verifier_env *env,
1758 const struct bpf_insn *insn, int idx)
1759{
1760 int start = idx + insn->imm + 1, subprog;
1761
1762 subprog = find_subprog(env, start);
1763 if (subprog < 0) {
1764 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1765 start);
1766 return -EFAULT;
1767 }
9c8105bd 1768 return env->subprog_info[subprog].stack_depth;
1ea47e01 1769}
19d28fbd 1770#endif
1ea47e01 1771
58990d1f
DB
1772static int check_ctx_reg(struct bpf_verifier_env *env,
1773 const struct bpf_reg_state *reg, int regno)
1774{
1775 /* Access to ctx or passing it to a helper is only allowed in
1776 * its original, unmodified form.
1777 */
1778
1779 if (reg->off) {
1780 verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
1781 regno, reg->off);
1782 return -EACCES;
1783 }
1784
1785 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
1786 char tn_buf[48];
1787
1788 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1789 verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
1790 return -EACCES;
1791 }
1792
1793 return 0;
1794}
1795
0c17d1d2
JH
1796/* truncate register to smaller size (in bytes)
1797 * must be called with size < BPF_REG_SIZE
1798 */
1799static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
1800{
1801 u64 mask;
1802
1803 /* clear high bits in bit representation */
1804 reg->var_off = tnum_cast(reg->var_off, size);
1805
1806 /* fix arithmetic bounds */
1807 mask = ((u64)1 << (size * 8)) - 1;
1808 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
1809 reg->umin_value &= mask;
1810 reg->umax_value &= mask;
1811 } else {
1812 reg->umin_value = 0;
1813 reg->umax_value = mask;
1814 }
1815 reg->smin_value = reg->umin_value;
1816 reg->smax_value = reg->umax_value;
1817}
1818
17a52670
AS
1819/* check whether memory at (regno + off) is accessible for t = (read | write)
1820 * if t==write, value_regno is a register which value is stored into memory
1821 * if t==read, value_regno is a register which will receive the value from memory
1822 * if t==write && value_regno==-1, some unknown value is stored into memory
1823 * if t==read && value_regno==-1, don't care what we read from memory
1824 */
ca369602
DB
1825static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
1826 int off, int bpf_size, enum bpf_access_type t,
1827 int value_regno, bool strict_alignment_once)
17a52670 1828{
638f5b90
AS
1829 struct bpf_reg_state *regs = cur_regs(env);
1830 struct bpf_reg_state *reg = regs + regno;
f4d7e40a 1831 struct bpf_func_state *state;
17a52670
AS
1832 int size, err = 0;
1833
1834 size = bpf_size_to_bytes(bpf_size);
1835 if (size < 0)
1836 return size;
1837
f1174f77 1838 /* alignment checks will add in reg->off themselves */
ca369602 1839 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
969bf05e
AS
1840 if (err)
1841 return err;
17a52670 1842
f1174f77
EC
1843 /* for access checks, reg->off is just part of off */
1844 off += reg->off;
1845
1846 if (reg->type == PTR_TO_MAP_VALUE) {
1be7f75d
AS
1847 if (t == BPF_WRITE && value_regno >= 0 &&
1848 is_pointer_value(env, value_regno)) {
61bd5218 1849 verbose(env, "R%d leaks addr into map\n", value_regno);
1be7f75d
AS
1850 return -EACCES;
1851 }
48461135 1852
9fd29c08 1853 err = check_map_access(env, regno, off, size, false);
17a52670 1854 if (!err && t == BPF_READ && value_regno >= 0)
638f5b90 1855 mark_reg_unknown(env, regs, value_regno);
17a52670 1856
1a0dc1ac 1857 } else if (reg->type == PTR_TO_CTX) {
f1174f77 1858 enum bpf_reg_type reg_type = SCALAR_VALUE;
19de99f7 1859
1be7f75d
AS
1860 if (t == BPF_WRITE && value_regno >= 0 &&
1861 is_pointer_value(env, value_regno)) {
61bd5218 1862 verbose(env, "R%d leaks addr into ctx\n", value_regno);
1be7f75d
AS
1863 return -EACCES;
1864 }
f1174f77 1865
58990d1f
DB
1866 err = check_ctx_reg(env, reg, regno);
1867 if (err < 0)
1868 return err;
1869
31fd8581 1870 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
969bf05e 1871 if (!err && t == BPF_READ && value_regno >= 0) {
f1174f77 1872 /* ctx access returns either a scalar, or a
de8f3a83
DB
1873 * PTR_TO_PACKET[_META,_END]. In the latter
1874 * case, we know the offset is zero.
f1174f77
EC
1875 */
1876 if (reg_type == SCALAR_VALUE)
638f5b90 1877 mark_reg_unknown(env, regs, value_regno);
f1174f77 1878 else
638f5b90 1879 mark_reg_known_zero(env, regs,
61bd5218 1880 value_regno);
638f5b90 1881 regs[value_regno].type = reg_type;
969bf05e 1882 }
17a52670 1883
f1174f77
EC
1884 } else if (reg->type == PTR_TO_STACK) {
1885 /* stack accesses must be at a fixed offset, so that we can
1886 * determine what type of data were returned.
1887 * See check_stack_read().
1888 */
1889 if (!tnum_is_const(reg->var_off)) {
1890 char tn_buf[48];
1891
1892 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 1893 verbose(env, "variable stack access var_off=%s off=%d size=%d",
f1174f77
EC
1894 tn_buf, off, size);
1895 return -EACCES;
1896 }
1897 off += reg->var_off.value;
17a52670 1898 if (off >= 0 || off < -MAX_BPF_STACK) {
61bd5218
JK
1899 verbose(env, "invalid stack off=%d size=%d\n", off,
1900 size);
17a52670
AS
1901 return -EACCES;
1902 }
8726679a 1903
f4d7e40a
AS
1904 state = func(env, reg);
1905 err = update_stack_depth(env, state, off);
1906 if (err)
1907 return err;
8726679a 1908
638f5b90 1909 if (t == BPF_WRITE)
61bd5218 1910 err = check_stack_write(env, state, off, size,
af86ca4e 1911 value_regno, insn_idx);
638f5b90 1912 else
61bd5218
JK
1913 err = check_stack_read(env, state, off, size,
1914 value_regno);
de8f3a83 1915 } else if (reg_is_pkt_pointer(reg)) {
3a0af8fd 1916 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
61bd5218 1917 verbose(env, "cannot write into packet\n");
969bf05e
AS
1918 return -EACCES;
1919 }
4acf6c0b
BB
1920 if (t == BPF_WRITE && value_regno >= 0 &&
1921 is_pointer_value(env, value_regno)) {
61bd5218
JK
1922 verbose(env, "R%d leaks addr into packet\n",
1923 value_regno);
4acf6c0b
BB
1924 return -EACCES;
1925 }
9fd29c08 1926 err = check_packet_access(env, regno, off, size, false);
969bf05e 1927 if (!err && t == BPF_READ && value_regno >= 0)
638f5b90 1928 mark_reg_unknown(env, regs, value_regno);
d58e468b
PP
1929 } else if (reg->type == PTR_TO_FLOW_KEYS) {
1930 if (t == BPF_WRITE && value_regno >= 0 &&
1931 is_pointer_value(env, value_regno)) {
1932 verbose(env, "R%d leaks addr into flow keys\n",
1933 value_regno);
1934 return -EACCES;
1935 }
1936
1937 err = check_flow_keys_access(env, off, size);
1938 if (!err && t == BPF_READ && value_regno >= 0)
1939 mark_reg_unknown(env, regs, value_regno);
c64b7983
JS
1940 } else if (reg->type == PTR_TO_SOCKET) {
1941 if (t == BPF_WRITE) {
1942 verbose(env, "cannot write into socket\n");
1943 return -EACCES;
1944 }
1945 err = check_sock_access(env, regno, off, size, t);
1946 if (!err && value_regno >= 0)
1947 mark_reg_unknown(env, regs, value_regno);
17a52670 1948 } else {
61bd5218
JK
1949 verbose(env, "R%d invalid mem access '%s'\n", regno,
1950 reg_type_str[reg->type]);
17a52670
AS
1951 return -EACCES;
1952 }
969bf05e 1953
f1174f77 1954 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
638f5b90 1955 regs[value_regno].type == SCALAR_VALUE) {
f1174f77 1956 /* b/h/w load zero-extends, mark upper bits as known 0 */
0c17d1d2 1957 coerce_reg_to_size(&regs[value_regno], size);
969bf05e 1958 }
17a52670
AS
1959 return err;
1960}
1961
31fd8581 1962static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
17a52670 1963{
17a52670
AS
1964 int err;
1965
1966 if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
1967 insn->imm != 0) {
61bd5218 1968 verbose(env, "BPF_XADD uses reserved fields\n");
17a52670
AS
1969 return -EINVAL;
1970 }
1971
1972 /* check src1 operand */
dc503a8a 1973 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
1974 if (err)
1975 return err;
1976
1977 /* check src2 operand */
dc503a8a 1978 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
1979 if (err)
1980 return err;
1981
6bdf6abc 1982 if (is_pointer_value(env, insn->src_reg)) {
61bd5218 1983 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
6bdf6abc
DB
1984 return -EACCES;
1985 }
1986
ca369602 1987 if (is_ctx_reg(env, insn->dst_reg) ||
4b5defde
DB
1988 is_pkt_reg(env, insn->dst_reg) ||
1989 is_flow_key_reg(env, insn->dst_reg)) {
ca369602 1990 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
2a159c6f
DB
1991 insn->dst_reg,
1992 reg_type_str[reg_state(env, insn->dst_reg)->type]);
f37a8cb8
DB
1993 return -EACCES;
1994 }
1995
17a52670 1996 /* check whether atomic_add can read the memory */
31fd8581 1997 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
ca369602 1998 BPF_SIZE(insn->code), BPF_READ, -1, true);
17a52670
AS
1999 if (err)
2000 return err;
2001
2002 /* check whether atomic_add can write into the same memory */
31fd8581 2003 return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
ca369602 2004 BPF_SIZE(insn->code), BPF_WRITE, -1, true);
17a52670
AS
2005}
2006
2007/* when register 'regno' is passed into function that will read 'access_size'
2008 * bytes from that pointer, make sure that it's within stack boundary
f1174f77
EC
2009 * and all elements of stack are initialized.
2010 * Unlike most pointer bounds-checking functions, this one doesn't take an
2011 * 'off' argument, so it has to add in reg->off itself.
17a52670 2012 */
58e2af8b 2013static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
435faee1
DB
2014 int access_size, bool zero_size_allowed,
2015 struct bpf_call_arg_meta *meta)
17a52670 2016{
2a159c6f 2017 struct bpf_reg_state *reg = reg_state(env, regno);
f4d7e40a 2018 struct bpf_func_state *state = func(env, reg);
638f5b90 2019 int off, i, slot, spi;
17a52670 2020
914cb781 2021 if (reg->type != PTR_TO_STACK) {
f1174f77 2022 /* Allow zero-byte read from NULL, regardless of pointer type */
8e2fe1d9 2023 if (zero_size_allowed && access_size == 0 &&
914cb781 2024 register_is_null(reg))
8e2fe1d9
DB
2025 return 0;
2026
61bd5218 2027 verbose(env, "R%d type=%s expected=%s\n", regno,
914cb781 2028 reg_type_str[reg->type],
8e2fe1d9 2029 reg_type_str[PTR_TO_STACK]);
17a52670 2030 return -EACCES;
8e2fe1d9 2031 }
17a52670 2032
f1174f77 2033 /* Only allow fixed-offset stack reads */
914cb781 2034 if (!tnum_is_const(reg->var_off)) {
f1174f77
EC
2035 char tn_buf[48];
2036
914cb781 2037 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 2038 verbose(env, "invalid variable stack read R%d var_off=%s\n",
f1174f77 2039 regno, tn_buf);
ea25f914 2040 return -EACCES;
f1174f77 2041 }
914cb781 2042 off = reg->off + reg->var_off.value;
17a52670 2043 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
9fd29c08 2044 access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
61bd5218 2045 verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
17a52670
AS
2046 regno, off, access_size);
2047 return -EACCES;
2048 }
2049
435faee1
DB
2050 if (meta && meta->raw_mode) {
2051 meta->access_size = access_size;
2052 meta->regno = regno;
2053 return 0;
2054 }
2055
17a52670 2056 for (i = 0; i < access_size; i++) {
cc2b14d5
AS
2057 u8 *stype;
2058
638f5b90
AS
2059 slot = -(off + i) - 1;
2060 spi = slot / BPF_REG_SIZE;
cc2b14d5
AS
2061 if (state->allocated_stack <= slot)
2062 goto err;
2063 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
2064 if (*stype == STACK_MISC)
2065 goto mark;
2066 if (*stype == STACK_ZERO) {
2067 /* helper can write anything into the stack */
2068 *stype = STACK_MISC;
2069 goto mark;
17a52670 2070 }
cc2b14d5
AS
2071err:
2072 verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
2073 off, i, access_size);
2074 return -EACCES;
2075mark:
2076 /* reading any byte out of 8-byte 'spill_slot' will cause
2077 * the whole slot to be marked as 'read'
2078 */
679c782d
EC
2079 mark_reg_read(env, &state->stack[spi].spilled_ptr,
2080 state->stack[spi].spilled_ptr.parent);
17a52670 2081 }
f4d7e40a 2082 return update_stack_depth(env, state, off);
17a52670
AS
2083}
2084
06c1c049
GB
2085static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
2086 int access_size, bool zero_size_allowed,
2087 struct bpf_call_arg_meta *meta)
2088{
638f5b90 2089 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
06c1c049 2090
f1174f77 2091 switch (reg->type) {
06c1c049 2092 case PTR_TO_PACKET:
de8f3a83 2093 case PTR_TO_PACKET_META:
9fd29c08
YS
2094 return check_packet_access(env, regno, reg->off, access_size,
2095 zero_size_allowed);
06c1c049 2096 case PTR_TO_MAP_VALUE:
9fd29c08
YS
2097 return check_map_access(env, regno, reg->off, access_size,
2098 zero_size_allowed);
f1174f77 2099 default: /* scalar_value|ptr_to_stack or invalid ptr */
06c1c049
GB
2100 return check_stack_boundary(env, regno, access_size,
2101 zero_size_allowed, meta);
2102 }
2103}
2104
90133415
DB
2105static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
2106{
2107 return type == ARG_PTR_TO_MEM ||
2108 type == ARG_PTR_TO_MEM_OR_NULL ||
2109 type == ARG_PTR_TO_UNINIT_MEM;
2110}
2111
2112static bool arg_type_is_mem_size(enum bpf_arg_type type)
2113{
2114 return type == ARG_CONST_SIZE ||
2115 type == ARG_CONST_SIZE_OR_ZERO;
2116}
2117
58e2af8b 2118static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
33ff9823
DB
2119 enum bpf_arg_type arg_type,
2120 struct bpf_call_arg_meta *meta)
17a52670 2121{
638f5b90 2122 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6841de8b 2123 enum bpf_reg_type expected_type, type = reg->type;
17a52670
AS
2124 int err = 0;
2125
80f1d68c 2126 if (arg_type == ARG_DONTCARE)
17a52670
AS
2127 return 0;
2128
dc503a8a
EC
2129 err = check_reg_arg(env, regno, SRC_OP);
2130 if (err)
2131 return err;
17a52670 2132
1be7f75d
AS
2133 if (arg_type == ARG_ANYTHING) {
2134 if (is_pointer_value(env, regno)) {
61bd5218
JK
2135 verbose(env, "R%d leaks addr into helper function\n",
2136 regno);
1be7f75d
AS
2137 return -EACCES;
2138 }
80f1d68c 2139 return 0;
1be7f75d 2140 }
80f1d68c 2141
de8f3a83 2142 if (type_is_pkt_pointer(type) &&
3a0af8fd 2143 !may_access_direct_pkt_data(env, meta, BPF_READ)) {
61bd5218 2144 verbose(env, "helper access to the packet is not allowed\n");
6841de8b
AS
2145 return -EACCES;
2146 }
2147
8e2fe1d9 2148 if (arg_type == ARG_PTR_TO_MAP_KEY ||
2ea864c5
MV
2149 arg_type == ARG_PTR_TO_MAP_VALUE ||
2150 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
17a52670 2151 expected_type = PTR_TO_STACK;
d71962f3 2152 if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
de8f3a83 2153 type != expected_type)
6841de8b 2154 goto err_type;
39f19ebb
AS
2155 } else if (arg_type == ARG_CONST_SIZE ||
2156 arg_type == ARG_CONST_SIZE_OR_ZERO) {
f1174f77
EC
2157 expected_type = SCALAR_VALUE;
2158 if (type != expected_type)
6841de8b 2159 goto err_type;
17a52670
AS
2160 } else if (arg_type == ARG_CONST_MAP_PTR) {
2161 expected_type = CONST_PTR_TO_MAP;
6841de8b
AS
2162 if (type != expected_type)
2163 goto err_type;
608cd71a
AS
2164 } else if (arg_type == ARG_PTR_TO_CTX) {
2165 expected_type = PTR_TO_CTX;
6841de8b
AS
2166 if (type != expected_type)
2167 goto err_type;
58990d1f
DB
2168 err = check_ctx_reg(env, reg, regno);
2169 if (err < 0)
2170 return err;
c64b7983
JS
2171 } else if (arg_type == ARG_PTR_TO_SOCKET) {
2172 expected_type = PTR_TO_SOCKET;
2173 if (type != expected_type)
2174 goto err_type;
fd978bf7
JS
2175 if (meta->ptr_id || !reg->id) {
2176 verbose(env, "verifier internal error: mismatched references meta=%d, reg=%d\n",
2177 meta->ptr_id, reg->id);
2178 return -EFAULT;
2179 }
2180 meta->ptr_id = reg->id;
90133415 2181 } else if (arg_type_is_mem_ptr(arg_type)) {
8e2fe1d9
DB
2182 expected_type = PTR_TO_STACK;
2183 /* One exception here. In case function allows for NULL to be
f1174f77 2184 * passed in as argument, it's a SCALAR_VALUE type. Final test
8e2fe1d9
DB
2185 * happens during stack boundary checking.
2186 */
914cb781 2187 if (register_is_null(reg) &&
db1ac496 2188 arg_type == ARG_PTR_TO_MEM_OR_NULL)
6841de8b 2189 /* final test in check_stack_boundary() */;
de8f3a83
DB
2190 else if (!type_is_pkt_pointer(type) &&
2191 type != PTR_TO_MAP_VALUE &&
f1174f77 2192 type != expected_type)
6841de8b 2193 goto err_type;
39f19ebb 2194 meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
17a52670 2195 } else {
61bd5218 2196 verbose(env, "unsupported arg_type %d\n", arg_type);
17a52670
AS
2197 return -EFAULT;
2198 }
2199
17a52670
AS
2200 if (arg_type == ARG_CONST_MAP_PTR) {
2201 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
33ff9823 2202 meta->map_ptr = reg->map_ptr;
17a52670
AS
2203 } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
2204 /* bpf_map_xxx(..., map_ptr, ..., key) call:
2205 * check that [key, key + map->key_size) are within
2206 * stack limits and initialized
2207 */
33ff9823 2208 if (!meta->map_ptr) {
17a52670
AS
2209 /* in function declaration map_ptr must come before
2210 * map_key, so that it's verified and known before
2211 * we have to check map_key here. Otherwise it means
2212 * that kernel subsystem misconfigured verifier
2213 */
61bd5218 2214 verbose(env, "invalid map_ptr to access map->key\n");
17a52670
AS
2215 return -EACCES;
2216 }
d71962f3
PC
2217 err = check_helper_mem_access(env, regno,
2218 meta->map_ptr->key_size, false,
2219 NULL);
2ea864c5
MV
2220 } else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
2221 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
17a52670
AS
2222 /* bpf_map_xxx(..., map_ptr, ..., value) call:
2223 * check [value, value + map->value_size) validity
2224 */
33ff9823 2225 if (!meta->map_ptr) {
17a52670 2226 /* kernel subsystem misconfigured verifier */
61bd5218 2227 verbose(env, "invalid map_ptr to access map->value\n");
17a52670
AS
2228 return -EACCES;
2229 }
2ea864c5 2230 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
d71962f3
PC
2231 err = check_helper_mem_access(env, regno,
2232 meta->map_ptr->value_size, false,
2ea864c5 2233 meta);
90133415 2234 } else if (arg_type_is_mem_size(arg_type)) {
39f19ebb 2235 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
17a52670 2236
849fa506
YS
2237 /* remember the mem_size which may be used later
2238 * to refine return values.
2239 */
2240 meta->msize_smax_value = reg->smax_value;
2241 meta->msize_umax_value = reg->umax_value;
2242
f1174f77
EC
2243 /* The register is SCALAR_VALUE; the access check
2244 * happens using its boundaries.
06c1c049 2245 */
f1174f77 2246 if (!tnum_is_const(reg->var_off))
06c1c049
GB
2247 /* For unprivileged variable accesses, disable raw
2248 * mode so that the program is required to
2249 * initialize all the memory that the helper could
2250 * just partially fill up.
2251 */
2252 meta = NULL;
2253
b03c9f9f 2254 if (reg->smin_value < 0) {
61bd5218 2255 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
f1174f77
EC
2256 regno);
2257 return -EACCES;
2258 }
06c1c049 2259
b03c9f9f 2260 if (reg->umin_value == 0) {
f1174f77
EC
2261 err = check_helper_mem_access(env, regno - 1, 0,
2262 zero_size_allowed,
2263 meta);
06c1c049
GB
2264 if (err)
2265 return err;
06c1c049 2266 }
f1174f77 2267
b03c9f9f 2268 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
61bd5218 2269 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
f1174f77
EC
2270 regno);
2271 return -EACCES;
2272 }
2273 err = check_helper_mem_access(env, regno - 1,
b03c9f9f 2274 reg->umax_value,
f1174f77 2275 zero_size_allowed, meta);
17a52670
AS
2276 }
2277
2278 return err;
6841de8b 2279err_type:
61bd5218 2280 verbose(env, "R%d type=%s expected=%s\n", regno,
6841de8b
AS
2281 reg_type_str[type], reg_type_str[expected_type]);
2282 return -EACCES;
17a52670
AS
2283}
2284
61bd5218
JK
2285static int check_map_func_compatibility(struct bpf_verifier_env *env,
2286 struct bpf_map *map, int func_id)
35578d79 2287{
35578d79
KX
2288 if (!map)
2289 return 0;
2290
6aff67c8
AS
2291 /* We need a two way check, first is from map perspective ... */
2292 switch (map->map_type) {
2293 case BPF_MAP_TYPE_PROG_ARRAY:
2294 if (func_id != BPF_FUNC_tail_call)
2295 goto error;
2296 break;
2297 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
2298 if (func_id != BPF_FUNC_perf_event_read &&
908432ca
YS
2299 func_id != BPF_FUNC_perf_event_output &&
2300 func_id != BPF_FUNC_perf_event_read_value)
6aff67c8
AS
2301 goto error;
2302 break;
2303 case BPF_MAP_TYPE_STACK_TRACE:
2304 if (func_id != BPF_FUNC_get_stackid)
2305 goto error;
2306 break;
4ed8ec52 2307 case BPF_MAP_TYPE_CGROUP_ARRAY:
60747ef4 2308 if (func_id != BPF_FUNC_skb_under_cgroup &&
60d20f91 2309 func_id != BPF_FUNC_current_task_under_cgroup)
4a482f34
MKL
2310 goto error;
2311 break;
cd339431 2312 case BPF_MAP_TYPE_CGROUP_STORAGE:
b741f163 2313 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
cd339431
RG
2314 if (func_id != BPF_FUNC_get_local_storage)
2315 goto error;
2316 break;
546ac1ff
JF
2317 /* devmap returns a pointer to a live net_device ifindex that we cannot
2318 * allow to be modified from bpf side. So do not allow lookup elements
2319 * for now.
2320 */
2321 case BPF_MAP_TYPE_DEVMAP:
2ddf71e2 2322 if (func_id != BPF_FUNC_redirect_map)
546ac1ff
JF
2323 goto error;
2324 break;
fbfc504a
BT
2325 /* Restrict bpf side of cpumap and xskmap, open when use-cases
2326 * appear.
2327 */
6710e112 2328 case BPF_MAP_TYPE_CPUMAP:
fbfc504a 2329 case BPF_MAP_TYPE_XSKMAP:
6710e112
JDB
2330 if (func_id != BPF_FUNC_redirect_map)
2331 goto error;
2332 break;
56f668df 2333 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
bcc6b1b7 2334 case BPF_MAP_TYPE_HASH_OF_MAPS:
56f668df
MKL
2335 if (func_id != BPF_FUNC_map_lookup_elem)
2336 goto error;
16a43625 2337 break;
174a79ff
JF
2338 case BPF_MAP_TYPE_SOCKMAP:
2339 if (func_id != BPF_FUNC_sk_redirect_map &&
2340 func_id != BPF_FUNC_sock_map_update &&
4f738adb
JF
2341 func_id != BPF_FUNC_map_delete_elem &&
2342 func_id != BPF_FUNC_msg_redirect_map)
174a79ff
JF
2343 goto error;
2344 break;
81110384
JF
2345 case BPF_MAP_TYPE_SOCKHASH:
2346 if (func_id != BPF_FUNC_sk_redirect_hash &&
2347 func_id != BPF_FUNC_sock_hash_update &&
2348 func_id != BPF_FUNC_map_delete_elem &&
2349 func_id != BPF_FUNC_msg_redirect_hash)
2350 goto error;
2351 break;
2dbb9b9e
MKL
2352 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
2353 if (func_id != BPF_FUNC_sk_select_reuseport)
2354 goto error;
2355 break;
f1a2e44a
MV
2356 case BPF_MAP_TYPE_QUEUE:
2357 case BPF_MAP_TYPE_STACK:
2358 if (func_id != BPF_FUNC_map_peek_elem &&
2359 func_id != BPF_FUNC_map_pop_elem &&
2360 func_id != BPF_FUNC_map_push_elem)
2361 goto error;
2362 break;
6aff67c8
AS
2363 default:
2364 break;
2365 }
2366
2367 /* ... and second from the function itself. */
2368 switch (func_id) {
2369 case BPF_FUNC_tail_call:
2370 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
2371 goto error;
f910cefa 2372 if (env->subprog_cnt > 1) {
f4d7e40a
AS
2373 verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
2374 return -EINVAL;
2375 }
6aff67c8
AS
2376 break;
2377 case BPF_FUNC_perf_event_read:
2378 case BPF_FUNC_perf_event_output:
908432ca 2379 case BPF_FUNC_perf_event_read_value:
6aff67c8
AS
2380 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
2381 goto error;
2382 break;
2383 case BPF_FUNC_get_stackid:
2384 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
2385 goto error;
2386 break;
60d20f91 2387 case BPF_FUNC_current_task_under_cgroup:
747ea55e 2388 case BPF_FUNC_skb_under_cgroup:
4a482f34
MKL
2389 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
2390 goto error;
2391 break;
97f91a7c 2392 case BPF_FUNC_redirect_map:
9c270af3 2393 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
fbfc504a
BT
2394 map->map_type != BPF_MAP_TYPE_CPUMAP &&
2395 map->map_type != BPF_MAP_TYPE_XSKMAP)
97f91a7c
JF
2396 goto error;
2397 break;
174a79ff 2398 case BPF_FUNC_sk_redirect_map:
4f738adb 2399 case BPF_FUNC_msg_redirect_map:
81110384 2400 case BPF_FUNC_sock_map_update:
174a79ff
JF
2401 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
2402 goto error;
2403 break;
81110384
JF
2404 case BPF_FUNC_sk_redirect_hash:
2405 case BPF_FUNC_msg_redirect_hash:
2406 case BPF_FUNC_sock_hash_update:
2407 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
174a79ff
JF
2408 goto error;
2409 break;
cd339431 2410 case BPF_FUNC_get_local_storage:
b741f163
RG
2411 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
2412 map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
cd339431
RG
2413 goto error;
2414 break;
2dbb9b9e
MKL
2415 case BPF_FUNC_sk_select_reuseport:
2416 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
2417 goto error;
2418 break;
f1a2e44a
MV
2419 case BPF_FUNC_map_peek_elem:
2420 case BPF_FUNC_map_pop_elem:
2421 case BPF_FUNC_map_push_elem:
2422 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
2423 map->map_type != BPF_MAP_TYPE_STACK)
2424 goto error;
2425 break;
6aff67c8
AS
2426 default:
2427 break;
35578d79
KX
2428 }
2429
2430 return 0;
6aff67c8 2431error:
61bd5218 2432 verbose(env, "cannot pass map_type %d into func %s#%d\n",
ebb676da 2433 map->map_type, func_id_name(func_id), func_id);
6aff67c8 2434 return -EINVAL;
35578d79
KX
2435}
2436
90133415 2437static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
435faee1
DB
2438{
2439 int count = 0;
2440
39f19ebb 2441 if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 2442 count++;
39f19ebb 2443 if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 2444 count++;
39f19ebb 2445 if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 2446 count++;
39f19ebb 2447 if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 2448 count++;
39f19ebb 2449 if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
435faee1
DB
2450 count++;
2451
90133415
DB
2452 /* We only support one arg being in raw mode at the moment,
2453 * which is sufficient for the helper functions we have
2454 * right now.
2455 */
2456 return count <= 1;
2457}
2458
2459static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
2460 enum bpf_arg_type arg_next)
2461{
2462 return (arg_type_is_mem_ptr(arg_curr) &&
2463 !arg_type_is_mem_size(arg_next)) ||
2464 (!arg_type_is_mem_ptr(arg_curr) &&
2465 arg_type_is_mem_size(arg_next));
2466}
2467
2468static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
2469{
2470 /* bpf_xxx(..., buf, len) call will access 'len'
2471 * bytes from memory 'buf'. Both arg types need
2472 * to be paired, so make sure there's no buggy
2473 * helper function specification.
2474 */
2475 if (arg_type_is_mem_size(fn->arg1_type) ||
2476 arg_type_is_mem_ptr(fn->arg5_type) ||
2477 check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
2478 check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
2479 check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
2480 check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
2481 return false;
2482
2483 return true;
2484}
2485
fd978bf7
JS
2486static bool check_refcount_ok(const struct bpf_func_proto *fn)
2487{
2488 int count = 0;
2489
2490 if (arg_type_is_refcounted(fn->arg1_type))
2491 count++;
2492 if (arg_type_is_refcounted(fn->arg2_type))
2493 count++;
2494 if (arg_type_is_refcounted(fn->arg3_type))
2495 count++;
2496 if (arg_type_is_refcounted(fn->arg4_type))
2497 count++;
2498 if (arg_type_is_refcounted(fn->arg5_type))
2499 count++;
2500
2501 /* We only support one arg being unreferenced at the moment,
2502 * which is sufficient for the helper functions we have right now.
2503 */
2504 return count <= 1;
2505}
2506
90133415
DB
2507static int check_func_proto(const struct bpf_func_proto *fn)
2508{
2509 return check_raw_mode_ok(fn) &&
fd978bf7
JS
2510 check_arg_pair_ok(fn) &&
2511 check_refcount_ok(fn) ? 0 : -EINVAL;
435faee1
DB
2512}
2513
de8f3a83
DB
2514/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
2515 * are now invalid, so turn them into unknown SCALAR_VALUE.
f1174f77 2516 */
f4d7e40a
AS
2517static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
2518 struct bpf_func_state *state)
969bf05e 2519{
58e2af8b 2520 struct bpf_reg_state *regs = state->regs, *reg;
969bf05e
AS
2521 int i;
2522
2523 for (i = 0; i < MAX_BPF_REG; i++)
de8f3a83 2524 if (reg_is_pkt_pointer_any(&regs[i]))
61bd5218 2525 mark_reg_unknown(env, regs, i);
969bf05e 2526
f3709f69
JS
2527 bpf_for_each_spilled_reg(i, state, reg) {
2528 if (!reg)
969bf05e 2529 continue;
de8f3a83
DB
2530 if (reg_is_pkt_pointer_any(reg))
2531 __mark_reg_unknown(reg);
969bf05e
AS
2532 }
2533}
2534
f4d7e40a
AS
2535static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
2536{
2537 struct bpf_verifier_state *vstate = env->cur_state;
2538 int i;
2539
2540 for (i = 0; i <= vstate->curframe; i++)
2541 __clear_all_pkt_pointers(env, vstate->frame[i]);
2542}
2543
fd978bf7
JS
2544static void release_reg_references(struct bpf_verifier_env *env,
2545 struct bpf_func_state *state, int id)
2546{
2547 struct bpf_reg_state *regs = state->regs, *reg;
2548 int i;
2549
2550 for (i = 0; i < MAX_BPF_REG; i++)
2551 if (regs[i].id == id)
2552 mark_reg_unknown(env, regs, i);
2553
2554 bpf_for_each_spilled_reg(i, state, reg) {
2555 if (!reg)
2556 continue;
2557 if (reg_is_refcounted(reg) && reg->id == id)
2558 __mark_reg_unknown(reg);
2559 }
2560}
2561
2562/* The pointer with the specified id has released its reference to kernel
2563 * resources. Identify all copies of the same pointer and clear the reference.
2564 */
2565static int release_reference(struct bpf_verifier_env *env,
2566 struct bpf_call_arg_meta *meta)
2567{
2568 struct bpf_verifier_state *vstate = env->cur_state;
2569 int i;
2570
2571 for (i = 0; i <= vstate->curframe; i++)
2572 release_reg_references(env, vstate->frame[i], meta->ptr_id);
2573
2574 return release_reference_state(env, meta->ptr_id);
2575}
2576
f4d7e40a
AS
2577static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
2578 int *insn_idx)
2579{
2580 struct bpf_verifier_state *state = env->cur_state;
2581 struct bpf_func_state *caller, *callee;
fd978bf7 2582 int i, err, subprog, target_insn;
f4d7e40a 2583
aada9ce6 2584 if (state->curframe + 1 >= MAX_CALL_FRAMES) {
f4d7e40a 2585 verbose(env, "the call stack of %d frames is too deep\n",
aada9ce6 2586 state->curframe + 2);
f4d7e40a
AS
2587 return -E2BIG;
2588 }
2589
2590 target_insn = *insn_idx + insn->imm;
2591 subprog = find_subprog(env, target_insn + 1);
2592 if (subprog < 0) {
2593 verbose(env, "verifier bug. No program starts at insn %d\n",
2594 target_insn + 1);
2595 return -EFAULT;
2596 }
2597
2598 caller = state->frame[state->curframe];
2599 if (state->frame[state->curframe + 1]) {
2600 verbose(env, "verifier bug. Frame %d already allocated\n",
2601 state->curframe + 1);
2602 return -EFAULT;
2603 }
2604
2605 callee = kzalloc(sizeof(*callee), GFP_KERNEL);
2606 if (!callee)
2607 return -ENOMEM;
2608 state->frame[state->curframe + 1] = callee;
2609
2610 /* callee cannot access r0, r6 - r9 for reading and has to write
2611 * into its own stack before reading from it.
2612 * callee can read/write into caller's stack
2613 */
2614 init_func_state(env, callee,
2615 /* remember the callsite, it will be used by bpf_exit */
2616 *insn_idx /* callsite */,
2617 state->curframe + 1 /* frameno within this callchain */,
f910cefa 2618 subprog /* subprog number within this prog */);
f4d7e40a 2619
fd978bf7
JS
2620 /* Transfer references to the callee */
2621 err = transfer_reference_state(callee, caller);
2622 if (err)
2623 return err;
2624
679c782d
EC
2625 /* copy r1 - r5 args that callee can access. The copy includes parent
2626 * pointers, which connects us up to the liveness chain
2627 */
f4d7e40a
AS
2628 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
2629 callee->regs[i] = caller->regs[i];
2630
679c782d 2631 /* after the call registers r0 - r5 were scratched */
f4d7e40a
AS
2632 for (i = 0; i < CALLER_SAVED_REGS; i++) {
2633 mark_reg_not_init(env, caller->regs, caller_saved[i]);
2634 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
2635 }
2636
2637 /* only increment it after check_reg_arg() finished */
2638 state->curframe++;
2639
2640 /* and go analyze first insn of the callee */
2641 *insn_idx = target_insn;
2642
2643 if (env->log.level) {
2644 verbose(env, "caller:\n");
2645 print_verifier_state(env, caller);
2646 verbose(env, "callee:\n");
2647 print_verifier_state(env, callee);
2648 }
2649 return 0;
2650}
2651
2652static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
2653{
2654 struct bpf_verifier_state *state = env->cur_state;
2655 struct bpf_func_state *caller, *callee;
2656 struct bpf_reg_state *r0;
fd978bf7 2657 int err;
f4d7e40a
AS
2658
2659 callee = state->frame[state->curframe];
2660 r0 = &callee->regs[BPF_REG_0];
2661 if (r0->type == PTR_TO_STACK) {
2662 /* technically it's ok to return caller's stack pointer
2663 * (or caller's caller's pointer) back to the caller,
2664 * since these pointers are valid. Only current stack
2665 * pointer will be invalid as soon as function exits,
2666 * but let's be conservative
2667 */
2668 verbose(env, "cannot return stack pointer to the caller\n");
2669 return -EINVAL;
2670 }
2671
2672 state->curframe--;
2673 caller = state->frame[state->curframe];
2674 /* return to the caller whatever r0 had in the callee */
2675 caller->regs[BPF_REG_0] = *r0;
2676
fd978bf7
JS
2677 /* Transfer references to the caller */
2678 err = transfer_reference_state(caller, callee);
2679 if (err)
2680 return err;
2681
f4d7e40a
AS
2682 *insn_idx = callee->callsite + 1;
2683 if (env->log.level) {
2684 verbose(env, "returning from callee:\n");
2685 print_verifier_state(env, callee);
2686 verbose(env, "to caller at %d:\n", *insn_idx);
2687 print_verifier_state(env, caller);
2688 }
2689 /* clear everything in the callee */
2690 free_func_state(callee);
2691 state->frame[state->curframe + 1] = NULL;
2692 return 0;
2693}
2694
849fa506
YS
2695static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
2696 int func_id,
2697 struct bpf_call_arg_meta *meta)
2698{
2699 struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
2700
2701 if (ret_type != RET_INTEGER ||
2702 (func_id != BPF_FUNC_get_stack &&
2703 func_id != BPF_FUNC_probe_read_str))
2704 return;
2705
2706 ret_reg->smax_value = meta->msize_smax_value;
2707 ret_reg->umax_value = meta->msize_umax_value;
2708 __reg_deduce_bounds(ret_reg);
2709 __reg_bound_offset(ret_reg);
2710}
2711
c93552c4
DB
2712static int
2713record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
2714 int func_id, int insn_idx)
2715{
2716 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
2717
2718 if (func_id != BPF_FUNC_tail_call &&
09772d92
DB
2719 func_id != BPF_FUNC_map_lookup_elem &&
2720 func_id != BPF_FUNC_map_update_elem &&
f1a2e44a
MV
2721 func_id != BPF_FUNC_map_delete_elem &&
2722 func_id != BPF_FUNC_map_push_elem &&
2723 func_id != BPF_FUNC_map_pop_elem &&
2724 func_id != BPF_FUNC_map_peek_elem)
c93552c4 2725 return 0;
09772d92 2726
c93552c4
DB
2727 if (meta->map_ptr == NULL) {
2728 verbose(env, "kernel subsystem misconfigured verifier\n");
2729 return -EINVAL;
2730 }
2731
2732 if (!BPF_MAP_PTR(aux->map_state))
2733 bpf_map_ptr_store(aux, meta->map_ptr,
2734 meta->map_ptr->unpriv_array);
2735 else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr)
2736 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
2737 meta->map_ptr->unpriv_array);
2738 return 0;
2739}
2740
fd978bf7
JS
2741static int check_reference_leak(struct bpf_verifier_env *env)
2742{
2743 struct bpf_func_state *state = cur_func(env);
2744 int i;
2745
2746 for (i = 0; i < state->acquired_refs; i++) {
2747 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
2748 state->refs[i].id, state->refs[i].insn_idx);
2749 }
2750 return state->acquired_refs ? -EINVAL : 0;
2751}
2752
f4d7e40a 2753static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
17a52670 2754{
17a52670 2755 const struct bpf_func_proto *fn = NULL;
638f5b90 2756 struct bpf_reg_state *regs;
33ff9823 2757 struct bpf_call_arg_meta meta;
969bf05e 2758 bool changes_data;
17a52670
AS
2759 int i, err;
2760
2761 /* find function prototype */
2762 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
61bd5218
JK
2763 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
2764 func_id);
17a52670
AS
2765 return -EINVAL;
2766 }
2767
00176a34 2768 if (env->ops->get_func_proto)
5e43f899 2769 fn = env->ops->get_func_proto(func_id, env->prog);
17a52670 2770 if (!fn) {
61bd5218
JK
2771 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
2772 func_id);
17a52670
AS
2773 return -EINVAL;
2774 }
2775
2776 /* eBPF programs must be GPL compatible to use GPL-ed functions */
24701ece 2777 if (!env->prog->gpl_compatible && fn->gpl_only) {
3fe2867c 2778 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
17a52670
AS
2779 return -EINVAL;
2780 }
2781
04514d13 2782 /* With LD_ABS/IND some JITs save/restore skb from r1. */
17bedab2 2783 changes_data = bpf_helper_changes_pkt_data(fn->func);
04514d13
DB
2784 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
2785 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
2786 func_id_name(func_id), func_id);
2787 return -EINVAL;
2788 }
969bf05e 2789
33ff9823 2790 memset(&meta, 0, sizeof(meta));
36bbef52 2791 meta.pkt_access = fn->pkt_access;
33ff9823 2792
90133415 2793 err = check_func_proto(fn);
435faee1 2794 if (err) {
61bd5218 2795 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
ebb676da 2796 func_id_name(func_id), func_id);
435faee1
DB
2797 return err;
2798 }
2799
17a52670 2800 /* check args */
33ff9823 2801 err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
17a52670
AS
2802 if (err)
2803 return err;
33ff9823 2804 err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
17a52670
AS
2805 if (err)
2806 return err;
33ff9823 2807 err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
17a52670
AS
2808 if (err)
2809 return err;
33ff9823 2810 err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
17a52670
AS
2811 if (err)
2812 return err;
33ff9823 2813 err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
17a52670
AS
2814 if (err)
2815 return err;
2816
c93552c4
DB
2817 err = record_func_map(env, &meta, func_id, insn_idx);
2818 if (err)
2819 return err;
2820
435faee1
DB
2821 /* Mark slots with STACK_MISC in case of raw mode, stack offset
2822 * is inferred from register state.
2823 */
2824 for (i = 0; i < meta.access_size; i++) {
ca369602
DB
2825 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
2826 BPF_WRITE, -1, false);
435faee1
DB
2827 if (err)
2828 return err;
2829 }
2830
fd978bf7
JS
2831 if (func_id == BPF_FUNC_tail_call) {
2832 err = check_reference_leak(env);
2833 if (err) {
2834 verbose(env, "tail_call would lead to reference leak\n");
2835 return err;
2836 }
2837 } else if (is_release_function(func_id)) {
2838 err = release_reference(env, &meta);
2839 if (err)
2840 return err;
2841 }
2842
638f5b90 2843 regs = cur_regs(env);
cd339431
RG
2844
2845 /* check that flags argument in get_local_storage(map, flags) is 0,
2846 * this is required because get_local_storage() can't return an error.
2847 */
2848 if (func_id == BPF_FUNC_get_local_storage &&
2849 !register_is_null(&regs[BPF_REG_2])) {
2850 verbose(env, "get_local_storage() doesn't support non-zero flags\n");
2851 return -EINVAL;
2852 }
2853
17a52670 2854 /* reset caller saved regs */
dc503a8a 2855 for (i = 0; i < CALLER_SAVED_REGS; i++) {
61bd5218 2856 mark_reg_not_init(env, regs, caller_saved[i]);
dc503a8a
EC
2857 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
2858 }
17a52670 2859
dc503a8a 2860 /* update return register (already marked as written above) */
17a52670 2861 if (fn->ret_type == RET_INTEGER) {
f1174f77 2862 /* sets type to SCALAR_VALUE */
61bd5218 2863 mark_reg_unknown(env, regs, BPF_REG_0);
17a52670
AS
2864 } else if (fn->ret_type == RET_VOID) {
2865 regs[BPF_REG_0].type = NOT_INIT;
3e6a4b3e
RG
2866 } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
2867 fn->ret_type == RET_PTR_TO_MAP_VALUE) {
f1174f77 2868 /* There is no offset yet applied, variable or fixed */
61bd5218 2869 mark_reg_known_zero(env, regs, BPF_REG_0);
17a52670
AS
2870 /* remember map_ptr, so that check_map_access()
2871 * can check 'value_size' boundary of memory access
2872 * to map element returned from bpf_map_lookup_elem()
2873 */
33ff9823 2874 if (meta.map_ptr == NULL) {
61bd5218
JK
2875 verbose(env,
2876 "kernel subsystem misconfigured verifier\n");
17a52670
AS
2877 return -EINVAL;
2878 }
33ff9823 2879 regs[BPF_REG_0].map_ptr = meta.map_ptr;
4d31f301
DB
2880 if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
2881 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
2882 } else {
2883 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
2884 regs[BPF_REG_0].id = ++env->id_gen;
2885 }
c64b7983 2886 } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
fd978bf7
JS
2887 int id = acquire_reference_state(env, insn_idx);
2888 if (id < 0)
2889 return id;
c64b7983
JS
2890 mark_reg_known_zero(env, regs, BPF_REG_0);
2891 regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
fd978bf7 2892 regs[BPF_REG_0].id = id;
17a52670 2893 } else {
61bd5218 2894 verbose(env, "unknown return type %d of func %s#%d\n",
ebb676da 2895 fn->ret_type, func_id_name(func_id), func_id);
17a52670
AS
2896 return -EINVAL;
2897 }
04fd61ab 2898
849fa506
YS
2899 do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
2900
61bd5218 2901 err = check_map_func_compatibility(env, meta.map_ptr, func_id);
35578d79
KX
2902 if (err)
2903 return err;
04fd61ab 2904
c195651e
YS
2905 if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
2906 const char *err_str;
2907
2908#ifdef CONFIG_PERF_EVENTS
2909 err = get_callchain_buffers(sysctl_perf_event_max_stack);
2910 err_str = "cannot get callchain buffer for func %s#%d\n";
2911#else
2912 err = -ENOTSUPP;
2913 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
2914#endif
2915 if (err) {
2916 verbose(env, err_str, func_id_name(func_id), func_id);
2917 return err;
2918 }
2919
2920 env->prog->has_callchain_buf = true;
2921 }
2922
969bf05e
AS
2923 if (changes_data)
2924 clear_all_pkt_pointers(env);
2925 return 0;
2926}
2927
b03c9f9f
EC
2928static bool signed_add_overflows(s64 a, s64 b)
2929{
2930 /* Do the add in u64, where overflow is well-defined */
2931 s64 res = (s64)((u64)a + (u64)b);
2932
2933 if (b < 0)
2934 return res > a;
2935 return res < a;
2936}
2937
2938static bool signed_sub_overflows(s64 a, s64 b)
2939{
2940 /* Do the sub in u64, where overflow is well-defined */
2941 s64 res = (s64)((u64)a - (u64)b);
2942
2943 if (b < 0)
2944 return res < a;
2945 return res > a;
969bf05e
AS
2946}
2947
bb7f0f98
AS
2948static bool check_reg_sane_offset(struct bpf_verifier_env *env,
2949 const struct bpf_reg_state *reg,
2950 enum bpf_reg_type type)
2951{
2952 bool known = tnum_is_const(reg->var_off);
2953 s64 val = reg->var_off.value;
2954 s64 smin = reg->smin_value;
2955
2956 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
2957 verbose(env, "math between %s pointer and %lld is not allowed\n",
2958 reg_type_str[type], val);
2959 return false;
2960 }
2961
2962 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
2963 verbose(env, "%s pointer offset %d is not allowed\n",
2964 reg_type_str[type], reg->off);
2965 return false;
2966 }
2967
2968 if (smin == S64_MIN) {
2969 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
2970 reg_type_str[type]);
2971 return false;
2972 }
2973
2974 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
2975 verbose(env, "value %lld makes %s pointer be out of bounds\n",
2976 smin, reg_type_str[type]);
2977 return false;
2978 }
2979
2980 return true;
2981}
2982
f1174f77 2983/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
f1174f77
EC
2984 * Caller should also handle BPF_MOV case separately.
2985 * If we return -EACCES, caller may want to try again treating pointer as a
2986 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
2987 */
2988static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
2989 struct bpf_insn *insn,
2990 const struct bpf_reg_state *ptr_reg,
2991 const struct bpf_reg_state *off_reg)
969bf05e 2992{
f4d7e40a
AS
2993 struct bpf_verifier_state *vstate = env->cur_state;
2994 struct bpf_func_state *state = vstate->frame[vstate->curframe];
2995 struct bpf_reg_state *regs = state->regs, *dst_reg;
f1174f77 2996 bool known = tnum_is_const(off_reg->var_off);
b03c9f9f
EC
2997 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
2998 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
2999 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
3000 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
969bf05e 3001 u8 opcode = BPF_OP(insn->code);
f1174f77 3002 u32 dst = insn->dst_reg;
969bf05e 3003
f1174f77 3004 dst_reg = &regs[dst];
969bf05e 3005
6f16101e
DB
3006 if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
3007 smin_val > smax_val || umin_val > umax_val) {
3008 /* Taint dst register if offset had invalid bounds derived from
3009 * e.g. dead branches.
3010 */
3011 __mark_reg_unknown(dst_reg);
3012 return 0;
f1174f77
EC
3013 }
3014
3015 if (BPF_CLASS(insn->code) != BPF_ALU64) {
3016 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
82abbf8d
AS
3017 verbose(env,
3018 "R%d 32-bit pointer arithmetic prohibited\n",
3019 dst);
f1174f77 3020 return -EACCES;
969bf05e
AS
3021 }
3022
aad2eeaf
JS
3023 switch (ptr_reg->type) {
3024 case PTR_TO_MAP_VALUE_OR_NULL:
3025 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
3026 dst, reg_type_str[ptr_reg->type]);
f1174f77 3027 return -EACCES;
aad2eeaf
JS
3028 case CONST_PTR_TO_MAP:
3029 case PTR_TO_PACKET_END:
c64b7983
JS
3030 case PTR_TO_SOCKET:
3031 case PTR_TO_SOCKET_OR_NULL:
aad2eeaf
JS
3032 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
3033 dst, reg_type_str[ptr_reg->type]);
f1174f77 3034 return -EACCES;
aad2eeaf
JS
3035 default:
3036 break;
f1174f77
EC
3037 }
3038
3039 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
3040 * The id may be overwritten later if we create a new variable offset.
969bf05e 3041 */
f1174f77
EC
3042 dst_reg->type = ptr_reg->type;
3043 dst_reg->id = ptr_reg->id;
969bf05e 3044
bb7f0f98
AS
3045 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
3046 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
3047 return -EINVAL;
3048
f1174f77
EC
3049 switch (opcode) {
3050 case BPF_ADD:
3051 /* We can take a fixed offset as long as it doesn't overflow
3052 * the s32 'off' field
969bf05e 3053 */
b03c9f9f
EC
3054 if (known && (ptr_reg->off + smin_val ==
3055 (s64)(s32)(ptr_reg->off + smin_val))) {
f1174f77 3056 /* pointer += K. Accumulate it into fixed offset */
b03c9f9f
EC
3057 dst_reg->smin_value = smin_ptr;
3058 dst_reg->smax_value = smax_ptr;
3059 dst_reg->umin_value = umin_ptr;
3060 dst_reg->umax_value = umax_ptr;
f1174f77 3061 dst_reg->var_off = ptr_reg->var_off;
b03c9f9f 3062 dst_reg->off = ptr_reg->off + smin_val;
0962590e 3063 dst_reg->raw = ptr_reg->raw;
f1174f77
EC
3064 break;
3065 }
f1174f77
EC
3066 /* A new variable offset is created. Note that off_reg->off
3067 * == 0, since it's a scalar.
3068 * dst_reg gets the pointer type and since some positive
3069 * integer value was added to the pointer, give it a new 'id'
3070 * if it's a PTR_TO_PACKET.
3071 * this creates a new 'base' pointer, off_reg (variable) gets
3072 * added into the variable offset, and we copy the fixed offset
3073 * from ptr_reg.
969bf05e 3074 */
b03c9f9f
EC
3075 if (signed_add_overflows(smin_ptr, smin_val) ||
3076 signed_add_overflows(smax_ptr, smax_val)) {
3077 dst_reg->smin_value = S64_MIN;
3078 dst_reg->smax_value = S64_MAX;
3079 } else {
3080 dst_reg->smin_value = smin_ptr + smin_val;
3081 dst_reg->smax_value = smax_ptr + smax_val;
3082 }
3083 if (umin_ptr + umin_val < umin_ptr ||
3084 umax_ptr + umax_val < umax_ptr) {
3085 dst_reg->umin_value = 0;
3086 dst_reg->umax_value = U64_MAX;
3087 } else {
3088 dst_reg->umin_value = umin_ptr + umin_val;
3089 dst_reg->umax_value = umax_ptr + umax_val;
3090 }
f1174f77
EC
3091 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
3092 dst_reg->off = ptr_reg->off;
0962590e 3093 dst_reg->raw = ptr_reg->raw;
de8f3a83 3094 if (reg_is_pkt_pointer(ptr_reg)) {
f1174f77
EC
3095 dst_reg->id = ++env->id_gen;
3096 /* something was added to pkt_ptr, set range to zero */
0962590e 3097 dst_reg->raw = 0;
f1174f77
EC
3098 }
3099 break;
3100 case BPF_SUB:
3101 if (dst_reg == off_reg) {
3102 /* scalar -= pointer. Creates an unknown scalar */
82abbf8d
AS
3103 verbose(env, "R%d tried to subtract pointer from scalar\n",
3104 dst);
f1174f77
EC
3105 return -EACCES;
3106 }
3107 /* We don't allow subtraction from FP, because (according to
3108 * test_verifier.c test "invalid fp arithmetic", JITs might not
3109 * be able to deal with it.
969bf05e 3110 */
f1174f77 3111 if (ptr_reg->type == PTR_TO_STACK) {
82abbf8d
AS
3112 verbose(env, "R%d subtraction from stack pointer prohibited\n",
3113 dst);
f1174f77
EC
3114 return -EACCES;
3115 }
b03c9f9f
EC
3116 if (known && (ptr_reg->off - smin_val ==
3117 (s64)(s32)(ptr_reg->off - smin_val))) {
f1174f77 3118 /* pointer -= K. Subtract it from fixed offset */
b03c9f9f
EC
3119 dst_reg->smin_value = smin_ptr;
3120 dst_reg->smax_value = smax_ptr;
3121 dst_reg->umin_value = umin_ptr;
3122 dst_reg->umax_value = umax_ptr;
f1174f77
EC
3123 dst_reg->var_off = ptr_reg->var_off;
3124 dst_reg->id = ptr_reg->id;
b03c9f9f 3125 dst_reg->off = ptr_reg->off - smin_val;
0962590e 3126 dst_reg->raw = ptr_reg->raw;
f1174f77
EC
3127 break;
3128 }
f1174f77
EC
3129 /* A new variable offset is created. If the subtrahend is known
3130 * nonnegative, then any reg->range we had before is still good.
969bf05e 3131 */
b03c9f9f
EC
3132 if (signed_sub_overflows(smin_ptr, smax_val) ||
3133 signed_sub_overflows(smax_ptr, smin_val)) {
3134 /* Overflow possible, we know nothing */
3135 dst_reg->smin_value = S64_MIN;
3136 dst_reg->smax_value = S64_MAX;
3137 } else {
3138 dst_reg->smin_value = smin_ptr - smax_val;
3139 dst_reg->smax_value = smax_ptr - smin_val;
3140 }
3141 if (umin_ptr < umax_val) {
3142 /* Overflow possible, we know nothing */
3143 dst_reg->umin_value = 0;
3144 dst_reg->umax_value = U64_MAX;
3145 } else {
3146 /* Cannot overflow (as long as bounds are consistent) */
3147 dst_reg->umin_value = umin_ptr - umax_val;
3148 dst_reg->umax_value = umax_ptr - umin_val;
3149 }
f1174f77
EC
3150 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
3151 dst_reg->off = ptr_reg->off;
0962590e 3152 dst_reg->raw = ptr_reg->raw;
de8f3a83 3153 if (reg_is_pkt_pointer(ptr_reg)) {
f1174f77
EC
3154 dst_reg->id = ++env->id_gen;
3155 /* something was added to pkt_ptr, set range to zero */
b03c9f9f 3156 if (smin_val < 0)
0962590e 3157 dst_reg->raw = 0;
43188702 3158 }
f1174f77
EC
3159 break;
3160 case BPF_AND:
3161 case BPF_OR:
3162 case BPF_XOR:
82abbf8d
AS
3163 /* bitwise ops on pointers are troublesome, prohibit. */
3164 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
3165 dst, bpf_alu_string[opcode >> 4]);
f1174f77
EC
3166 return -EACCES;
3167 default:
3168 /* other operators (e.g. MUL,LSH) produce non-pointer results */
82abbf8d
AS
3169 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
3170 dst, bpf_alu_string[opcode >> 4]);
f1174f77 3171 return -EACCES;
43188702
JF
3172 }
3173
bb7f0f98
AS
3174 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
3175 return -EINVAL;
3176
b03c9f9f
EC
3177 __update_reg_bounds(dst_reg);
3178 __reg_deduce_bounds(dst_reg);
3179 __reg_bound_offset(dst_reg);
43188702
JF
3180 return 0;
3181}
3182
468f6eaf
JH
3183/* WARNING: This function does calculations on 64-bit values, but the actual
3184 * execution may occur on 32-bit values. Therefore, things like bitshifts
3185 * need extra checks in the 32-bit case.
3186 */
f1174f77
EC
3187static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
3188 struct bpf_insn *insn,
3189 struct bpf_reg_state *dst_reg,
3190 struct bpf_reg_state src_reg)
969bf05e 3191{
638f5b90 3192 struct bpf_reg_state *regs = cur_regs(env);
48461135 3193 u8 opcode = BPF_OP(insn->code);
f1174f77 3194 bool src_known, dst_known;
b03c9f9f
EC
3195 s64 smin_val, smax_val;
3196 u64 umin_val, umax_val;
468f6eaf 3197 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
48461135 3198
b799207e
JH
3199 if (insn_bitness == 32) {
3200 /* Relevant for 32-bit RSH: Information can propagate towards
3201 * LSB, so it isn't sufficient to only truncate the output to
3202 * 32 bits.
3203 */
3204 coerce_reg_to_size(dst_reg, 4);
3205 coerce_reg_to_size(&src_reg, 4);
3206 }
3207
b03c9f9f
EC
3208 smin_val = src_reg.smin_value;
3209 smax_val = src_reg.smax_value;
3210 umin_val = src_reg.umin_value;
3211 umax_val = src_reg.umax_value;
f1174f77
EC
3212 src_known = tnum_is_const(src_reg.var_off);
3213 dst_known = tnum_is_const(dst_reg->var_off);
f23cc643 3214
6f16101e
DB
3215 if ((src_known && (smin_val != smax_val || umin_val != umax_val)) ||
3216 smin_val > smax_val || umin_val > umax_val) {
3217 /* Taint dst register if offset had invalid bounds derived from
3218 * e.g. dead branches.
3219 */
3220 __mark_reg_unknown(dst_reg);
3221 return 0;
3222 }
3223
bb7f0f98
AS
3224 if (!src_known &&
3225 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
3226 __mark_reg_unknown(dst_reg);
3227 return 0;
3228 }
3229
48461135
JB
3230 switch (opcode) {
3231 case BPF_ADD:
b03c9f9f
EC
3232 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
3233 signed_add_overflows(dst_reg->smax_value, smax_val)) {
3234 dst_reg->smin_value = S64_MIN;
3235 dst_reg->smax_value = S64_MAX;
3236 } else {
3237 dst_reg->smin_value += smin_val;
3238 dst_reg->smax_value += smax_val;
3239 }
3240 if (dst_reg->umin_value + umin_val < umin_val ||
3241 dst_reg->umax_value + umax_val < umax_val) {
3242 dst_reg->umin_value = 0;
3243 dst_reg->umax_value = U64_MAX;
3244 } else {
3245 dst_reg->umin_value += umin_val;
3246 dst_reg->umax_value += umax_val;
3247 }
f1174f77 3248 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
48461135
JB
3249 break;
3250 case BPF_SUB:
b03c9f9f
EC
3251 if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
3252 signed_sub_overflows(dst_reg->smax_value, smin_val)) {
3253 /* Overflow possible, we know nothing */
3254 dst_reg->smin_value = S64_MIN;
3255 dst_reg->smax_value = S64_MAX;
3256 } else {
3257 dst_reg->smin_value -= smax_val;
3258 dst_reg->smax_value -= smin_val;
3259 }
3260 if (dst_reg->umin_value < umax_val) {
3261 /* Overflow possible, we know nothing */
3262 dst_reg->umin_value = 0;
3263 dst_reg->umax_value = U64_MAX;
3264 } else {
3265 /* Cannot overflow (as long as bounds are consistent) */
3266 dst_reg->umin_value -= umax_val;
3267 dst_reg->umax_value -= umin_val;
3268 }
f1174f77 3269 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
48461135
JB
3270 break;
3271 case BPF_MUL:
b03c9f9f
EC
3272 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
3273 if (smin_val < 0 || dst_reg->smin_value < 0) {
f1174f77 3274 /* Ain't nobody got time to multiply that sign */
b03c9f9f
EC
3275 __mark_reg_unbounded(dst_reg);
3276 __update_reg_bounds(dst_reg);
f1174f77
EC
3277 break;
3278 }
b03c9f9f
EC
3279 /* Both values are positive, so we can work with unsigned and
3280 * copy the result to signed (unless it exceeds S64_MAX).
f1174f77 3281 */
b03c9f9f
EC
3282 if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
3283 /* Potential overflow, we know nothing */
3284 __mark_reg_unbounded(dst_reg);
3285 /* (except what we can learn from the var_off) */
3286 __update_reg_bounds(dst_reg);
3287 break;
3288 }
3289 dst_reg->umin_value *= umin_val;
3290 dst_reg->umax_value *= umax_val;
3291 if (dst_reg->umax_value > S64_MAX) {
3292 /* Overflow possible, we know nothing */
3293 dst_reg->smin_value = S64_MIN;
3294 dst_reg->smax_value = S64_MAX;
3295 } else {
3296 dst_reg->smin_value = dst_reg->umin_value;
3297 dst_reg->smax_value = dst_reg->umax_value;
3298 }
48461135
JB
3299 break;
3300 case BPF_AND:
f1174f77 3301 if (src_known && dst_known) {
b03c9f9f
EC
3302 __mark_reg_known(dst_reg, dst_reg->var_off.value &
3303 src_reg.var_off.value);
f1174f77
EC
3304 break;
3305 }
b03c9f9f
EC
3306 /* We get our minimum from the var_off, since that's inherently
3307 * bitwise. Our maximum is the minimum of the operands' maxima.
f23cc643 3308 */
f1174f77 3309 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
b03c9f9f
EC
3310 dst_reg->umin_value = dst_reg->var_off.value;
3311 dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
3312 if (dst_reg->smin_value < 0 || smin_val < 0) {
3313 /* Lose signed bounds when ANDing negative numbers,
3314 * ain't nobody got time for that.
3315 */
3316 dst_reg->smin_value = S64_MIN;
3317 dst_reg->smax_value = S64_MAX;
3318 } else {
3319 /* ANDing two positives gives a positive, so safe to
3320 * cast result into s64.
3321 */
3322 dst_reg->smin_value = dst_reg->umin_value;
3323 dst_reg->smax_value = dst_reg->umax_value;
3324 }
3325 /* We may learn something more from the var_off */
3326 __update_reg_bounds(dst_reg);
f1174f77
EC
3327 break;
3328 case BPF_OR:
3329 if (src_known && dst_known) {
b03c9f9f
EC
3330 __mark_reg_known(dst_reg, dst_reg->var_off.value |
3331 src_reg.var_off.value);
f1174f77
EC
3332 break;
3333 }
b03c9f9f
EC
3334 /* We get our maximum from the var_off, and our minimum is the
3335 * maximum of the operands' minima
f1174f77
EC
3336 */
3337 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
b03c9f9f
EC
3338 dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
3339 dst_reg->umax_value = dst_reg->var_off.value |
3340 dst_reg->var_off.mask;
3341 if (dst_reg->smin_value < 0 || smin_val < 0) {
3342 /* Lose signed bounds when ORing negative numbers,
3343 * ain't nobody got time for that.
3344 */
3345 dst_reg->smin_value = S64_MIN;
3346 dst_reg->smax_value = S64_MAX;
f1174f77 3347 } else {
b03c9f9f
EC
3348 /* ORing two positives gives a positive, so safe to
3349 * cast result into s64.
3350 */
3351 dst_reg->smin_value = dst_reg->umin_value;
3352 dst_reg->smax_value = dst_reg->umax_value;
f1174f77 3353 }
b03c9f9f
EC
3354 /* We may learn something more from the var_off */
3355 __update_reg_bounds(dst_reg);
48461135
JB
3356 break;
3357 case BPF_LSH:
468f6eaf
JH
3358 if (umax_val >= insn_bitness) {
3359 /* Shifts greater than 31 or 63 are undefined.
3360 * This includes shifts by a negative number.
b03c9f9f 3361 */
61bd5218 3362 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77
EC
3363 break;
3364 }
b03c9f9f
EC
3365 /* We lose all sign bit information (except what we can pick
3366 * up from var_off)
48461135 3367 */
b03c9f9f
EC
3368 dst_reg->smin_value = S64_MIN;
3369 dst_reg->smax_value = S64_MAX;
3370 /* If we might shift our top bit out, then we know nothing */
3371 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
3372 dst_reg->umin_value = 0;
3373 dst_reg->umax_value = U64_MAX;
d1174416 3374 } else {
b03c9f9f
EC
3375 dst_reg->umin_value <<= umin_val;
3376 dst_reg->umax_value <<= umax_val;
d1174416 3377 }
afbe1a5b 3378 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
b03c9f9f
EC
3379 /* We may learn something more from the var_off */
3380 __update_reg_bounds(dst_reg);
48461135
JB
3381 break;
3382 case BPF_RSH:
468f6eaf
JH
3383 if (umax_val >= insn_bitness) {
3384 /* Shifts greater than 31 or 63 are undefined.
3385 * This includes shifts by a negative number.
b03c9f9f 3386 */
61bd5218 3387 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77
EC
3388 break;
3389 }
4374f256
EC
3390 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
3391 * be negative, then either:
3392 * 1) src_reg might be zero, so the sign bit of the result is
3393 * unknown, so we lose our signed bounds
3394 * 2) it's known negative, thus the unsigned bounds capture the
3395 * signed bounds
3396 * 3) the signed bounds cross zero, so they tell us nothing
3397 * about the result
3398 * If the value in dst_reg is known nonnegative, then again the
3399 * unsigned bounts capture the signed bounds.
3400 * Thus, in all cases it suffices to blow away our signed bounds
3401 * and rely on inferring new ones from the unsigned bounds and
3402 * var_off of the result.
3403 */
3404 dst_reg->smin_value = S64_MIN;
3405 dst_reg->smax_value = S64_MAX;
afbe1a5b 3406 dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
b03c9f9f
EC
3407 dst_reg->umin_value >>= umax_val;
3408 dst_reg->umax_value >>= umin_val;
3409 /* We may learn something more from the var_off */
3410 __update_reg_bounds(dst_reg);
48461135 3411 break;
9cbe1f5a
YS
3412 case BPF_ARSH:
3413 if (umax_val >= insn_bitness) {
3414 /* Shifts greater than 31 or 63 are undefined.
3415 * This includes shifts by a negative number.
3416 */
3417 mark_reg_unknown(env, regs, insn->dst_reg);
3418 break;
3419 }
3420
3421 /* Upon reaching here, src_known is true and
3422 * umax_val is equal to umin_val.
3423 */
3424 dst_reg->smin_value >>= umin_val;
3425 dst_reg->smax_value >>= umin_val;
3426 dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val);
3427
3428 /* blow away the dst_reg umin_value/umax_value and rely on
3429 * dst_reg var_off to refine the result.
3430 */
3431 dst_reg->umin_value = 0;
3432 dst_reg->umax_value = U64_MAX;
3433 __update_reg_bounds(dst_reg);
3434 break;
48461135 3435 default:
61bd5218 3436 mark_reg_unknown(env, regs, insn->dst_reg);
48461135
JB
3437 break;
3438 }
3439
468f6eaf
JH
3440 if (BPF_CLASS(insn->code) != BPF_ALU64) {
3441 /* 32-bit ALU ops are (32,32)->32 */
3442 coerce_reg_to_size(dst_reg, 4);
468f6eaf
JH
3443 }
3444
b03c9f9f
EC
3445 __reg_deduce_bounds(dst_reg);
3446 __reg_bound_offset(dst_reg);
f1174f77
EC
3447 return 0;
3448}
3449
3450/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
3451 * and var_off.
3452 */
3453static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
3454 struct bpf_insn *insn)
3455{
f4d7e40a
AS
3456 struct bpf_verifier_state *vstate = env->cur_state;
3457 struct bpf_func_state *state = vstate->frame[vstate->curframe];
3458 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
f1174f77
EC
3459 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
3460 u8 opcode = BPF_OP(insn->code);
f1174f77
EC
3461
3462 dst_reg = &regs[insn->dst_reg];
f1174f77
EC
3463 src_reg = NULL;
3464 if (dst_reg->type != SCALAR_VALUE)
3465 ptr_reg = dst_reg;
3466 if (BPF_SRC(insn->code) == BPF_X) {
3467 src_reg = &regs[insn->src_reg];
f1174f77
EC
3468 if (src_reg->type != SCALAR_VALUE) {
3469 if (dst_reg->type != SCALAR_VALUE) {
3470 /* Combining two pointers by any ALU op yields
82abbf8d
AS
3471 * an arbitrary scalar. Disallow all math except
3472 * pointer subtraction
f1174f77 3473 */
dd066823 3474 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
82abbf8d
AS
3475 mark_reg_unknown(env, regs, insn->dst_reg);
3476 return 0;
f1174f77 3477 }
82abbf8d
AS
3478 verbose(env, "R%d pointer %s pointer prohibited\n",
3479 insn->dst_reg,
3480 bpf_alu_string[opcode >> 4]);
3481 return -EACCES;
f1174f77
EC
3482 } else {
3483 /* scalar += pointer
3484 * This is legal, but we have to reverse our
3485 * src/dest handling in computing the range
3486 */
82abbf8d
AS
3487 return adjust_ptr_min_max_vals(env, insn,
3488 src_reg, dst_reg);
f1174f77
EC
3489 }
3490 } else if (ptr_reg) {
3491 /* pointer += scalar */
82abbf8d
AS
3492 return adjust_ptr_min_max_vals(env, insn,
3493 dst_reg, src_reg);
f1174f77
EC
3494 }
3495 } else {
3496 /* Pretend the src is a reg with a known value, since we only
3497 * need to be able to read from this state.
3498 */
3499 off_reg.type = SCALAR_VALUE;
b03c9f9f 3500 __mark_reg_known(&off_reg, insn->imm);
f1174f77 3501 src_reg = &off_reg;
82abbf8d
AS
3502 if (ptr_reg) /* pointer += K */
3503 return adjust_ptr_min_max_vals(env, insn,
3504 ptr_reg, src_reg);
f1174f77
EC
3505 }
3506
3507 /* Got here implies adding two SCALAR_VALUEs */
3508 if (WARN_ON_ONCE(ptr_reg)) {
f4d7e40a 3509 print_verifier_state(env, state);
61bd5218 3510 verbose(env, "verifier internal error: unexpected ptr_reg\n");
f1174f77
EC
3511 return -EINVAL;
3512 }
3513 if (WARN_ON(!src_reg)) {
f4d7e40a 3514 print_verifier_state(env, state);
61bd5218 3515 verbose(env, "verifier internal error: no src_reg\n");
f1174f77
EC
3516 return -EINVAL;
3517 }
3518 return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
48461135
JB
3519}
3520
17a52670 3521/* check validity of 32-bit and 64-bit arithmetic operations */
58e2af8b 3522static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
17a52670 3523{
638f5b90 3524 struct bpf_reg_state *regs = cur_regs(env);
17a52670
AS
3525 u8 opcode = BPF_OP(insn->code);
3526 int err;
3527
3528 if (opcode == BPF_END || opcode == BPF_NEG) {
3529 if (opcode == BPF_NEG) {
3530 if (BPF_SRC(insn->code) != 0 ||
3531 insn->src_reg != BPF_REG_0 ||
3532 insn->off != 0 || insn->imm != 0) {
61bd5218 3533 verbose(env, "BPF_NEG uses reserved fields\n");
17a52670
AS
3534 return -EINVAL;
3535 }
3536 } else {
3537 if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
e67b8a68
EC
3538 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
3539 BPF_CLASS(insn->code) == BPF_ALU64) {
61bd5218 3540 verbose(env, "BPF_END uses reserved fields\n");
17a52670
AS
3541 return -EINVAL;
3542 }
3543 }
3544
3545 /* check src operand */
dc503a8a 3546 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
3547 if (err)
3548 return err;
3549
1be7f75d 3550 if (is_pointer_value(env, insn->dst_reg)) {
61bd5218 3551 verbose(env, "R%d pointer arithmetic prohibited\n",
1be7f75d
AS
3552 insn->dst_reg);
3553 return -EACCES;
3554 }
3555
17a52670 3556 /* check dest operand */
dc503a8a 3557 err = check_reg_arg(env, insn->dst_reg, DST_OP);
17a52670
AS
3558 if (err)
3559 return err;
3560
3561 } else if (opcode == BPF_MOV) {
3562
3563 if (BPF_SRC(insn->code) == BPF_X) {
3564 if (insn->imm != 0 || insn->off != 0) {
61bd5218 3565 verbose(env, "BPF_MOV uses reserved fields\n");
17a52670
AS
3566 return -EINVAL;
3567 }
3568
3569 /* check src operand */
dc503a8a 3570 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
3571 if (err)
3572 return err;
3573 } else {
3574 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
61bd5218 3575 verbose(env, "BPF_MOV uses reserved fields\n");
17a52670
AS
3576 return -EINVAL;
3577 }
3578 }
3579
fbeb1603
AF
3580 /* check dest operand, mark as required later */
3581 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
17a52670
AS
3582 if (err)
3583 return err;
3584
3585 if (BPF_SRC(insn->code) == BPF_X) {
3586 if (BPF_CLASS(insn->code) == BPF_ALU64) {
3587 /* case: R1 = R2
3588 * copy register state to dest reg
3589 */
3590 regs[insn->dst_reg] = regs[insn->src_reg];
8fe2d6cc 3591 regs[insn->dst_reg].live |= REG_LIVE_WRITTEN;
17a52670 3592 } else {
f1174f77 3593 /* R1 = (u32) R2 */
1be7f75d 3594 if (is_pointer_value(env, insn->src_reg)) {
61bd5218
JK
3595 verbose(env,
3596 "R%d partial copy of pointer\n",
1be7f75d
AS
3597 insn->src_reg);
3598 return -EACCES;
3599 }
61bd5218 3600 mark_reg_unknown(env, regs, insn->dst_reg);
0c17d1d2 3601 coerce_reg_to_size(&regs[insn->dst_reg], 4);
17a52670
AS
3602 }
3603 } else {
3604 /* case: R = imm
3605 * remember the value we stored into this reg
3606 */
fbeb1603
AF
3607 /* clear any state __mark_reg_known doesn't set */
3608 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77 3609 regs[insn->dst_reg].type = SCALAR_VALUE;
95a762e2
JH
3610 if (BPF_CLASS(insn->code) == BPF_ALU64) {
3611 __mark_reg_known(regs + insn->dst_reg,
3612 insn->imm);
3613 } else {
3614 __mark_reg_known(regs + insn->dst_reg,
3615 (u32)insn->imm);
3616 }
17a52670
AS
3617 }
3618
3619 } else if (opcode > BPF_END) {
61bd5218 3620 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
17a52670
AS
3621 return -EINVAL;
3622
3623 } else { /* all other ALU ops: and, sub, xor, add, ... */
3624
17a52670
AS
3625 if (BPF_SRC(insn->code) == BPF_X) {
3626 if (insn->imm != 0 || insn->off != 0) {
61bd5218 3627 verbose(env, "BPF_ALU uses reserved fields\n");
17a52670
AS
3628 return -EINVAL;
3629 }
3630 /* check src1 operand */
dc503a8a 3631 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
3632 if (err)
3633 return err;
3634 } else {
3635 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
61bd5218 3636 verbose(env, "BPF_ALU uses reserved fields\n");
17a52670
AS
3637 return -EINVAL;
3638 }
3639 }
3640
3641 /* check src2 operand */
dc503a8a 3642 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
3643 if (err)
3644 return err;
3645
3646 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
3647 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
61bd5218 3648 verbose(env, "div by zero\n");
17a52670
AS
3649 return -EINVAL;
3650 }
3651
229394e8
RV
3652 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
3653 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
3654 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
3655
3656 if (insn->imm < 0 || insn->imm >= size) {
61bd5218 3657 verbose(env, "invalid shift %d\n", insn->imm);
229394e8
RV
3658 return -EINVAL;
3659 }
3660 }
3661
1a0dc1ac 3662 /* check dest operand */
dc503a8a 3663 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
1a0dc1ac
AS
3664 if (err)
3665 return err;
3666
f1174f77 3667 return adjust_reg_min_max_vals(env, insn);
17a52670
AS
3668 }
3669
3670 return 0;
3671}
3672
f4d7e40a 3673static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
de8f3a83 3674 struct bpf_reg_state *dst_reg,
f8ddadc4 3675 enum bpf_reg_type type,
fb2a311a 3676 bool range_right_open)
969bf05e 3677{
f4d7e40a 3678 struct bpf_func_state *state = vstate->frame[vstate->curframe];
58e2af8b 3679 struct bpf_reg_state *regs = state->regs, *reg;
fb2a311a 3680 u16 new_range;
f4d7e40a 3681 int i, j;
2d2be8ca 3682
fb2a311a
DB
3683 if (dst_reg->off < 0 ||
3684 (dst_reg->off == 0 && range_right_open))
f1174f77
EC
3685 /* This doesn't give us any range */
3686 return;
3687
b03c9f9f
EC
3688 if (dst_reg->umax_value > MAX_PACKET_OFF ||
3689 dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
f1174f77
EC
3690 /* Risk of overflow. For instance, ptr + (1<<63) may be less
3691 * than pkt_end, but that's because it's also less than pkt.
3692 */
3693 return;
3694
fb2a311a
DB
3695 new_range = dst_reg->off;
3696 if (range_right_open)
3697 new_range--;
3698
3699 /* Examples for register markings:
2d2be8ca 3700 *
fb2a311a 3701 * pkt_data in dst register:
2d2be8ca
DB
3702 *
3703 * r2 = r3;
3704 * r2 += 8;
3705 * if (r2 > pkt_end) goto <handle exception>
3706 * <access okay>
3707 *
b4e432f1
DB
3708 * r2 = r3;
3709 * r2 += 8;
3710 * if (r2 < pkt_end) goto <access okay>
3711 * <handle exception>
3712 *
2d2be8ca
DB
3713 * Where:
3714 * r2 == dst_reg, pkt_end == src_reg
3715 * r2=pkt(id=n,off=8,r=0)
3716 * r3=pkt(id=n,off=0,r=0)
3717 *
fb2a311a 3718 * pkt_data in src register:
2d2be8ca
DB
3719 *
3720 * r2 = r3;
3721 * r2 += 8;
3722 * if (pkt_end >= r2) goto <access okay>
3723 * <handle exception>
3724 *
b4e432f1
DB
3725 * r2 = r3;
3726 * r2 += 8;
3727 * if (pkt_end <= r2) goto <handle exception>
3728 * <access okay>
3729 *
2d2be8ca
DB
3730 * Where:
3731 * pkt_end == dst_reg, r2 == src_reg
3732 * r2=pkt(id=n,off=8,r=0)
3733 * r3=pkt(id=n,off=0,r=0)
3734 *
3735 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
fb2a311a
DB
3736 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
3737 * and [r3, r3 + 8-1) respectively is safe to access depending on
3738 * the check.
969bf05e 3739 */
2d2be8ca 3740
f1174f77
EC
3741 /* If our ids match, then we must have the same max_value. And we
3742 * don't care about the other reg's fixed offset, since if it's too big
3743 * the range won't allow anything.
3744 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
3745 */
969bf05e 3746 for (i = 0; i < MAX_BPF_REG; i++)
de8f3a83 3747 if (regs[i].type == type && regs[i].id == dst_reg->id)
b1977682 3748 /* keep the maximum range already checked */
fb2a311a 3749 regs[i].range = max(regs[i].range, new_range);
969bf05e 3750
f4d7e40a
AS
3751 for (j = 0; j <= vstate->curframe; j++) {
3752 state = vstate->frame[j];
f3709f69
JS
3753 bpf_for_each_spilled_reg(i, state, reg) {
3754 if (!reg)
f4d7e40a 3755 continue;
f4d7e40a
AS
3756 if (reg->type == type && reg->id == dst_reg->id)
3757 reg->range = max(reg->range, new_range);
3758 }
969bf05e
AS
3759 }
3760}
3761
48461135
JB
3762/* Adjusts the register min/max values in the case that the dst_reg is the
3763 * variable register that we are working on, and src_reg is a constant or we're
3764 * simply doing a BPF_K check.
f1174f77 3765 * In JEQ/JNE cases we also adjust the var_off values.
48461135
JB
3766 */
3767static void reg_set_min_max(struct bpf_reg_state *true_reg,
3768 struct bpf_reg_state *false_reg, u64 val,
3769 u8 opcode)
3770{
f1174f77
EC
3771 /* If the dst_reg is a pointer, we can't learn anything about its
3772 * variable offset from the compare (unless src_reg were a pointer into
3773 * the same object, but we don't bother with that.
3774 * Since false_reg and true_reg have the same type by construction, we
3775 * only need to check one of them for pointerness.
3776 */
3777 if (__is_pointer_value(false, false_reg))
3778 return;
4cabc5b1 3779
48461135
JB
3780 switch (opcode) {
3781 case BPF_JEQ:
3782 /* If this is false then we know nothing Jon Snow, but if it is
3783 * true then we know for sure.
3784 */
b03c9f9f 3785 __mark_reg_known(true_reg, val);
48461135
JB
3786 break;
3787 case BPF_JNE:
3788 /* If this is true we know nothing Jon Snow, but if it is false
3789 * we know the value for sure;
3790 */
b03c9f9f 3791 __mark_reg_known(false_reg, val);
48461135
JB
3792 break;
3793 case BPF_JGT:
b03c9f9f
EC
3794 false_reg->umax_value = min(false_reg->umax_value, val);
3795 true_reg->umin_value = max(true_reg->umin_value, val + 1);
3796 break;
48461135 3797 case BPF_JSGT:
b03c9f9f
EC
3798 false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
3799 true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
48461135 3800 break;
b4e432f1
DB
3801 case BPF_JLT:
3802 false_reg->umin_value = max(false_reg->umin_value, val);
3803 true_reg->umax_value = min(true_reg->umax_value, val - 1);
3804 break;
3805 case BPF_JSLT:
3806 false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
3807 true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
3808 break;
48461135 3809 case BPF_JGE:
b03c9f9f
EC
3810 false_reg->umax_value = min(false_reg->umax_value, val - 1);
3811 true_reg->umin_value = max(true_reg->umin_value, val);
3812 break;
48461135 3813 case BPF_JSGE:
b03c9f9f
EC
3814 false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
3815 true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
48461135 3816 break;
b4e432f1
DB
3817 case BPF_JLE:
3818 false_reg->umin_value = max(false_reg->umin_value, val + 1);
3819 true_reg->umax_value = min(true_reg->umax_value, val);
3820 break;
3821 case BPF_JSLE:
3822 false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
3823 true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
3824 break;
48461135
JB
3825 default:
3826 break;
3827 }
3828
b03c9f9f
EC
3829 __reg_deduce_bounds(false_reg);
3830 __reg_deduce_bounds(true_reg);
3831 /* We might have learned some bits from the bounds. */
3832 __reg_bound_offset(false_reg);
3833 __reg_bound_offset(true_reg);
3834 /* Intersecting with the old var_off might have improved our bounds
3835 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
3836 * then new var_off is (0; 0x7f...fc) which improves our umax.
3837 */
3838 __update_reg_bounds(false_reg);
3839 __update_reg_bounds(true_reg);
48461135
JB
3840}
3841
f1174f77
EC
3842/* Same as above, but for the case that dst_reg holds a constant and src_reg is
3843 * the variable reg.
48461135
JB
3844 */
3845static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
3846 struct bpf_reg_state *false_reg, u64 val,
3847 u8 opcode)
3848{
f1174f77
EC
3849 if (__is_pointer_value(false, false_reg))
3850 return;
4cabc5b1 3851
48461135
JB
3852 switch (opcode) {
3853 case BPF_JEQ:
3854 /* If this is false then we know nothing Jon Snow, but if it is
3855 * true then we know for sure.
3856 */
b03c9f9f 3857 __mark_reg_known(true_reg, val);
48461135
JB
3858 break;
3859 case BPF_JNE:
3860 /* If this is true we know nothing Jon Snow, but if it is false
3861 * we know the value for sure;
3862 */
b03c9f9f 3863 __mark_reg_known(false_reg, val);
48461135
JB
3864 break;
3865 case BPF_JGT:
b03c9f9f
EC
3866 true_reg->umax_value = min(true_reg->umax_value, val - 1);
3867 false_reg->umin_value = max(false_reg->umin_value, val);
3868 break;
48461135 3869 case BPF_JSGT:
b03c9f9f
EC
3870 true_reg->smax_value = min_t(s64, true_reg->smax_value, val - 1);
3871 false_reg->smin_value = max_t(s64, false_reg->smin_value, val);
48461135 3872 break;
b4e432f1
DB
3873 case BPF_JLT:
3874 true_reg->umin_value = max(true_reg->umin_value, val + 1);
3875 false_reg->umax_value = min(false_reg->umax_value, val);
3876 break;
3877 case BPF_JSLT:
3878 true_reg->smin_value = max_t(s64, true_reg->smin_value, val + 1);
3879 false_reg->smax_value = min_t(s64, false_reg->smax_value, val);
3880 break;
48461135 3881 case BPF_JGE:
b03c9f9f
EC
3882 true_reg->umax_value = min(true_reg->umax_value, val);
3883 false_reg->umin_value = max(false_reg->umin_value, val + 1);
3884 break;
48461135 3885 case BPF_JSGE:
b03c9f9f
EC
3886 true_reg->smax_value = min_t(s64, true_reg->smax_value, val);
3887 false_reg->smin_value = max_t(s64, false_reg->smin_value, val + 1);
48461135 3888 break;
b4e432f1
DB
3889 case BPF_JLE:
3890 true_reg->umin_value = max(true_reg->umin_value, val);
3891 false_reg->umax_value = min(false_reg->umax_value, val - 1);
3892 break;
3893 case BPF_JSLE:
3894 true_reg->smin_value = max_t(s64, true_reg->smin_value, val);
3895 false_reg->smax_value = min_t(s64, false_reg->smax_value, val - 1);
3896 break;
48461135
JB
3897 default:
3898 break;
3899 }
3900
b03c9f9f
EC
3901 __reg_deduce_bounds(false_reg);
3902 __reg_deduce_bounds(true_reg);
3903 /* We might have learned some bits from the bounds. */
3904 __reg_bound_offset(false_reg);
3905 __reg_bound_offset(true_reg);
3906 /* Intersecting with the old var_off might have improved our bounds
3907 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
3908 * then new var_off is (0; 0x7f...fc) which improves our umax.
3909 */
3910 __update_reg_bounds(false_reg);
3911 __update_reg_bounds(true_reg);
f1174f77
EC
3912}
3913
3914/* Regs are known to be equal, so intersect their min/max/var_off */
3915static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
3916 struct bpf_reg_state *dst_reg)
3917{
b03c9f9f
EC
3918 src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
3919 dst_reg->umin_value);
3920 src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
3921 dst_reg->umax_value);
3922 src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
3923 dst_reg->smin_value);
3924 src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
3925 dst_reg->smax_value);
f1174f77
EC
3926 src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
3927 dst_reg->var_off);
b03c9f9f
EC
3928 /* We might have learned new bounds from the var_off. */
3929 __update_reg_bounds(src_reg);
3930 __update_reg_bounds(dst_reg);
3931 /* We might have learned something about the sign bit. */
3932 __reg_deduce_bounds(src_reg);
3933 __reg_deduce_bounds(dst_reg);
3934 /* We might have learned some bits from the bounds. */
3935 __reg_bound_offset(src_reg);
3936 __reg_bound_offset(dst_reg);
3937 /* Intersecting with the old var_off might have improved our bounds
3938 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
3939 * then new var_off is (0; 0x7f...fc) which improves our umax.
3940 */
3941 __update_reg_bounds(src_reg);
3942 __update_reg_bounds(dst_reg);
f1174f77
EC
3943}
3944
3945static void reg_combine_min_max(struct bpf_reg_state *true_src,
3946 struct bpf_reg_state *true_dst,
3947 struct bpf_reg_state *false_src,
3948 struct bpf_reg_state *false_dst,
3949 u8 opcode)
3950{
3951 switch (opcode) {
3952 case BPF_JEQ:
3953 __reg_combine_min_max(true_src, true_dst);
3954 break;
3955 case BPF_JNE:
3956 __reg_combine_min_max(false_src, false_dst);
b03c9f9f 3957 break;
4cabc5b1 3958 }
48461135
JB
3959}
3960
fd978bf7
JS
3961static void mark_ptr_or_null_reg(struct bpf_func_state *state,
3962 struct bpf_reg_state *reg, u32 id,
840b9615 3963 bool is_null)
57a09bf0 3964{
840b9615 3965 if (reg_type_may_be_null(reg->type) && reg->id == id) {
f1174f77
EC
3966 /* Old offset (both fixed and variable parts) should
3967 * have been known-zero, because we don't allow pointer
3968 * arithmetic on pointers that might be NULL.
3969 */
b03c9f9f
EC
3970 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
3971 !tnum_equals_const(reg->var_off, 0) ||
f1174f77 3972 reg->off)) {
b03c9f9f
EC
3973 __mark_reg_known_zero(reg);
3974 reg->off = 0;
f1174f77
EC
3975 }
3976 if (is_null) {
3977 reg->type = SCALAR_VALUE;
840b9615
JS
3978 } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
3979 if (reg->map_ptr->inner_map_meta) {
3980 reg->type = CONST_PTR_TO_MAP;
3981 reg->map_ptr = reg->map_ptr->inner_map_meta;
3982 } else {
3983 reg->type = PTR_TO_MAP_VALUE;
3984 }
c64b7983
JS
3985 } else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
3986 reg->type = PTR_TO_SOCKET;
56f668df 3987 }
fd978bf7
JS
3988 if (is_null || !reg_is_refcounted(reg)) {
3989 /* We don't need id from this point onwards anymore,
3990 * thus we should better reset it, so that state
3991 * pruning has chances to take effect.
3992 */
3993 reg->id = 0;
56f668df 3994 }
57a09bf0
TG
3995 }
3996}
3997
3998/* The logic is similar to find_good_pkt_pointers(), both could eventually
3999 * be folded together at some point.
4000 */
840b9615
JS
4001static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
4002 bool is_null)
57a09bf0 4003{
f4d7e40a 4004 struct bpf_func_state *state = vstate->frame[vstate->curframe];
f3709f69 4005 struct bpf_reg_state *reg, *regs = state->regs;
a08dd0da 4006 u32 id = regs[regno].id;
f4d7e40a 4007 int i, j;
57a09bf0 4008
fd978bf7
JS
4009 if (reg_is_refcounted_or_null(&regs[regno]) && is_null)
4010 __release_reference_state(state, id);
4011
57a09bf0 4012 for (i = 0; i < MAX_BPF_REG; i++)
fd978bf7 4013 mark_ptr_or_null_reg(state, &regs[i], id, is_null);
57a09bf0 4014
f4d7e40a
AS
4015 for (j = 0; j <= vstate->curframe; j++) {
4016 state = vstate->frame[j];
f3709f69
JS
4017 bpf_for_each_spilled_reg(i, state, reg) {
4018 if (!reg)
f4d7e40a 4019 continue;
fd978bf7 4020 mark_ptr_or_null_reg(state, reg, id, is_null);
f4d7e40a 4021 }
57a09bf0
TG
4022 }
4023}
4024
5beca081
DB
4025static bool try_match_pkt_pointers(const struct bpf_insn *insn,
4026 struct bpf_reg_state *dst_reg,
4027 struct bpf_reg_state *src_reg,
4028 struct bpf_verifier_state *this_branch,
4029 struct bpf_verifier_state *other_branch)
4030{
4031 if (BPF_SRC(insn->code) != BPF_X)
4032 return false;
4033
4034 switch (BPF_OP(insn->code)) {
4035 case BPF_JGT:
4036 if ((dst_reg->type == PTR_TO_PACKET &&
4037 src_reg->type == PTR_TO_PACKET_END) ||
4038 (dst_reg->type == PTR_TO_PACKET_META &&
4039 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4040 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
4041 find_good_pkt_pointers(this_branch, dst_reg,
4042 dst_reg->type, false);
4043 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4044 src_reg->type == PTR_TO_PACKET) ||
4045 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4046 src_reg->type == PTR_TO_PACKET_META)) {
4047 /* pkt_end > pkt_data', pkt_data > pkt_meta' */
4048 find_good_pkt_pointers(other_branch, src_reg,
4049 src_reg->type, true);
4050 } else {
4051 return false;
4052 }
4053 break;
4054 case BPF_JLT:
4055 if ((dst_reg->type == PTR_TO_PACKET &&
4056 src_reg->type == PTR_TO_PACKET_END) ||
4057 (dst_reg->type == PTR_TO_PACKET_META &&
4058 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4059 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
4060 find_good_pkt_pointers(other_branch, dst_reg,
4061 dst_reg->type, true);
4062 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4063 src_reg->type == PTR_TO_PACKET) ||
4064 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4065 src_reg->type == PTR_TO_PACKET_META)) {
4066 /* pkt_end < pkt_data', pkt_data > pkt_meta' */
4067 find_good_pkt_pointers(this_branch, src_reg,
4068 src_reg->type, false);
4069 } else {
4070 return false;
4071 }
4072 break;
4073 case BPF_JGE:
4074 if ((dst_reg->type == PTR_TO_PACKET &&
4075 src_reg->type == PTR_TO_PACKET_END) ||
4076 (dst_reg->type == PTR_TO_PACKET_META &&
4077 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4078 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
4079 find_good_pkt_pointers(this_branch, dst_reg,
4080 dst_reg->type, true);
4081 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4082 src_reg->type == PTR_TO_PACKET) ||
4083 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4084 src_reg->type == PTR_TO_PACKET_META)) {
4085 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
4086 find_good_pkt_pointers(other_branch, src_reg,
4087 src_reg->type, false);
4088 } else {
4089 return false;
4090 }
4091 break;
4092 case BPF_JLE:
4093 if ((dst_reg->type == PTR_TO_PACKET &&
4094 src_reg->type == PTR_TO_PACKET_END) ||
4095 (dst_reg->type == PTR_TO_PACKET_META &&
4096 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4097 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
4098 find_good_pkt_pointers(other_branch, dst_reg,
4099 dst_reg->type, false);
4100 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4101 src_reg->type == PTR_TO_PACKET) ||
4102 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4103 src_reg->type == PTR_TO_PACKET_META)) {
4104 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
4105 find_good_pkt_pointers(this_branch, src_reg,
4106 src_reg->type, true);
4107 } else {
4108 return false;
4109 }
4110 break;
4111 default:
4112 return false;
4113 }
4114
4115 return true;
4116}
4117
58e2af8b 4118static int check_cond_jmp_op(struct bpf_verifier_env *env,
17a52670
AS
4119 struct bpf_insn *insn, int *insn_idx)
4120{
f4d7e40a
AS
4121 struct bpf_verifier_state *this_branch = env->cur_state;
4122 struct bpf_verifier_state *other_branch;
4123 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
4124 struct bpf_reg_state *dst_reg, *other_branch_regs;
17a52670
AS
4125 u8 opcode = BPF_OP(insn->code);
4126 int err;
4127
b4e432f1 4128 if (opcode > BPF_JSLE) {
61bd5218 4129 verbose(env, "invalid BPF_JMP opcode %x\n", opcode);
17a52670
AS
4130 return -EINVAL;
4131 }
4132
4133 if (BPF_SRC(insn->code) == BPF_X) {
4134 if (insn->imm != 0) {
61bd5218 4135 verbose(env, "BPF_JMP uses reserved fields\n");
17a52670
AS
4136 return -EINVAL;
4137 }
4138
4139 /* check src1 operand */
dc503a8a 4140 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
4141 if (err)
4142 return err;
1be7f75d
AS
4143
4144 if (is_pointer_value(env, insn->src_reg)) {
61bd5218 4145 verbose(env, "R%d pointer comparison prohibited\n",
1be7f75d
AS
4146 insn->src_reg);
4147 return -EACCES;
4148 }
17a52670
AS
4149 } else {
4150 if (insn->src_reg != BPF_REG_0) {
61bd5218 4151 verbose(env, "BPF_JMP uses reserved fields\n");
17a52670
AS
4152 return -EINVAL;
4153 }
4154 }
4155
4156 /* check src2 operand */
dc503a8a 4157 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
4158 if (err)
4159 return err;
4160
1a0dc1ac
AS
4161 dst_reg = &regs[insn->dst_reg];
4162
17a52670
AS
4163 /* detect if R == 0 where R was initialized to zero earlier */
4164 if (BPF_SRC(insn->code) == BPF_K &&
4165 (opcode == BPF_JEQ || opcode == BPF_JNE) &&
f1174f77 4166 dst_reg->type == SCALAR_VALUE &&
3bf15921
AS
4167 tnum_is_const(dst_reg->var_off)) {
4168 if ((opcode == BPF_JEQ && dst_reg->var_off.value == insn->imm) ||
4169 (opcode == BPF_JNE && dst_reg->var_off.value != insn->imm)) {
17a52670
AS
4170 /* if (imm == imm) goto pc+off;
4171 * only follow the goto, ignore fall-through
4172 */
4173 *insn_idx += insn->off;
4174 return 0;
4175 } else {
4176 /* if (imm != imm) goto pc+off;
4177 * only follow fall-through branch, since
4178 * that's where the program will go
4179 */
4180 return 0;
4181 }
4182 }
4183
4184 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
4185 if (!other_branch)
4186 return -EFAULT;
f4d7e40a 4187 other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
17a52670 4188
48461135
JB
4189 /* detect if we are comparing against a constant value so we can adjust
4190 * our min/max values for our dst register.
f1174f77
EC
4191 * this is only legit if both are scalars (or pointers to the same
4192 * object, I suppose, but we don't support that right now), because
4193 * otherwise the different base pointers mean the offsets aren't
4194 * comparable.
48461135
JB
4195 */
4196 if (BPF_SRC(insn->code) == BPF_X) {
f1174f77
EC
4197 if (dst_reg->type == SCALAR_VALUE &&
4198 regs[insn->src_reg].type == SCALAR_VALUE) {
4199 if (tnum_is_const(regs[insn->src_reg].var_off))
f4d7e40a 4200 reg_set_min_max(&other_branch_regs[insn->dst_reg],
f1174f77
EC
4201 dst_reg, regs[insn->src_reg].var_off.value,
4202 opcode);
4203 else if (tnum_is_const(dst_reg->var_off))
f4d7e40a 4204 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
f1174f77
EC
4205 &regs[insn->src_reg],
4206 dst_reg->var_off.value, opcode);
4207 else if (opcode == BPF_JEQ || opcode == BPF_JNE)
4208 /* Comparing for equality, we can combine knowledge */
f4d7e40a
AS
4209 reg_combine_min_max(&other_branch_regs[insn->src_reg],
4210 &other_branch_regs[insn->dst_reg],
f1174f77
EC
4211 &regs[insn->src_reg],
4212 &regs[insn->dst_reg], opcode);
4213 }
4214 } else if (dst_reg->type == SCALAR_VALUE) {
f4d7e40a 4215 reg_set_min_max(&other_branch_regs[insn->dst_reg],
48461135
JB
4216 dst_reg, insn->imm, opcode);
4217 }
4218
58e2af8b 4219 /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
17a52670 4220 if (BPF_SRC(insn->code) == BPF_K &&
1a0dc1ac 4221 insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
840b9615
JS
4222 reg_type_may_be_null(dst_reg->type)) {
4223 /* Mark all identical registers in each branch as either
57a09bf0
TG
4224 * safe or unknown depending R == 0 or R != 0 conditional.
4225 */
840b9615
JS
4226 mark_ptr_or_null_regs(this_branch, insn->dst_reg,
4227 opcode == BPF_JNE);
4228 mark_ptr_or_null_regs(other_branch, insn->dst_reg,
4229 opcode == BPF_JEQ);
5beca081
DB
4230 } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
4231 this_branch, other_branch) &&
4232 is_pointer_value(env, insn->dst_reg)) {
61bd5218
JK
4233 verbose(env, "R%d pointer comparison prohibited\n",
4234 insn->dst_reg);
1be7f75d 4235 return -EACCES;
17a52670 4236 }
61bd5218 4237 if (env->log.level)
f4d7e40a 4238 print_verifier_state(env, this_branch->frame[this_branch->curframe]);
17a52670
AS
4239 return 0;
4240}
4241
0246e64d
AS
4242/* return the map pointer stored inside BPF_LD_IMM64 instruction */
4243static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
4244{
4245 u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
4246
4247 return (struct bpf_map *) (unsigned long) imm64;
4248}
4249
17a52670 4250/* verify BPF_LD_IMM64 instruction */
58e2af8b 4251static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
17a52670 4252{
638f5b90 4253 struct bpf_reg_state *regs = cur_regs(env);
17a52670
AS
4254 int err;
4255
4256 if (BPF_SIZE(insn->code) != BPF_DW) {
61bd5218 4257 verbose(env, "invalid BPF_LD_IMM insn\n");
17a52670
AS
4258 return -EINVAL;
4259 }
4260 if (insn->off != 0) {
61bd5218 4261 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
17a52670
AS
4262 return -EINVAL;
4263 }
4264
dc503a8a 4265 err = check_reg_arg(env, insn->dst_reg, DST_OP);
17a52670
AS
4266 if (err)
4267 return err;
4268
6b173873 4269 if (insn->src_reg == 0) {
6b173873
JK
4270 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
4271
f1174f77 4272 regs[insn->dst_reg].type = SCALAR_VALUE;
b03c9f9f 4273 __mark_reg_known(&regs[insn->dst_reg], imm);
17a52670 4274 return 0;
6b173873 4275 }
17a52670
AS
4276
4277 /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
4278 BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
4279
4280 regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
4281 regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
4282 return 0;
4283}
4284
96be4325
DB
4285static bool may_access_skb(enum bpf_prog_type type)
4286{
4287 switch (type) {
4288 case BPF_PROG_TYPE_SOCKET_FILTER:
4289 case BPF_PROG_TYPE_SCHED_CLS:
94caee8c 4290 case BPF_PROG_TYPE_SCHED_ACT:
96be4325
DB
4291 return true;
4292 default:
4293 return false;
4294 }
4295}
4296
ddd872bc
AS
4297/* verify safety of LD_ABS|LD_IND instructions:
4298 * - they can only appear in the programs where ctx == skb
4299 * - since they are wrappers of function calls, they scratch R1-R5 registers,
4300 * preserve R6-R9, and store return value into R0
4301 *
4302 * Implicit input:
4303 * ctx == skb == R6 == CTX
4304 *
4305 * Explicit input:
4306 * SRC == any register
4307 * IMM == 32-bit immediate
4308 *
4309 * Output:
4310 * R0 - 8/16/32-bit skb data converted to cpu endianness
4311 */
58e2af8b 4312static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
ddd872bc 4313{
638f5b90 4314 struct bpf_reg_state *regs = cur_regs(env);
ddd872bc 4315 u8 mode = BPF_MODE(insn->code);
ddd872bc
AS
4316 int i, err;
4317
24701ece 4318 if (!may_access_skb(env->prog->type)) {
61bd5218 4319 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
ddd872bc
AS
4320 return -EINVAL;
4321 }
4322
e0cea7ce
DB
4323 if (!env->ops->gen_ld_abs) {
4324 verbose(env, "bpf verifier is misconfigured\n");
4325 return -EINVAL;
4326 }
4327
f910cefa 4328 if (env->subprog_cnt > 1) {
f4d7e40a
AS
4329 /* when program has LD_ABS insn JITs and interpreter assume
4330 * that r1 == ctx == skb which is not the case for callees
4331 * that can have arbitrary arguments. It's problematic
4332 * for main prog as well since JITs would need to analyze
4333 * all functions in order to make proper register save/restore
4334 * decisions in the main prog. Hence disallow LD_ABS with calls
4335 */
4336 verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
4337 return -EINVAL;
4338 }
4339
ddd872bc 4340 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
d82bccc6 4341 BPF_SIZE(insn->code) == BPF_DW ||
ddd872bc 4342 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
61bd5218 4343 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
ddd872bc
AS
4344 return -EINVAL;
4345 }
4346
4347 /* check whether implicit source operand (register R6) is readable */
dc503a8a 4348 err = check_reg_arg(env, BPF_REG_6, SRC_OP);
ddd872bc
AS
4349 if (err)
4350 return err;
4351
fd978bf7
JS
4352 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
4353 * gen_ld_abs() may terminate the program at runtime, leading to
4354 * reference leak.
4355 */
4356 err = check_reference_leak(env);
4357 if (err) {
4358 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
4359 return err;
4360 }
4361
ddd872bc 4362 if (regs[BPF_REG_6].type != PTR_TO_CTX) {
61bd5218
JK
4363 verbose(env,
4364 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
ddd872bc
AS
4365 return -EINVAL;
4366 }
4367
4368 if (mode == BPF_IND) {
4369 /* check explicit source operand */
dc503a8a 4370 err = check_reg_arg(env, insn->src_reg, SRC_OP);
ddd872bc
AS
4371 if (err)
4372 return err;
4373 }
4374
4375 /* reset caller saved regs to unreadable */
dc503a8a 4376 for (i = 0; i < CALLER_SAVED_REGS; i++) {
61bd5218 4377 mark_reg_not_init(env, regs, caller_saved[i]);
dc503a8a
EC
4378 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
4379 }
ddd872bc
AS
4380
4381 /* mark destination R0 register as readable, since it contains
dc503a8a
EC
4382 * the value fetched from the packet.
4383 * Already marked as written above.
ddd872bc 4384 */
61bd5218 4385 mark_reg_unknown(env, regs, BPF_REG_0);
ddd872bc
AS
4386 return 0;
4387}
4388
390ee7e2
AS
4389static int check_return_code(struct bpf_verifier_env *env)
4390{
4391 struct bpf_reg_state *reg;
4392 struct tnum range = tnum_range(0, 1);
4393
4394 switch (env->prog->type) {
4395 case BPF_PROG_TYPE_CGROUP_SKB:
4396 case BPF_PROG_TYPE_CGROUP_SOCK:
4fbac77d 4397 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
390ee7e2 4398 case BPF_PROG_TYPE_SOCK_OPS:
ebc614f6 4399 case BPF_PROG_TYPE_CGROUP_DEVICE:
390ee7e2
AS
4400 break;
4401 default:
4402 return 0;
4403 }
4404
638f5b90 4405 reg = cur_regs(env) + BPF_REG_0;
390ee7e2 4406 if (reg->type != SCALAR_VALUE) {
61bd5218 4407 verbose(env, "At program exit the register R0 is not a known value (%s)\n",
390ee7e2
AS
4408 reg_type_str[reg->type]);
4409 return -EINVAL;
4410 }
4411
4412 if (!tnum_in(range, reg->var_off)) {
61bd5218 4413 verbose(env, "At program exit the register R0 ");
390ee7e2
AS
4414 if (!tnum_is_unknown(reg->var_off)) {
4415 char tn_buf[48];
4416
4417 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 4418 verbose(env, "has value %s", tn_buf);
390ee7e2 4419 } else {
61bd5218 4420 verbose(env, "has unknown scalar value");
390ee7e2 4421 }
61bd5218 4422 verbose(env, " should have been 0 or 1\n");
390ee7e2
AS
4423 return -EINVAL;
4424 }
4425 return 0;
4426}
4427
475fb78f
AS
4428/* non-recursive DFS pseudo code
4429 * 1 procedure DFS-iterative(G,v):
4430 * 2 label v as discovered
4431 * 3 let S be a stack
4432 * 4 S.push(v)
4433 * 5 while S is not empty
4434 * 6 t <- S.pop()
4435 * 7 if t is what we're looking for:
4436 * 8 return t
4437 * 9 for all edges e in G.adjacentEdges(t) do
4438 * 10 if edge e is already labelled
4439 * 11 continue with the next edge
4440 * 12 w <- G.adjacentVertex(t,e)
4441 * 13 if vertex w is not discovered and not explored
4442 * 14 label e as tree-edge
4443 * 15 label w as discovered
4444 * 16 S.push(w)
4445 * 17 continue at 5
4446 * 18 else if vertex w is discovered
4447 * 19 label e as back-edge
4448 * 20 else
4449 * 21 // vertex w is explored
4450 * 22 label e as forward- or cross-edge
4451 * 23 label t as explored
4452 * 24 S.pop()
4453 *
4454 * convention:
4455 * 0x10 - discovered
4456 * 0x11 - discovered and fall-through edge labelled
4457 * 0x12 - discovered and fall-through and branch edges labelled
4458 * 0x20 - explored
4459 */
4460
4461enum {
4462 DISCOVERED = 0x10,
4463 EXPLORED = 0x20,
4464 FALLTHROUGH = 1,
4465 BRANCH = 2,
4466};
4467
58e2af8b 4468#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
f1bca824 4469
475fb78f
AS
4470static int *insn_stack; /* stack of insns to process */
4471static int cur_stack; /* current stack index */
4472static int *insn_state;
4473
4474/* t, w, e - match pseudo-code above:
4475 * t - index of current instruction
4476 * w - next instruction
4477 * e - edge
4478 */
58e2af8b 4479static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
475fb78f
AS
4480{
4481 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
4482 return 0;
4483
4484 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
4485 return 0;
4486
4487 if (w < 0 || w >= env->prog->len) {
61bd5218 4488 verbose(env, "jump out of range from insn %d to %d\n", t, w);
475fb78f
AS
4489 return -EINVAL;
4490 }
4491
f1bca824
AS
4492 if (e == BRANCH)
4493 /* mark branch target for state pruning */
4494 env->explored_states[w] = STATE_LIST_MARK;
4495
475fb78f
AS
4496 if (insn_state[w] == 0) {
4497 /* tree-edge */
4498 insn_state[t] = DISCOVERED | e;
4499 insn_state[w] = DISCOVERED;
4500 if (cur_stack >= env->prog->len)
4501 return -E2BIG;
4502 insn_stack[cur_stack++] = w;
4503 return 1;
4504 } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
61bd5218 4505 verbose(env, "back-edge from insn %d to %d\n", t, w);
475fb78f
AS
4506 return -EINVAL;
4507 } else if (insn_state[w] == EXPLORED) {
4508 /* forward- or cross-edge */
4509 insn_state[t] = DISCOVERED | e;
4510 } else {
61bd5218 4511 verbose(env, "insn state internal bug\n");
475fb78f
AS
4512 return -EFAULT;
4513 }
4514 return 0;
4515}
4516
4517/* non-recursive depth-first-search to detect loops in BPF program
4518 * loop == back-edge in directed graph
4519 */
58e2af8b 4520static int check_cfg(struct bpf_verifier_env *env)
475fb78f
AS
4521{
4522 struct bpf_insn *insns = env->prog->insnsi;
4523 int insn_cnt = env->prog->len;
4524 int ret = 0;
4525 int i, t;
4526
cc8b0b92
AS
4527 ret = check_subprogs(env);
4528 if (ret < 0)
4529 return ret;
4530
475fb78f
AS
4531 insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
4532 if (!insn_state)
4533 return -ENOMEM;
4534
4535 insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
4536 if (!insn_stack) {
4537 kfree(insn_state);
4538 return -ENOMEM;
4539 }
4540
4541 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
4542 insn_stack[0] = 0; /* 0 is the first instruction */
4543 cur_stack = 1;
4544
4545peek_stack:
4546 if (cur_stack == 0)
4547 goto check_state;
4548 t = insn_stack[cur_stack - 1];
4549
4550 if (BPF_CLASS(insns[t].code) == BPF_JMP) {
4551 u8 opcode = BPF_OP(insns[t].code);
4552
4553 if (opcode == BPF_EXIT) {
4554 goto mark_explored;
4555 } else if (opcode == BPF_CALL) {
4556 ret = push_insn(t, t + 1, FALLTHROUGH, env);
4557 if (ret == 1)
4558 goto peek_stack;
4559 else if (ret < 0)
4560 goto err_free;
07016151
DB
4561 if (t + 1 < insn_cnt)
4562 env->explored_states[t + 1] = STATE_LIST_MARK;
cc8b0b92
AS
4563 if (insns[t].src_reg == BPF_PSEUDO_CALL) {
4564 env->explored_states[t] = STATE_LIST_MARK;
4565 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
4566 if (ret == 1)
4567 goto peek_stack;
4568 else if (ret < 0)
4569 goto err_free;
4570 }
475fb78f
AS
4571 } else if (opcode == BPF_JA) {
4572 if (BPF_SRC(insns[t].code) != BPF_K) {
4573 ret = -EINVAL;
4574 goto err_free;
4575 }
4576 /* unconditional jump with single edge */
4577 ret = push_insn(t, t + insns[t].off + 1,
4578 FALLTHROUGH, env);
4579 if (ret == 1)
4580 goto peek_stack;
4581 else if (ret < 0)
4582 goto err_free;
f1bca824
AS
4583 /* tell verifier to check for equivalent states
4584 * after every call and jump
4585 */
c3de6317
AS
4586 if (t + 1 < insn_cnt)
4587 env->explored_states[t + 1] = STATE_LIST_MARK;
475fb78f
AS
4588 } else {
4589 /* conditional jump with two edges */
3c2ce60b 4590 env->explored_states[t] = STATE_LIST_MARK;
475fb78f
AS
4591 ret = push_insn(t, t + 1, FALLTHROUGH, env);
4592 if (ret == 1)
4593 goto peek_stack;
4594 else if (ret < 0)
4595 goto err_free;
4596
4597 ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
4598 if (ret == 1)
4599 goto peek_stack;
4600 else if (ret < 0)
4601 goto err_free;
4602 }
4603 } else {
4604 /* all other non-branch instructions with single
4605 * fall-through edge
4606 */
4607 ret = push_insn(t, t + 1, FALLTHROUGH, env);
4608 if (ret == 1)
4609 goto peek_stack;
4610 else if (ret < 0)
4611 goto err_free;
4612 }
4613
4614mark_explored:
4615 insn_state[t] = EXPLORED;
4616 if (cur_stack-- <= 0) {
61bd5218 4617 verbose(env, "pop stack internal bug\n");
475fb78f
AS
4618 ret = -EFAULT;
4619 goto err_free;
4620 }
4621 goto peek_stack;
4622
4623check_state:
4624 for (i = 0; i < insn_cnt; i++) {
4625 if (insn_state[i] != EXPLORED) {
61bd5218 4626 verbose(env, "unreachable insn %d\n", i);
475fb78f
AS
4627 ret = -EINVAL;
4628 goto err_free;
4629 }
4630 }
4631 ret = 0; /* cfg looks good */
4632
4633err_free:
4634 kfree(insn_state);
4635 kfree(insn_stack);
4636 return ret;
4637}
4638
838e9690
YS
4639/* The minimum supported BTF func info size */
4640#define MIN_BPF_FUNCINFO_SIZE 8
4641#define MAX_FUNCINFO_REC_SIZE 252
4642
c454a46b
MKL
4643static int check_btf_func(struct bpf_verifier_env *env,
4644 const union bpf_attr *attr,
4645 union bpf_attr __user *uattr)
838e9690
YS
4646{
4647 u32 i, nfuncs, urec_size, min_size, prev_offset;
4648 u32 krec_size = sizeof(struct bpf_func_info);
c454a46b 4649 struct bpf_func_info *krecord;
838e9690 4650 const struct btf_type *type;
c454a46b
MKL
4651 struct bpf_prog *prog;
4652 const struct btf *btf;
838e9690 4653 void __user *urecord;
838e9690
YS
4654 int ret = 0;
4655
4656 nfuncs = attr->func_info_cnt;
4657 if (!nfuncs)
4658 return 0;
4659
4660 if (nfuncs != env->subprog_cnt) {
4661 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
4662 return -EINVAL;
4663 }
4664
4665 urec_size = attr->func_info_rec_size;
4666 if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
4667 urec_size > MAX_FUNCINFO_REC_SIZE ||
4668 urec_size % sizeof(u32)) {
4669 verbose(env, "invalid func info rec size %u\n", urec_size);
4670 return -EINVAL;
4671 }
4672
c454a46b
MKL
4673 prog = env->prog;
4674 btf = prog->aux->btf;
838e9690
YS
4675
4676 urecord = u64_to_user_ptr(attr->func_info);
4677 min_size = min_t(u32, krec_size, urec_size);
4678
ba64e7d8 4679 krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
c454a46b
MKL
4680 if (!krecord)
4681 return -ENOMEM;
ba64e7d8 4682
838e9690
YS
4683 for (i = 0; i < nfuncs; i++) {
4684 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
4685 if (ret) {
4686 if (ret == -E2BIG) {
4687 verbose(env, "nonzero tailing record in func info");
4688 /* set the size kernel expects so loader can zero
4689 * out the rest of the record.
4690 */
4691 if (put_user(min_size, &uattr->func_info_rec_size))
4692 ret = -EFAULT;
4693 }
c454a46b 4694 goto err_free;
838e9690
YS
4695 }
4696
ba64e7d8 4697 if (copy_from_user(&krecord[i], urecord, min_size)) {
838e9690 4698 ret = -EFAULT;
c454a46b 4699 goto err_free;
838e9690
YS
4700 }
4701
d30d42e0 4702 /* check insn_off */
838e9690 4703 if (i == 0) {
d30d42e0 4704 if (krecord[i].insn_off) {
838e9690 4705 verbose(env,
d30d42e0
MKL
4706 "nonzero insn_off %u for the first func info record",
4707 krecord[i].insn_off);
838e9690 4708 ret = -EINVAL;
c454a46b 4709 goto err_free;
838e9690 4710 }
d30d42e0 4711 } else if (krecord[i].insn_off <= prev_offset) {
838e9690
YS
4712 verbose(env,
4713 "same or smaller insn offset (%u) than previous func info record (%u)",
d30d42e0 4714 krecord[i].insn_off, prev_offset);
838e9690 4715 ret = -EINVAL;
c454a46b 4716 goto err_free;
838e9690
YS
4717 }
4718
d30d42e0 4719 if (env->subprog_info[i].start != krecord[i].insn_off) {
838e9690
YS
4720 verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
4721 ret = -EINVAL;
c454a46b 4722 goto err_free;
838e9690
YS
4723 }
4724
4725 /* check type_id */
ba64e7d8 4726 type = btf_type_by_id(btf, krecord[i].type_id);
838e9690
YS
4727 if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) {
4728 verbose(env, "invalid type id %d in func info",
ba64e7d8 4729 krecord[i].type_id);
838e9690 4730 ret = -EINVAL;
c454a46b 4731 goto err_free;
838e9690
YS
4732 }
4733
d30d42e0 4734 prev_offset = krecord[i].insn_off;
838e9690
YS
4735 urecord += urec_size;
4736 }
4737
ba64e7d8
YS
4738 prog->aux->func_info = krecord;
4739 prog->aux->func_info_cnt = nfuncs;
838e9690
YS
4740 return 0;
4741
c454a46b 4742err_free:
ba64e7d8 4743 kvfree(krecord);
838e9690
YS
4744 return ret;
4745}
4746
ba64e7d8
YS
4747static void adjust_btf_func(struct bpf_verifier_env *env)
4748{
4749 int i;
4750
4751 if (!env->prog->aux->func_info)
4752 return;
4753
4754 for (i = 0; i < env->subprog_cnt; i++)
d30d42e0 4755 env->prog->aux->func_info[i].insn_off = env->subprog_info[i].start;
ba64e7d8
YS
4756}
4757
c454a46b
MKL
4758#define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \
4759 sizeof(((struct bpf_line_info *)(0))->line_col))
4760#define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
4761
4762static int check_btf_line(struct bpf_verifier_env *env,
4763 const union bpf_attr *attr,
4764 union bpf_attr __user *uattr)
4765{
4766 u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
4767 struct bpf_subprog_info *sub;
4768 struct bpf_line_info *linfo;
4769 struct bpf_prog *prog;
4770 const struct btf *btf;
4771 void __user *ulinfo;
4772 int err;
4773
4774 nr_linfo = attr->line_info_cnt;
4775 if (!nr_linfo)
4776 return 0;
4777
4778 rec_size = attr->line_info_rec_size;
4779 if (rec_size < MIN_BPF_LINEINFO_SIZE ||
4780 rec_size > MAX_LINEINFO_REC_SIZE ||
4781 rec_size & (sizeof(u32) - 1))
4782 return -EINVAL;
4783
4784 /* Need to zero it in case the userspace may
4785 * pass in a smaller bpf_line_info object.
4786 */
4787 linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
4788 GFP_KERNEL | __GFP_NOWARN);
4789 if (!linfo)
4790 return -ENOMEM;
4791
4792 prog = env->prog;
4793 btf = prog->aux->btf;
4794
4795 s = 0;
4796 sub = env->subprog_info;
4797 ulinfo = u64_to_user_ptr(attr->line_info);
4798 expected_size = sizeof(struct bpf_line_info);
4799 ncopy = min_t(u32, expected_size, rec_size);
4800 for (i = 0; i < nr_linfo; i++) {
4801 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
4802 if (err) {
4803 if (err == -E2BIG) {
4804 verbose(env, "nonzero tailing record in line_info");
4805 if (put_user(expected_size,
4806 &uattr->line_info_rec_size))
4807 err = -EFAULT;
4808 }
4809 goto err_free;
4810 }
4811
4812 if (copy_from_user(&linfo[i], ulinfo, ncopy)) {
4813 err = -EFAULT;
4814 goto err_free;
4815 }
4816
4817 /*
4818 * Check insn_off to ensure
4819 * 1) strictly increasing AND
4820 * 2) bounded by prog->len
4821 *
4822 * The linfo[0].insn_off == 0 check logically falls into
4823 * the later "missing bpf_line_info for func..." case
4824 * because the first linfo[0].insn_off must be the
4825 * first sub also and the first sub must have
4826 * subprog_info[0].start == 0.
4827 */
4828 if ((i && linfo[i].insn_off <= prev_offset) ||
4829 linfo[i].insn_off >= prog->len) {
4830 verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
4831 i, linfo[i].insn_off, prev_offset,
4832 prog->len);
4833 err = -EINVAL;
4834 goto err_free;
4835 }
4836
4837 if (!btf_name_offset_valid(btf, linfo[i].line_off) ||
4838 !btf_name_offset_valid(btf, linfo[i].file_name_off)) {
4839 verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
4840 err = -EINVAL;
4841 goto err_free;
4842 }
4843
4844 if (s != env->subprog_cnt) {
4845 if (linfo[i].insn_off == sub[s].start) {
4846 sub[s].linfo_idx = i;
4847 s++;
4848 } else if (sub[s].start < linfo[i].insn_off) {
4849 verbose(env, "missing bpf_line_info for func#%u\n", s);
4850 err = -EINVAL;
4851 goto err_free;
4852 }
4853 }
4854
4855 prev_offset = linfo[i].insn_off;
4856 ulinfo += rec_size;
4857 }
4858
4859 if (s != env->subprog_cnt) {
4860 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
4861 env->subprog_cnt - s, s);
4862 err = -EINVAL;
4863 goto err_free;
4864 }
4865
4866 prog->aux->linfo = linfo;
4867 prog->aux->nr_linfo = nr_linfo;
4868
4869 return 0;
4870
4871err_free:
4872 kvfree(linfo);
4873 return err;
4874}
4875
4876static int check_btf_info(struct bpf_verifier_env *env,
4877 const union bpf_attr *attr,
4878 union bpf_attr __user *uattr)
4879{
4880 struct btf *btf;
4881 int err;
4882
4883 if (!attr->func_info_cnt && !attr->line_info_cnt)
4884 return 0;
4885
4886 btf = btf_get_by_fd(attr->prog_btf_fd);
4887 if (IS_ERR(btf))
4888 return PTR_ERR(btf);
4889 env->prog->aux->btf = btf;
4890
4891 err = check_btf_func(env, attr, uattr);
4892 if (err)
4893 return err;
4894
4895 err = check_btf_line(env, attr, uattr);
4896 if (err)
4897 return err;
4898
4899 return 0;
4900}
4901
f1174f77
EC
4902/* check %cur's range satisfies %old's */
4903static bool range_within(struct bpf_reg_state *old,
4904 struct bpf_reg_state *cur)
4905{
b03c9f9f
EC
4906 return old->umin_value <= cur->umin_value &&
4907 old->umax_value >= cur->umax_value &&
4908 old->smin_value <= cur->smin_value &&
4909 old->smax_value >= cur->smax_value;
f1174f77
EC
4910}
4911
4912/* Maximum number of register states that can exist at once */
4913#define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
4914struct idpair {
4915 u32 old;
4916 u32 cur;
4917};
4918
4919/* If in the old state two registers had the same id, then they need to have
4920 * the same id in the new state as well. But that id could be different from
4921 * the old state, so we need to track the mapping from old to new ids.
4922 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
4923 * regs with old id 5 must also have new id 9 for the new state to be safe. But
4924 * regs with a different old id could still have new id 9, we don't care about
4925 * that.
4926 * So we look through our idmap to see if this old id has been seen before. If
4927 * so, we require the new id to match; otherwise, we add the id pair to the map.
969bf05e 4928 */
f1174f77 4929static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
969bf05e 4930{
f1174f77 4931 unsigned int i;
969bf05e 4932
f1174f77
EC
4933 for (i = 0; i < ID_MAP_SIZE; i++) {
4934 if (!idmap[i].old) {
4935 /* Reached an empty slot; haven't seen this id before */
4936 idmap[i].old = old_id;
4937 idmap[i].cur = cur_id;
4938 return true;
4939 }
4940 if (idmap[i].old == old_id)
4941 return idmap[i].cur == cur_id;
4942 }
4943 /* We ran out of idmap slots, which should be impossible */
4944 WARN_ON_ONCE(1);
4945 return false;
4946}
4947
4948/* Returns true if (rold safe implies rcur safe) */
1b688a19
EC
4949static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
4950 struct idpair *idmap)
f1174f77 4951{
f4d7e40a
AS
4952 bool equal;
4953
dc503a8a
EC
4954 if (!(rold->live & REG_LIVE_READ))
4955 /* explored state didn't use this */
4956 return true;
4957
679c782d 4958 equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
f4d7e40a
AS
4959
4960 if (rold->type == PTR_TO_STACK)
4961 /* two stack pointers are equal only if they're pointing to
4962 * the same stack frame, since fp-8 in foo != fp-8 in bar
4963 */
4964 return equal && rold->frameno == rcur->frameno;
4965
4966 if (equal)
969bf05e
AS
4967 return true;
4968
f1174f77
EC
4969 if (rold->type == NOT_INIT)
4970 /* explored state can't have used this */
969bf05e 4971 return true;
f1174f77
EC
4972 if (rcur->type == NOT_INIT)
4973 return false;
4974 switch (rold->type) {
4975 case SCALAR_VALUE:
4976 if (rcur->type == SCALAR_VALUE) {
4977 /* new val must satisfy old val knowledge */
4978 return range_within(rold, rcur) &&
4979 tnum_in(rold->var_off, rcur->var_off);
4980 } else {
179d1c56
JH
4981 /* We're trying to use a pointer in place of a scalar.
4982 * Even if the scalar was unbounded, this could lead to
4983 * pointer leaks because scalars are allowed to leak
4984 * while pointers are not. We could make this safe in
4985 * special cases if root is calling us, but it's
4986 * probably not worth the hassle.
f1174f77 4987 */
179d1c56 4988 return false;
f1174f77
EC
4989 }
4990 case PTR_TO_MAP_VALUE:
1b688a19
EC
4991 /* If the new min/max/var_off satisfy the old ones and
4992 * everything else matches, we are OK.
4993 * We don't care about the 'id' value, because nothing
4994 * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL)
4995 */
4996 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
4997 range_within(rold, rcur) &&
4998 tnum_in(rold->var_off, rcur->var_off);
f1174f77
EC
4999 case PTR_TO_MAP_VALUE_OR_NULL:
5000 /* a PTR_TO_MAP_VALUE could be safe to use as a
5001 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
5002 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
5003 * checked, doing so could have affected others with the same
5004 * id, and we can't check for that because we lost the id when
5005 * we converted to a PTR_TO_MAP_VALUE.
5006 */
5007 if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
5008 return false;
5009 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
5010 return false;
5011 /* Check our ids match any regs they're supposed to */
5012 return check_ids(rold->id, rcur->id, idmap);
de8f3a83 5013 case PTR_TO_PACKET_META:
f1174f77 5014 case PTR_TO_PACKET:
de8f3a83 5015 if (rcur->type != rold->type)
f1174f77
EC
5016 return false;
5017 /* We must have at least as much range as the old ptr
5018 * did, so that any accesses which were safe before are
5019 * still safe. This is true even if old range < old off,
5020 * since someone could have accessed through (ptr - k), or
5021 * even done ptr -= k in a register, to get a safe access.
5022 */
5023 if (rold->range > rcur->range)
5024 return false;
5025 /* If the offsets don't match, we can't trust our alignment;
5026 * nor can we be sure that we won't fall out of range.
5027 */
5028 if (rold->off != rcur->off)
5029 return false;
5030 /* id relations must be preserved */
5031 if (rold->id && !check_ids(rold->id, rcur->id, idmap))
5032 return false;
5033 /* new val must satisfy old val knowledge */
5034 return range_within(rold, rcur) &&
5035 tnum_in(rold->var_off, rcur->var_off);
5036 case PTR_TO_CTX:
5037 case CONST_PTR_TO_MAP:
f1174f77 5038 case PTR_TO_PACKET_END:
d58e468b 5039 case PTR_TO_FLOW_KEYS:
c64b7983
JS
5040 case PTR_TO_SOCKET:
5041 case PTR_TO_SOCKET_OR_NULL:
f1174f77
EC
5042 /* Only valid matches are exact, which memcmp() above
5043 * would have accepted
5044 */
5045 default:
5046 /* Don't know what's going on, just say it's not safe */
5047 return false;
5048 }
969bf05e 5049
f1174f77
EC
5050 /* Shouldn't get here; if we do, say it's not safe */
5051 WARN_ON_ONCE(1);
969bf05e
AS
5052 return false;
5053}
5054
f4d7e40a
AS
5055static bool stacksafe(struct bpf_func_state *old,
5056 struct bpf_func_state *cur,
638f5b90
AS
5057 struct idpair *idmap)
5058{
5059 int i, spi;
5060
5061 /* if explored stack has more populated slots than current stack
5062 * such stacks are not equivalent
5063 */
5064 if (old->allocated_stack > cur->allocated_stack)
5065 return false;
5066
5067 /* walk slots of the explored stack and ignore any additional
5068 * slots in the current stack, since explored(safe) state
5069 * didn't use them
5070 */
5071 for (i = 0; i < old->allocated_stack; i++) {
5072 spi = i / BPF_REG_SIZE;
5073
cc2b14d5
AS
5074 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ))
5075 /* explored state didn't use this */
fd05e57b 5076 continue;
cc2b14d5 5077
638f5b90
AS
5078 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
5079 continue;
cc2b14d5
AS
5080 /* if old state was safe with misc data in the stack
5081 * it will be safe with zero-initialized stack.
5082 * The opposite is not true
5083 */
5084 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
5085 cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
5086 continue;
638f5b90
AS
5087 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
5088 cur->stack[spi].slot_type[i % BPF_REG_SIZE])
5089 /* Ex: old explored (safe) state has STACK_SPILL in
5090 * this stack slot, but current has has STACK_MISC ->
5091 * this verifier states are not equivalent,
5092 * return false to continue verification of this path
5093 */
5094 return false;
5095 if (i % BPF_REG_SIZE)
5096 continue;
5097 if (old->stack[spi].slot_type[0] != STACK_SPILL)
5098 continue;
5099 if (!regsafe(&old->stack[spi].spilled_ptr,
5100 &cur->stack[spi].spilled_ptr,
5101 idmap))
5102 /* when explored and current stack slot are both storing
5103 * spilled registers, check that stored pointers types
5104 * are the same as well.
5105 * Ex: explored safe path could have stored
5106 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
5107 * but current path has stored:
5108 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
5109 * such verifier states are not equivalent.
5110 * return false to continue verification of this path
5111 */
5112 return false;
5113 }
5114 return true;
5115}
5116
fd978bf7
JS
5117static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
5118{
5119 if (old->acquired_refs != cur->acquired_refs)
5120 return false;
5121 return !memcmp(old->refs, cur->refs,
5122 sizeof(*old->refs) * old->acquired_refs);
5123}
5124
f1bca824
AS
5125/* compare two verifier states
5126 *
5127 * all states stored in state_list are known to be valid, since
5128 * verifier reached 'bpf_exit' instruction through them
5129 *
5130 * this function is called when verifier exploring different branches of
5131 * execution popped from the state stack. If it sees an old state that has
5132 * more strict register state and more strict stack state then this execution
5133 * branch doesn't need to be explored further, since verifier already
5134 * concluded that more strict state leads to valid finish.
5135 *
5136 * Therefore two states are equivalent if register state is more conservative
5137 * and explored stack state is more conservative than the current one.
5138 * Example:
5139 * explored current
5140 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
5141 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
5142 *
5143 * In other words if current stack state (one being explored) has more
5144 * valid slots than old one that already passed validation, it means
5145 * the verifier can stop exploring and conclude that current state is valid too
5146 *
5147 * Similarly with registers. If explored state has register type as invalid
5148 * whereas register type in current state is meaningful, it means that
5149 * the current state will reach 'bpf_exit' instruction safely
5150 */
f4d7e40a
AS
5151static bool func_states_equal(struct bpf_func_state *old,
5152 struct bpf_func_state *cur)
f1bca824 5153{
f1174f77
EC
5154 struct idpair *idmap;
5155 bool ret = false;
f1bca824
AS
5156 int i;
5157
f1174f77
EC
5158 idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
5159 /* If we failed to allocate the idmap, just say it's not safe */
5160 if (!idmap)
1a0dc1ac 5161 return false;
f1174f77
EC
5162
5163 for (i = 0; i < MAX_BPF_REG; i++) {
1b688a19 5164 if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
f1174f77 5165 goto out_free;
f1bca824
AS
5166 }
5167
638f5b90
AS
5168 if (!stacksafe(old, cur, idmap))
5169 goto out_free;
fd978bf7
JS
5170
5171 if (!refsafe(old, cur))
5172 goto out_free;
f1174f77
EC
5173 ret = true;
5174out_free:
5175 kfree(idmap);
5176 return ret;
f1bca824
AS
5177}
5178
f4d7e40a
AS
5179static bool states_equal(struct bpf_verifier_env *env,
5180 struct bpf_verifier_state *old,
5181 struct bpf_verifier_state *cur)
5182{
5183 int i;
5184
5185 if (old->curframe != cur->curframe)
5186 return false;
5187
5188 /* for states to be equal callsites have to be the same
5189 * and all frame states need to be equivalent
5190 */
5191 for (i = 0; i <= old->curframe; i++) {
5192 if (old->frame[i]->callsite != cur->frame[i]->callsite)
5193 return false;
5194 if (!func_states_equal(old->frame[i], cur->frame[i]))
5195 return false;
5196 }
5197 return true;
5198}
5199
8e9cd9ce 5200/* A write screens off any subsequent reads; but write marks come from the
f4d7e40a
AS
5201 * straight-line code between a state and its parent. When we arrive at an
5202 * equivalent state (jump target or such) we didn't arrive by the straight-line
5203 * code, so read marks in the state must propagate to the parent regardless
5204 * of the state's write marks. That's what 'parent == state->parent' comparison
679c782d 5205 * in mark_reg_read() is for.
8e9cd9ce 5206 */
f4d7e40a
AS
5207static int propagate_liveness(struct bpf_verifier_env *env,
5208 const struct bpf_verifier_state *vstate,
5209 struct bpf_verifier_state *vparent)
dc503a8a 5210{
f4d7e40a
AS
5211 int i, frame, err = 0;
5212 struct bpf_func_state *state, *parent;
dc503a8a 5213
f4d7e40a
AS
5214 if (vparent->curframe != vstate->curframe) {
5215 WARN(1, "propagate_live: parent frame %d current frame %d\n",
5216 vparent->curframe, vstate->curframe);
5217 return -EFAULT;
5218 }
dc503a8a
EC
5219 /* Propagate read liveness of registers... */
5220 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
5221 /* We don't need to worry about FP liveness because it's read-only */
5222 for (i = 0; i < BPF_REG_FP; i++) {
f4d7e40a 5223 if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
63f45f84 5224 continue;
f4d7e40a 5225 if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
679c782d
EC
5226 err = mark_reg_read(env, &vstate->frame[vstate->curframe]->regs[i],
5227 &vparent->frame[vstate->curframe]->regs[i]);
f4d7e40a
AS
5228 if (err)
5229 return err;
dc503a8a
EC
5230 }
5231 }
f4d7e40a 5232
dc503a8a 5233 /* ... and stack slots */
f4d7e40a
AS
5234 for (frame = 0; frame <= vstate->curframe; frame++) {
5235 state = vstate->frame[frame];
5236 parent = vparent->frame[frame];
5237 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
5238 i < parent->allocated_stack / BPF_REG_SIZE; i++) {
f4d7e40a
AS
5239 if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
5240 continue;
5241 if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
679c782d
EC
5242 mark_reg_read(env, &state->stack[i].spilled_ptr,
5243 &parent->stack[i].spilled_ptr);
dc503a8a
EC
5244 }
5245 }
f4d7e40a 5246 return err;
dc503a8a
EC
5247}
5248
58e2af8b 5249static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
f1bca824 5250{
58e2af8b
JK
5251 struct bpf_verifier_state_list *new_sl;
5252 struct bpf_verifier_state_list *sl;
679c782d 5253 struct bpf_verifier_state *cur = env->cur_state, *new;
f4d7e40a 5254 int i, j, err;
f1bca824
AS
5255
5256 sl = env->explored_states[insn_idx];
5257 if (!sl)
5258 /* this 'insn_idx' instruction wasn't marked, so we will not
5259 * be doing state search here
5260 */
5261 return 0;
5262
5263 while (sl != STATE_LIST_MARK) {
638f5b90 5264 if (states_equal(env, &sl->state, cur)) {
f1bca824 5265 /* reached equivalent register/stack state,
dc503a8a
EC
5266 * prune the search.
5267 * Registers read by the continuation are read by us.
8e9cd9ce
EC
5268 * If we have any write marks in env->cur_state, they
5269 * will prevent corresponding reads in the continuation
5270 * from reaching our parent (an explored_state). Our
5271 * own state will get the read marks recorded, but
5272 * they'll be immediately forgotten as we're pruning
5273 * this state and will pop a new one.
f1bca824 5274 */
f4d7e40a
AS
5275 err = propagate_liveness(env, &sl->state, cur);
5276 if (err)
5277 return err;
f1bca824 5278 return 1;
dc503a8a 5279 }
f1bca824
AS
5280 sl = sl->next;
5281 }
5282
5283 /* there were no equivalent states, remember current one.
5284 * technically the current state is not proven to be safe yet,
f4d7e40a
AS
5285 * but it will either reach outer most bpf_exit (which means it's safe)
5286 * or it will be rejected. Since there are no loops, we won't be
5287 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
5288 * again on the way to bpf_exit
f1bca824 5289 */
638f5b90 5290 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
f1bca824
AS
5291 if (!new_sl)
5292 return -ENOMEM;
5293
5294 /* add new state to the head of linked list */
679c782d
EC
5295 new = &new_sl->state;
5296 err = copy_verifier_state(new, cur);
1969db47 5297 if (err) {
679c782d 5298 free_verifier_state(new, false);
1969db47
AS
5299 kfree(new_sl);
5300 return err;
5301 }
f1bca824
AS
5302 new_sl->next = env->explored_states[insn_idx];
5303 env->explored_states[insn_idx] = new_sl;
dc503a8a 5304 /* connect new state to parentage chain */
679c782d
EC
5305 for (i = 0; i < BPF_REG_FP; i++)
5306 cur_regs(env)[i].parent = &new->frame[new->curframe]->regs[i];
8e9cd9ce
EC
5307 /* clear write marks in current state: the writes we did are not writes
5308 * our child did, so they don't screen off its reads from us.
5309 * (There are no read marks in current state, because reads always mark
5310 * their parent and current state never has children yet. Only
5311 * explored_states can get read marks.)
5312 */
dc503a8a 5313 for (i = 0; i < BPF_REG_FP; i++)
f4d7e40a
AS
5314 cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE;
5315
5316 /* all stack frames are accessible from callee, clear them all */
5317 for (j = 0; j <= cur->curframe; j++) {
5318 struct bpf_func_state *frame = cur->frame[j];
679c782d 5319 struct bpf_func_state *newframe = new->frame[j];
f4d7e40a 5320
679c782d 5321 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
cc2b14d5 5322 frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
679c782d
EC
5323 frame->stack[i].spilled_ptr.parent =
5324 &newframe->stack[i].spilled_ptr;
5325 }
f4d7e40a 5326 }
f1bca824
AS
5327 return 0;
5328}
5329
c64b7983
JS
5330/* Return true if it's OK to have the same insn return a different type. */
5331static bool reg_type_mismatch_ok(enum bpf_reg_type type)
5332{
5333 switch (type) {
5334 case PTR_TO_CTX:
5335 case PTR_TO_SOCKET:
5336 case PTR_TO_SOCKET_OR_NULL:
5337 return false;
5338 default:
5339 return true;
5340 }
5341}
5342
5343/* If an instruction was previously used with particular pointer types, then we
5344 * need to be careful to avoid cases such as the below, where it may be ok
5345 * for one branch accessing the pointer, but not ok for the other branch:
5346 *
5347 * R1 = sock_ptr
5348 * goto X;
5349 * ...
5350 * R1 = some_other_valid_ptr;
5351 * goto X;
5352 * ...
5353 * R2 = *(u32 *)(R1 + 0);
5354 */
5355static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
5356{
5357 return src != prev && (!reg_type_mismatch_ok(src) ||
5358 !reg_type_mismatch_ok(prev));
5359}
5360
58e2af8b 5361static int do_check(struct bpf_verifier_env *env)
17a52670 5362{
638f5b90 5363 struct bpf_verifier_state *state;
17a52670 5364 struct bpf_insn *insns = env->prog->insnsi;
638f5b90 5365 struct bpf_reg_state *regs;
f4d7e40a 5366 int insn_cnt = env->prog->len, i;
17a52670
AS
5367 int insn_idx, prev_insn_idx = 0;
5368 int insn_processed = 0;
5369 bool do_print_state = false;
5370
638f5b90
AS
5371 state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
5372 if (!state)
5373 return -ENOMEM;
f4d7e40a 5374 state->curframe = 0;
f4d7e40a
AS
5375 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
5376 if (!state->frame[0]) {
5377 kfree(state);
5378 return -ENOMEM;
5379 }
5380 env->cur_state = state;
5381 init_func_state(env, state->frame[0],
5382 BPF_MAIN_FUNC /* callsite */,
5383 0 /* frameno */,
5384 0 /* subprogno, zero == main subprog */);
17a52670
AS
5385 insn_idx = 0;
5386 for (;;) {
5387 struct bpf_insn *insn;
5388 u8 class;
5389 int err;
5390
5391 if (insn_idx >= insn_cnt) {
61bd5218 5392 verbose(env, "invalid insn idx %d insn_cnt %d\n",
17a52670
AS
5393 insn_idx, insn_cnt);
5394 return -EFAULT;
5395 }
5396
5397 insn = &insns[insn_idx];
5398 class = BPF_CLASS(insn->code);
5399
07016151 5400 if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
61bd5218
JK
5401 verbose(env,
5402 "BPF program is too large. Processed %d insn\n",
17a52670
AS
5403 insn_processed);
5404 return -E2BIG;
5405 }
5406
f1bca824
AS
5407 err = is_state_visited(env, insn_idx);
5408 if (err < 0)
5409 return err;
5410 if (err == 1) {
5411 /* found equivalent state, can prune the search */
61bd5218 5412 if (env->log.level) {
f1bca824 5413 if (do_print_state)
61bd5218 5414 verbose(env, "\nfrom %d to %d: safe\n",
f1bca824
AS
5415 prev_insn_idx, insn_idx);
5416 else
61bd5218 5417 verbose(env, "%d: safe\n", insn_idx);
f1bca824
AS
5418 }
5419 goto process_bpf_exit;
5420 }
5421
3c2ce60b
DB
5422 if (need_resched())
5423 cond_resched();
5424
61bd5218
JK
5425 if (env->log.level > 1 || (env->log.level && do_print_state)) {
5426 if (env->log.level > 1)
5427 verbose(env, "%d:", insn_idx);
c5fc9692 5428 else
61bd5218 5429 verbose(env, "\nfrom %d to %d:",
c5fc9692 5430 prev_insn_idx, insn_idx);
f4d7e40a 5431 print_verifier_state(env, state->frame[state->curframe]);
17a52670
AS
5432 do_print_state = false;
5433 }
5434
61bd5218 5435 if (env->log.level) {
7105e828
DB
5436 const struct bpf_insn_cbs cbs = {
5437 .cb_print = verbose,
abe08840 5438 .private_data = env,
7105e828
DB
5439 };
5440
61bd5218 5441 verbose(env, "%d: ", insn_idx);
abe08840 5442 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
17a52670
AS
5443 }
5444
cae1927c
JK
5445 if (bpf_prog_is_dev_bound(env->prog->aux)) {
5446 err = bpf_prog_offload_verify_insn(env, insn_idx,
5447 prev_insn_idx);
5448 if (err)
5449 return err;
5450 }
13a27dfc 5451
638f5b90 5452 regs = cur_regs(env);
c131187d 5453 env->insn_aux_data[insn_idx].seen = true;
fd978bf7 5454
17a52670 5455 if (class == BPF_ALU || class == BPF_ALU64) {
1be7f75d 5456 err = check_alu_op(env, insn);
17a52670
AS
5457 if (err)
5458 return err;
5459
5460 } else if (class == BPF_LDX) {
3df126f3 5461 enum bpf_reg_type *prev_src_type, src_reg_type;
9bac3d6d
AS
5462
5463 /* check for reserved fields is already done */
5464
17a52670 5465 /* check src operand */
dc503a8a 5466 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
5467 if (err)
5468 return err;
5469
dc503a8a 5470 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
17a52670
AS
5471 if (err)
5472 return err;
5473
725f9dcd
AS
5474 src_reg_type = regs[insn->src_reg].type;
5475
17a52670
AS
5476 /* check that memory (src_reg + off) is readable,
5477 * the state of dst_reg will be updated by this func
5478 */
31fd8581 5479 err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
17a52670 5480 BPF_SIZE(insn->code), BPF_READ,
ca369602 5481 insn->dst_reg, false);
17a52670
AS
5482 if (err)
5483 return err;
5484
3df126f3
JK
5485 prev_src_type = &env->insn_aux_data[insn_idx].ptr_type;
5486
5487 if (*prev_src_type == NOT_INIT) {
9bac3d6d
AS
5488 /* saw a valid insn
5489 * dst_reg = *(u32 *)(src_reg + off)
3df126f3 5490 * save type to validate intersecting paths
9bac3d6d 5491 */
3df126f3 5492 *prev_src_type = src_reg_type;
9bac3d6d 5493
c64b7983 5494 } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
9bac3d6d
AS
5495 /* ABuser program is trying to use the same insn
5496 * dst_reg = *(u32*) (src_reg + off)
5497 * with different pointer types:
5498 * src_reg == ctx in one branch and
5499 * src_reg == stack|map in some other branch.
5500 * Reject it.
5501 */
61bd5218 5502 verbose(env, "same insn cannot be used with different pointers\n");
9bac3d6d
AS
5503 return -EINVAL;
5504 }
5505
17a52670 5506 } else if (class == BPF_STX) {
3df126f3 5507 enum bpf_reg_type *prev_dst_type, dst_reg_type;
d691f9e8 5508
17a52670 5509 if (BPF_MODE(insn->code) == BPF_XADD) {
31fd8581 5510 err = check_xadd(env, insn_idx, insn);
17a52670
AS
5511 if (err)
5512 return err;
5513 insn_idx++;
5514 continue;
5515 }
5516
17a52670 5517 /* check src1 operand */
dc503a8a 5518 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
5519 if (err)
5520 return err;
5521 /* check src2 operand */
dc503a8a 5522 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
5523 if (err)
5524 return err;
5525
d691f9e8
AS
5526 dst_reg_type = regs[insn->dst_reg].type;
5527
17a52670 5528 /* check that memory (dst_reg + off) is writeable */
31fd8581 5529 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
17a52670 5530 BPF_SIZE(insn->code), BPF_WRITE,
ca369602 5531 insn->src_reg, false);
17a52670
AS
5532 if (err)
5533 return err;
5534
3df126f3
JK
5535 prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type;
5536
5537 if (*prev_dst_type == NOT_INIT) {
5538 *prev_dst_type = dst_reg_type;
c64b7983 5539 } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
61bd5218 5540 verbose(env, "same insn cannot be used with different pointers\n");
d691f9e8
AS
5541 return -EINVAL;
5542 }
5543
17a52670
AS
5544 } else if (class == BPF_ST) {
5545 if (BPF_MODE(insn->code) != BPF_MEM ||
5546 insn->src_reg != BPF_REG_0) {
61bd5218 5547 verbose(env, "BPF_ST uses reserved fields\n");
17a52670
AS
5548 return -EINVAL;
5549 }
5550 /* check src operand */
dc503a8a 5551 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
5552 if (err)
5553 return err;
5554
f37a8cb8 5555 if (is_ctx_reg(env, insn->dst_reg)) {
9d2be44a 5556 verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
2a159c6f
DB
5557 insn->dst_reg,
5558 reg_type_str[reg_state(env, insn->dst_reg)->type]);
f37a8cb8
DB
5559 return -EACCES;
5560 }
5561
17a52670 5562 /* check that memory (dst_reg + off) is writeable */
31fd8581 5563 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
17a52670 5564 BPF_SIZE(insn->code), BPF_WRITE,
ca369602 5565 -1, false);
17a52670
AS
5566 if (err)
5567 return err;
5568
5569 } else if (class == BPF_JMP) {
5570 u8 opcode = BPF_OP(insn->code);
5571
5572 if (opcode == BPF_CALL) {
5573 if (BPF_SRC(insn->code) != BPF_K ||
5574 insn->off != 0 ||
f4d7e40a
AS
5575 (insn->src_reg != BPF_REG_0 &&
5576 insn->src_reg != BPF_PSEUDO_CALL) ||
17a52670 5577 insn->dst_reg != BPF_REG_0) {
61bd5218 5578 verbose(env, "BPF_CALL uses reserved fields\n");
17a52670
AS
5579 return -EINVAL;
5580 }
5581
f4d7e40a
AS
5582 if (insn->src_reg == BPF_PSEUDO_CALL)
5583 err = check_func_call(env, insn, &insn_idx);
5584 else
5585 err = check_helper_call(env, insn->imm, insn_idx);
17a52670
AS
5586 if (err)
5587 return err;
5588
5589 } else if (opcode == BPF_JA) {
5590 if (BPF_SRC(insn->code) != BPF_K ||
5591 insn->imm != 0 ||
5592 insn->src_reg != BPF_REG_0 ||
5593 insn->dst_reg != BPF_REG_0) {
61bd5218 5594 verbose(env, "BPF_JA uses reserved fields\n");
17a52670
AS
5595 return -EINVAL;
5596 }
5597
5598 insn_idx += insn->off + 1;
5599 continue;
5600
5601 } else if (opcode == BPF_EXIT) {
5602 if (BPF_SRC(insn->code) != BPF_K ||
5603 insn->imm != 0 ||
5604 insn->src_reg != BPF_REG_0 ||
5605 insn->dst_reg != BPF_REG_0) {
61bd5218 5606 verbose(env, "BPF_EXIT uses reserved fields\n");
17a52670
AS
5607 return -EINVAL;
5608 }
5609
f4d7e40a
AS
5610 if (state->curframe) {
5611 /* exit from nested function */
5612 prev_insn_idx = insn_idx;
5613 err = prepare_func_exit(env, &insn_idx);
5614 if (err)
5615 return err;
5616 do_print_state = true;
5617 continue;
5618 }
5619
fd978bf7
JS
5620 err = check_reference_leak(env);
5621 if (err)
5622 return err;
5623
17a52670
AS
5624 /* eBPF calling convetion is such that R0 is used
5625 * to return the value from eBPF program.
5626 * Make sure that it's readable at this time
5627 * of bpf_exit, which means that program wrote
5628 * something into it earlier
5629 */
dc503a8a 5630 err = check_reg_arg(env, BPF_REG_0, SRC_OP);
17a52670
AS
5631 if (err)
5632 return err;
5633
1be7f75d 5634 if (is_pointer_value(env, BPF_REG_0)) {
61bd5218 5635 verbose(env, "R0 leaks addr as return value\n");
1be7f75d
AS
5636 return -EACCES;
5637 }
5638
390ee7e2
AS
5639 err = check_return_code(env);
5640 if (err)
5641 return err;
f1bca824 5642process_bpf_exit:
638f5b90
AS
5643 err = pop_stack(env, &prev_insn_idx, &insn_idx);
5644 if (err < 0) {
5645 if (err != -ENOENT)
5646 return err;
17a52670
AS
5647 break;
5648 } else {
5649 do_print_state = true;
5650 continue;
5651 }
5652 } else {
5653 err = check_cond_jmp_op(env, insn, &insn_idx);
5654 if (err)
5655 return err;
5656 }
5657 } else if (class == BPF_LD) {
5658 u8 mode = BPF_MODE(insn->code);
5659
5660 if (mode == BPF_ABS || mode == BPF_IND) {
ddd872bc
AS
5661 err = check_ld_abs(env, insn);
5662 if (err)
5663 return err;
5664
17a52670
AS
5665 } else if (mode == BPF_IMM) {
5666 err = check_ld_imm(env, insn);
5667 if (err)
5668 return err;
5669
5670 insn_idx++;
c131187d 5671 env->insn_aux_data[insn_idx].seen = true;
17a52670 5672 } else {
61bd5218 5673 verbose(env, "invalid BPF_LD mode\n");
17a52670
AS
5674 return -EINVAL;
5675 }
5676 } else {
61bd5218 5677 verbose(env, "unknown insn class %d\n", class);
17a52670
AS
5678 return -EINVAL;
5679 }
5680
5681 insn_idx++;
5682 }
5683
4bd95f4b
DB
5684 verbose(env, "processed %d insns (limit %d), stack depth ",
5685 insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
f910cefa 5686 for (i = 0; i < env->subprog_cnt; i++) {
9c8105bd 5687 u32 depth = env->subprog_info[i].stack_depth;
f4d7e40a
AS
5688
5689 verbose(env, "%d", depth);
f910cefa 5690 if (i + 1 < env->subprog_cnt)
f4d7e40a
AS
5691 verbose(env, "+");
5692 }
5693 verbose(env, "\n");
9c8105bd 5694 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
17a52670
AS
5695 return 0;
5696}
5697
56f668df
MKL
5698static int check_map_prealloc(struct bpf_map *map)
5699{
5700 return (map->map_type != BPF_MAP_TYPE_HASH &&
bcc6b1b7
MKL
5701 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
5702 map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
56f668df
MKL
5703 !(map->map_flags & BPF_F_NO_PREALLOC);
5704}
5705
61bd5218
JK
5706static int check_map_prog_compatibility(struct bpf_verifier_env *env,
5707 struct bpf_map *map,
fdc15d38
AS
5708 struct bpf_prog *prog)
5709
5710{
56f668df
MKL
5711 /* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
5712 * preallocated hash maps, since doing memory allocation
5713 * in overflow_handler can crash depending on where nmi got
5714 * triggered.
5715 */
5716 if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
5717 if (!check_map_prealloc(map)) {
61bd5218 5718 verbose(env, "perf_event programs can only use preallocated hash map\n");
56f668df
MKL
5719 return -EINVAL;
5720 }
5721 if (map->inner_map_meta &&
5722 !check_map_prealloc(map->inner_map_meta)) {
61bd5218 5723 verbose(env, "perf_event programs can only use preallocated inner hash map\n");
56f668df
MKL
5724 return -EINVAL;
5725 }
fdc15d38 5726 }
a3884572
JK
5727
5728 if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
09728266 5729 !bpf_offload_prog_map_match(prog, map)) {
a3884572
JK
5730 verbose(env, "offload device mismatch between prog and map\n");
5731 return -EINVAL;
5732 }
5733
fdc15d38
AS
5734 return 0;
5735}
5736
b741f163
RG
5737static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
5738{
5739 return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
5740 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
5741}
5742
0246e64d
AS
5743/* look for pseudo eBPF instructions that access map FDs and
5744 * replace them with actual map pointers
5745 */
58e2af8b 5746static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
0246e64d
AS
5747{
5748 struct bpf_insn *insn = env->prog->insnsi;
5749 int insn_cnt = env->prog->len;
fdc15d38 5750 int i, j, err;
0246e64d 5751
f1f7714e 5752 err = bpf_prog_calc_tag(env->prog);
aafe6ae9
DB
5753 if (err)
5754 return err;
5755
0246e64d 5756 for (i = 0; i < insn_cnt; i++, insn++) {
9bac3d6d 5757 if (BPF_CLASS(insn->code) == BPF_LDX &&
d691f9e8 5758 (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
61bd5218 5759 verbose(env, "BPF_LDX uses reserved fields\n");
9bac3d6d
AS
5760 return -EINVAL;
5761 }
5762
d691f9e8
AS
5763 if (BPF_CLASS(insn->code) == BPF_STX &&
5764 ((BPF_MODE(insn->code) != BPF_MEM &&
5765 BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
61bd5218 5766 verbose(env, "BPF_STX uses reserved fields\n");
d691f9e8
AS
5767 return -EINVAL;
5768 }
5769
0246e64d
AS
5770 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
5771 struct bpf_map *map;
5772 struct fd f;
5773
5774 if (i == insn_cnt - 1 || insn[1].code != 0 ||
5775 insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
5776 insn[1].off != 0) {
61bd5218 5777 verbose(env, "invalid bpf_ld_imm64 insn\n");
0246e64d
AS
5778 return -EINVAL;
5779 }
5780
5781 if (insn->src_reg == 0)
5782 /* valid generic load 64-bit imm */
5783 goto next_insn;
5784
5785 if (insn->src_reg != BPF_PSEUDO_MAP_FD) {
61bd5218
JK
5786 verbose(env,
5787 "unrecognized bpf_ld_imm64 insn\n");
0246e64d
AS
5788 return -EINVAL;
5789 }
5790
5791 f = fdget(insn->imm);
c2101297 5792 map = __bpf_map_get(f);
0246e64d 5793 if (IS_ERR(map)) {
61bd5218 5794 verbose(env, "fd %d is not pointing to valid bpf_map\n",
0246e64d 5795 insn->imm);
0246e64d
AS
5796 return PTR_ERR(map);
5797 }
5798
61bd5218 5799 err = check_map_prog_compatibility(env, map, env->prog);
fdc15d38
AS
5800 if (err) {
5801 fdput(f);
5802 return err;
5803 }
5804
0246e64d
AS
5805 /* store map pointer inside BPF_LD_IMM64 instruction */
5806 insn[0].imm = (u32) (unsigned long) map;
5807 insn[1].imm = ((u64) (unsigned long) map) >> 32;
5808
5809 /* check whether we recorded this map already */
5810 for (j = 0; j < env->used_map_cnt; j++)
5811 if (env->used_maps[j] == map) {
5812 fdput(f);
5813 goto next_insn;
5814 }
5815
5816 if (env->used_map_cnt >= MAX_USED_MAPS) {
5817 fdput(f);
5818 return -E2BIG;
5819 }
5820
0246e64d
AS
5821 /* hold the map. If the program is rejected by verifier,
5822 * the map will be released by release_maps() or it
5823 * will be used by the valid program until it's unloaded
ab7f5bf0 5824 * and all maps are released in free_used_maps()
0246e64d 5825 */
92117d84
AS
5826 map = bpf_map_inc(map, false);
5827 if (IS_ERR(map)) {
5828 fdput(f);
5829 return PTR_ERR(map);
5830 }
5831 env->used_maps[env->used_map_cnt++] = map;
5832
b741f163 5833 if (bpf_map_is_cgroup_storage(map) &&
de9cbbaa 5834 bpf_cgroup_storage_assign(env->prog, map)) {
b741f163 5835 verbose(env, "only one cgroup storage of each type is allowed\n");
de9cbbaa
RG
5836 fdput(f);
5837 return -EBUSY;
5838 }
5839
0246e64d
AS
5840 fdput(f);
5841next_insn:
5842 insn++;
5843 i++;
5e581dad
DB
5844 continue;
5845 }
5846
5847 /* Basic sanity check before we invest more work here. */
5848 if (!bpf_opcode_in_insntable(insn->code)) {
5849 verbose(env, "unknown opcode %02x\n", insn->code);
5850 return -EINVAL;
0246e64d
AS
5851 }
5852 }
5853
5854 /* now all pseudo BPF_LD_IMM64 instructions load valid
5855 * 'struct bpf_map *' into a register instead of user map_fd.
5856 * These pointers will be used later by verifier to validate map access.
5857 */
5858 return 0;
5859}
5860
5861/* drop refcnt of maps used by the rejected program */
58e2af8b 5862static void release_maps(struct bpf_verifier_env *env)
0246e64d 5863{
8bad74f9 5864 enum bpf_cgroup_storage_type stype;
0246e64d
AS
5865 int i;
5866
8bad74f9
RG
5867 for_each_cgroup_storage_type(stype) {
5868 if (!env->prog->aux->cgroup_storage[stype])
5869 continue;
de9cbbaa 5870 bpf_cgroup_storage_release(env->prog,
8bad74f9
RG
5871 env->prog->aux->cgroup_storage[stype]);
5872 }
de9cbbaa 5873
0246e64d
AS
5874 for (i = 0; i < env->used_map_cnt; i++)
5875 bpf_map_put(env->used_maps[i]);
5876}
5877
5878/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
58e2af8b 5879static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
0246e64d
AS
5880{
5881 struct bpf_insn *insn = env->prog->insnsi;
5882 int insn_cnt = env->prog->len;
5883 int i;
5884
5885 for (i = 0; i < insn_cnt; i++, insn++)
5886 if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
5887 insn->src_reg = 0;
5888}
5889
8041902d
AS
5890/* single env->prog->insni[off] instruction was replaced with the range
5891 * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
5892 * [0, off) and [off, end) to new locations, so the patched range stays zero
5893 */
5894static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
5895 u32 off, u32 cnt)
5896{
5897 struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
c131187d 5898 int i;
8041902d
AS
5899
5900 if (cnt == 1)
5901 return 0;
fad953ce
KC
5902 new_data = vzalloc(array_size(prog_len,
5903 sizeof(struct bpf_insn_aux_data)));
8041902d
AS
5904 if (!new_data)
5905 return -ENOMEM;
5906 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
5907 memcpy(new_data + off + cnt - 1, old_data + off,
5908 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
c131187d
AS
5909 for (i = off; i < off + cnt - 1; i++)
5910 new_data[i].seen = true;
8041902d
AS
5911 env->insn_aux_data = new_data;
5912 vfree(old_data);
5913 return 0;
5914}
5915
cc8b0b92
AS
5916static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
5917{
5918 int i;
5919
5920 if (len == 1)
5921 return;
4cb3d99c
JW
5922 /* NOTE: fake 'exit' subprog should be updated as well. */
5923 for (i = 0; i <= env->subprog_cnt; i++) {
afd59424 5924 if (env->subprog_info[i].start <= off)
cc8b0b92 5925 continue;
9c8105bd 5926 env->subprog_info[i].start += len - 1;
cc8b0b92
AS
5927 }
5928}
5929
8041902d
AS
5930static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
5931 const struct bpf_insn *patch, u32 len)
5932{
5933 struct bpf_prog *new_prog;
5934
5935 new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
5936 if (!new_prog)
5937 return NULL;
5938 if (adjust_insn_aux_data(env, new_prog->len, off, len))
5939 return NULL;
cc8b0b92 5940 adjust_subprog_starts(env, off, len);
8041902d
AS
5941 return new_prog;
5942}
5943
2a5418a1
DB
5944/* The verifier does more data flow analysis than llvm and will not
5945 * explore branches that are dead at run time. Malicious programs can
5946 * have dead code too. Therefore replace all dead at-run-time code
5947 * with 'ja -1'.
5948 *
5949 * Just nops are not optimal, e.g. if they would sit at the end of the
5950 * program and through another bug we would manage to jump there, then
5951 * we'd execute beyond program memory otherwise. Returning exception
5952 * code also wouldn't work since we can have subprogs where the dead
5953 * code could be located.
c131187d
AS
5954 */
5955static void sanitize_dead_code(struct bpf_verifier_env *env)
5956{
5957 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
2a5418a1 5958 struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
c131187d
AS
5959 struct bpf_insn *insn = env->prog->insnsi;
5960 const int insn_cnt = env->prog->len;
5961 int i;
5962
5963 for (i = 0; i < insn_cnt; i++) {
5964 if (aux_data[i].seen)
5965 continue;
2a5418a1 5966 memcpy(insn + i, &trap, sizeof(trap));
c131187d
AS
5967 }
5968}
5969
c64b7983
JS
5970/* convert load instructions that access fields of a context type into a
5971 * sequence of instructions that access fields of the underlying structure:
5972 * struct __sk_buff -> struct sk_buff
5973 * struct bpf_sock_ops -> struct sock
9bac3d6d 5974 */
58e2af8b 5975static int convert_ctx_accesses(struct bpf_verifier_env *env)
9bac3d6d 5976{
00176a34 5977 const struct bpf_verifier_ops *ops = env->ops;
f96da094 5978 int i, cnt, size, ctx_field_size, delta = 0;
3df126f3 5979 const int insn_cnt = env->prog->len;
36bbef52 5980 struct bpf_insn insn_buf[16], *insn;
46f53a65 5981 u32 target_size, size_default, off;
9bac3d6d 5982 struct bpf_prog *new_prog;
d691f9e8 5983 enum bpf_access_type type;
f96da094 5984 bool is_narrower_load;
9bac3d6d 5985
b09928b9
DB
5986 if (ops->gen_prologue || env->seen_direct_write) {
5987 if (!ops->gen_prologue) {
5988 verbose(env, "bpf verifier is misconfigured\n");
5989 return -EINVAL;
5990 }
36bbef52
DB
5991 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
5992 env->prog);
5993 if (cnt >= ARRAY_SIZE(insn_buf)) {
61bd5218 5994 verbose(env, "bpf verifier is misconfigured\n");
36bbef52
DB
5995 return -EINVAL;
5996 } else if (cnt) {
8041902d 5997 new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
36bbef52
DB
5998 if (!new_prog)
5999 return -ENOMEM;
8041902d 6000
36bbef52 6001 env->prog = new_prog;
3df126f3 6002 delta += cnt - 1;
36bbef52
DB
6003 }
6004 }
6005
c64b7983 6006 if (bpf_prog_is_dev_bound(env->prog->aux))
9bac3d6d
AS
6007 return 0;
6008
3df126f3 6009 insn = env->prog->insnsi + delta;
36bbef52 6010
9bac3d6d 6011 for (i = 0; i < insn_cnt; i++, insn++) {
c64b7983
JS
6012 bpf_convert_ctx_access_t convert_ctx_access;
6013
62c7989b
DB
6014 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
6015 insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
6016 insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
ea2e7ce5 6017 insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
d691f9e8 6018 type = BPF_READ;
62c7989b
DB
6019 else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
6020 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
6021 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
ea2e7ce5 6022 insn->code == (BPF_STX | BPF_MEM | BPF_DW))
d691f9e8
AS
6023 type = BPF_WRITE;
6024 else
9bac3d6d
AS
6025 continue;
6026
af86ca4e
AS
6027 if (type == BPF_WRITE &&
6028 env->insn_aux_data[i + delta].sanitize_stack_off) {
6029 struct bpf_insn patch[] = {
6030 /* Sanitize suspicious stack slot with zero.
6031 * There are no memory dependencies for this store,
6032 * since it's only using frame pointer and immediate
6033 * constant of zero
6034 */
6035 BPF_ST_MEM(BPF_DW, BPF_REG_FP,
6036 env->insn_aux_data[i + delta].sanitize_stack_off,
6037 0),
6038 /* the original STX instruction will immediately
6039 * overwrite the same stack slot with appropriate value
6040 */
6041 *insn,
6042 };
6043
6044 cnt = ARRAY_SIZE(patch);
6045 new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
6046 if (!new_prog)
6047 return -ENOMEM;
6048
6049 delta += cnt - 1;
6050 env->prog = new_prog;
6051 insn = new_prog->insnsi + i + delta;
6052 continue;
6053 }
6054
c64b7983
JS
6055 switch (env->insn_aux_data[i + delta].ptr_type) {
6056 case PTR_TO_CTX:
6057 if (!ops->convert_ctx_access)
6058 continue;
6059 convert_ctx_access = ops->convert_ctx_access;
6060 break;
6061 case PTR_TO_SOCKET:
6062 convert_ctx_access = bpf_sock_convert_ctx_access;
6063 break;
6064 default:
9bac3d6d 6065 continue;
c64b7983 6066 }
9bac3d6d 6067
31fd8581 6068 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
f96da094 6069 size = BPF_LDST_BYTES(insn);
31fd8581
YS
6070
6071 /* If the read access is a narrower load of the field,
6072 * convert to a 4/8-byte load, to minimum program type specific
6073 * convert_ctx_access changes. If conversion is successful,
6074 * we will apply proper mask to the result.
6075 */
f96da094 6076 is_narrower_load = size < ctx_field_size;
46f53a65
AI
6077 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
6078 off = insn->off;
31fd8581 6079 if (is_narrower_load) {
f96da094
DB
6080 u8 size_code;
6081
6082 if (type == BPF_WRITE) {
61bd5218 6083 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
f96da094
DB
6084 return -EINVAL;
6085 }
31fd8581 6086
f96da094 6087 size_code = BPF_H;
31fd8581
YS
6088 if (ctx_field_size == 4)
6089 size_code = BPF_W;
6090 else if (ctx_field_size == 8)
6091 size_code = BPF_DW;
f96da094 6092
bc23105c 6093 insn->off = off & ~(size_default - 1);
31fd8581
YS
6094 insn->code = BPF_LDX | BPF_MEM | size_code;
6095 }
f96da094
DB
6096
6097 target_size = 0;
c64b7983
JS
6098 cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
6099 &target_size);
f96da094
DB
6100 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
6101 (ctx_field_size && !target_size)) {
61bd5218 6102 verbose(env, "bpf verifier is misconfigured\n");
9bac3d6d
AS
6103 return -EINVAL;
6104 }
f96da094
DB
6105
6106 if (is_narrower_load && size < target_size) {
46f53a65
AI
6107 u8 shift = (off & (size_default - 1)) * 8;
6108
6109 if (ctx_field_size <= 4) {
6110 if (shift)
6111 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
6112 insn->dst_reg,
6113 shift);
31fd8581 6114 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
f96da094 6115 (1 << size * 8) - 1);
46f53a65
AI
6116 } else {
6117 if (shift)
6118 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
6119 insn->dst_reg,
6120 shift);
31fd8581 6121 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
f96da094 6122 (1 << size * 8) - 1);
46f53a65 6123 }
31fd8581 6124 }
9bac3d6d 6125
8041902d 6126 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
9bac3d6d
AS
6127 if (!new_prog)
6128 return -ENOMEM;
6129
3df126f3 6130 delta += cnt - 1;
9bac3d6d
AS
6131
6132 /* keep walking new program and skip insns we just inserted */
6133 env->prog = new_prog;
3df126f3 6134 insn = new_prog->insnsi + i + delta;
9bac3d6d
AS
6135 }
6136
6137 return 0;
6138}
6139
1c2a088a
AS
6140static int jit_subprogs(struct bpf_verifier_env *env)
6141{
6142 struct bpf_prog *prog = env->prog, **func, *tmp;
6143 int i, j, subprog_start, subprog_end = 0, len, subprog;
7105e828 6144 struct bpf_insn *insn;
1c2a088a 6145 void *old_bpf_func;
c454a46b 6146 int err;
1c2a088a 6147
f910cefa 6148 if (env->subprog_cnt <= 1)
1c2a088a
AS
6149 return 0;
6150
7105e828 6151 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
1c2a088a
AS
6152 if (insn->code != (BPF_JMP | BPF_CALL) ||
6153 insn->src_reg != BPF_PSEUDO_CALL)
6154 continue;
c7a89784
DB
6155 /* Upon error here we cannot fall back to interpreter but
6156 * need a hard reject of the program. Thus -EFAULT is
6157 * propagated in any case.
6158 */
1c2a088a
AS
6159 subprog = find_subprog(env, i + insn->imm + 1);
6160 if (subprog < 0) {
6161 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
6162 i + insn->imm + 1);
6163 return -EFAULT;
6164 }
6165 /* temporarily remember subprog id inside insn instead of
6166 * aux_data, since next loop will split up all insns into funcs
6167 */
f910cefa 6168 insn->off = subprog;
1c2a088a
AS
6169 /* remember original imm in case JIT fails and fallback
6170 * to interpreter will be needed
6171 */
6172 env->insn_aux_data[i].call_imm = insn->imm;
6173 /* point imm to __bpf_call_base+1 from JITs point of view */
6174 insn->imm = 1;
6175 }
6176
c454a46b
MKL
6177 err = bpf_prog_alloc_jited_linfo(prog);
6178 if (err)
6179 goto out_undo_insn;
6180
6181 err = -ENOMEM;
6396bb22 6182 func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
1c2a088a 6183 if (!func)
c7a89784 6184 goto out_undo_insn;
1c2a088a 6185
f910cefa 6186 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a 6187 subprog_start = subprog_end;
4cb3d99c 6188 subprog_end = env->subprog_info[i + 1].start;
1c2a088a
AS
6189
6190 len = subprog_end - subprog_start;
6191 func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
6192 if (!func[i])
6193 goto out_free;
6194 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
6195 len * sizeof(struct bpf_insn));
4f74d809 6196 func[i]->type = prog->type;
1c2a088a 6197 func[i]->len = len;
4f74d809
DB
6198 if (bpf_prog_calc_tag(func[i]))
6199 goto out_free;
1c2a088a 6200 func[i]->is_func = 1;
ba64e7d8
YS
6201 func[i]->aux->func_idx = i;
6202 /* the btf and func_info will be freed only at prog->aux */
6203 func[i]->aux->btf = prog->aux->btf;
6204 func[i]->aux->func_info = prog->aux->func_info;
6205
1c2a088a
AS
6206 /* Use bpf_prog_F_tag to indicate functions in stack traces.
6207 * Long term would need debug info to populate names
6208 */
6209 func[i]->aux->name[0] = 'F';
9c8105bd 6210 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
1c2a088a 6211 func[i]->jit_requested = 1;
c454a46b
MKL
6212 func[i]->aux->linfo = prog->aux->linfo;
6213 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
6214 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
6215 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
1c2a088a
AS
6216 func[i] = bpf_int_jit_compile(func[i]);
6217 if (!func[i]->jited) {
6218 err = -ENOTSUPP;
6219 goto out_free;
6220 }
6221 cond_resched();
6222 }
6223 /* at this point all bpf functions were successfully JITed
6224 * now populate all bpf_calls with correct addresses and
6225 * run last pass of JIT
6226 */
f910cefa 6227 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
6228 insn = func[i]->insnsi;
6229 for (j = 0; j < func[i]->len; j++, insn++) {
6230 if (insn->code != (BPF_JMP | BPF_CALL) ||
6231 insn->src_reg != BPF_PSEUDO_CALL)
6232 continue;
6233 subprog = insn->off;
1c2a088a
AS
6234 insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
6235 func[subprog]->bpf_func -
6236 __bpf_call_base;
6237 }
2162fed4
SD
6238
6239 /* we use the aux data to keep a list of the start addresses
6240 * of the JITed images for each function in the program
6241 *
6242 * for some architectures, such as powerpc64, the imm field
6243 * might not be large enough to hold the offset of the start
6244 * address of the callee's JITed image from __bpf_call_base
6245 *
6246 * in such cases, we can lookup the start address of a callee
6247 * by using its subprog id, available from the off field of
6248 * the call instruction, as an index for this list
6249 */
6250 func[i]->aux->func = func;
6251 func[i]->aux->func_cnt = env->subprog_cnt;
1c2a088a 6252 }
f910cefa 6253 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
6254 old_bpf_func = func[i]->bpf_func;
6255 tmp = bpf_int_jit_compile(func[i]);
6256 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
6257 verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
c7a89784 6258 err = -ENOTSUPP;
1c2a088a
AS
6259 goto out_free;
6260 }
6261 cond_resched();
6262 }
6263
6264 /* finally lock prog and jit images for all functions and
6265 * populate kallsysm
6266 */
f910cefa 6267 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
6268 bpf_prog_lock_ro(func[i]);
6269 bpf_prog_kallsyms_add(func[i]);
6270 }
7105e828
DB
6271
6272 /* Last step: make now unused interpreter insns from main
6273 * prog consistent for later dump requests, so they can
6274 * later look the same as if they were interpreted only.
6275 */
6276 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
7105e828
DB
6277 if (insn->code != (BPF_JMP | BPF_CALL) ||
6278 insn->src_reg != BPF_PSEUDO_CALL)
6279 continue;
6280 insn->off = env->insn_aux_data[i].call_imm;
6281 subprog = find_subprog(env, i + insn->off + 1);
dbecd738 6282 insn->imm = subprog;
7105e828
DB
6283 }
6284
1c2a088a
AS
6285 prog->jited = 1;
6286 prog->bpf_func = func[0]->bpf_func;
6287 prog->aux->func = func;
f910cefa 6288 prog->aux->func_cnt = env->subprog_cnt;
c454a46b 6289 bpf_prog_free_unused_jited_linfo(prog);
1c2a088a
AS
6290 return 0;
6291out_free:
f910cefa 6292 for (i = 0; i < env->subprog_cnt; i++)
1c2a088a
AS
6293 if (func[i])
6294 bpf_jit_free(func[i]);
6295 kfree(func);
c7a89784 6296out_undo_insn:
1c2a088a
AS
6297 /* cleanup main prog to be interpreted */
6298 prog->jit_requested = 0;
6299 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
6300 if (insn->code != (BPF_JMP | BPF_CALL) ||
6301 insn->src_reg != BPF_PSEUDO_CALL)
6302 continue;
6303 insn->off = 0;
6304 insn->imm = env->insn_aux_data[i].call_imm;
6305 }
c454a46b 6306 bpf_prog_free_jited_linfo(prog);
1c2a088a
AS
6307 return err;
6308}
6309
1ea47e01
AS
6310static int fixup_call_args(struct bpf_verifier_env *env)
6311{
19d28fbd 6312#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1ea47e01
AS
6313 struct bpf_prog *prog = env->prog;
6314 struct bpf_insn *insn = prog->insnsi;
6315 int i, depth;
19d28fbd 6316#endif
e4052d06 6317 int err = 0;
1ea47e01 6318
e4052d06
QM
6319 if (env->prog->jit_requested &&
6320 !bpf_prog_is_dev_bound(env->prog->aux)) {
19d28fbd
DM
6321 err = jit_subprogs(env);
6322 if (err == 0)
1c2a088a 6323 return 0;
c7a89784
DB
6324 if (err == -EFAULT)
6325 return err;
19d28fbd
DM
6326 }
6327#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1ea47e01
AS
6328 for (i = 0; i < prog->len; i++, insn++) {
6329 if (insn->code != (BPF_JMP | BPF_CALL) ||
6330 insn->src_reg != BPF_PSEUDO_CALL)
6331 continue;
6332 depth = get_callee_stack_depth(env, insn, i);
6333 if (depth < 0)
6334 return depth;
6335 bpf_patch_call_args(insn, depth);
6336 }
19d28fbd
DM
6337 err = 0;
6338#endif
6339 return err;
1ea47e01
AS
6340}
6341
79741b3b 6342/* fixup insn->imm field of bpf_call instructions
81ed18ab 6343 * and inline eligible helpers as explicit sequence of BPF instructions
e245c5c6
AS
6344 *
6345 * this function is called after eBPF program passed verification
6346 */
79741b3b 6347static int fixup_bpf_calls(struct bpf_verifier_env *env)
e245c5c6 6348{
79741b3b
AS
6349 struct bpf_prog *prog = env->prog;
6350 struct bpf_insn *insn = prog->insnsi;
e245c5c6 6351 const struct bpf_func_proto *fn;
79741b3b 6352 const int insn_cnt = prog->len;
09772d92 6353 const struct bpf_map_ops *ops;
c93552c4 6354 struct bpf_insn_aux_data *aux;
81ed18ab
AS
6355 struct bpf_insn insn_buf[16];
6356 struct bpf_prog *new_prog;
6357 struct bpf_map *map_ptr;
6358 int i, cnt, delta = 0;
e245c5c6 6359
79741b3b 6360 for (i = 0; i < insn_cnt; i++, insn++) {
f6b1b3bf
DB
6361 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
6362 insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
6363 insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
68fda450 6364 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
f6b1b3bf
DB
6365 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
6366 struct bpf_insn mask_and_div[] = {
6367 BPF_MOV32_REG(insn->src_reg, insn->src_reg),
6368 /* Rx div 0 -> 0 */
6369 BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
6370 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
6371 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
6372 *insn,
6373 };
6374 struct bpf_insn mask_and_mod[] = {
6375 BPF_MOV32_REG(insn->src_reg, insn->src_reg),
6376 /* Rx mod 0 -> Rx */
6377 BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
6378 *insn,
6379 };
6380 struct bpf_insn *patchlet;
6381
6382 if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
6383 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
6384 patchlet = mask_and_div + (is64 ? 1 : 0);
6385 cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
6386 } else {
6387 patchlet = mask_and_mod + (is64 ? 1 : 0);
6388 cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
6389 }
6390
6391 new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
68fda450
AS
6392 if (!new_prog)
6393 return -ENOMEM;
6394
6395 delta += cnt - 1;
6396 env->prog = prog = new_prog;
6397 insn = new_prog->insnsi + i + delta;
6398 continue;
6399 }
6400
e0cea7ce
DB
6401 if (BPF_CLASS(insn->code) == BPF_LD &&
6402 (BPF_MODE(insn->code) == BPF_ABS ||
6403 BPF_MODE(insn->code) == BPF_IND)) {
6404 cnt = env->ops->gen_ld_abs(insn, insn_buf);
6405 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
6406 verbose(env, "bpf verifier is misconfigured\n");
6407 return -EINVAL;
6408 }
6409
6410 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
6411 if (!new_prog)
6412 return -ENOMEM;
6413
6414 delta += cnt - 1;
6415 env->prog = prog = new_prog;
6416 insn = new_prog->insnsi + i + delta;
6417 continue;
6418 }
6419
79741b3b
AS
6420 if (insn->code != (BPF_JMP | BPF_CALL))
6421 continue;
cc8b0b92
AS
6422 if (insn->src_reg == BPF_PSEUDO_CALL)
6423 continue;
e245c5c6 6424
79741b3b
AS
6425 if (insn->imm == BPF_FUNC_get_route_realm)
6426 prog->dst_needed = 1;
6427 if (insn->imm == BPF_FUNC_get_prandom_u32)
6428 bpf_user_rnd_init_once();
9802d865
JB
6429 if (insn->imm == BPF_FUNC_override_return)
6430 prog->kprobe_override = 1;
79741b3b 6431 if (insn->imm == BPF_FUNC_tail_call) {
7b9f6da1
DM
6432 /* If we tail call into other programs, we
6433 * cannot make any assumptions since they can
6434 * be replaced dynamically during runtime in
6435 * the program array.
6436 */
6437 prog->cb_access = 1;
80a58d02 6438 env->prog->aux->stack_depth = MAX_BPF_STACK;
e647815a 6439 env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
7b9f6da1 6440
79741b3b
AS
6441 /* mark bpf_tail_call as different opcode to avoid
6442 * conditional branch in the interpeter for every normal
6443 * call and to prevent accidental JITing by JIT compiler
6444 * that doesn't support bpf_tail_call yet
e245c5c6 6445 */
79741b3b 6446 insn->imm = 0;
71189fa9 6447 insn->code = BPF_JMP | BPF_TAIL_CALL;
b2157399 6448
c93552c4
DB
6449 aux = &env->insn_aux_data[i + delta];
6450 if (!bpf_map_ptr_unpriv(aux))
6451 continue;
6452
b2157399
AS
6453 /* instead of changing every JIT dealing with tail_call
6454 * emit two extra insns:
6455 * if (index >= max_entries) goto out;
6456 * index &= array->index_mask;
6457 * to avoid out-of-bounds cpu speculation
6458 */
c93552c4 6459 if (bpf_map_ptr_poisoned(aux)) {
40950343 6460 verbose(env, "tail_call abusing map_ptr\n");
b2157399
AS
6461 return -EINVAL;
6462 }
c93552c4
DB
6463
6464 map_ptr = BPF_MAP_PTR(aux->map_state);
b2157399
AS
6465 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
6466 map_ptr->max_entries, 2);
6467 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
6468 container_of(map_ptr,
6469 struct bpf_array,
6470 map)->index_mask);
6471 insn_buf[2] = *insn;
6472 cnt = 3;
6473 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
6474 if (!new_prog)
6475 return -ENOMEM;
6476
6477 delta += cnt - 1;
6478 env->prog = prog = new_prog;
6479 insn = new_prog->insnsi + i + delta;
79741b3b
AS
6480 continue;
6481 }
e245c5c6 6482
89c63074 6483 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
09772d92
DB
6484 * and other inlining handlers are currently limited to 64 bit
6485 * only.
89c63074 6486 */
60b58afc 6487 if (prog->jit_requested && BITS_PER_LONG == 64 &&
09772d92
DB
6488 (insn->imm == BPF_FUNC_map_lookup_elem ||
6489 insn->imm == BPF_FUNC_map_update_elem ||
84430d42
DB
6490 insn->imm == BPF_FUNC_map_delete_elem ||
6491 insn->imm == BPF_FUNC_map_push_elem ||
6492 insn->imm == BPF_FUNC_map_pop_elem ||
6493 insn->imm == BPF_FUNC_map_peek_elem)) {
c93552c4
DB
6494 aux = &env->insn_aux_data[i + delta];
6495 if (bpf_map_ptr_poisoned(aux))
6496 goto patch_call_imm;
6497
6498 map_ptr = BPF_MAP_PTR(aux->map_state);
09772d92
DB
6499 ops = map_ptr->ops;
6500 if (insn->imm == BPF_FUNC_map_lookup_elem &&
6501 ops->map_gen_lookup) {
6502 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
6503 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
6504 verbose(env, "bpf verifier is misconfigured\n");
6505 return -EINVAL;
6506 }
81ed18ab 6507
09772d92
DB
6508 new_prog = bpf_patch_insn_data(env, i + delta,
6509 insn_buf, cnt);
6510 if (!new_prog)
6511 return -ENOMEM;
81ed18ab 6512
09772d92
DB
6513 delta += cnt - 1;
6514 env->prog = prog = new_prog;
6515 insn = new_prog->insnsi + i + delta;
6516 continue;
6517 }
81ed18ab 6518
09772d92
DB
6519 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
6520 (void *(*)(struct bpf_map *map, void *key))NULL));
6521 BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
6522 (int (*)(struct bpf_map *map, void *key))NULL));
6523 BUILD_BUG_ON(!__same_type(ops->map_update_elem,
6524 (int (*)(struct bpf_map *map, void *key, void *value,
6525 u64 flags))NULL));
84430d42
DB
6526 BUILD_BUG_ON(!__same_type(ops->map_push_elem,
6527 (int (*)(struct bpf_map *map, void *value,
6528 u64 flags))NULL));
6529 BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
6530 (int (*)(struct bpf_map *map, void *value))NULL));
6531 BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
6532 (int (*)(struct bpf_map *map, void *value))NULL));
6533
09772d92
DB
6534 switch (insn->imm) {
6535 case BPF_FUNC_map_lookup_elem:
6536 insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
6537 __bpf_call_base;
6538 continue;
6539 case BPF_FUNC_map_update_elem:
6540 insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
6541 __bpf_call_base;
6542 continue;
6543 case BPF_FUNC_map_delete_elem:
6544 insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
6545 __bpf_call_base;
6546 continue;
84430d42
DB
6547 case BPF_FUNC_map_push_elem:
6548 insn->imm = BPF_CAST_CALL(ops->map_push_elem) -
6549 __bpf_call_base;
6550 continue;
6551 case BPF_FUNC_map_pop_elem:
6552 insn->imm = BPF_CAST_CALL(ops->map_pop_elem) -
6553 __bpf_call_base;
6554 continue;
6555 case BPF_FUNC_map_peek_elem:
6556 insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
6557 __bpf_call_base;
6558 continue;
09772d92 6559 }
81ed18ab 6560
09772d92 6561 goto patch_call_imm;
81ed18ab
AS
6562 }
6563
6564patch_call_imm:
5e43f899 6565 fn = env->ops->get_func_proto(insn->imm, env->prog);
79741b3b
AS
6566 /* all functions that have prototype and verifier allowed
6567 * programs to call them, must be real in-kernel functions
6568 */
6569 if (!fn->func) {
61bd5218
JK
6570 verbose(env,
6571 "kernel subsystem misconfigured func %s#%d\n",
79741b3b
AS
6572 func_id_name(insn->imm), insn->imm);
6573 return -EFAULT;
e245c5c6 6574 }
79741b3b 6575 insn->imm = fn->func - __bpf_call_base;
e245c5c6 6576 }
e245c5c6 6577
79741b3b
AS
6578 return 0;
6579}
e245c5c6 6580
58e2af8b 6581static void free_states(struct bpf_verifier_env *env)
f1bca824 6582{
58e2af8b 6583 struct bpf_verifier_state_list *sl, *sln;
f1bca824
AS
6584 int i;
6585
6586 if (!env->explored_states)
6587 return;
6588
6589 for (i = 0; i < env->prog->len; i++) {
6590 sl = env->explored_states[i];
6591
6592 if (sl)
6593 while (sl != STATE_LIST_MARK) {
6594 sln = sl->next;
1969db47 6595 free_verifier_state(&sl->state, false);
f1bca824
AS
6596 kfree(sl);
6597 sl = sln;
6598 }
6599 }
6600
6601 kfree(env->explored_states);
6602}
6603
838e9690
YS
6604int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
6605 union bpf_attr __user *uattr)
51580e79 6606{
58e2af8b 6607 struct bpf_verifier_env *env;
b9193c1b 6608 struct bpf_verifier_log *log;
51580e79
AS
6609 int ret = -EINVAL;
6610
eba0c929
AB
6611 /* no program is valid */
6612 if (ARRAY_SIZE(bpf_verifier_ops) == 0)
6613 return -EINVAL;
6614
58e2af8b 6615 /* 'struct bpf_verifier_env' can be global, but since it's not small,
cbd35700
AS
6616 * allocate/free it every time bpf_check() is called
6617 */
58e2af8b 6618 env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
cbd35700
AS
6619 if (!env)
6620 return -ENOMEM;
61bd5218 6621 log = &env->log;
cbd35700 6622
fad953ce
KC
6623 env->insn_aux_data =
6624 vzalloc(array_size(sizeof(struct bpf_insn_aux_data),
6625 (*prog)->len));
3df126f3
JK
6626 ret = -ENOMEM;
6627 if (!env->insn_aux_data)
6628 goto err_free_env;
9bac3d6d 6629 env->prog = *prog;
00176a34 6630 env->ops = bpf_verifier_ops[env->prog->type];
0246e64d 6631
cbd35700
AS
6632 /* grab the mutex to protect few globals used by verifier */
6633 mutex_lock(&bpf_verifier_lock);
6634
6635 if (attr->log_level || attr->log_buf || attr->log_size) {
6636 /* user requested verbose verifier output
6637 * and supplied buffer to store the verification trace
6638 */
e7bf8249
JK
6639 log->level = attr->log_level;
6640 log->ubuf = (char __user *) (unsigned long) attr->log_buf;
6641 log->len_total = attr->log_size;
cbd35700
AS
6642
6643 ret = -EINVAL;
e7bf8249
JK
6644 /* log attributes have to be sane */
6645 if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
6646 !log->level || !log->ubuf)
3df126f3 6647 goto err_unlock;
cbd35700 6648 }
1ad2f583
DB
6649
6650 env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
6651 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
e07b98d9 6652 env->strict_alignment = true;
e9ee9efc
DM
6653 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
6654 env->strict_alignment = false;
cbd35700 6655
f4e3ec0d
JK
6656 ret = replace_map_fd_with_map_ptr(env);
6657 if (ret < 0)
6658 goto skip_full_check;
6659
cae1927c 6660 if (bpf_prog_is_dev_bound(env->prog->aux)) {
a40a2632 6661 ret = bpf_prog_offload_verifier_prep(env->prog);
ab3f0063 6662 if (ret)
f4e3ec0d 6663 goto skip_full_check;
ab3f0063
JK
6664 }
6665
9bac3d6d 6666 env->explored_states = kcalloc(env->prog->len,
58e2af8b 6667 sizeof(struct bpf_verifier_state_list *),
f1bca824
AS
6668 GFP_USER);
6669 ret = -ENOMEM;
6670 if (!env->explored_states)
6671 goto skip_full_check;
6672
cc8b0b92
AS
6673 env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
6674
475fb78f
AS
6675 ret = check_cfg(env);
6676 if (ret < 0)
6677 goto skip_full_check;
6678
c454a46b 6679 ret = check_btf_info(env, attr, uattr);
838e9690
YS
6680 if (ret < 0)
6681 goto skip_full_check;
6682
17a52670 6683 ret = do_check(env);
8c01c4f8
CG
6684 if (env->cur_state) {
6685 free_verifier_state(env->cur_state, true);
6686 env->cur_state = NULL;
6687 }
cbd35700 6688
c941ce9c
QM
6689 if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
6690 ret = bpf_prog_offload_finalize(env);
6691
0246e64d 6692skip_full_check:
638f5b90 6693 while (!pop_stack(env, NULL, NULL));
f1bca824 6694 free_states(env);
0246e64d 6695
c131187d
AS
6696 if (ret == 0)
6697 sanitize_dead_code(env);
6698
70a87ffe
AS
6699 if (ret == 0)
6700 ret = check_max_stack_depth(env);
6701
9bac3d6d
AS
6702 if (ret == 0)
6703 /* program is valid, convert *(u32*)(ctx + off) accesses */
6704 ret = convert_ctx_accesses(env);
6705
e245c5c6 6706 if (ret == 0)
79741b3b 6707 ret = fixup_bpf_calls(env);
e245c5c6 6708
1ea47e01
AS
6709 if (ret == 0)
6710 ret = fixup_call_args(env);
6711
a2a7d570 6712 if (log->level && bpf_verifier_log_full(log))
cbd35700 6713 ret = -ENOSPC;
a2a7d570 6714 if (log->level && !log->ubuf) {
cbd35700 6715 ret = -EFAULT;
a2a7d570 6716 goto err_release_maps;
cbd35700
AS
6717 }
6718
0246e64d
AS
6719 if (ret == 0 && env->used_map_cnt) {
6720 /* if program passed verifier, update used_maps in bpf_prog_info */
9bac3d6d
AS
6721 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
6722 sizeof(env->used_maps[0]),
6723 GFP_KERNEL);
0246e64d 6724
9bac3d6d 6725 if (!env->prog->aux->used_maps) {
0246e64d 6726 ret = -ENOMEM;
a2a7d570 6727 goto err_release_maps;
0246e64d
AS
6728 }
6729
9bac3d6d 6730 memcpy(env->prog->aux->used_maps, env->used_maps,
0246e64d 6731 sizeof(env->used_maps[0]) * env->used_map_cnt);
9bac3d6d 6732 env->prog->aux->used_map_cnt = env->used_map_cnt;
0246e64d
AS
6733
6734 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
6735 * bpf_ld_imm64 instructions
6736 */
6737 convert_pseudo_ld_imm64(env);
6738 }
cbd35700 6739
ba64e7d8
YS
6740 if (ret == 0)
6741 adjust_btf_func(env);
6742
a2a7d570 6743err_release_maps:
9bac3d6d 6744 if (!env->prog->aux->used_maps)
0246e64d 6745 /* if we didn't copy map pointers into bpf_prog_info, release
ab7f5bf0 6746 * them now. Otherwise free_used_maps() will release them.
0246e64d
AS
6747 */
6748 release_maps(env);
9bac3d6d 6749 *prog = env->prog;
3df126f3 6750err_unlock:
cbd35700 6751 mutex_unlock(&bpf_verifier_lock);
3df126f3
JK
6752 vfree(env->insn_aux_data);
6753err_free_env:
6754 kfree(env);
51580e79
AS
6755 return ret;
6756}