kernel/bpf/verifier.c
51580e79 1/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
969bf05e 2 * Copyright (c) 2016 Facebook
fd978bf7 3 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
51580e79
AS
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 */
838e9690 14#include <uapi/linux/btf.h>
51580e79
AS
15#include <linux/kernel.h>
16#include <linux/types.h>
17#include <linux/slab.h>
18#include <linux/bpf.h>
838e9690 19#include <linux/btf.h>
58e2af8b 20#include <linux/bpf_verifier.h>
51580e79
AS
21#include <linux/filter.h>
22#include <net/netlink.h>
23#include <linux/file.h>
24#include <linux/vmalloc.h>
ebb676da 25#include <linux/stringify.h>
cc8b0b92
AS
26#include <linux/bsearch.h>
27#include <linux/sort.h>
c195651e 28#include <linux/perf_event.h>
d9762e84 29#include <linux/ctype.h>
51580e79 30
f4ac7e0b
JK
31#include "disasm.h"
32
00176a34
JK
33static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
34#define BPF_PROG_TYPE(_id, _name) \
35 [_id] = & _name ## _verifier_ops,
36#define BPF_MAP_TYPE(_id, _ops)
37#include <linux/bpf_types.h>
38#undef BPF_PROG_TYPE
39#undef BPF_MAP_TYPE
40};
41
51580e79
AS
42/* bpf_check() is a static code analyzer that walks eBPF program
43 * instruction by instruction and updates register/stack state.
44 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
45 *
46 * The first pass is depth-first-search to check that the program is a DAG.
47 * It rejects the following programs:
48 * - larger than BPF_MAXINSNS insns
49 * - if loop is present (detected via back-edge)
50 * - unreachable insns exist (shouldn't be a forest. program = one function)
51 * - out of bounds or malformed jumps
52 * The second pass is all possible path descent from the 1st insn.
 53 * Since it's analyzing all paths through the program, the length of the
eba38a96 54 * analysis is limited to 64k insn, which may be hit even if total number of
51580e79
AS
 55 * insn is less than 4K, but there are too many branches that change stack/regs.
56 * Number of 'branches to be analyzed' is limited to 1k
57 *
58 * On entry to each instruction, each register has a type, and the instruction
59 * changes the types of the registers depending on instruction semantics.
60 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
61 * copied to R1.
62 *
63 * All registers are 64-bit.
64 * R0 - return register
65 * R1-R5 argument passing registers
66 * R6-R9 callee saved registers
67 * R10 - frame pointer read-only
68 *
69 * At the start of BPF program the register R1 contains a pointer to bpf_context
70 * and has type PTR_TO_CTX.
71 *
72 * Verifier tracks arithmetic operations on pointers in case:
73 * BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
74 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
75 * 1st insn copies R10 (which has FRAME_PTR) type into R1
76 * and 2nd arithmetic instruction is pattern matched to recognize
77 * that it wants to construct a pointer to some element within stack.
78 * So after 2nd insn, the register R1 has type PTR_TO_STACK
79 * (and -20 constant is saved for further stack bounds checking).
80 * Meaning that this reg is a pointer to stack plus known immediate constant.
81 *
f1174f77 82 * Most of the time the registers have SCALAR_VALUE type, which
51580e79 83 * means the register has some value, but it's not a valid pointer.
f1174f77 84 * (like pointer plus pointer becomes SCALAR_VALUE type)
51580e79
AS
85 *
86 * When verifier sees load or store instructions the type of base register
c64b7983
JS
 87 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
 88 * the four pointer types recognized by the check_mem_access() function.
51580e79
AS
89 *
90 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
91 * and the range of [ptr, ptr + map's value_size) is accessible.
92 *
93 * registers used to pass values to function calls are checked against
94 * function argument constraints.
95 *
96 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
97 * It means that the register type passed to this function must be
98 * PTR_TO_STACK and it will be used inside the function as
99 * 'pointer to map element key'
100 *
101 * For example the argument constraints for bpf_map_lookup_elem():
102 * .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
103 * .arg1_type = ARG_CONST_MAP_PTR,
104 * .arg2_type = ARG_PTR_TO_MAP_KEY,
105 *
106 * ret_type says that this function returns 'pointer to map elem value or null'
107 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
108 * 2nd argument should be a pointer to stack, which will be used inside
109 * the helper function as a pointer to map element key.
110 *
111 * On the kernel side the helper function looks like:
112 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
113 * {
114 * struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
115 * void *key = (void *) (unsigned long) r2;
116 * void *value;
117 *
118 * here kernel can access 'key' and 'map' pointers safely, knowing that
119 * [key, key + map->key_size) bytes are valid and were initialized on
120 * the stack of eBPF program.
121 * }
122 *
123 * Corresponding eBPF program may look like:
124 * BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // after this insn R2 type is FRAME_PTR
125 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
126 * BPF_LD_MAP_FD(BPF_REG_1, map_fd), // after this insn R1 type is CONST_PTR_TO_MAP
127 * BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
128 * here verifier looks at prototype of map_lookup_elem() and sees:
129 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
130 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
131 *
132 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
133 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
134 * and were initialized prior to this call.
135 * If it's ok, then verifier allows this BPF_CALL insn and looks at
136 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
137 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 138 * returns either a pointer to map value or NULL.
139 *
140 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
141 * insn, the register holding that pointer in the true branch changes state to
142 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
143 * branch. See check_cond_jmp_op().
144 *
145 * After the call R0 is set to return type of the function and registers R1-R5
146 * are set to NOT_INIT to indicate that they are no longer readable.
fd978bf7
JS
147 *
148 * The following reference types represent a potential reference to a kernel
149 * resource which, after first being allocated, must be checked and freed by
150 * the BPF program:
151 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
152 *
153 * When the verifier sees a helper call return a reference type, it allocates a
154 * pointer id for the reference and stores it in the current function state.
155 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
156 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
157 * passes through a NULL-check conditional. For the branch wherein the state is
158 * changed to CONST_IMM, the verifier releases the reference.
6acc9b43
JS
159 *
160 * For each helper function that allocates a reference, such as
161 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
162 * bpf_sk_release(). When a reference type passes into the release function,
163 * the verifier also releases the reference. If any unchecked or unreleased
164 * reference remains at the end of the program, the verifier rejects it.
51580e79
AS
165 */
166
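/* Illustration (not part of the verifier itself): the NULL-check transition
 * described above, written as a hypothetical instruction sequence with the
 * register type the verifier tracks after each insn:
 *
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 *                                           // R0 = PTR_TO_MAP_VALUE_OR_NULL
 *   BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),  // true branch: R0 = known zero scalar
 *   BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),   // false branch: R0 = PTR_TO_MAP_VALUE,
 *                                           // so this store is allowed
 *   BPF_MOV64_IMM(BPF_REG_0, 0),
 *   BPF_EXIT_INSN(),
 */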
17a52670 167/* verifier_state + insn_idx are pushed to stack when branch is encountered */
58e2af8b 168struct bpf_verifier_stack_elem {
17a52670
AS
 169 /* verifier state is 'st'
170 * before processing instruction 'insn_idx'
171 * and after processing instruction 'prev_insn_idx'
172 */
58e2af8b 173 struct bpf_verifier_state st;
17a52670
AS
174 int insn_idx;
175 int prev_insn_idx;
58e2af8b 176 struct bpf_verifier_stack_elem *next;
cbd35700
AS
177};
178
8e17c1b1 179#define BPF_COMPLEXITY_LIMIT_INSNS 131072
07016151 180#define BPF_COMPLEXITY_LIMIT_STACK 1024
ceefbc96 181#define BPF_COMPLEXITY_LIMIT_STATES 64
07016151 182
c93552c4
DB
183#define BPF_MAP_PTR_UNPRIV 1UL
184#define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \
185 POISON_POINTER_DELTA))
186#define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
187
188static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
189{
190 return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON;
191}
192
193static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
194{
195 return aux->map_state & BPF_MAP_PTR_UNPRIV;
196}
197
198static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
199 const struct bpf_map *map, bool unpriv)
200{
201 BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
202 unpriv |= bpf_map_ptr_unpriv(aux);
203 aux->map_state = (unsigned long)map |
204 (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
205}
fad73a1a 206
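/* Example with illustrative values only: if map == (struct bpf_map *)0xffff888012345000
 * and the lookup was done by an unprivileged program, bpf_map_ptr_store() records
 *
 *   aux->map_state = 0xffff888012345001;   // pointer | BPF_MAP_PTR_UNPRIV
 *
 * and BPF_MAP_PTR(aux->map_state) recovers 0xffff888012345000. Packing the
 * flag into bit 0 relies on struct bpf_map allocations being at least
 * pointer-aligned, so bit 0 of a real map pointer is always clear.
 */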
33ff9823
DB
207struct bpf_call_arg_meta {
208 struct bpf_map *map_ptr;
435faee1 209 bool raw_mode;
36bbef52 210 bool pkt_access;
435faee1
DB
211 int regno;
212 int access_size;
849fa506
YS
213 s64 msize_smax_value;
214 u64 msize_umax_value;
1b986589 215 int ref_obj_id;
d83525ca 216 int func_id;
33ff9823
DB
217};
218
cbd35700
AS
219static DEFINE_MUTEX(bpf_verifier_lock);
220
d9762e84
MKL
221static const struct bpf_line_info *
222find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
223{
224 const struct bpf_line_info *linfo;
225 const struct bpf_prog *prog;
226 u32 i, nr_linfo;
227
228 prog = env->prog;
229 nr_linfo = prog->aux->nr_linfo;
230
231 if (!nr_linfo || insn_off >= prog->len)
232 return NULL;
233
234 linfo = prog->aux->linfo;
235 for (i = 1; i < nr_linfo; i++)
236 if (insn_off < linfo[i].insn_off)
237 break;
238
239 return &linfo[i - 1];
240}
241
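/* Worked example (made-up offsets): with three line_info records whose
 * insn_off values are {0, 5, 9}, find_linfo(env, 7) walks until
 * linfo[2].insn_off (9) exceeds 7 and returns &linfo[1], i.e. the record
 * covering instructions 5..8.
 */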
77d2e05a
MKL
242void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
243 va_list args)
cbd35700 244{
a2a7d570 245 unsigned int n;
cbd35700 246
a2a7d570 247 n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
a2a7d570
JK
248
249 WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
250 "verifier log line truncated - local buffer too short\n");
251
252 n = min(log->len_total - log->len_used - 1, n);
253 log->kbuf[n] = '\0';
254
255 if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
256 log->len_used += n;
257 else
258 log->ubuf = NULL;
cbd35700 259}
abe08840
JO
260
261/* log_level controls verbosity level of eBPF verifier.
262 * bpf_verifier_log_write() is used to dump the verification trace to the log,
263 * so the user can figure out what's wrong with the program
430e68d1 264 */
abe08840
JO
265__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
266 const char *fmt, ...)
267{
268 va_list args;
269
77d2e05a
MKL
270 if (!bpf_verifier_log_needed(&env->log))
271 return;
272
abe08840 273 va_start(args, fmt);
77d2e05a 274 bpf_verifier_vlog(&env->log, fmt, args);
abe08840
JO
275 va_end(args);
276}
277EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
278
279__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
280{
77d2e05a 281 struct bpf_verifier_env *env = private_data;
abe08840
JO
282 va_list args;
283
77d2e05a
MKL
284 if (!bpf_verifier_log_needed(&env->log))
285 return;
286
abe08840 287 va_start(args, fmt);
77d2e05a 288 bpf_verifier_vlog(&env->log, fmt, args);
abe08840
JO
289 va_end(args);
290}
cbd35700 291
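/* For context (user-space side, sketch only; 'insns'/'insn_cnt' stand for the
 * program being loaded): the ubuf/len_total that bpf_verifier_vlog() fills
 * come from the BPF_PROG_LOAD attributes, roughly:
 *
 *   char log[65536];
 *   union bpf_attr attr = {
 *       .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
 *       .insns     = (__u64)(unsigned long)insns,
 *       .insn_cnt  = insn_cnt,
 *       .license   = (__u64)(unsigned long)"GPL",
 *       .log_buf   = (__u64)(unsigned long)log,
 *       .log_size  = sizeof(log),
 *       .log_level = 1,
 *   };
 *   syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 */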
d9762e84
MKL
292static const char *ltrim(const char *s)
293{
294 while (isspace(*s))
295 s++;
296
297 return s;
298}
299
300__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
301 u32 insn_off,
302 const char *prefix_fmt, ...)
303{
304 const struct bpf_line_info *linfo;
305
306 if (!bpf_verifier_log_needed(&env->log))
307 return;
308
309 linfo = find_linfo(env, insn_off);
310 if (!linfo || linfo == env->prev_linfo)
311 return;
312
313 if (prefix_fmt) {
314 va_list args;
315
316 va_start(args, prefix_fmt);
317 bpf_verifier_vlog(&env->log, prefix_fmt, args);
318 va_end(args);
319 }
320
321 verbose(env, "%s\n",
322 ltrim(btf_name_by_offset(env->prog->aux->btf,
323 linfo->line_off)));
324
325 env->prev_linfo = linfo;
326}
327
de8f3a83
DB
328static bool type_is_pkt_pointer(enum bpf_reg_type type)
329{
330 return type == PTR_TO_PACKET ||
331 type == PTR_TO_PACKET_META;
332}
333
46f8bc92
MKL
334static bool type_is_sk_pointer(enum bpf_reg_type type)
335{
336 return type == PTR_TO_SOCKET ||
655a51e5
MKL
337 type == PTR_TO_SOCK_COMMON ||
338 type == PTR_TO_TCP_SOCK;
46f8bc92
MKL
339}
340
840b9615
JS
341static bool reg_type_may_be_null(enum bpf_reg_type type)
342{
fd978bf7 343 return type == PTR_TO_MAP_VALUE_OR_NULL ||
46f8bc92 344 type == PTR_TO_SOCKET_OR_NULL ||
655a51e5
MKL
345 type == PTR_TO_SOCK_COMMON_OR_NULL ||
346 type == PTR_TO_TCP_SOCK_OR_NULL;
fd978bf7
JS
347}
348
d83525ca
AS
349static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
350{
351 return reg->type == PTR_TO_MAP_VALUE &&
352 map_value_has_spin_lock(reg->map_ptr);
353}
354
cba368c1
MKL
355static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
356{
357 return type == PTR_TO_SOCKET ||
358 type == PTR_TO_SOCKET_OR_NULL ||
359 type == PTR_TO_TCP_SOCK ||
360 type == PTR_TO_TCP_SOCK_OR_NULL;
361}
362
1b986589 363static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
fd978bf7 364{
1b986589 365 return type == ARG_PTR_TO_SOCK_COMMON;
fd978bf7
JS
366}
367
368/* Determine whether the function releases some resources allocated by another
369 * function call. The first reference type argument will be assumed to be
370 * released by release_reference().
371 */
372static bool is_release_function(enum bpf_func_id func_id)
373{
6acc9b43 374 return func_id == BPF_FUNC_sk_release;
840b9615
JS
375}
376
46f8bc92
MKL
377static bool is_acquire_function(enum bpf_func_id func_id)
378{
379 return func_id == BPF_FUNC_sk_lookup_tcp ||
380 func_id == BPF_FUNC_sk_lookup_udp;
381}
382
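/* Illustration (hypothetical program fragment, not verifier code): a helper
 * from the acquire set above must be paired with the release helper on every
 * path where the pointer is non-NULL, otherwise the program is rejected for
 * an unreleased reference:
 *
 *   struct bpf_sock *sk;
 *
 *   sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4), -1, 0);
 *   if (sk)                  // non-NULL branch still owns the reference
 *       bpf_sk_release(sk);  // reference dropped; exiting here is fine
 *   // exiting while 'sk' is non-NULL and not released would be rejected
 */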
1b986589
MKL
383static bool is_ptr_cast_function(enum bpf_func_id func_id)
384{
385 return func_id == BPF_FUNC_tcp_sock ||
386 func_id == BPF_FUNC_sk_fullsock;
387}
388
17a52670
AS
389/* string representation of 'enum bpf_reg_type' */
390static const char * const reg_type_str[] = {
391 [NOT_INIT] = "?",
f1174f77 392 [SCALAR_VALUE] = "inv",
17a52670
AS
393 [PTR_TO_CTX] = "ctx",
394 [CONST_PTR_TO_MAP] = "map_ptr",
395 [PTR_TO_MAP_VALUE] = "map_value",
396 [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
17a52670 397 [PTR_TO_STACK] = "fp",
969bf05e 398 [PTR_TO_PACKET] = "pkt",
de8f3a83 399 [PTR_TO_PACKET_META] = "pkt_meta",
969bf05e 400 [PTR_TO_PACKET_END] = "pkt_end",
d58e468b 401 [PTR_TO_FLOW_KEYS] = "flow_keys",
c64b7983
JS
402 [PTR_TO_SOCKET] = "sock",
403 [PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
46f8bc92
MKL
404 [PTR_TO_SOCK_COMMON] = "sock_common",
405 [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
655a51e5
MKL
406 [PTR_TO_TCP_SOCK] = "tcp_sock",
407 [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
17a52670
AS
408};
409
8efea21d
EC
410static char slot_type_char[] = {
411 [STACK_INVALID] = '?',
412 [STACK_SPILL] = 'r',
413 [STACK_MISC] = 'm',
414 [STACK_ZERO] = '0',
415};
416
4e92024a
AS
417static void print_liveness(struct bpf_verifier_env *env,
418 enum bpf_reg_liveness live)
419{
9242b5f5 420 if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
4e92024a
AS
421 verbose(env, "_");
422 if (live & REG_LIVE_READ)
423 verbose(env, "r");
424 if (live & REG_LIVE_WRITTEN)
425 verbose(env, "w");
9242b5f5
AS
426 if (live & REG_LIVE_DONE)
427 verbose(env, "D");
4e92024a
AS
428}
429
f4d7e40a
AS
430static struct bpf_func_state *func(struct bpf_verifier_env *env,
431 const struct bpf_reg_state *reg)
432{
433 struct bpf_verifier_state *cur = env->cur_state;
434
435 return cur->frame[reg->frameno];
436}
437
61bd5218 438static void print_verifier_state(struct bpf_verifier_env *env,
f4d7e40a 439 const struct bpf_func_state *state)
17a52670 440{
f4d7e40a 441 const struct bpf_reg_state *reg;
17a52670
AS
442 enum bpf_reg_type t;
443 int i;
444
f4d7e40a
AS
445 if (state->frameno)
446 verbose(env, " frame%d:", state->frameno);
17a52670 447 for (i = 0; i < MAX_BPF_REG; i++) {
1a0dc1ac
AS
448 reg = &state->regs[i];
449 t = reg->type;
17a52670
AS
450 if (t == NOT_INIT)
451 continue;
4e92024a
AS
452 verbose(env, " R%d", i);
453 print_liveness(env, reg->live);
454 verbose(env, "=%s", reg_type_str[t]);
f1174f77
EC
455 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
456 tnum_is_const(reg->var_off)) {
457 /* reg->off should be 0 for SCALAR_VALUE */
61bd5218 458 verbose(env, "%lld", reg->var_off.value + reg->off);
f4d7e40a
AS
459 if (t == PTR_TO_STACK)
460 verbose(env, ",call_%d", func(env, reg)->callsite);
f1174f77 461 } else {
cba368c1
MKL
462 verbose(env, "(id=%d", reg->id);
463 if (reg_type_may_be_refcounted_or_null(t))
464 verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
f1174f77 465 if (t != SCALAR_VALUE)
61bd5218 466 verbose(env, ",off=%d", reg->off);
de8f3a83 467 if (type_is_pkt_pointer(t))
61bd5218 468 verbose(env, ",r=%d", reg->range);
f1174f77
EC
469 else if (t == CONST_PTR_TO_MAP ||
470 t == PTR_TO_MAP_VALUE ||
471 t == PTR_TO_MAP_VALUE_OR_NULL)
61bd5218 472 verbose(env, ",ks=%d,vs=%d",
f1174f77
EC
473 reg->map_ptr->key_size,
474 reg->map_ptr->value_size);
7d1238f2
EC
475 if (tnum_is_const(reg->var_off)) {
476 /* Typically an immediate SCALAR_VALUE, but
477 * could be a pointer whose offset is too big
478 * for reg->off
479 */
61bd5218 480 verbose(env, ",imm=%llx", reg->var_off.value);
7d1238f2
EC
481 } else {
482 if (reg->smin_value != reg->umin_value &&
483 reg->smin_value != S64_MIN)
61bd5218 484 verbose(env, ",smin_value=%lld",
7d1238f2
EC
485 (long long)reg->smin_value);
486 if (reg->smax_value != reg->umax_value &&
487 reg->smax_value != S64_MAX)
61bd5218 488 verbose(env, ",smax_value=%lld",
7d1238f2
EC
489 (long long)reg->smax_value);
490 if (reg->umin_value != 0)
61bd5218 491 verbose(env, ",umin_value=%llu",
7d1238f2
EC
492 (unsigned long long)reg->umin_value);
493 if (reg->umax_value != U64_MAX)
61bd5218 494 verbose(env, ",umax_value=%llu",
7d1238f2
EC
495 (unsigned long long)reg->umax_value);
496 if (!tnum_is_unknown(reg->var_off)) {
497 char tn_buf[48];
f1174f77 498
7d1238f2 499 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 500 verbose(env, ",var_off=%s", tn_buf);
7d1238f2 501 }
f1174f77 502 }
61bd5218 503 verbose(env, ")");
f1174f77 504 }
17a52670 505 }
638f5b90 506 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
8efea21d
EC
507 char types_buf[BPF_REG_SIZE + 1];
508 bool valid = false;
509 int j;
510
511 for (j = 0; j < BPF_REG_SIZE; j++) {
512 if (state->stack[i].slot_type[j] != STACK_INVALID)
513 valid = true;
514 types_buf[j] = slot_type_char[
515 state->stack[i].slot_type[j]];
516 }
517 types_buf[BPF_REG_SIZE] = 0;
518 if (!valid)
519 continue;
520 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
521 print_liveness(env, state->stack[i].spilled_ptr.live);
522 if (state->stack[i].slot_type[0] == STACK_SPILL)
4e92024a 523 verbose(env, "=%s",
638f5b90 524 reg_type_str[state->stack[i].spilled_ptr.type]);
8efea21d
EC
525 else
526 verbose(env, "=%s", types_buf);
17a52670 527 }
fd978bf7
JS
528 if (state->acquired_refs && state->refs[0].id) {
529 verbose(env, " refs=%d", state->refs[0].id);
530 for (i = 1; i < state->acquired_refs; i++)
531 if (state->refs[i].id)
532 verbose(env, ",%d", state->refs[i].id);
533 }
61bd5218 534 verbose(env, "\n");
17a52670
AS
535}
536
84dbf350
JS
537#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE) \
538static int copy_##NAME##_state(struct bpf_func_state *dst, \
539 const struct bpf_func_state *src) \
540{ \
541 if (!src->FIELD) \
542 return 0; \
543 if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) { \
544 /* internal bug, make state invalid to reject the program */ \
545 memset(dst, 0, sizeof(*dst)); \
546 return -EFAULT; \
547 } \
548 memcpy(dst->FIELD, src->FIELD, \
549 sizeof(*src->FIELD) * (src->COUNT / SIZE)); \
550 return 0; \
638f5b90 551}
fd978bf7
JS
552/* copy_reference_state() */
553COPY_STATE_FN(reference, acquired_refs, refs, 1)
84dbf350
JS
554/* copy_stack_state() */
555COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
556#undef COPY_STATE_FN
557
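/* For reference, COPY_STATE_FN(reference, acquired_refs, refs, 1) above
 * expands to (roughly):
 *
 *   static int copy_reference_state(struct bpf_func_state *dst,
 *                                   const struct bpf_func_state *src)
 *   {
 *       if (!src->refs)
 *           return 0;
 *       if (WARN_ON_ONCE(dst->acquired_refs < src->acquired_refs)) {
 *           memset(dst, 0, sizeof(*dst));
 *           return -EFAULT;
 *       }
 *       memcpy(dst->refs, src->refs,
 *              sizeof(*src->refs) * src->acquired_refs);
 *       return 0;
 *   }
 */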
558#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE) \
559static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
560 bool copy_old) \
561{ \
562 u32 old_size = state->COUNT; \
563 struct bpf_##NAME##_state *new_##FIELD; \
564 int slot = size / SIZE; \
565 \
566 if (size <= old_size || !size) { \
567 if (copy_old) \
568 return 0; \
569 state->COUNT = slot * SIZE; \
570 if (!size && old_size) { \
571 kfree(state->FIELD); \
572 state->FIELD = NULL; \
573 } \
574 return 0; \
575 } \
576 new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
577 GFP_KERNEL); \
578 if (!new_##FIELD) \
579 return -ENOMEM; \
580 if (copy_old) { \
581 if (state->FIELD) \
582 memcpy(new_##FIELD, state->FIELD, \
583 sizeof(*new_##FIELD) * (old_size / SIZE)); \
584 memset(new_##FIELD + old_size / SIZE, 0, \
585 sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
586 } \
587 state->COUNT = slot * SIZE; \
588 kfree(state->FIELD); \
589 state->FIELD = new_##FIELD; \
590 return 0; \
591}
fd978bf7
JS
592/* realloc_reference_state() */
593REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
84dbf350
JS
594/* realloc_stack_state() */
595REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
596#undef REALLOC_STATE_FN
638f5b90
AS
597
598/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
599 * make it consume minimal amount of memory. check_stack_write() access from
f4d7e40a 600 * the program calls into realloc_func_state() to grow the stack size.
84dbf350
JS
601 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
602 * which realloc_stack_state() copies over. It points to previous
603 * bpf_verifier_state which is never reallocated.
638f5b90 604 */
fd978bf7
JS
605static int realloc_func_state(struct bpf_func_state *state, int stack_size,
606 int refs_size, bool copy_old)
638f5b90 607{
fd978bf7
JS
608 int err = realloc_reference_state(state, refs_size, copy_old);
609 if (err)
610 return err;
611 return realloc_stack_state(state, stack_size, copy_old);
612}
613
614/* Acquire a pointer id from the env and update the state->refs to include
615 * this new pointer reference.
616 * On success, returns a valid pointer id to associate with the register
617 * On failure, returns a negative errno.
638f5b90 618 */
fd978bf7 619static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
638f5b90 620{
fd978bf7
JS
621 struct bpf_func_state *state = cur_func(env);
622 int new_ofs = state->acquired_refs;
623 int id, err;
624
625 err = realloc_reference_state(state, state->acquired_refs + 1, true);
626 if (err)
627 return err;
628 id = ++env->id_gen;
629 state->refs[new_ofs].id = id;
630 state->refs[new_ofs].insn_idx = insn_idx;
638f5b90 631
fd978bf7
JS
632 return id;
633}
634
635/* release function corresponding to acquire_reference_state(). Idempotent. */
46f8bc92 636static int release_reference_state(struct bpf_func_state *state, int ptr_id)
fd978bf7
JS
637{
638 int i, last_idx;
639
fd978bf7
JS
640 last_idx = state->acquired_refs - 1;
641 for (i = 0; i < state->acquired_refs; i++) {
642 if (state->refs[i].id == ptr_id) {
643 if (last_idx && i != last_idx)
644 memcpy(&state->refs[i], &state->refs[last_idx],
645 sizeof(*state->refs));
646 memset(&state->refs[last_idx], 0, sizeof(*state->refs));
647 state->acquired_refs--;
638f5b90 648 return 0;
638f5b90 649 }
638f5b90 650 }
46f8bc92 651 return -EINVAL;
fd978bf7
JS
652}
653
654static int transfer_reference_state(struct bpf_func_state *dst,
655 struct bpf_func_state *src)
656{
657 int err = realloc_reference_state(dst, src->acquired_refs, false);
658 if (err)
659 return err;
660 err = copy_reference_state(dst, src);
661 if (err)
662 return err;
638f5b90
AS
663 return 0;
664}
665
f4d7e40a
AS
666static void free_func_state(struct bpf_func_state *state)
667{
5896351e
AS
668 if (!state)
669 return;
fd978bf7 670 kfree(state->refs);
f4d7e40a
AS
671 kfree(state->stack);
672 kfree(state);
673}
674
1969db47
AS
675static void free_verifier_state(struct bpf_verifier_state *state,
676 bool free_self)
638f5b90 677{
f4d7e40a
AS
678 int i;
679
680 for (i = 0; i <= state->curframe; i++) {
681 free_func_state(state->frame[i]);
682 state->frame[i] = NULL;
683 }
1969db47
AS
684 if (free_self)
685 kfree(state);
638f5b90
AS
686}
687
688/* copy verifier state from src to dst growing dst stack space
689 * when necessary to accommodate larger src stack
690 */
f4d7e40a
AS
691static int copy_func_state(struct bpf_func_state *dst,
692 const struct bpf_func_state *src)
638f5b90
AS
693{
694 int err;
695
fd978bf7
JS
696 err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
697 false);
698 if (err)
699 return err;
700 memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
701 err = copy_reference_state(dst, src);
638f5b90
AS
702 if (err)
703 return err;
638f5b90
AS
704 return copy_stack_state(dst, src);
705}
706
f4d7e40a
AS
707static int copy_verifier_state(struct bpf_verifier_state *dst_state,
708 const struct bpf_verifier_state *src)
709{
710 struct bpf_func_state *dst;
711 int i, err;
712
713 /* if dst has more stack frames then src frame, free them */
714 for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
715 free_func_state(dst_state->frame[i]);
716 dst_state->frame[i] = NULL;
717 }
979d63d5 718 dst_state->speculative = src->speculative;
f4d7e40a 719 dst_state->curframe = src->curframe;
d83525ca 720 dst_state->active_spin_lock = src->active_spin_lock;
f4d7e40a
AS
721 for (i = 0; i <= src->curframe; i++) {
722 dst = dst_state->frame[i];
723 if (!dst) {
724 dst = kzalloc(sizeof(*dst), GFP_KERNEL);
725 if (!dst)
726 return -ENOMEM;
727 dst_state->frame[i] = dst;
728 }
729 err = copy_func_state(dst, src->frame[i]);
730 if (err)
731 return err;
732 }
733 return 0;
734}
735
638f5b90
AS
736static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
737 int *insn_idx)
738{
739 struct bpf_verifier_state *cur = env->cur_state;
740 struct bpf_verifier_stack_elem *elem, *head = env->head;
741 int err;
17a52670
AS
742
743 if (env->head == NULL)
638f5b90 744 return -ENOENT;
17a52670 745
638f5b90
AS
746 if (cur) {
747 err = copy_verifier_state(cur, &head->st);
748 if (err)
749 return err;
750 }
751 if (insn_idx)
752 *insn_idx = head->insn_idx;
17a52670 753 if (prev_insn_idx)
638f5b90
AS
754 *prev_insn_idx = head->prev_insn_idx;
755 elem = head->next;
1969db47 756 free_verifier_state(&head->st, false);
638f5b90 757 kfree(head);
17a52670
AS
758 env->head = elem;
759 env->stack_size--;
638f5b90 760 return 0;
17a52670
AS
761}
762
58e2af8b 763static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
979d63d5
DB
764 int insn_idx, int prev_insn_idx,
765 bool speculative)
17a52670 766{
638f5b90 767 struct bpf_verifier_state *cur = env->cur_state;
58e2af8b 768 struct bpf_verifier_stack_elem *elem;
638f5b90 769 int err;
17a52670 770
638f5b90 771 elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
17a52670
AS
772 if (!elem)
773 goto err;
774
17a52670
AS
775 elem->insn_idx = insn_idx;
776 elem->prev_insn_idx = prev_insn_idx;
777 elem->next = env->head;
778 env->head = elem;
779 env->stack_size++;
1969db47
AS
780 err = copy_verifier_state(&elem->st, cur);
781 if (err)
782 goto err;
979d63d5 783 elem->st.speculative |= speculative;
07016151 784 if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
61bd5218 785 verbose(env, "BPF program is too complex\n");
17a52670
AS
786 goto err;
787 }
788 return &elem->st;
789err:
5896351e
AS
790 free_verifier_state(env->cur_state, true);
791 env->cur_state = NULL;
17a52670 792 /* pop all elements and return */
638f5b90 793 while (!pop_stack(env, NULL, NULL));
17a52670
AS
794 return NULL;
795}
796
797#define CALLER_SAVED_REGS 6
798static const int caller_saved[CALLER_SAVED_REGS] = {
799 BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
800};
801
f1174f77
EC
802static void __mark_reg_not_init(struct bpf_reg_state *reg);
803
b03c9f9f
EC
804/* Mark the unknown part of a register (variable offset or scalar value) as
805 * known to have the value @imm.
806 */
807static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
808{
a9c676bc
AS
809 /* Clear id, off, and union(map_ptr, range) */
810 memset(((u8 *)reg) + sizeof(reg->type), 0,
811 offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
b03c9f9f
EC
812 reg->var_off = tnum_const(imm);
813 reg->smin_value = (s64)imm;
814 reg->smax_value = (s64)imm;
815 reg->umin_value = imm;
816 reg->umax_value = imm;
817}
818
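/* Worked example: __mark_reg_known(reg, 5) clears id, off and the
 * map_ptr/range union (reg->type is kept), then leaves the register fully
 * constant: var_off = tnum_const(5) (value 5, mask 0) and
 * smin/smax/umin/umax all equal to 5.
 */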
f1174f77
EC
819/* Mark the 'variable offset' part of a register as zero. This should be
820 * used only on registers holding a pointer type.
821 */
822static void __mark_reg_known_zero(struct bpf_reg_state *reg)
a9789ef9 823{
b03c9f9f 824 __mark_reg_known(reg, 0);
f1174f77 825}
a9789ef9 826
cc2b14d5
AS
827static void __mark_reg_const_zero(struct bpf_reg_state *reg)
828{
829 __mark_reg_known(reg, 0);
cc2b14d5
AS
830 reg->type = SCALAR_VALUE;
831}
832
61bd5218
JK
833static void mark_reg_known_zero(struct bpf_verifier_env *env,
834 struct bpf_reg_state *regs, u32 regno)
f1174f77
EC
835{
836 if (WARN_ON(regno >= MAX_BPF_REG)) {
61bd5218 837 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
f1174f77
EC
838 /* Something bad happened, let's kill all regs */
839 for (regno = 0; regno < MAX_BPF_REG; regno++)
840 __mark_reg_not_init(regs + regno);
841 return;
842 }
843 __mark_reg_known_zero(regs + regno);
844}
845
de8f3a83
DB
846static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
847{
848 return type_is_pkt_pointer(reg->type);
849}
850
851static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
852{
853 return reg_is_pkt_pointer(reg) ||
854 reg->type == PTR_TO_PACKET_END;
855}
856
857/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
858static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
859 enum bpf_reg_type which)
860{
861 /* The register can already have a range from prior markings.
862 * This is fine as long as it hasn't been advanced from its
863 * origin.
864 */
865 return reg->type == which &&
866 reg->id == 0 &&
867 reg->off == 0 &&
868 tnum_equals_const(reg->var_off, 0);
869}
870
b03c9f9f
EC
871/* Attempts to improve min/max values based on var_off information */
872static void __update_reg_bounds(struct bpf_reg_state *reg)
873{
874 /* min signed is max(sign bit) | min(other bits) */
875 reg->smin_value = max_t(s64, reg->smin_value,
876 reg->var_off.value | (reg->var_off.mask & S64_MIN));
877 /* max signed is min(sign bit) | max(other bits) */
878 reg->smax_value = min_t(s64, reg->smax_value,
879 reg->var_off.value | (reg->var_off.mask & S64_MAX));
880 reg->umin_value = max(reg->umin_value, reg->var_off.value);
881 reg->umax_value = min(reg->umax_value,
882 reg->var_off.value | reg->var_off.mask);
883}
884
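/* Worked example: if var_off has value 0x10 and mask 0x0f (bit 4 known set,
 * low four bits unknown), then umin_value is raised to at least 0x10 and
 * umax_value is lowered to at most 0x1f, regardless of what the previous
 * bounds were.
 */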
885/* Uses signed min/max values to inform unsigned, and vice-versa */
886static void __reg_deduce_bounds(struct bpf_reg_state *reg)
887{
888 /* Learn sign from signed bounds.
889 * If we cannot cross the sign boundary, then signed and unsigned bounds
890 * are the same, so combine. This works even in the negative case, e.g.
891 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
892 */
893 if (reg->smin_value >= 0 || reg->smax_value < 0) {
894 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
895 reg->umin_value);
896 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
897 reg->umax_value);
898 return;
899 }
900 /* Learn sign from unsigned bounds. Signed bounds cross the sign
901 * boundary, so we must be careful.
902 */
903 if ((s64)reg->umax_value >= 0) {
904 /* Positive. We can't learn anything from the smin, but smax
905 * is positive, hence safe.
906 */
907 reg->smin_value = reg->umin_value;
908 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
909 reg->umax_value);
910 } else if ((s64)reg->umin_value < 0) {
911 /* Negative. We can't learn anything from the smax, but smin
912 * is negative, hence safe.
913 */
914 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
915 reg->umin_value);
916 reg->smax_value = reg->umax_value;
917 }
918}
919
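/* Worked example of the first branch above: from -3 s<= x s<= -1 alone
 * (smin = -3, smax = -1, both negative) the unsigned bounds become
 * umin = 0xfffffffffffffffd and umax = 0xffffffffffffffff, exactly the
 * two's-complement images of -3 and -1.
 */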
920/* Attempts to improve var_off based on unsigned min/max information */
921static void __reg_bound_offset(struct bpf_reg_state *reg)
922{
923 reg->var_off = tnum_intersect(reg->var_off,
924 tnum_range(reg->umin_value,
925 reg->umax_value));
926}
927
928/* Reset the min/max bounds of a register */
929static void __mark_reg_unbounded(struct bpf_reg_state *reg)
930{
931 reg->smin_value = S64_MIN;
932 reg->smax_value = S64_MAX;
933 reg->umin_value = 0;
934 reg->umax_value = U64_MAX;
935}
936
f1174f77
EC
937/* Mark a register as having a completely unknown (scalar) value. */
938static void __mark_reg_unknown(struct bpf_reg_state *reg)
939{
a9c676bc
AS
940 /*
941 * Clear type, id, off, and union(map_ptr, range) and
942 * padding between 'type' and union
943 */
944 memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
f1174f77 945 reg->type = SCALAR_VALUE;
f1174f77 946 reg->var_off = tnum_unknown;
f4d7e40a 947 reg->frameno = 0;
b03c9f9f 948 __mark_reg_unbounded(reg);
f1174f77
EC
949}
950
61bd5218
JK
951static void mark_reg_unknown(struct bpf_verifier_env *env,
952 struct bpf_reg_state *regs, u32 regno)
f1174f77
EC
953{
954 if (WARN_ON(regno >= MAX_BPF_REG)) {
61bd5218 955 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
19ceb417
AS
956 /* Something bad happened, let's kill all regs except FP */
957 for (regno = 0; regno < BPF_REG_FP; regno++)
f1174f77
EC
958 __mark_reg_not_init(regs + regno);
959 return;
960 }
961 __mark_reg_unknown(regs + regno);
962}
963
964static void __mark_reg_not_init(struct bpf_reg_state *reg)
965{
966 __mark_reg_unknown(reg);
967 reg->type = NOT_INIT;
968}
969
61bd5218
JK
970static void mark_reg_not_init(struct bpf_verifier_env *env,
971 struct bpf_reg_state *regs, u32 regno)
f1174f77
EC
972{
973 if (WARN_ON(regno >= MAX_BPF_REG)) {
61bd5218 974 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
19ceb417
AS
975 /* Something bad happened, let's kill all regs except FP */
976 for (regno = 0; regno < BPF_REG_FP; regno++)
f1174f77
EC
977 __mark_reg_not_init(regs + regno);
978 return;
979 }
980 __mark_reg_not_init(regs + regno);
a9789ef9
DB
981}
982
61bd5218 983static void init_reg_state(struct bpf_verifier_env *env,
f4d7e40a 984 struct bpf_func_state *state)
17a52670 985{
f4d7e40a 986 struct bpf_reg_state *regs = state->regs;
17a52670
AS
987 int i;
988
dc503a8a 989 for (i = 0; i < MAX_BPF_REG; i++) {
61bd5218 990 mark_reg_not_init(env, regs, i);
dc503a8a 991 regs[i].live = REG_LIVE_NONE;
679c782d 992 regs[i].parent = NULL;
dc503a8a 993 }
17a52670
AS
994
995 /* frame pointer */
f1174f77 996 regs[BPF_REG_FP].type = PTR_TO_STACK;
61bd5218 997 mark_reg_known_zero(env, regs, BPF_REG_FP);
f4d7e40a 998 regs[BPF_REG_FP].frameno = state->frameno;
17a52670
AS
999
1000 /* 1st arg to a function */
1001 regs[BPF_REG_1].type = PTR_TO_CTX;
61bd5218 1002 mark_reg_known_zero(env, regs, BPF_REG_1);
6760bf2d
DB
1003}
1004
f4d7e40a
AS
1005#define BPF_MAIN_FUNC (-1)
1006static void init_func_state(struct bpf_verifier_env *env,
1007 struct bpf_func_state *state,
1008 int callsite, int frameno, int subprogno)
1009{
1010 state->callsite = callsite;
1011 state->frameno = frameno;
1012 state->subprogno = subprogno;
1013 init_reg_state(env, state);
1014}
1015
17a52670
AS
1016enum reg_arg_type {
1017 SRC_OP, /* register is used as source operand */
1018 DST_OP, /* register is used as destination operand */
1019 DST_OP_NO_MARK /* same as above, check only, don't mark */
1020};
1021
cc8b0b92
AS
1022static int cmp_subprogs(const void *a, const void *b)
1023{
9c8105bd
JW
1024 return ((struct bpf_subprog_info *)a)->start -
1025 ((struct bpf_subprog_info *)b)->start;
cc8b0b92
AS
1026}
1027
1028static int find_subprog(struct bpf_verifier_env *env, int off)
1029{
9c8105bd 1030 struct bpf_subprog_info *p;
cc8b0b92 1031
9c8105bd
JW
1032 p = bsearch(&off, env->subprog_info, env->subprog_cnt,
1033 sizeof(env->subprog_info[0]), cmp_subprogs);
cc8b0b92
AS
1034 if (!p)
1035 return -ENOENT;
9c8105bd 1036 return p - env->subprog_info;
cc8b0b92
AS
1037
1038}
1039
1040static int add_subprog(struct bpf_verifier_env *env, int off)
1041{
1042 int insn_cnt = env->prog->len;
1043 int ret;
1044
1045 if (off >= insn_cnt || off < 0) {
1046 verbose(env, "call to invalid destination\n");
1047 return -EINVAL;
1048 }
1049 ret = find_subprog(env, off);
1050 if (ret >= 0)
1051 return 0;
4cb3d99c 1052 if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
cc8b0b92
AS
1053 verbose(env, "too many subprograms\n");
1054 return -E2BIG;
1055 }
9c8105bd
JW
1056 env->subprog_info[env->subprog_cnt++].start = off;
1057 sort(env->subprog_info, env->subprog_cnt,
1058 sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
cc8b0b92
AS
1059 return 0;
1060}
1061
1062static int check_subprogs(struct bpf_verifier_env *env)
1063{
1064 int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
9c8105bd 1065 struct bpf_subprog_info *subprog = env->subprog_info;
cc8b0b92
AS
1066 struct bpf_insn *insn = env->prog->insnsi;
1067 int insn_cnt = env->prog->len;
1068
f910cefa
JW
1069 /* Add entry function. */
1070 ret = add_subprog(env, 0);
1071 if (ret < 0)
1072 return ret;
1073
cc8b0b92
AS
1074 /* determine subprog starts. The end is one before the next starts */
1075 for (i = 0; i < insn_cnt; i++) {
1076 if (insn[i].code != (BPF_JMP | BPF_CALL))
1077 continue;
1078 if (insn[i].src_reg != BPF_PSEUDO_CALL)
1079 continue;
1080 if (!env->allow_ptr_leaks) {
1081 verbose(env, "function calls to other bpf functions are allowed for root only\n");
1082 return -EPERM;
1083 }
cc8b0b92
AS
1084 ret = add_subprog(env, i + insn[i].imm + 1);
1085 if (ret < 0)
1086 return ret;
1087 }
1088
4cb3d99c
JW
1089 /* Add a fake 'exit' subprog which could simplify subprog iteration
1090 * logic. 'subprog_cnt' should not be increased.
1091 */
1092 subprog[env->subprog_cnt].start = insn_cnt;
1093
cc8b0b92
AS
1094 if (env->log.level > 1)
1095 for (i = 0; i < env->subprog_cnt; i++)
9c8105bd 1096 verbose(env, "func#%d @%d\n", i, subprog[i].start);
cc8b0b92
AS
1097
1098 /* now check that all jumps are within the same subprog */
4cb3d99c
JW
1099 subprog_start = subprog[cur_subprog].start;
1100 subprog_end = subprog[cur_subprog + 1].start;
cc8b0b92
AS
1101 for (i = 0; i < insn_cnt; i++) {
1102 u8 code = insn[i].code;
1103
092ed096 1104 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
cc8b0b92
AS
1105 goto next;
1106 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
1107 goto next;
1108 off = i + insn[i].off + 1;
1109 if (off < subprog_start || off >= subprog_end) {
1110 verbose(env, "jump out of range from insn %d to %d\n", i, off);
1111 return -EINVAL;
1112 }
1113next:
1114 if (i == subprog_end - 1) {
1115 /* to avoid fall-through from one subprog into another
1116 * the last insn of the subprog should be either exit
1117 * or unconditional jump back
1118 */
1119 if (code != (BPF_JMP | BPF_EXIT) &&
1120 code != (BPF_JMP | BPF_JA)) {
1121 verbose(env, "last insn is not an exit or jmp\n");
1122 return -EINVAL;
1123 }
1124 subprog_start = subprog_end;
4cb3d99c
JW
1125 cur_subprog++;
1126 if (cur_subprog < env->subprog_cnt)
9c8105bd 1127 subprog_end = subprog[cur_subprog + 1].start;
cc8b0b92
AS
1128 }
1129 }
1130 return 0;
1131}
1132
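/* Illustration: for a program whose only bpf-to-bpf call is at insn 10 with
 * imm == +4 (so the callee starts at insn 15), check_subprogs() ends up with
 * subprog_info[].start == {0, 15, insn_cnt}: the entry function, the callee,
 * and the fake 'exit' subprog used as an upper bound.
 */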
679c782d
EC
1133/* Parentage chain of this register (or stack slot) should take care of all
1134 * issues like callee-saved registers, stack slot allocation time, etc.
1135 */
f4d7e40a 1136static int mark_reg_read(struct bpf_verifier_env *env,
679c782d
EC
1137 const struct bpf_reg_state *state,
1138 struct bpf_reg_state *parent)
f4d7e40a
AS
1139{
1140 bool writes = parent == state->parent; /* Observe write marks */
dc503a8a
EC
1141
1142 while (parent) {
1143 /* if read wasn't screened by an earlier write ... */
679c782d 1144 if (writes && state->live & REG_LIVE_WRITTEN)
dc503a8a 1145 break;
9242b5f5
AS
1146 if (parent->live & REG_LIVE_DONE) {
1147 verbose(env, "verifier BUG type %s var_off %lld off %d\n",
1148 reg_type_str[parent->type],
1149 parent->var_off.value, parent->off);
1150 return -EFAULT;
1151 }
dc503a8a 1152 /* ... then we depend on parent's value */
679c782d 1153 parent->live |= REG_LIVE_READ;
dc503a8a
EC
1154 state = parent;
1155 parent = state->parent;
f4d7e40a 1156 writes = true;
dc503a8a 1157 }
f4d7e40a 1158 return 0;
dc503a8a
EC
1159}
1160
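/* Illustration: if a register is read here but was last written in a state
 * two links up the parentage chain, the walk above marks REG_LIVE_READ on
 * each ancestor up to and including the one holding the REG_LIVE_WRITTEN
 * mark, and stops there; older ancestors never learn about the read.
 */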
1161static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
17a52670
AS
1162 enum reg_arg_type t)
1163{
f4d7e40a
AS
1164 struct bpf_verifier_state *vstate = env->cur_state;
1165 struct bpf_func_state *state = vstate->frame[vstate->curframe];
1166 struct bpf_reg_state *regs = state->regs;
dc503a8a 1167
17a52670 1168 if (regno >= MAX_BPF_REG) {
61bd5218 1169 verbose(env, "R%d is invalid\n", regno);
17a52670
AS
1170 return -EINVAL;
1171 }
1172
1173 if (t == SRC_OP) {
1174 /* check whether register used as source operand can be read */
1175 if (regs[regno].type == NOT_INIT) {
61bd5218 1176 verbose(env, "R%d !read_ok\n", regno);
17a52670
AS
1177 return -EACCES;
1178 }
679c782d
EC
1179 /* We don't need to worry about FP liveness because it's read-only */
1180 if (regno != BPF_REG_FP)
1181 return mark_reg_read(env, &regs[regno],
1182 regs[regno].parent);
17a52670
AS
1183 } else {
1184 /* check whether register used as dest operand can be written to */
1185 if (regno == BPF_REG_FP) {
61bd5218 1186 verbose(env, "frame pointer is read only\n");
17a52670
AS
1187 return -EACCES;
1188 }
dc503a8a 1189 regs[regno].live |= REG_LIVE_WRITTEN;
17a52670 1190 if (t == DST_OP)
61bd5218 1191 mark_reg_unknown(env, regs, regno);
17a52670
AS
1192 }
1193 return 0;
1194}
1195
1be7f75d
AS
1196static bool is_spillable_regtype(enum bpf_reg_type type)
1197{
1198 switch (type) {
1199 case PTR_TO_MAP_VALUE:
1200 case PTR_TO_MAP_VALUE_OR_NULL:
1201 case PTR_TO_STACK:
1202 case PTR_TO_CTX:
969bf05e 1203 case PTR_TO_PACKET:
de8f3a83 1204 case PTR_TO_PACKET_META:
969bf05e 1205 case PTR_TO_PACKET_END:
d58e468b 1206 case PTR_TO_FLOW_KEYS:
1be7f75d 1207 case CONST_PTR_TO_MAP:
c64b7983
JS
1208 case PTR_TO_SOCKET:
1209 case PTR_TO_SOCKET_OR_NULL:
46f8bc92
MKL
1210 case PTR_TO_SOCK_COMMON:
1211 case PTR_TO_SOCK_COMMON_OR_NULL:
655a51e5
MKL
1212 case PTR_TO_TCP_SOCK:
1213 case PTR_TO_TCP_SOCK_OR_NULL:
1be7f75d
AS
1214 return true;
1215 default:
1216 return false;
1217 }
1218}
1219
cc2b14d5
AS
1220/* Does this register contain a constant zero? */
1221static bool register_is_null(struct bpf_reg_state *reg)
1222{
1223 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
1224}
1225
17a52670
AS
1226/* check_stack_read/write functions track spill/fill of registers,
1227 * stack boundary and alignment are checked in check_mem_access()
1228 */
61bd5218 1229static int check_stack_write(struct bpf_verifier_env *env,
f4d7e40a 1230 struct bpf_func_state *state, /* func where register points to */
af86ca4e 1231 int off, int size, int value_regno, int insn_idx)
17a52670 1232{
f4d7e40a 1233 struct bpf_func_state *cur; /* state of the current function */
638f5b90 1234 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
f4d7e40a 1235 enum bpf_reg_type type;
638f5b90 1236
f4d7e40a 1237 err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
fd978bf7 1238 state->acquired_refs, true);
638f5b90
AS
1239 if (err)
1240 return err;
9c399760
AS
1241 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
1242 * so it's aligned access and [off, off + size) are within stack limits
1243 */
638f5b90
AS
1244 if (!env->allow_ptr_leaks &&
1245 state->stack[spi].slot_type[0] == STACK_SPILL &&
1246 size != BPF_REG_SIZE) {
1247 verbose(env, "attempt to corrupt spilled pointer on stack\n");
1248 return -EACCES;
1249 }
17a52670 1250
f4d7e40a 1251 cur = env->cur_state->frame[env->cur_state->curframe];
17a52670 1252 if (value_regno >= 0 &&
f4d7e40a 1253 is_spillable_regtype((type = cur->regs[value_regno].type))) {
17a52670
AS
1254
1255 /* register containing pointer is being spilled into stack */
9c399760 1256 if (size != BPF_REG_SIZE) {
61bd5218 1257 verbose(env, "invalid size of register spill\n");
17a52670
AS
1258 return -EACCES;
1259 }
1260
f4d7e40a
AS
1261 if (state != cur && type == PTR_TO_STACK) {
1262 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
1263 return -EINVAL;
1264 }
1265
17a52670 1266 /* save register state */
f4d7e40a 1267 state->stack[spi].spilled_ptr = cur->regs[value_regno];
638f5b90 1268 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
17a52670 1269
af86ca4e
AS
1270 for (i = 0; i < BPF_REG_SIZE; i++) {
1271 if (state->stack[spi].slot_type[i] == STACK_MISC &&
1272 !env->allow_ptr_leaks) {
1273 int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
1274 int soff = (-spi - 1) * BPF_REG_SIZE;
1275
1276 /* detected reuse of integer stack slot with a pointer
1277 * which means either llvm is reusing stack slot or
1278 * an attacker is trying to exploit CVE-2018-3639
1279 * (speculative store bypass)
1280 * Have to sanitize that slot with preemptive
1281 * store of zero.
1282 */
1283 if (*poff && *poff != soff) {
1284 /* disallow programs where single insn stores
1285 * into two different stack slots, since verifier
1286 * cannot sanitize them
1287 */
1288 verbose(env,
1289 "insn %d cannot access two stack slots fp%d and fp%d",
1290 insn_idx, *poff, soff);
1291 return -EINVAL;
1292 }
1293 *poff = soff;
1294 }
638f5b90 1295 state->stack[spi].slot_type[i] = STACK_SPILL;
af86ca4e 1296 }
9c399760 1297 } else {
cc2b14d5
AS
1298 u8 type = STACK_MISC;
1299
679c782d
EC
1300 /* regular write of data into stack destroys any spilled ptr */
1301 state->stack[spi].spilled_ptr.type = NOT_INIT;
0bae2d4d
JW
1302 /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
1303 if (state->stack[spi].slot_type[0] == STACK_SPILL)
1304 for (i = 0; i < BPF_REG_SIZE; i++)
1305 state->stack[spi].slot_type[i] = STACK_MISC;
9c399760 1306
cc2b14d5
AS
1307 /* only mark the slot as written if all 8 bytes were written
1308 * otherwise read propagation may incorrectly stop too soon
1309 * when stack slots are partially written.
1310 * This heuristic means that read propagation will be
1311 * conservative, since it will add reg_live_read marks
 1312 * to stack slots all the way to first state when a program
1313 * writes+reads less than 8 bytes
1314 */
1315 if (size == BPF_REG_SIZE)
1316 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1317
1318 /* when we zero initialize stack slots mark them as such */
1319 if (value_regno >= 0 &&
1320 register_is_null(&cur->regs[value_regno]))
1321 type = STACK_ZERO;
1322
0bae2d4d 1323 /* Mark slots affected by this stack write. */
9c399760 1324 for (i = 0; i < size; i++)
638f5b90 1325 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
cc2b14d5 1326 type;
17a52670
AS
1327 }
1328 return 0;
1329}
1330
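/* Illustration: spilling a pointer with *(u64 *)(r10 - 8) = r1, where r1 is
 * PTR_TO_MAP_VALUE, stores r1's full register state into
 * state->stack[0].spilled_ptr and marks all eight slot_type bytes as
 * STACK_SPILL. A later 4-byte write to the same slot would be the
 * "attempt to corrupt spilled pointer" case rejected above for unprivileged
 * programs.
 */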
61bd5218 1331static int check_stack_read(struct bpf_verifier_env *env,
f4d7e40a
AS
1332 struct bpf_func_state *reg_state /* func where register points to */,
1333 int off, int size, int value_regno)
17a52670 1334{
f4d7e40a
AS
1335 struct bpf_verifier_state *vstate = env->cur_state;
1336 struct bpf_func_state *state = vstate->frame[vstate->curframe];
638f5b90
AS
1337 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
1338 u8 *stype;
17a52670 1339
f4d7e40a 1340 if (reg_state->allocated_stack <= slot) {
638f5b90
AS
1341 verbose(env, "invalid read from stack off %d+0 size %d\n",
1342 off, size);
1343 return -EACCES;
1344 }
f4d7e40a 1345 stype = reg_state->stack[spi].slot_type;
17a52670 1346
638f5b90 1347 if (stype[0] == STACK_SPILL) {
9c399760 1348 if (size != BPF_REG_SIZE) {
61bd5218 1349 verbose(env, "invalid size of register spill\n");
17a52670
AS
1350 return -EACCES;
1351 }
9c399760 1352 for (i = 1; i < BPF_REG_SIZE; i++) {
638f5b90 1353 if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
61bd5218 1354 verbose(env, "corrupted spill memory\n");
17a52670
AS
1355 return -EACCES;
1356 }
1357 }
1358
dc503a8a 1359 if (value_regno >= 0) {
17a52670 1360 /* restore register state from stack */
f4d7e40a 1361 state->regs[value_regno] = reg_state->stack[spi].spilled_ptr;
2f18f62e
AS
1362 /* mark reg as written since spilled pointer state likely
1363 * has its liveness marks cleared by is_state_visited()
1364 * which resets stack/reg liveness for state transitions
1365 */
1366 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
dc503a8a 1367 }
679c782d
EC
1368 mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
1369 reg_state->stack[spi].spilled_ptr.parent);
17a52670
AS
1370 return 0;
1371 } else {
cc2b14d5
AS
1372 int zeros = 0;
1373
17a52670 1374 for (i = 0; i < size; i++) {
cc2b14d5
AS
1375 if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
1376 continue;
1377 if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
1378 zeros++;
1379 continue;
17a52670 1380 }
cc2b14d5
AS
1381 verbose(env, "invalid read from stack off %d+%d size %d\n",
1382 off, i, size);
1383 return -EACCES;
1384 }
679c782d
EC
1385 mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
1386 reg_state->stack[spi].spilled_ptr.parent);
cc2b14d5
AS
1387 if (value_regno >= 0) {
1388 if (zeros == size) {
1389 /* any size read into register is zero extended,
1390 * so the whole register == const_zero
1391 */
1392 __mark_reg_const_zero(&state->regs[value_regno]);
1393 } else {
1394 /* have read misc data from the stack */
1395 mark_reg_unknown(env, state->regs, value_regno);
1396 }
1397 state->regs[value_regno].live |= REG_LIVE_WRITTEN;
17a52670 1398 }
17a52670
AS
1399 return 0;
1400 }
1401}
1402
e4298d25
DB
1403static int check_stack_access(struct bpf_verifier_env *env,
1404 const struct bpf_reg_state *reg,
1405 int off, int size)
1406{
1407 /* Stack accesses must be at a fixed offset, so that we
1408 * can determine what type of data were returned. See
1409 * check_stack_read().
1410 */
1411 if (!tnum_is_const(reg->var_off)) {
1412 char tn_buf[48];
1413
1414 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1415 verbose(env, "variable stack access var_off=%s off=%d size=%d",
1416 tn_buf, off, size);
1417 return -EACCES;
1418 }
1419
1420 if (off >= 0 || off < -MAX_BPF_STACK) {
1421 verbose(env, "invalid stack off=%d size=%d\n", off, size);
1422 return -EACCES;
1423 }
1424
1425 return 0;
1426}
1427
17a52670 1428/* check read/write into map element returned by bpf_map_lookup_elem() */
f1174f77 1429static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
9fd29c08 1430 int size, bool zero_size_allowed)
17a52670 1431{
638f5b90
AS
1432 struct bpf_reg_state *regs = cur_regs(env);
1433 struct bpf_map *map = regs[regno].map_ptr;
17a52670 1434
9fd29c08
YS
1435 if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1436 off + size > map->value_size) {
61bd5218 1437 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
17a52670
AS
1438 map->value_size, off, size);
1439 return -EACCES;
1440 }
1441 return 0;
1442}
1443
f1174f77
EC
1444/* check read/write into a map element with possible variable offset */
1445static int check_map_access(struct bpf_verifier_env *env, u32 regno,
9fd29c08 1446 int off, int size, bool zero_size_allowed)
dbcfe5f7 1447{
f4d7e40a
AS
1448 struct bpf_verifier_state *vstate = env->cur_state;
1449 struct bpf_func_state *state = vstate->frame[vstate->curframe];
dbcfe5f7
GB
1450 struct bpf_reg_state *reg = &state->regs[regno];
1451 int err;
1452
f1174f77
EC
1453 /* We may have adjusted the register to this map value, so we
1454 * need to try adding each of min_value and max_value to off
1455 * to make sure our theoretical access will be safe.
dbcfe5f7 1456 */
61bd5218
JK
1457 if (env->log.level)
1458 print_verifier_state(env, state);
b7137c4e 1459
dbcfe5f7
GB
1460 /* The minimum value is only important with signed
1461 * comparisons where we can't assume the floor of a
1462 * value is 0. If we are using signed variables for our
 1464 * indexes we need to make sure that whatever we use
1464 * will have a set floor within our range.
1465 */
b7137c4e
DB
1466 if (reg->smin_value < 0 &&
1467 (reg->smin_value == S64_MIN ||
1468 (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
1469 reg->smin_value + off < 0)) {
61bd5218 1470 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
dbcfe5f7
GB
1471 regno);
1472 return -EACCES;
1473 }
9fd29c08
YS
1474 err = __check_map_access(env, regno, reg->smin_value + off, size,
1475 zero_size_allowed);
dbcfe5f7 1476 if (err) {
61bd5218
JK
1477 verbose(env, "R%d min value is outside of the array range\n",
1478 regno);
dbcfe5f7
GB
1479 return err;
1480 }
1481
b03c9f9f
EC
1482 /* If we haven't set a max value then we need to bail since we can't be
1483 * sure we won't do bad things.
1484 * If reg->umax_value + off could overflow, treat that as unbounded too.
dbcfe5f7 1485 */
b03c9f9f 1486 if (reg->umax_value >= BPF_MAX_VAR_OFF) {
61bd5218 1487 verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
dbcfe5f7
GB
1488 regno);
1489 return -EACCES;
1490 }
9fd29c08
YS
1491 err = __check_map_access(env, regno, reg->umax_value + off, size,
1492 zero_size_allowed);
f1174f77 1493 if (err)
61bd5218
JK
1494 verbose(env, "R%d max value is outside of the array range\n",
1495 regno);
d83525ca
AS
1496
1497 if (map_value_has_spin_lock(reg->map_ptr)) {
1498 u32 lock = reg->map_ptr->spin_lock_off;
1499
1500 /* if any part of struct bpf_spin_lock can be touched by
1501 * load/store reject this program.
1502 * To check that [x1, x2) overlaps with [y1, y2)
1503 * it is sufficient to check x1 < y2 && y1 < x2.
1504 */
1505 if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
1506 lock < reg->umax_value + off + size) {
1507 verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
1508 return -EACCES;
1509 }
1510 }
f1174f77 1511 return err;
dbcfe5f7
GB
1512}
1513
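/* Worked example: for a map with value_size == 8, an access with off == 0,
 * size == 4 and a register whose smin_value is -4 fails the smin check above
 * ("min value is negative"), while a register known to be in [0, 4] passes:
 * both 0 + 0 + 4 and 4 + 0 + 4 stay within the 8-byte value.
 */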
969bf05e
AS
1514#define MAX_PACKET_OFF 0xffff
1515
58e2af8b 1516static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
3a0af8fd
TG
1517 const struct bpf_call_arg_meta *meta,
1518 enum bpf_access_type t)
4acf6c0b 1519{
36bbef52 1520 switch (env->prog->type) {
5d66fa7d 1521 /* Program types only with direct read access go here! */
3a0af8fd
TG
1522 case BPF_PROG_TYPE_LWT_IN:
1523 case BPF_PROG_TYPE_LWT_OUT:
004d4b27 1524 case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2dbb9b9e 1525 case BPF_PROG_TYPE_SK_REUSEPORT:
5d66fa7d 1526 case BPF_PROG_TYPE_FLOW_DISSECTOR:
d5563d36 1527 case BPF_PROG_TYPE_CGROUP_SKB:
3a0af8fd
TG
1528 if (t == BPF_WRITE)
1529 return false;
7e57fbb2 1530 /* fallthrough */
5d66fa7d
DB
1531
1532 /* Program types with direct read + write access go here! */
36bbef52
DB
1533 case BPF_PROG_TYPE_SCHED_CLS:
1534 case BPF_PROG_TYPE_SCHED_ACT:
4acf6c0b 1535 case BPF_PROG_TYPE_XDP:
3a0af8fd 1536 case BPF_PROG_TYPE_LWT_XMIT:
8a31db56 1537 case BPF_PROG_TYPE_SK_SKB:
4f738adb 1538 case BPF_PROG_TYPE_SK_MSG:
36bbef52
DB
1539 if (meta)
1540 return meta->pkt_access;
1541
1542 env->seen_direct_write = true;
4acf6c0b
BB
1543 return true;
1544 default:
1545 return false;
1546 }
1547}
1548
f1174f77 1549static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
9fd29c08 1550 int off, int size, bool zero_size_allowed)
969bf05e 1551{
638f5b90 1552 struct bpf_reg_state *regs = cur_regs(env);
58e2af8b 1553 struct bpf_reg_state *reg = &regs[regno];
969bf05e 1554
9fd29c08
YS
1555 if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
1556 (u64)off + size > reg->range) {
61bd5218 1557 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
d91b28ed 1558 off, size, regno, reg->id, reg->off, reg->range);
969bf05e
AS
1559 return -EACCES;
1560 }
1561 return 0;
1562}
1563
f1174f77 1564static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
9fd29c08 1565 int size, bool zero_size_allowed)
f1174f77 1566{
638f5b90 1567 struct bpf_reg_state *regs = cur_regs(env);
f1174f77
EC
1568 struct bpf_reg_state *reg = &regs[regno];
1569 int err;
1570
1571 /* We may have added a variable offset to the packet pointer; but any
1572 * reg->range we have comes after that. We are only checking the fixed
1573 * offset.
1574 */
1575
1576 /* We don't allow negative numbers, because we aren't tracking enough
1577 * detail to prove they're safe.
1578 */
b03c9f9f 1579 if (reg->smin_value < 0) {
61bd5218 1581 verbose(env, "R%d min value is negative, either use unsigned index or do an if (index >= 0) check.\n",
f1174f77
EC
1581 regno);
1582 return -EACCES;
1583 }
9fd29c08 1584 err = __check_packet_access(env, regno, off, size, zero_size_allowed);
f1174f77 1585 if (err) {
61bd5218 1586 verbose(env, "R%d offset is outside of the packet\n", regno);
f1174f77
EC
1587 return err;
1588 }
e647815a
JW
1589
1590 /* __check_packet_access has made sure "off + size - 1" is within u16.
1591 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
 1592	 * otherwise find_good_pkt_pointers would have refused to set a range
 1593	 * that __check_packet_access would then have rejected for this pkt access.
1594 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
1595 */
1596 env->prog->aux->max_pkt_offset =
1597 max_t(u32, env->prog->aux->max_pkt_offset,
1598 off + reg->umax_value + size - 1);
1599
f1174f77
EC
1600 return err;
1601}
1602
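/* Editorial illustration (not part of the original source): a small worked
 * example of the max_pkt_offset bookkeeping above.  Assume a program has
 * proven "pkt + var + 4 <= data_end" where the verifier knows var's
 * umax_value is 60, and then loads 4 bytes at fixed offset 0 through that
 * packet pointer.  With off=0, size=4 and reg->umax_value=60 the tracked
 * maximum becomes max(prev, 0 + 60 + 4 - 1) = 63, i.e. consumers of
 * max_pkt_offset (such as hardware offload) may assume the program never
 * reads packet bytes past offset 63.
 */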
1603/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
31fd8581 1604static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
19de99f7 1605 enum bpf_access_type t, enum bpf_reg_type *reg_type)
17a52670 1606{
f96da094
DB
1607 struct bpf_insn_access_aux info = {
1608 .reg_type = *reg_type,
1609 };
31fd8581 1610
4f9218aa 1611 if (env->ops->is_valid_access &&
5e43f899 1612 env->ops->is_valid_access(off, size, t, env->prog, &info)) {
f96da094
DB
 1613	 /* A non-zero info.ctx_field_size indicates that this field is a
 1614	 * candidate for later verifier transformation to load the whole
 1615	 * field and then apply a mask when accessed with a narrower
 1616	 * access than the actual ctx access size. A zero info.ctx_field_size
 1617	 * only allows whole-field access and rejects any other
 1618	 * type of narrower access.
31fd8581 1619 */
23994631 1620 *reg_type = info.reg_type;
31fd8581 1621
4f9218aa 1622 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
32bbe007
AS
1623 /* remember the offset of last byte accessed in ctx */
1624 if (env->prog->aux->max_ctx_offset < off + size)
1625 env->prog->aux->max_ctx_offset = off + size;
17a52670 1626 return 0;
32bbe007 1627 }
17a52670 1628
61bd5218 1629 verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
17a52670
AS
1630 return -EACCES;
1631}
1632
d58e468b
PP
1633static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
1634 int size)
1635{
1636 if (size < 0 || off < 0 ||
1637 (u64)off + size > sizeof(struct bpf_flow_keys)) {
1638 verbose(env, "invalid access to flow keys off=%d size=%d\n",
1639 off, size);
1640 return -EACCES;
1641 }
1642 return 0;
1643}
1644
5f456649
MKL
1645static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
1646 u32 regno, int off, int size,
1647 enum bpf_access_type t)
c64b7983
JS
1648{
1649 struct bpf_reg_state *regs = cur_regs(env);
1650 struct bpf_reg_state *reg = &regs[regno];
5f456649 1651 struct bpf_insn_access_aux info = {};
46f8bc92 1652 bool valid;
c64b7983
JS
1653
1654 if (reg->smin_value < 0) {
 1655	 verbose(env, "R%d min value is negative, either use unsigned index or do an if (index >= 0) check.\n",
1656 regno);
1657 return -EACCES;
1658 }
1659
46f8bc92
MKL
1660 switch (reg->type) {
1661 case PTR_TO_SOCK_COMMON:
1662 valid = bpf_sock_common_is_valid_access(off, size, t, &info);
1663 break;
1664 case PTR_TO_SOCKET:
1665 valid = bpf_sock_is_valid_access(off, size, t, &info);
1666 break;
655a51e5
MKL
1667 case PTR_TO_TCP_SOCK:
1668 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
1669 break;
46f8bc92
MKL
1670 default:
1671 valid = false;
c64b7983
JS
1672 }
1673
5f456649 1674
46f8bc92
MKL
1675 if (valid) {
1676 env->insn_aux_data[insn_idx].ctx_field_size =
1677 info.ctx_field_size;
1678 return 0;
1679 }
1680
1681 verbose(env, "R%d invalid %s access off=%d size=%d\n",
1682 regno, reg_type_str[reg->type], off, size);
1683
1684 return -EACCES;
c64b7983
JS
1685}
1686
4cabc5b1
DB
1687static bool __is_pointer_value(bool allow_ptr_leaks,
1688 const struct bpf_reg_state *reg)
1be7f75d 1689{
4cabc5b1 1690 if (allow_ptr_leaks)
1be7f75d
AS
1691 return false;
1692
f1174f77 1693 return reg->type != SCALAR_VALUE;
1be7f75d
AS
1694}
1695
2a159c6f
DB
1696static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
1697{
1698 return cur_regs(env) + regno;
1699}
1700
4cabc5b1
DB
1701static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
1702{
2a159c6f 1703 return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
4cabc5b1
DB
1704}
1705
f37a8cb8
DB
1706static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
1707{
2a159c6f 1708 const struct bpf_reg_state *reg = reg_state(env, regno);
f37a8cb8 1709
46f8bc92
MKL
1710 return reg->type == PTR_TO_CTX;
1711}
1712
1713static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
1714{
1715 const struct bpf_reg_state *reg = reg_state(env, regno);
1716
1717 return type_is_sk_pointer(reg->type);
f37a8cb8
DB
1718}
1719
ca369602
DB
1720static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
1721{
2a159c6f 1722 const struct bpf_reg_state *reg = reg_state(env, regno);
ca369602
DB
1723
1724 return type_is_pkt_pointer(reg->type);
1725}
1726
4b5defde
DB
1727static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
1728{
1729 const struct bpf_reg_state *reg = reg_state(env, regno);
1730
 1731	 /* Separate from is_ctx_reg() since we still want to allow BPF_ST here. */
1732 return reg->type == PTR_TO_FLOW_KEYS;
1733}
1734
61bd5218
JK
1735static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
1736 const struct bpf_reg_state *reg,
d1174416 1737 int off, int size, bool strict)
969bf05e 1738{
f1174f77 1739 struct tnum reg_off;
e07b98d9 1740 int ip_align;
d1174416
DM
1741
1742 /* Byte size accesses are always allowed. */
1743 if (!strict || size == 1)
1744 return 0;
1745
e4eda884
DM
1746 /* For platforms that do not have a Kconfig enabling
1747 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
1748 * NET_IP_ALIGN is universally set to '2'. And on platforms
1749 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
1750 * to this code only in strict mode where we want to emulate
1751 * the NET_IP_ALIGN==2 checking. Therefore use an
1752 * unconditional IP align value of '2'.
e07b98d9 1753 */
e4eda884 1754 ip_align = 2;
f1174f77
EC
1755
1756 reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
1757 if (!tnum_is_aligned(reg_off, size)) {
1758 char tn_buf[48];
1759
1760 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218
JK
1761 verbose(env,
1762 "misaligned packet access off %d+%s+%d+%d size %d\n",
f1174f77 1763 ip_align, tn_buf, reg->off, off, size);
969bf05e
AS
1764 return -EACCES;
1765 }
79adffcd 1766
969bf05e
AS
1767 return 0;
1768}
1769
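/* Editorial illustration (not part of the original source): a concrete case
 * for the strict packet alignment check above.  With the unconditional
 * ip_align of 2, a packet pointer with reg->off == 0 and a known-zero
 * var_off yields reg_off = 2 + 0 + off.  A 4-byte load at off=0 tests
 * whether 2 is 4-byte aligned (it is not) and is rejected in strict mode,
 * while a 2-byte load at off=0 or a 4-byte load at off=2 (2 + 2 = 4)
 * passes.  Single-byte loads never reach this point because of the early
 * "size == 1" return.
 */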
61bd5218
JK
1770static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
1771 const struct bpf_reg_state *reg,
f1174f77
EC
1772 const char *pointer_desc,
1773 int off, int size, bool strict)
79adffcd 1774{
f1174f77
EC
1775 struct tnum reg_off;
1776
1777 /* Byte size accesses are always allowed. */
1778 if (!strict || size == 1)
1779 return 0;
1780
1781 reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
1782 if (!tnum_is_aligned(reg_off, size)) {
1783 char tn_buf[48];
1784
1785 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 1786 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
f1174f77 1787 pointer_desc, tn_buf, reg->off, off, size);
79adffcd
DB
1788 return -EACCES;
1789 }
1790
969bf05e
AS
1791 return 0;
1792}
1793
e07b98d9 1794static int check_ptr_alignment(struct bpf_verifier_env *env,
ca369602
DB
1795 const struct bpf_reg_state *reg, int off,
1796 int size, bool strict_alignment_once)
79adffcd 1797{
ca369602 1798 bool strict = env->strict_alignment || strict_alignment_once;
f1174f77 1799 const char *pointer_desc = "";
d1174416 1800
79adffcd
DB
1801 switch (reg->type) {
1802 case PTR_TO_PACKET:
de8f3a83
DB
1803 case PTR_TO_PACKET_META:
1804 /* Special case, because of NET_IP_ALIGN. Given metadata sits
1805 * right in front, treat it the very same way.
1806 */
61bd5218 1807 return check_pkt_ptr_alignment(env, reg, off, size, strict);
d58e468b
PP
1808 case PTR_TO_FLOW_KEYS:
1809 pointer_desc = "flow keys ";
1810 break;
f1174f77
EC
1811 case PTR_TO_MAP_VALUE:
1812 pointer_desc = "value ";
1813 break;
1814 case PTR_TO_CTX:
1815 pointer_desc = "context ";
1816 break;
1817 case PTR_TO_STACK:
1818 pointer_desc = "stack ";
a5ec6ae1
JH
1819 /* The stack spill tracking logic in check_stack_write()
1820 * and check_stack_read() relies on stack accesses being
1821 * aligned.
1822 */
1823 strict = true;
f1174f77 1824 break;
c64b7983
JS
1825 case PTR_TO_SOCKET:
1826 pointer_desc = "sock ";
1827 break;
46f8bc92
MKL
1828 case PTR_TO_SOCK_COMMON:
1829 pointer_desc = "sock_common ";
1830 break;
655a51e5
MKL
1831 case PTR_TO_TCP_SOCK:
1832 pointer_desc = "tcp_sock ";
1833 break;
79adffcd 1834 default:
f1174f77 1835 break;
79adffcd 1836 }
61bd5218
JK
1837 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
1838 strict);
79adffcd
DB
1839}
1840
f4d7e40a
AS
1841static int update_stack_depth(struct bpf_verifier_env *env,
1842 const struct bpf_func_state *func,
1843 int off)
1844{
9c8105bd 1845 u16 stack = env->subprog_info[func->subprogno].stack_depth;
f4d7e40a
AS
1846
1847 if (stack >= -off)
1848 return 0;
1849
1850 /* update known max for given subprogram */
9c8105bd 1851 env->subprog_info[func->subprogno].stack_depth = -off;
70a87ffe
AS
1852 return 0;
1853}
f4d7e40a 1854
70a87ffe
AS
1855/* starting from the main bpf function, walk all instructions of the function
 1856	 * and recursively walk all callees that a given function can call.
1857 * Ignore jump and exit insns.
1858 * Since recursion is prevented by check_cfg() this algorithm
1859 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
1860 */
1861static int check_max_stack_depth(struct bpf_verifier_env *env)
1862{
9c8105bd
JW
1863 int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
1864 struct bpf_subprog_info *subprog = env->subprog_info;
70a87ffe 1865 struct bpf_insn *insn = env->prog->insnsi;
70a87ffe
AS
1866 int ret_insn[MAX_CALL_FRAMES];
1867 int ret_prog[MAX_CALL_FRAMES];
f4d7e40a 1868
70a87ffe
AS
1869process_func:
 1870	 /* round up to 32 bytes, since this is the granularity
 1871	 * of the interpreter stack size
1872 */
9c8105bd 1873 depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
70a87ffe 1874 if (depth > MAX_BPF_STACK) {
f4d7e40a 1875 verbose(env, "combined stack size of %d calls is %d. Too large\n",
70a87ffe 1876 frame + 1, depth);
f4d7e40a
AS
1877 return -EACCES;
1878 }
70a87ffe 1879continue_func:
4cb3d99c 1880 subprog_end = subprog[idx + 1].start;
70a87ffe
AS
1881 for (; i < subprog_end; i++) {
1882 if (insn[i].code != (BPF_JMP | BPF_CALL))
1883 continue;
1884 if (insn[i].src_reg != BPF_PSEUDO_CALL)
1885 continue;
1886 /* remember insn and function to return to */
1887 ret_insn[frame] = i + 1;
9c8105bd 1888 ret_prog[frame] = idx;
70a87ffe
AS
1889
1890 /* find the callee */
1891 i = i + insn[i].imm + 1;
9c8105bd
JW
1892 idx = find_subprog(env, i);
1893 if (idx < 0) {
70a87ffe
AS
1894 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1895 i);
1896 return -EFAULT;
1897 }
70a87ffe
AS
1898 frame++;
1899 if (frame >= MAX_CALL_FRAMES) {
1900 WARN_ONCE(1, "verifier bug. Call stack is too deep\n");
1901 return -EFAULT;
1902 }
1903 goto process_func;
1904 }
1905 /* end of for() loop means the last insn of the 'subprog'
1906 * was reached. Doesn't matter whether it was JA or EXIT
1907 */
1908 if (frame == 0)
1909 return 0;
9c8105bd 1910 depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
70a87ffe
AS
1911 frame--;
1912 i = ret_insn[frame];
9c8105bd 1913 idx = ret_prog[frame];
70a87ffe 1914 goto continue_func;
f4d7e40a
AS
1915}
1916
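/* Editorial illustration (not part of the original source): how the depth
 * accumulation above adds up, with assumed stack sizes.  Say main uses 40
 * bytes of stack and calls subprog A (200 bytes), which calls subprog B
 * (120 bytes).  Each contribution is rounded up to 32, giving
 * 64 + 224 + 128 = 416 <= MAX_BPF_STACK (512), so the chain is accepted.
 * If B instead needed 250 bytes (rounded to 256), the total of
 * 64 + 224 + 256 = 544 would exceed the limit and the walk returns -EACCES.
 */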
19d28fbd 1917#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1ea47e01
AS
1918static int get_callee_stack_depth(struct bpf_verifier_env *env,
1919 const struct bpf_insn *insn, int idx)
1920{
1921 int start = idx + insn->imm + 1, subprog;
1922
1923 subprog = find_subprog(env, start);
1924 if (subprog < 0) {
1925 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
1926 start);
1927 return -EFAULT;
1928 }
9c8105bd 1929 return env->subprog_info[subprog].stack_depth;
1ea47e01 1930}
19d28fbd 1931#endif
1ea47e01 1932
58990d1f
DB
1933static int check_ctx_reg(struct bpf_verifier_env *env,
1934 const struct bpf_reg_state *reg, int regno)
1935{
1936 /* Access to ctx or passing it to a helper is only allowed in
1937 * its original, unmodified form.
1938 */
1939
1940 if (reg->off) {
1941 verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
1942 regno, reg->off);
1943 return -EACCES;
1944 }
1945
1946 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
1947 char tn_buf[48];
1948
1949 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1950 verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
1951 return -EACCES;
1952 }
1953
1954 return 0;
1955}
1956
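/* Editorial illustration (not part of the original source): a hypothetical
 * access pattern that check_ctx_reg() rejects:
 *
 *	r6 = r1			// r1 is the ctx pointer (PTR_TO_CTX)
 *	r6 += 8			// r6 is now PTR_TO_CTX with off == 8
 *	r0 = *(u32 *)(r6 + 0)	// "dereference of modified ctx ptr", -EACCES
 *
 * The equivalent load written as r0 = *(u32 *)(r1 + 8) keeps the ctx
 * register unmodified and is validated normally by check_ctx_access().
 */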
0c17d1d2
JH
1957/* truncate register to smaller size (in bytes)
1958 * must be called with size < BPF_REG_SIZE
1959 */
1960static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
1961{
1962 u64 mask;
1963
1964 /* clear high bits in bit representation */
1965 reg->var_off = tnum_cast(reg->var_off, size);
1966
1967 /* fix arithmetic bounds */
1968 mask = ((u64)1 << (size * 8)) - 1;
1969 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
1970 reg->umin_value &= mask;
1971 reg->umax_value &= mask;
1972 } else {
1973 reg->umin_value = 0;
1974 reg->umax_value = mask;
1975 }
1976 reg->smin_value = reg->umin_value;
1977 reg->smax_value = reg->umax_value;
1978}
1979
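/* Editorial illustration (not part of the original source): worked numbers
 * for coerce_reg_to_size().  Truncating to size=1 gives mask = 0xff.  A
 * register known to lie in [0x102, 0x10f] has identical upper bits in
 * umin_value and umax_value (both 0x100), so the bounds are simply masked
 * to the precise [0x02, 0x0f].  A register in [0xfe, 0x101] straddles the
 * 0x100 boundary, the upper bits differ, and the bounds collapse to the
 * conservative [0x00, 0xff].  Rebuilding the signed bounds from the
 * unsigned ones is safe here because the truncated value always fits in
 * the non-negative s64 range.
 */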
17a52670
AS
1980/* check whether memory at (regno + off) is accessible for t = (read | write)
 1981	 * if t==write, value_regno is the register whose value is stored into memory
1982 * if t==read, value_regno is a register which will receive the value from memory
1983 * if t==write && value_regno==-1, some unknown value is stored into memory
1984 * if t==read && value_regno==-1, don't care what we read from memory
1985 */
ca369602
DB
1986static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
1987 int off, int bpf_size, enum bpf_access_type t,
1988 int value_regno, bool strict_alignment_once)
17a52670 1989{
638f5b90
AS
1990 struct bpf_reg_state *regs = cur_regs(env);
1991 struct bpf_reg_state *reg = regs + regno;
f4d7e40a 1992 struct bpf_func_state *state;
17a52670
AS
1993 int size, err = 0;
1994
1995 size = bpf_size_to_bytes(bpf_size);
1996 if (size < 0)
1997 return size;
1998
f1174f77 1999 /* alignment checks will add in reg->off themselves */
ca369602 2000 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
969bf05e
AS
2001 if (err)
2002 return err;
17a52670 2003
f1174f77
EC
2004 /* for access checks, reg->off is just part of off */
2005 off += reg->off;
2006
2007 if (reg->type == PTR_TO_MAP_VALUE) {
1be7f75d
AS
2008 if (t == BPF_WRITE && value_regno >= 0 &&
2009 is_pointer_value(env, value_regno)) {
61bd5218 2010 verbose(env, "R%d leaks addr into map\n", value_regno);
1be7f75d
AS
2011 return -EACCES;
2012 }
48461135 2013
9fd29c08 2014 err = check_map_access(env, regno, off, size, false);
17a52670 2015 if (!err && t == BPF_READ && value_regno >= 0)
638f5b90 2016 mark_reg_unknown(env, regs, value_regno);
17a52670 2017
1a0dc1ac 2018 } else if (reg->type == PTR_TO_CTX) {
f1174f77 2019 enum bpf_reg_type reg_type = SCALAR_VALUE;
19de99f7 2020
1be7f75d
AS
2021 if (t == BPF_WRITE && value_regno >= 0 &&
2022 is_pointer_value(env, value_regno)) {
61bd5218 2023 verbose(env, "R%d leaks addr into ctx\n", value_regno);
1be7f75d
AS
2024 return -EACCES;
2025 }
f1174f77 2026
58990d1f
DB
2027 err = check_ctx_reg(env, reg, regno);
2028 if (err < 0)
2029 return err;
2030
31fd8581 2031 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
969bf05e 2032 if (!err && t == BPF_READ && value_regno >= 0) {
f1174f77 2033 /* ctx access returns either a scalar, or a
de8f3a83
DB
2034 * PTR_TO_PACKET[_META,_END]. In the latter
2035 * case, we know the offset is zero.
f1174f77 2036 */
46f8bc92 2037 if (reg_type == SCALAR_VALUE) {
638f5b90 2038 mark_reg_unknown(env, regs, value_regno);
46f8bc92 2039 } else {
638f5b90 2040 mark_reg_known_zero(env, regs,
61bd5218 2041 value_regno);
46f8bc92
MKL
2042 if (reg_type_may_be_null(reg_type))
2043 regs[value_regno].id = ++env->id_gen;
2044 }
638f5b90 2045 regs[value_regno].type = reg_type;
969bf05e 2046 }
17a52670 2047
f1174f77 2048 } else if (reg->type == PTR_TO_STACK) {
f1174f77 2049 off += reg->var_off.value;
e4298d25
DB
2050 err = check_stack_access(env, reg, off, size);
2051 if (err)
2052 return err;
8726679a 2053
f4d7e40a
AS
2054 state = func(env, reg);
2055 err = update_stack_depth(env, state, off);
2056 if (err)
2057 return err;
8726679a 2058
638f5b90 2059 if (t == BPF_WRITE)
61bd5218 2060 err = check_stack_write(env, state, off, size,
af86ca4e 2061 value_regno, insn_idx);
638f5b90 2062 else
61bd5218
JK
2063 err = check_stack_read(env, state, off, size,
2064 value_regno);
de8f3a83 2065 } else if (reg_is_pkt_pointer(reg)) {
3a0af8fd 2066 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
61bd5218 2067 verbose(env, "cannot write into packet\n");
969bf05e
AS
2068 return -EACCES;
2069 }
4acf6c0b
BB
2070 if (t == BPF_WRITE && value_regno >= 0 &&
2071 is_pointer_value(env, value_regno)) {
61bd5218
JK
2072 verbose(env, "R%d leaks addr into packet\n",
2073 value_regno);
4acf6c0b
BB
2074 return -EACCES;
2075 }
9fd29c08 2076 err = check_packet_access(env, regno, off, size, false);
969bf05e 2077 if (!err && t == BPF_READ && value_regno >= 0)
638f5b90 2078 mark_reg_unknown(env, regs, value_regno);
d58e468b
PP
2079 } else if (reg->type == PTR_TO_FLOW_KEYS) {
2080 if (t == BPF_WRITE && value_regno >= 0 &&
2081 is_pointer_value(env, value_regno)) {
2082 verbose(env, "R%d leaks addr into flow keys\n",
2083 value_regno);
2084 return -EACCES;
2085 }
2086
2087 err = check_flow_keys_access(env, off, size);
2088 if (!err && t == BPF_READ && value_regno >= 0)
2089 mark_reg_unknown(env, regs, value_regno);
46f8bc92 2090 } else if (type_is_sk_pointer(reg->type)) {
c64b7983 2091 if (t == BPF_WRITE) {
46f8bc92
MKL
2092 verbose(env, "R%d cannot write into %s\n",
2093 regno, reg_type_str[reg->type]);
c64b7983
JS
2094 return -EACCES;
2095 }
5f456649 2096 err = check_sock_access(env, insn_idx, regno, off, size, t);
c64b7983
JS
2097 if (!err && value_regno >= 0)
2098 mark_reg_unknown(env, regs, value_regno);
17a52670 2099 } else {
61bd5218
JK
2100 verbose(env, "R%d invalid mem access '%s'\n", regno,
2101 reg_type_str[reg->type]);
17a52670
AS
2102 return -EACCES;
2103 }
969bf05e 2104
f1174f77 2105 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
638f5b90 2106 regs[value_regno].type == SCALAR_VALUE) {
f1174f77 2107 /* b/h/w load zero-extends, mark upper bits as known 0 */
0c17d1d2 2108 coerce_reg_to_size(&regs[value_regno], size);
969bf05e 2109 }
17a52670
AS
2110 return err;
2111}
2112
31fd8581 2113static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
17a52670 2114{
17a52670
AS
2115 int err;
2116
2117 if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
2118 insn->imm != 0) {
61bd5218 2119 verbose(env, "BPF_XADD uses reserved fields\n");
17a52670
AS
2120 return -EINVAL;
2121 }
2122
2123 /* check src1 operand */
dc503a8a 2124 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
2125 if (err)
2126 return err;
2127
2128 /* check src2 operand */
dc503a8a 2129 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
2130 if (err)
2131 return err;
2132
6bdf6abc 2133 if (is_pointer_value(env, insn->src_reg)) {
61bd5218 2134 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
6bdf6abc
DB
2135 return -EACCES;
2136 }
2137
ca369602 2138 if (is_ctx_reg(env, insn->dst_reg) ||
4b5defde 2139 is_pkt_reg(env, insn->dst_reg) ||
46f8bc92
MKL
2140 is_flow_key_reg(env, insn->dst_reg) ||
2141 is_sk_reg(env, insn->dst_reg)) {
ca369602 2142 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
2a159c6f
DB
2143 insn->dst_reg,
2144 reg_type_str[reg_state(env, insn->dst_reg)->type]);
f37a8cb8
DB
2145 return -EACCES;
2146 }
2147
17a52670 2148 /* check whether atomic_add can read the memory */
31fd8581 2149 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
ca369602 2150 BPF_SIZE(insn->code), BPF_READ, -1, true);
17a52670
AS
2151 if (err)
2152 return err;
2153
2154 /* check whether atomic_add can write into the same memory */
31fd8581 2155 return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
ca369602 2156 BPF_SIZE(insn->code), BPF_WRITE, -1, true);
17a52670
AS
2157}
2158
2159/* when register 'regno' is passed into a function that will read 'access_size'
 2160	 * bytes from that pointer, make sure that it's within the stack boundary
f1174f77
EC
2161 * and all elements of stack are initialized.
2162 * Unlike most pointer bounds-checking functions, this one doesn't take an
2163 * 'off' argument, so it has to add in reg->off itself.
17a52670 2164 */
58e2af8b 2165static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
435faee1
DB
2166 int access_size, bool zero_size_allowed,
2167 struct bpf_call_arg_meta *meta)
17a52670 2168{
2a159c6f 2169 struct bpf_reg_state *reg = reg_state(env, regno);
f4d7e40a 2170 struct bpf_func_state *state = func(env, reg);
638f5b90 2171 int off, i, slot, spi;
17a52670 2172
914cb781 2173 if (reg->type != PTR_TO_STACK) {
f1174f77 2174 /* Allow zero-byte read from NULL, regardless of pointer type */
8e2fe1d9 2175 if (zero_size_allowed && access_size == 0 &&
914cb781 2176 register_is_null(reg))
8e2fe1d9
DB
2177 return 0;
2178
61bd5218 2179 verbose(env, "R%d type=%s expected=%s\n", regno,
914cb781 2180 reg_type_str[reg->type],
8e2fe1d9 2181 reg_type_str[PTR_TO_STACK]);
17a52670 2182 return -EACCES;
8e2fe1d9 2183 }
17a52670 2184
f1174f77 2185 /* Only allow fixed-offset stack reads */
914cb781 2186 if (!tnum_is_const(reg->var_off)) {
f1174f77
EC
2187 char tn_buf[48];
2188
914cb781 2189 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 2190 verbose(env, "invalid variable stack read R%d var_off=%s\n",
f1174f77 2191 regno, tn_buf);
ea25f914 2192 return -EACCES;
f1174f77 2193 }
914cb781 2194 off = reg->off + reg->var_off.value;
17a52670 2195 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
9fd29c08 2196 access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
61bd5218 2197 verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
17a52670
AS
2198 regno, off, access_size);
2199 return -EACCES;
2200 }
2201
435faee1
DB
2202 if (meta && meta->raw_mode) {
2203 meta->access_size = access_size;
2204 meta->regno = regno;
2205 return 0;
2206 }
2207
17a52670 2208 for (i = 0; i < access_size; i++) {
cc2b14d5
AS
2209 u8 *stype;
2210
638f5b90
AS
2211 slot = -(off + i) - 1;
2212 spi = slot / BPF_REG_SIZE;
cc2b14d5
AS
2213 if (state->allocated_stack <= slot)
2214 goto err;
2215 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
2216 if (*stype == STACK_MISC)
2217 goto mark;
2218 if (*stype == STACK_ZERO) {
2219 /* helper can write anything into the stack */
2220 *stype = STACK_MISC;
2221 goto mark;
17a52670 2222 }
cc2b14d5
AS
2223err:
2224 verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
2225 off, i, access_size);
2226 return -EACCES;
2227mark:
2228 /* reading any byte out of 8-byte 'spill_slot' will cause
2229 * the whole slot to be marked as 'read'
2230 */
679c782d
EC
2231 mark_reg_read(env, &state->stack[spi].spilled_ptr,
2232 state->stack[spi].spilled_ptr.parent);
17a52670 2233 }
f4d7e40a 2234 return update_stack_depth(env, state, off);
17a52670
AS
2235}
2236
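/* Editorial illustration (not part of the original source): the slot/spi
 * arithmetic from the loop above with concrete numbers.  For a helper that
 * reads 8 bytes at off = -16 (e.g. r2 = r10; r2 += -16; with a size
 * argument of 8), the first byte (i = 0) gives slot = -(-16 + 0) - 1 = 15
 * and spi = 15 / 8 = 1, i.e. the second 8-byte slot below the frame
 * pointer; the last byte (i = 7) gives slot = 8, still spi = 1.  Each of
 * the eight slot_type bytes of that slot must be STACK_MISC or STACK_ZERO,
 * otherwise the access is reported as an invalid indirect read from stack.
 */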
06c1c049
GB
2237static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
2238 int access_size, bool zero_size_allowed,
2239 struct bpf_call_arg_meta *meta)
2240{
638f5b90 2241 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
06c1c049 2242
f1174f77 2243 switch (reg->type) {
06c1c049 2244 case PTR_TO_PACKET:
de8f3a83 2245 case PTR_TO_PACKET_META:
9fd29c08
YS
2246 return check_packet_access(env, regno, reg->off, access_size,
2247 zero_size_allowed);
06c1c049 2248 case PTR_TO_MAP_VALUE:
9fd29c08
YS
2249 return check_map_access(env, regno, reg->off, access_size,
2250 zero_size_allowed);
f1174f77 2251 default: /* scalar_value|ptr_to_stack or invalid ptr */
06c1c049
GB
2252 return check_stack_boundary(env, regno, access_size,
2253 zero_size_allowed, meta);
2254 }
2255}
2256
d83525ca
AS
2257/* Implementation details:
2258 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
2259 * Two bpf_map_lookups (even with the same key) will have different reg->id.
2260 * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
2261 * value_or_null->value transition, since the verifier only cares about
2262 * the range of access to valid map value pointer and doesn't care about actual
2263 * address of the map element.
2264 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
2265 * reg->id > 0 after value_or_null->value transition. By doing so
2266 * two bpf_map_lookups will be considered two different pointers that
2267 * point to different bpf_spin_locks.
2268 * The verifier allows taking only one bpf_spin_lock at a time to avoid
 2269	 * deadlocks.
 2270	 * Since only one bpf_spin_lock is allowed, the checks are simpler than
 2271	 * the reg_is_refcounted() logic. The verifier needs to remember only
 2272	 * one spin_lock instead of an array of acquired_refs.
2273 * cur_state->active_spin_lock remembers which map value element got locked
2274 * and clears it after bpf_spin_unlock.
2275 */
2276static int process_spin_lock(struct bpf_verifier_env *env, int regno,
2277 bool is_lock)
2278{
2279 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
2280 struct bpf_verifier_state *cur = env->cur_state;
2281 bool is_const = tnum_is_const(reg->var_off);
2282 struct bpf_map *map = reg->map_ptr;
2283 u64 val = reg->var_off.value;
2284
2285 if (reg->type != PTR_TO_MAP_VALUE) {
2286 verbose(env, "R%d is not a pointer to map_value\n", regno);
2287 return -EINVAL;
2288 }
2289 if (!is_const) {
2290 verbose(env,
2291 "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
2292 regno);
2293 return -EINVAL;
2294 }
2295 if (!map->btf) {
2296 verbose(env,
2297 "map '%s' has to have BTF in order to use bpf_spin_lock\n",
2298 map->name);
2299 return -EINVAL;
2300 }
2301 if (!map_value_has_spin_lock(map)) {
2302 if (map->spin_lock_off == -E2BIG)
2303 verbose(env,
2304 "map '%s' has more than one 'struct bpf_spin_lock'\n",
2305 map->name);
2306 else if (map->spin_lock_off == -ENOENT)
2307 verbose(env,
2308 "map '%s' doesn't have 'struct bpf_spin_lock'\n",
2309 map->name);
2310 else
2311 verbose(env,
2312 "map '%s' is not a struct type or bpf_spin_lock is mangled\n",
2313 map->name);
2314 return -EINVAL;
2315 }
2316 if (map->spin_lock_off != val + reg->off) {
2317 verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
2318 val + reg->off);
2319 return -EINVAL;
2320 }
2321 if (is_lock) {
2322 if (cur->active_spin_lock) {
2323 verbose(env,
2324 "Locking two bpf_spin_locks are not allowed\n");
2325 return -EINVAL;
2326 }
2327 cur->active_spin_lock = reg->id;
2328 } else {
2329 if (!cur->active_spin_lock) {
2330 verbose(env, "bpf_spin_unlock without taking a lock\n");
2331 return -EINVAL;
2332 }
2333 if (cur->active_spin_lock != reg->id) {
2334 verbose(env, "bpf_spin_unlock of different lock\n");
2335 return -EINVAL;
2336 }
2337 cur->active_spin_lock = 0;
2338 }
2339 return 0;
2340}
2341
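/* Editorial illustration (not part of the original source): a minimal,
 * hypothetical BPF C sketch that satisfies the checks in
 * process_spin_lock().  The struct and map names are made up; the map must
 * be declared with BTF so the verifier can locate the bpf_spin_lock field
 * at a constant offset inside the value:
 *
 *	struct val {
 *		int cnt;
 *		struct bpf_spin_lock lock;
 *	};
 *
 *	struct val *v = bpf_map_lookup_elem(&counters, &key);
 *	if (v) {
 *		bpf_spin_lock(&v->lock);	// reg->id of v names this lock
 *		v->cnt++;
 *		bpf_spin_unlock(&v->lock);	// must use the same v
 *	}
 *
 * Taking a second bpf_spin_lock before the unlock, or unlocking through a
 * pointer obtained from a different lookup (different reg->id), trips the
 * active_spin_lock checks above.
 */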
90133415
DB
2342static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
2343{
2344 return type == ARG_PTR_TO_MEM ||
2345 type == ARG_PTR_TO_MEM_OR_NULL ||
2346 type == ARG_PTR_TO_UNINIT_MEM;
2347}
2348
2349static bool arg_type_is_mem_size(enum bpf_arg_type type)
2350{
2351 return type == ARG_CONST_SIZE ||
2352 type == ARG_CONST_SIZE_OR_ZERO;
2353}
2354
58e2af8b 2355static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
33ff9823
DB
2356 enum bpf_arg_type arg_type,
2357 struct bpf_call_arg_meta *meta)
17a52670 2358{
638f5b90 2359 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
6841de8b 2360 enum bpf_reg_type expected_type, type = reg->type;
17a52670
AS
2361 int err = 0;
2362
80f1d68c 2363 if (arg_type == ARG_DONTCARE)
17a52670
AS
2364 return 0;
2365
dc503a8a
EC
2366 err = check_reg_arg(env, regno, SRC_OP);
2367 if (err)
2368 return err;
17a52670 2369
1be7f75d
AS
2370 if (arg_type == ARG_ANYTHING) {
2371 if (is_pointer_value(env, regno)) {
61bd5218
JK
2372 verbose(env, "R%d leaks addr into helper function\n",
2373 regno);
1be7f75d
AS
2374 return -EACCES;
2375 }
80f1d68c 2376 return 0;
1be7f75d 2377 }
80f1d68c 2378
de8f3a83 2379 if (type_is_pkt_pointer(type) &&
3a0af8fd 2380 !may_access_direct_pkt_data(env, meta, BPF_READ)) {
61bd5218 2381 verbose(env, "helper access to the packet is not allowed\n");
6841de8b
AS
2382 return -EACCES;
2383 }
2384
8e2fe1d9 2385 if (arg_type == ARG_PTR_TO_MAP_KEY ||
2ea864c5
MV
2386 arg_type == ARG_PTR_TO_MAP_VALUE ||
2387 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
17a52670 2388 expected_type = PTR_TO_STACK;
d71962f3 2389 if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
de8f3a83 2390 type != expected_type)
6841de8b 2391 goto err_type;
39f19ebb
AS
2392 } else if (arg_type == ARG_CONST_SIZE ||
2393 arg_type == ARG_CONST_SIZE_OR_ZERO) {
f1174f77
EC
2394 expected_type = SCALAR_VALUE;
2395 if (type != expected_type)
6841de8b 2396 goto err_type;
17a52670
AS
2397 } else if (arg_type == ARG_CONST_MAP_PTR) {
2398 expected_type = CONST_PTR_TO_MAP;
6841de8b
AS
2399 if (type != expected_type)
2400 goto err_type;
608cd71a
AS
2401 } else if (arg_type == ARG_PTR_TO_CTX) {
2402 expected_type = PTR_TO_CTX;
6841de8b
AS
2403 if (type != expected_type)
2404 goto err_type;
58990d1f
DB
2405 err = check_ctx_reg(env, reg, regno);
2406 if (err < 0)
2407 return err;
46f8bc92
MKL
2408 } else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
2409 expected_type = PTR_TO_SOCK_COMMON;
2410 /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
2411 if (!type_is_sk_pointer(type))
2412 goto err_type;
1b986589
MKL
2413 if (reg->ref_obj_id) {
2414 if (meta->ref_obj_id) {
2415 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
2416 regno, reg->ref_obj_id,
2417 meta->ref_obj_id);
2418 return -EFAULT;
2419 }
2420 meta->ref_obj_id = reg->ref_obj_id;
fd978bf7 2421 }
d83525ca
AS
2422 } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
2423 if (meta->func_id == BPF_FUNC_spin_lock) {
2424 if (process_spin_lock(env, regno, true))
2425 return -EACCES;
2426 } else if (meta->func_id == BPF_FUNC_spin_unlock) {
2427 if (process_spin_lock(env, regno, false))
2428 return -EACCES;
2429 } else {
2430 verbose(env, "verifier internal error\n");
2431 return -EFAULT;
2432 }
90133415 2433 } else if (arg_type_is_mem_ptr(arg_type)) {
8e2fe1d9
DB
2434 expected_type = PTR_TO_STACK;
2435 /* One exception here. In case function allows for NULL to be
f1174f77 2436 * passed in as argument, it's a SCALAR_VALUE type. Final test
8e2fe1d9
DB
2437 * happens during stack boundary checking.
2438 */
914cb781 2439 if (register_is_null(reg) &&
db1ac496 2440 arg_type == ARG_PTR_TO_MEM_OR_NULL)
6841de8b 2441 /* final test in check_stack_boundary() */;
de8f3a83
DB
2442 else if (!type_is_pkt_pointer(type) &&
2443 type != PTR_TO_MAP_VALUE &&
f1174f77 2444 type != expected_type)
6841de8b 2445 goto err_type;
39f19ebb 2446 meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
17a52670 2447 } else {
61bd5218 2448 verbose(env, "unsupported arg_type %d\n", arg_type);
17a52670
AS
2449 return -EFAULT;
2450 }
2451
17a52670
AS
2452 if (arg_type == ARG_CONST_MAP_PTR) {
2453 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
33ff9823 2454 meta->map_ptr = reg->map_ptr;
17a52670
AS
2455 } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
2456 /* bpf_map_xxx(..., map_ptr, ..., key) call:
2457 * check that [key, key + map->key_size) are within
2458 * stack limits and initialized
2459 */
33ff9823 2460 if (!meta->map_ptr) {
17a52670
AS
 2461	 /* in the function declaration map_ptr must come before
 2462	 * map_key, so that it's verified and known before
 2463	 * we have to check map_key here. Otherwise it means
 2464	 * that the kernel subsystem misconfigured the verifier
2465 */
61bd5218 2466 verbose(env, "invalid map_ptr to access map->key\n");
17a52670
AS
2467 return -EACCES;
2468 }
d71962f3
PC
2469 err = check_helper_mem_access(env, regno,
2470 meta->map_ptr->key_size, false,
2471 NULL);
2ea864c5
MV
2472 } else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
2473 arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
17a52670
AS
2474 /* bpf_map_xxx(..., map_ptr, ..., value) call:
2475 * check [value, value + map->value_size) validity
2476 */
33ff9823 2477 if (!meta->map_ptr) {
17a52670 2478 /* kernel subsystem misconfigured verifier */
61bd5218 2479 verbose(env, "invalid map_ptr to access map->value\n");
17a52670
AS
2480 return -EACCES;
2481 }
2ea864c5 2482 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
d71962f3
PC
2483 err = check_helper_mem_access(env, regno,
2484 meta->map_ptr->value_size, false,
2ea864c5 2485 meta);
90133415 2486 } else if (arg_type_is_mem_size(arg_type)) {
39f19ebb 2487 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
17a52670 2488
849fa506
YS
2489 /* remember the mem_size which may be used later
2490 * to refine return values.
2491 */
2492 meta->msize_smax_value = reg->smax_value;
2493 meta->msize_umax_value = reg->umax_value;
2494
f1174f77
EC
2495 /* The register is SCALAR_VALUE; the access check
2496 * happens using its boundaries.
06c1c049 2497 */
f1174f77 2498 if (!tnum_is_const(reg->var_off))
06c1c049
GB
2499 /* For unprivileged variable accesses, disable raw
2500 * mode so that the program is required to
2501 * initialize all the memory that the helper could
2502 * just partially fill up.
2503 */
2504 meta = NULL;
2505
b03c9f9f 2506 if (reg->smin_value < 0) {
61bd5218 2507 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
f1174f77
EC
2508 regno);
2509 return -EACCES;
2510 }
06c1c049 2511
b03c9f9f 2512 if (reg->umin_value == 0) {
f1174f77
EC
2513 err = check_helper_mem_access(env, regno - 1, 0,
2514 zero_size_allowed,
2515 meta);
06c1c049
GB
2516 if (err)
2517 return err;
06c1c049 2518 }
f1174f77 2519
b03c9f9f 2520 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
61bd5218 2521 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
f1174f77
EC
2522 regno);
2523 return -EACCES;
2524 }
2525 err = check_helper_mem_access(env, regno - 1,
b03c9f9f 2526 reg->umax_value,
f1174f77 2527 zero_size_allowed, meta);
17a52670
AS
2528 }
2529
2530 return err;
6841de8b 2531err_type:
61bd5218 2532 verbose(env, "R%d type=%s expected=%s\n", regno,
6841de8b
AS
2533 reg_type_str[type], reg_type_str[expected_type]);
2534 return -EACCES;
17a52670
AS
2535}
2536
61bd5218
JK
2537static int check_map_func_compatibility(struct bpf_verifier_env *env,
2538 struct bpf_map *map, int func_id)
35578d79 2539{
35578d79
KX
2540 if (!map)
2541 return 0;
2542
6aff67c8
AS
2543 /* We need a two way check, first is from map perspective ... */
2544 switch (map->map_type) {
2545 case BPF_MAP_TYPE_PROG_ARRAY:
2546 if (func_id != BPF_FUNC_tail_call)
2547 goto error;
2548 break;
2549 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
2550 if (func_id != BPF_FUNC_perf_event_read &&
908432ca
YS
2551 func_id != BPF_FUNC_perf_event_output &&
2552 func_id != BPF_FUNC_perf_event_read_value)
6aff67c8
AS
2553 goto error;
2554 break;
2555 case BPF_MAP_TYPE_STACK_TRACE:
2556 if (func_id != BPF_FUNC_get_stackid)
2557 goto error;
2558 break;
4ed8ec52 2559 case BPF_MAP_TYPE_CGROUP_ARRAY:
60747ef4 2560 if (func_id != BPF_FUNC_skb_under_cgroup &&
60d20f91 2561 func_id != BPF_FUNC_current_task_under_cgroup)
4a482f34
MKL
2562 goto error;
2563 break;
cd339431 2564 case BPF_MAP_TYPE_CGROUP_STORAGE:
b741f163 2565 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
cd339431
RG
2566 if (func_id != BPF_FUNC_get_local_storage)
2567 goto error;
2568 break;
546ac1ff
JF
2569 /* devmap returns a pointer to a live net_device ifindex that we cannot
 2570	 * allow to be modified from the bpf side. So do not allow element lookups
2571 * for now.
2572 */
2573 case BPF_MAP_TYPE_DEVMAP:
2ddf71e2 2574 if (func_id != BPF_FUNC_redirect_map)
546ac1ff
JF
2575 goto error;
2576 break;
fbfc504a
BT
2577 /* Restrict bpf side of cpumap and xskmap, open when use-cases
2578 * appear.
2579 */
6710e112 2580 case BPF_MAP_TYPE_CPUMAP:
fbfc504a 2581 case BPF_MAP_TYPE_XSKMAP:
6710e112
JDB
2582 if (func_id != BPF_FUNC_redirect_map)
2583 goto error;
2584 break;
56f668df 2585 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
bcc6b1b7 2586 case BPF_MAP_TYPE_HASH_OF_MAPS:
56f668df
MKL
2587 if (func_id != BPF_FUNC_map_lookup_elem)
2588 goto error;
16a43625 2589 break;
174a79ff
JF
2590 case BPF_MAP_TYPE_SOCKMAP:
2591 if (func_id != BPF_FUNC_sk_redirect_map &&
2592 func_id != BPF_FUNC_sock_map_update &&
4f738adb
JF
2593 func_id != BPF_FUNC_map_delete_elem &&
2594 func_id != BPF_FUNC_msg_redirect_map)
174a79ff
JF
2595 goto error;
2596 break;
81110384
JF
2597 case BPF_MAP_TYPE_SOCKHASH:
2598 if (func_id != BPF_FUNC_sk_redirect_hash &&
2599 func_id != BPF_FUNC_sock_hash_update &&
2600 func_id != BPF_FUNC_map_delete_elem &&
2601 func_id != BPF_FUNC_msg_redirect_hash)
2602 goto error;
2603 break;
2dbb9b9e
MKL
2604 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
2605 if (func_id != BPF_FUNC_sk_select_reuseport)
2606 goto error;
2607 break;
f1a2e44a
MV
2608 case BPF_MAP_TYPE_QUEUE:
2609 case BPF_MAP_TYPE_STACK:
2610 if (func_id != BPF_FUNC_map_peek_elem &&
2611 func_id != BPF_FUNC_map_pop_elem &&
2612 func_id != BPF_FUNC_map_push_elem)
2613 goto error;
2614 break;
6aff67c8
AS
2615 default:
2616 break;
2617 }
2618
2619 /* ... and second from the function itself. */
2620 switch (func_id) {
2621 case BPF_FUNC_tail_call:
2622 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
2623 goto error;
f910cefa 2624 if (env->subprog_cnt > 1) {
f4d7e40a
AS
2625 verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
2626 return -EINVAL;
2627 }
6aff67c8
AS
2628 break;
2629 case BPF_FUNC_perf_event_read:
2630 case BPF_FUNC_perf_event_output:
908432ca 2631 case BPF_FUNC_perf_event_read_value:
6aff67c8
AS
2632 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
2633 goto error;
2634 break;
2635 case BPF_FUNC_get_stackid:
2636 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
2637 goto error;
2638 break;
60d20f91 2639 case BPF_FUNC_current_task_under_cgroup:
747ea55e 2640 case BPF_FUNC_skb_under_cgroup:
4a482f34
MKL
2641 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
2642 goto error;
2643 break;
97f91a7c 2644 case BPF_FUNC_redirect_map:
9c270af3 2645 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
fbfc504a
BT
2646 map->map_type != BPF_MAP_TYPE_CPUMAP &&
2647 map->map_type != BPF_MAP_TYPE_XSKMAP)
97f91a7c
JF
2648 goto error;
2649 break;
174a79ff 2650 case BPF_FUNC_sk_redirect_map:
4f738adb 2651 case BPF_FUNC_msg_redirect_map:
81110384 2652 case BPF_FUNC_sock_map_update:
174a79ff
JF
2653 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
2654 goto error;
2655 break;
81110384
JF
2656 case BPF_FUNC_sk_redirect_hash:
2657 case BPF_FUNC_msg_redirect_hash:
2658 case BPF_FUNC_sock_hash_update:
2659 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
174a79ff
JF
2660 goto error;
2661 break;
cd339431 2662 case BPF_FUNC_get_local_storage:
b741f163
RG
2663 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
2664 map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
cd339431
RG
2665 goto error;
2666 break;
2dbb9b9e
MKL
2667 case BPF_FUNC_sk_select_reuseport:
2668 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
2669 goto error;
2670 break;
f1a2e44a
MV
2671 case BPF_FUNC_map_peek_elem:
2672 case BPF_FUNC_map_pop_elem:
2673 case BPF_FUNC_map_push_elem:
2674 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
2675 map->map_type != BPF_MAP_TYPE_STACK)
2676 goto error;
2677 break;
6aff67c8
AS
2678 default:
2679 break;
35578d79
KX
2680 }
2681
2682 return 0;
6aff67c8 2683error:
61bd5218 2684 verbose(env, "cannot pass map_type %d into func %s#%d\n",
ebb676da 2685 map->map_type, func_id_name(func_id), func_id);
6aff67c8 2686 return -EINVAL;
35578d79
KX
2687}
2688
90133415 2689static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
435faee1
DB
2690{
2691 int count = 0;
2692
39f19ebb 2693 if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 2694 count++;
39f19ebb 2695 if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 2696 count++;
39f19ebb 2697 if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 2698 count++;
39f19ebb 2699 if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
435faee1 2700 count++;
39f19ebb 2701 if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
435faee1
DB
2702 count++;
2703
90133415
DB
2704 /* We only support one arg being in raw mode at the moment,
2705 * which is sufficient for the helper functions we have
2706 * right now.
2707 */
2708 return count <= 1;
2709}
2710
2711static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
2712 enum bpf_arg_type arg_next)
2713{
2714 return (arg_type_is_mem_ptr(arg_curr) &&
2715 !arg_type_is_mem_size(arg_next)) ||
2716 (!arg_type_is_mem_ptr(arg_curr) &&
2717 arg_type_is_mem_size(arg_next));
2718}
2719
2720static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
2721{
2722 /* bpf_xxx(..., buf, len) call will access 'len'
2723 * bytes from memory 'buf'. Both arg types need
2724 * to be paired, so make sure there's no buggy
2725 * helper function specification.
2726 */
2727 if (arg_type_is_mem_size(fn->arg1_type) ||
2728 arg_type_is_mem_ptr(fn->arg5_type) ||
2729 check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
2730 check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
2731 check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
2732 check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
2733 return false;
2734
2735 return true;
2736}
2737
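/* Editorial illustration (not part of the original source): the pairing rule
 * enforced above, shown on a made-up helper prototype.  A declaration such
 * as
 *
 *	.arg1_type = ARG_PTR_TO_CTX,
 *	.arg2_type = ARG_PTR_TO_MEM,
 *	.arg3_type = ARG_CONST_SIZE,
 *
 * is accepted because the mem pointer in arg2 is immediately followed by
 * its size in arg3.  Declaring ARG_PTR_TO_MEM in arg2 with ARG_ANYTHING in
 * arg3, or a size argument with no mem pointer right before it, makes
 * check_args_pair_invalid() return true and the prototype is rejected as a
 * misconfigured helper.
 */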
1b986589 2738static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
fd978bf7
JS
2739{
2740 int count = 0;
2741
1b986589 2742 if (arg_type_may_be_refcounted(fn->arg1_type))
fd978bf7 2743 count++;
1b986589 2744 if (arg_type_may_be_refcounted(fn->arg2_type))
fd978bf7 2745 count++;
1b986589 2746 if (arg_type_may_be_refcounted(fn->arg3_type))
fd978bf7 2747 count++;
1b986589 2748 if (arg_type_may_be_refcounted(fn->arg4_type))
fd978bf7 2749 count++;
1b986589 2750 if (arg_type_may_be_refcounted(fn->arg5_type))
fd978bf7
JS
2751 count++;
2752
1b986589
MKL
2753 /* A reference acquiring function cannot acquire
2754 * another refcounted ptr.
2755 */
2756 if (is_acquire_function(func_id) && count)
2757 return false;
2758
fd978bf7
JS
2759 /* We only support one arg being unreferenced at the moment,
2760 * which is sufficient for the helper functions we have right now.
2761 */
2762 return count <= 1;
2763}
2764
1b986589 2765static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
90133415
DB
2766{
2767 return check_raw_mode_ok(fn) &&
fd978bf7 2768 check_arg_pair_ok(fn) &&
1b986589 2769 check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
435faee1
DB
2770}
2771
de8f3a83
DB
2772/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
2773 * are now invalid, so turn them into unknown SCALAR_VALUE.
f1174f77 2774 */
f4d7e40a
AS
2775static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
2776 struct bpf_func_state *state)
969bf05e 2777{
58e2af8b 2778 struct bpf_reg_state *regs = state->regs, *reg;
969bf05e
AS
2779 int i;
2780
2781 for (i = 0; i < MAX_BPF_REG; i++)
de8f3a83 2782 if (reg_is_pkt_pointer_any(&regs[i]))
61bd5218 2783 mark_reg_unknown(env, regs, i);
969bf05e 2784
f3709f69
JS
2785 bpf_for_each_spilled_reg(i, state, reg) {
2786 if (!reg)
969bf05e 2787 continue;
de8f3a83
DB
2788 if (reg_is_pkt_pointer_any(reg))
2789 __mark_reg_unknown(reg);
969bf05e
AS
2790 }
2791}
2792
f4d7e40a
AS
2793static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
2794{
2795 struct bpf_verifier_state *vstate = env->cur_state;
2796 int i;
2797
2798 for (i = 0; i <= vstate->curframe; i++)
2799 __clear_all_pkt_pointers(env, vstate->frame[i]);
2800}
2801
fd978bf7 2802static void release_reg_references(struct bpf_verifier_env *env,
1b986589
MKL
2803 struct bpf_func_state *state,
2804 int ref_obj_id)
fd978bf7
JS
2805{
2806 struct bpf_reg_state *regs = state->regs, *reg;
2807 int i;
2808
2809 for (i = 0; i < MAX_BPF_REG; i++)
1b986589 2810 if (regs[i].ref_obj_id == ref_obj_id)
fd978bf7
JS
2811 mark_reg_unknown(env, regs, i);
2812
2813 bpf_for_each_spilled_reg(i, state, reg) {
2814 if (!reg)
2815 continue;
1b986589 2816 if (reg->ref_obj_id == ref_obj_id)
fd978bf7
JS
2817 __mark_reg_unknown(reg);
2818 }
2819}
2820
2821/* The pointer with the specified id has released its reference to kernel
2822 * resources. Identify all copies of the same pointer and clear the reference.
2823 */
2824static int release_reference(struct bpf_verifier_env *env,
1b986589 2825 int ref_obj_id)
fd978bf7
JS
2826{
2827 struct bpf_verifier_state *vstate = env->cur_state;
1b986589 2828 int err;
fd978bf7
JS
2829 int i;
2830
1b986589
MKL
2831 err = release_reference_state(cur_func(env), ref_obj_id);
2832 if (err)
2833 return err;
2834
fd978bf7 2835 for (i = 0; i <= vstate->curframe; i++)
1b986589 2836 release_reg_references(env, vstate->frame[i], ref_obj_id);
fd978bf7 2837
1b986589 2838 return 0;
fd978bf7
JS
2839}
2840
f4d7e40a
AS
2841static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
2842 int *insn_idx)
2843{
2844 struct bpf_verifier_state *state = env->cur_state;
2845 struct bpf_func_state *caller, *callee;
fd978bf7 2846 int i, err, subprog, target_insn;
f4d7e40a 2847
aada9ce6 2848 if (state->curframe + 1 >= MAX_CALL_FRAMES) {
f4d7e40a 2849 verbose(env, "the call stack of %d frames is too deep\n",
aada9ce6 2850 state->curframe + 2);
f4d7e40a
AS
2851 return -E2BIG;
2852 }
2853
2854 target_insn = *insn_idx + insn->imm;
2855 subprog = find_subprog(env, target_insn + 1);
2856 if (subprog < 0) {
2857 verbose(env, "verifier bug. No program starts at insn %d\n",
2858 target_insn + 1);
2859 return -EFAULT;
2860 }
2861
2862 caller = state->frame[state->curframe];
2863 if (state->frame[state->curframe + 1]) {
2864 verbose(env, "verifier bug. Frame %d already allocated\n",
2865 state->curframe + 1);
2866 return -EFAULT;
2867 }
2868
2869 callee = kzalloc(sizeof(*callee), GFP_KERNEL);
2870 if (!callee)
2871 return -ENOMEM;
2872 state->frame[state->curframe + 1] = callee;
2873
2874 /* callee cannot access r0, r6 - r9 for reading and has to write
2875 * into its own stack before reading from it.
2876 * callee can read/write into caller's stack
2877 */
2878 init_func_state(env, callee,
2879 /* remember the callsite, it will be used by bpf_exit */
2880 *insn_idx /* callsite */,
2881 state->curframe + 1 /* frameno within this callchain */,
f910cefa 2882 subprog /* subprog number within this prog */);
f4d7e40a 2883
fd978bf7
JS
2884 /* Transfer references to the callee */
2885 err = transfer_reference_state(callee, caller);
2886 if (err)
2887 return err;
2888
679c782d
EC
2889 /* copy r1 - r5 args that callee can access. The copy includes parent
2890 * pointers, which connects us up to the liveness chain
2891 */
f4d7e40a
AS
2892 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
2893 callee->regs[i] = caller->regs[i];
2894
679c782d 2895 /* after the call registers r0 - r5 were scratched */
f4d7e40a
AS
2896 for (i = 0; i < CALLER_SAVED_REGS; i++) {
2897 mark_reg_not_init(env, caller->regs, caller_saved[i]);
2898 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
2899 }
2900
2901 /* only increment it after check_reg_arg() finished */
2902 state->curframe++;
2903
2904 /* and go analyze first insn of the callee */
2905 *insn_idx = target_insn;
2906
2907 if (env->log.level) {
2908 verbose(env, "caller:\n");
2909 print_verifier_state(env, caller);
2910 verbose(env, "callee:\n");
2911 print_verifier_state(env, callee);
2912 }
2913 return 0;
2914}
2915
2916static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
2917{
2918 struct bpf_verifier_state *state = env->cur_state;
2919 struct bpf_func_state *caller, *callee;
2920 struct bpf_reg_state *r0;
fd978bf7 2921 int err;
f4d7e40a
AS
2922
2923 callee = state->frame[state->curframe];
2924 r0 = &callee->regs[BPF_REG_0];
2925 if (r0->type == PTR_TO_STACK) {
2926 /* technically it's ok to return caller's stack pointer
2927 * (or caller's caller's pointer) back to the caller,
2928 * since these pointers are valid. Only current stack
2929 * pointer will be invalid as soon as function exits,
2930 * but let's be conservative
2931 */
2932 verbose(env, "cannot return stack pointer to the caller\n");
2933 return -EINVAL;
2934 }
2935
2936 state->curframe--;
2937 caller = state->frame[state->curframe];
2938 /* return to the caller whatever r0 had in the callee */
2939 caller->regs[BPF_REG_0] = *r0;
2940
fd978bf7
JS
2941 /* Transfer references to the caller */
2942 err = transfer_reference_state(caller, callee);
2943 if (err)
2944 return err;
2945
f4d7e40a
AS
2946 *insn_idx = callee->callsite + 1;
2947 if (env->log.level) {
2948 verbose(env, "returning from callee:\n");
2949 print_verifier_state(env, callee);
2950 verbose(env, "to caller at %d:\n", *insn_idx);
2951 print_verifier_state(env, caller);
2952 }
2953 /* clear everything in the callee */
2954 free_func_state(callee);
2955 state->frame[state->curframe + 1] = NULL;
2956 return 0;
2957}
2958
849fa506
YS
2959static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
2960 int func_id,
2961 struct bpf_call_arg_meta *meta)
2962{
2963 struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
2964
2965 if (ret_type != RET_INTEGER ||
2966 (func_id != BPF_FUNC_get_stack &&
2967 func_id != BPF_FUNC_probe_read_str))
2968 return;
2969
2970 ret_reg->smax_value = meta->msize_smax_value;
2971 ret_reg->umax_value = meta->msize_umax_value;
2972 __reg_deduce_bounds(ret_reg);
2973 __reg_bound_offset(ret_reg);
2974}
2975
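/* Editorial illustration (not part of the original source): the effect of
 * the refinement above, using bpf_probe_read_str() as an example.  If the
 * size argument passed to the helper has a verified range of [0, 64]
 * (e.g. a constant 64), then after the call R0 gets its smax_value and
 * umax_value clamped to 64 on top of the generic RET_INTEGER marking, so
 * once the program has checked that the return value is not negative the
 * verifier can prove that using it as an offset into a 64-byte buffer
 * stays in bounds.
 */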
c93552c4
DB
2976static int
2977record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
2978 int func_id, int insn_idx)
2979{
2980 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
2981
2982 if (func_id != BPF_FUNC_tail_call &&
09772d92
DB
2983 func_id != BPF_FUNC_map_lookup_elem &&
2984 func_id != BPF_FUNC_map_update_elem &&
f1a2e44a
MV
2985 func_id != BPF_FUNC_map_delete_elem &&
2986 func_id != BPF_FUNC_map_push_elem &&
2987 func_id != BPF_FUNC_map_pop_elem &&
2988 func_id != BPF_FUNC_map_peek_elem)
c93552c4 2989 return 0;
09772d92 2990
c93552c4
DB
2991 if (meta->map_ptr == NULL) {
2992 verbose(env, "kernel subsystem misconfigured verifier\n");
2993 return -EINVAL;
2994 }
2995
2996 if (!BPF_MAP_PTR(aux->map_state))
2997 bpf_map_ptr_store(aux, meta->map_ptr,
2998 meta->map_ptr->unpriv_array);
2999 else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr)
3000 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
3001 meta->map_ptr->unpriv_array);
3002 return 0;
3003}
3004
fd978bf7
JS
3005static int check_reference_leak(struct bpf_verifier_env *env)
3006{
3007 struct bpf_func_state *state = cur_func(env);
3008 int i;
3009
3010 for (i = 0; i < state->acquired_refs; i++) {
3011 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
3012 state->refs[i].id, state->refs[i].insn_idx);
3013 }
3014 return state->acquired_refs ? -EINVAL : 0;
3015}
3016
f4d7e40a 3017static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
17a52670 3018{
17a52670 3019 const struct bpf_func_proto *fn = NULL;
638f5b90 3020 struct bpf_reg_state *regs;
33ff9823 3021 struct bpf_call_arg_meta meta;
969bf05e 3022 bool changes_data;
17a52670
AS
3023 int i, err;
3024
3025 /* find function prototype */
3026 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
61bd5218
JK
3027 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
3028 func_id);
17a52670
AS
3029 return -EINVAL;
3030 }
3031
00176a34 3032 if (env->ops->get_func_proto)
5e43f899 3033 fn = env->ops->get_func_proto(func_id, env->prog);
17a52670 3034 if (!fn) {
61bd5218
JK
3035 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
3036 func_id);
17a52670
AS
3037 return -EINVAL;
3038 }
3039
3040 /* eBPF programs must be GPL compatible to use GPL-ed functions */
24701ece 3041 if (!env->prog->gpl_compatible && fn->gpl_only) {
3fe2867c 3042 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
17a52670
AS
3043 return -EINVAL;
3044 }
3045
04514d13 3046 /* With LD_ABS/IND some JITs save/restore skb from r1. */
17bedab2 3047 changes_data = bpf_helper_changes_pkt_data(fn->func);
04514d13
DB
3048 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
3049 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
3050 func_id_name(func_id), func_id);
3051 return -EINVAL;
3052 }
969bf05e 3053
33ff9823 3054 memset(&meta, 0, sizeof(meta));
36bbef52 3055 meta.pkt_access = fn->pkt_access;
33ff9823 3056
1b986589 3057 err = check_func_proto(fn, func_id);
435faee1 3058 if (err) {
61bd5218 3059 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
ebb676da 3060 func_id_name(func_id), func_id);
435faee1
DB
3061 return err;
3062 }
3063
d83525ca 3064 meta.func_id = func_id;
17a52670 3065 /* check args */
33ff9823 3066 err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
17a52670
AS
3067 if (err)
3068 return err;
33ff9823 3069 err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
17a52670
AS
3070 if (err)
3071 return err;
33ff9823 3072 err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
17a52670
AS
3073 if (err)
3074 return err;
33ff9823 3075 err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
17a52670
AS
3076 if (err)
3077 return err;
33ff9823 3078 err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
17a52670
AS
3079 if (err)
3080 return err;
3081
c93552c4
DB
3082 err = record_func_map(env, &meta, func_id, insn_idx);
3083 if (err)
3084 return err;
3085
435faee1
DB
3086 /* Mark slots with STACK_MISC in case of raw mode, stack offset
3087 * is inferred from register state.
3088 */
3089 for (i = 0; i < meta.access_size; i++) {
ca369602
DB
3090 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
3091 BPF_WRITE, -1, false);
435faee1
DB
3092 if (err)
3093 return err;
3094 }
3095
fd978bf7
JS
3096 if (func_id == BPF_FUNC_tail_call) {
3097 err = check_reference_leak(env);
3098 if (err) {
3099 verbose(env, "tail_call would lead to reference leak\n");
3100 return err;
3101 }
3102 } else if (is_release_function(func_id)) {
1b986589 3103 err = release_reference(env, meta.ref_obj_id);
46f8bc92
MKL
3104 if (err) {
3105 verbose(env, "func %s#%d reference has not been acquired before\n",
3106 func_id_name(func_id), func_id);
fd978bf7 3107 return err;
46f8bc92 3108 }
fd978bf7
JS
3109 }
3110
638f5b90 3111 regs = cur_regs(env);
cd339431
RG
3112
3113 /* check that flags argument in get_local_storage(map, flags) is 0,
3114 * this is required because get_local_storage() can't return an error.
3115 */
3116 if (func_id == BPF_FUNC_get_local_storage &&
3117 !register_is_null(&regs[BPF_REG_2])) {
3118 verbose(env, "get_local_storage() doesn't support non-zero flags\n");
3119 return -EINVAL;
3120 }
3121
17a52670 3122 /* reset caller saved regs */
dc503a8a 3123 for (i = 0; i < CALLER_SAVED_REGS; i++) {
61bd5218 3124 mark_reg_not_init(env, regs, caller_saved[i]);
dc503a8a
EC
3125 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
3126 }
17a52670 3127
dc503a8a 3128 /* update return register (already marked as written above) */
17a52670 3129 if (fn->ret_type == RET_INTEGER) {
f1174f77 3130 /* sets type to SCALAR_VALUE */
61bd5218 3131 mark_reg_unknown(env, regs, BPF_REG_0);
17a52670
AS
3132 } else if (fn->ret_type == RET_VOID) {
3133 regs[BPF_REG_0].type = NOT_INIT;
3e6a4b3e
RG
3134 } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
3135 fn->ret_type == RET_PTR_TO_MAP_VALUE) {
f1174f77 3136 /* There is no offset yet applied, variable or fixed */
61bd5218 3137 mark_reg_known_zero(env, regs, BPF_REG_0);
17a52670
AS
3138 /* remember map_ptr, so that check_map_access()
3139 * can check 'value_size' boundary of memory access
3140 * to map element returned from bpf_map_lookup_elem()
3141 */
33ff9823 3142 if (meta.map_ptr == NULL) {
61bd5218
JK
3143 verbose(env,
3144 "kernel subsystem misconfigured verifier\n");
17a52670
AS
3145 return -EINVAL;
3146 }
33ff9823 3147 regs[BPF_REG_0].map_ptr = meta.map_ptr;
4d31f301
DB
3148 if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
3149 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
e16d2f1a
AS
3150 if (map_value_has_spin_lock(meta.map_ptr))
3151 regs[BPF_REG_0].id = ++env->id_gen;
4d31f301
DB
3152 } else {
3153 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
3154 regs[BPF_REG_0].id = ++env->id_gen;
3155 }
c64b7983
JS
3156 } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
3157 mark_reg_known_zero(env, regs, BPF_REG_0);
3158 regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
46f8bc92
MKL
3159 if (is_acquire_function(func_id)) {
3160 int id = acquire_reference_state(env, insn_idx);
3161
3162 if (id < 0)
3163 return id;
1b986589 3164 /* For mark_ptr_or_null_reg() */
46f8bc92 3165 regs[BPF_REG_0].id = id;
1b986589
MKL
3166 /* For release_reference() */
3167 regs[BPF_REG_0].ref_obj_id = id;
46f8bc92
MKL
3168 } else {
3169 /* For mark_ptr_or_null_reg() */
3170 regs[BPF_REG_0].id = ++env->id_gen;
3171 }
655a51e5
MKL
3172 } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
3173 mark_reg_known_zero(env, regs, BPF_REG_0);
3174 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
3175 regs[BPF_REG_0].id = ++env->id_gen;
17a52670 3176 } else {
61bd5218 3177 verbose(env, "unknown return type %d of func %s#%d\n",
ebb676da 3178 fn->ret_type, func_id_name(func_id), func_id);
17a52670
AS
3179 return -EINVAL;
3180 }
04fd61ab 3181
1b986589
MKL
3182 if (is_ptr_cast_function(func_id))
3183 /* For release_reference() */
3184 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
3185
849fa506
YS
3186 do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
3187
61bd5218 3188 err = check_map_func_compatibility(env, meta.map_ptr, func_id);
35578d79
KX
3189 if (err)
3190 return err;
04fd61ab 3191
c195651e
YS
3192 if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
3193 const char *err_str;
3194
3195#ifdef CONFIG_PERF_EVENTS
3196 err = get_callchain_buffers(sysctl_perf_event_max_stack);
3197 err_str = "cannot get callchain buffer for func %s#%d\n";
3198#else
3199 err = -ENOTSUPP;
3200 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
3201#endif
3202 if (err) {
3203 verbose(env, err_str, func_id_name(func_id), func_id);
3204 return err;
3205 }
3206
3207 env->prog->has_callchain_buf = true;
3208 }
3209
969bf05e
AS
3210 if (changes_data)
3211 clear_all_pkt_pointers(env);
3212 return 0;
3213}
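/* Illustrative example (added for this listing, not part of the original
 * source) of the state transition performed by check_helper_call() for
 * bpf_map_lookup_elem(), whose return type is RET_PTR_TO_MAP_VALUE_OR_NULL:
 *
 *   call bpf_map_lookup_elem
 *   // r1-r5 are now NOT_INIT (caller-saved regs were clobbered)
 *   // r0 is PTR_TO_MAP_VALUE_OR_NULL with a fresh id, and
 *   // r0.map_ptr == meta.map_ptr so that check_map_access() can
 *   // later enforce the map's value_size
 *   if r0 == 0 goto out          // NULL check required before deref
 *   r1 = *(u64 *)(r0 + 0)
 * out:
 *   exit
 */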
3214
b03c9f9f
EC
3215static bool signed_add_overflows(s64 a, s64 b)
3216{
3217 /* Do the add in u64, where overflow is well-defined */
3218 s64 res = (s64)((u64)a + (u64)b);
3219
3220 if (b < 0)
3221 return res > a;
3222 return res < a;
3223}
3224
3225static bool signed_sub_overflows(s64 a, s64 b)
3226{
3227 /* Do the sub in u64, where overflow is well-defined */
3228 s64 res = (s64)((u64)a - (u64)b);
3229
3230 if (b < 0)
3231 return res < a;
3232 return res > a;
969bf05e
AS
3233}
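/* Worked examples (added for this listing, not part of the original
 * source) of the overflow checks above, which rely on u64 arithmetic
 * wrapping modulo 2^64:
 *
 *   signed_add_overflows(S64_MAX, 1)  -> res wraps to S64_MIN; b >= 0
 *                                        and res < a, so overflow.
 *   signed_add_overflows(-1, S64_MIN) -> res wraps to S64_MAX; b < 0
 *                                        and res > a, so overflow.
 *   signed_sub_overflows(S64_MIN, 1)  -> res wraps to S64_MAX; b >= 0
 *                                        and res > a, so overflow.
 *   signed_add_overflows(1, 2)        -> res == 3, no overflow.
 */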
3234
bb7f0f98
AS
3235static bool check_reg_sane_offset(struct bpf_verifier_env *env,
3236 const struct bpf_reg_state *reg,
3237 enum bpf_reg_type type)
3238{
3239 bool known = tnum_is_const(reg->var_off);
3240 s64 val = reg->var_off.value;
3241 s64 smin = reg->smin_value;
3242
3243 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
3244 verbose(env, "math between %s pointer and %lld is not allowed\n",
3245 reg_type_str[type], val);
3246 return false;
3247 }
3248
3249 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
3250 verbose(env, "%s pointer offset %d is not allowed\n",
3251 reg_type_str[type], reg->off);
3252 return false;
3253 }
3254
3255 if (smin == S64_MIN) {
3256 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
3257 reg_type_str[type]);
3258 return false;
3259 }
3260
3261 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
3262 verbose(env, "value %lld makes %s pointer be out of bounds\n",
3263 smin, reg_type_str[type]);
3264 return false;
3265 }
3266
3267 return true;
3268}
3269
979d63d5
DB
3270static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
3271{
3272 return &env->insn_aux_data[env->insn_idx];
3273}
3274
3275static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
3276 u32 *ptr_limit, u8 opcode, bool off_is_neg)
3277{
3278 bool mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
3279 (opcode == BPF_SUB && !off_is_neg);
3280 u32 off;
3281
3282 switch (ptr_reg->type) {
3283 case PTR_TO_STACK:
3284 off = ptr_reg->off + ptr_reg->var_off.value;
3285 if (mask_to_left)
3286 *ptr_limit = MAX_BPF_STACK + off;
3287 else
3288 *ptr_limit = -off;
3289 return 0;
3290 case PTR_TO_MAP_VALUE:
3291 if (mask_to_left) {
3292 *ptr_limit = ptr_reg->umax_value + ptr_reg->off;
3293 } else {
3294 off = ptr_reg->smin_value + ptr_reg->off;
3295 *ptr_limit = ptr_reg->map_ptr->value_size - off;
3296 }
3297 return 0;
3298 default:
3299 return -EINVAL;
3300 }
3301}
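/* Worked example (added for this listing, not part of the original
 * source), assuming the usual MAX_BPF_STACK of 512 bytes: for a
 * PTR_TO_STACK register at fp-16 (off == -16, var_off known zero)
 * being added to a possibly-negative scalar (BPF_ADD, off_is_neg),
 * mask_to_left is true and *ptr_limit = 512 + (-16) = 496, i.e. how
 * far the pointer may legally move towards the bottom of the stack.
 * For a non-negative scalar, *ptr_limit = -(-16) = 16, the distance
 * back up to the frame pointer.
 */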
3302
d3bd7413
DB
3303static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
3304 const struct bpf_insn *insn)
3305{
3306 return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K;
3307}
3308
3309static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
3310 u32 alu_state, u32 alu_limit)
3311{
3312 /* If we arrived here from different branches with different
3313 * state or limits to sanitize, then this won't work.
3314 */
3315 if (aux->alu_state &&
3316 (aux->alu_state != alu_state ||
3317 aux->alu_limit != alu_limit))
3318 return -EACCES;
3319
3320 /* Corresponding fixup done in fixup_bpf_calls(). */
3321 aux->alu_state = alu_state;
3322 aux->alu_limit = alu_limit;
3323 return 0;
3324}
3325
3326static int sanitize_val_alu(struct bpf_verifier_env *env,
3327 struct bpf_insn *insn)
3328{
3329 struct bpf_insn_aux_data *aux = cur_aux(env);
3330
3331 if (can_skip_alu_sanitation(env, insn))
3332 return 0;
3333
3334 return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
3335}
3336
979d63d5
DB
3337static int sanitize_ptr_alu(struct bpf_verifier_env *env,
3338 struct bpf_insn *insn,
3339 const struct bpf_reg_state *ptr_reg,
3340 struct bpf_reg_state *dst_reg,
3341 bool off_is_neg)
3342{
3343 struct bpf_verifier_state *vstate = env->cur_state;
3344 struct bpf_insn_aux_data *aux = cur_aux(env);
3345 bool ptr_is_dst_reg = ptr_reg == dst_reg;
3346 u8 opcode = BPF_OP(insn->code);
3347 u32 alu_state, alu_limit;
3348 struct bpf_reg_state tmp;
3349 bool ret;
3350
d3bd7413 3351 if (can_skip_alu_sanitation(env, insn))
979d63d5
DB
3352 return 0;
3353
3354 /* We already marked aux for masking from non-speculative
3355 * paths, thus we got here in the first place. We only care
3356 * to explore bad access from here.
3357 */
3358 if (vstate->speculative)
3359 goto do_sim;
3360
3361 alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
3362 alu_state |= ptr_is_dst_reg ?
3363 BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
3364
3365 if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
3366 return 0;
d3bd7413 3367 if (update_alu_sanitation_state(aux, alu_state, alu_limit))
979d63d5 3368 return -EACCES;
979d63d5
DB
3369do_sim:
3370 /* Simulate and find potential out-of-bounds access under
3371 * speculative execution from truncation as a result of
3372 * masking when off was not within expected range. If off
3373 * sits in dst, then we temporarily need to move ptr there
3374 * to simulate dst (== 0) +/-= ptr. Needed, for example,
3375 * for cases where we use K-based arithmetic in one direction
3376 * and truncated reg-based in the other in order to explore
3377 * bad access.
3378 */
3379 if (!ptr_is_dst_reg) {
3380 tmp = *dst_reg;
3381 *dst_reg = *ptr_reg;
3382 }
3383 ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
0803278b 3384 if (!ptr_is_dst_reg && ret)
979d63d5
DB
3385 *dst_reg = tmp;
3386 return !ret ? -EFAULT : 0;
3387}
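/* Illustrative note (added for this listing, not part of the original
 * source): the alu_state/alu_limit recorded here are consumed by
 * fixup_bpf_calls(), which rewrites the pointer ALU instruction so
 * that under speculative execution the scalar offset is masked to stay
 * within alu_limit. Conceptually, for
 *
 *   r0 = map_value_ptr
 *   r0 += r1             // r1 is an attacker-controlled scalar
 *
 * the verifier both simulates the truncated/masked outcome via the
 * push_stack() above to prove it cannot fault, and arranges runtime
 * masking so a speculated pointer cannot leave the object.
 */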
3388
f1174f77 3389/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
f1174f77
EC
3390 * Caller should also handle BPF_MOV case separately.
3391 * If we return -EACCES, caller may want to try again treating pointer as a
3392 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
3393 */
3394static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
3395 struct bpf_insn *insn,
3396 const struct bpf_reg_state *ptr_reg,
3397 const struct bpf_reg_state *off_reg)
969bf05e 3398{
f4d7e40a
AS
3399 struct bpf_verifier_state *vstate = env->cur_state;
3400 struct bpf_func_state *state = vstate->frame[vstate->curframe];
3401 struct bpf_reg_state *regs = state->regs, *dst_reg;
f1174f77 3402 bool known = tnum_is_const(off_reg->var_off);
b03c9f9f
EC
3403 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
3404 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
3405 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
3406 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
9d7eceed 3407 u32 dst = insn->dst_reg, src = insn->src_reg;
969bf05e 3408 u8 opcode = BPF_OP(insn->code);
979d63d5 3409 int ret;
969bf05e 3410
f1174f77 3411 dst_reg = &regs[dst];
969bf05e 3412
6f16101e
DB
3413 if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
3414 smin_val > smax_val || umin_val > umax_val) {
3415 /* Taint dst register if offset had invalid bounds derived from
3416 * e.g. dead branches.
3417 */
3418 __mark_reg_unknown(dst_reg);
3419 return 0;
f1174f77
EC
3420 }
3421
3422 if (BPF_CLASS(insn->code) != BPF_ALU64) {
3423 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
82abbf8d
AS
3424 verbose(env,
3425 "R%d 32-bit pointer arithmetic prohibited\n",
3426 dst);
f1174f77 3427 return -EACCES;
969bf05e
AS
3428 }
3429
aad2eeaf
JS
3430 switch (ptr_reg->type) {
3431 case PTR_TO_MAP_VALUE_OR_NULL:
3432 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
3433 dst, reg_type_str[ptr_reg->type]);
f1174f77 3434 return -EACCES;
aad2eeaf
JS
3435 case CONST_PTR_TO_MAP:
3436 case PTR_TO_PACKET_END:
c64b7983
JS
3437 case PTR_TO_SOCKET:
3438 case PTR_TO_SOCKET_OR_NULL:
46f8bc92
MKL
3439 case PTR_TO_SOCK_COMMON:
3440 case PTR_TO_SOCK_COMMON_OR_NULL:
655a51e5
MKL
3441 case PTR_TO_TCP_SOCK:
3442 case PTR_TO_TCP_SOCK_OR_NULL:
aad2eeaf
JS
3443 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
3444 dst, reg_type_str[ptr_reg->type]);
f1174f77 3445 return -EACCES;
9d7eceed
DB
3446 case PTR_TO_MAP_VALUE:
3447 if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
3448 verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
3449 off_reg == dst_reg ? dst : src);
3450 return -EACCES;
3451 }
3452 /* fall-through */
aad2eeaf
JS
3453 default:
3454 break;
f1174f77
EC
3455 }
3456
3457 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
3458 * The id may be overwritten later if we create a new variable offset.
969bf05e 3459 */
f1174f77
EC
3460 dst_reg->type = ptr_reg->type;
3461 dst_reg->id = ptr_reg->id;
969bf05e 3462
bb7f0f98
AS
3463 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
3464 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
3465 return -EINVAL;
3466
f1174f77
EC
3467 switch (opcode) {
3468 case BPF_ADD:
979d63d5
DB
3469 ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
3470 if (ret < 0) {
3471 verbose(env, "R%d tried to add from different maps or paths\n", dst);
3472 return ret;
3473 }
f1174f77
EC
3474 /* We can take a fixed offset as long as it doesn't overflow
3475 * the s32 'off' field
969bf05e 3476 */
b03c9f9f
EC
3477 if (known && (ptr_reg->off + smin_val ==
3478 (s64)(s32)(ptr_reg->off + smin_val))) {
f1174f77 3479 /* pointer += K. Accumulate it into fixed offset */
b03c9f9f
EC
3480 dst_reg->smin_value = smin_ptr;
3481 dst_reg->smax_value = smax_ptr;
3482 dst_reg->umin_value = umin_ptr;
3483 dst_reg->umax_value = umax_ptr;
f1174f77 3484 dst_reg->var_off = ptr_reg->var_off;
b03c9f9f 3485 dst_reg->off = ptr_reg->off + smin_val;
0962590e 3486 dst_reg->raw = ptr_reg->raw;
f1174f77
EC
3487 break;
3488 }
f1174f77
EC
3489 /* A new variable offset is created. Note that off_reg->off
3490 * == 0, since it's a scalar.
3491 * dst_reg gets the pointer type and since some positive
3492 * integer value was added to the pointer, give it a new 'id'
3493 * if it's a PTR_TO_PACKET.
3494 * this creates a new 'base' pointer, off_reg (variable) gets
3495 * added into the variable offset, and we copy the fixed offset
3496 * from ptr_reg.
969bf05e 3497 */
b03c9f9f
EC
3498 if (signed_add_overflows(smin_ptr, smin_val) ||
3499 signed_add_overflows(smax_ptr, smax_val)) {
3500 dst_reg->smin_value = S64_MIN;
3501 dst_reg->smax_value = S64_MAX;
3502 } else {
3503 dst_reg->smin_value = smin_ptr + smin_val;
3504 dst_reg->smax_value = smax_ptr + smax_val;
3505 }
3506 if (umin_ptr + umin_val < umin_ptr ||
3507 umax_ptr + umax_val < umax_ptr) {
3508 dst_reg->umin_value = 0;
3509 dst_reg->umax_value = U64_MAX;
3510 } else {
3511 dst_reg->umin_value = umin_ptr + umin_val;
3512 dst_reg->umax_value = umax_ptr + umax_val;
3513 }
f1174f77
EC
3514 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
3515 dst_reg->off = ptr_reg->off;
0962590e 3516 dst_reg->raw = ptr_reg->raw;
de8f3a83 3517 if (reg_is_pkt_pointer(ptr_reg)) {
f1174f77
EC
3518 dst_reg->id = ++env->id_gen;
3519 /* something was added to pkt_ptr, set range to zero */
0962590e 3520 dst_reg->raw = 0;
f1174f77
EC
3521 }
3522 break;
3523 case BPF_SUB:
979d63d5
DB
3524 ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
3525 if (ret < 0) {
3526 verbose(env, "R%d tried to sub from different maps or paths\n", dst);
3527 return ret;
3528 }
f1174f77
EC
3529 if (dst_reg == off_reg) {
3530 /* scalar -= pointer. Creates an unknown scalar */
82abbf8d
AS
3531 verbose(env, "R%d tried to subtract pointer from scalar\n",
3532 dst);
f1174f77
EC
3533 return -EACCES;
3534 }
3535 /* We don't allow subtraction from FP, because (according to
3536 * test_verifier.c test "invalid fp arithmetic", JITs might not
3537 * be able to deal with it.
969bf05e 3538 */
f1174f77 3539 if (ptr_reg->type == PTR_TO_STACK) {
82abbf8d
AS
3540 verbose(env, "R%d subtraction from stack pointer prohibited\n",
3541 dst);
f1174f77
EC
3542 return -EACCES;
3543 }
b03c9f9f
EC
3544 if (known && (ptr_reg->off - smin_val ==
3545 (s64)(s32)(ptr_reg->off - smin_val))) {
f1174f77 3546 /* pointer -= K. Subtract it from fixed offset */
b03c9f9f
EC
3547 dst_reg->smin_value = smin_ptr;
3548 dst_reg->smax_value = smax_ptr;
3549 dst_reg->umin_value = umin_ptr;
3550 dst_reg->umax_value = umax_ptr;
f1174f77
EC
3551 dst_reg->var_off = ptr_reg->var_off;
3552 dst_reg->id = ptr_reg->id;
b03c9f9f 3553 dst_reg->off = ptr_reg->off - smin_val;
0962590e 3554 dst_reg->raw = ptr_reg->raw;
f1174f77
EC
3555 break;
3556 }
f1174f77
EC
3557 /* A new variable offset is created. If the subtrahend is known
3558 * nonnegative, then any reg->range we had before is still good.
969bf05e 3559 */
b03c9f9f
EC
3560 if (signed_sub_overflows(smin_ptr, smax_val) ||
3561 signed_sub_overflows(smax_ptr, smin_val)) {
3562 /* Overflow possible, we know nothing */
3563 dst_reg->smin_value = S64_MIN;
3564 dst_reg->smax_value = S64_MAX;
3565 } else {
3566 dst_reg->smin_value = smin_ptr - smax_val;
3567 dst_reg->smax_value = smax_ptr - smin_val;
3568 }
3569 if (umin_ptr < umax_val) {
3570 /* Overflow possible, we know nothing */
3571 dst_reg->umin_value = 0;
3572 dst_reg->umax_value = U64_MAX;
3573 } else {
3574 /* Cannot overflow (as long as bounds are consistent) */
3575 dst_reg->umin_value = umin_ptr - umax_val;
3576 dst_reg->umax_value = umax_ptr - umin_val;
3577 }
f1174f77
EC
3578 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
3579 dst_reg->off = ptr_reg->off;
0962590e 3580 dst_reg->raw = ptr_reg->raw;
de8f3a83 3581 if (reg_is_pkt_pointer(ptr_reg)) {
f1174f77
EC
3582 dst_reg->id = ++env->id_gen;
 3583 /* if the subtracted value may be negative, something may effectively have been added to pkt_ptr, so the old range is no longer valid; set it to zero */
b03c9f9f 3584 if (smin_val < 0)
0962590e 3585 dst_reg->raw = 0;
43188702 3586 }
f1174f77
EC
3587 break;
3588 case BPF_AND:
3589 case BPF_OR:
3590 case BPF_XOR:
82abbf8d
AS
3591 /* bitwise ops on pointers are troublesome, prohibit. */
3592 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
3593 dst, bpf_alu_string[opcode >> 4]);
f1174f77
EC
3594 return -EACCES;
3595 default:
3596 /* other operators (e.g. MUL,LSH) produce non-pointer results */
82abbf8d
AS
3597 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
3598 dst, bpf_alu_string[opcode >> 4]);
f1174f77 3599 return -EACCES;
43188702
JF
3600 }
3601
bb7f0f98
AS
3602 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
3603 return -EINVAL;
3604
b03c9f9f
EC
3605 __update_reg_bounds(dst_reg);
3606 __reg_deduce_bounds(dst_reg);
3607 __reg_bound_offset(dst_reg);
0d6303db
DB
3608
3609 /* For unprivileged we require that resulting offset must be in bounds
3610 * in order to be able to sanitize access later on.
3611 */
e4298d25
DB
3612 if (!env->allow_ptr_leaks) {
3613 if (dst_reg->type == PTR_TO_MAP_VALUE &&
3614 check_map_access(env, dst, dst_reg->off, 1, false)) {
3615 verbose(env, "R%d pointer arithmetic of map value goes out of range, "
3616 "prohibited for !root\n", dst);
3617 return -EACCES;
3618 } else if (dst_reg->type == PTR_TO_STACK &&
3619 check_stack_access(env, dst_reg, dst_reg->off +
3620 dst_reg->var_off.value, 1)) {
3621 verbose(env, "R%d stack pointer arithmetic goes out of range, "
3622 "prohibited for !root\n", dst);
3623 return -EACCES;
3624 }
0d6303db
DB
3625 }
3626
43188702
JF
3627 return 0;
3628}
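/* Worked example (added for this listing, not part of the original
 * source) of the BPF_ADD cases above, for r2 = PTR_TO_MAP_VALUE
 * (off == 0, variable part known zero) and r3 = scalar in [0, 64]:
 *
 *   r2 += 8   // constant fits in s32: folded into the fixed offset,
 *             // r2.off == 8, variable part still known zero
 *   r2 += r3  // new variable offset: r2.off stays 8, the variable
 *             // part's umin/umax become [0, 64], var_off becomes
 *             // tnum_add() of the two var_offs
 *
 * check_map_access() later checks fixed + variable offset against the
 * map's value_size. A packet pointer would additionally get a fresh id
 * and a cleared range (dst_reg->raw = 0).
 */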
3629
468f6eaf
JH
3630/* WARNING: This function does calculations on 64-bit values, but the actual
3631 * execution may occur on 32-bit values. Therefore, things like bitshifts
3632 * need extra checks in the 32-bit case.
3633 */
f1174f77
EC
3634static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
3635 struct bpf_insn *insn,
3636 struct bpf_reg_state *dst_reg,
3637 struct bpf_reg_state src_reg)
969bf05e 3638{
638f5b90 3639 struct bpf_reg_state *regs = cur_regs(env);
48461135 3640 u8 opcode = BPF_OP(insn->code);
f1174f77 3641 bool src_known, dst_known;
b03c9f9f
EC
3642 s64 smin_val, smax_val;
3643 u64 umin_val, umax_val;
468f6eaf 3644 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
d3bd7413
DB
3645 u32 dst = insn->dst_reg;
3646 int ret;
48461135 3647
b799207e
JH
3648 if (insn_bitness == 32) {
3649 /* Relevant for 32-bit RSH: Information can propagate towards
3650 * LSB, so it isn't sufficient to only truncate the output to
3651 * 32 bits.
3652 */
3653 coerce_reg_to_size(dst_reg, 4);
3654 coerce_reg_to_size(&src_reg, 4);
3655 }
3656
b03c9f9f
EC
3657 smin_val = src_reg.smin_value;
3658 smax_val = src_reg.smax_value;
3659 umin_val = src_reg.umin_value;
3660 umax_val = src_reg.umax_value;
f1174f77
EC
3661 src_known = tnum_is_const(src_reg.var_off);
3662 dst_known = tnum_is_const(dst_reg->var_off);
f23cc643 3663
6f16101e
DB
3664 if ((src_known && (smin_val != smax_val || umin_val != umax_val)) ||
3665 smin_val > smax_val || umin_val > umax_val) {
3666 /* Taint dst register if offset had invalid bounds derived from
3667 * e.g. dead branches.
3668 */
3669 __mark_reg_unknown(dst_reg);
3670 return 0;
3671 }
3672
bb7f0f98
AS
3673 if (!src_known &&
3674 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
3675 __mark_reg_unknown(dst_reg);
3676 return 0;
3677 }
3678
48461135
JB
3679 switch (opcode) {
3680 case BPF_ADD:
d3bd7413
DB
3681 ret = sanitize_val_alu(env, insn);
3682 if (ret < 0) {
3683 verbose(env, "R%d tried to add from different pointers or scalars\n", dst);
3684 return ret;
3685 }
b03c9f9f
EC
3686 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
3687 signed_add_overflows(dst_reg->smax_value, smax_val)) {
3688 dst_reg->smin_value = S64_MIN;
3689 dst_reg->smax_value = S64_MAX;
3690 } else {
3691 dst_reg->smin_value += smin_val;
3692 dst_reg->smax_value += smax_val;
3693 }
3694 if (dst_reg->umin_value + umin_val < umin_val ||
3695 dst_reg->umax_value + umax_val < umax_val) {
3696 dst_reg->umin_value = 0;
3697 dst_reg->umax_value = U64_MAX;
3698 } else {
3699 dst_reg->umin_value += umin_val;
3700 dst_reg->umax_value += umax_val;
3701 }
f1174f77 3702 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
48461135
JB
3703 break;
3704 case BPF_SUB:
d3bd7413
DB
3705 ret = sanitize_val_alu(env, insn);
3706 if (ret < 0) {
3707 verbose(env, "R%d tried to sub from different pointers or scalars\n", dst);
3708 return ret;
3709 }
b03c9f9f
EC
3710 if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
3711 signed_sub_overflows(dst_reg->smax_value, smin_val)) {
3712 /* Overflow possible, we know nothing */
3713 dst_reg->smin_value = S64_MIN;
3714 dst_reg->smax_value = S64_MAX;
3715 } else {
3716 dst_reg->smin_value -= smax_val;
3717 dst_reg->smax_value -= smin_val;
3718 }
3719 if (dst_reg->umin_value < umax_val) {
3720 /* Overflow possible, we know nothing */
3721 dst_reg->umin_value = 0;
3722 dst_reg->umax_value = U64_MAX;
3723 } else {
3724 /* Cannot overflow (as long as bounds are consistent) */
3725 dst_reg->umin_value -= umax_val;
3726 dst_reg->umax_value -= umin_val;
3727 }
f1174f77 3728 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
48461135
JB
3729 break;
3730 case BPF_MUL:
b03c9f9f
EC
3731 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
3732 if (smin_val < 0 || dst_reg->smin_value < 0) {
f1174f77 3733 /* Ain't nobody got time to multiply that sign */
b03c9f9f
EC
3734 __mark_reg_unbounded(dst_reg);
3735 __update_reg_bounds(dst_reg);
f1174f77
EC
3736 break;
3737 }
b03c9f9f
EC
3738 /* Both values are positive, so we can work with unsigned and
3739 * copy the result to signed (unless it exceeds S64_MAX).
f1174f77 3740 */
b03c9f9f
EC
3741 if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
3742 /* Potential overflow, we know nothing */
3743 __mark_reg_unbounded(dst_reg);
3744 /* (except what we can learn from the var_off) */
3745 __update_reg_bounds(dst_reg);
3746 break;
3747 }
3748 dst_reg->umin_value *= umin_val;
3749 dst_reg->umax_value *= umax_val;
3750 if (dst_reg->umax_value > S64_MAX) {
3751 /* Overflow possible, we know nothing */
3752 dst_reg->smin_value = S64_MIN;
3753 dst_reg->smax_value = S64_MAX;
3754 } else {
3755 dst_reg->smin_value = dst_reg->umin_value;
3756 dst_reg->smax_value = dst_reg->umax_value;
3757 }
48461135
JB
3758 break;
3759 case BPF_AND:
f1174f77 3760 if (src_known && dst_known) {
b03c9f9f
EC
3761 __mark_reg_known(dst_reg, dst_reg->var_off.value &
3762 src_reg.var_off.value);
f1174f77
EC
3763 break;
3764 }
b03c9f9f
EC
3765 /* We get our minimum from the var_off, since that's inherently
3766 * bitwise. Our maximum is the minimum of the operands' maxima.
f23cc643 3767 */
f1174f77 3768 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
b03c9f9f
EC
3769 dst_reg->umin_value = dst_reg->var_off.value;
3770 dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
3771 if (dst_reg->smin_value < 0 || smin_val < 0) {
3772 /* Lose signed bounds when ANDing negative numbers,
3773 * ain't nobody got time for that.
3774 */
3775 dst_reg->smin_value = S64_MIN;
3776 dst_reg->smax_value = S64_MAX;
3777 } else {
3778 /* ANDing two positives gives a positive, so safe to
3779 * cast result into s64.
3780 */
3781 dst_reg->smin_value = dst_reg->umin_value;
3782 dst_reg->smax_value = dst_reg->umax_value;
3783 }
3784 /* We may learn something more from the var_off */
3785 __update_reg_bounds(dst_reg);
f1174f77
EC
3786 break;
3787 case BPF_OR:
3788 if (src_known && dst_known) {
b03c9f9f
EC
3789 __mark_reg_known(dst_reg, dst_reg->var_off.value |
3790 src_reg.var_off.value);
f1174f77
EC
3791 break;
3792 }
b03c9f9f
EC
3793 /* We get our maximum from the var_off, and our minimum is the
3794 * maximum of the operands' minima
f1174f77
EC
3795 */
3796 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
b03c9f9f
EC
3797 dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
3798 dst_reg->umax_value = dst_reg->var_off.value |
3799 dst_reg->var_off.mask;
3800 if (dst_reg->smin_value < 0 || smin_val < 0) {
3801 /* Lose signed bounds when ORing negative numbers,
3802 * ain't nobody got time for that.
3803 */
3804 dst_reg->smin_value = S64_MIN;
3805 dst_reg->smax_value = S64_MAX;
f1174f77 3806 } else {
b03c9f9f
EC
3807 /* ORing two positives gives a positive, so safe to
3808 * cast result into s64.
3809 */
3810 dst_reg->smin_value = dst_reg->umin_value;
3811 dst_reg->smax_value = dst_reg->umax_value;
f1174f77 3812 }
b03c9f9f
EC
3813 /* We may learn something more from the var_off */
3814 __update_reg_bounds(dst_reg);
48461135
JB
3815 break;
3816 case BPF_LSH:
468f6eaf
JH
3817 if (umax_val >= insn_bitness) {
3818 /* Shifts greater than 31 or 63 are undefined.
3819 * This includes shifts by a negative number.
b03c9f9f 3820 */
61bd5218 3821 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77
EC
3822 break;
3823 }
b03c9f9f
EC
3824 /* We lose all sign bit information (except what we can pick
3825 * up from var_off)
48461135 3826 */
b03c9f9f
EC
3827 dst_reg->smin_value = S64_MIN;
3828 dst_reg->smax_value = S64_MAX;
3829 /* If we might shift our top bit out, then we know nothing */
3830 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
3831 dst_reg->umin_value = 0;
3832 dst_reg->umax_value = U64_MAX;
d1174416 3833 } else {
b03c9f9f
EC
3834 dst_reg->umin_value <<= umin_val;
3835 dst_reg->umax_value <<= umax_val;
d1174416 3836 }
afbe1a5b 3837 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
b03c9f9f
EC
3838 /* We may learn something more from the var_off */
3839 __update_reg_bounds(dst_reg);
48461135
JB
3840 break;
3841 case BPF_RSH:
468f6eaf
JH
3842 if (umax_val >= insn_bitness) {
3843 /* Shifts greater than 31 or 63 are undefined.
3844 * This includes shifts by a negative number.
b03c9f9f 3845 */
61bd5218 3846 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77
EC
3847 break;
3848 }
4374f256
EC
3849 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
3850 * be negative, then either:
3851 * 1) src_reg might be zero, so the sign bit of the result is
3852 * unknown, so we lose our signed bounds
3853 * 2) it's known negative, thus the unsigned bounds capture the
3854 * signed bounds
3855 * 3) the signed bounds cross zero, so they tell us nothing
3856 * about the result
3857 * If the value in dst_reg is known nonnegative, then again the
 3858 * unsigned bounds capture the signed bounds.
3859 * Thus, in all cases it suffices to blow away our signed bounds
3860 * and rely on inferring new ones from the unsigned bounds and
3861 * var_off of the result.
3862 */
3863 dst_reg->smin_value = S64_MIN;
3864 dst_reg->smax_value = S64_MAX;
afbe1a5b 3865 dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
b03c9f9f
EC
3866 dst_reg->umin_value >>= umax_val;
3867 dst_reg->umax_value >>= umin_val;
3868 /* We may learn something more from the var_off */
3869 __update_reg_bounds(dst_reg);
48461135 3870 break;
9cbe1f5a
YS
3871 case BPF_ARSH:
3872 if (umax_val >= insn_bitness) {
3873 /* Shifts greater than 31 or 63 are undefined.
3874 * This includes shifts by a negative number.
3875 */
3876 mark_reg_unknown(env, regs, insn->dst_reg);
3877 break;
3878 }
3879
3880 /* Upon reaching here, src_known is true and
3881 * umax_val is equal to umin_val.
3882 */
3883 dst_reg->smin_value >>= umin_val;
3884 dst_reg->smax_value >>= umin_val;
3885 dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val);
3886
3887 /* blow away the dst_reg umin_value/umax_value and rely on
3888 * dst_reg var_off to refine the result.
3889 */
3890 dst_reg->umin_value = 0;
3891 dst_reg->umax_value = U64_MAX;
3892 __update_reg_bounds(dst_reg);
3893 break;
48461135 3894 default:
61bd5218 3895 mark_reg_unknown(env, regs, insn->dst_reg);
48461135
JB
3896 break;
3897 }
3898
468f6eaf
JH
3899 if (BPF_CLASS(insn->code) != BPF_ALU64) {
3900 /* 32-bit ALU ops are (32,32)->32 */
3901 coerce_reg_to_size(dst_reg, 4);
468f6eaf
JH
3902 }
3903
b03c9f9f
EC
3904 __reg_deduce_bounds(dst_reg);
3905 __reg_bound_offset(dst_reg);
f1174f77
EC
3906 return 0;
3907}
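/* Worked example (added for this listing, not part of the original
 * source) of the scalar BPF_ADD case above, with dst in [10, 20] and
 * src in [1, 5] (no overflow on either the signed or unsigned side):
 *
 *   smin/smax become [10 + 1, 20 + 5] = [11, 25]
 *   umin/umax likewise become [11, 25]
 *   var_off becomes tnum_add(dst.var_off, src.var_off), from which
 *   __reg_deduce_bounds()/__reg_bound_offset() may tighten further.
 *
 * Had either addition been able to overflow, the corresponding bounds
 * would have been reset to [S64_MIN, S64_MAX] or [0, U64_MAX].
 */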
3908
3909/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
3910 * and var_off.
3911 */
3912static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
3913 struct bpf_insn *insn)
3914{
f4d7e40a
AS
3915 struct bpf_verifier_state *vstate = env->cur_state;
3916 struct bpf_func_state *state = vstate->frame[vstate->curframe];
3917 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
f1174f77
EC
3918 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
3919 u8 opcode = BPF_OP(insn->code);
f1174f77
EC
3920
3921 dst_reg = &regs[insn->dst_reg];
f1174f77
EC
3922 src_reg = NULL;
3923 if (dst_reg->type != SCALAR_VALUE)
3924 ptr_reg = dst_reg;
3925 if (BPF_SRC(insn->code) == BPF_X) {
3926 src_reg = &regs[insn->src_reg];
f1174f77
EC
3927 if (src_reg->type != SCALAR_VALUE) {
3928 if (dst_reg->type != SCALAR_VALUE) {
3929 /* Combining two pointers by any ALU op yields
82abbf8d
AS
3930 * an arbitrary scalar. Disallow all math except
3931 * pointer subtraction
f1174f77 3932 */
dd066823 3933 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
82abbf8d
AS
3934 mark_reg_unknown(env, regs, insn->dst_reg);
3935 return 0;
f1174f77 3936 }
82abbf8d
AS
3937 verbose(env, "R%d pointer %s pointer prohibited\n",
3938 insn->dst_reg,
3939 bpf_alu_string[opcode >> 4]);
3940 return -EACCES;
f1174f77
EC
3941 } else {
3942 /* scalar += pointer
3943 * This is legal, but we have to reverse our
3944 * src/dest handling in computing the range
3945 */
82abbf8d
AS
3946 return adjust_ptr_min_max_vals(env, insn,
3947 src_reg, dst_reg);
f1174f77
EC
3948 }
3949 } else if (ptr_reg) {
3950 /* pointer += scalar */
82abbf8d
AS
3951 return adjust_ptr_min_max_vals(env, insn,
3952 dst_reg, src_reg);
f1174f77
EC
3953 }
3954 } else {
3955 /* Pretend the src is a reg with a known value, since we only
3956 * need to be able to read from this state.
3957 */
3958 off_reg.type = SCALAR_VALUE;
b03c9f9f 3959 __mark_reg_known(&off_reg, insn->imm);
f1174f77 3960 src_reg = &off_reg;
82abbf8d
AS
3961 if (ptr_reg) /* pointer += K */
3962 return adjust_ptr_min_max_vals(env, insn,
3963 ptr_reg, src_reg);
f1174f77
EC
3964 }
3965
3966 /* Got here implies adding two SCALAR_VALUEs */
3967 if (WARN_ON_ONCE(ptr_reg)) {
f4d7e40a 3968 print_verifier_state(env, state);
61bd5218 3969 verbose(env, "verifier internal error: unexpected ptr_reg\n");
f1174f77
EC
3970 return -EINVAL;
3971 }
3972 if (WARN_ON(!src_reg)) {
f4d7e40a 3973 print_verifier_state(env, state);
61bd5218 3974 verbose(env, "verifier internal error: no src_reg\n");
f1174f77
EC
3975 return -EINVAL;
3976 }
3977 return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
48461135
JB
3978}
3979
17a52670 3980/* check validity of 32-bit and 64-bit arithmetic operations */
58e2af8b 3981static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
17a52670 3982{
638f5b90 3983 struct bpf_reg_state *regs = cur_regs(env);
17a52670
AS
3984 u8 opcode = BPF_OP(insn->code);
3985 int err;
3986
3987 if (opcode == BPF_END || opcode == BPF_NEG) {
3988 if (opcode == BPF_NEG) {
3989 if (BPF_SRC(insn->code) != 0 ||
3990 insn->src_reg != BPF_REG_0 ||
3991 insn->off != 0 || insn->imm != 0) {
61bd5218 3992 verbose(env, "BPF_NEG uses reserved fields\n");
17a52670
AS
3993 return -EINVAL;
3994 }
3995 } else {
3996 if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
e67b8a68
EC
3997 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
3998 BPF_CLASS(insn->code) == BPF_ALU64) {
61bd5218 3999 verbose(env, "BPF_END uses reserved fields\n");
17a52670
AS
4000 return -EINVAL;
4001 }
4002 }
4003
4004 /* check src operand */
dc503a8a 4005 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
4006 if (err)
4007 return err;
4008
1be7f75d 4009 if (is_pointer_value(env, insn->dst_reg)) {
61bd5218 4010 verbose(env, "R%d pointer arithmetic prohibited\n",
1be7f75d
AS
4011 insn->dst_reg);
4012 return -EACCES;
4013 }
4014
17a52670 4015 /* check dest operand */
dc503a8a 4016 err = check_reg_arg(env, insn->dst_reg, DST_OP);
17a52670
AS
4017 if (err)
4018 return err;
4019
4020 } else if (opcode == BPF_MOV) {
4021
4022 if (BPF_SRC(insn->code) == BPF_X) {
4023 if (insn->imm != 0 || insn->off != 0) {
61bd5218 4024 verbose(env, "BPF_MOV uses reserved fields\n");
17a52670
AS
4025 return -EINVAL;
4026 }
4027
4028 /* check src operand */
dc503a8a 4029 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
4030 if (err)
4031 return err;
4032 } else {
4033 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
61bd5218 4034 verbose(env, "BPF_MOV uses reserved fields\n");
17a52670
AS
4035 return -EINVAL;
4036 }
4037 }
4038
fbeb1603
AF
4039 /* check dest operand, mark as required later */
4040 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
17a52670
AS
4041 if (err)
4042 return err;
4043
4044 if (BPF_SRC(insn->code) == BPF_X) {
e434b8cd
JW
4045 struct bpf_reg_state *src_reg = regs + insn->src_reg;
4046 struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
4047
17a52670
AS
4048 if (BPF_CLASS(insn->code) == BPF_ALU64) {
4049 /* case: R1 = R2
4050 * copy register state to dest reg
4051 */
e434b8cd
JW
4052 *dst_reg = *src_reg;
4053 dst_reg->live |= REG_LIVE_WRITTEN;
17a52670 4054 } else {
f1174f77 4055 /* R1 = (u32) R2 */
1be7f75d 4056 if (is_pointer_value(env, insn->src_reg)) {
61bd5218
JK
4057 verbose(env,
4058 "R%d partial copy of pointer\n",
1be7f75d
AS
4059 insn->src_reg);
4060 return -EACCES;
e434b8cd
JW
4061 } else if (src_reg->type == SCALAR_VALUE) {
4062 *dst_reg = *src_reg;
4063 dst_reg->live |= REG_LIVE_WRITTEN;
4064 } else {
4065 mark_reg_unknown(env, regs,
4066 insn->dst_reg);
1be7f75d 4067 }
e434b8cd 4068 coerce_reg_to_size(dst_reg, 4);
17a52670
AS
4069 }
4070 } else {
4071 /* case: R = imm
4072 * remember the value we stored into this reg
4073 */
fbeb1603
AF
4074 /* clear any state __mark_reg_known doesn't set */
4075 mark_reg_unknown(env, regs, insn->dst_reg);
f1174f77 4076 regs[insn->dst_reg].type = SCALAR_VALUE;
95a762e2
JH
4077 if (BPF_CLASS(insn->code) == BPF_ALU64) {
4078 __mark_reg_known(regs + insn->dst_reg,
4079 insn->imm);
4080 } else {
4081 __mark_reg_known(regs + insn->dst_reg,
4082 (u32)insn->imm);
4083 }
17a52670
AS
4084 }
4085
4086 } else if (opcode > BPF_END) {
61bd5218 4087 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
17a52670
AS
4088 return -EINVAL;
4089
4090 } else { /* all other ALU ops: and, sub, xor, add, ... */
4091
17a52670
AS
4092 if (BPF_SRC(insn->code) == BPF_X) {
4093 if (insn->imm != 0 || insn->off != 0) {
61bd5218 4094 verbose(env, "BPF_ALU uses reserved fields\n");
17a52670
AS
4095 return -EINVAL;
4096 }
4097 /* check src1 operand */
dc503a8a 4098 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
4099 if (err)
4100 return err;
4101 } else {
4102 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
61bd5218 4103 verbose(env, "BPF_ALU uses reserved fields\n");
17a52670
AS
4104 return -EINVAL;
4105 }
4106 }
4107
4108 /* check src2 operand */
dc503a8a 4109 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
4110 if (err)
4111 return err;
4112
4113 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
4114 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
61bd5218 4115 verbose(env, "div by zero\n");
17a52670
AS
4116 return -EINVAL;
4117 }
4118
229394e8
RV
4119 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
4120 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
4121 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
4122
4123 if (insn->imm < 0 || insn->imm >= size) {
61bd5218 4124 verbose(env, "invalid shift %d\n", insn->imm);
229394e8
RV
4125 return -EINVAL;
4126 }
4127 }
4128
1a0dc1ac 4129 /* check dest operand */
dc503a8a 4130 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
1a0dc1ac
AS
4131 if (err)
4132 return err;
4133
f1174f77 4134 return adjust_reg_min_max_vals(env, insn);
17a52670
AS
4135 }
4136
4137 return 0;
4138}
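/* Illustrative example (added for this listing, not part of the
 * original source) of the BPF_MOV handling above:
 *
 *   r1 = r2        // 64-bit MOV: full copy of r2's verifier state
 *   r1 = (u32) r2  // 32-bit MOV: if r2 is a scalar it is copied and
 *                  // truncated via coerce_reg_to_size(dst_reg, 4);
 *                  // if r2 is a pointer this is rejected as a
 *                  // "partial copy of pointer" for unprivileged
 *                  // programs (privileged loads get an unknown scalar)
 *   r1 = 42        // MOV K: r1 becomes a SCALAR_VALUE known to be 42
 */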
4139
f4d7e40a 4140static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
de8f3a83 4141 struct bpf_reg_state *dst_reg,
f8ddadc4 4142 enum bpf_reg_type type,
fb2a311a 4143 bool range_right_open)
969bf05e 4144{
f4d7e40a 4145 struct bpf_func_state *state = vstate->frame[vstate->curframe];
58e2af8b 4146 struct bpf_reg_state *regs = state->regs, *reg;
fb2a311a 4147 u16 new_range;
f4d7e40a 4148 int i, j;
2d2be8ca 4149
fb2a311a
DB
4150 if (dst_reg->off < 0 ||
4151 (dst_reg->off == 0 && range_right_open))
f1174f77
EC
4152 /* This doesn't give us any range */
4153 return;
4154
b03c9f9f
EC
4155 if (dst_reg->umax_value > MAX_PACKET_OFF ||
4156 dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
f1174f77
EC
4157 /* Risk of overflow. For instance, ptr + (1<<63) may be less
4158 * than pkt_end, but that's because it's also less than pkt.
4159 */
4160 return;
4161
fb2a311a
DB
4162 new_range = dst_reg->off;
4163 if (range_right_open)
4164 new_range--;
4165
4166 /* Examples for register markings:
2d2be8ca 4167 *
fb2a311a 4168 * pkt_data in dst register:
2d2be8ca
DB
4169 *
4170 * r2 = r3;
4171 * r2 += 8;
4172 * if (r2 > pkt_end) goto <handle exception>
4173 * <access okay>
4174 *
b4e432f1
DB
4175 * r2 = r3;
4176 * r2 += 8;
4177 * if (r2 < pkt_end) goto <access okay>
4178 * <handle exception>
4179 *
2d2be8ca
DB
4180 * Where:
4181 * r2 == dst_reg, pkt_end == src_reg
4182 * r2=pkt(id=n,off=8,r=0)
4183 * r3=pkt(id=n,off=0,r=0)
4184 *
fb2a311a 4185 * pkt_data in src register:
2d2be8ca
DB
4186 *
4187 * r2 = r3;
4188 * r2 += 8;
4189 * if (pkt_end >= r2) goto <access okay>
4190 * <handle exception>
4191 *
b4e432f1
DB
4192 * r2 = r3;
4193 * r2 += 8;
4194 * if (pkt_end <= r2) goto <handle exception>
4195 * <access okay>
4196 *
2d2be8ca
DB
4197 * Where:
4198 * pkt_end == dst_reg, r2 == src_reg
4199 * r2=pkt(id=n,off=8,r=0)
4200 * r3=pkt(id=n,off=0,r=0)
4201 *
4202 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
fb2a311a
DB
4203 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
4204 * and [r3, r3 + 8-1) respectively is safe to access depending on
4205 * the check.
969bf05e 4206 */
2d2be8ca 4207
f1174f77
EC
4208 /* If our ids match, then we must have the same max_value. And we
4209 * don't care about the other reg's fixed offset, since if it's too big
4210 * the range won't allow anything.
4211 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
4212 */
969bf05e 4213 for (i = 0; i < MAX_BPF_REG; i++)
de8f3a83 4214 if (regs[i].type == type && regs[i].id == dst_reg->id)
b1977682 4215 /* keep the maximum range already checked */
fb2a311a 4216 regs[i].range = max(regs[i].range, new_range);
969bf05e 4217
f4d7e40a
AS
4218 for (j = 0; j <= vstate->curframe; j++) {
4219 state = vstate->frame[j];
f3709f69
JS
4220 bpf_for_each_spilled_reg(i, state, reg) {
4221 if (!reg)
f4d7e40a 4222 continue;
f4d7e40a
AS
4223 if (reg->type == type && reg->id == dst_reg->id)
4224 reg->range = max(reg->range, new_range);
4225 }
969bf05e
AS
4226 }
4227}
4228
4f7b3e82
AS
4229/* compute branch direction of the expression "if (reg opcode val) goto target;"
4230 * and return:
4231 * 1 - branch will be taken and "goto target" will be executed
4232 * 0 - branch will not be taken and fall-through to next insn
4233 * -1 - unknown. Example: "if (reg < 5)" is unknown when register value range [0,10]
4234 */
092ed096
JW
4235static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
4236 bool is_jmp32)
4f7b3e82 4237{
092ed096 4238 struct bpf_reg_state reg_lo;
a72dafaf
JW
4239 s64 sval;
4240
4f7b3e82
AS
4241 if (__is_pointer_value(false, reg))
4242 return -1;
4243
092ed096
JW
4244 if (is_jmp32) {
4245 reg_lo = *reg;
4246 reg = &reg_lo;
 4247 /* For JMP32, only the low 32 bits are compared; coerce_reg_to_size()
 4248 * truncates the high bits and updates umin/umax based on the
 4249 * information in the low 32 bits.
4250 */
4251 coerce_reg_to_size(reg, 4);
4252 /* smin/smax need special handling. For example, after coerce,
4253 * if smin_value is 0x00000000ffffffffLL, the value is -1 when
4254 * used as operand to JMP32. It is a negative number from s32's
4255 * point of view, while it is a positive number when seen as
4256 * s64. The smin/smax are kept as s64, therefore, when used with
4257 * JMP32, they need to be transformed into s32, then sign
4258 * extended back to s64.
4259 *
 4260 * Also, smin/smax were copied from umin/umax. If umin/umax have
 4261 * different sign bits, then the min/max relationship no longer
 4262 * holds after casting into s32; in that case, set smin/smax
 4263 * to the safest range.
4264 */
4265 if ((reg->umax_value ^ reg->umin_value) &
4266 (1ULL << 31)) {
4267 reg->smin_value = S32_MIN;
4268 reg->smax_value = S32_MAX;
4269 }
4270 reg->smin_value = (s64)(s32)reg->smin_value;
4271 reg->smax_value = (s64)(s32)reg->smax_value;
4272
4273 val = (u32)val;
4274 sval = (s64)(s32)val;
4275 } else {
4276 sval = (s64)val;
4277 }
a72dafaf 4278
4f7b3e82
AS
4279 switch (opcode) {
4280 case BPF_JEQ:
4281 if (tnum_is_const(reg->var_off))
4282 return !!tnum_equals_const(reg->var_off, val);
4283 break;
4284 case BPF_JNE:
4285 if (tnum_is_const(reg->var_off))
4286 return !tnum_equals_const(reg->var_off, val);
4287 break;
960ea056
JK
4288 case BPF_JSET:
4289 if ((~reg->var_off.mask & reg->var_off.value) & val)
4290 return 1;
4291 if (!((reg->var_off.mask | reg->var_off.value) & val))
4292 return 0;
4293 break;
4f7b3e82
AS
4294 case BPF_JGT:
4295 if (reg->umin_value > val)
4296 return 1;
4297 else if (reg->umax_value <= val)
4298 return 0;
4299 break;
4300 case BPF_JSGT:
a72dafaf 4301 if (reg->smin_value > sval)
4f7b3e82 4302 return 1;
a72dafaf 4303 else if (reg->smax_value < sval)
4f7b3e82
AS
4304 return 0;
4305 break;
4306 case BPF_JLT:
4307 if (reg->umax_value < val)
4308 return 1;
4309 else if (reg->umin_value >= val)
4310 return 0;
4311 break;
4312 case BPF_JSLT:
a72dafaf 4313 if (reg->smax_value < sval)
4f7b3e82 4314 return 1;
a72dafaf 4315 else if (reg->smin_value >= sval)
4f7b3e82
AS
4316 return 0;
4317 break;
4318 case BPF_JGE:
4319 if (reg->umin_value >= val)
4320 return 1;
4321 else if (reg->umax_value < val)
4322 return 0;
4323 break;
4324 case BPF_JSGE:
a72dafaf 4325 if (reg->smin_value >= sval)
4f7b3e82 4326 return 1;
a72dafaf 4327 else if (reg->smax_value < sval)
4f7b3e82
AS
4328 return 0;
4329 break;
4330 case BPF_JLE:
4331 if (reg->umax_value <= val)
4332 return 1;
4333 else if (reg->umin_value > val)
4334 return 0;
4335 break;
4336 case BPF_JSLE:
a72dafaf 4337 if (reg->smax_value <= sval)
4f7b3e82 4338 return 1;
a72dafaf 4339 else if (reg->smin_value > sval)
4f7b3e82
AS
4340 return 0;
4341 break;
4342 }
4343
4344 return -1;
4345}
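/* Worked example (added for this listing, not part of the original
 * source): for a scalar r1 with umin_value == 5 and umax_value == 10,
 *
 *   if r1 > 4 goto l   // BPF_JGT, val == 4: umin (5) > 4, returns 1,
 *                      // the fall-through path is dead
 *   if r1 > 10 goto l  // umax (10) <= 10, returns 0, the jump is
 *                      // never taken on this path
 *   if r1 > 7 goto l   // neither bound decides it, returns -1 and
 *                      // both branches are explored
 */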
4346
092ed096
JW
4347/* Generate min value of the high 32-bit from TNUM info. */
4348static u64 gen_hi_min(struct tnum var)
4349{
4350 return var.value & ~0xffffffffULL;
4351}
4352
4353/* Generate max value of the high 32-bit from TNUM info. */
4354static u64 gen_hi_max(struct tnum var)
4355{
4356 return (var.value | var.mask) & ~0xffffffffULL;
4357}
4358
4359/* Return true if VAL is compared with a s64 sign extended from s32, and they
4360 * are with the same signedness.
4361 */
4362static bool cmp_val_with_extended_s64(s64 sval, struct bpf_reg_state *reg)
4363{
4364 return ((s32)sval >= 0 &&
4365 reg->smin_value >= 0 && reg->smax_value <= S32_MAX) ||
4366 ((s32)sval < 0 &&
4367 reg->smax_value <= 0 && reg->smin_value >= S32_MIN);
4368}
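/* Illustrative note (added for this listing, not part of the original
 * source): these helpers exist because a JMP32 comparison constrains
 * only the low 32 bits. E.g. a register with var_off = (value
 * 0x300000000, mask 0xffffffff) has unknown low bits but fully known
 * high bits, so gen_hi_min() == gen_hi_max() == 0x300000000; adding
 * that back to the 32-bit limits keeps the 64-bit umin/umax updates in
 * reg_set_min_max() below sound when only 32-bit bounds were learned.
 */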
4369
48461135
JB
4370/* Adjusts the register min/max values in the case that the dst_reg is the
4371 * variable register that we are working on, and src_reg is a constant or we're
4372 * simply doing a BPF_K check.
f1174f77 4373 * In JEQ/JNE cases we also adjust the var_off values.
48461135
JB
4374 */
4375static void reg_set_min_max(struct bpf_reg_state *true_reg,
4376 struct bpf_reg_state *false_reg, u64 val,
092ed096 4377 u8 opcode, bool is_jmp32)
48461135 4378{
a72dafaf
JW
4379 s64 sval;
4380
f1174f77
EC
4381 /* If the dst_reg is a pointer, we can't learn anything about its
4382 * variable offset from the compare (unless src_reg were a pointer into
 4383 * the same object, but we don't bother with that).
4384 * Since false_reg and true_reg have the same type by construction, we
4385 * only need to check one of them for pointerness.
4386 */
4387 if (__is_pointer_value(false, false_reg))
4388 return;
4cabc5b1 4389
092ed096
JW
4390 val = is_jmp32 ? (u32)val : val;
4391 sval = is_jmp32 ? (s64)(s32)val : (s64)val;
a72dafaf 4392
48461135
JB
4393 switch (opcode) {
4394 case BPF_JEQ:
48461135 4395 case BPF_JNE:
a72dafaf
JW
4396 {
4397 struct bpf_reg_state *reg =
4398 opcode == BPF_JEQ ? true_reg : false_reg;
4399
4400 /* For BPF_JEQ, if this is false we know nothing Jon Snow, but
4401 * if it is true we know the value for sure. Likewise for
4402 * BPF_JNE.
48461135 4403 */
092ed096
JW
4404 if (is_jmp32) {
4405 u64 old_v = reg->var_off.value;
4406 u64 hi_mask = ~0xffffffffULL;
4407
4408 reg->var_off.value = (old_v & hi_mask) | val;
4409 reg->var_off.mask &= hi_mask;
4410 } else {
4411 __mark_reg_known(reg, val);
4412 }
48461135 4413 break;
a72dafaf 4414 }
960ea056
JK
4415 case BPF_JSET:
4416 false_reg->var_off = tnum_and(false_reg->var_off,
4417 tnum_const(~val));
4418 if (is_power_of_2(val))
4419 true_reg->var_off = tnum_or(true_reg->var_off,
4420 tnum_const(val));
4421 break;
48461135 4422 case BPF_JGE:
a72dafaf
JW
4423 case BPF_JGT:
4424 {
4425 u64 false_umax = opcode == BPF_JGT ? val : val - 1;
4426 u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
4427
092ed096
JW
4428 if (is_jmp32) {
4429 false_umax += gen_hi_max(false_reg->var_off);
4430 true_umin += gen_hi_min(true_reg->var_off);
4431 }
a72dafaf
JW
4432 false_reg->umax_value = min(false_reg->umax_value, false_umax);
4433 true_reg->umin_value = max(true_reg->umin_value, true_umin);
b03c9f9f 4434 break;
a72dafaf 4435 }
48461135 4436 case BPF_JSGE:
a72dafaf
JW
4437 case BPF_JSGT:
4438 {
4439 s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1;
4440 s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
4441
092ed096
JW
4442 /* If the full s64 was not sign-extended from s32 then don't
 4443 * deduce further info.
4444 */
4445 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
4446 break;
a72dafaf
JW
4447 false_reg->smax_value = min(false_reg->smax_value, false_smax);
4448 true_reg->smin_value = max(true_reg->smin_value, true_smin);
48461135 4449 break;
a72dafaf 4450 }
b4e432f1 4451 case BPF_JLE:
a72dafaf
JW
4452 case BPF_JLT:
4453 {
4454 u64 false_umin = opcode == BPF_JLT ? val : val + 1;
4455 u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
4456
092ed096
JW
4457 if (is_jmp32) {
4458 false_umin += gen_hi_min(false_reg->var_off);
4459 true_umax += gen_hi_max(true_reg->var_off);
4460 }
a72dafaf
JW
4461 false_reg->umin_value = max(false_reg->umin_value, false_umin);
4462 true_reg->umax_value = min(true_reg->umax_value, true_umax);
b4e432f1 4463 break;
a72dafaf 4464 }
b4e432f1 4465 case BPF_JSLE:
a72dafaf
JW
4466 case BPF_JSLT:
4467 {
4468 s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1;
4469 s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
4470
092ed096
JW
4471 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
4472 break;
a72dafaf
JW
4473 false_reg->smin_value = max(false_reg->smin_value, false_smin);
4474 true_reg->smax_value = min(true_reg->smax_value, true_smax);
b4e432f1 4475 break;
a72dafaf 4476 }
48461135
JB
4477 default:
4478 break;
4479 }
4480
b03c9f9f
EC
4481 __reg_deduce_bounds(false_reg);
4482 __reg_deduce_bounds(true_reg);
4483 /* We might have learned some bits from the bounds. */
4484 __reg_bound_offset(false_reg);
4485 __reg_bound_offset(true_reg);
4486 /* Intersecting with the old var_off might have improved our bounds
4487 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
4488 * then new var_off is (0; 0x7f...fc) which improves our umax.
4489 */
4490 __update_reg_bounds(false_reg);
4491 __update_reg_bounds(true_reg);
48461135
JB
4492}
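/* Worked example (added for this listing, not part of the original
 * source) of the BPF_JGT case above, for an otherwise unbounded scalar
 * r1 and "if r1 > 10 goto l":
 *
 *   taken branch:        true_reg->umin_value = max(0, 11) = 11
 *   fall-through branch: false_reg->umax_value = min(U64_MAX, 10) = 10
 *
 * __reg_deduce_bounds()/__reg_bound_offset()/__update_reg_bounds() then
 * propagate what was learned into the signed bounds and var_off.
 */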
4493
f1174f77
EC
4494/* Same as above, but for the case that dst_reg holds a constant and src_reg is
4495 * the variable reg.
48461135
JB
4496 */
4497static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
4498 struct bpf_reg_state *false_reg, u64 val,
092ed096 4499 u8 opcode, bool is_jmp32)
48461135 4500{
a72dafaf
JW
4501 s64 sval;
4502
f1174f77
EC
4503 if (__is_pointer_value(false, false_reg))
4504 return;
4cabc5b1 4505
092ed096
JW
4506 val = is_jmp32 ? (u32)val : val;
4507 sval = is_jmp32 ? (s64)(s32)val : (s64)val;
a72dafaf 4508
48461135
JB
4509 switch (opcode) {
4510 case BPF_JEQ:
48461135 4511 case BPF_JNE:
a72dafaf
JW
4512 {
4513 struct bpf_reg_state *reg =
4514 opcode == BPF_JEQ ? true_reg : false_reg;
4515
092ed096
JW
4516 if (is_jmp32) {
4517 u64 old_v = reg->var_off.value;
4518 u64 hi_mask = ~0xffffffffULL;
4519
4520 reg->var_off.value = (old_v & hi_mask) | val;
4521 reg->var_off.mask &= hi_mask;
4522 } else {
4523 __mark_reg_known(reg, val);
4524 }
48461135 4525 break;
a72dafaf 4526 }
960ea056
JK
4527 case BPF_JSET:
4528 false_reg->var_off = tnum_and(false_reg->var_off,
4529 tnum_const(~val));
4530 if (is_power_of_2(val))
4531 true_reg->var_off = tnum_or(true_reg->var_off,
4532 tnum_const(val));
4533 break;
48461135 4534 case BPF_JGE:
a72dafaf
JW
4535 case BPF_JGT:
4536 {
4537 u64 false_umin = opcode == BPF_JGT ? val : val + 1;
4538 u64 true_umax = opcode == BPF_JGT ? val - 1 : val;
4539
092ed096
JW
4540 if (is_jmp32) {
4541 false_umin += gen_hi_min(false_reg->var_off);
4542 true_umax += gen_hi_max(true_reg->var_off);
4543 }
a72dafaf
JW
4544 false_reg->umin_value = max(false_reg->umin_value, false_umin);
4545 true_reg->umax_value = min(true_reg->umax_value, true_umax);
b03c9f9f 4546 break;
a72dafaf 4547 }
48461135 4548 case BPF_JSGE:
a72dafaf
JW
4549 case BPF_JSGT:
4550 {
4551 s64 false_smin = opcode == BPF_JSGT ? sval : sval + 1;
4552 s64 true_smax = opcode == BPF_JSGT ? sval - 1 : sval;
4553
092ed096
JW
4554 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
4555 break;
a72dafaf
JW
4556 false_reg->smin_value = max(false_reg->smin_value, false_smin);
4557 true_reg->smax_value = min(true_reg->smax_value, true_smax);
48461135 4558 break;
a72dafaf 4559 }
b4e432f1 4560 case BPF_JLE:
a72dafaf
JW
4561 case BPF_JLT:
4562 {
4563 u64 false_umax = opcode == BPF_JLT ? val : val - 1;
4564 u64 true_umin = opcode == BPF_JLT ? val + 1 : val;
4565
092ed096
JW
4566 if (is_jmp32) {
4567 false_umax += gen_hi_max(false_reg->var_off);
4568 true_umin += gen_hi_min(true_reg->var_off);
4569 }
a72dafaf
JW
4570 false_reg->umax_value = min(false_reg->umax_value, false_umax);
4571 true_reg->umin_value = max(true_reg->umin_value, true_umin);
b4e432f1 4572 break;
a72dafaf 4573 }
b4e432f1 4574 case BPF_JSLE:
a72dafaf
JW
4575 case BPF_JSLT:
4576 {
4577 s64 false_smax = opcode == BPF_JSLT ? sval : sval - 1;
4578 s64 true_smin = opcode == BPF_JSLT ? sval + 1 : sval;
4579
092ed096
JW
4580 if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
4581 break;
a72dafaf
JW
4582 false_reg->smax_value = min(false_reg->smax_value, false_smax);
4583 true_reg->smin_value = max(true_reg->smin_value, true_smin);
b4e432f1 4584 break;
a72dafaf 4585 }
48461135
JB
4586 default:
4587 break;
4588 }
4589
b03c9f9f
EC
4590 __reg_deduce_bounds(false_reg);
4591 __reg_deduce_bounds(true_reg);
4592 /* We might have learned some bits from the bounds. */
4593 __reg_bound_offset(false_reg);
4594 __reg_bound_offset(true_reg);
4595 /* Intersecting with the old var_off might have improved our bounds
4596 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
4597 * then new var_off is (0; 0x7f...fc) which improves our umax.
4598 */
4599 __update_reg_bounds(false_reg);
4600 __update_reg_bounds(true_reg);
f1174f77
EC
4601}
4602
4603/* Regs are known to be equal, so intersect their min/max/var_off */
4604static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
4605 struct bpf_reg_state *dst_reg)
4606{
b03c9f9f
EC
4607 src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
4608 dst_reg->umin_value);
4609 src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
4610 dst_reg->umax_value);
4611 src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
4612 dst_reg->smin_value);
4613 src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
4614 dst_reg->smax_value);
f1174f77
EC
4615 src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
4616 dst_reg->var_off);
b03c9f9f
EC
4617 /* We might have learned new bounds from the var_off. */
4618 __update_reg_bounds(src_reg);
4619 __update_reg_bounds(dst_reg);
4620 /* We might have learned something about the sign bit. */
4621 __reg_deduce_bounds(src_reg);
4622 __reg_deduce_bounds(dst_reg);
4623 /* We might have learned some bits from the bounds. */
4624 __reg_bound_offset(src_reg);
4625 __reg_bound_offset(dst_reg);
4626 /* Intersecting with the old var_off might have improved our bounds
4627 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
4628 * then new var_off is (0; 0x7f...fc) which improves our umax.
4629 */
4630 __update_reg_bounds(src_reg);
4631 __update_reg_bounds(dst_reg);
f1174f77
EC
4632}
4633
4634static void reg_combine_min_max(struct bpf_reg_state *true_src,
4635 struct bpf_reg_state *true_dst,
4636 struct bpf_reg_state *false_src,
4637 struct bpf_reg_state *false_dst,
4638 u8 opcode)
4639{
4640 switch (opcode) {
4641 case BPF_JEQ:
4642 __reg_combine_min_max(true_src, true_dst);
4643 break;
4644 case BPF_JNE:
4645 __reg_combine_min_max(false_src, false_dst);
b03c9f9f 4646 break;
4cabc5b1 4647 }
48461135
JB
4648}
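/* Worked example (added for this listing, not part of the original
 * source): after "if r1 == r2 goto l" with r1 in [0, 10] and r2 in
 * [5, 20], the taken branch intersects both states via
 * __reg_combine_min_max(), so both registers end up with umin == 5,
 * umax == 10 and an intersected var_off; for JEQ the fall-through
 * branch learns nothing.
 */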
4649
fd978bf7
JS
4650static void mark_ptr_or_null_reg(struct bpf_func_state *state,
4651 struct bpf_reg_state *reg, u32 id,
840b9615 4652 bool is_null)
57a09bf0 4653{
840b9615 4654 if (reg_type_may_be_null(reg->type) && reg->id == id) {
f1174f77
EC
4655 /* Old offset (both fixed and variable parts) should
4656 * have been known-zero, because we don't allow pointer
4657 * arithmetic on pointers that might be NULL.
4658 */
b03c9f9f
EC
4659 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
4660 !tnum_equals_const(reg->var_off, 0) ||
f1174f77 4661 reg->off)) {
b03c9f9f
EC
4662 __mark_reg_known_zero(reg);
4663 reg->off = 0;
f1174f77
EC
4664 }
4665 if (is_null) {
4666 reg->type = SCALAR_VALUE;
840b9615
JS
4667 } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
4668 if (reg->map_ptr->inner_map_meta) {
4669 reg->type = CONST_PTR_TO_MAP;
4670 reg->map_ptr = reg->map_ptr->inner_map_meta;
4671 } else {
4672 reg->type = PTR_TO_MAP_VALUE;
4673 }
c64b7983
JS
4674 } else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
4675 reg->type = PTR_TO_SOCKET;
46f8bc92
MKL
4676 } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
4677 reg->type = PTR_TO_SOCK_COMMON;
655a51e5
MKL
4678 } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
4679 reg->type = PTR_TO_TCP_SOCK;
56f668df 4680 }
1b986589
MKL
4681 if (is_null) {
 4682 /* The id and ref_obj_id are not needed from this point
 4683 * onwards, so reset them to give state pruning
 4684 * a chance to take effect.
4685 */
4686 reg->id = 0;
4687 reg->ref_obj_id = 0;
4688 } else if (!reg_may_point_to_spin_lock(reg)) {
4689 /* For not-NULL ptr, reg->ref_obj_id will be reset
4690 * in release_reg_references().
4691 *
4692 * reg->id is still used by spin_lock ptr. Other
4693 * than spin_lock ptr type, reg->id can be reset.
fd978bf7
JS
4694 */
4695 reg->id = 0;
56f668df 4696 }
57a09bf0
TG
4697 }
4698}
4699
4700/* The logic is similar to find_good_pkt_pointers(), both could eventually
4701 * be folded together at some point.
4702 */
840b9615
JS
4703static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
4704 bool is_null)
57a09bf0 4705{
f4d7e40a 4706 struct bpf_func_state *state = vstate->frame[vstate->curframe];
f3709f69 4707 struct bpf_reg_state *reg, *regs = state->regs;
1b986589 4708 u32 ref_obj_id = regs[regno].ref_obj_id;
a08dd0da 4709 u32 id = regs[regno].id;
f4d7e40a 4710 int i, j;
57a09bf0 4711
1b986589
MKL
4712 if (ref_obj_id && ref_obj_id == id && is_null)
4713 /* regs[regno] is in the " == NULL" branch.
4714 * No one could have freed the reference state before
4715 * doing the NULL check.
4716 */
4717 WARN_ON_ONCE(release_reference_state(state, id));
fd978bf7 4718
57a09bf0 4719 for (i = 0; i < MAX_BPF_REG; i++)
fd978bf7 4720 mark_ptr_or_null_reg(state, &regs[i], id, is_null);
57a09bf0 4721
f4d7e40a
AS
4722 for (j = 0; j <= vstate->curframe; j++) {
4723 state = vstate->frame[j];
f3709f69
JS
4724 bpf_for_each_spilled_reg(i, state, reg) {
4725 if (!reg)
f4d7e40a 4726 continue;
fd978bf7 4727 mark_ptr_or_null_reg(state, reg, id, is_null);
f4d7e40a 4728 }
57a09bf0
TG
4729 }
4730}
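/* Illustrative example (added for this listing, not part of the
 * original source) of the NULL-check handling above:
 *
 *   r0 = bpf_map_lookup_elem(...)  // PTR_TO_MAP_VALUE_OR_NULL, id == n
 *   r6 = r0                        // the copy shares id == n
 *   if r0 == 0 goto out
 *   // fall-through: every reg with id == n (r0 and r6) is now
 *   // PTR_TO_MAP_VALUE and may be dereferenced
 *   r1 = *(u64 *)(r6 + 0)
 * out:
 *   // taken branch: r0 and r6 become SCALAR_VALUEs; for acquired
 *   // pointers (e.g. from bpf_sk_lookup_tcp) the reference state is
 *   // released here as well
 *   exit
 */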
4731
5beca081
DB
4732static bool try_match_pkt_pointers(const struct bpf_insn *insn,
4733 struct bpf_reg_state *dst_reg,
4734 struct bpf_reg_state *src_reg,
4735 struct bpf_verifier_state *this_branch,
4736 struct bpf_verifier_state *other_branch)
4737{
4738 if (BPF_SRC(insn->code) != BPF_X)
4739 return false;
4740
092ed096
JW
4741 /* Pointers are always 64-bit. */
4742 if (BPF_CLASS(insn->code) == BPF_JMP32)
4743 return false;
4744
5beca081
DB
4745 switch (BPF_OP(insn->code)) {
4746 case BPF_JGT:
4747 if ((dst_reg->type == PTR_TO_PACKET &&
4748 src_reg->type == PTR_TO_PACKET_END) ||
4749 (dst_reg->type == PTR_TO_PACKET_META &&
4750 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4751 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
4752 find_good_pkt_pointers(this_branch, dst_reg,
4753 dst_reg->type, false);
4754 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4755 src_reg->type == PTR_TO_PACKET) ||
4756 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4757 src_reg->type == PTR_TO_PACKET_META)) {
4758 /* pkt_end > pkt_data', pkt_data > pkt_meta' */
4759 find_good_pkt_pointers(other_branch, src_reg,
4760 src_reg->type, true);
4761 } else {
4762 return false;
4763 }
4764 break;
4765 case BPF_JLT:
4766 if ((dst_reg->type == PTR_TO_PACKET &&
4767 src_reg->type == PTR_TO_PACKET_END) ||
4768 (dst_reg->type == PTR_TO_PACKET_META &&
4769 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4770 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
4771 find_good_pkt_pointers(other_branch, dst_reg,
4772 dst_reg->type, true);
4773 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4774 src_reg->type == PTR_TO_PACKET) ||
4775 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4776 src_reg->type == PTR_TO_PACKET_META)) {
 4777 /* pkt_end < pkt_data', pkt_data < pkt_meta' */
4778 find_good_pkt_pointers(this_branch, src_reg,
4779 src_reg->type, false);
4780 } else {
4781 return false;
4782 }
4783 break;
4784 case BPF_JGE:
4785 if ((dst_reg->type == PTR_TO_PACKET &&
4786 src_reg->type == PTR_TO_PACKET_END) ||
4787 (dst_reg->type == PTR_TO_PACKET_META &&
4788 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4789 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
4790 find_good_pkt_pointers(this_branch, dst_reg,
4791 dst_reg->type, true);
4792 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4793 src_reg->type == PTR_TO_PACKET) ||
4794 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4795 src_reg->type == PTR_TO_PACKET_META)) {
4796 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
4797 find_good_pkt_pointers(other_branch, src_reg,
4798 src_reg->type, false);
4799 } else {
4800 return false;
4801 }
4802 break;
4803 case BPF_JLE:
4804 if ((dst_reg->type == PTR_TO_PACKET &&
4805 src_reg->type == PTR_TO_PACKET_END) ||
4806 (dst_reg->type == PTR_TO_PACKET_META &&
4807 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
4808 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
4809 find_good_pkt_pointers(other_branch, dst_reg,
4810 dst_reg->type, false);
4811 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
4812 src_reg->type == PTR_TO_PACKET) ||
4813 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
4814 src_reg->type == PTR_TO_PACKET_META)) {
4815 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
4816 find_good_pkt_pointers(this_branch, src_reg,
4817 src_reg->type, true);
4818 } else {
4819 return false;
4820 }
4821 break;
4822 default:
4823 return false;
4824 }
4825
4826 return true;
4827}
4828
58e2af8b 4829static int check_cond_jmp_op(struct bpf_verifier_env *env,
17a52670
AS
4830 struct bpf_insn *insn, int *insn_idx)
4831{
f4d7e40a
AS
4832 struct bpf_verifier_state *this_branch = env->cur_state;
4833 struct bpf_verifier_state *other_branch;
4834 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
4835 struct bpf_reg_state *dst_reg, *other_branch_regs;
17a52670 4836 u8 opcode = BPF_OP(insn->code);
092ed096 4837 bool is_jmp32;
17a52670
AS
4838 int err;
4839
092ed096
JW
4840 /* Only conditional jumps are expected to reach here. */
4841 if (opcode == BPF_JA || opcode > BPF_JSLE) {
4842 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
17a52670
AS
4843 return -EINVAL;
4844 }
4845
4846 if (BPF_SRC(insn->code) == BPF_X) {
4847 if (insn->imm != 0) {
092ed096 4848 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
17a52670
AS
4849 return -EINVAL;
4850 }
4851
4852 /* check src1 operand */
dc503a8a 4853 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
4854 if (err)
4855 return err;
1be7f75d
AS
4856
4857 if (is_pointer_value(env, insn->src_reg)) {
61bd5218 4858 verbose(env, "R%d pointer comparison prohibited\n",
1be7f75d
AS
4859 insn->src_reg);
4860 return -EACCES;
4861 }
17a52670
AS
4862 } else {
4863 if (insn->src_reg != BPF_REG_0) {
092ed096 4864 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
17a52670
AS
4865 return -EINVAL;
4866 }
4867 }
4868
4869 /* check src2 operand */
dc503a8a 4870 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
4871 if (err)
4872 return err;
4873
1a0dc1ac 4874 dst_reg = &regs[insn->dst_reg];
092ed096 4875 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
1a0dc1ac 4876
4f7b3e82 4877 if (BPF_SRC(insn->code) == BPF_K) {
092ed096
JW
4878 int pred = is_branch_taken(dst_reg, insn->imm, opcode,
4879 is_jmp32);
4f7b3e82
AS
4880
4881 if (pred == 1) {
4882 /* only follow the goto, ignore fall-through */
17a52670
AS
4883 *insn_idx += insn->off;
4884 return 0;
4f7b3e82
AS
4885 } else if (pred == 0) {
4886 /* only follow fall-through branch, since
17a52670
AS
4887 * that's where the program will go
4888 */
4889 return 0;
4890 }
4891 }
4892
979d63d5
DB
4893 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
4894 false);
17a52670
AS
4895 if (!other_branch)
4896 return -EFAULT;
f4d7e40a 4897 other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
17a52670 4898
48461135
JB
4899 /* detect if we are comparing against a constant value so we can adjust
4900 * our min/max values for our dst register.
f1174f77
EC
4901 * this is only legit if both are scalars (or pointers to the same
4902 * object, I suppose, but we don't support that right now), because
4903 * otherwise the different base pointers mean the offsets aren't
4904 * comparable.
48461135
JB
4905 */
4906 if (BPF_SRC(insn->code) == BPF_X) {
092ed096
JW
4907 struct bpf_reg_state *src_reg = &regs[insn->src_reg];
4908 struct bpf_reg_state lo_reg0 = *dst_reg;
4909 struct bpf_reg_state lo_reg1 = *src_reg;
4910 struct bpf_reg_state *src_lo, *dst_lo;
4911
4912 dst_lo = &lo_reg0;
4913 src_lo = &lo_reg1;
4914 coerce_reg_to_size(dst_lo, 4);
4915 coerce_reg_to_size(src_lo, 4);
4916
f1174f77 4917 if (dst_reg->type == SCALAR_VALUE &&
092ed096
JW
4918 src_reg->type == SCALAR_VALUE) {
4919 if (tnum_is_const(src_reg->var_off) ||
4920 (is_jmp32 && tnum_is_const(src_lo->var_off)))
f4d7e40a 4921 reg_set_min_max(&other_branch_regs[insn->dst_reg],
092ed096
JW
4922 dst_reg,
4923 is_jmp32
4924 ? src_lo->var_off.value
4925 : src_reg->var_off.value,
4926 opcode, is_jmp32);
4927 else if (tnum_is_const(dst_reg->var_off) ||
4928 (is_jmp32 && tnum_is_const(dst_lo->var_off)))
f4d7e40a 4929 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
092ed096
JW
4930 src_reg,
4931 is_jmp32
4932 ? dst_lo->var_off.value
4933 : dst_reg->var_off.value,
4934 opcode, is_jmp32);
4935 else if (!is_jmp32 &&
4936 (opcode == BPF_JEQ || opcode == BPF_JNE))
f1174f77 4937 /* Comparing for equality, we can combine knowledge */
f4d7e40a
AS
4938 reg_combine_min_max(&other_branch_regs[insn->src_reg],
4939 &other_branch_regs[insn->dst_reg],
092ed096 4940 src_reg, dst_reg, opcode);
f1174f77
EC
4941 }
4942 } else if (dst_reg->type == SCALAR_VALUE) {
f4d7e40a 4943 reg_set_min_max(&other_branch_regs[insn->dst_reg],
092ed096 4944 dst_reg, insn->imm, opcode, is_jmp32);
48461135
JB
4945 }
4946
092ed096
JW
4947 /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
 4948 * NOTE: the optimizations below are related to pointer comparisons,
4949 * which will never be JMP32.
4950 */
4951 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
1a0dc1ac 4952 insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
840b9615
JS
4953 reg_type_may_be_null(dst_reg->type)) {
4954 /* Mark all identical registers in each branch as either
57a09bf0
TG
 4955 * safe or unknown, depending on whether the R == 0 or R != 0 branch is taken.
4956 */
840b9615
JS
4957 mark_ptr_or_null_regs(this_branch, insn->dst_reg,
4958 opcode == BPF_JNE);
4959 mark_ptr_or_null_regs(other_branch, insn->dst_reg,
4960 opcode == BPF_JEQ);
5beca081
DB
4961 } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
4962 this_branch, other_branch) &&
4963 is_pointer_value(env, insn->dst_reg)) {
61bd5218
JK
4964 verbose(env, "R%d pointer comparison prohibited\n",
4965 insn->dst_reg);
1be7f75d 4966 return -EACCES;
17a52670 4967 }
61bd5218 4968 if (env->log.level)
f4d7e40a 4969 print_verifier_state(env, this_branch->frame[this_branch->curframe]);
17a52670
AS
4970 return 0;
4971}
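
/* Illustrative example: the min/max adjustment done above for comparisons
 * against a constant can be seen on a simple bounds check. Assume r0 is a
 * SCALAR_VALUE already known to lie in [0, 255] and the program executes:
 *
 *   if r0 > 9 goto pc+N
 *
 * check_cond_jmp_op() pushes the branch-taken state and calls
 * reg_set_min_max() with opcode BPF_JGT and value 9: in the branch-taken
 * state r0's umin_value is raised to 10 (range [10, 255]), while in the
 * fall-through state r0's umax_value is lowered to 9 (range [0, 9]).
 * Subsequent memory accesses in each branch are then checked against the
 * narrowed range. The concrete bounds here are only an example.
 */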
4972
0246e64d
AS
4973/* return the map pointer stored inside BPF_LD_IMM64 instruction */
4974static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
4975{
4976 u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
4977
4978 return (struct bpf_map *) (unsigned long) imm64;
4979}
4980
17a52670 4981/* verify BPF_LD_IMM64 instruction */
58e2af8b 4982static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
17a52670 4983{
638f5b90 4984 struct bpf_reg_state *regs = cur_regs(env);
17a52670
AS
4985 int err;
4986
4987 if (BPF_SIZE(insn->code) != BPF_DW) {
61bd5218 4988 verbose(env, "invalid BPF_LD_IMM insn\n");
17a52670
AS
4989 return -EINVAL;
4990 }
4991 if (insn->off != 0) {
61bd5218 4992 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
17a52670
AS
4993 return -EINVAL;
4994 }
4995
dc503a8a 4996 err = check_reg_arg(env, insn->dst_reg, DST_OP);
17a52670
AS
4997 if (err)
4998 return err;
4999
6b173873 5000 if (insn->src_reg == 0) {
6b173873
JK
5001 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
5002
f1174f77 5003 regs[insn->dst_reg].type = SCALAR_VALUE;
b03c9f9f 5004 __mark_reg_known(&regs[insn->dst_reg], imm);
17a52670 5005 return 0;
6b173873 5006 }
17a52670
AS
5007
5008 /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
5009 BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
5010
5011 regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
5012 regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
5013 return 0;
5014}
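
/* Illustrative example: BPF_LD_IMM64 is the only 16-byte instruction; it
 * occupies two struct bpf_insn slots, with the low 32 bits of the constant
 * in insn[0].imm and the high 32 bits in insn[1].imm, as decoded by
 * check_ld_imm() and ld_imm64_to_map_ptr() above. When insn->src_reg ==
 * BPF_PSEUDO_MAP_FD, the immediate originally held a map file descriptor
 * that replace_map_fd_with_map_ptr() has already rewritten into a kernel
 * map pointer by the time this code runs, so the destination register
 * simply becomes CONST_PTR_TO_MAP. Using the macros from
 * include/linux/filter.h, a loader-side sketch of such an instruction
 * (map_fd is a placeholder for a real map file descriptor) is:
 *
 *   BPF_LD_MAP_FD(BPF_REG_1, map_fd),          expands to two insn slots
 *   BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),      r2 = fp
 *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),     r2 = fp - 8 (key prepared on stack)
 *
 * followed by a call to bpf_map_lookup_elem.
 */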
5015
96be4325
DB
5016static bool may_access_skb(enum bpf_prog_type type)
5017{
5018 switch (type) {
5019 case BPF_PROG_TYPE_SOCKET_FILTER:
5020 case BPF_PROG_TYPE_SCHED_CLS:
94caee8c 5021 case BPF_PROG_TYPE_SCHED_ACT:
96be4325
DB
5022 return true;
5023 default:
5024 return false;
5025 }
5026}
5027
ddd872bc
AS
5028/* verify safety of LD_ABS|LD_IND instructions:
5029 * - they can only appear in the programs where ctx == skb
5030 * - since they are wrappers of function calls, they scratch R1-R5 registers,
5031 * preserve R6-R9, and store return value into R0
5032 *
5033 * Implicit input:
5034 * ctx == skb == R6 == CTX
5035 *
5036 * Explicit input:
5037 * SRC == any register
5038 * IMM == 32-bit immediate
5039 *
5040 * Output:
5041 * R0 - 8/16/32-bit skb data converted to cpu endianness
5042 */
58e2af8b 5043static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
ddd872bc 5044{
638f5b90 5045 struct bpf_reg_state *regs = cur_regs(env);
ddd872bc 5046 u8 mode = BPF_MODE(insn->code);
ddd872bc
AS
5047 int i, err;
5048
24701ece 5049 if (!may_access_skb(env->prog->type)) {
61bd5218 5050 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
ddd872bc
AS
5051 return -EINVAL;
5052 }
5053
e0cea7ce
DB
5054 if (!env->ops->gen_ld_abs) {
5055 verbose(env, "bpf verifier is misconfigured\n");
5056 return -EINVAL;
5057 }
5058
f910cefa 5059 if (env->subprog_cnt > 1) {
f4d7e40a
AS
 5060 /* when a program has LD_ABS insns, JITs and the interpreter assume
 5061 * that r1 == ctx == skb, which is not the case for callees,
 5062 * which can have arbitrary arguments. It's problematic
 5063 * for the main prog as well, since JITs would need to analyze
 5064 * all functions in order to make proper register save/restore
 5065 * decisions in the main prog. Hence disallow mixing LD_ABS with calls
5066 */
5067 verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
5068 return -EINVAL;
5069 }
5070
ddd872bc 5071 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
d82bccc6 5072 BPF_SIZE(insn->code) == BPF_DW ||
ddd872bc 5073 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
61bd5218 5074 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
ddd872bc
AS
5075 return -EINVAL;
5076 }
5077
5078 /* check whether implicit source operand (register R6) is readable */
dc503a8a 5079 err = check_reg_arg(env, BPF_REG_6, SRC_OP);
ddd872bc
AS
5080 if (err)
5081 return err;
5082
fd978bf7
JS
5083 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
5084 * gen_ld_abs() may terminate the program at runtime, leading to
5085 * reference leak.
5086 */
5087 err = check_reference_leak(env);
5088 if (err) {
5089 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
5090 return err;
5091 }
5092
d83525ca
AS
5093 if (env->cur_state->active_spin_lock) {
5094 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
5095 return -EINVAL;
5096 }
5097
ddd872bc 5098 if (regs[BPF_REG_6].type != PTR_TO_CTX) {
61bd5218
JK
5099 verbose(env,
5100 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
ddd872bc
AS
5101 return -EINVAL;
5102 }
5103
5104 if (mode == BPF_IND) {
5105 /* check explicit source operand */
dc503a8a 5106 err = check_reg_arg(env, insn->src_reg, SRC_OP);
ddd872bc
AS
5107 if (err)
5108 return err;
5109 }
5110
5111 /* reset caller saved regs to unreadable */
dc503a8a 5112 for (i = 0; i < CALLER_SAVED_REGS; i++) {
61bd5218 5113 mark_reg_not_init(env, regs, caller_saved[i]);
dc503a8a
EC
5114 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
5115 }
ddd872bc
AS
5116
5117 /* mark destination R0 register as readable, since it contains
dc503a8a
EC
5118 * the value fetched from the packet.
5119 * Already marked as written above.
ddd872bc 5120 */
61bd5218 5121 mark_reg_unknown(env, regs, BPF_REG_0);
ddd872bc
AS
5122 return 0;
5123}
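
/* Illustrative example: a minimal socket filter that satisfies the
 * constraints checked above. R6 must hold the skb (ctx) before the LD_ABS,
 * and the loaded byte lands in R0. The offset 23 assumes an untagged
 * IPv4-over-Ethernet frame (14-byte header + 9 bytes into the IP header)
 * and is purely illustrative:
 *
 *   BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),              r6 = ctx (skb)
 *   BPF_LD_ABS(BPF_B, 23),                            r0 = IP protocol byte
 *   BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, IPPROTO_TCP, 2),
 *   BPF_MOV64_IMM(BPF_REG_0, 0),                      drop non-TCP packets
 *   BPF_EXIT_INSN(),
 *   BPF_MOV64_IMM(BPF_REG_0, -1),                     keep TCP packets
 *   BPF_EXIT_INSN(),
 *
 * Socket filters interpret the return value as the number of bytes to keep,
 * so 0 drops the packet and (u32)-1 keeps it whole.
 */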
5124
390ee7e2
AS
5125static int check_return_code(struct bpf_verifier_env *env)
5126{
5127 struct bpf_reg_state *reg;
5128 struct tnum range = tnum_range(0, 1);
5129
5130 switch (env->prog->type) {
5131 case BPF_PROG_TYPE_CGROUP_SKB:
5132 case BPF_PROG_TYPE_CGROUP_SOCK:
4fbac77d 5133 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
390ee7e2 5134 case BPF_PROG_TYPE_SOCK_OPS:
ebc614f6 5135 case BPF_PROG_TYPE_CGROUP_DEVICE:
390ee7e2
AS
5136 break;
5137 default:
5138 return 0;
5139 }
5140
638f5b90 5141 reg = cur_regs(env) + BPF_REG_0;
390ee7e2 5142 if (reg->type != SCALAR_VALUE) {
61bd5218 5143 verbose(env, "At program exit the register R0 is not a known value (%s)\n",
390ee7e2
AS
5144 reg_type_str[reg->type]);
5145 return -EINVAL;
5146 }
5147
5148 if (!tnum_in(range, reg->var_off)) {
61bd5218 5149 verbose(env, "At program exit the register R0 ");
390ee7e2
AS
5150 if (!tnum_is_unknown(reg->var_off)) {
5151 char tn_buf[48];
5152
5153 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
61bd5218 5154 verbose(env, "has value %s", tn_buf);
390ee7e2 5155 } else {
61bd5218 5156 verbose(env, "has unknown scalar value");
390ee7e2 5157 }
61bd5218 5158 verbose(env, " should have been 0 or 1\n");
390ee7e2
AS
5159 return -EINVAL;
5160 }
5161 return 0;
5162}
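
/* Illustrative example: for the program types listed in the switch above
 * the attachment point interprets R0 strictly as 0 or 1, so anything else
 * is rejected here. A cgroup skb program ending with
 *
 *   BPF_MOV64_IMM(BPF_REG_0, 2),
 *   BPF_EXIT_INSN(),
 *
 * fails to load with a message along the lines of "At program exit the
 * register R0 has value (0x2; 0x0) should have been 0 or 1", because
 * tnum_in(tnum_range(0, 1), reg->var_off) is false for the constant 2.
 */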
5163
475fb78f
AS
5164/* non-recursive DFS pseudo code
5165 * 1 procedure DFS-iterative(G,v):
5166 * 2 label v as discovered
5167 * 3 let S be a stack
5168 * 4 S.push(v)
5169 * 5 while S is not empty
5170 * 6 t <- S.pop()
5171 * 7 if t is what we're looking for:
5172 * 8 return t
5173 * 9 for all edges e in G.adjacentEdges(t) do
5174 * 10 if edge e is already labelled
5175 * 11 continue with the next edge
5176 * 12 w <- G.adjacentVertex(t,e)
5177 * 13 if vertex w is not discovered and not explored
5178 * 14 label e as tree-edge
5179 * 15 label w as discovered
5180 * 16 S.push(w)
5181 * 17 continue at 5
5182 * 18 else if vertex w is discovered
5183 * 19 label e as back-edge
5184 * 20 else
5185 * 21 // vertex w is explored
5186 * 22 label e as forward- or cross-edge
5187 * 23 label t as explored
5188 * 24 S.pop()
5189 *
5190 * convention:
5191 * 0x10 - discovered
5192 * 0x11 - discovered and fall-through edge labelled
5193 * 0x12 - discovered and fall-through and branch edges labelled
5194 * 0x20 - explored
5195 */
5196
5197enum {
5198 DISCOVERED = 0x10,
5199 EXPLORED = 0x20,
5200 FALLTHROUGH = 1,
5201 BRANCH = 2,
5202};
5203
58e2af8b 5204#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
f1bca824 5205
475fb78f
AS
5206static int *insn_stack; /* stack of insns to process */
5207static int cur_stack; /* current stack index */
5208static int *insn_state;
5209
5210/* t, w, e - match pseudo-code above:
5211 * t - index of current instruction
5212 * w - next instruction
5213 * e - edge
5214 */
58e2af8b 5215static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
475fb78f
AS
5216{
5217 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
5218 return 0;
5219
5220 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
5221 return 0;
5222
5223 if (w < 0 || w >= env->prog->len) {
d9762e84 5224 verbose_linfo(env, t, "%d: ", t);
61bd5218 5225 verbose(env, "jump out of range from insn %d to %d\n", t, w);
475fb78f
AS
5226 return -EINVAL;
5227 }
5228
f1bca824
AS
5229 if (e == BRANCH)
5230 /* mark branch target for state pruning */
5231 env->explored_states[w] = STATE_LIST_MARK;
5232
475fb78f
AS
5233 if (insn_state[w] == 0) {
5234 /* tree-edge */
5235 insn_state[t] = DISCOVERED | e;
5236 insn_state[w] = DISCOVERED;
5237 if (cur_stack >= env->prog->len)
5238 return -E2BIG;
5239 insn_stack[cur_stack++] = w;
5240 return 1;
5241 } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
d9762e84
MKL
5242 verbose_linfo(env, t, "%d: ", t);
5243 verbose_linfo(env, w, "%d: ", w);
61bd5218 5244 verbose(env, "back-edge from insn %d to %d\n", t, w);
475fb78f
AS
5245 return -EINVAL;
5246 } else if (insn_state[w] == EXPLORED) {
5247 /* forward- or cross-edge */
5248 insn_state[t] = DISCOVERED | e;
5249 } else {
61bd5218 5250 verbose(env, "insn state internal bug\n");
475fb78f
AS
5251 return -EFAULT;
5252 }
5253 return 0;
5254}
5255
5256/* non-recursive depth-first-search to detect loops in BPF program
5257 * loop == back-edge in directed graph
5258 */
58e2af8b 5259static int check_cfg(struct bpf_verifier_env *env)
475fb78f
AS
5260{
5261 struct bpf_insn *insns = env->prog->insnsi;
5262 int insn_cnt = env->prog->len;
5263 int ret = 0;
5264 int i, t;
5265
5266 insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
5267 if (!insn_state)
5268 return -ENOMEM;
5269
5270 insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
5271 if (!insn_stack) {
5272 kfree(insn_state);
5273 return -ENOMEM;
5274 }
5275
5276 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
5277 insn_stack[0] = 0; /* 0 is the first instruction */
5278 cur_stack = 1;
5279
5280peek_stack:
5281 if (cur_stack == 0)
5282 goto check_state;
5283 t = insn_stack[cur_stack - 1];
5284
092ed096
JW
5285 if (BPF_CLASS(insns[t].code) == BPF_JMP ||
5286 BPF_CLASS(insns[t].code) == BPF_JMP32) {
475fb78f
AS
5287 u8 opcode = BPF_OP(insns[t].code);
5288
5289 if (opcode == BPF_EXIT) {
5290 goto mark_explored;
5291 } else if (opcode == BPF_CALL) {
5292 ret = push_insn(t, t + 1, FALLTHROUGH, env);
5293 if (ret == 1)
5294 goto peek_stack;
5295 else if (ret < 0)
5296 goto err_free;
07016151
DB
5297 if (t + 1 < insn_cnt)
5298 env->explored_states[t + 1] = STATE_LIST_MARK;
cc8b0b92
AS
5299 if (insns[t].src_reg == BPF_PSEUDO_CALL) {
5300 env->explored_states[t] = STATE_LIST_MARK;
5301 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
5302 if (ret == 1)
5303 goto peek_stack;
5304 else if (ret < 0)
5305 goto err_free;
5306 }
475fb78f
AS
5307 } else if (opcode == BPF_JA) {
5308 if (BPF_SRC(insns[t].code) != BPF_K) {
5309 ret = -EINVAL;
5310 goto err_free;
5311 }
5312 /* unconditional jump with single edge */
5313 ret = push_insn(t, t + insns[t].off + 1,
5314 FALLTHROUGH, env);
5315 if (ret == 1)
5316 goto peek_stack;
5317 else if (ret < 0)
5318 goto err_free;
f1bca824
AS
5319 /* tell verifier to check for equivalent states
5320 * after every call and jump
5321 */
c3de6317
AS
5322 if (t + 1 < insn_cnt)
5323 env->explored_states[t + 1] = STATE_LIST_MARK;
475fb78f
AS
5324 } else {
5325 /* conditional jump with two edges */
3c2ce60b 5326 env->explored_states[t] = STATE_LIST_MARK;
475fb78f
AS
5327 ret = push_insn(t, t + 1, FALLTHROUGH, env);
5328 if (ret == 1)
5329 goto peek_stack;
5330 else if (ret < 0)
5331 goto err_free;
5332
5333 ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
5334 if (ret == 1)
5335 goto peek_stack;
5336 else if (ret < 0)
5337 goto err_free;
5338 }
5339 } else {
5340 /* all other non-branch instructions with single
5341 * fall-through edge
5342 */
5343 ret = push_insn(t, t + 1, FALLTHROUGH, env);
5344 if (ret == 1)
5345 goto peek_stack;
5346 else if (ret < 0)
5347 goto err_free;
5348 }
5349
5350mark_explored:
5351 insn_state[t] = EXPLORED;
5352 if (cur_stack-- <= 0) {
61bd5218 5353 verbose(env, "pop stack internal bug\n");
475fb78f
AS
5354 ret = -EFAULT;
5355 goto err_free;
5356 }
5357 goto peek_stack;
5358
5359check_state:
5360 for (i = 0; i < insn_cnt; i++) {
5361 if (insn_state[i] != EXPLORED) {
61bd5218 5362 verbose(env, "unreachable insn %d\n", i);
475fb78f
AS
5363 ret = -EINVAL;
5364 goto err_free;
5365 }
5366 }
5367 ret = 0; /* cfg looks good */
5368
5369err_free:
5370 kfree(insn_state);
5371 kfree(insn_stack);
5372 return ret;
5373}
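
/* Illustrative example: the DFS above rejects any back-edge, i.e. any loop.
 * A sequence such as
 *
 *   BPF_MOV64_IMM(BPF_REG_0, 0),
 *   BPF_JMP_IMM(BPF_JA, 0, 0, -2),
 *   BPF_EXIT_INSN(),
 *
 * is rejected: the unconditional jump at insn 1 targets insn 1 + (-2) + 1 = 0,
 * which is still in the DISCOVERED state when push_insn() revisits it, so
 * check_cfg() reports "back-edge from insn 1 to 0" and the program never
 * reaches the second verification pass.
 */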
5374
838e9690
YS
5375/* The minimum supported BTF func info size */
5376#define MIN_BPF_FUNCINFO_SIZE 8
5377#define MAX_FUNCINFO_REC_SIZE 252
5378
c454a46b
MKL
5379static int check_btf_func(struct bpf_verifier_env *env,
5380 const union bpf_attr *attr,
5381 union bpf_attr __user *uattr)
838e9690 5382{
d0b2818e 5383 u32 i, nfuncs, urec_size, min_size;
838e9690 5384 u32 krec_size = sizeof(struct bpf_func_info);
c454a46b 5385 struct bpf_func_info *krecord;
838e9690 5386 const struct btf_type *type;
c454a46b
MKL
5387 struct bpf_prog *prog;
5388 const struct btf *btf;
838e9690 5389 void __user *urecord;
d0b2818e 5390 u32 prev_offset = 0;
838e9690
YS
5391 int ret = 0;
5392
5393 nfuncs = attr->func_info_cnt;
5394 if (!nfuncs)
5395 return 0;
5396
5397 if (nfuncs != env->subprog_cnt) {
5398 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
5399 return -EINVAL;
5400 }
5401
5402 urec_size = attr->func_info_rec_size;
5403 if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
5404 urec_size > MAX_FUNCINFO_REC_SIZE ||
5405 urec_size % sizeof(u32)) {
5406 verbose(env, "invalid func info rec size %u\n", urec_size);
5407 return -EINVAL;
5408 }
5409
c454a46b
MKL
5410 prog = env->prog;
5411 btf = prog->aux->btf;
838e9690
YS
5412
5413 urecord = u64_to_user_ptr(attr->func_info);
5414 min_size = min_t(u32, krec_size, urec_size);
5415
ba64e7d8 5416 krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
c454a46b
MKL
5417 if (!krecord)
5418 return -ENOMEM;
ba64e7d8 5419
838e9690
YS
5420 for (i = 0; i < nfuncs; i++) {
5421 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
5422 if (ret) {
5423 if (ret == -E2BIG) {
5424 verbose(env, "nonzero tailing record in func info");
5425 /* set the size kernel expects so loader can zero
5426 * out the rest of the record.
5427 */
5428 if (put_user(min_size, &uattr->func_info_rec_size))
5429 ret = -EFAULT;
5430 }
c454a46b 5431 goto err_free;
838e9690
YS
5432 }
5433
ba64e7d8 5434 if (copy_from_user(&krecord[i], urecord, min_size)) {
838e9690 5435 ret = -EFAULT;
c454a46b 5436 goto err_free;
838e9690
YS
5437 }
5438
d30d42e0 5439 /* check insn_off */
838e9690 5440 if (i == 0) {
d30d42e0 5441 if (krecord[i].insn_off) {
838e9690 5442 verbose(env,
d30d42e0
MKL
5443 "nonzero insn_off %u for the first func info record",
5444 krecord[i].insn_off);
838e9690 5445 ret = -EINVAL;
c454a46b 5446 goto err_free;
838e9690 5447 }
d30d42e0 5448 } else if (krecord[i].insn_off <= prev_offset) {
838e9690
YS
5449 verbose(env,
5450 "same or smaller insn offset (%u) than previous func info record (%u)",
d30d42e0 5451 krecord[i].insn_off, prev_offset);
838e9690 5452 ret = -EINVAL;
c454a46b 5453 goto err_free;
838e9690
YS
5454 }
5455
d30d42e0 5456 if (env->subprog_info[i].start != krecord[i].insn_off) {
838e9690
YS
5457 verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
5458 ret = -EINVAL;
c454a46b 5459 goto err_free;
838e9690
YS
5460 }
5461
5462 /* check type_id */
ba64e7d8 5463 type = btf_type_by_id(btf, krecord[i].type_id);
838e9690
YS
5464 if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) {
5465 verbose(env, "invalid type id %d in func info",
ba64e7d8 5466 krecord[i].type_id);
838e9690 5467 ret = -EINVAL;
c454a46b 5468 goto err_free;
838e9690
YS
5469 }
5470
d30d42e0 5471 prev_offset = krecord[i].insn_off;
838e9690
YS
5472 urecord += urec_size;
5473 }
5474
ba64e7d8
YS
5475 prog->aux->func_info = krecord;
5476 prog->aux->func_info_cnt = nfuncs;
838e9690
YS
5477 return 0;
5478
c454a46b 5479err_free:
ba64e7d8 5480 kvfree(krecord);
838e9690
YS
5481 return ret;
5482}
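
/* Illustrative example: the checks above require exactly one bpf_func_info
 * record per subprogram, with insn_off values that are strictly increasing
 * and equal to the subprogram start instructions. For a program whose main
 * function starts at insn 0 and whose single bpf-to-bpf callee starts at
 * insn 12 (offsets and type ids made up for illustration), the loader would
 * supply func_info_cnt = 2, func_info_rec_size = sizeof(struct bpf_func_info)
 * and records equivalent to:
 *
 *   { .insn_off = 0,  .type_id = <BTF id of the main FUNC>   },
 *   { .insn_off = 12, .type_id = <BTF id of the callee FUNC> },
 */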
5483
ba64e7d8
YS
5484static void adjust_btf_func(struct bpf_verifier_env *env)
5485{
5486 int i;
5487
5488 if (!env->prog->aux->func_info)
5489 return;
5490
5491 for (i = 0; i < env->subprog_cnt; i++)
d30d42e0 5492 env->prog->aux->func_info[i].insn_off = env->subprog_info[i].start;
ba64e7d8
YS
5493}
5494
c454a46b
MKL
5495#define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \
5496 sizeof(((struct bpf_line_info *)(0))->line_col))
5497#define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
5498
5499static int check_btf_line(struct bpf_verifier_env *env,
5500 const union bpf_attr *attr,
5501 union bpf_attr __user *uattr)
5502{
5503 u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
5504 struct bpf_subprog_info *sub;
5505 struct bpf_line_info *linfo;
5506 struct bpf_prog *prog;
5507 const struct btf *btf;
5508 void __user *ulinfo;
5509 int err;
5510
5511 nr_linfo = attr->line_info_cnt;
5512 if (!nr_linfo)
5513 return 0;
5514
5515 rec_size = attr->line_info_rec_size;
5516 if (rec_size < MIN_BPF_LINEINFO_SIZE ||
5517 rec_size > MAX_LINEINFO_REC_SIZE ||
5518 rec_size & (sizeof(u32) - 1))
5519 return -EINVAL;
5520
 5521 /* Need to zero it in case userspace passes in a
 5522 * smaller bpf_line_info object.
5523 */
5524 linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
5525 GFP_KERNEL | __GFP_NOWARN);
5526 if (!linfo)
5527 return -ENOMEM;
5528
5529 prog = env->prog;
5530 btf = prog->aux->btf;
5531
5532 s = 0;
5533 sub = env->subprog_info;
5534 ulinfo = u64_to_user_ptr(attr->line_info);
5535 expected_size = sizeof(struct bpf_line_info);
5536 ncopy = min_t(u32, expected_size, rec_size);
5537 for (i = 0; i < nr_linfo; i++) {
5538 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
5539 if (err) {
5540 if (err == -E2BIG) {
5541 verbose(env, "nonzero tailing record in line_info");
5542 if (put_user(expected_size,
5543 &uattr->line_info_rec_size))
5544 err = -EFAULT;
5545 }
5546 goto err_free;
5547 }
5548
5549 if (copy_from_user(&linfo[i], ulinfo, ncopy)) {
5550 err = -EFAULT;
5551 goto err_free;
5552 }
5553
5554 /*
5555 * Check insn_off to ensure
5556 * 1) strictly increasing AND
5557 * 2) bounded by prog->len
5558 *
5559 * The linfo[0].insn_off == 0 check logically falls into
5560 * the later "missing bpf_line_info for func..." case
 5561 * because the first linfo[0].insn_off must belong to the
 5562 * first sub as well, and the first sub must have
5563 * subprog_info[0].start == 0.
5564 */
5565 if ((i && linfo[i].insn_off <= prev_offset) ||
5566 linfo[i].insn_off >= prog->len) {
5567 verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
5568 i, linfo[i].insn_off, prev_offset,
5569 prog->len);
5570 err = -EINVAL;
5571 goto err_free;
5572 }
5573
fdbaa0be
MKL
5574 if (!prog->insnsi[linfo[i].insn_off].code) {
5575 verbose(env,
5576 "Invalid insn code at line_info[%u].insn_off\n",
5577 i);
5578 err = -EINVAL;
5579 goto err_free;
5580 }
5581
23127b33
MKL
5582 if (!btf_name_by_offset(btf, linfo[i].line_off) ||
5583 !btf_name_by_offset(btf, linfo[i].file_name_off)) {
c454a46b
MKL
5584 verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
5585 err = -EINVAL;
5586 goto err_free;
5587 }
5588
5589 if (s != env->subprog_cnt) {
5590 if (linfo[i].insn_off == sub[s].start) {
5591 sub[s].linfo_idx = i;
5592 s++;
5593 } else if (sub[s].start < linfo[i].insn_off) {
5594 verbose(env, "missing bpf_line_info for func#%u\n", s);
5595 err = -EINVAL;
5596 goto err_free;
5597 }
5598 }
5599
5600 prev_offset = linfo[i].insn_off;
5601 ulinfo += rec_size;
5602 }
5603
5604 if (s != env->subprog_cnt) {
5605 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
5606 env->subprog_cnt - s, s);
5607 err = -EINVAL;
5608 goto err_free;
5609 }
5610
5611 prog->aux->linfo = linfo;
5612 prog->aux->nr_linfo = nr_linfo;
5613
5614 return 0;
5615
5616err_free:
5617 kvfree(linfo);
5618 return err;
5619}
5620
5621static int check_btf_info(struct bpf_verifier_env *env,
5622 const union bpf_attr *attr,
5623 union bpf_attr __user *uattr)
5624{
5625 struct btf *btf;
5626 int err;
5627
5628 if (!attr->func_info_cnt && !attr->line_info_cnt)
5629 return 0;
5630
5631 btf = btf_get_by_fd(attr->prog_btf_fd);
5632 if (IS_ERR(btf))
5633 return PTR_ERR(btf);
5634 env->prog->aux->btf = btf;
5635
5636 err = check_btf_func(env, attr, uattr);
5637 if (err)
5638 return err;
5639
5640 err = check_btf_line(env, attr, uattr);
5641 if (err)
5642 return err;
5643
5644 return 0;
ba64e7d8
YS
5645}
5646
f1174f77
EC
5647/* check %cur's range satisfies %old's */
5648static bool range_within(struct bpf_reg_state *old,
5649 struct bpf_reg_state *cur)
5650{
b03c9f9f
EC
5651 return old->umin_value <= cur->umin_value &&
5652 old->umax_value >= cur->umax_value &&
5653 old->smin_value <= cur->smin_value &&
5654 old->smax_value >= cur->smax_value;
f1174f77
EC
5655}
5656
5657/* Maximum number of register states that can exist at once */
5658#define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
5659struct idpair {
5660 u32 old;
5661 u32 cur;
5662};
5663
5664/* If in the old state two registers had the same id, then they need to have
5665 * the same id in the new state as well. But that id could be different from
5666 * the old state, so we need to track the mapping from old to new ids.
5667 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
5668 * regs with old id 5 must also have new id 9 for the new state to be safe. But
5669 * regs with a different old id could still have new id 9, we don't care about
5670 * that.
5671 * So we look through our idmap to see if this old id has been seen before. If
5672 * so, we require the new id to match; otherwise, we add the id pair to the map.
969bf05e 5673 */
f1174f77 5674static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
969bf05e 5675{
f1174f77 5676 unsigned int i;
969bf05e 5677
f1174f77
EC
5678 for (i = 0; i < ID_MAP_SIZE; i++) {
5679 if (!idmap[i].old) {
5680 /* Reached an empty slot; haven't seen this id before */
5681 idmap[i].old = old_id;
5682 idmap[i].cur = cur_id;
5683 return true;
5684 }
5685 if (idmap[i].old == old_id)
5686 return idmap[i].cur == cur_id;
5687 }
5688 /* We ran out of idmap slots, which should be impossible */
5689 WARN_ON_ONCE(1);
5690 return false;
5691}
5692
9242b5f5
AS
5693static void clean_func_state(struct bpf_verifier_env *env,
5694 struct bpf_func_state *st)
5695{
5696 enum bpf_reg_liveness live;
5697 int i, j;
5698
5699 for (i = 0; i < BPF_REG_FP; i++) {
5700 live = st->regs[i].live;
5701 /* liveness must not touch this register anymore */
5702 st->regs[i].live |= REG_LIVE_DONE;
5703 if (!(live & REG_LIVE_READ))
5704 /* since the register is unused, clear its state
5705 * to make further comparison simpler
5706 */
5707 __mark_reg_not_init(&st->regs[i]);
5708 }
5709
5710 for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
5711 live = st->stack[i].spilled_ptr.live;
5712 /* liveness must not touch this stack slot anymore */
5713 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
5714 if (!(live & REG_LIVE_READ)) {
5715 __mark_reg_not_init(&st->stack[i].spilled_ptr);
5716 for (j = 0; j < BPF_REG_SIZE; j++)
5717 st->stack[i].slot_type[j] = STACK_INVALID;
5718 }
5719 }
5720}
5721
5722static void clean_verifier_state(struct bpf_verifier_env *env,
5723 struct bpf_verifier_state *st)
5724{
5725 int i;
5726
5727 if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
5728 /* all regs in this state in all frames were already marked */
5729 return;
5730
5731 for (i = 0; i <= st->curframe; i++)
5732 clean_func_state(env, st->frame[i]);
5733}
5734
5735/* the parentage chains form a tree.
5736 * the verifier states are added to state lists at given insn and
5737 * pushed into state stack for future exploration.
 5738 * when the verifier reaches a bpf_exit insn some of the verifier states
 5739 * stored in the state lists have their final liveness state already,
 5740 * but a lot of states will get revised from the liveness point of view when
5741 * the verifier explores other branches.
5742 * Example:
5743 * 1: r0 = 1
5744 * 2: if r1 == 100 goto pc+1
5745 * 3: r0 = 2
5746 * 4: exit
5747 * when the verifier reaches exit insn the register r0 in the state list of
5748 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
5749 * of insn 2 and goes exploring further. At the insn 4 it will walk the
5750 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
5751 *
 5752 * Since the verifier pushes the branch states as it sees them while exploring
 5753 * the program, walking a branch instruction for the second
 5754 * time means that all states below this branch were already explored and
 5755 * their final liveness marks are already propagated.
 5756 * Hence when the verifier completes the search of the state list in is_state_visited()
 5757 * we can call this clean_live_states() function to mark all liveness states
 5758 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
 5759 * will not be used.
 5760 * This function also clears the registers and stack for states that were not READ
 5761 * to simplify state merging.
 5762 *
 5763 * An important note here is that walking the same branch instruction in the callee
 5764 * doesn't mean that the states are DONE. The verifier has to compare
5765 * the callsites
5766 */
5767static void clean_live_states(struct bpf_verifier_env *env, int insn,
5768 struct bpf_verifier_state *cur)
5769{
5770 struct bpf_verifier_state_list *sl;
5771 int i;
5772
5773 sl = env->explored_states[insn];
5774 if (!sl)
5775 return;
5776
5777 while (sl != STATE_LIST_MARK) {
5778 if (sl->state.curframe != cur->curframe)
5779 goto next;
5780 for (i = 0; i <= cur->curframe; i++)
5781 if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
5782 goto next;
5783 clean_verifier_state(env, &sl->state);
5784next:
5785 sl = sl->next;
5786 }
5787}
5788
f1174f77 5789/* Returns true if (rold safe implies rcur safe) */
1b688a19
EC
5790static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
5791 struct idpair *idmap)
f1174f77 5792{
f4d7e40a
AS
5793 bool equal;
5794
dc503a8a
EC
5795 if (!(rold->live & REG_LIVE_READ))
5796 /* explored state didn't use this */
5797 return true;
5798
679c782d 5799 equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
f4d7e40a
AS
5800
5801 if (rold->type == PTR_TO_STACK)
5802 /* two stack pointers are equal only if they're pointing to
5803 * the same stack frame, since fp-8 in foo != fp-8 in bar
5804 */
5805 return equal && rold->frameno == rcur->frameno;
5806
5807 if (equal)
969bf05e
AS
5808 return true;
5809
f1174f77
EC
5810 if (rold->type == NOT_INIT)
5811 /* explored state can't have used this */
969bf05e 5812 return true;
f1174f77
EC
5813 if (rcur->type == NOT_INIT)
5814 return false;
5815 switch (rold->type) {
5816 case SCALAR_VALUE:
5817 if (rcur->type == SCALAR_VALUE) {
5818 /* new val must satisfy old val knowledge */
5819 return range_within(rold, rcur) &&
5820 tnum_in(rold->var_off, rcur->var_off);
5821 } else {
179d1c56
JH
5822 /* We're trying to use a pointer in place of a scalar.
5823 * Even if the scalar was unbounded, this could lead to
5824 * pointer leaks because scalars are allowed to leak
5825 * while pointers are not. We could make this safe in
5826 * special cases if root is calling us, but it's
5827 * probably not worth the hassle.
f1174f77 5828 */
179d1c56 5829 return false;
f1174f77
EC
5830 }
5831 case PTR_TO_MAP_VALUE:
1b688a19
EC
5832 /* If the new min/max/var_off satisfy the old ones and
5833 * everything else matches, we are OK.
d83525ca
AS
5834 * 'id' is not compared, since it's only used for maps with
5835 * bpf_spin_lock inside map element and in such cases if
5836 * the rest of the prog is valid for one map element then
5837 * it's valid for all map elements regardless of the key
5838 * used in bpf_map_lookup()
1b688a19
EC
5839 */
5840 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
5841 range_within(rold, rcur) &&
5842 tnum_in(rold->var_off, rcur->var_off);
f1174f77
EC
5843 case PTR_TO_MAP_VALUE_OR_NULL:
5844 /* a PTR_TO_MAP_VALUE could be safe to use as a
5845 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
5846 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
5847 * checked, doing so could have affected others with the same
5848 * id, and we can't check for that because we lost the id when
5849 * we converted to a PTR_TO_MAP_VALUE.
5850 */
5851 if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
5852 return false;
5853 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
5854 return false;
5855 /* Check our ids match any regs they're supposed to */
5856 return check_ids(rold->id, rcur->id, idmap);
de8f3a83 5857 case PTR_TO_PACKET_META:
f1174f77 5858 case PTR_TO_PACKET:
de8f3a83 5859 if (rcur->type != rold->type)
f1174f77
EC
5860 return false;
5861 /* We must have at least as much range as the old ptr
5862 * did, so that any accesses which were safe before are
5863 * still safe. This is true even if old range < old off,
5864 * since someone could have accessed through (ptr - k), or
5865 * even done ptr -= k in a register, to get a safe access.
5866 */
5867 if (rold->range > rcur->range)
5868 return false;
5869 /* If the offsets don't match, we can't trust our alignment;
5870 * nor can we be sure that we won't fall out of range.
5871 */
5872 if (rold->off != rcur->off)
5873 return false;
5874 /* id relations must be preserved */
5875 if (rold->id && !check_ids(rold->id, rcur->id, idmap))
5876 return false;
5877 /* new val must satisfy old val knowledge */
5878 return range_within(rold, rcur) &&
5879 tnum_in(rold->var_off, rcur->var_off);
5880 case PTR_TO_CTX:
5881 case CONST_PTR_TO_MAP:
f1174f77 5882 case PTR_TO_PACKET_END:
d58e468b 5883 case PTR_TO_FLOW_KEYS:
c64b7983
JS
5884 case PTR_TO_SOCKET:
5885 case PTR_TO_SOCKET_OR_NULL:
46f8bc92
MKL
5886 case PTR_TO_SOCK_COMMON:
5887 case PTR_TO_SOCK_COMMON_OR_NULL:
655a51e5
MKL
5888 case PTR_TO_TCP_SOCK:
5889 case PTR_TO_TCP_SOCK_OR_NULL:
f1174f77
EC
5890 /* Only valid matches are exact, which memcmp() above
5891 * would have accepted
5892 */
5893 default:
5894 /* Don't know what's going on, just say it's not safe */
5895 return false;
5896 }
969bf05e 5897
f1174f77
EC
5898 /* Shouldn't get here; if we do, say it's not safe */
5899 WARN_ON_ONCE(1);
969bf05e
AS
5900 return false;
5901}
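
/* Illustrative worked example: for SCALAR_VALUE the decision above reduces
 * to range_within() plus tnum_in(). Suppose an already-verified (old) state
 * reached this insn with r1 in [0, 255] and var_off (0x0; 0xff), and the
 * current path arrives with r1 in [16, 32] and var_off (0x0; 0x3f). Every
 * value the current r1 can take was already covered by the old r1, so
 * regsafe() returns true and, if the remaining registers and stack compare
 * as safe too, the current path is pruned. The reverse situation (old
 * [16, 32], current [0, 255]) is not safe and forces exploration to
 * continue. The concrete bounds are only an example.
 */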
5902
f4d7e40a
AS
5903static bool stacksafe(struct bpf_func_state *old,
5904 struct bpf_func_state *cur,
638f5b90
AS
5905 struct idpair *idmap)
5906{
5907 int i, spi;
5908
638f5b90
AS
5909 /* walk slots of the explored stack and ignore any additional
5910 * slots in the current stack, since explored(safe) state
5911 * didn't use them
5912 */
5913 for (i = 0; i < old->allocated_stack; i++) {
5914 spi = i / BPF_REG_SIZE;
5915
b233920c
AS
5916 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
5917 i += BPF_REG_SIZE - 1;
cc2b14d5 5918 /* explored state didn't use this */
fd05e57b 5919 continue;
b233920c 5920 }
cc2b14d5 5921
638f5b90
AS
5922 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
5923 continue;
19e2dbb7
AS
5924
5925 /* explored stack has more populated slots than current stack
5926 * and these slots were used
5927 */
5928 if (i >= cur->allocated_stack)
5929 return false;
5930
cc2b14d5
AS
5931 /* if old state was safe with misc data in the stack
5932 * it will be safe with zero-initialized stack.
5933 * The opposite is not true
5934 */
5935 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
5936 cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
5937 continue;
638f5b90
AS
5938 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
5939 cur->stack[spi].slot_type[i % BPF_REG_SIZE])
5940 /* Ex: old explored (safe) state has STACK_SPILL in
 5941 * this stack slot, but current has STACK_MISC ->
 5942 * these verifier states are not equivalent,
5943 * return false to continue verification of this path
5944 */
5945 return false;
5946 if (i % BPF_REG_SIZE)
5947 continue;
5948 if (old->stack[spi].slot_type[0] != STACK_SPILL)
5949 continue;
5950 if (!regsafe(&old->stack[spi].spilled_ptr,
5951 &cur->stack[spi].spilled_ptr,
5952 idmap))
5953 /* when explored and current stack slot are both storing
5954 * spilled registers, check that stored pointers types
5955 * are the same as well.
5956 * Ex: explored safe path could have stored
5957 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
5958 * but current path has stored:
5959 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
5960 * such verifier states are not equivalent.
5961 * return false to continue verification of this path
5962 */
5963 return false;
5964 }
5965 return true;
5966}
5967
fd978bf7
JS
5968static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
5969{
5970 if (old->acquired_refs != cur->acquired_refs)
5971 return false;
5972 return !memcmp(old->refs, cur->refs,
5973 sizeof(*old->refs) * old->acquired_refs);
5974}
5975
f1bca824
AS
5976/* compare two verifier states
5977 *
5978 * all states stored in state_list are known to be valid, since
5979 * verifier reached 'bpf_exit' instruction through them
5980 *
 5981 * this function is called when the verifier explores different branches of
 5982 * execution popped from the state stack. If it sees an old state that has
 5983 * a more strict register state and a more strict stack state, this execution
 5984 * branch doesn't need to be explored further, since the verifier already
 5985 * concluded that the more strict state leads to a valid finish.
5986 *
5987 * Therefore two states are equivalent if register state is more conservative
5988 * and explored stack state is more conservative than the current one.
5989 * Example:
5990 * explored current
5991 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
5992 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
5993 *
5994 * In other words if current stack state (one being explored) has more
5995 * valid slots than old one that already passed validation, it means
5996 * the verifier can stop exploring and conclude that current state is valid too
5997 *
5998 * Similarly with registers. If explored state has register type as invalid
5999 * whereas register type in current state is meaningful, it means that
6000 * the current state will reach 'bpf_exit' instruction safely
6001 */
f4d7e40a
AS
6002static bool func_states_equal(struct bpf_func_state *old,
6003 struct bpf_func_state *cur)
f1bca824 6004{
f1174f77
EC
6005 struct idpair *idmap;
6006 bool ret = false;
f1bca824
AS
6007 int i;
6008
f1174f77
EC
6009 idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
6010 /* If we failed to allocate the idmap, just say it's not safe */
6011 if (!idmap)
1a0dc1ac 6012 return false;
f1174f77
EC
6013
6014 for (i = 0; i < MAX_BPF_REG; i++) {
1b688a19 6015 if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
f1174f77 6016 goto out_free;
f1bca824
AS
6017 }
6018
638f5b90
AS
6019 if (!stacksafe(old, cur, idmap))
6020 goto out_free;
fd978bf7
JS
6021
6022 if (!refsafe(old, cur))
6023 goto out_free;
f1174f77
EC
6024 ret = true;
6025out_free:
6026 kfree(idmap);
6027 return ret;
f1bca824
AS
6028}
6029
f4d7e40a
AS
6030static bool states_equal(struct bpf_verifier_env *env,
6031 struct bpf_verifier_state *old,
6032 struct bpf_verifier_state *cur)
6033{
6034 int i;
6035
6036 if (old->curframe != cur->curframe)
6037 return false;
6038
979d63d5
DB
6039 /* Verification state from speculative execution simulation
6040 * must never prune a non-speculative execution one.
6041 */
6042 if (old->speculative && !cur->speculative)
6043 return false;
6044
d83525ca
AS
6045 if (old->active_spin_lock != cur->active_spin_lock)
6046 return false;
6047
f4d7e40a
AS
6048 /* for states to be equal callsites have to be the same
6049 * and all frame states need to be equivalent
6050 */
6051 for (i = 0; i <= old->curframe; i++) {
6052 if (old->frame[i]->callsite != cur->frame[i]->callsite)
6053 return false;
6054 if (!func_states_equal(old->frame[i], cur->frame[i]))
6055 return false;
6056 }
6057 return true;
6058}
6059
8e9cd9ce 6060/* A write screens off any subsequent reads; but write marks come from the
f4d7e40a
AS
6061 * straight-line code between a state and its parent. When we arrive at an
6062 * equivalent state (jump target or such) we didn't arrive by the straight-line
6063 * code, so read marks in the state must propagate to the parent regardless
6064 * of the state's write marks. That's what 'parent == state->parent' comparison
679c782d 6065 * in mark_reg_read() is for.
8e9cd9ce 6066 */
f4d7e40a
AS
6067static int propagate_liveness(struct bpf_verifier_env *env,
6068 const struct bpf_verifier_state *vstate,
6069 struct bpf_verifier_state *vparent)
dc503a8a 6070{
f4d7e40a
AS
6071 int i, frame, err = 0;
6072 struct bpf_func_state *state, *parent;
dc503a8a 6073
f4d7e40a
AS
6074 if (vparent->curframe != vstate->curframe) {
6075 WARN(1, "propagate_live: parent frame %d current frame %d\n",
6076 vparent->curframe, vstate->curframe);
6077 return -EFAULT;
6078 }
dc503a8a
EC
6079 /* Propagate read liveness of registers... */
6080 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
83d16312
JK
6081 for (frame = 0; frame <= vstate->curframe; frame++) {
6082 /* We don't need to worry about FP liveness, it's read-only */
6083 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
6084 if (vparent->frame[frame]->regs[i].live & REG_LIVE_READ)
6085 continue;
6086 if (vstate->frame[frame]->regs[i].live & REG_LIVE_READ) {
6087 err = mark_reg_read(env, &vstate->frame[frame]->regs[i],
6088 &vparent->frame[frame]->regs[i]);
6089 if (err)
6090 return err;
6091 }
dc503a8a
EC
6092 }
6093 }
f4d7e40a 6094
dc503a8a 6095 /* ... and stack slots */
f4d7e40a
AS
6096 for (frame = 0; frame <= vstate->curframe; frame++) {
6097 state = vstate->frame[frame];
6098 parent = vparent->frame[frame];
6099 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
6100 i < parent->allocated_stack / BPF_REG_SIZE; i++) {
f4d7e40a
AS
6101 if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
6102 continue;
6103 if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
679c782d
EC
6104 mark_reg_read(env, &state->stack[i].spilled_ptr,
6105 &parent->stack[i].spilled_ptr);
dc503a8a
EC
6106 }
6107 }
f4d7e40a 6108 return err;
dc503a8a
EC
6109}
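
/* Illustrative example of why this propagation matters. Consider:
 *
 * 1: r0 = 1
 * 2: if r1 == 100 goto pc+1
 * 3: r2 = 7
 * 4: exit
 *
 * The fall-through path (1,2,3,4) is explored first; the exit reads r0 as
 * the return value and that read mark reaches the state stored at insn 4
 * (a pruning point, being a branch target). When the branch-taken path
 * (1,2,4) later arrives at insn 4, is_state_visited() finds the stored
 * state equivalent (r2 was never read, so its differing value is ignored)
 * and prunes the walk, but first calls propagate_liveness() so that r0's
 * REG_LIVE_READ mark is replayed into the current state and, via
 * mark_reg_read(), into the parent states recorded higher up the chain.
 * Without that, those parents would consider r0 unread and later
 * equivalence checks against them could prune too aggressively.
 */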
6110
58e2af8b 6111static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
f1bca824 6112{
58e2af8b
JK
6113 struct bpf_verifier_state_list *new_sl;
6114 struct bpf_verifier_state_list *sl;
679c782d 6115 struct bpf_verifier_state *cur = env->cur_state, *new;
ceefbc96 6116 int i, j, err, states_cnt = 0;
f1bca824
AS
6117
6118 sl = env->explored_states[insn_idx];
6119 if (!sl)
6120 /* this 'insn_idx' instruction wasn't marked, so we will not
6121 * be doing state search here
6122 */
6123 return 0;
6124
9242b5f5
AS
6125 clean_live_states(env, insn_idx, cur);
6126
f1bca824 6127 while (sl != STATE_LIST_MARK) {
638f5b90 6128 if (states_equal(env, &sl->state, cur)) {
f1bca824 6129 /* reached equivalent register/stack state,
dc503a8a
EC
6130 * prune the search.
6131 * Registers read by the continuation are read by us.
8e9cd9ce
EC
6132 * If we have any write marks in env->cur_state, they
6133 * will prevent corresponding reads in the continuation
6134 * from reaching our parent (an explored_state). Our
6135 * own state will get the read marks recorded, but
6136 * they'll be immediately forgotten as we're pruning
6137 * this state and will pop a new one.
f1bca824 6138 */
f4d7e40a
AS
6139 err = propagate_liveness(env, &sl->state, cur);
6140 if (err)
6141 return err;
f1bca824 6142 return 1;
dc503a8a 6143 }
f1bca824 6144 sl = sl->next;
ceefbc96 6145 states_cnt++;
f1bca824
AS
6146 }
6147
ceefbc96
AS
6148 if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
6149 return 0;
6150
f1bca824
AS
6151 /* there were no equivalent states, remember current one.
6152 * technically the current state is not proven to be safe yet,
f4d7e40a
AS
 6153 * but it will either reach the outermost bpf_exit (which means it's safe)
6154 * or it will be rejected. Since there are no loops, we won't be
6155 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
6156 * again on the way to bpf_exit
f1bca824 6157 */
638f5b90 6158 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
f1bca824
AS
6159 if (!new_sl)
6160 return -ENOMEM;
6161
6162 /* add new state to the head of linked list */
679c782d
EC
6163 new = &new_sl->state;
6164 err = copy_verifier_state(new, cur);
1969db47 6165 if (err) {
679c782d 6166 free_verifier_state(new, false);
1969db47
AS
6167 kfree(new_sl);
6168 return err;
6169 }
f1bca824
AS
6170 new_sl->next = env->explored_states[insn_idx];
6171 env->explored_states[insn_idx] = new_sl;
7640ead9
JK
6172 /* connect new state to parentage chain. Current frame needs all
6173 * registers connected. Only r6 - r9 of the callers are alive (pushed
6174 * to the stack implicitly by JITs) so in callers' frames connect just
6175 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
6176 * the state of the call instruction (with WRITTEN set), and r0 comes
6177 * from callee with its full parentage chain, anyway.
6178 */
6179 for (j = 0; j <= cur->curframe; j++)
6180 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
6181 cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
8e9cd9ce
EC
6182 /* clear write marks in current state: the writes we did are not writes
6183 * our child did, so they don't screen off its reads from us.
6184 * (There are no read marks in current state, because reads always mark
6185 * their parent and current state never has children yet. Only
6186 * explored_states can get read marks.)
6187 */
dc503a8a 6188 for (i = 0; i < BPF_REG_FP; i++)
f4d7e40a
AS
6189 cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE;
6190
6191 /* all stack frames are accessible from callee, clear them all */
6192 for (j = 0; j <= cur->curframe; j++) {
6193 struct bpf_func_state *frame = cur->frame[j];
679c782d 6194 struct bpf_func_state *newframe = new->frame[j];
f4d7e40a 6195
679c782d 6196 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
cc2b14d5 6197 frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
679c782d
EC
6198 frame->stack[i].spilled_ptr.parent =
6199 &newframe->stack[i].spilled_ptr;
6200 }
f4d7e40a 6201 }
f1bca824
AS
6202 return 0;
6203}
6204
c64b7983
JS
6205/* Return true if it's OK to have the same insn return a different type. */
6206static bool reg_type_mismatch_ok(enum bpf_reg_type type)
6207{
6208 switch (type) {
6209 case PTR_TO_CTX:
6210 case PTR_TO_SOCKET:
6211 case PTR_TO_SOCKET_OR_NULL:
46f8bc92
MKL
6212 case PTR_TO_SOCK_COMMON:
6213 case PTR_TO_SOCK_COMMON_OR_NULL:
655a51e5
MKL
6214 case PTR_TO_TCP_SOCK:
6215 case PTR_TO_TCP_SOCK_OR_NULL:
c64b7983
JS
6216 return false;
6217 default:
6218 return true;
6219 }
6220}
6221
6222/* If an instruction was previously used with particular pointer types, then we
6223 * need to be careful to avoid cases such as the below, where it may be ok
6224 * for one branch accessing the pointer, but not ok for the other branch:
6225 *
6226 * R1 = sock_ptr
6227 * goto X;
6228 * ...
6229 * R1 = some_other_valid_ptr;
6230 * goto X;
6231 * ...
6232 * R2 = *(u32 *)(R1 + 0);
6233 */
6234static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
6235{
6236 return src != prev && (!reg_type_mismatch_ok(src) ||
6237 !reg_type_mismatch_ok(prev));
6238}
6239
58e2af8b 6240static int do_check(struct bpf_verifier_env *env)
17a52670 6241{
638f5b90 6242 struct bpf_verifier_state *state;
17a52670 6243 struct bpf_insn *insns = env->prog->insnsi;
638f5b90 6244 struct bpf_reg_state *regs;
f4d7e40a 6245 int insn_cnt = env->prog->len, i;
17a52670
AS
6246 int insn_processed = 0;
6247 bool do_print_state = false;
6248
d9762e84
MKL
6249 env->prev_linfo = NULL;
6250
638f5b90
AS
6251 state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
6252 if (!state)
6253 return -ENOMEM;
f4d7e40a 6254 state->curframe = 0;
979d63d5 6255 state->speculative = false;
f4d7e40a
AS
6256 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
6257 if (!state->frame[0]) {
6258 kfree(state);
6259 return -ENOMEM;
6260 }
6261 env->cur_state = state;
6262 init_func_state(env, state->frame[0],
6263 BPF_MAIN_FUNC /* callsite */,
6264 0 /* frameno */,
6265 0 /* subprogno, zero == main subprog */);
c08435ec 6266
17a52670
AS
6267 for (;;) {
6268 struct bpf_insn *insn;
6269 u8 class;
6270 int err;
6271
c08435ec 6272 if (env->insn_idx >= insn_cnt) {
61bd5218 6273 verbose(env, "invalid insn idx %d insn_cnt %d\n",
c08435ec 6274 env->insn_idx, insn_cnt);
17a52670
AS
6275 return -EFAULT;
6276 }
6277
c08435ec 6278 insn = &insns[env->insn_idx];
17a52670
AS
6279 class = BPF_CLASS(insn->code);
6280
07016151 6281 if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
61bd5218
JK
6282 verbose(env,
6283 "BPF program is too large. Processed %d insn\n",
17a52670
AS
6284 insn_processed);
6285 return -E2BIG;
6286 }
6287
c08435ec 6288 err = is_state_visited(env, env->insn_idx);
f1bca824
AS
6289 if (err < 0)
6290 return err;
6291 if (err == 1) {
6292 /* found equivalent state, can prune the search */
61bd5218 6293 if (env->log.level) {
f1bca824 6294 if (do_print_state)
979d63d5
DB
6295 verbose(env, "\nfrom %d to %d%s: safe\n",
6296 env->prev_insn_idx, env->insn_idx,
6297 env->cur_state->speculative ?
6298 " (speculative execution)" : "");
f1bca824 6299 else
c08435ec 6300 verbose(env, "%d: safe\n", env->insn_idx);
f1bca824
AS
6301 }
6302 goto process_bpf_exit;
6303 }
6304
c3494801
AS
6305 if (signal_pending(current))
6306 return -EAGAIN;
6307
3c2ce60b
DB
6308 if (need_resched())
6309 cond_resched();
6310
61bd5218
JK
6311 if (env->log.level > 1 || (env->log.level && do_print_state)) {
6312 if (env->log.level > 1)
c08435ec 6313 verbose(env, "%d:", env->insn_idx);
c5fc9692 6314 else
979d63d5
DB
6315 verbose(env, "\nfrom %d to %d%s:",
6316 env->prev_insn_idx, env->insn_idx,
6317 env->cur_state->speculative ?
6318 " (speculative execution)" : "");
f4d7e40a 6319 print_verifier_state(env, state->frame[state->curframe]);
17a52670
AS
6320 do_print_state = false;
6321 }
6322
61bd5218 6323 if (env->log.level) {
7105e828
DB
6324 const struct bpf_insn_cbs cbs = {
6325 .cb_print = verbose,
abe08840 6326 .private_data = env,
7105e828
DB
6327 };
6328
c08435ec
DB
6329 verbose_linfo(env, env->insn_idx, "; ");
6330 verbose(env, "%d: ", env->insn_idx);
abe08840 6331 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
17a52670
AS
6332 }
6333
cae1927c 6334 if (bpf_prog_is_dev_bound(env->prog->aux)) {
c08435ec
DB
6335 err = bpf_prog_offload_verify_insn(env, env->insn_idx,
6336 env->prev_insn_idx);
cae1927c
JK
6337 if (err)
6338 return err;
6339 }
13a27dfc 6340
638f5b90 6341 regs = cur_regs(env);
c08435ec 6342 env->insn_aux_data[env->insn_idx].seen = true;
fd978bf7 6343
17a52670 6344 if (class == BPF_ALU || class == BPF_ALU64) {
1be7f75d 6345 err = check_alu_op(env, insn);
17a52670
AS
6346 if (err)
6347 return err;
6348
6349 } else if (class == BPF_LDX) {
3df126f3 6350 enum bpf_reg_type *prev_src_type, src_reg_type;
9bac3d6d
AS
6351
6352 /* check for reserved fields is already done */
6353
17a52670 6354 /* check src operand */
dc503a8a 6355 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
6356 if (err)
6357 return err;
6358
dc503a8a 6359 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
17a52670
AS
6360 if (err)
6361 return err;
6362
725f9dcd
AS
6363 src_reg_type = regs[insn->src_reg].type;
6364
17a52670
AS
6365 /* check that memory (src_reg + off) is readable,
6366 * the state of dst_reg will be updated by this func
6367 */
c08435ec
DB
6368 err = check_mem_access(env, env->insn_idx, insn->src_reg,
6369 insn->off, BPF_SIZE(insn->code),
6370 BPF_READ, insn->dst_reg, false);
17a52670
AS
6371 if (err)
6372 return err;
6373
c08435ec 6374 prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
3df126f3
JK
6375
6376 if (*prev_src_type == NOT_INIT) {
9bac3d6d
AS
6377 /* saw a valid insn
6378 * dst_reg = *(u32 *)(src_reg + off)
3df126f3 6379 * save type to validate intersecting paths
9bac3d6d 6380 */
3df126f3 6381 *prev_src_type = src_reg_type;
9bac3d6d 6382
c64b7983 6383 } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
9bac3d6d
AS
6384 /* An abusive program is trying to use the same insn
6385 * dst_reg = *(u32 *)(src_reg + off)
6386 * with different pointer types:
6387 * src_reg == ctx in one branch and
6388 * src_reg == stack|map in some other branch.
6389 * Reject it.
6390 */
61bd5218 6391 verbose(env, "same insn cannot be used with different pointers\n");
9bac3d6d
AS
6392 return -EINVAL;
6393 }
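			/* For illustration, a minimal sketch of the rejected shape
			 * (not a complete program; register choice is hypothetical):
			 *
			 *	if (cond)			// path A keeps r6 == ctx
			 *		r6 = some_map_value_ptr	// path B makes r6 a map value
			 *	r0 = *(u32 *)(r6 + 0)		// same insn, two pointer types
			 *
			 * The first path to reach the load records src_reg's type in
			 * insn_aux_data; the second path hits reg_type_mismatch() and
			 * the program is rejected, since convert_ctx_accesses() can
			 * rewrite a ctx load only one way.
			 */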
6394
17a52670 6395 } else if (class == BPF_STX) {
3df126f3 6396 enum bpf_reg_type *prev_dst_type, dst_reg_type;
d691f9e8 6397
17a52670 6398 if (BPF_MODE(insn->code) == BPF_XADD) {
c08435ec 6399 err = check_xadd(env, env->insn_idx, insn);
17a52670
AS
6400 if (err)
6401 return err;
c08435ec 6402 env->insn_idx++;
17a52670
AS
6403 continue;
6404 }
6405
17a52670 6406 /* check src1 operand */
dc503a8a 6407 err = check_reg_arg(env, insn->src_reg, SRC_OP);
17a52670
AS
6408 if (err)
6409 return err;
6410 /* check src2 operand */
dc503a8a 6411 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
6412 if (err)
6413 return err;
6414
d691f9e8
AS
6415 dst_reg_type = regs[insn->dst_reg].type;
6416
17a52670 6417 /* check that memory (dst_reg + off) is writeable */
c08435ec
DB
6418 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
6419 insn->off, BPF_SIZE(insn->code),
6420 BPF_WRITE, insn->src_reg, false);
17a52670
AS
6421 if (err)
6422 return err;
6423
c08435ec 6424 prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
3df126f3
JK
6425
6426 if (*prev_dst_type == NOT_INIT) {
6427 *prev_dst_type = dst_reg_type;
c64b7983 6428 } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
61bd5218 6429 verbose(env, "same insn cannot be used with different pointers\n");
d691f9e8
AS
6430 return -EINVAL;
6431 }
6432
17a52670
AS
6433 } else if (class == BPF_ST) {
6434 if (BPF_MODE(insn->code) != BPF_MEM ||
6435 insn->src_reg != BPF_REG_0) {
61bd5218 6436 verbose(env, "BPF_ST uses reserved fields\n");
17a52670
AS
6437 return -EINVAL;
6438 }
6439 /* check src operand */
dc503a8a 6440 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17a52670
AS
6441 if (err)
6442 return err;
6443
f37a8cb8 6444 if (is_ctx_reg(env, insn->dst_reg)) {
9d2be44a 6445 verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
2a159c6f
DB
6446 insn->dst_reg,
6447 reg_type_str[reg_state(env, insn->dst_reg)->type]);
f37a8cb8
DB
6448 return -EACCES;
6449 }
6450
17a52670 6451 /* check that memory (dst_reg + off) is writeable */
c08435ec
DB
6452 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
6453 insn->off, BPF_SIZE(insn->code),
6454 BPF_WRITE, -1, false);
17a52670
AS
6455 if (err)
6456 return err;
6457
092ed096 6458 } else if (class == BPF_JMP || class == BPF_JMP32) {
17a52670
AS
6459 u8 opcode = BPF_OP(insn->code);
6460
6461 if (opcode == BPF_CALL) {
6462 if (BPF_SRC(insn->code) != BPF_K ||
6463 insn->off != 0 ||
f4d7e40a
AS
6464 (insn->src_reg != BPF_REG_0 &&
6465 insn->src_reg != BPF_PSEUDO_CALL) ||
092ed096
JW
6466 insn->dst_reg != BPF_REG_0 ||
6467 class == BPF_JMP32) {
61bd5218 6468 verbose(env, "BPF_CALL uses reserved fields\n");
17a52670
AS
6469 return -EINVAL;
6470 }
6471
d83525ca
AS
6472 if (env->cur_state->active_spin_lock &&
6473 (insn->src_reg == BPF_PSEUDO_CALL ||
6474 insn->imm != BPF_FUNC_spin_unlock)) {
6475 verbose(env, "function calls are not allowed while holding a lock\n");
6476 return -EINVAL;
6477 }
f4d7e40a 6478 if (insn->src_reg == BPF_PSEUDO_CALL)
c08435ec 6479 err = check_func_call(env, insn, &env->insn_idx);
f4d7e40a 6480 else
c08435ec 6481 err = check_helper_call(env, insn->imm, env->insn_idx);
17a52670
AS
6482 if (err)
6483 return err;
6484
6485 } else if (opcode == BPF_JA) {
6486 if (BPF_SRC(insn->code) != BPF_K ||
6487 insn->imm != 0 ||
6488 insn->src_reg != BPF_REG_0 ||
092ed096
JW
6489 insn->dst_reg != BPF_REG_0 ||
6490 class == BPF_JMP32) {
61bd5218 6491 verbose(env, "BPF_JA uses reserved fields\n");
17a52670
AS
6492 return -EINVAL;
6493 }
6494
c08435ec 6495 env->insn_idx += insn->off + 1;
17a52670
AS
6496 continue;
6497
6498 } else if (opcode == BPF_EXIT) {
6499 if (BPF_SRC(insn->code) != BPF_K ||
6500 insn->imm != 0 ||
6501 insn->src_reg != BPF_REG_0 ||
092ed096
JW
6502 insn->dst_reg != BPF_REG_0 ||
6503 class == BPF_JMP32) {
61bd5218 6504 verbose(env, "BPF_EXIT uses reserved fields\n");
17a52670
AS
6505 return -EINVAL;
6506 }
6507
d83525ca
AS
6508 if (env->cur_state->active_spin_lock) {
6509 verbose(env, "bpf_spin_unlock is missing\n");
6510 return -EINVAL;
6511 }
6512
f4d7e40a
AS
6513 if (state->curframe) {
6514 /* exit from nested function */
c08435ec
DB
6515 env->prev_insn_idx = env->insn_idx;
6516 err = prepare_func_exit(env, &env->insn_idx);
f4d7e40a
AS
6517 if (err)
6518 return err;
6519 do_print_state = true;
6520 continue;
6521 }
6522
fd978bf7
JS
6523 err = check_reference_leak(env);
6524 if (err)
6525 return err;
6526
17a52670
AS
6527 /* eBPF calling convention is such that R0 is used
6528 * to return the value from the eBPF program.
6529 * Make sure that it's readable at this time
6530 * of bpf_exit, which means that program wrote
6531 * something into it earlier
6532 */
dc503a8a 6533 err = check_reg_arg(env, BPF_REG_0, SRC_OP);
17a52670
AS
6534 if (err)
6535 return err;
6536
1be7f75d 6537 if (is_pointer_value(env, BPF_REG_0)) {
61bd5218 6538 verbose(env, "R0 leaks addr as return value\n");
1be7f75d
AS
6539 return -EACCES;
6540 }
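			/* Two minimal shapes (illustrative sketches) that fail the
			 * checks above:
			 *
			 *	BPF_EXIT_INSN()				// R0 never written: "R0 !read_ok"
			 *
			 *	BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),	// R0 = frame pointer
			 *	BPF_EXIT_INSN()				// unprivileged: "R0 leaks addr as return value"
			 */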
6541
390ee7e2
AS
6542 err = check_return_code(env);
6543 if (err)
6544 return err;
f1bca824 6545process_bpf_exit:
c08435ec
DB
6546 err = pop_stack(env, &env->prev_insn_idx,
6547 &env->insn_idx);
638f5b90
AS
6548 if (err < 0) {
6549 if (err != -ENOENT)
6550 return err;
17a52670
AS
6551 break;
6552 } else {
6553 do_print_state = true;
6554 continue;
6555 }
6556 } else {
c08435ec 6557 err = check_cond_jmp_op(env, insn, &env->insn_idx);
17a52670
AS
6558 if (err)
6559 return err;
6560 }
6561 } else if (class == BPF_LD) {
6562 u8 mode = BPF_MODE(insn->code);
6563
6564 if (mode == BPF_ABS || mode == BPF_IND) {
ddd872bc
AS
6565 err = check_ld_abs(env, insn);
6566 if (err)
6567 return err;
6568
17a52670
AS
6569 } else if (mode == BPF_IMM) {
6570 err = check_ld_imm(env, insn);
6571 if (err)
6572 return err;
6573
c08435ec
DB
6574 env->insn_idx++;
6575 env->insn_aux_data[env->insn_idx].seen = true;
17a52670 6576 } else {
61bd5218 6577 verbose(env, "invalid BPF_LD mode\n");
17a52670
AS
6578 return -EINVAL;
6579 }
6580 } else {
61bd5218 6581 verbose(env, "unknown insn class %d\n", class);
17a52670
AS
6582 return -EINVAL;
6583 }
6584
c08435ec 6585 env->insn_idx++;
17a52670
AS
6586 }
6587
4bd95f4b
DB
6588 verbose(env, "processed %d insns (limit %d), stack depth ",
6589 insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
f910cefa 6590 for (i = 0; i < env->subprog_cnt; i++) {
9c8105bd 6591 u32 depth = env->subprog_info[i].stack_depth;
f4d7e40a
AS
6592
6593 verbose(env, "%d", depth);
f910cefa 6594 if (i + 1 < env->subprog_cnt)
f4d7e40a
AS
6595 verbose(env, "+");
6596 }
6597 verbose(env, "\n");
9c8105bd 6598 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
17a52670
AS
6599 return 0;
6600}
6601
56f668df
MKL
6602static int check_map_prealloc(struct bpf_map *map)
6603{
6604 return (map->map_type != BPF_MAP_TYPE_HASH &&
bcc6b1b7
MKL
6605 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
6606 map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
56f668df
MKL
6607 !(map->map_flags & BPF_F_NO_PREALLOC);
6608}
6609
d83525ca
AS
6610static bool is_tracing_prog_type(enum bpf_prog_type type)
6611{
6612 switch (type) {
6613 case BPF_PROG_TYPE_KPROBE:
6614 case BPF_PROG_TYPE_TRACEPOINT:
6615 case BPF_PROG_TYPE_PERF_EVENT:
6616 case BPF_PROG_TYPE_RAW_TRACEPOINT:
6617 return true;
6618 default:
6619 return false;
6620 }
6621}
6622
61bd5218
JK
6623static int check_map_prog_compatibility(struct bpf_verifier_env *env,
6624 struct bpf_map *map,
fdc15d38
AS
6625 struct bpf_prog *prog)
6626
6627{
56f668df
MKL
6628 /* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
6629 * preallocated hash maps, since doing memory allocation
6630 * in overflow_handler can crash depending on where nmi got
6631 * triggered.
6632 */
6633 if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
6634 if (!check_map_prealloc(map)) {
61bd5218 6635 verbose(env, "perf_event programs can only use preallocated hash map\n");
56f668df
MKL
6636 return -EINVAL;
6637 }
6638 if (map->inner_map_meta &&
6639 !check_map_prealloc(map->inner_map_meta)) {
61bd5218 6640 verbose(env, "perf_event programs can only use preallocated inner hash map\n");
56f668df
MKL
6641 return -EINVAL;
6642 }
fdc15d38 6643 }
a3884572 6644
d83525ca
AS
6645 if ((is_tracing_prog_type(prog->type) ||
6646 prog->type == BPF_PROG_TYPE_SOCKET_FILTER) &&
6647 map_value_has_spin_lock(map)) {
6648 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
6649 return -EINVAL;
6650 }
6651
a3884572 6652 if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
09728266 6653 !bpf_offload_prog_map_match(prog, map)) {
a3884572
JK
6654 verbose(env, "offload device mismatch between prog and map\n");
6655 return -EINVAL;
6656 }
6657
fdc15d38
AS
6658 return 0;
6659}
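/* Illustrative user-space sketch (raw bpf(2) use, names as in uapi/linux/bpf.h):
 * a hash map created with BPF_F_NO_PREALLOC makes the check above fail for a
 * BPF_PROG_TYPE_PERF_EVENT program that references it.
 *
 *	union bpf_attr attr = {
 *		.map_type    = BPF_MAP_TYPE_HASH,
 *		.key_size    = sizeof(__u32),
 *		.value_size  = sizeof(__u64),
 *		.max_entries = 128,
 *		.map_flags   = BPF_F_NO_PREALLOC,	// fails check_map_prealloc()
 *	};
 *	int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *
 * Loading a perf_event program that uses map_fd is then rejected with
 * "perf_event programs can only use preallocated hash map".
 */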
6660
b741f163
RG
6661static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
6662{
6663 return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
6664 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
6665}
6666
0246e64d
AS
6667/* look for pseudo eBPF instructions that access map FDs and
6668 * replace them with actual map pointers
6669 */
58e2af8b 6670static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
0246e64d
AS
6671{
6672 struct bpf_insn *insn = env->prog->insnsi;
6673 int insn_cnt = env->prog->len;
fdc15d38 6674 int i, j, err;
0246e64d 6675
f1f7714e 6676 err = bpf_prog_calc_tag(env->prog);
aafe6ae9
DB
6677 if (err)
6678 return err;
6679
0246e64d 6680 for (i = 0; i < insn_cnt; i++, insn++) {
9bac3d6d 6681 if (BPF_CLASS(insn->code) == BPF_LDX &&
d691f9e8 6682 (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
61bd5218 6683 verbose(env, "BPF_LDX uses reserved fields\n");
9bac3d6d
AS
6684 return -EINVAL;
6685 }
6686
d691f9e8
AS
6687 if (BPF_CLASS(insn->code) == BPF_STX &&
6688 ((BPF_MODE(insn->code) != BPF_MEM &&
6689 BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
61bd5218 6690 verbose(env, "BPF_STX uses reserved fields\n");
d691f9e8
AS
6691 return -EINVAL;
6692 }
6693
0246e64d
AS
6694 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
6695 struct bpf_map *map;
6696 struct fd f;
6697
6698 if (i == insn_cnt - 1 || insn[1].code != 0 ||
6699 insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
6700 insn[1].off != 0) {
61bd5218 6701 verbose(env, "invalid bpf_ld_imm64 insn\n");
0246e64d
AS
6702 return -EINVAL;
6703 }
6704
6705 if (insn->src_reg == 0)
6706 /* valid generic load 64-bit imm */
6707 goto next_insn;
6708
20182390
DB
6709 if (insn[0].src_reg != BPF_PSEUDO_MAP_FD ||
6710 insn[1].imm != 0) {
6711 verbose(env, "unrecognized bpf_ld_imm64 insn\n");
0246e64d
AS
6712 return -EINVAL;
6713 }
6714
20182390 6715 f = fdget(insn[0].imm);
c2101297 6716 map = __bpf_map_get(f);
0246e64d 6717 if (IS_ERR(map)) {
61bd5218 6718 verbose(env, "fd %d is not pointing to valid bpf_map\n",
20182390 6719 insn[0].imm);
0246e64d
AS
6720 return PTR_ERR(map);
6721 }
6722
61bd5218 6723 err = check_map_prog_compatibility(env, map, env->prog);
fdc15d38
AS
6724 if (err) {
6725 fdput(f);
6726 return err;
6727 }
6728
0246e64d
AS
6729 /* store map pointer inside BPF_LD_IMM64 instruction */
6730 insn[0].imm = (u32) (unsigned long) map;
6731 insn[1].imm = ((u64) (unsigned long) map) >> 32;
6732
6733 /* check whether we recorded this map already */
6734 for (j = 0; j < env->used_map_cnt; j++)
6735 if (env->used_maps[j] == map) {
6736 fdput(f);
6737 goto next_insn;
6738 }
6739
6740 if (env->used_map_cnt >= MAX_USED_MAPS) {
6741 fdput(f);
6742 return -E2BIG;
6743 }
6744
0246e64d
AS
6745 /* hold the map. If the program is rejected by verifier,
6746 * the map will be released by release_maps() or it
6747 * will be used by the valid program until it's unloaded
ab7f5bf0 6748 * and all maps are released in free_used_maps()
0246e64d 6749 */
92117d84
AS
6750 map = bpf_map_inc(map, false);
6751 if (IS_ERR(map)) {
6752 fdput(f);
6753 return PTR_ERR(map);
6754 }
6755 env->used_maps[env->used_map_cnt++] = map;
6756
b741f163 6757 if (bpf_map_is_cgroup_storage(map) &&
de9cbbaa 6758 bpf_cgroup_storage_assign(env->prog, map)) {
b741f163 6759 verbose(env, "only one cgroup storage of each type is allowed\n");
de9cbbaa
RG
6760 fdput(f);
6761 return -EBUSY;
6762 }
6763
0246e64d
AS
6764 fdput(f);
6765next_insn:
6766 insn++;
6767 i++;
5e581dad
DB
6768 continue;
6769 }
6770
6771 /* Basic sanity check before we invest more work here. */
6772 if (!bpf_opcode_in_insntable(insn->code)) {
6773 verbose(env, "unknown opcode %02x\n", insn->code);
6774 return -EINVAL;
0246e64d
AS
6775 }
6776 }
6777
6778 /* now all pseudo BPF_LD_IMM64 instructions load valid
6779 * 'struct bpf_map *' into a register instead of user map_fd.
6780 * These pointers will be used later by verifier to validate map access.
6781 */
6782 return 0;
6783}
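/* For illustration, the pseudo instruction handled above is what
 * BPF_LD_MAP_FD(BPF_REG_1, map_fd) expands to: a two-slot ld_imm64 whose
 * src_reg marks the immediate as a map fd rather than a literal.
 *
 *	insn[0] = (struct bpf_insn) { .code = BPF_LD | BPF_DW | BPF_IMM,
 *				      .dst_reg = BPF_REG_1,
 *				      .src_reg = BPF_PSEUDO_MAP_FD,
 *				      .imm = map_fd };
 *	insn[1] = (struct bpf_insn) { .imm = 0 };
 *
 * After this pass insn[0].imm and insn[1].imm hold the low and high 32 bits
 * of the in-kernel 'struct bpf_map *' instead of the fd.
 */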
6784
6785/* drop refcnt of maps used by the rejected program */
58e2af8b 6786static void release_maps(struct bpf_verifier_env *env)
0246e64d 6787{
8bad74f9 6788 enum bpf_cgroup_storage_type stype;
0246e64d
AS
6789 int i;
6790
8bad74f9
RG
6791 for_each_cgroup_storage_type(stype) {
6792 if (!env->prog->aux->cgroup_storage[stype])
6793 continue;
de9cbbaa 6794 bpf_cgroup_storage_release(env->prog,
8bad74f9
RG
6795 env->prog->aux->cgroup_storage[stype]);
6796 }
de9cbbaa 6797
0246e64d
AS
6798 for (i = 0; i < env->used_map_cnt; i++)
6799 bpf_map_put(env->used_maps[i]);
6800}
6801
6802/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
58e2af8b 6803static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
0246e64d
AS
6804{
6805 struct bpf_insn *insn = env->prog->insnsi;
6806 int insn_cnt = env->prog->len;
6807 int i;
6808
6809 for (i = 0; i < insn_cnt; i++, insn++)
6810 if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
6811 insn->src_reg = 0;
6812}
6813
8041902d
AS
6814/* single env->prog->insnsi[off] instruction was replaced with the range
6815 * insnsi[off, off + cnt). Adjust corresponding insn_aux_data by copying
6816 * [0, off) and [off, end) to new locations, so the patched range stays zero
6817 */
6818static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
6819 u32 off, u32 cnt)
6820{
6821 struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
c131187d 6822 int i;
8041902d
AS
6823
6824 if (cnt == 1)
6825 return 0;
fad953ce
KC
6826 new_data = vzalloc(array_size(prog_len,
6827 sizeof(struct bpf_insn_aux_data)));
8041902d
AS
6828 if (!new_data)
6829 return -ENOMEM;
6830 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
6831 memcpy(new_data + off + cnt - 1, old_data + off,
6832 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
c131187d
AS
6833 for (i = off; i < off + cnt - 1; i++)
6834 new_data[i].seen = true;
8041902d
AS
6835 env->insn_aux_data = new_data;
6836 vfree(old_data);
6837 return 0;
6838}
6839
cc8b0b92
AS
6840static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
6841{
6842 int i;
6843
6844 if (len == 1)
6845 return;
4cb3d99c
JW
6846 /* NOTE: fake 'exit' subprog should be updated as well. */
6847 for (i = 0; i <= env->subprog_cnt; i++) {
afd59424 6848 if (env->subprog_info[i].start <= off)
cc8b0b92 6849 continue;
9c8105bd 6850 env->subprog_info[i].start += len - 1;
cc8b0b92
AS
6851 }
6852}
6853
8041902d
AS
6854static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
6855 const struct bpf_insn *patch, u32 len)
6856{
6857 struct bpf_prog *new_prog;
6858
6859 new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
6860 if (!new_prog)
6861 return NULL;
6862 if (adjust_insn_aux_data(env, new_prog->len, off, len))
6863 return NULL;
cc8b0b92 6864 adjust_subprog_starts(env, off, len);
8041902d
AS
6865 return new_prog;
6866}
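/* Usage sketch: callers patch while iterating over the program and keep a
 * 'delta' of how many insns have been added so far, as done by
 * convert_ctx_accesses() and fixup_bpf_calls() below:
 *
 *	new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
 *	if (!new_prog)
 *		return -ENOMEM;
 *	delta += cnt - 1;			// one insn became cnt insns
 *	env->prog = new_prog;
 *	insn = new_prog->insnsi + i + delta;	// continue after the patch
 */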
6867
52875a04
JK
6868static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
6869 u32 off, u32 cnt)
6870{
6871 int i, j;
6872
6873 /* find first prog starting at or after off (first to remove) */
6874 for (i = 0; i < env->subprog_cnt; i++)
6875 if (env->subprog_info[i].start >= off)
6876 break;
6877 /* find first prog starting at or after off + cnt (first to stay) */
6878 for (j = i; j < env->subprog_cnt; j++)
6879 if (env->subprog_info[j].start >= off + cnt)
6880 break;
6881 /* if j doesn't start exactly at off + cnt, we are just removing
6882 * the front of previous prog
6883 */
6884 if (env->subprog_info[j].start != off + cnt)
6885 j--;
6886
6887 if (j > i) {
6888 struct bpf_prog_aux *aux = env->prog->aux;
6889 int move;
6890
6891 /* move fake 'exit' subprog as well */
6892 move = env->subprog_cnt + 1 - j;
6893
6894 memmove(env->subprog_info + i,
6895 env->subprog_info + j,
6896 sizeof(*env->subprog_info) * move);
6897 env->subprog_cnt -= j - i;
6898
6899 /* remove func_info */
6900 if (aux->func_info) {
6901 move = aux->func_info_cnt - j;
6902
6903 memmove(aux->func_info + i,
6904 aux->func_info + j,
6905 sizeof(*aux->func_info) * move);
6906 aux->func_info_cnt -= j - i;
6907 /* func_info->insn_off is set after all code rewrites,
6908 * in adjust_btf_func() - no need to adjust
6909 */
6910 }
6911 } else {
6912 /* convert i from "first prog to remove" to "first to adjust" */
6913 if (env->subprog_info[i].start == off)
6914 i++;
6915 }
6916
6917 /* update fake 'exit' subprog as well */
6918 for (; i <= env->subprog_cnt; i++)
6919 env->subprog_info[i].start -= cnt;
6920
6921 return 0;
6922}
6923
6924static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
6925 u32 cnt)
6926{
6927 struct bpf_prog *prog = env->prog;
6928 u32 i, l_off, l_cnt, nr_linfo;
6929 struct bpf_line_info *linfo;
6930
6931 nr_linfo = prog->aux->nr_linfo;
6932 if (!nr_linfo)
6933 return 0;
6934
6935 linfo = prog->aux->linfo;
6936
6937 /* find first line info to remove, count lines to be removed */
6938 for (i = 0; i < nr_linfo; i++)
6939 if (linfo[i].insn_off >= off)
6940 break;
6941
6942 l_off = i;
6943 l_cnt = 0;
6944 for (; i < nr_linfo; i++)
6945 if (linfo[i].insn_off < off + cnt)
6946 l_cnt++;
6947 else
6948 break;
6949
6950 /* If the first live insn doesn't match the first live linfo, it needs to
6951 * "inherit" the last removed linfo. prog is already modified, so prog->len == off
6952 * means no live instructions after (tail of the program was removed).
6953 */
6954 if (prog->len != off && l_cnt &&
6955 (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
6956 l_cnt--;
6957 linfo[--i].insn_off = off + cnt;
6958 }
6959
6960 /* remove the line info entries that refer to the removed instructions */
6961 if (l_cnt) {
6962 memmove(linfo + l_off, linfo + i,
6963 sizeof(*linfo) * (nr_linfo - i));
6964
6965 prog->aux->nr_linfo -= l_cnt;
6966 nr_linfo = prog->aux->nr_linfo;
6967 }
6968
6969 /* pull all linfo[i].insn_off >= off + cnt in by cnt */
6970 for (i = l_off; i < nr_linfo; i++)
6971 linfo[i].insn_off -= cnt;
6972
6973 /* fix up all subprogs (incl. 'exit') which start >= off */
6974 for (i = 0; i <= env->subprog_cnt; i++)
6975 if (env->subprog_info[i].linfo_idx > l_off) {
6976 /* program may have started in the removed region but
6977 * may not be fully removed
6978 */
6979 if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
6980 env->subprog_info[i].linfo_idx -= l_cnt;
6981 else
6982 env->subprog_info[i].linfo_idx = l_off;
6983 }
6984
6985 return 0;
6986}
6987
6988static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
6989{
6990 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
6991 unsigned int orig_prog_len = env->prog->len;
6992 int err;
6993
08ca90af
JK
6994 if (bpf_prog_is_dev_bound(env->prog->aux))
6995 bpf_prog_offload_remove_insns(env, off, cnt);
6996
52875a04
JK
6997 err = bpf_remove_insns(env->prog, off, cnt);
6998 if (err)
6999 return err;
7000
7001 err = adjust_subprog_starts_after_remove(env, off, cnt);
7002 if (err)
7003 return err;
7004
7005 err = bpf_adj_linfo_after_remove(env, off, cnt);
7006 if (err)
7007 return err;
7008
7009 memmove(aux_data + off, aux_data + off + cnt,
7010 sizeof(*aux_data) * (orig_prog_len - off - cnt));
7011
7012 return 0;
7013}
7014
2a5418a1
DB
7015/* The verifier does more data flow analysis than llvm and will not
7016 * explore branches that are dead at run time. Malicious programs can
7017 * have dead code too. Therefore replace all dead at-run-time code
7018 * with 'ja -1'.
7019 *
7020 * Just nops are not optimal, e.g. if they were to sit at the end of the
7021 * program and we managed to jump there through another bug, we would
7022 * execute beyond program memory. Returning exception
7023 * code also wouldn't work since we can have subprogs where the dead
7024 * code could be located.
c131187d
AS
7025 */
7026static void sanitize_dead_code(struct bpf_verifier_env *env)
7027{
7028 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
2a5418a1 7029 struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
c131187d
AS
7030 struct bpf_insn *insn = env->prog->insnsi;
7031 const int insn_cnt = env->prog->len;
7032 int i;
7033
7034 for (i = 0; i < insn_cnt; i++) {
7035 if (aux_data[i].seen)
7036 continue;
2a5418a1 7037 memcpy(insn + i, &trap, sizeof(trap));
c131187d
AS
7038 }
7039}
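/* Worked example (sketch): insn 2 below is reachable in the CFG, so it passes
 * check_cfg(), but the branch at insn 1 is resolved at verification time and
 * insn 2 is never marked 'seen':
 *
 *	0: r0 = 0
 *	1: if r0 == 0 goto +1	// always taken
 *	2: r0 = 1		// dead at run time
 *	3: exit
 *
 * sanitize_dead_code() turns insn 2 into 'goto -1', a self-loop that can
 * never actually be reached.
 */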
7040
e2ae4ca2
JK
7041static bool insn_is_cond_jump(u8 code)
7042{
7043 u8 op;
7044
092ed096
JW
7045 if (BPF_CLASS(code) == BPF_JMP32)
7046 return true;
7047
e2ae4ca2
JK
7048 if (BPF_CLASS(code) != BPF_JMP)
7049 return false;
7050
7051 op = BPF_OP(code);
7052 return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
7053}
7054
7055static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
7056{
7057 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
7058 struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
7059 struct bpf_insn *insn = env->prog->insnsi;
7060 const int insn_cnt = env->prog->len;
7061 int i;
7062
7063 for (i = 0; i < insn_cnt; i++, insn++) {
7064 if (!insn_is_cond_jump(insn->code))
7065 continue;
7066
7067 if (!aux_data[i + 1].seen)
7068 ja.off = insn->off;
7069 else if (!aux_data[i + 1 + insn->off].seen)
7070 ja.off = 0;
7071 else
7072 continue;
7073
08ca90af
JK
7074 if (bpf_prog_is_dev_bound(env->prog->aux))
7075 bpf_prog_offload_replace_insn(env, i, &ja);
7076
e2ae4ca2
JK
7077 memcpy(insn, &ja, sizeof(ja));
7078 }
7079}
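/* Worked example (sketch), continuing the one above: since insn 2 was never
 * 'seen', the conditional jump at insn 1 is hard-wired to its only live leg,
 *
 *	1: if r0 == 0 goto +1	becomes		1: goto +1
 *
 * and a branch whose target is dead becomes 'goto +0', so the later
 * opt_remove_dead_code() and opt_remove_nops() passes can delete both the
 * dead leg and the resulting no-op jump.
 */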
7080
52875a04
JK
7081static int opt_remove_dead_code(struct bpf_verifier_env *env)
7082{
7083 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
7084 int insn_cnt = env->prog->len;
7085 int i, err;
7086
7087 for (i = 0; i < insn_cnt; i++) {
7088 int j;
7089
7090 j = 0;
7091 while (i + j < insn_cnt && !aux_data[i + j].seen)
7092 j++;
7093 if (!j)
7094 continue;
7095
7096 err = verifier_remove_insns(env, i, j);
7097 if (err)
7098 return err;
7099 insn_cnt = env->prog->len;
7100 }
7101
7102 return 0;
7103}
7104
a1b14abc
JK
7105static int opt_remove_nops(struct bpf_verifier_env *env)
7106{
7107 const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
7108 struct bpf_insn *insn = env->prog->insnsi;
7109 int insn_cnt = env->prog->len;
7110 int i, err;
7111
7112 for (i = 0; i < insn_cnt; i++) {
7113 if (memcmp(&insn[i], &ja, sizeof(ja)))
7114 continue;
7115
7116 err = verifier_remove_insns(env, i, 1);
7117 if (err)
7118 return err;
7119 insn_cnt--;
7120 i--;
7121 }
7122
7123 return 0;
7124}
7125
c64b7983
JS
7126/* convert load instructions that access fields of a context type into a
7127 * sequence of instructions that access fields of the underlying structure:
7128 * struct __sk_buff -> struct sk_buff
7129 * struct bpf_sock_ops -> struct sock
9bac3d6d 7130 */
58e2af8b 7131static int convert_ctx_accesses(struct bpf_verifier_env *env)
9bac3d6d 7132{
00176a34 7133 const struct bpf_verifier_ops *ops = env->ops;
f96da094 7134 int i, cnt, size, ctx_field_size, delta = 0;
3df126f3 7135 const int insn_cnt = env->prog->len;
36bbef52 7136 struct bpf_insn insn_buf[16], *insn;
46f53a65 7137 u32 target_size, size_default, off;
9bac3d6d 7138 struct bpf_prog *new_prog;
d691f9e8 7139 enum bpf_access_type type;
f96da094 7140 bool is_narrower_load;
9bac3d6d 7141
b09928b9
DB
7142 if (ops->gen_prologue || env->seen_direct_write) {
7143 if (!ops->gen_prologue) {
7144 verbose(env, "bpf verifier is misconfigured\n");
7145 return -EINVAL;
7146 }
36bbef52
DB
7147 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
7148 env->prog);
7149 if (cnt >= ARRAY_SIZE(insn_buf)) {
61bd5218 7150 verbose(env, "bpf verifier is misconfigured\n");
36bbef52
DB
7151 return -EINVAL;
7152 } else if (cnt) {
8041902d 7153 new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
36bbef52
DB
7154 if (!new_prog)
7155 return -ENOMEM;
8041902d 7156
36bbef52 7157 env->prog = new_prog;
3df126f3 7158 delta += cnt - 1;
36bbef52
DB
7159 }
7160 }
7161
c64b7983 7162 if (bpf_prog_is_dev_bound(env->prog->aux))
9bac3d6d
AS
7163 return 0;
7164
3df126f3 7165 insn = env->prog->insnsi + delta;
36bbef52 7166
9bac3d6d 7167 for (i = 0; i < insn_cnt; i++, insn++) {
c64b7983
JS
7168 bpf_convert_ctx_access_t convert_ctx_access;
7169
62c7989b
DB
7170 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
7171 insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
7172 insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
ea2e7ce5 7173 insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
d691f9e8 7174 type = BPF_READ;
62c7989b
DB
7175 else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
7176 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
7177 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
ea2e7ce5 7178 insn->code == (BPF_STX | BPF_MEM | BPF_DW))
d691f9e8
AS
7179 type = BPF_WRITE;
7180 else
9bac3d6d
AS
7181 continue;
7182
af86ca4e
AS
7183 if (type == BPF_WRITE &&
7184 env->insn_aux_data[i + delta].sanitize_stack_off) {
7185 struct bpf_insn patch[] = {
7186 /* Sanitize suspicious stack slot with zero.
7187 * There are no memory dependencies for this store,
7188 * since it's only using frame pointer and immediate
7189 * constant of zero
7190 */
7191 BPF_ST_MEM(BPF_DW, BPF_REG_FP,
7192 env->insn_aux_data[i + delta].sanitize_stack_off,
7193 0),
7194 /* the original STX instruction will immediately
7195 * overwrite the same stack slot with appropriate value
7196 */
7197 *insn,
7198 };
7199
7200 cnt = ARRAY_SIZE(patch);
7201 new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
7202 if (!new_prog)
7203 return -ENOMEM;
7204
7205 delta += cnt - 1;
7206 env->prog = new_prog;
7207 insn = new_prog->insnsi + i + delta;
7208 continue;
7209 }
7210
c64b7983
JS
7211 switch (env->insn_aux_data[i + delta].ptr_type) {
7212 case PTR_TO_CTX:
7213 if (!ops->convert_ctx_access)
7214 continue;
7215 convert_ctx_access = ops->convert_ctx_access;
7216 break;
7217 case PTR_TO_SOCKET:
46f8bc92 7218 case PTR_TO_SOCK_COMMON:
c64b7983
JS
7219 convert_ctx_access = bpf_sock_convert_ctx_access;
7220 break;
655a51e5
MKL
7221 case PTR_TO_TCP_SOCK:
7222 convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
7223 break;
c64b7983 7224 default:
9bac3d6d 7225 continue;
c64b7983 7226 }
9bac3d6d 7227
31fd8581 7228 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
f96da094 7229 size = BPF_LDST_BYTES(insn);
31fd8581
YS
7230
7231 /* If the read access is a narrower load of the field,
7232 * convert to a 4/8-byte load, to minimize program type specific
7233 * convert_ctx_access changes. If conversion is successful,
7234 * we will apply proper mask to the result.
7235 */
f96da094 7236 is_narrower_load = size < ctx_field_size;
46f53a65
AI
7237 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
7238 off = insn->off;
31fd8581 7239 if (is_narrower_load) {
f96da094
DB
7240 u8 size_code;
7241
7242 if (type == BPF_WRITE) {
61bd5218 7243 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
f96da094
DB
7244 return -EINVAL;
7245 }
31fd8581 7246
f96da094 7247 size_code = BPF_H;
31fd8581
YS
7248 if (ctx_field_size == 4)
7249 size_code = BPF_W;
7250 else if (ctx_field_size == 8)
7251 size_code = BPF_DW;
f96da094 7252
bc23105c 7253 insn->off = off & ~(size_default - 1);
31fd8581
YS
7254 insn->code = BPF_LDX | BPF_MEM | size_code;
7255 }
f96da094
DB
7256
7257 target_size = 0;
c64b7983
JS
7258 cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
7259 &target_size);
f96da094
DB
7260 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
7261 (ctx_field_size && !target_size)) {
61bd5218 7262 verbose(env, "bpf verifier is misconfigured\n");
9bac3d6d
AS
7263 return -EINVAL;
7264 }
f96da094
DB
7265
7266 if (is_narrower_load && size < target_size) {
46f53a65
AI
7267 u8 shift = (off & (size_default - 1)) * 8;
7268
7269 if (ctx_field_size <= 4) {
7270 if (shift)
7271 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
7272 insn->dst_reg,
7273 shift);
31fd8581 7274 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
f96da094 7275 (1 << size * 8) - 1);
46f53a65
AI
7276 } else {
7277 if (shift)
7278 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
7279 insn->dst_reg,
7280 shift);
31fd8581 7281 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
f96da094 7282 (1 << size * 8) - 1);
46f53a65 7283 }
31fd8581 7284 }
9bac3d6d 7285
8041902d 7286 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
9bac3d6d
AS
7287 if (!new_prog)
7288 return -ENOMEM;
7289
3df126f3 7290 delta += cnt - 1;
9bac3d6d
AS
7291
7292 /* keep walking new program and skip insns we just inserted */
7293 env->prog = new_prog;
3df126f3 7294 insn = new_prog->insnsi + i + delta;
9bac3d6d
AS
7295 }
7296
7297 return 0;
7298}
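/* Worked example (sketch): for a socket filter, a narrow 1-byte read of the
 * 4-byte __sk_buff::len field, e.g.
 *
 *	r0 = *(u8 *)(r1 + offsetof(struct __sk_buff, len) + 1)
 *
 * is widened above to a 4-byte load at the aligned offset, rewritten by
 * convert_ctx_access() into the underlying sk_buff access, and masked back
 * down to the requested byte, roughly:
 *
 *	r0 = *(u32 *)(r1 + offsetof(struct sk_buff, len))
 *	w0 >>= 8		// shift = (off & (size_default - 1)) * 8
 *	w0 &= 0xff		// (1 << size * 8) - 1
 */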
7299
1c2a088a
AS
7300static int jit_subprogs(struct bpf_verifier_env *env)
7301{
7302 struct bpf_prog *prog = env->prog, **func, *tmp;
7303 int i, j, subprog_start, subprog_end = 0, len, subprog;
7105e828 7304 struct bpf_insn *insn;
1c2a088a 7305 void *old_bpf_func;
c454a46b 7306 int err;
1c2a088a 7307
f910cefa 7308 if (env->subprog_cnt <= 1)
1c2a088a
AS
7309 return 0;
7310
7105e828 7311 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
1c2a088a
AS
7312 if (insn->code != (BPF_JMP | BPF_CALL) ||
7313 insn->src_reg != BPF_PSEUDO_CALL)
7314 continue;
c7a89784
DB
7315 /* Upon error here we cannot fall back to interpreter but
7316 * need a hard reject of the program. Thus -EFAULT is
7317 * propagated in any case.
7318 */
1c2a088a
AS
7319 subprog = find_subprog(env, i + insn->imm + 1);
7320 if (subprog < 0) {
7321 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
7322 i + insn->imm + 1);
7323 return -EFAULT;
7324 }
7325 /* temporarily remember subprog id inside insn instead of
7326 * aux_data, since next loop will split up all insns into funcs
7327 */
f910cefa 7328 insn->off = subprog;
1c2a088a
AS
7329 /* remember original imm in case JIT fails and fallback
7330 * to interpreter will be needed
7331 */
7332 env->insn_aux_data[i].call_imm = insn->imm;
7333 /* point imm to __bpf_call_base+1 from JITs point of view */
7334 insn->imm = 1;
7335 }
7336
c454a46b
MKL
7337 err = bpf_prog_alloc_jited_linfo(prog);
7338 if (err)
7339 goto out_undo_insn;
7340
7341 err = -ENOMEM;
6396bb22 7342 func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
1c2a088a 7343 if (!func)
c7a89784 7344 goto out_undo_insn;
1c2a088a 7345
f910cefa 7346 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a 7347 subprog_start = subprog_end;
4cb3d99c 7348 subprog_end = env->subprog_info[i + 1].start;
1c2a088a
AS
7349
7350 len = subprog_end - subprog_start;
492ecee8
AS
7351 /* BPF_PROG_RUN doesn't call subprogs directly,
7352 * hence main prog stats include the runtime of subprogs.
7353 * subprogs don't have IDs and are not reachable via prog_get_next_id
7354 * func[i]->aux->stats will never be accessed and stays NULL
7355 */
7356 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
1c2a088a
AS
7357 if (!func[i])
7358 goto out_free;
7359 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
7360 len * sizeof(struct bpf_insn));
4f74d809 7361 func[i]->type = prog->type;
1c2a088a 7362 func[i]->len = len;
4f74d809
DB
7363 if (bpf_prog_calc_tag(func[i]))
7364 goto out_free;
1c2a088a 7365 func[i]->is_func = 1;
ba64e7d8
YS
7366 func[i]->aux->func_idx = i;
7367 /* the btf and func_info will be freed only at prog->aux */
7368 func[i]->aux->btf = prog->aux->btf;
7369 func[i]->aux->func_info = prog->aux->func_info;
7370
1c2a088a
AS
7371 /* Use bpf_prog_F_tag to indicate functions in stack traces.
7372 * Long term would need debug info to populate names
7373 */
7374 func[i]->aux->name[0] = 'F';
9c8105bd 7375 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
1c2a088a 7376 func[i]->jit_requested = 1;
c454a46b
MKL
7377 func[i]->aux->linfo = prog->aux->linfo;
7378 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
7379 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
7380 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
1c2a088a
AS
7381 func[i] = bpf_int_jit_compile(func[i]);
7382 if (!func[i]->jited) {
7383 err = -ENOTSUPP;
7384 goto out_free;
7385 }
7386 cond_resched();
7387 }
7388 /* at this point all bpf functions were successfully JITed
7389 * now populate all bpf_calls with correct addresses and
7390 * run last pass of JIT
7391 */
f910cefa 7392 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
7393 insn = func[i]->insnsi;
7394 for (j = 0; j < func[i]->len; j++, insn++) {
7395 if (insn->code != (BPF_JMP | BPF_CALL) ||
7396 insn->src_reg != BPF_PSEUDO_CALL)
7397 continue;
7398 subprog = insn->off;
1c2a088a
AS
7399 insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
7400 func[subprog]->bpf_func -
7401 __bpf_call_base;
7402 }
2162fed4
SD
7403
7404 /* we use the aux data to keep a list of the start addresses
7405 * of the JITed images for each function in the program
7406 *
7407 * for some architectures, such as powerpc64, the imm field
7408 * might not be large enough to hold the offset of the start
7409 * address of the callee's JITed image from __bpf_call_base
7410 *
7411 * in such cases, we can lookup the start address of a callee
7412 * by using its subprog id, available from the off field of
7413 * the call instruction, as an index for this list
7414 */
7415 func[i]->aux->func = func;
7416 func[i]->aux->func_cnt = env->subprog_cnt;
1c2a088a 7417 }
f910cefa 7418 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
7419 old_bpf_func = func[i]->bpf_func;
7420 tmp = bpf_int_jit_compile(func[i]);
7421 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
7422 verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
c7a89784 7423 err = -ENOTSUPP;
1c2a088a
AS
7424 goto out_free;
7425 }
7426 cond_resched();
7427 }
7428
7429 /* finally lock prog and jit images for all functions and
7430 * populate kallsyms
7431 */
f910cefa 7432 for (i = 0; i < env->subprog_cnt; i++) {
1c2a088a
AS
7433 bpf_prog_lock_ro(func[i]);
7434 bpf_prog_kallsyms_add(func[i]);
7435 }
7105e828
DB
7436
7437 /* Last step: make now unused interpreter insns from main
7438 * prog consistent for later dump requests, so they can
7439 * later look the same as if they were interpreted only.
7440 */
7441 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
7105e828
DB
7442 if (insn->code != (BPF_JMP | BPF_CALL) ||
7443 insn->src_reg != BPF_PSEUDO_CALL)
7444 continue;
7445 insn->off = env->insn_aux_data[i].call_imm;
7446 subprog = find_subprog(env, i + insn->off + 1);
dbecd738 7447 insn->imm = subprog;
7105e828
DB
7448 }
7449
1c2a088a
AS
7450 prog->jited = 1;
7451 prog->bpf_func = func[0]->bpf_func;
7452 prog->aux->func = func;
f910cefa 7453 prog->aux->func_cnt = env->subprog_cnt;
c454a46b 7454 bpf_prog_free_unused_jited_linfo(prog);
1c2a088a
AS
7455 return 0;
7456out_free:
f910cefa 7457 for (i = 0; i < env->subprog_cnt; i++)
1c2a088a
AS
7458 if (func[i])
7459 bpf_jit_free(func[i]);
7460 kfree(func);
c7a89784 7461out_undo_insn:
1c2a088a
AS
7462 /* cleanup main prog to be interpreted */
7463 prog->jit_requested = 0;
7464 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
7465 if (insn->code != (BPF_JMP | BPF_CALL) ||
7466 insn->src_reg != BPF_PSEUDO_CALL)
7467 continue;
7468 insn->off = 0;
7469 insn->imm = env->insn_aux_data[i].call_imm;
7470 }
c454a46b 7471 bpf_prog_free_jited_linfo(prog);
1c2a088a
AS
7472 return err;
7473}
7474
1ea47e01
AS
7475static int fixup_call_args(struct bpf_verifier_env *env)
7476{
19d28fbd 7477#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1ea47e01
AS
7478 struct bpf_prog *prog = env->prog;
7479 struct bpf_insn *insn = prog->insnsi;
7480 int i, depth;
19d28fbd 7481#endif
e4052d06 7482 int err = 0;
1ea47e01 7483
e4052d06
QM
7484 if (env->prog->jit_requested &&
7485 !bpf_prog_is_dev_bound(env->prog->aux)) {
19d28fbd
DM
7486 err = jit_subprogs(env);
7487 if (err == 0)
1c2a088a 7488 return 0;
c7a89784
DB
7489 if (err == -EFAULT)
7490 return err;
19d28fbd
DM
7491 }
7492#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1ea47e01
AS
7493 for (i = 0; i < prog->len; i++, insn++) {
7494 if (insn->code != (BPF_JMP | BPF_CALL) ||
7495 insn->src_reg != BPF_PSEUDO_CALL)
7496 continue;
7497 depth = get_callee_stack_depth(env, insn, i);
7498 if (depth < 0)
7499 return depth;
7500 bpf_patch_call_args(insn, depth);
7501 }
19d28fbd
DM
7502 err = 0;
7503#endif
7504 return err;
1ea47e01
AS
7505}
7506
79741b3b 7507/* fixup insn->imm field of bpf_call instructions
81ed18ab 7508 * and inline eligible helpers as explicit sequence of BPF instructions
e245c5c6
AS
7509 *
7510 * this function is called after eBPF program passed verification
7511 */
79741b3b 7512static int fixup_bpf_calls(struct bpf_verifier_env *env)
e245c5c6 7513{
79741b3b
AS
7514 struct bpf_prog *prog = env->prog;
7515 struct bpf_insn *insn = prog->insnsi;
e245c5c6 7516 const struct bpf_func_proto *fn;
79741b3b 7517 const int insn_cnt = prog->len;
09772d92 7518 const struct bpf_map_ops *ops;
c93552c4 7519 struct bpf_insn_aux_data *aux;
81ed18ab
AS
7520 struct bpf_insn insn_buf[16];
7521 struct bpf_prog *new_prog;
7522 struct bpf_map *map_ptr;
7523 int i, cnt, delta = 0;
e245c5c6 7524
79741b3b 7525 for (i = 0; i < insn_cnt; i++, insn++) {
f6b1b3bf
DB
7526 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
7527 insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
7528 insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
68fda450 7529 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
f6b1b3bf
DB
7530 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
7531 struct bpf_insn mask_and_div[] = {
7532 BPF_MOV32_REG(insn->src_reg, insn->src_reg),
7533 /* Rx div 0 -> 0 */
7534 BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
7535 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
7536 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
7537 *insn,
7538 };
7539 struct bpf_insn mask_and_mod[] = {
7540 BPF_MOV32_REG(insn->src_reg, insn->src_reg),
7541 /* Rx mod 0 -> Rx */
7542 BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
7543 *insn,
7544 };
7545 struct bpf_insn *patchlet;
7546
7547 if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
7548 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
7549 patchlet = mask_and_div + (is64 ? 1 : 0);
7550 cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
7551 } else {
7552 patchlet = mask_and_mod + (is64 ? 1 : 0);
7553 cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
7554 }
7555
7556 new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
68fda450
AS
7557 if (!new_prog)
7558 return -ENOMEM;
7559
7560 delta += cnt - 1;
7561 env->prog = prog = new_prog;
7562 insn = new_prog->insnsi + i + delta;
7563 continue;
7564 }
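		/* Worked example (sketch): with is64 the patchlet skips the
		 * 32-bit truncation of src, so a 64-bit 'r0 /= r2' becomes
		 *
		 *	if r2 != 0 goto +2
		 *	w0 ^= w0		// r0 = 0 on division by zero
		 *	goto +1
		 *	r0 /= r2
		 *
		 * matching the runtime convention Rx div 0 -> 0 (the mod variant
		 * simply skips the operation, Rx mod 0 -> Rx).
		 */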
7565
e0cea7ce
DB
7566 if (BPF_CLASS(insn->code) == BPF_LD &&
7567 (BPF_MODE(insn->code) == BPF_ABS ||
7568 BPF_MODE(insn->code) == BPF_IND)) {
7569 cnt = env->ops->gen_ld_abs(insn, insn_buf);
7570 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
7571 verbose(env, "bpf verifier is misconfigured\n");
7572 return -EINVAL;
7573 }
7574
7575 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
7576 if (!new_prog)
7577 return -ENOMEM;
7578
7579 delta += cnt - 1;
7580 env->prog = prog = new_prog;
7581 insn = new_prog->insnsi + i + delta;
7582 continue;
7583 }
7584
979d63d5
DB
7585 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
7586 insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
7587 const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
7588 const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
7589 struct bpf_insn insn_buf[16];
7590 struct bpf_insn *patch = &insn_buf[0];
7591 bool issrc, isneg;
7592 u32 off_reg;
7593
7594 aux = &env->insn_aux_data[i + delta];
3612af78
DB
7595 if (!aux->alu_state ||
7596 aux->alu_state == BPF_ALU_NON_POINTER)
979d63d5
DB
7597 continue;
7598
7599 isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
7600 issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
7601 BPF_ALU_SANITIZE_SRC;
7602
7603 off_reg = issrc ? insn->src_reg : insn->dst_reg;
7604 if (isneg)
7605 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
7606 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1);
7607 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
7608 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
7609 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
7610 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
7611 if (issrc) {
7612 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX,
7613 off_reg);
7614 insn->src_reg = BPF_REG_AX;
7615 } else {
7616 *patch++ = BPF_ALU64_REG(BPF_AND, off_reg,
7617 BPF_REG_AX);
7618 }
7619 if (isneg)
7620 insn->code = insn->code == code_add ?
7621 code_sub : code_add;
7622 *patch++ = *insn;
7623 if (issrc && isneg)
7624 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
7625 cnt = patch - insn_buf;
7626
7627 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
7628 if (!new_prog)
7629 return -ENOMEM;
7630
7631 delta += cnt - 1;
7632 env->prog = prog = new_prog;
7633 insn = new_prog->insnsi + i + delta;
7634 continue;
7635 }
7636
79741b3b
AS
7637 if (insn->code != (BPF_JMP | BPF_CALL))
7638 continue;
cc8b0b92
AS
7639 if (insn->src_reg == BPF_PSEUDO_CALL)
7640 continue;
e245c5c6 7641
79741b3b
AS
7642 if (insn->imm == BPF_FUNC_get_route_realm)
7643 prog->dst_needed = 1;
7644 if (insn->imm == BPF_FUNC_get_prandom_u32)
7645 bpf_user_rnd_init_once();
9802d865
JB
7646 if (insn->imm == BPF_FUNC_override_return)
7647 prog->kprobe_override = 1;
79741b3b 7648 if (insn->imm == BPF_FUNC_tail_call) {
7b9f6da1
DM
7649 /* If we tail call into other programs, we
7650 * cannot make any assumptions since they can
7651 * be replaced dynamically during runtime in
7652 * the program array.
7653 */
7654 prog->cb_access = 1;
80a58d02 7655 env->prog->aux->stack_depth = MAX_BPF_STACK;
e647815a 7656 env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
7b9f6da1 7657
79741b3b
AS
7658 /* mark bpf_tail_call as a different opcode to avoid
7659 * a conditional branch in the interpreter for every normal
7660 * call and to prevent accidental JITing by a JIT compiler
7661 * that doesn't support bpf_tail_call yet
e245c5c6 7662 */
79741b3b 7663 insn->imm = 0;
71189fa9 7664 insn->code = BPF_JMP | BPF_TAIL_CALL;
b2157399 7665
c93552c4
DB
7666 aux = &env->insn_aux_data[i + delta];
7667 if (!bpf_map_ptr_unpriv(aux))
7668 continue;
7669
b2157399
AS
7670 /* instead of changing every JIT dealing with tail_call
7671 * emit two extra insns:
7672 * if (index >= max_entries) goto out;
7673 * index &= array->index_mask;
7674 * to avoid out-of-bounds cpu speculation
7675 */
c93552c4 7676 if (bpf_map_ptr_poisoned(aux)) {
40950343 7677 verbose(env, "tail_call abusing map_ptr\n");
b2157399
AS
7678 return -EINVAL;
7679 }
c93552c4
DB
7680
7681 map_ptr = BPF_MAP_PTR(aux->map_state);
b2157399
AS
7682 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
7683 map_ptr->max_entries, 2);
7684 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
7685 container_of(map_ptr,
7686 struct bpf_array,
7687 map)->index_mask);
7688 insn_buf[2] = *insn;
7689 cnt = 3;
7690 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
7691 if (!new_prog)
7692 return -ENOMEM;
7693
7694 delta += cnt - 1;
7695 env->prog = prog = new_prog;
7696 insn = new_prog->insnsi + i + delta;
79741b3b
AS
7697 continue;
7698 }
e245c5c6 7699
89c63074 7700 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
09772d92
DB
7701 * and other inlining handlers are currently limited to 64 bit
7702 * only.
89c63074 7703 */
60b58afc 7704 if (prog->jit_requested && BITS_PER_LONG == 64 &&
09772d92
DB
7705 (insn->imm == BPF_FUNC_map_lookup_elem ||
7706 insn->imm == BPF_FUNC_map_update_elem ||
84430d42
DB
7707 insn->imm == BPF_FUNC_map_delete_elem ||
7708 insn->imm == BPF_FUNC_map_push_elem ||
7709 insn->imm == BPF_FUNC_map_pop_elem ||
7710 insn->imm == BPF_FUNC_map_peek_elem)) {
c93552c4
DB
7711 aux = &env->insn_aux_data[i + delta];
7712 if (bpf_map_ptr_poisoned(aux))
7713 goto patch_call_imm;
7714
7715 map_ptr = BPF_MAP_PTR(aux->map_state);
09772d92
DB
7716 ops = map_ptr->ops;
7717 if (insn->imm == BPF_FUNC_map_lookup_elem &&
7718 ops->map_gen_lookup) {
7719 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
7720 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
7721 verbose(env, "bpf verifier is misconfigured\n");
7722 return -EINVAL;
7723 }
81ed18ab 7724
09772d92
DB
7725 new_prog = bpf_patch_insn_data(env, i + delta,
7726 insn_buf, cnt);
7727 if (!new_prog)
7728 return -ENOMEM;
81ed18ab 7729
09772d92
DB
7730 delta += cnt - 1;
7731 env->prog = prog = new_prog;
7732 insn = new_prog->insnsi + i + delta;
7733 continue;
7734 }
81ed18ab 7735
09772d92
DB
7736 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
7737 (void *(*)(struct bpf_map *map, void *key))NULL));
7738 BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
7739 (int (*)(struct bpf_map *map, void *key))NULL));
7740 BUILD_BUG_ON(!__same_type(ops->map_update_elem,
7741 (int (*)(struct bpf_map *map, void *key, void *value,
7742 u64 flags))NULL));
84430d42
DB
7743 BUILD_BUG_ON(!__same_type(ops->map_push_elem,
7744 (int (*)(struct bpf_map *map, void *value,
7745 u64 flags))NULL));
7746 BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
7747 (int (*)(struct bpf_map *map, void *value))NULL));
7748 BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
7749 (int (*)(struct bpf_map *map, void *value))NULL));
7750
09772d92
DB
7751 switch (insn->imm) {
7752 case BPF_FUNC_map_lookup_elem:
7753 insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
7754 __bpf_call_base;
7755 continue;
7756 case BPF_FUNC_map_update_elem:
7757 insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
7758 __bpf_call_base;
7759 continue;
7760 case BPF_FUNC_map_delete_elem:
7761 insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
7762 __bpf_call_base;
7763 continue;
84430d42
DB
7764 case BPF_FUNC_map_push_elem:
7765 insn->imm = BPF_CAST_CALL(ops->map_push_elem) -
7766 __bpf_call_base;
7767 continue;
7768 case BPF_FUNC_map_pop_elem:
7769 insn->imm = BPF_CAST_CALL(ops->map_pop_elem) -
7770 __bpf_call_base;
7771 continue;
7772 case BPF_FUNC_map_peek_elem:
7773 insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
7774 __bpf_call_base;
7775 continue;
09772d92 7776 }
81ed18ab 7777
09772d92 7778 goto patch_call_imm;
81ed18ab
AS
7779 }
7780
7781patch_call_imm:
5e43f899 7782 fn = env->ops->get_func_proto(insn->imm, env->prog);
79741b3b
AS
7783 /* all functions that have a prototype and that the verifier allowed
7784 * programs to call must be real in-kernel functions
7785 */
7786 if (!fn->func) {
61bd5218
JK
7787 verbose(env,
7788 "kernel subsystem misconfigured func %s#%d\n",
79741b3b
AS
7789 func_id_name(insn->imm), insn->imm);
7790 return -EFAULT;
e245c5c6 7791 }
79741b3b 7792 insn->imm = fn->func - __bpf_call_base;
e245c5c6 7793 }
e245c5c6 7794
79741b3b
AS
7795 return 0;
7796}
e245c5c6 7797
58e2af8b 7798static void free_states(struct bpf_verifier_env *env)
f1bca824 7799{
58e2af8b 7800 struct bpf_verifier_state_list *sl, *sln;
f1bca824
AS
7801 int i;
7802
7803 if (!env->explored_states)
7804 return;
7805
7806 for (i = 0; i < env->prog->len; i++) {
7807 sl = env->explored_states[i];
7808
7809 if (sl)
7810 while (sl != STATE_LIST_MARK) {
7811 sln = sl->next;
1969db47 7812 free_verifier_state(&sl->state, false);
f1bca824
AS
7813 kfree(sl);
7814 sl = sln;
7815 }
7816 }
7817
7818 kfree(env->explored_states);
7819}
7820
838e9690
YS
7821int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
7822 union bpf_attr __user *uattr)
51580e79 7823{
58e2af8b 7824 struct bpf_verifier_env *env;
b9193c1b 7825 struct bpf_verifier_log *log;
9e4c24e7 7826 int i, len, ret = -EINVAL;
e2ae4ca2 7827 bool is_priv;
51580e79 7828
eba0c929
AB
7829 /* no program is valid */
7830 if (ARRAY_SIZE(bpf_verifier_ops) == 0)
7831 return -EINVAL;
7832
58e2af8b 7833 /* 'struct bpf_verifier_env' can be global, but since it's not small,
cbd35700
AS
7834 * allocate/free it every time bpf_check() is called
7835 */
58e2af8b 7836 env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
cbd35700
AS
7837 if (!env)
7838 return -ENOMEM;
61bd5218 7839 log = &env->log;
cbd35700 7840
9e4c24e7 7841 len = (*prog)->len;
fad953ce 7842 env->insn_aux_data =
9e4c24e7 7843 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
3df126f3
JK
7844 ret = -ENOMEM;
7845 if (!env->insn_aux_data)
7846 goto err_free_env;
9e4c24e7
JK
7847 for (i = 0; i < len; i++)
7848 env->insn_aux_data[i].orig_idx = i;
9bac3d6d 7849 env->prog = *prog;
00176a34 7850 env->ops = bpf_verifier_ops[env->prog->type];
0246e64d 7851
cbd35700
AS
7852 /* grab the mutex to protect few globals used by verifier */
7853 mutex_lock(&bpf_verifier_lock);
7854
7855 if (attr->log_level || attr->log_buf || attr->log_size) {
7856 /* user requested verbose verifier output
7857 * and supplied buffer to store the verification trace
7858 */
e7bf8249
JK
7859 log->level = attr->log_level;
7860 log->ubuf = (char __user *) (unsigned long) attr->log_buf;
7861 log->len_total = attr->log_size;
cbd35700
AS
7862
7863 ret = -EINVAL;
e7bf8249
JK
7864 /* log attributes have to be sane */
7865 if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
7866 !log->level || !log->ubuf)
3df126f3 7867 goto err_unlock;
cbd35700 7868 }
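	/* Illustrative user-space sketch (raw bpf(2) use; 'insns' and 'insn_cnt'
	 * are hypothetical): requesting this verification trace at load time.
	 *
	 *	static char vlog[65536];
	 *	union bpf_attr attr = {
	 *		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
	 *		.insns     = (__u64)(unsigned long)insns,
	 *		.insn_cnt  = insn_cnt,
	 *		.license   = (__u64)(unsigned long)"GPL",
	 *		.log_level = 1,
	 *		.log_buf   = (__u64)(unsigned long)vlog,
	 *		.log_size  = sizeof(vlog),	// must be >= 128 per the check above
	 *	};
	 *	int prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
	 */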
1ad2f583
DB
7869
7870 env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
7871 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
e07b98d9 7872 env->strict_alignment = true;
e9ee9efc
DM
7873 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
7874 env->strict_alignment = false;
cbd35700 7875
e2ae4ca2
JK
7876 is_priv = capable(CAP_SYS_ADMIN);
7877 env->allow_ptr_leaks = is_priv;
7878
f4e3ec0d
JK
7879 ret = replace_map_fd_with_map_ptr(env);
7880 if (ret < 0)
7881 goto skip_full_check;
7882
cae1927c 7883 if (bpf_prog_is_dev_bound(env->prog->aux)) {
a40a2632 7884 ret = bpf_prog_offload_verifier_prep(env->prog);
ab3f0063 7885 if (ret)
f4e3ec0d 7886 goto skip_full_check;
ab3f0063
JK
7887 }
7888
9bac3d6d 7889 env->explored_states = kcalloc(env->prog->len,
58e2af8b 7890 sizeof(struct bpf_verifier_state_list *),
f1bca824
AS
7891 GFP_USER);
7892 ret = -ENOMEM;
7893 if (!env->explored_states)
7894 goto skip_full_check;
7895
d9762e84 7896 ret = check_subprogs(env);
475fb78f
AS
7897 if (ret < 0)
7898 goto skip_full_check;
7899
c454a46b 7900 ret = check_btf_info(env, attr, uattr);
838e9690
YS
7901 if (ret < 0)
7902 goto skip_full_check;
7903
d9762e84
MKL
7904 ret = check_cfg(env);
7905 if (ret < 0)
7906 goto skip_full_check;
7907
17a52670 7908 ret = do_check(env);
8c01c4f8
CG
7909 if (env->cur_state) {
7910 free_verifier_state(env->cur_state, true);
7911 env->cur_state = NULL;
7912 }
cbd35700 7913
c941ce9c
QM
7914 if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
7915 ret = bpf_prog_offload_finalize(env);
7916
0246e64d 7917skip_full_check:
638f5b90 7918 while (!pop_stack(env, NULL, NULL));
f1bca824 7919 free_states(env);
0246e64d 7920
c131187d 7921 if (ret == 0)
9b38c405 7922 ret = check_max_stack_depth(env);
c131187d 7923
9b38c405 7924 /* instruction rewrites happen after this point */
e2ae4ca2
JK
7925 if (is_priv) {
7926 if (ret == 0)
7927 opt_hard_wire_dead_code_branches(env);
52875a04
JK
7928 if (ret == 0)
7929 ret = opt_remove_dead_code(env);
a1b14abc
JK
7930 if (ret == 0)
7931 ret = opt_remove_nops(env);
52875a04
JK
7932 } else {
7933 if (ret == 0)
7934 sanitize_dead_code(env);
e2ae4ca2
JK
7935 }
7936
9bac3d6d
AS
7937 if (ret == 0)
7938 /* program is valid, convert *(u32*)(ctx + off) accesses */
7939 ret = convert_ctx_accesses(env);
7940
e245c5c6 7941 if (ret == 0)
79741b3b 7942 ret = fixup_bpf_calls(env);
e245c5c6 7943
1ea47e01
AS
7944 if (ret == 0)
7945 ret = fixup_call_args(env);
7946
a2a7d570 7947 if (log->level && bpf_verifier_log_full(log))
cbd35700 7948 ret = -ENOSPC;
a2a7d570 7949 if (log->level && !log->ubuf) {
cbd35700 7950 ret = -EFAULT;
a2a7d570 7951 goto err_release_maps;
cbd35700
AS
7952 }
7953
0246e64d
AS
7954 if (ret == 0 && env->used_map_cnt) {
7955 /* if program passed verifier, update used_maps in bpf_prog_info */
9bac3d6d
AS
7956 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
7957 sizeof(env->used_maps[0]),
7958 GFP_KERNEL);
0246e64d 7959
9bac3d6d 7960 if (!env->prog->aux->used_maps) {
0246e64d 7961 ret = -ENOMEM;
a2a7d570 7962 goto err_release_maps;
0246e64d
AS
7963 }
7964
9bac3d6d 7965 memcpy(env->prog->aux->used_maps, env->used_maps,
0246e64d 7966 sizeof(env->used_maps[0]) * env->used_map_cnt);
9bac3d6d 7967 env->prog->aux->used_map_cnt = env->used_map_cnt;
0246e64d
AS
7968
7969 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
7970 * bpf_ld_imm64 instructions
7971 */
7972 convert_pseudo_ld_imm64(env);
7973 }
cbd35700 7974
ba64e7d8
YS
7975 if (ret == 0)
7976 adjust_btf_func(env);
7977
a2a7d570 7978err_release_maps:
9bac3d6d 7979 if (!env->prog->aux->used_maps)
0246e64d 7980 /* if we didn't copy map pointers into bpf_prog_info, release
ab7f5bf0 7981 * them now. Otherwise free_used_maps() will release them.
0246e64d
AS
7982 */
7983 release_maps(env);
9bac3d6d 7984 *prog = env->prog;
3df126f3 7985err_unlock:
cbd35700 7986 mutex_unlock(&bpf_verifier_lock);
3df126f3
JK
7987 vfree(env->insn_aux_data);
7988err_free_env:
7989 kfree(env);
51580e79
AS
7990 return ret;
7991}