kernel/bpf/verifier.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
   3  * Copyright (c) 2016 Facebook
   4  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
   5  */
   6 #include <uapi/linux/btf.h>
   7 #include <linux/bpf-cgroup.h>
   8 #include <linux/kernel.h>
   9 #include <linux/types.h>
  10 #include <linux/slab.h>
  11 #include <linux/bpf.h>
  12 #include <linux/btf.h>
  13 #include <linux/bpf_verifier.h>
  14 #include <linux/filter.h>
  15 #include <net/netlink.h>
  16 #include <linux/file.h>
  17 #include <linux/vmalloc.h>
  18 #include <linux/stringify.h>
  19 #include <linux/bsearch.h>
  20 #include <linux/sort.h>
  21 #include <linux/perf_event.h>
  22 #include <linux/ctype.h>
  23 #include <linux/error-injection.h>
  24 #include <linux/bpf_lsm.h>
  25 #include <linux/btf_ids.h>
  26
  27 #include "disasm.h"
  28
/* Table of verifier ops, indexed by program type id.
 *
 * The entries are generated by including <linux/bpf_types.h> with
 * BPF_PROG_TYPE() defined to emit a designated initializer pointing at the
 * "<_name>_verifier_ops" object. BPF_MAP_TYPE() and BPF_LINK_TYPE() are
 * defined empty here so that map and link entries in that header expand to
 * nothing. All three helper macros are undefined again afterwards.
 */
static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};
  39
  40 /* bpf_check() is a static code analyzer that walks eBPF program
  41  * instruction by instruction and updates register/stack state.
  42  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
  43  *
  44  * The first pass is depth-first-search to check that the program is a DAG.
  45  * It rejects the following programs:
  46  * - larger than BPF_MAXINSNS insns
  47  * - if loop is present (detected via back-edge)
  48  * - unreachable insns exist (shouldn't be a forest. program = one function)
  49  * - out of bounds or malformed jumps
  50  * The second pass is all possible path descent from the 1st insn.
  51  * Since it's analyzing all paths through the program, the length of the
  52  * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
  54  * Number of 'branches to be analyzed' is limited to 1k
  55  *
  56  * On entry to each instruction, each register has a type, and the instruction
  57  * changes the types of the registers depending on instruction semantics.
  58  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
  59  * copied to R1.
  60  *
  61  * All registers are 64-bit.
  62  * R0 - return register
  63  * R1-R5 argument passing registers
  64  * R6-R9 callee saved registers
  65  * R10 - frame pointer read-only
  66  *
  67  * At the start of BPF program the register R1 contains a pointer to bpf_context
  68  * and has type PTR_TO_CTX.
  69  *
  70  * Verifier tracks arithmetic operations on pointers in case:
  71  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
  72  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
  73  * 1st insn copies R10 (which has FRAME_PTR) type into R1
  74  * and 2nd arithmetic instruction is pattern matched to recognize
  75  * that it wants to construct a pointer to some element within stack.
  76  * So after 2nd insn, the register R1 has type PTR_TO_STACK
  77  * (and -20 constant is saved for further stack bounds checking).
  78  * Meaning that this reg is a pointer to stack plus known immediate constant.
  79  *
  80  * Most of the time the registers have SCALAR_VALUE type, which
  81  * means the register has some value, but it's not a valid pointer.
  82  * (like pointer plus pointer becomes SCALAR_VALUE type)
  83  *
  84  * When verifier sees load or store instructions the type of base register
  85  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
  86  * four pointer types recognized by check_mem_access() function.
  87  *
  88  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
  89  * and the range of [ptr, ptr + map's value_size) is accessible.
  90  *
  91  * registers used to pass values to function calls are checked against
  92  * function argument constraints.
  93  *
  94  * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
  95  * It means that the register type passed to this function must be
  96  * PTR_TO_STACK and it will be used inside the function as
  97  * 'pointer to map element key'
  98  *
  99  * For example the argument constraints for bpf_map_lookup_elem():
 100  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 101  *   .arg1_type = ARG_CONST_MAP_PTR,
 102  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 103  *
 104  * ret_type says that this function returns 'pointer to map elem value or null'
 105  * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 106  * 2nd argument should be a pointer to stack, which will be used inside
 107  * the helper function as a pointer to map element key.
 108  *
 109  * On the kernel side the helper function looks like:
 110  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 111  * {
 112  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 113  *    void *key = (void *) (unsigned long) r2;
 114  *    void *value;
 115  *
 116  *    here kernel can access 'key' and 'map' pointers safely, knowing that
 117  *    [key, key + map->key_size) bytes are valid and were initialized on
 118  *    the stack of eBPF program.
 119  * }
 120  *
 121  * Corresponding eBPF program may look like:
 122  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 123  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 124  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 125  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 126  * here verifier looks at prototype of map_lookup_elem() and sees:
 127  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 128  * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 129  *
 130  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 131  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 132  * and were initialized prior to this call.
 133  * If it's ok, then verifier allows this BPF_CALL insn and looks at
 134  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 135  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 136  * returns either pointer to map value or NULL.
 137  *
 138  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 139  * insn, the register holding that pointer in the true branch changes state to
 140  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 141  * branch. See check_cond_jmp_op().
 142  *
 143  * After the call R0 is set to return type of the function and registers R1-R5
 144  * are set to NOT_INIT to indicate that they are no longer readable.
 145  *
 146  * The following reference types represent a potential reference to a kernel
 147  * resource which, after first being allocated, must be checked and freed by
 148  * the BPF program:
 149  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 150  *
 151  * When the verifier sees a helper call return a reference type, it allocates a
 152  * pointer id for the reference and stores it in the current function state.
 153  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 154  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 155  * passes through a NULL-check conditional. For the branch wherein the state is
 156  * changed to CONST_IMM, the verifier releases the reference.
 157  *
 158  * For each helper function that allocates a reference, such as
 159  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 160  * bpf_sk_release(). When a reference type passes into the release function,
 161  * the verifier also releases the reference. If any unchecked or unreleased
 162  * reference remains at the end of the program, the verifier rejects it.
 163  */
 164
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	/* NOTE(review): presumably links to the element below this one on the
	 * stack of pending branches - confirm against the push/pop code.
	 */
	struct bpf_verifier_stack_elem *next;
	/* length of verifier log at the time this state was pushed on stack */
	u32 log_pos;
};
 178
/* Complexity limits. NOTE(review): the code enforcing them is outside this
 * part of the file - confirm exactly what each one bounds.
 */
#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
#define BPF_COMPLEXITY_LIMIT_STATES	64

/* Flag bits kept in the two top bits of bpf_insn_aux_data::map_key_state;
 * the remaining bits hold the value returned by bpf_map_key_immediate().
 */
#define BPF_MAP_KEY_POISON	(1ULL << 63)
#define BPF_MAP_KEY_SEEN	(1ULL << 62)

/* bpf_insn_aux_data::map_ptr_state stores a map pointer with
 * BPF_MAP_PTR_UNPRIV or-ed into bit 0 as a tag. BPF_MAP_PTR() masks the tag
 * off again, and BPF_MAP_PTR_POISON is a sentinel pointer value whose bit 0
 * is clear (checked by the BUILD_BUG_ON() in bpf_map_ptr_store()).
 */
#define BPF_MAP_PTR_UNPRIV	1UL
#define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
					  POISON_POINTER_DELTA))
#define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
 189
 190 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
 191 {
 192         return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
 193 }
 194
 195 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
 196 {
 197         return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
 198 }
 199
 200 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
 201                               const struct bpf_map *map, bool unpriv)
 202 {
 203         BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
 204         unpriv |= bpf_map_ptr_unpriv(aux);
 205         aux->map_ptr_state = (unsigned long)map |
 206                              (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
 207 }
 208
 209 static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
 210 {
 211         return aux->map_key_state & BPF_MAP_KEY_POISON;
 212 }
 213
 214 static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
 215 {
 216         return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
 217 }
 218
 219 static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
 220 {
 221         return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
 222 }
 223
 224 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
 225 {
 226         bool poisoned = bpf_map_key_poisoned(aux);
 227
 228         aux->map_key_state = state | BPF_MAP_KEY_SEEN |
 229                              (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
 230 }
 231
 232 static bool bpf_pseudo_call(const struct bpf_insn *insn)
 233 {
 234         return insn->code == (BPF_JMP | BPF_CALL) &&
 235                insn->src_reg == BPF_PSEUDO_CALL;
 236 }
 237
 238 static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
 239 {
 240         return insn->code == (BPF_JMP | BPF_CALL) &&
 241                insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
 242 }
 243
/* Metadata about the arguments of a call.
 *
 * NOTE(review): the code that fills in and consumes these fields is outside
 * this part of the file; the field meanings below are inferred from the
 * names only and should be confirmed against the users.
 */
struct bpf_call_arg_meta {
	struct bpf_map *map_ptr;
	bool raw_mode;
	bool pkt_access;
	int regno;
	int access_size;
	int mem_size;
	u64 msize_max_value;
	int ref_obj_id;
	int map_uid;
	int func_id;
	struct btf *btf;
	u32 btf_id;
	struct btf *ret_btf;
	u32 ret_btf_id;
	u32 subprogno;
};
 261
/* Not static, so this pointer is visible to other translation units.
 * NOTE(review): presumably the BTF object describing vmlinux - confirm where
 * it is assigned.
 */
struct btf *btf_vmlinux;

/* File-local mutex. NOTE(review): its lock/unlock sites are outside this
 * part of the file - confirm what it serializes.
 */
static DEFINE_MUTEX(bpf_verifier_lock);
 265
 266 static const struct bpf_line_info *
 267 find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
 268 {
 269         const struct bpf_line_info *linfo;
 270         const struct bpf_prog *prog;
 271         u32 i, nr_linfo;
 272
 273         prog = env->prog;
 274         nr_linfo = prog->aux->nr_linfo;
 275
 276         if (!nr_linfo || insn_off >= prog->len)
 277                 return NULL;
 278
 279         linfo = prog->aux->linfo;
 280         for (i = 1; i < nr_linfo; i++)
 281                 if (insn_off < linfo[i].insn_off)
 282                         break;
 283
 284         return &linfo[i - 1];
 285 }
 286
 287 void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
 288                        va_list args)
 289 {
 290         unsigned int n;
 291
 292         n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
 293
 294         WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
 295                   "verifier log line truncated - local buffer too short\n");
 296
 297         if (log->level == BPF_LOG_KERNEL) {
 298                 bool newline = n > 0 && log->kbuf[n - 1] == '\n';
 299
 300                 pr_err("BPF: %s%s", log->kbuf, newline ? "" : "\n");
 301                 return;
 302         }
 303
 304         n = min(log->len_total - log->len_used - 1, n);
 305         log->kbuf[n] = '\0';
 306         if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
 307                 log->len_used += n;
 308         else
 309                 log->ubuf = NULL;
 310 }
 311
 312 static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
 313 {
 314         char zero = 0;
 315
 316         if (!bpf_verifier_log_needed(log))
 317                 return;
 318
 319         log->len_used = new_pos;
 320         if (put_user(zero, log->ubuf + new_pos))
 321                 log->ubuf = NULL;
 322 }
 323
 324 /* log_level controls verbosity level of eBPF verifier.
 325  * bpf_verifier_log_write() is used to dump the verification trace to the log,
 326  * so the user can figure out what's wrong with the program
 327  */
 328 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
 329                                            const char *fmt, ...)
 330 {
 331         va_list args;
 332
 333         if (!bpf_verifier_log_needed(&env->log))
 334                 return;
 335
 336         va_start(args, fmt);
 337         bpf_verifier_vlog(&env->log, fmt, args);
 338         va_end(args);
 339 }
 340 EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
 341
 342 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
 343 {
 344         struct bpf_verifier_env *env = private_data;
 345         va_list args;
 346
 347         if (!bpf_verifier_log_needed(&env->log))
 348                 return;
 349
 350         va_start(args, fmt);
 351         bpf_verifier_vlog(&env->log, fmt, args);
 352         va_end(args);
 353 }
 354
 355 __printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
 356                             const char *fmt, ...)
 357 {
 358         va_list args;
 359
 360         if (!bpf_verifier_log_needed(log))
 361                 return;
 362
 363         va_start(args, fmt);
 364         bpf_verifier_vlog(log, fmt, args);
 365         va_end(args);
 366 }
 367
 368 static const char *ltrim(const char *s)
 369 {
 370         while (isspace(*s))
 371                 s++;
 372
 373         return s;
 374 }
 375
 376 __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
 377                                          u32 insn_off,
 378                                          const char *prefix_fmt, ...)
 379 {
 380         const struct bpf_line_info *linfo;
 381
 382         if (!bpf_verifier_log_needed(&env->log))
 383                 return;
 384
 385         linfo = find_linfo(env, insn_off);
 386         if (!linfo || linfo == env->prev_linfo)
 387                 return;
 388
 389         if (prefix_fmt) {
 390                 va_list args;
 391
 392                 va_start(args, prefix_fmt);
 393                 bpf_verifier_vlog(&env->log, prefix_fmt, args);
 394                 va_end(args);
 395         }
 396
 397         verbose(env, "%s\n",
 398                 ltrim(btf_name_by_offset(env->prog->aux->btf,
 399                                          linfo->line_off)));
 400
 401         env->prev_linfo = linfo;
 402 }
 403
 404 static void verbose_invalid_scalar(struct bpf_verifier_env *env,
 405                                    struct bpf_reg_state *reg,
 406                                    struct tnum *range, const char *ctx,
 407                                    const char *reg_name)
 408 {
 409         char tn_buf[48];
 410
 411         verbose(env, "At %s the register %s ", ctx, reg_name);
 412         if (!tnum_is_unknown(reg->var_off)) {
 413                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 414                 verbose(env, "has value %s", tn_buf);
 415         } else {
 416                 verbose(env, "has unknown scalar value");
 417         }
 418         tnum_strn(tn_buf, sizeof(tn_buf), *range);
 419         verbose(env, " should have been in %s\n", tn_buf);
 420 }
 421
 422 static bool type_is_pkt_pointer(enum bpf_reg_type type)
 423 {
 424         return type == PTR_TO_PACKET ||
 425                type == PTR_TO_PACKET_META;
 426 }
 427
 428 static bool type_is_sk_pointer(enum bpf_reg_type type)
 429 {
 430         return type == PTR_TO_SOCKET ||
 431                 type == PTR_TO_SOCK_COMMON ||
 432                 type == PTR_TO_TCP_SOCK ||
 433                 type == PTR_TO_XDP_SOCK;
 434 }
 435
 436 static bool reg_type_not_null(enum bpf_reg_type type)
 437 {
 438         return type == PTR_TO_SOCKET ||
 439                 type == PTR_TO_TCP_SOCK ||
 440                 type == PTR_TO_MAP_VALUE ||
 441                 type == PTR_TO_MAP_KEY ||
 442                 type == PTR_TO_SOCK_COMMON;
 443 }
 444
 445 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
 446 {
 447         return reg->type == PTR_TO_MAP_VALUE &&
 448                 map_value_has_spin_lock(reg->map_ptr);
 449 }
 450
 451 static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
 452 {
 453         return base_type(type) == PTR_TO_SOCKET ||
 454                 base_type(type) == PTR_TO_TCP_SOCK ||
 455                 base_type(type) == PTR_TO_MEM ||
 456                 base_type(type) == PTR_TO_BTF_ID;
 457 }
 458
 459 static bool type_is_rdonly_mem(u32 type)
 460 {
 461         return type & MEM_RDONLY;
 462 }
 463
 464 static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
 465 {
 466         return type == ARG_PTR_TO_SOCK_COMMON;
 467 }
 468
 469 static bool type_may_be_null(u32 type)
 470 {
 471         return type & PTR_MAYBE_NULL;
 472 }
 473
 474 /* Determine whether the function releases some resources allocated by another
 475  * function call. The first reference type argument will be assumed to be
 476  * released by release_reference().
 477  */
 478 static bool is_release_function(enum bpf_func_id func_id)
 479 {
 480         return func_id == BPF_FUNC_sk_release ||
 481                func_id == BPF_FUNC_ringbuf_submit ||
 482                func_id == BPF_FUNC_ringbuf_discard;
 483 }
 484
 485 static bool may_be_acquire_function(enum bpf_func_id func_id)
 486 {
 487         return func_id == BPF_FUNC_sk_lookup_tcp ||
 488                 func_id == BPF_FUNC_sk_lookup_udp ||
 489                 func_id == BPF_FUNC_skc_lookup_tcp ||
 490                 func_id == BPF_FUNC_map_lookup_elem ||
 491                 func_id == BPF_FUNC_ringbuf_reserve;
 492 }
 493
 494 static bool is_acquire_function(enum bpf_func_id func_id,
 495                                 const struct bpf_map *map)
 496 {
 497         enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
 498
 499         if (func_id == BPF_FUNC_sk_lookup_tcp ||
 500             func_id == BPF_FUNC_sk_lookup_udp ||
 501             func_id == BPF_FUNC_skc_lookup_tcp ||
 502             func_id == BPF_FUNC_ringbuf_reserve)
 503                 return true;
 504
 505         if (func_id == BPF_FUNC_map_lookup_elem &&
 506             (map_type == BPF_MAP_TYPE_SOCKMAP ||
 507              map_type == BPF_MAP_TYPE_SOCKHASH))
 508                 return true;
 509
 510         return false;
 511 }
 512
 513 static bool is_ptr_cast_function(enum bpf_func_id func_id)
 514 {
 515         return func_id == BPF_FUNC_tcp_sock ||
 516                 func_id == BPF_FUNC_sk_fullsock ||
 517                 func_id == BPF_FUNC_skc_to_tcp_sock ||
 518                 func_id == BPF_FUNC_skc_to_tcp6_sock ||
 519                 func_id == BPF_FUNC_skc_to_udp6_sock ||
 520                 func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
 521                 func_id == BPF_FUNC_skc_to_tcp_request_sock;
 522 }
 523
 524 static bool is_cmpxchg_insn(const struct bpf_insn *insn)
 525 {
 526         return BPF_CLASS(insn->code) == BPF_STX &&
 527                BPF_MODE(insn->code) == BPF_ATOMIC &&
 528                insn->imm == BPF_CMPXCHG;
 529 }
 530
 531 /* string representation of 'enum bpf_reg_type'
 532  *
 533  * Note that reg_type_str() can not appear more than once in a single verbose()
 534  * statement.
 535  */
 536 static const char *reg_type_str(struct bpf_verifier_env *env,
 537                                 enum bpf_reg_type type)
 538 {
 539         char postfix[16] = {0}, prefix[16] = {0};
 540         static const char * const str[] = {
 541                 [NOT_INIT]              = "?",
 542                 [SCALAR_VALUE]          = "inv",
 543                 [PTR_TO_CTX]            = "ctx",
 544                 [CONST_PTR_TO_MAP]      = "map_ptr",
 545                 [PTR_TO_MAP_VALUE]      = "map_value",
 546                 [PTR_TO_STACK]          = "fp",
 547                 [PTR_TO_PACKET]         = "pkt",
 548                 [PTR_TO_PACKET_META]    = "pkt_meta",
 549                 [PTR_TO_PACKET_END]     = "pkt_end",
 550                 [PTR_TO_FLOW_KEYS]      = "flow_keys",
 551                 [PTR_TO_SOCKET]         = "sock",
 552                 [PTR_TO_SOCK_COMMON]    = "sock_common",
 553                 [PTR_TO_TCP_SOCK]       = "tcp_sock",
 554                 [PTR_TO_TP_BUFFER]      = "tp_buffer",
 555                 [PTR_TO_XDP_SOCK]       = "xdp_sock",
 556                 [PTR_TO_BTF_ID]         = "ptr_",
 557                 [PTR_TO_PERCPU_BTF_ID]  = "percpu_ptr_",
 558                 [PTR_TO_MEM]            = "mem",
 559                 [PTR_TO_BUF]            = "buf",
 560                 [PTR_TO_FUNC]           = "func",
 561                 [PTR_TO_MAP_KEY]        = "map_key",
 562         };
 563
 564         if (type & PTR_MAYBE_NULL) {
 565                 if (base_type(type) == PTR_TO_BTF_ID ||
 566                     base_type(type) == PTR_TO_PERCPU_BTF_ID)
 567                         strncpy(postfix, "or_null_", 16);
 568                 else
 569                         strncpy(postfix, "_or_null", 16);
 570         }
 571
 572         if (type & MEM_RDONLY)
 573                 strncpy(prefix, "rdonly_", 16);
 574         if (type & MEM_ALLOC)
 575                 strncpy(prefix, "alloc_", 16);
 576
 577         snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
 578                  prefix, str[base_type(type)], postfix);
 579         return env->type_str_buf;
 580 }
 581
/* Single-character code for each stack slot type, indexed by the STACK_*
 * value. print_verifier_state() uses it to render the per-byte types of a
 * stack slot.
 */
static char slot_type_char[] = {
	[STACK_INVALID]	= '?',
	[STACK_SPILL]	= 'r',
	[STACK_MISC]	= 'm',
	[STACK_ZERO]	= '0',
};
 588
 589 static void print_liveness(struct bpf_verifier_env *env,
 590                            enum bpf_reg_liveness live)
 591 {
 592         if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
 593             verbose(env, "_");
 594         if (live & REG_LIVE_READ)
 595                 verbose(env, "r");
 596         if (live & REG_LIVE_WRITTEN)
 597                 verbose(env, "w");
 598         if (live & REG_LIVE_DONE)
 599                 verbose(env, "D");
 600 }
 601
 602 static struct bpf_func_state *func(struct bpf_verifier_env *env,
 603                                    const struct bpf_reg_state *reg)
 604 {
 605         struct bpf_verifier_state *cur = env->cur_state;
 606
 607         return cur->frame[reg->frameno];
 608 }
 609
 610 static const char *kernel_type_name(const struct btf* btf, u32 id)
 611 {
 612         return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
 613 }
 614
 615 static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
 616 {
 617         env->scratched_regs |= 1U << regno;
 618 }
 619
 620 static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
 621 {
 622         env->scratched_stack_slots |= 1ULL << spi;
 623 }
 624
 625 static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
 626 {
 627         return (env->scratched_regs >> regno) & 1;
 628 }
 629
 630 static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno)
 631 {
 632         return (env->scratched_stack_slots >> regno) & 1;
 633 }
 634
 635 static bool verifier_state_scratched(const struct bpf_verifier_env *env)
 636 {
 637         return env->scratched_regs || env->scratched_stack_slots;
 638 }
 639
 640 static void mark_verifier_state_clean(struct bpf_verifier_env *env)
 641 {
 642         env->scratched_regs = 0U;
 643         env->scratched_stack_slots = 0ULL;
 644 }
 645
 646 /* Used for printing the entire verifier state. */
 647 static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
 648 {
 649         env->scratched_regs = ~0U;
 650         env->scratched_stack_slots = ~0ULL;
 651 }
 652
 653 /* The reg state of a pointer or a bounded scalar was saved when
 654  * it was spilled to the stack.
 655  */
 656 static bool is_spilled_reg(const struct bpf_stack_state *stack)
 657 {
 658         return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
 659 }
 660
 661 static void scrub_spilled_slot(u8 *stype)
 662 {
 663         if (*stype != STACK_INVALID)
 664                 *stype = STACK_MISC;
 665 }
 666
/* Dump one function frame's verifier state to the log as a single line.
 *
 * @env:       verifier environment; supplies the log and the scratched
 *             register/stack-slot bitmaps.
 * @state:     the function frame to print.
 * @print_all: when false, only registers and stack slots currently marked
 *             as scratched are printed; when true, everything is printed.
 *
 * The line contains, in order: an optional " frame<N>:" tag for non-zero
 * frames, the initialized registers, the stack slots that have at least one
 * valid byte, the acquired reference ids, and " cb" / " async_cb" markers.
 * After printing, all scratched marks in @env are cleared.
 */
static void print_verifier_state(struct bpf_verifier_env *env,
				 const struct bpf_func_state *state,
				 bool print_all)
{
	const struct bpf_reg_state *reg;
	enum bpf_reg_type t;
	int i;

	if (state->frameno)
		verbose(env, " frame%d:", state->frameno);
	/* Registers: " R<i>[_liveness]=<type>..." */
	for (i = 0; i < MAX_BPF_REG; i++) {
		reg = &state->regs[i];
		t = reg->type;
		if (t == NOT_INIT)
			continue;
		if (!print_all && !reg_scratched(env, i))
			continue;
		verbose(env, " R%d", i);
		print_liveness(env, reg->live);
		verbose(env, "=%s", reg_type_str(env, t));
		if (t == SCALAR_VALUE && reg->precise)
			verbose(env, "P");
		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
		    tnum_is_const(reg->var_off)) {
			/* Known constant: print just the value.
			 * reg->off should be 0 for SCALAR_VALUE
			 */
			verbose(env, "%lld", reg->var_off.value + reg->off);
		} else {
			/* Everything else gets a parenthesized attribute list */
			if (base_type(t) == PTR_TO_BTF_ID ||
			    base_type(t) == PTR_TO_PERCPU_BTF_ID)
				verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
			verbose(env, "(id=%d", reg->id);
			if (reg_type_may_be_refcounted_or_null(t))
				verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
			if (t != SCALAR_VALUE)
				verbose(env, ",off=%d", reg->off);
			if (type_is_pkt_pointer(t))
				verbose(env, ",r=%d", reg->range);
			else if (base_type(t) == CONST_PTR_TO_MAP ||
				 base_type(t) == PTR_TO_MAP_KEY ||
				 base_type(t) == PTR_TO_MAP_VALUE)
				verbose(env, ",ks=%d,vs=%d",
					reg->map_ptr->key_size,
					reg->map_ptr->value_size);
			if (tnum_is_const(reg->var_off)) {
				/* Typically an immediate SCALAR_VALUE, but
				 * could be a pointer whose offset is too big
				 * for reg->off
				 */
				verbose(env, ",imm=%llx", reg->var_off.value);
			} else {
				/* Bounds are only printed when they carry
				 * information: a signed bound is skipped when
				 * it equals the unsigned one or is the type's
				 * extreme, an unsigned bound when it is the
				 * type's extreme, and a 32-bit bound when it
				 * equals the 64-bit one or is the extreme.
				 */
				if (reg->smin_value != reg->umin_value &&
				    reg->smin_value != S64_MIN)
					verbose(env, ",smin_value=%lld",
						(long long)reg->smin_value);
				if (reg->smax_value != reg->umax_value &&
				    reg->smax_value != S64_MAX)
					verbose(env, ",smax_value=%lld",
						(long long)reg->smax_value);
				if (reg->umin_value != 0)
					verbose(env, ",umin_value=%llu",
						(unsigned long long)reg->umin_value);
				if (reg->umax_value != U64_MAX)
					verbose(env, ",umax_value=%llu",
						(unsigned long long)reg->umax_value);
				if (!tnum_is_unknown(reg->var_off)) {
					char tn_buf[48];

					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
					verbose(env, ",var_off=%s", tn_buf);
				}
				if (reg->s32_min_value != reg->smin_value &&
				    reg->s32_min_value != S32_MIN)
					verbose(env, ",s32_min_value=%d",
						(int)(reg->s32_min_value));
				if (reg->s32_max_value != reg->smax_value &&
				    reg->s32_max_value != S32_MAX)
					verbose(env, ",s32_max_value=%d",
						(int)(reg->s32_max_value));
				/* NOTE(review): the u32 bounds are cast to int
				 * and printed with %d, so values above INT_MAX
				 * show up as negative - confirm whether log
				 * consumers rely on that.
				 */
				if (reg->u32_min_value != reg->umin_value &&
				    reg->u32_min_value != U32_MIN)
					verbose(env, ",u32_min_value=%d",
						(int)(reg->u32_min_value));
				if (reg->u32_max_value != reg->umax_value &&
				    reg->u32_max_value != U32_MAX)
					verbose(env, ",u32_max_value=%d",
						(int)(reg->u32_max_value));
			}
			verbose(env, ")");
		}
	}
	/* Stack slots: " fp<offset>[_liveness]=<spilled type or byte types>" */
	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
		char types_buf[BPF_REG_SIZE + 1];
		bool valid = false;
		int j;

		/* Render each byte's slot type as one character and note
		 * whether any byte of the slot is valid at all.
		 */
		for (j = 0; j < BPF_REG_SIZE; j++) {
			if (state->stack[i].slot_type[j] != STACK_INVALID)
				valid = true;
			types_buf[j] = slot_type_char[
					state->stack[i].slot_type[j]];
		}
		types_buf[BPF_REG_SIZE] = 0;
		if (!valid)
			continue;
		if (!print_all && !stack_slot_scratched(env, i))
			continue;
		/* Slot i is printed at negative offset -(i + 1) * BPF_REG_SIZE */
		verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
		print_liveness(env, state->stack[i].spilled_ptr.live);
		if (is_spilled_reg(&state->stack[i])) {
			/* Spilled register: print its saved type instead of
			 * the per-byte characters.
			 */
			reg = &state->stack[i].spilled_ptr;
			t = reg->type;
			verbose(env, "=%s", reg_type_str(env, t));
			if (t == SCALAR_VALUE && reg->precise)
				verbose(env, "P");
			if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
				verbose(env, "%lld", reg->var_off.value + reg->off);
		} else {
			verbose(env, "=%s", types_buf);
		}
	}
	/* Reference ids; the list is only printed when the first entry has a
	 * non-zero id, and later entries with a zero id are skipped.
	 */
	if (state->acquired_refs && state->refs[0].id) {
		verbose(env, " refs=%d", state->refs[0].id);
		for (i = 1; i < state->acquired_refs; i++)
			if (state->refs[i].id)
				verbose(env, ",%d", state->refs[i].id);
	}
	if (state->in_callback_fn)
		verbose(env, " cb");
	if (state->in_async_callback_fn)
		verbose(env, " async_cb");
	verbose(env, "\n");
	/* Everything pending has been shown; reset the scratched marks. */
	mark_verifier_state_clean(env);
}
 800
 801 static inline u32 vlog_alignment(u32 pos)
 802 {
 803         return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT),
 804                         BPF_LOG_MIN_ALIGNMENT) - pos - 1;
 805 }
 806
 807 static void print_insn_state(struct bpf_verifier_env *env,
 808                              const struct bpf_func_state *state)
 809 {
 810         if (env->prev_log_len && env->prev_log_len == env->log.len_used) {
 811                 /* remove new line character */
 812                 bpf_vlog_reset(&env->log, env->prev_log_len - 1);
 813                 verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_len), ' ');
 814         } else {
 815                 verbose(env, "%d:", env->insn_idx);
 816         }
 817         print_verifier_state(env, state, false);
 818 }
 819
 820 /* copy array src of length n * size bytes to dst. dst is reallocated if it's too
 821  * small to hold src. This is different from krealloc since we don't want to preserve
 822  * the contents of dst.
 823  *
 824  * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
 825  * not be allocated.
 826  */
 827 static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
 828 {
 829         size_t bytes;
 830
 831         if (ZERO_OR_NULL_PTR(src))
 832                 goto out;
 833
 834         if (unlikely(check_mul_overflow(n, size, &bytes)))
 835                 return NULL;
 836
 837         if (ksize(dst) < bytes) {
 838                 kfree(dst);
 839                 dst = kmalloc_track_caller(bytes, flags);
 840                 if (!dst)
 841                         return NULL;
 842         }
 843
 844         memcpy(dst, src, bytes);
 845 out:
 846         return dst ? dst : ZERO_SIZE_PTR;
 847 }
 848
 849 /* resize an array from old_n items to new_n items. the array is reallocated if it's too
 850  * small to hold new_n items. new items are zeroed out if the array grows.
 851  *
 852  * Contrary to krealloc_array, does not free arr if new_n is zero.
 853  */
 854 static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
 855 {
 856         if (!new_n || old_n == new_n)
 857                 goto out;
 858
 859         arr = krealloc_array(arr, new_n, size, GFP_KERNEL);
 860         if (!arr)
 861                 return NULL;
 862
 863         if (new_n > old_n)
 864                 memset(arr + old_n * size, 0, (new_n - old_n) * size);
 865
 866 out:
 867         return arr ? arr : ZERO_SIZE_PTR;
 868 }
 869
 870 static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
 871 {
 872         dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
 873                                sizeof(struct bpf_reference_state), GFP_KERNEL);
 874         if (!dst->refs)
 875                 return -ENOMEM;
 876
 877         dst->acquired_refs = src->acquired_refs;
 878         return 0;
 879 }
 880
 881 static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
 882 {
 883         size_t n = src->allocated_stack / BPF_REG_SIZE;
 884
 885         dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
 886                                 GFP_KERNEL);
 887         if (!dst->stack)
 888                 return -ENOMEM;
 889
 890         dst->allocated_stack = src->allocated_stack;
 891         return 0;
 892 }
 893
 894 static int resize_reference_state(struct bpf_func_state *state, size_t n)
 895 {
 896         state->refs = realloc_array(state->refs, state->acquired_refs, n,
 897                                     sizeof(struct bpf_reference_state));
 898         if (!state->refs)
 899                 return -ENOMEM;
 900
 901         state->acquired_refs = n;
 902         return 0;
 903 }
 904
 905 static int grow_stack_state(struct bpf_func_state *state, int size)
 906 {
 907         size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
 908
 909         if (old_n >= n)
 910                 return 0;
 911
 912         state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
 913         if (!state->stack)
 914                 return -ENOMEM;
 915
 916         state->allocated_stack = size;
 917         return 0;
 918 }
 919
 920 /* Acquire a pointer id from the env and update the state->refs to include
 921  * this new pointer reference.
 922  * On success, returns a valid pointer id to associate with the register
 923  * On failure, returns a negative errno.
 924  */
 925 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
 926 {
 927         struct bpf_func_state *state = cur_func(env);
 928         int new_ofs = state->acquired_refs;
 929         int id, err;
 930
 931         err = resize_reference_state(state, state->acquired_refs + 1);
 932         if (err)
 933                 return err;
 934         id = ++env->id_gen;
 935         state->refs[new_ofs].id = id;
 936         state->refs[new_ofs].insn_idx = insn_idx;
 937
 938         return id;
 939 }
 940
 941 /* release function corresponding to acquire_reference_state(). Idempotent. */
 942 static int release_reference_state(struct bpf_func_state *state, int ptr_id)
 943 {
 944         int i, last_idx;
 945
 946         last_idx = state->acquired_refs - 1;
 947         for (i = 0; i < state->acquired_refs; i++) {
 948                 if (state->refs[i].id == ptr_id) {
 949                         if (last_idx && i != last_idx)
 950                                 memcpy(&state->refs[i], &state->refs[last_idx],
 951                                        sizeof(*state->refs));
 952                         memset(&state->refs[last_idx], 0, sizeof(*state->refs));
 953                         state->acquired_refs--;
 954                         return 0;
 955                 }
 956         }
 957         return -EINVAL;
 958 }
 959
 960 static void free_func_state(struct bpf_func_state *state)
 961 {
 962         if (!state)
 963                 return;
 964         kfree(state->refs);
 965         kfree(state->stack);
 966         kfree(state);
 967 }
 968
 969 static void clear_jmp_history(struct bpf_verifier_state *state)
 970 {
 971         kfree(state->jmp_history);
 972         state->jmp_history = NULL;
 973         state->jmp_history_cnt = 0;
 974 }
 975
 976 static void free_verifier_state(struct bpf_verifier_state *state,
 977                                 bool free_self)
 978 {
 979         int i;
 980
 981         for (i = 0; i <= state->curframe; i++) {
 982                 free_func_state(state->frame[i]);
 983                 state->frame[i] = NULL;
 984         }
 985         clear_jmp_history(state);
 986         if (free_self)
 987                 kfree(state);
 988 }
 989
 990 /* copy verifier state from src to dst growing dst stack space
 991  * when necessary to accommodate larger src stack
 992  */
 993 static int copy_func_state(struct bpf_func_state *dst,
 994                            const struct bpf_func_state *src)
 995 {
 996         int err;
 997
 998         memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
 999         err = copy_reference_state(dst, src);
1000         if (err)
1001                 return err;
1002         return copy_stack_state(dst, src);
1003 }
1004
1005 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
1006                                const struct bpf_verifier_state *src)
1007 {
1008         struct bpf_func_state *dst;
1009         int i, err;
1010
1011         dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
1012                                             src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
1013                                             GFP_USER);
1014         if (!dst_state->jmp_history)
1015                 return -ENOMEM;
1016         dst_state->jmp_history_cnt = src->jmp_history_cnt;
1017
1018         /* if dst has more stack frames then src frame, free them */
1019         for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
1020                 free_func_state(dst_state->frame[i]);
1021                 dst_state->frame[i] = NULL;
1022         }
1023         dst_state->speculative = src->speculative;
1024         dst_state->curframe = src->curframe;
1025         dst_state->active_spin_lock = src->active_spin_lock;
1026         dst_state->branches = src->branches;
1027         dst_state->parent = src->parent;
1028         dst_state->first_insn_idx = src->first_insn_idx;
1029         dst_state->last_insn_idx = src->last_insn_idx;
1030         for (i = 0; i <= src->curframe; i++) {
1031                 dst = dst_state->frame[i];
1032                 if (!dst) {
1033                         dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1034                         if (!dst)
1035                                 return -ENOMEM;
1036                         dst_state->frame[i] = dst;
1037                 }
1038                 err = copy_func_state(dst, src->frame[i]);
1039                 if (err)
1040                         return err;
1041         }
1042         return 0;
1043 }
1044
1045 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
1046 {
1047         while (st) {
1048                 u32 br = --st->branches;
1049
1050                 /* WARN_ON(br > 1) technically makes sense here,
1051                  * but see comment in push_stack(), hence:
1052                  */
1053                 WARN_ONCE((int)br < 0,
1054                           "BUG update_branch_counts:branches_to_explore=%d\n",
1055                           br);
1056                 if (br)
1057                         break;
1058                 st = st->parent;
1059         }
1060 }
1061
1062 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
1063                      int *insn_idx, bool pop_log)
1064 {
1065         struct bpf_verifier_state *cur = env->cur_state;
1066         struct bpf_verifier_stack_elem *elem, *head = env->head;
1067         int err;
1068
1069         if (env->head == NULL)
1070                 return -ENOENT;
1071
1072         if (cur) {
1073                 err = copy_verifier_state(cur, &head->st);
1074                 if (err)
1075                         return err;
1076         }
1077         if (pop_log)
1078                 bpf_vlog_reset(&env->log, head->log_pos);
1079         if (insn_idx)
1080                 *insn_idx = head->insn_idx;
1081         if (prev_insn_idx)
1082                 *prev_insn_idx = head->prev_insn_idx;
1083         elem = head->next;
1084         free_verifier_state(&head->st, false);
1085         kfree(head);
1086         env->head = elem;
1087         env->stack_size--;
1088         return 0;
1089 }
1090
1091 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
1092                                              int insn_idx, int prev_insn_idx,
1093                                              bool speculative)
1094 {
1095         struct bpf_verifier_state *cur = env->cur_state;
1096         struct bpf_verifier_stack_elem *elem;
1097         int err;
1098
1099         elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1100         if (!elem)
1101                 goto err;
1102
1103         elem->insn_idx = insn_idx;
1104         elem->prev_insn_idx = prev_insn_idx;
1105         elem->next = env->head;
1106         elem->log_pos = env->log.len_used;
1107         env->head = elem;
1108         env->stack_size++;
1109         err = copy_verifier_state(&elem->st, cur);
1110         if (err)
1111                 goto err;
1112         elem->st.speculative |= speculative;
1113         if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1114                 verbose(env, "The sequence of %d jumps is too complex.\n",
1115                         env->stack_size);
1116                 goto err;
1117         }
1118         if (elem->st.parent) {
1119                 ++elem->st.parent->branches;
1120                 /* WARN_ON(branches > 2) technically makes sense here,
1121                  * but
1122                  * 1. speculative states will bump 'branches' for non-branch
1123                  * instructions
1124                  * 2. is_state_visited() heuristics may decide not to create
1125                  * a new state for a sequence of branches and all such current
1126                  * and cloned states will be pointing to a single parent state
1127                  * which might have large 'branches' count.
1128                  */
1129         }
1130         return &elem->st;
1131 err:
1132         free_verifier_state(env->cur_state, true);
1133         env->cur_state = NULL;
1134         /* pop all elements and return */
1135         while (!pop_stack(env, NULL, NULL, false));
1136         return NULL;
1137 }
1138
/* Table of the six registers R0-R5, sized by CALLER_SAVED_REGS. */
#define CALLER_SAVED_REGS 6
static const int caller_saved[CALLER_SAVED_REGS] = {
        BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};

/* Forward declaration: the mark_reg_*() error paths below call this before
 * its definition appears.
 */
static void __mark_reg_not_init(const struct bpf_verifier_env *env,
                                struct bpf_reg_state *reg);
1146
1147 /* This helper doesn't clear reg->id */
1148 static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1149 {
1150         reg->var_off = tnum_const(imm);
1151         reg->smin_value = (s64)imm;
1152         reg->smax_value = (s64)imm;
1153         reg->umin_value = imm;
1154         reg->umax_value = imm;
1155
1156         reg->s32_min_value = (s32)imm;
1157         reg->s32_max_value = (s32)imm;
1158         reg->u32_min_value = (u32)imm;
1159         reg->u32_max_value = (u32)imm;
1160 }
1161
1162 /* Mark the unknown part of a register (variable offset or scalar value) as
1163  * known to have the value @imm.
1164  */
1165 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1166 {
1167         /* Clear id, off, and union(map_ptr, range) */
1168         memset(((u8 *)reg) + sizeof(reg->type), 0,
1169                offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1170         ___mark_reg_known(reg, imm);
1171 }
1172
1173 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1174 {
1175         reg->var_off = tnum_const_subreg(reg->var_off, imm);
1176         reg->s32_min_value = (s32)imm;
1177         reg->s32_max_value = (s32)imm;
1178         reg->u32_min_value = (u32)imm;
1179         reg->u32_max_value = (u32)imm;
1180 }
1181
1182 /* Mark the 'variable offset' part of a register as zero.  This should be
1183  * used only on registers holding a pointer type.
1184  */
1185 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
1186 {
1187         __mark_reg_known(reg, 0);
1188 }
1189
1190 static void __mark_reg_const_zero(struct bpf_reg_state *reg)
1191 {
1192         __mark_reg_known(reg, 0);
1193         reg->type = SCALAR_VALUE;
1194 }
1195
1196 static void mark_reg_known_zero(struct bpf_verifier_env *env,
1197                                 struct bpf_reg_state *regs, u32 regno)
1198 {
1199         if (WARN_ON(regno >= MAX_BPF_REG)) {
1200                 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
1201                 /* Something bad happened, let's kill all regs */
1202                 for (regno = 0; regno < MAX_BPF_REG; regno++)
1203                         __mark_reg_not_init(env, regs + regno);
1204                 return;
1205         }
1206         __mark_reg_known_zero(regs + regno);
1207 }
1208
1209 static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
1210 {
1211         if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
1212                 const struct bpf_map *map = reg->map_ptr;
1213
1214                 if (map->inner_map_meta) {
1215                         reg->type = CONST_PTR_TO_MAP;
1216                         reg->map_ptr = map->inner_map_meta;
1217                         /* transfer reg's id which is unique for every map_lookup_elem
1218                          * as UID of the inner map.
1219                          */
1220                         if (map_value_has_timer(map->inner_map_meta))
1221                                 reg->map_uid = reg->id;
1222                 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
1223                         reg->type = PTR_TO_XDP_SOCK;
1224                 } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
1225                            map->map_type == BPF_MAP_TYPE_SOCKHASH) {
1226                         reg->type = PTR_TO_SOCKET;
1227                 } else {
1228                         reg->type = PTR_TO_MAP_VALUE;
1229                 }
1230                 return;
1231         }
1232
1233         reg->type &= ~PTR_MAYBE_NULL;
1234 }
1235
1236 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1237 {
1238         return type_is_pkt_pointer(reg->type);
1239 }
1240
1241 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1242 {
1243         return reg_is_pkt_pointer(reg) ||
1244                reg->type == PTR_TO_PACKET_END;
1245 }
1246
1247 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1248 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1249                                     enum bpf_reg_type which)
1250 {
1251         /* The register can already have a range from prior markings.
1252          * This is fine as long as it hasn't been advanced from its
1253          * origin.
1254          */
1255         return reg->type == which &&
1256                reg->id == 0 &&
1257                reg->off == 0 &&
1258                tnum_equals_const(reg->var_off, 0);
1259 }
1260
1261 /* Reset the min/max bounds of a register */
1262 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1263 {
1264         reg->smin_value = S64_MIN;
1265         reg->smax_value = S64_MAX;
1266         reg->umin_value = 0;
1267         reg->umax_value = U64_MAX;
1268
1269         reg->s32_min_value = S32_MIN;
1270         reg->s32_max_value = S32_MAX;
1271         reg->u32_min_value = 0;
1272         reg->u32_max_value = U32_MAX;
1273 }
1274
1275 static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1276 {
1277         reg->smin_value = S64_MIN;
1278         reg->smax_value = S64_MAX;
1279         reg->umin_value = 0;
1280         reg->umax_value = U64_MAX;
1281 }
1282
1283 static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1284 {
1285         reg->s32_min_value = S32_MIN;
1286         reg->s32_max_value = S32_MAX;
1287         reg->u32_min_value = 0;
1288         reg->u32_max_value = U32_MAX;
1289 }
1290
1291 static void __update_reg32_bounds(struct bpf_reg_state *reg)
1292 {
1293         struct tnum var32_off = tnum_subreg(reg->var_off);
1294
1295         /* min signed is max(sign bit) | min(other bits) */
1296         reg->s32_min_value = max_t(s32, reg->s32_min_value,
1297                         var32_off.value | (var32_off.mask & S32_MIN));
1298         /* max signed is min(sign bit) | max(other bits) */
1299         reg->s32_max_value = min_t(s32, reg->s32_max_value,
1300                         var32_off.value | (var32_off.mask & S32_MAX));
1301         reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1302         reg->u32_max_value = min(reg->u32_max_value,
1303                                  (u32)(var32_off.value | var32_off.mask));
1304 }
1305
1306 static void __update_reg64_bounds(struct bpf_reg_state *reg)
1307 {
1308         /* min signed is max(sign bit) | min(other bits) */
1309         reg->smin_value = max_t(s64, reg->smin_value,
1310                                 reg->var_off.value | (reg->var_off.mask & S64_MIN));
1311         /* max signed is min(sign bit) | max(other bits) */
1312         reg->smax_value = min_t(s64, reg->smax_value,
1313                                 reg->var_off.value | (reg->var_off.mask & S64_MAX));
1314         reg->umin_value = max(reg->umin_value, reg->var_off.value);
1315         reg->umax_value = min(reg->umax_value,
1316                               reg->var_off.value | reg->var_off.mask);
1317 }
1318
/* Tighten both the 32-bit and the 64-bit bounds of @reg from var_off, in
 * that order.
 */
static void __update_reg_bounds(struct bpf_reg_state *reg)
{
        /* subregister view first, full register second */
        __update_reg32_bounds(reg);
        __update_reg64_bounds(reg);
}
1324
1325 /* Uses signed min/max values to inform unsigned, and vice-versa */
1326 static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
1327 {
1328         /* Learn sign from signed bounds.
1329          * If we cannot cross the sign boundary, then signed and unsigned bounds
1330          * are the same, so combine.  This works even in the negative case, e.g.
1331          * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1332          */
1333         if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
1334                 reg->s32_min_value = reg->u32_min_value =
1335                         max_t(u32, reg->s32_min_value, reg->u32_min_value);
1336                 reg->s32_max_value = reg->u32_max_value =
1337                         min_t(u32, reg->s32_max_value, reg->u32_max_value);
1338                 return;
1339         }
1340         /* Learn sign from unsigned bounds.  Signed bounds cross the sign
1341          * boundary, so we must be careful.
1342          */
1343         if ((s32)reg->u32_max_value >= 0) {
1344                 /* Positive.  We can't learn anything from the smin, but smax
1345                  * is positive, hence safe.
1346                  */
1347                 reg->s32_min_value = reg->u32_min_value;
1348                 reg->s32_max_value = reg->u32_max_value =
1349                         min_t(u32, reg->s32_max_value, reg->u32_max_value);
1350         } else if ((s32)reg->u32_min_value < 0) {
1351                 /* Negative.  We can't learn anything from the smax, but smin
1352                  * is negative, hence safe.
1353                  */
1354                 reg->s32_min_value = reg->u32_min_value =
1355                         max_t(u32, reg->s32_min_value, reg->u32_min_value);
1356                 reg->s32_max_value = reg->u32_max_value;
1357         }
1358 }
1359
1360 static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
1361 {
1362         /* Learn sign from signed bounds.
1363          * If we cannot cross the sign boundary, then signed and unsigned bounds
1364          * are the same, so combine.  This works even in the negative case, e.g.
1365          * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1366          */
1367         if (reg->smin_value >= 0 || reg->smax_value < 0) {
1368                 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1369                                                           reg->umin_value);
1370                 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1371                                                           reg->umax_value);
1372                 return;
1373         }
1374         /* Learn sign from unsigned bounds.  Signed bounds cross the sign
1375          * boundary, so we must be careful.
1376          */
1377         if ((s64)reg->umax_value >= 0) {
1378                 /* Positive.  We can't learn anything from the smin, but smax
1379                  * is positive, hence safe.
1380                  */
1381                 reg->smin_value = reg->umin_value;
1382                 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1383                                                           reg->umax_value);
1384         } else if ((s64)reg->umin_value < 0) {
1385                 /* Negative.  We can't learn anything from the smax, but smin
1386                  * is negative, hence safe.
1387                  */
1388                 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1389                                                           reg->umin_value);
1390                 reg->smax_value = reg->umax_value;
1391         }
1392 }
1393
/* Cross-inform the signed and unsigned bounds of @reg, first for the 32-bit
 * subregister and then for the full 64-bit register.
 */
static void __reg_deduce_bounds(struct bpf_reg_state *reg)
{
        /* subregister view first, full register second */
        __reg32_deduce_bounds(reg);
        __reg64_deduce_bounds(reg);
}
1399
1400 /* Attempts to improve var_off based on unsigned min/max information */
1401 static void __reg_bound_offset(struct bpf_reg_state *reg)
1402 {
1403         struct tnum var64_off = tnum_intersect(reg->var_off,
1404                                                tnum_range(reg->umin_value,
1405                                                           reg->umax_value));
1406         struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
1407                                                 tnum_range(reg->u32_min_value,
1408                                                            reg->u32_max_value));
1409
1410         reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
1411 }
1412
1413 static bool __reg32_bound_s64(s32 a)
1414 {
1415         return a >= 0 && a <= S32_MAX;
1416 }
1417
1418 static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
1419 {
1420         reg->umin_value = reg->u32_min_value;
1421         reg->umax_value = reg->u32_max_value;
1422
1423         /* Attempt to pull 32-bit signed bounds into 64-bit bounds but must
1424          * be positive otherwise set to worse case bounds and refine later
1425          * from tnum.
1426          */
1427         if (__reg32_bound_s64(reg->s32_min_value) &&
1428             __reg32_bound_s64(reg->s32_max_value)) {
1429                 reg->smin_value = reg->s32_min_value;
1430                 reg->smax_value = reg->s32_max_value;
1431         } else {
1432                 reg->smin_value = 0;
1433                 reg->smax_value = U32_MAX;
1434         }
1435 }
1436
1437 static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
1438 {
1439         /* special case when 64-bit register has upper 32-bit register
1440          * zeroed. Typically happens after zext or <<32, >>32 sequence
1441          * allowing us to use 32-bit bounds directly,
1442          */
1443         if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
1444                 __reg_assign_32_into_64(reg);
1445         } else {
1446                 /* Otherwise the best we can do is push lower 32bit known and
1447                  * unknown bits into register (var_off set from jmp logic)
1448                  * then learn as much as possible from the 64-bit tnum
1449                  * known and unknown bits. The previous smin/smax bounds are
1450                  * invalid here because of jmp32 compare so mark them unknown
1451                  * so they do not impact tnum bounds calculation.
1452                  */
1453                 __mark_reg64_unbounded(reg);
1454                 __update_reg_bounds(reg);
1455         }
1456
1457         /* Intersecting with the old var_off might have improved our bounds
1458          * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1459          * then new var_off is (0; 0x7f...fc) which improves our umax.
1460          */
1461         __reg_deduce_bounds(reg);
1462         __reg_bound_offset(reg);
1463         __update_reg_bounds(reg);
1464 }
1465
1466 static bool __reg64_bound_s32(s64 a)
1467 {
1468         return a >= S32_MIN && a <= S32_MAX;
1469 }
1470
1471 static bool __reg64_bound_u32(u64 a)
1472 {
1473         return a >= U32_MIN && a <= U32_MAX;
1474 }
1475
1476 static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
1477 {
1478         __mark_reg32_unbounded(reg);
1479
1480         if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
1481                 reg->s32_min_value = (s32)reg->smin_value;
1482                 reg->s32_max_value = (s32)reg->smax_value;
1483         }
1484         if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
1485                 reg->u32_min_value = (u32)reg->umin_value;
1486                 reg->u32_max_value = (u32)reg->umax_value;
1487         }
1488
1489         /* Intersecting with the old var_off might have improved our bounds
1490          * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1491          * then new var_off is (0; 0x7f...fc) which improves our umax.
1492          */
1493         __reg_deduce_bounds(reg);
1494         __reg_bound_offset(reg);
1495         __update_reg_bounds(reg);
1496 }
1497
1498 /* Mark a register as having a completely unknown (scalar) value. */
1499 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
1500                                struct bpf_reg_state *reg)
1501 {
1502         /*
1503          * Clear type, id, off, and union(map_ptr, range) and
1504          * padding between 'type' and union
1505          */
1506         memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
1507         reg->type = SCALAR_VALUE;
1508         reg->var_off = tnum_unknown;
1509         reg->frameno = 0;
1510         reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
1511         __mark_reg_unbounded(reg);
1512 }
1513
1514 static void mark_reg_unknown(struct bpf_verifier_env *env,
1515                              struct bpf_reg_state *regs, u32 regno)
1516 {
1517         if (WARN_ON(regno >= MAX_BPF_REG)) {
1518                 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
1519                 /* Something bad happened, let's kill all regs except FP */
1520                 for (regno = 0; regno < BPF_REG_FP; regno++)
1521                         __mark_reg_not_init(env, regs + regno);
1522                 return;
1523         }
1524         __mark_reg_unknown(env, regs + regno);
1525 }
1526
1527 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1528                                 struct bpf_reg_state *reg)
1529 {
1530         __mark_reg_unknown(env, reg);
1531         reg->type = NOT_INIT;
1532 }
1533
1534 static void mark_reg_not_init(struct bpf_verifier_env *env,
1535                               struct bpf_reg_state *regs, u32 regno)
1536 {
1537         if (WARN_ON(regno >= MAX_BPF_REG)) {
1538                 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
1539                 /* Something bad happened, let's kill all regs except FP */
1540                 for (regno = 0; regno < BPF_REG_FP; regno++)
1541                         __mark_reg_not_init(env, regs + regno);
1542                 return;
1543         }
1544         __mark_reg_not_init(env, regs + regno);
1545 }
1546
1547 static void mark_btf_ld_reg(struct bpf_verifier_env *env,
1548                             struct bpf_reg_state *regs, u32 regno,
1549                             enum bpf_reg_type reg_type,
1550                             struct btf *btf, u32 btf_id)
1551 {
1552         if (reg_type == SCALAR_VALUE) {
1553                 mark_reg_unknown(env, regs, regno);
1554                 return;
1555         }
1556         mark_reg_known_zero(env, regs, regno);
1557         regs[regno].type = PTR_TO_BTF_ID;
1558         regs[regno].btf = btf;
1559         regs[regno].btf_id = btf_id;
1560 }
1561
1562 #define DEF_NOT_SUBREG  (0)
1563 static void init_reg_state(struct bpf_verifier_env *env,
1564                            struct bpf_func_state *state)
1565 {
1566         struct bpf_reg_state *regs = state->regs;
1567         int i;
1568
1569         for (i = 0; i < MAX_BPF_REG; i++) {
1570                 mark_reg_not_init(env, regs, i);
1571                 regs[i].live = REG_LIVE_NONE;
1572                 regs[i].parent = NULL;
1573                 regs[i].subreg_def = DEF_NOT_SUBREG;
1574         }
1575
1576         /* frame pointer */
1577         regs[BPF_REG_FP].type = PTR_TO_STACK;
1578         mark_reg_known_zero(env, regs, BPF_REG_FP);
1579         regs[BPF_REG_FP].frameno = state->frameno;
1580 }
1581
1582 #define BPF_MAIN_FUNC (-1)
1583 static void init_func_state(struct bpf_verifier_env *env,
1584                             struct bpf_func_state *state,
1585                             int callsite, int frameno, int subprogno)
1586 {
1587         state->callsite = callsite;
1588         state->frameno = frameno;
1589         state->subprogno = subprogno;
1590         init_reg_state(env, state);
1591         mark_verifier_state_scratched(env);
1592 }
1593
1594 /* Similar to push_stack(), but for async callbacks */
1595 static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
1596                                                 int insn_idx, int prev_insn_idx,
1597                                                 int subprog)
1598 {
1599         struct bpf_verifier_stack_elem *elem;
1600         struct bpf_func_state *frame;
1601
1602         elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1603         if (!elem)
1604                 goto err;
1605
1606         elem->insn_idx = insn_idx;
1607         elem->prev_insn_idx = prev_insn_idx;
1608         elem->next = env->head;
1609         elem->log_pos = env->log.len_used;
1610         env->head = elem;
1611         env->stack_size++;
1612         if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1613                 verbose(env,
1614                         "The sequence of %d jumps is too complex for async cb.\n",
1615                         env->stack_size);
1616                 goto err;
1617         }
1618         /* Unlike push_stack() do not copy_verifier_state().
1619          * The caller state doesn't matter.
1620          * This is async callback. It starts in a fresh stack.
1621          * Initialize it similar to do_check_common().
1622          */
1623         elem->st.branches = 1;
1624         frame = kzalloc(sizeof(*frame), GFP_KERNEL);
1625         if (!frame)
1626                 goto err;
1627         init_func_state(env, frame,
1628                         BPF_MAIN_FUNC /* callsite */,
1629                         0 /* frameno within this callchain */,
1630                         subprog /* subprog number within this prog */);
1631         elem->st.frame[0] = frame;
1632         return &elem->st;
1633 err:
1634         free_verifier_state(env->cur_state, true);
1635         env->cur_state = NULL;
1636         /* pop all elements and return */
1637         while (!pop_stack(env, NULL, NULL, false));
1638         return NULL;
1639 }
1640
1641
/* Role of the register passed to check_reg_arg(). */
enum reg_arg_type {
        SRC_OP,         /* register is used as source operand */
        DST_OP,         /* register is used as destination operand;
                         * check_reg_arg() also marks it unknown
                         */
        DST_OP_NO_MARK  /* same as above, check only, don't mark */
};
1647
1648 static int cmp_subprogs(const void *a, const void *b)
1649 {
1650         return ((struct bpf_subprog_info *)a)->start -
1651                ((struct bpf_subprog_info *)b)->start;
1652 }
1653
1654 static int find_subprog(struct bpf_verifier_env *env, int off)
1655 {
1656         struct bpf_subprog_info *p;
1657
1658         p = bsearch(&off, env->subprog_info, env->subprog_cnt,
1659                     sizeof(env->subprog_info[0]), cmp_subprogs);
1660         if (!p)
1661                 return -ENOENT;
1662         return p - env->subprog_info;
1663
1664 }
1665
1666 static int add_subprog(struct bpf_verifier_env *env, int off)
1667 {
1668         int insn_cnt = env->prog->len;
1669         int ret;
1670
1671         if (off >= insn_cnt || off < 0) {
1672                 verbose(env, "call to invalid destination\n");
1673                 return -EINVAL;
1674         }
1675         ret = find_subprog(env, off);
1676         if (ret >= 0)
1677                 return ret;
1678         if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
1679                 verbose(env, "too many subprograms\n");
1680                 return -E2BIG;
1681         }
1682         /* determine subprog starts. The end is one before the next starts */
1683         env->subprog_info[env->subprog_cnt++].start = off;
1684         sort(env->subprog_info, env->subprog_cnt,
1685              sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
1686         return env->subprog_cnt - 1;
1687 }
1688
/* Capacity of bpf_kfunc_desc_tab::descs; add_kfunc_call() fails with -E2BIG
 * once this many distinct kernel functions are referenced.
 */
#define MAX_KFUNC_DESCS 256
/* Capacity of bpf_kfunc_btf_tab::descs; __find_kfunc_desc_btf() fails with
 * -E2BIG once this many distinct module BTFs are referenced.
 */
#define MAX_KFUNC_BTFS  256

/* One kernel function called by the program, as recorded by add_kfunc_call(). */
struct bpf_kfunc_desc {
        struct btf_func_model func_model; /* filled by btf_distill_func_proto() */
        u32 func_id;    /* BTF type id of the function (insn->imm of the call) */
        s32 imm;        /* BPF_CALL_IMM() of the function's kallsyms address */
        u16 offset;     /* insn->off of the call: 0, or fd_array index of a module BTF */
};

/* One module BTF referenced by a kfunc call, see __find_kfunc_desc_btf(). */
struct bpf_kfunc_btf {
        struct btf *btf;        /* from btf_get_by_fd(); dropped with btf_put() */
        struct module *module;  /* from btf_try_get_module(); dropped with module_put() */
        u16 offset;             /* fd_array index this entry was loaded from */
};

/* Per-program table of kfunc descriptors; nr_descs entries of descs[] are in use. */
struct bpf_kfunc_desc_tab {
        struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
        u32 nr_descs;
};

/* Per-program table of module BTFs, kept sorted by offset; nr_descs entries
 * of descs[] are in use.
 */
struct bpf_kfunc_btf_tab {
        struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
        u32 nr_descs;
};
1714
1715 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
1716 {
1717         const struct bpf_kfunc_desc *d0 = a;
1718         const struct bpf_kfunc_desc *d1 = b;
1719
1720         /* func_id is not greater than BTF_MAX_TYPE */
1721         return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
1722 }
1723
1724 static int kfunc_btf_cmp_by_off(const void *a, const void *b)
1725 {
1726         const struct bpf_kfunc_btf *d0 = a;
1727         const struct bpf_kfunc_btf *d1 = b;
1728
1729         return d0->offset - d1->offset;
1730 }
1731
1732 static const struct bpf_kfunc_desc *
1733 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
1734 {
1735         struct bpf_kfunc_desc desc = {
1736                 .func_id = func_id,
1737                 .offset = offset,
1738         };
1739         struct bpf_kfunc_desc_tab *tab;
1740
1741         tab = prog->aux->kfunc_tab;
1742         return bsearch(&desc, tab->descs, tab->nr_descs,
1743                        sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
1744 }
1745
1746 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
1747                                          s16 offset)
1748 {
1749         struct bpf_kfunc_btf kf_btf = { .offset = offset };
1750         struct bpf_kfunc_btf_tab *tab;
1751         struct bpf_kfunc_btf *b;
1752         struct module *mod;
1753         struct btf *btf;
1754         int btf_fd;
1755
1756         tab = env->prog->aux->kfunc_btf_tab;
1757         b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
1758                     sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
1759         if (!b) {
1760                 if (tab->nr_descs == MAX_KFUNC_BTFS) {
1761                         verbose(env, "too many different module BTFs\n");
1762                         return ERR_PTR(-E2BIG);
1763                 }
1764
1765                 if (bpfptr_is_null(env->fd_array)) {
1766                         verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
1767                         return ERR_PTR(-EPROTO);
1768                 }
1769
1770                 if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
1771                                             offset * sizeof(btf_fd),
1772                                             sizeof(btf_fd)))
1773                         return ERR_PTR(-EFAULT);
1774
1775                 btf = btf_get_by_fd(btf_fd);
1776                 if (IS_ERR(btf)) {
1777                         verbose(env, "invalid module BTF fd specified\n");
1778                         return btf;
1779                 }
1780
1781                 if (!btf_is_module(btf)) {
1782                         verbose(env, "BTF fd for kfunc is not a module BTF\n");
1783                         btf_put(btf);
1784                         return ERR_PTR(-EINVAL);
1785                 }
1786
1787                 mod = btf_try_get_module(btf);
1788                 if (!mod) {
1789                         btf_put(btf);
1790                         return ERR_PTR(-ENXIO);
1791                 }
1792
1793                 b = &tab->descs[tab->nr_descs++];
1794                 b->btf = btf;
1795                 b->module = mod;
1796                 b->offset = offset;
1797
1798                 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
1799                      kfunc_btf_cmp_by_off, NULL);
1800         }
1801         return b->btf;
1802 }
1803
1804 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
1805 {
1806         if (!tab)
1807                 return;
1808
1809         while (tab->nr_descs--) {
1810                 module_put(tab->descs[tab->nr_descs].module);
1811                 btf_put(tab->descs[tab->nr_descs].btf);
1812         }
1813         kfree(tab);
1814 }
1815
1816 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
1817                                        u32 func_id, s16 offset)
1818 {
1819         if (offset) {
1820                 if (offset < 0) {
1821                         /* In the future, this can be allowed to increase limit
1822                          * of fd index into fd_array, interpreted as u16.
1823                          */
1824                         verbose(env, "negative offset disallowed for kernel module function call\n");
1825                         return ERR_PTR(-EINVAL);
1826                 }
1827
1828                 return __find_kfunc_desc_btf(env, offset);
1829         }
1830         return btf_vmlinux ?: ERR_PTR(-ENOENT);
1831 }
1832
1833 static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
1834 {
1835         const struct btf_type *func, *func_proto;
1836         struct bpf_kfunc_btf_tab *btf_tab;
1837         struct bpf_kfunc_desc_tab *tab;
1838         struct bpf_prog_aux *prog_aux;
1839         struct bpf_kfunc_desc *desc;
1840         const char *func_name;
1841         struct btf *desc_btf;
1842         unsigned long addr;
1843         int err;
1844
1845         prog_aux = env->prog->aux;
1846         tab = prog_aux->kfunc_tab;
1847         btf_tab = prog_aux->kfunc_btf_tab;
1848         if (!tab) {
1849                 if (!btf_vmlinux) {
1850                         verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
1851                         return -ENOTSUPP;
1852                 }
1853
1854                 if (!env->prog->jit_requested) {
1855                         verbose(env, "JIT is required for calling kernel function\n");
1856                         return -ENOTSUPP;
1857                 }
1858
1859                 if (!bpf_jit_supports_kfunc_call()) {
1860                         verbose(env, "JIT does not support calling kernel function\n");
1861                         return -ENOTSUPP;
1862                 }
1863
1864                 if (!env->prog->gpl_compatible) {
1865                         verbose(env, "cannot call kernel function from non-GPL compatible program\n");
1866                         return -EINVAL;
1867                 }
1868
1869                 tab = kzalloc(sizeof(*tab), GFP_KERNEL);
1870                 if (!tab)
1871                         return -ENOMEM;
1872                 prog_aux->kfunc_tab = tab;
1873         }
1874
1875         /* func_id == 0 is always invalid, but instead of returning an error, be
1876          * conservative and wait until the code elimination pass before returning
1877          * error, so that invalid calls that get pruned out can be in BPF programs
1878          * loaded from userspace.  It is also required that offset be untouched
1879          * for such calls.
1880          */
1881         if (!func_id && !offset)
1882                 return 0;
1883
1884         if (!btf_tab && offset) {
1885                 btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
1886                 if (!btf_tab)
1887                         return -ENOMEM;
1888                 prog_aux->kfunc_btf_tab = btf_tab;
1889         }
1890
1891         desc_btf = find_kfunc_desc_btf(env, func_id, offset);
1892         if (IS_ERR(desc_btf)) {
1893                 verbose(env, "failed to find BTF for kernel function\n");
1894                 return PTR_ERR(desc_btf);
1895         }
1896
1897         if (find_kfunc_desc(env->prog, func_id, offset))
1898                 return 0;
1899
1900         if (tab->nr_descs == MAX_KFUNC_DESCS) {
1901                 verbose(env, "too many different kernel function calls\n");
1902                 return -E2BIG;
1903         }
1904
1905         func = btf_type_by_id(desc_btf, func_id);
1906         if (!func || !btf_type_is_func(func)) {
1907                 verbose(env, "kernel btf_id %u is not a function\n",
1908                         func_id);
1909                 return -EINVAL;
1910         }
1911         func_proto = btf_type_by_id(desc_btf, func->type);
1912         if (!func_proto || !btf_type_is_func_proto(func_proto)) {
1913                 verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
1914                         func_id);
1915                 return -EINVAL;
1916         }
1917
1918         func_name = btf_name_by_offset(desc_btf, func->name_off);
1919         addr = kallsyms_lookup_name(func_name);
1920         if (!addr) {
1921                 verbose(env, "cannot find address for kernel function %s\n",
1922                         func_name);
1923                 return -EINVAL;
1924         }
1925
1926         desc = &tab->descs[tab->nr_descs++];
1927         desc->func_id = func_id;
1928         desc->imm = BPF_CALL_IMM(addr);
1929         desc->offset = offset;
1930         err = btf_distill_func_proto(&env->log, desc_btf,
1931                                      func_proto, func_name,
1932                                      &desc->func_model);
1933         if (!err)
1934                 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
1935                      kfunc_desc_cmp_by_id_off, NULL);
1936         return err;
1937 }
1938
1939 static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
1940 {
1941         const struct bpf_kfunc_desc *d0 = a;
1942         const struct bpf_kfunc_desc *d1 = b;
1943
1944         if (d0->imm > d1->imm)
1945                 return 1;
1946         else if (d0->imm < d1->imm)
1947                 return -1;
1948         return 0;
1949 }
1950
1951 static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
1952 {
1953         struct bpf_kfunc_desc_tab *tab;
1954
1955         tab = prog->aux->kfunc_tab;
1956         if (!tab)
1957                 return;
1958
1959         sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
1960              kfunc_desc_cmp_by_imm, NULL);
1961 }
1962
1963 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
1964 {
1965         return !!prog->aux->kfunc_tab;
1966 }
1967
1968 const struct btf_func_model *
1969 bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
1970                          const struct bpf_insn *insn)
1971 {
1972         const struct bpf_kfunc_desc desc = {
1973                 .imm = insn->imm,
1974         };
1975         const struct bpf_kfunc_desc *res;
1976         struct bpf_kfunc_desc_tab *tab;
1977
1978         tab = prog->aux->kfunc_tab;
1979         res = bsearch(&desc, tab->descs, tab->nr_descs,
1980                       sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm);
1981
1982         return res ? &res->func_model : NULL;
1983 }
1984
1985 static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
1986 {
1987         struct bpf_subprog_info *subprog = env->subprog_info;
1988         struct bpf_insn *insn = env->prog->insnsi;
1989         int i, ret, insn_cnt = env->prog->len;
1990
1991         /* Add entry function. */
1992         ret = add_subprog(env, 0);
1993         if (ret)
1994                 return ret;
1995
1996         for (i = 0; i < insn_cnt; i++, insn++) {
1997                 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
1998                     !bpf_pseudo_kfunc_call(insn))
1999                         continue;
2000
2001                 if (!env->bpf_capable) {
2002                         verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
2003                         return -EPERM;
2004                 }
2005
2006                 if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
2007                         ret = add_subprog(env, i + insn->imm + 1);
2008                 else
2009                         ret = add_kfunc_call(env, insn->imm, insn->off);
2010
2011                 if (ret < 0)
2012                         return ret;
2013         }
2014
2015         /* Add a fake 'exit' subprog which could simplify subprog iteration
2016          * logic. 'subprog_cnt' should not be increased.
2017          */
2018         subprog[env->subprog_cnt].start = insn_cnt;
2019
2020         if (env->log.level & BPF_LOG_LEVEL2)
2021                 for (i = 0; i < env->subprog_cnt; i++)
2022                         verbose(env, "func#%d @%d\n", i, subprog[i].start);
2023
2024         return 0;
2025 }
2026
2027 static int check_subprogs(struct bpf_verifier_env *env)
2028 {
2029         int i, subprog_start, subprog_end, off, cur_subprog = 0;
2030         struct bpf_subprog_info *subprog = env->subprog_info;
2031         struct bpf_insn *insn = env->prog->insnsi;
2032         int insn_cnt = env->prog->len;
2033
2034         /* now check that all jumps are within the same subprog */
2035         subprog_start = subprog[cur_subprog].start;
2036         subprog_end = subprog[cur_subprog + 1].start;
2037         for (i = 0; i < insn_cnt; i++) {
2038                 u8 code = insn[i].code;
2039
2040                 if (code == (BPF_JMP | BPF_CALL) &&
2041                     insn[i].imm == BPF_FUNC_tail_call &&
2042                     insn[i].src_reg != BPF_PSEUDO_CALL)
2043                         subprog[cur_subprog].has_tail_call = true;
2044                 if (BPF_CLASS(code) == BPF_LD &&
2045                     (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
2046                         subprog[cur_subprog].has_ld_abs = true;
2047                 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
2048                         goto next;
2049                 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
2050                         goto next;
2051                 off = i + insn[i].off + 1;
2052                 if (off < subprog_start || off >= subprog_end) {
2053                         verbose(env, "jump out of range from insn %d to %d\n", i, off);
2054                         return -EINVAL;
2055                 }
2056 next:
2057                 if (i == subprog_end - 1) {
2058                         /* to avoid fall-through from one subprog into another
2059                          * the last insn of the subprog should be either exit
2060                          * or unconditional jump back
2061                          */
2062                         if (code != (BPF_JMP | BPF_EXIT) &&
2063                             code != (BPF_JMP | BPF_JA)) {
2064                                 verbose(env, "last insn is not an exit or jmp\n");
2065                                 return -EINVAL;
2066                         }
2067                         subprog_start = subprog_end;
2068                         cur_subprog++;
2069                         if (cur_subprog < env->subprog_cnt)
2070                                 subprog_end = subprog[cur_subprog + 1].start;
2071                 }
2072         }
2073         return 0;
2074 }
2075
2076 /* Parentage chain of this register (or stack slot) should take care of all
2077  * issues like callee-saved registers, stack slot allocation time, etc.
2078  */
2079 static int mark_reg_read(struct bpf_verifier_env *env,
2080                          const struct bpf_reg_state *state,
2081                          struct bpf_reg_state *parent, u8 flag)
2082 {
2083         bool writes = parent == state->parent; /* Observe write marks */
2084         int cnt = 0;
2085
2086         while (parent) {
2087                 /* if read wasn't screened by an earlier write ... */
2088                 if (writes && state->live & REG_LIVE_WRITTEN)
2089                         break;
2090                 if (parent->live & REG_LIVE_DONE) {
2091                         verbose(env, "verifier BUG type %s var_off %lld off %d\n",
2092                                 reg_type_str(env, parent->type),
2093                                 parent->var_off.value, parent->off);
2094                         return -EFAULT;
2095                 }
2096                 /* The first condition is more likely to be true than the
2097                  * second, checked it first.
2098                  */
2099                 if ((parent->live & REG_LIVE_READ) == flag ||
2100                     parent->live & REG_LIVE_READ64)
2101                         /* The parentage chain never changes and
2102                          * this parent was already marked as LIVE_READ.
2103                          * There is no need to keep walking the chain again and
2104                          * keep re-marking all parents as LIVE_READ.
2105                          * This case happens when the same register is read
2106                          * multiple times without writes into it in-between.
2107                          * Also, if parent has the stronger REG_LIVE_READ64 set,
2108                          * then no need to set the weak REG_LIVE_READ32.
2109                          */
2110                         break;
2111                 /* ... then we depend on parent's value */
2112                 parent->live |= flag;
2113                 /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
2114                 if (flag == REG_LIVE_READ64)
2115                         parent->live &= ~REG_LIVE_READ32;
2116                 state = parent;
2117                 parent = state->parent;
2118                 writes = true;
2119                 cnt++;
2120         }
2121
2122         if (env->longest_mark_read_walk < cnt)
2123                 env->longest_mark_read_walk = cnt;
2124         return 0;
2125 }
2126
2127 /* This function is supposed to be used by the following 32-bit optimization
2128  * code only. It returns TRUE if the source or destination register operates
2129  * on 64-bit, otherwise return FALSE.
2130  */
2131 static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
2132                      u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
2133 {
2134         u8 code, class, op;
2135
2136         code = insn->code;
2137         class = BPF_CLASS(code);
2138         op = BPF_OP(code);
2139         if (class == BPF_JMP) {
2140                 /* BPF_EXIT for "main" will reach here. Return TRUE
2141                  * conservatively.
2142                  */
2143                 if (op == BPF_EXIT)
2144                         return true;
2145                 if (op == BPF_CALL) {
2146                         /* BPF to BPF call will reach here because of marking
2147                          * caller saved clobber with DST_OP_NO_MARK for which we
2148                          * don't care the register def because they are anyway
2149                          * marked as NOT_INIT already.
2150                          */
2151                         if (insn->src_reg == BPF_PSEUDO_CALL)
2152                                 return false;
2153                         /* Helper call will reach here because of arg type
2154                          * check, conservatively return TRUE.
2155                          */
2156                         if (t == SRC_OP)
2157                                 return true;
2158
2159                         return false;
2160                 }
2161         }
2162
2163         if (class == BPF_ALU64 || class == BPF_JMP ||
2164             /* BPF_END always use BPF_ALU class. */
2165             (class == BPF_ALU && op == BPF_END && insn->imm == 64))
2166                 return true;
2167
2168         if (class == BPF_ALU || class == BPF_JMP32)
2169                 return false;
2170
2171         if (class == BPF_LDX) {
2172                 if (t != SRC_OP)
2173                         return BPF_SIZE(code) == BPF_DW;
2174                 /* LDX source must be ptr. */
2175                 return true;
2176         }
2177
2178         if (class == BPF_STX) {
2179                 /* BPF_STX (including atomic variants) has multiple source
2180                  * operands, one of which is a ptr. Check whether the caller is
2181                  * asking about it.
2182                  */
2183                 if (t == SRC_OP && reg->type != SCALAR_VALUE)
2184                         return true;
2185                 return BPF_SIZE(code) == BPF_DW;
2186         }
2187
2188         if (class == BPF_LD) {
2189                 u8 mode = BPF_MODE(code);
2190
2191                 /* LD_IMM64 */
2192                 if (mode == BPF_IMM)
2193                         return true;
2194
2195                 /* Both LD_IND and LD_ABS return 32-bit data. */
2196                 if (t != SRC_OP)
2197                         return  false;
2198
2199                 /* Implicit ctx ptr. */
2200                 if (regno == BPF_REG_6)
2201                         return true;
2202
2203                 /* Explicit source could be any width. */
2204                 return true;
2205         }
2206
2207         if (class == BPF_ST)
2208                 /* The only source register for BPF_ST is a ptr. */
2209                 return true;
2210
2211         /* Conservatively return true at default. */
2212         return true;
2213 }
2214
2215 /* Return the regno defined by the insn, or -1. */
2216 static int insn_def_regno(const struct bpf_insn *insn)
2217 {
2218         switch (BPF_CLASS(insn->code)) {
2219         case BPF_JMP:
2220         case BPF_JMP32:
2221         case BPF_ST:
2222                 return -1;
2223         case BPF_STX:
2224                 if (BPF_MODE(insn->code) == BPF_ATOMIC &&
2225                     (insn->imm & BPF_FETCH)) {
2226                         if (insn->imm == BPF_CMPXCHG)
2227                                 return BPF_REG_0;
2228                         else
2229                                 return insn->src_reg;
2230                 } else {
2231                         return -1;
2232                 }
2233         default:
2234                 return insn->dst_reg;
2235         }
2236 }
2237
2238 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
2239 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
2240 {
2241         int dst_reg = insn_def_regno(insn);
2242
2243         if (dst_reg == -1)
2244                 return false;
2245
2246         return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
2247 }
2248
2249 static void mark_insn_zext(struct bpf_verifier_env *env,
2250                            struct bpf_reg_state *reg)
2251 {
2252         s32 def_idx = reg->subreg_def;
2253
2254         if (def_idx == DEF_NOT_SUBREG)
2255                 return;
2256
2257         env->insn_aux_data[def_idx - 1].zext_dst = true;
2258         /* The dst will be zero extended, so won't be sub-register anymore. */
2259         reg->subreg_def = DEF_NOT_SUBREG;
2260 }
2261
2262 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
2263                          enum reg_arg_type t)
2264 {
2265         struct bpf_verifier_state *vstate = env->cur_state;
2266         struct bpf_func_state *state = vstate->frame[vstate->curframe];
2267         struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
2268         struct bpf_reg_state *reg, *regs = state->regs;
2269         bool rw64;
2270
2271         if (regno >= MAX_BPF_REG) {
2272                 verbose(env, "R%d is invalid\n", regno);
2273                 return -EINVAL;
2274         }
2275
2276         mark_reg_scratched(env, regno);
2277
2278         reg = &regs[regno];
2279         rw64 = is_reg64(env, insn, regno, reg, t);
2280         if (t == SRC_OP) {
2281                 /* check whether register used as source operand can be read */
2282                 if (reg->type == NOT_INIT) {
2283                         verbose(env, "R%d !read_ok\n", regno);
2284                         return -EACCES;
2285                 }
2286                 /* We don't need to worry about FP liveness because it's read-only */
2287                 if (regno == BPF_REG_FP)
2288                         return 0;
2289
2290                 if (rw64)
2291                         mark_insn_zext(env, reg);
2292
2293                 return mark_reg_read(env, reg, reg->parent,
2294                                      rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
2295         } else {
2296                 /* check whether register used as dest operand can be written to */
2297                 if (regno == BPF_REG_FP) {
2298                         verbose(env, "frame pointer is read only\n");
2299                         return -EACCES;
2300                 }
2301                 reg->live |= REG_LIVE_WRITTEN;
2302                 reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
2303                 if (t == DST_OP)
2304                         mark_reg_unknown(env, regs, regno);
2305         }
2306         return 0;
2307 }
2308
2309 /* for any branch, call, exit record the history of jmps in the given state */
2310 static int push_jmp_history(struct bpf_verifier_env *env,
2311                             struct bpf_verifier_state *cur)
2312 {
2313         u32 cnt = cur->jmp_history_cnt;
2314         struct bpf_idx_pair *p;
2315
2316         cnt++;
2317         p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
2318         if (!p)
2319                 return -ENOMEM;
2320         p[cnt - 1].idx = env->insn_idx;
2321         p[cnt - 1].prev_idx = env->prev_insn_idx;
2322         cur->jmp_history = p;
2323         cur->jmp_history_cnt = cnt;
2324         return 0;
2325 }
2326
2327 /* Backtrack one insn at a time. If idx is not at the top of recorded
2328  * history then previous instruction came from straight line execution.
2329  */
2330 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
2331                              u32 *history)
2332 {
2333         u32 cnt = *history;
2334
2335         if (cnt && st->jmp_history[cnt - 1].idx == i) {
2336                 i = st->jmp_history[cnt - 1].prev_idx;
2337                 (*history)--;
2338         } else {
2339                 i--;
2340         }
2341         return i;
2342 }
2343
2344 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
2345 {
2346         const struct btf_type *func;
2347         struct btf *desc_btf;
2348
2349         if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
2350                 return NULL;
2351
2352         desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off);
2353         if (IS_ERR(desc_btf))
2354                 return "<error>";
2355
2356         func = btf_type_by_id(desc_btf, insn->imm);
2357         return btf_name_by_offset(desc_btf, func->name_off);
2358 }
2359
2360 /* For given verifier state backtrack_insn() is called from the last insn to
2361  * the first insn. Its purpose is to compute a bitmask of registers and
2362  * stack slots that needs precision in the parent verifier state.
2363  */
2364 static int backtrack_insn(struct bpf_verifier_env *env, int idx,
2365                           u32 *reg_mask, u64 *stack_mask)
2366 {
2367         const struct bpf_insn_cbs cbs = {
2368                 .cb_call        = disasm_kfunc_name,
2369                 .cb_print       = verbose,
2370                 .private_data   = env,
2371         };
2372         struct bpf_insn *insn = env->prog->insnsi + idx;
2373         u8 class = BPF_CLASS(insn->code);
2374         u8 opcode = BPF_OP(insn->code);
2375         u8 mode = BPF_MODE(insn->code);
2376         u32 dreg = 1u << insn->dst_reg;
2377         u32 sreg = 1u << insn->src_reg;
2378         u32 spi;
2379
2380         if (insn->code == 0)
2381                 return 0;
2382         if (env->log.level & BPF_LOG_LEVEL2) {
2383                 verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
2384                 verbose(env, "%d: ", idx);
2385                 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
2386         }
2387
2388         if (class == BPF_ALU || class == BPF_ALU64) {
2389                 if (!(*reg_mask & dreg))
2390                         return 0;
2391                 if (opcode == BPF_MOV) {
2392                         if (BPF_SRC(insn->code) == BPF_X) {
2393                                 /* dreg = sreg
2394                                  * dreg needs precision after this insn
2395                                  * sreg needs precision before this insn
2396                                  */
2397                                 *reg_mask &= ~dreg;
2398                                 *reg_mask |= sreg;
2399                         } else {
2400                                 /* dreg = K
2401                                  * dreg needs precision after this insn.
2402                                  * Corresponding register is already marked
2403                                  * as precise=true in this verifier state.
2404                                  * No further markings in parent are necessary
2405                                  */
2406                                 *reg_mask &= ~dreg;
2407                         }
2408                 } else {
2409                         if (BPF_SRC(insn->code) == BPF_X) {
2410                                 /* dreg += sreg
2411                                  * both dreg and sreg need precision
2412                                  * before this insn
2413                                  */
2414                                 *reg_mask |= sreg;
2415                         } /* else dreg += K
2416                            * dreg still needs precision before this insn
2417                            */
2418                 }
2419         } else if (class == BPF_LDX) {
2420                 if (!(*reg_mask & dreg))
2421                         return 0;
2422                 *reg_mask &= ~dreg;
2423
2424                 /* scalars can only be spilled into stack w/o losing precision.
2425                  * Load from any other memory can be zero extended.
2426                  * The desire to keep that precision is already indicated
2427                  * by 'precise' mark in corresponding register of this state.
2428                  * No further tracking necessary.
2429                  */
2430                 if (insn->src_reg != BPF_REG_FP)
2431                         return 0;
2432
2433                 /* dreg = *(u64 *)[fp - off] was a fill from the stack.
2434                  * that [fp - off] slot contains scalar that needs to be
2435                  * tracked with precision
2436                  */
2437                 spi = (-insn->off - 1) / BPF_REG_SIZE;
2438                 if (spi >= 64) {
2439                         verbose(env, "BUG spi %d\n", spi);
2440                         WARN_ONCE(1, "verifier backtracking bug");
2441                         return -EFAULT;
2442                 }
2443                 *stack_mask |= 1ull << spi;
2444         } else if (class == BPF_STX || class == BPF_ST) {
2445                 if (*reg_mask & dreg)
2446                         /* stx & st shouldn't be using _scalar_ dst_reg
2447                          * to access memory. It means backtracking
2448                          * encountered a case of pointer subtraction.
2449                          */
2450                         return -ENOTSUPP;
2451                 /* scalars can only be spilled into stack */
2452                 if (insn->dst_reg != BPF_REG_FP)
2453                         return 0;
2454                 spi = (-insn->off - 1) / BPF_REG_SIZE;
2455                 if (spi >= 64) {
2456                         verbose(env, "BUG spi %d\n", spi);
2457                         WARN_ONCE(1, "verifier backtracking bug");
2458                         return -EFAULT;
2459                 }
2460                 if (!(*stack_mask & (1ull << spi)))
2461                         return 0;
2462                 *stack_mask &= ~(1ull << spi);
2463                 if (class == BPF_STX)
2464                         *reg_mask |= sreg;
2465         } else if (class == BPF_JMP || class == BPF_JMP32) {
2466                 if (opcode == BPF_CALL) {
2467                         if (insn->src_reg == BPF_PSEUDO_CALL)
2468                                 return -ENOTSUPP;
2469                         /* regular helper call sets R0 */
2470                         *reg_mask &= ~1;
2471                         if (*reg_mask & 0x3f) {
2472                                 /* if backtracing was looking for registers R1-R5
2473                                  * they should have been found already.
2474                                  */
2475                                 verbose(env, "BUG regs %x\n", *reg_mask);
2476                                 WARN_ONCE(1, "verifier backtracking bug");
2477                                 return -EFAULT;
2478                         }
2479                 } else if (opcode == BPF_EXIT) {
2480                         return -ENOTSUPP;
2481                 }
2482         } else if (class == BPF_LD) {
2483                 if (!(*reg_mask & dreg))
2484                         return 0;
2485                 *reg_mask &= ~dreg;
2486                 /* It's ld_imm64 or ld_abs or ld_ind.
2487                  * For ld_imm64 no further tracking of precision
2488                  * into parent is necessary
2489                  */
2490                 if (mode == BPF_IND || mode == BPF_ABS)
2491                         /* to be analyzed */
2492                         return -ENOTSUPP;
2493         }
2494         return 0;
2495 }
2496
2497 /* the scalar precision tracking algorithm:
2498  * . at the start all registers have precise=false.
2499  * . scalar ranges are tracked as normal through alu and jmp insns.
2500  * . once precise value of the scalar register is used in:
2501  *   .  ptr + scalar alu
2502  *   . if (scalar cond K|scalar)
2503  *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
2504  *   backtrack through the verifier states and mark all registers and
2505  *   stack slots with spilled constants that these scalar regisers
2506  *   should be precise.
2507  * . during state pruning two registers (or spilled stack slots)
2508  *   are equivalent if both are not precise.
2509  *
2510  * Note the verifier cannot simply walk register parentage chain,
2511  * since many different registers and stack slots could have been
2512  * used to compute single precise scalar.
2513  *
2514  * The approach of starting with precise=true for all registers and then
2515  * backtrack to mark a register as not precise when the verifier detects
2516  * that program doesn't care about specific value (e.g., when helper
2517  * takes register as ARG_ANYTHING parameter) is not safe.
2518  *
2519  * It's ok to walk single parentage chain of the verifier states.
2520  * It's possible that this backtracking will go all the way till 1st insn.
2521  * All other branches will be explored for needing precision later.
2522  *
2523  * The backtracking needs to deal with cases like:
2524  *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
2525  * r9 -= r8
2526  * r5 = r9
2527  * if r5 > 0x79f goto pc+7
2528  *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
2529  * r5 += 1
2530  * ...
2531  * call bpf_perf_event_output#25
2532  *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
2533  *
2534  * and this case:
2535  * r6 = 1
2536  * call foo // uses callee's r6 inside to compute r0
2537  * r0 += r6
2538  * if r0 == 0 goto
2539  *
2540  * to track above reg_mask/stack_mask needs to be independent for each frame.
2541  *
2542  * Also if parent's curframe > frame where backtracking started,
2543  * the verifier need to mark registers in both frames, otherwise callees
2544  * may incorrectly prune callers. This is similar to
2545  * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
2546  *
2547  * For now backtracking falls back into conservative marking.
2548  */
2549 static void mark_all_scalars_precise(struct bpf_verifier_env *env,
2550                                      struct bpf_verifier_state *st)
2551 {
2552         struct bpf_func_state *func;
2553         struct bpf_reg_state *reg;
2554         int i, j;
2555
2556         /* big hammer: mark all scalars precise in this path.
2557          * pop_stack may still get !precise scalars.
2558          */
2559         for (; st; st = st->parent)
2560                 for (i = 0; i <= st->curframe; i++) {
2561                         func = st->frame[i];
2562                         for (j = 0; j < BPF_REG_FP; j++) {
2563                                 reg = &func->regs[j];
2564                                 if (reg->type != SCALAR_VALUE)
2565                                         continue;
2566                                 reg->precise = true;
2567                         }
2568                         for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
2569                                 if (!is_spilled_reg(&func->stack[j]))
2570                                         continue;
2571                                 reg = &func->stack[j].spilled_ptr;
2572                                 if (reg->type != SCALAR_VALUE)
2573                                         continue;
2574                                 reg->precise = true;
2575                         }
2576                 }
2577 }
2578
/* Mark a register or a stack slot of the current state as precise and
 * propagate that requirement backwards through the instructions and the
 * chain of parent states that produced its value.
 *
 * @env:   verifier environment; backtracking starts at env->cur_state and
 *         env->insn_idx
 * @regno: register to mark in the current frame, or negative for none
 * @spi:   stack slot index to mark in the current frame, or negative for none
 *
 * Returns 0 on success (including the cases where nothing had to be done or
 * where the conservative mark_all_scalars_precise() fallback was taken), or
 * a negative error code (-EFAULT on misuse or internal inconsistency).
 */
static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
                                  int spi)
{
        struct bpf_verifier_state *st = env->cur_state;
        int first_idx = st->first_insn_idx;
        int last_idx = env->insn_idx;
        struct bpf_func_state *func;
        struct bpf_reg_state *reg;
        /* bitmasks of registers / stack slots still being tracked */
        u32 reg_mask = regno >= 0 ? 1u << regno : 0;
        u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
        bool skip_first = true;
        bool new_marks = false;
        int i, err;

        /* precision tracking is not done when env->bpf_capable is false */
        if (!env->bpf_capable)
                return 0;

        func = st->frame[st->curframe];
        if (regno >= 0) {
                reg = &func->regs[regno];
                if (reg->type != SCALAR_VALUE) {
                        WARN_ONCE(1, "backtracing misuse");
                        return -EFAULT;
                }
                /* an already precise register needs no further tracking */
                if (!reg->precise)
                        new_marks = true;
                else
                        reg_mask = 0;
                reg->precise = true;
        }

        /* Executes at most once: every path ends in 'break'. The loop form
         * only allows bailing out early.
         */
        while (spi >= 0) {
                if (!is_spilled_reg(&func->stack[spi])) {
                        stack_mask = 0;
                        break;
                }
                reg = &func->stack[spi].spilled_ptr;
                if (reg->type != SCALAR_VALUE) {
                        stack_mask = 0;
                        break;
                }
                if (!reg->precise)
                        new_marks = true;
                else
                        stack_mask = 0;
                reg->precise = true;
                break;
        }

        if (!new_marks)
                return 0;
        if (!reg_mask && !stack_mask)
                return 0;
        /* Outer loop: one iteration per verifier state, walking ->parent.
         * Inner loop: walk the instructions of that state backwards from
         * last_idx to first_idx.
         */
        for (;;) {
                DECLARE_BITMAP(mask, 64);
                u32 history = st->jmp_history_cnt;

                if (env->log.level & BPF_LOG_LEVEL2)
                        verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
                for (i = last_idx;;) {
                        if (skip_first) {
                                /* The very first insn visited (env->insn_idx)
                                 * is not backtracked; presumably because it
                                 * has not been simulated yet -- confirm.
                                 */
                                err = 0;
                                skip_first = false;
                        } else {
                                err = backtrack_insn(env, i, &reg_mask, &stack_mask);
                        }
                        if (err == -ENOTSUPP) {
                                /* insn can't be backtracked: be conservative */
                                mark_all_scalars_precise(env, st);
                                return 0;
                        } else if (err) {
                                return err;
                        }
                        if (!reg_mask && !stack_mask)
                                /* Found assignment(s) into tracked register in this state.
                                 * Since this state is already marked, just return.
                                 * Nothing to be tracked further in the parent state.
                                 */
                                return 0;
                        if (i == first_idx)
                                break;
                        i = get_prev_insn_idx(st, i, &history);
                        if (i >= env->prog->len) {
                                /* This can happen if backtracking reached insn 0
                                 * and there are still reg_mask or stack_mask
                                 * to backtrack.
                                 * It means the backtracking missed the spot where
                                 * particular register was initialized with a constant.
                                 */
                                verbose(env, "BUG backtracking idx %d\n", i);
                                WARN_ONCE(1, "verifier backtracking bug");
                                return -EFAULT;
                        }
                }
                st = st->parent;
                if (!st)
                        break;

                /* Apply the accumulated masks to the parent state: mark the
                 * scalars precise and drop bits that don't refer to scalars.
                 */
                new_marks = false;
                func = st->frame[st->curframe];
                bitmap_from_u64(mask, reg_mask);
                for_each_set_bit(i, mask, 32) {
                        reg = &func->regs[i];
                        if (reg->type != SCALAR_VALUE) {
                                reg_mask &= ~(1u << i);
                                continue;
                        }
                        if (!reg->precise)
                                new_marks = true;
                        reg->precise = true;
                }

                bitmap_from_u64(mask, stack_mask);
                for_each_set_bit(i, mask, 64) {
                        if (i >= func->allocated_stack / BPF_REG_SIZE) {
                                /* the sequence of instructions:
                                 * 2: (bf) r3 = r10
                                 * 3: (7b) *(u64 *)(r3 -8) = r0
                                 * 4: (79) r4 = *(u64 *)(r10 -8)
                                 * doesn't contain jmps. It's backtracked
                                 * as a single block.
                                 * During backtracking insn 3 is not recognized as
                                 * stack access, so at the end of backtracking
                                 * stack slot fp-8 is still marked in stack_mask.
                                 * However the parent state may not have accessed
                                 * fp-8 and it's "unallocated" stack space.
                                 * In such case fallback to conservative.
                                 */
                                mark_all_scalars_precise(env, st);
                                return 0;
                        }

                        if (!is_spilled_reg(&func->stack[i])) {
                                stack_mask &= ~(1ull << i);
                                continue;
                        }
                        reg = &func->stack[i].spilled_ptr;
                        if (reg->type != SCALAR_VALUE) {
                                stack_mask &= ~(1ull << i);
                                continue;
                        }
                        if (!reg->precise)
                                new_marks = true;
                        reg->precise = true;
                }
                if (env->log.level & BPF_LOG_LEVEL2) {
                        verbose(env, "parent %s regs=%x stack=%llx marks:",
                                new_marks ? "didn't have" : "already had",
                                reg_mask, stack_mask);
                        print_verifier_state(env, func, true);
                }

                /* stop once nothing is tracked any more, or once the parent
                 * already carried every mark we wanted to add
                 */
                if (!reg_mask && !stack_mask)
                        break;
                if (!new_marks)
                        break;

                /* continue backtracking over the parent's own insn range */
                last_idx = st->last_insn_idx;
                first_idx = st->first_insn_idx;
        }
        return 0;
}
2740
/* Mark register @regno of the current frame precise and backtrack from
 * there; no stack slot is involved. Returns what __mark_chain_precision()
 * returns.
 */
static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
{
        const int no_stack_slot = -1;

        return __mark_chain_precision(env, regno, no_stack_slot);
}
2745
/* Mark stack slot @spi of the current frame precise and backtrack from
 * there; no register is involved. Returns what __mark_chain_precision()
 * returns.
 */
static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
{
        const int no_register = -1;

        return __mark_chain_precision(env, no_register, spi);
}
2750
2751 static bool is_spillable_regtype(enum bpf_reg_type type)
2752 {
2753         switch (base_type(type)) {
2754         case PTR_TO_MAP_VALUE:
2755         case PTR_TO_STACK:
2756         case PTR_TO_CTX:
2757         case PTR_TO_PACKET:
2758         case PTR_TO_PACKET_META:
2759         case PTR_TO_PACKET_END:
2760         case PTR_TO_FLOW_KEYS:
2761         case CONST_PTR_TO_MAP:
2762         case PTR_TO_SOCKET:
2763         case PTR_TO_SOCK_COMMON:
2764         case PTR_TO_TCP_SOCK:
2765         case PTR_TO_XDP_SOCK:
2766         case PTR_TO_BTF_ID:
2767         case PTR_TO_BUF:
2768         case PTR_TO_PERCPU_BTF_ID:
2769         case PTR_TO_MEM:
2770         case PTR_TO_FUNC:
2771         case PTR_TO_MAP_KEY:
2772                 return true;
2773         default:
2774                 return false;
2775         }
2776 }
2777
2778 /* Does this register contain a constant zero? */
2779 static bool register_is_null(struct bpf_reg_state *reg)
2780 {
2781         return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
2782 }
2783
2784 static bool register_is_const(struct bpf_reg_state *reg)
2785 {
2786         return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
2787 }
2788
2789 static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
2790 {
2791         return tnum_is_unknown(reg->var_off) &&
2792                reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
2793                reg->umin_value == 0 && reg->umax_value == U64_MAX &&
2794                reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
2795                reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
2796 }
2797
2798 static bool register_is_bounded(struct bpf_reg_state *reg)
2799 {
2800         return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
2801 }
2802
2803 static bool __is_pointer_value(bool allow_ptr_leaks,
2804                                const struct bpf_reg_state *reg)
2805 {
2806         if (allow_ptr_leaks)
2807                 return false;
2808
2809         return reg->type != SCALAR_VALUE;
2810 }
2811
2812 static void save_register_state(struct bpf_func_state *state,
2813                                 int spi, struct bpf_reg_state *reg,
2814                                 int size)
2815 {
2816         int i;
2817
2818         state->stack[spi].spilled_ptr = *reg;
2819         if (size == BPF_REG_SIZE)
2820                 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2821
2822         for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
2823                 state->stack[spi].slot_type[i - 1] = STACK_SPILL;
2824
2825         /* size < 8 bytes spill */
2826         for (; i; i--)
2827                 scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
2828 }
2829
/* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
 * stack boundary and alignment are checked in check_mem_access()
 *
 * Track the effect of a store of @size bytes at the fixed stack offset @off.
 *
 * @env:         verifier environment
 * @state:       stack frame being written to (may differ from the current
 *               frame)
 * @off:         offset relative to the frame pointer; negative, see the
 *               in-body note about what the caller has checked
 * @size:        number of bytes written
 * @value_regno: register (in the current frame) holding the stored value,
 *               or negative when the value does not come from a register
 * @insn_idx:    index of the store instruction
 *
 * Returns 0 on success or a negative error code: whatever grow_stack_state()
 * or mark_chain_precision() report, -EACCES for a write the verifier
 * rejects, -EINVAL for spilling a stack pointer into a caller's frame.
 */
static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
                                       /* stack frame we're writing to */
                                       struct bpf_func_state *state,
                                       int off, int size, int value_regno,
                                       int insn_idx)
{
        struct bpf_func_state *cur; /* state of the current function */
        /* slot: byte index counted down from the frame pointer (fp-1 is 0);
         * spi: index of the BPF_REG_SIZE-sized stack slot containing it
         */
        int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
        u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
        struct bpf_reg_state *reg = NULL;

        err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
        if (err)
                return err;
        /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
         * so it's aligned access and [off, off + size) are within stack limits
         */
        /* Without permission to leak pointers, a partial write to a slot
         * whose byte 0 is STACK_SPILL is refused.
         */
        if (!env->allow_ptr_leaks &&
            state->stack[spi].slot_type[0] == STACK_SPILL &&
            size != BPF_REG_SIZE) {
                verbose(env, "attempt to corrupt spilled pointer on stack\n");
                return -EACCES;
        }

        cur = env->cur_state->frame[env->cur_state->curframe];
        if (value_regno >= 0)
                reg = &cur->regs[value_regno];
        if (!env->bypass_spec_v4) {
                /* Flag this insn for stack-spill sanitation when a spillable
                 * pointer is stored, or when one of the first @size
                 * slot_type bytes of the slot is still STACK_INVALID.
                 */
                bool sanitize = reg && is_spillable_regtype(reg->type);

                for (i = 0; i < size; i++) {
                        if (state->stack[spi].slot_type[i] == STACK_INVALID) {
                                sanitize = true;
                                break;
                        }
                }

                if (sanitize)
                        env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
        }

        mark_stack_slot_scratched(env, spi);
        /* Case 1: a bounded, non-zero scalar stored at a slot-aligned offset
         * is tracked as a register spill (only when env->bpf_capable).
         */
        if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
            !register_is_null(reg) && env->bpf_capable) {
                if (dst_reg != BPF_REG_FP) {
                        /* The backtracking logic can only recognize explicit
                         * stack slot address like [fp - 8]. Other spill of
                         * scalar via different register has to be conservative.
                         * Backtrack from here and mark all registers as precise
                         * that contributed into 'reg' being a constant.
                         */
                        err = mark_chain_precision(env, value_regno);
                        if (err)
                                return err;
                }
                save_register_state(state, spi, reg, size);
        } else if (reg && is_spillable_regtype(reg->type)) {
                /* Case 2: */
                /* register containing pointer is being spilled into stack */
                if (size != BPF_REG_SIZE) {
                        verbose_linfo(env, insn_idx, "; ");
                        verbose(env, "invalid size of register spill\n");
                        return -EACCES;
                }
                if (state != cur && reg->type == PTR_TO_STACK) {
                        verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
                        return -EINVAL;
                }
                save_register_state(state, spi, reg, size);
        } else {
                /* Case 3: plain data write; affected bytes become STACK_MISC,
                 * or STACK_ZERO when a known-zero register is stored.
                 */
                u8 type = STACK_MISC;

                /* regular write of data into stack destroys any spilled ptr */
                state->stack[spi].spilled_ptr.type = NOT_INIT;
                /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
                if (is_spilled_reg(&state->stack[spi]))
                        for (i = 0; i < BPF_REG_SIZE; i++)
                                scrub_spilled_slot(&state->stack[spi].slot_type[i]);

                /* only mark the slot as written if all 8 bytes were written
                 * otherwise read propagation may incorrectly stop too soon
                 * when stack slots are partially written.
                 * This heuristic means that read propagation will be
                 * conservative, since it will add reg_live_read marks
                 * to stack slots all the way to first state when programs
                 * writes+reads less than 8 bytes
                 */
                if (size == BPF_REG_SIZE)
                        state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;

                /* when we zero initialize stack slots mark them as such */
                if (reg && register_is_null(reg)) {
                        /* backtracking doesn't work for STACK_ZERO yet. */
                        err = mark_chain_precision(env, value_regno);
                        if (err)
                                return err;
                        type = STACK_ZERO;
                }

                /* Mark slots affected by this stack write. */
                for (i = 0; i < size; i++)
                        state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
                                type;
        }
        return 0;
}
2938
2939 /* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
2940  * known to contain a variable offset.
2941  * This function checks whether the write is permitted and conservatively
2942  * tracks the effects of the write, considering that each stack slot in the
2943  * dynamic range is potentially written to.
2944  *
2945  * 'off' includes 'regno->off'.
2946  * 'value_regno' can be -1, meaning that an unknown value is being written to
2947  * the stack.
2948  *
2949  * Spilled pointers in range are not marked as written because we don't know
2950  * what's going to be actually written. This means that read propagation for
2951  * future reads cannot be terminated by this write.
2952  *
2953  * For privileged programs, uninitialized stack slots are considered
2954  * initialized by this write (even though we don't know exactly what offsets
2955  * are going to be written to). The idea is that we don't want the verifier to
2956  * reject future reads that access slots written to through variable offsets.
2957  */
2958 static int check_stack_write_var_off(struct bpf_verifier_env *env,
2959                                      /* func where register points to */
2960                                      struct bpf_func_state *state,
2961                                      int ptr_regno, int off, int size,
2962                                      int value_regno, int insn_idx)
2963 {
2964         struct bpf_func_state *cur; /* state of the current function */
2965         int min_off, max_off;
2966         int i, err;
2967         struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
2968         bool writing_zero = false;
2969         /* set if the fact that we're writing a zero is used to let any
2970          * stack slots remain STACK_ZERO
2971          */
2972         bool zero_used = false;
2973
2974         cur = env->cur_state->frame[env->cur_state->curframe];
2975         ptr_reg = &cur->regs[ptr_regno];
2976         min_off = ptr_reg->smin_value + off;
2977         max_off = ptr_reg->smax_value + off + size;
2978         if (value_regno >= 0)
2979                 value_reg = &cur->regs[value_regno];
2980         if (value_reg && register_is_null(value_reg))
2981                 writing_zero = true;
2982
2983         err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
2984         if (err)
2985                 return err;
2986
2987
2988         /* Variable offset writes destroy any spilled pointers in range. */
2989         for (i = min_off; i < max_off; i++) {
2990                 u8 new_type, *stype;
2991                 int slot, spi;
2992
2993                 slot = -i - 1;
2994                 spi = slot / BPF_REG_SIZE;
2995                 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
2996                 mark_stack_slot_scratched(env, spi);
2997
2998                 if (!env->allow_ptr_leaks
2999                                 && *stype != NOT_INIT
3000                                 && *stype != SCALAR_VALUE) {
3001                         /* Reject the write if there's are spilled pointers in
3002                          * range. If we didn't reject here, the ptr status
3003                          * would be erased below (even though not all slots are
3004                          * actually overwritten), possibly opening the door to
3005                          * leaks.
3006                          */
3007                         verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
3008                                 insn_idx, i);
3009                         return -EINVAL;
3010                 }
3011
3012                 /* Erase all spilled pointers. */
3013                 state->stack[spi].spilled_ptr.type = NOT_INIT;
3014
3015                 /* Update the slot type. */
3016                 new_type = STACK_MISC;
3017                 if (writing_zero && *stype == STACK_ZERO) {
3018                         new_type = STACK_ZERO;
3019                         zero_used = true;
3020                 }
3021                 /* If the slot is STACK_INVALID, we check whether it's OK to
3022                  * pretend that it will be initialized by this write. The slot
3023                  * might not actually be written to, and so if we mark it as
3024                  * initialized future reads might leak uninitialized memory.
3025                  * For privileged programs, we will accept such reads to slots
3026                  * that may or may not be written because, if we're reject
3027                  * them, the error would be too confusing.
3028                  */
3029                 if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
3030                         verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
3031                                         insn_idx, i);
3032                         return -EINVAL;
3033                 }
3034                 *stype = new_type;
3035         }
3036         if (zero_used) {
3037                 /* backtracking doesn't work for STACK_ZERO yet. */
3038                 err = mark_chain_precision(env, value_regno);
3039                 if (err)
3040                         return err;
3041         }
3042         return 0;
3043 }
3044
3045 /* When register 'dst_regno' is assigned some values from stack[min_off,
3046  * max_off), we set the register's type according to the types of the
3047  * respective stack slots. If all the stack values are known to be zeros, then
3048  * so is the destination reg. Otherwise, the register is considered to be
3049  * SCALAR. This function does not deal with register filling; the caller must
3050  * ensure that all spilled registers in the stack range have been marked as
3051  * read.
3052  */
3053 static void mark_reg_stack_read(struct bpf_verifier_env *env,
3054                                 /* func where src register points to */
3055                                 struct bpf_func_state *ptr_state,
3056                                 int min_off, int max_off, int dst_regno)
3057 {
3058         struct bpf_verifier_state *vstate = env->cur_state;
3059         struct bpf_func_state *state = vstate->frame[vstate->curframe];
3060         int i, slot, spi;
3061         u8 *stype;
3062         int zeros = 0;
3063
3064         for (i = min_off; i < max_off; i++) {
3065                 slot = -i - 1;
3066                 spi = slot / BPF_REG_SIZE;
3067                 stype = ptr_state->stack[spi].slot_type;
3068                 if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
3069                         break;
3070                 zeros++;
3071         }
3072         if (zeros == max_off - min_off) {
3073                 /* any access_size read into register is zero extended,
3074                  * so the whole register == const_zero
3075                  */
3076                 __mark_reg_const_zero(&state->regs[dst_regno]);
3077                 /* backtracking doesn't support STACK_ZERO yet,
3078                  * so mark it precise here, so that later
3079                  * backtracking can stop here.
3080                  * Backtracking may not need this if this register
3081                  * doesn't participate in pointer adjustment.
3082                  * Forward propagation of precise flag is not
3083                  * necessary either. This mark is only to stop
3084                  * backtracking. Any register that contributed
3085                  * to const 0 was marked precise before spill.
3086                  */
3087                 state->regs[dst_regno].precise = true;
3088         } else {
3089                 /* have read misc data from the stack */
3090                 mark_reg_unknown(env, state->regs, dst_regno);
3091         }
3092         state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3093 }
3094
3095 /* Read the stack at 'off' and put the results into the register indicated by
3096  * 'dst_regno'. It handles reg filling if the addressed stack slot is a
3097  * spilled reg.
3098  *
3099  * 'dst_regno' can be -1, meaning that the read value is not going to a
3100  * register.
3101  *
3102  * The access is assumed to be within the current stack bounds.
3103  */
3104 static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
3105                                       /* func where src register points to */
3106                                       struct bpf_func_state *reg_state,
3107                                       int off, int size, int dst_regno)
3108 {
3109         struct bpf_verifier_state *vstate = env->cur_state;
3110         struct bpf_func_state *state = vstate->frame[vstate->curframe];
3111         int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
3112         struct bpf_reg_state *reg;
3113         u8 *stype, type;
3114
3115         stype = reg_state->stack[spi].slot_type;
3116         reg = &reg_state->stack[spi].spilled_ptr;
3117
3118         if (is_spilled_reg(&reg_state->stack[spi])) {
3119                 u8 spill_size = 1;
3120
3121                 for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
3122                         spill_size++;
3123
3124                 if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
3125                         if (reg->type != SCALAR_VALUE) {
3126                                 verbose_linfo(env, env->insn_idx, "; ");
3127                                 verbose(env, "invalid size of register fill\n");
3128                                 return -EACCES;
3129                         }
3130
3131                         mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3132                         if (dst_regno < 0)
3133                                 return 0;
3134
3135                         if (!(off % BPF_REG_SIZE) && size == spill_size) {
3136                                 /* The earlier check_reg_arg() has decided the
3137                                  * subreg_def for this insn.  Save it first.
3138                                  */
3139                                 s32 subreg_def = state->regs[dst_regno].subreg_def;
3140
3141                                 state->regs[dst_regno] = *reg;
3142                                 state->regs[dst_regno].subreg_def = subreg_def;
3143                         } else {
3144                                 for (i = 0; i < size; i++) {
3145                                         type = stype[(slot - i) % BPF_REG_SIZE];
3146                                         if (type == STACK_SPILL)
3147                                                 continue;
3148                                         if (type == STACK_MISC)
3149                                                 continue;
3150                                         verbose(env, "invalid read from stack off %d+%d size %d\n",
3151                                                 off, i, size);
3152                                         return -EACCES;
3153                                 }
3154                                 mark_reg_unknown(env, state->regs, dst_regno);
3155                         }
3156                         state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3157                         return 0;
3158                 }
3159
3160                 if (dst_regno >= 0) {
3161                         /* restore register state from stack */
3162                         state->regs[dst_regno] = *reg;
3163                         /* mark reg as written since spilled pointer state likely
3164                          * has its liveness marks cleared by is_state_visited()
3165                          * which resets stack/reg liveness for state transitions
3166                          */
3167                         state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3168                 } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
3169                         /* If dst_regno==-1, the caller is asking us whether
3170                          * it is acceptable to use this value as a SCALAR_VALUE
3171                          * (e.g. for XADD).
3172                          * We must not allow unprivileged callers to do that
3173                          * with spilled pointers.
3174                          */
3175                         verbose(env, "leaking pointer from stack off %d\n",
3176                                 off);
3177                         return -EACCES;
3178                 }
3179                 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3180         } else {
3181                 for (i = 0; i < size; i++) {
3182                         type = stype[(slot - i) % BPF_REG_SIZE];
3183                         if (type == STACK_MISC)
3184                                 continue;
3185                         if (type == STACK_ZERO)
3186                                 continue;
3187                         verbose(env, "invalid read from stack off %d+%d size %d\n",
3188                                 off, i, size);
3189                         return -EACCES;
3190                 }
3191                 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3192                 if (dst_regno >= 0)
3193                         mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
3194         }
3195         return 0;
3196 }
3197
/* Identifies what performs a stack access that is being checked. */
enum stack_access_src {
	/* the access is performed by an instruction */
	ACCESS_DIRECT = 1,
	/* the access is performed by a helper */
	ACCESS_HELPER = 2,
};
3202
3203 static int check_stack_range_initialized(struct bpf_verifier_env *env,
3204                                          int regno, int off, int access_size,
3205                                          bool zero_size_allowed,
3206                                          enum stack_access_src type,
3207                                          struct bpf_call_arg_meta *meta);
3208
3209 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
3210 {
3211         return cur_regs(env) + regno;
3212 }
3213
3214 /* Read the stack at 'ptr_regno + off' and put the result into the register
3215  * 'dst_regno'.
3216  * 'off' includes the pointer register's fixed offset(i.e. 'ptr_regno.off'),
3217  * but not its variable offset.
3218  * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
3219  *
3220  * As opposed to check_stack_read_fixed_off, this function doesn't deal with
3221  * filling registers (i.e. reads of spilled register cannot be detected when
3222  * the offset is not fixed). We conservatively mark 'dst_regno' as containing
3223  * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
3224  * offset; for a fixed offset check_stack_read_fixed_off should be used
3225  * instead.
3226  */
3227 static int check_stack_read_var_off(struct bpf_verifier_env *env,
3228                                     int ptr_regno, int off, int size, int dst_regno)
3229 {
3230         /* The state of the source register. */
3231         struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3232         struct bpf_func_state *ptr_state = func(env, reg);
3233         int err;
3234         int min_off, max_off;
3235
3236         /* Note that we pass a NULL meta, so raw access will not be permitted.
3237          */
3238         err = check_stack_range_initialized(env, ptr_regno, off, size,
3239                                             false, ACCESS_DIRECT, NULL);
3240         if (err)
3241                 return err;
3242
3243         min_off = reg->smin_value + off;
3244         max_off = reg->smax_value + off;
3245         mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
3246         return 0;
3247 }
3248
3249 /* check_stack_read dispatches to check_stack_read_fixed_off or
3250  * check_stack_read_var_off.
3251  *
3252  * The caller must ensure that the offset falls within the allocated stack
3253  * bounds.
3254  *
3255  * 'dst_regno' is a register which will receive the value from the stack. It
3256  * can be -1, meaning that the read value is not going to a register.
3257  */
3258 static int check_stack_read(struct bpf_verifier_env *env,
3259                             int ptr_regno, int off, int size,
3260                             int dst_regno)
3261 {
3262         struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3263         struct bpf_func_state *state = func(env, reg);
3264         int err;
3265         /* Some accesses are only permitted with a static offset. */
3266         bool var_off = !tnum_is_const(reg->var_off);
3267
3268         /* The offset is required to be static when reads don't go to a
3269          * register, in order to not leak pointers (see
3270          * check_stack_read_fixed_off).
3271          */
3272         if (dst_regno < 0 && var_off) {
3273                 char tn_buf[48];
3274
3275                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3276                 verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
3277                         tn_buf, off, size);
3278                 return -EACCES;
3279         }
3280         /* Variable offset is prohibited for unprivileged mode for simplicity
3281          * since it requires corresponding support in Spectre masking for stack
3282          * ALU. See also retrieve_ptr_limit().
3283          */
3284         if (!env->bypass_spec_v1 && var_off) {
3285                 char tn_buf[48];
3286
3287                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3288                 verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
3289                                 ptr_regno, tn_buf);
3290                 return -EACCES;
3291         }
3292
3293         if (!var_off) {
3294                 off += reg->var_off.value;
3295                 err = check_stack_read_fixed_off(env, state, off, size,
3296                                                  dst_regno);
3297         } else {
3298                 /* Variable offset stack reads need more conservative handling
3299                  * than fixed offset ones. Note that dst_regno >= 0 on this
3300                  * branch.
3301                  */
3302                 err = check_stack_read_var_off(env, ptr_regno, off, size,
3303                                                dst_regno);
3304         }
3305         return err;
3306 }
3307
3308
3309 /* check_stack_write dispatches to check_stack_write_fixed_off or
3310  * check_stack_write_var_off.
3311  *
3312  * 'ptr_regno' is the register used as a pointer into the stack.
3313  * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
3314  * 'value_regno' is the register whose value we're writing to the stack. It can
3315  * be -1, meaning that we're not writing from a register.
3316  *
3317  * The caller must ensure that the offset falls within the maximum stack size.
3318  */
3319 static int check_stack_write(struct bpf_verifier_env *env,
3320                              int ptr_regno, int off, int size,
3321                              int value_regno, int insn_idx)
3322 {
3323         struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3324         struct bpf_func_state *state = func(env, reg);
3325         int err;
3326
3327         if (tnum_is_const(reg->var_off)) {
3328                 off += reg->var_off.value;
3329                 err = check_stack_write_fixed_off(env, state, off, size,
3330                                                   value_regno, insn_idx);
3331         } else {
3332                 /* Variable offset stack reads need more conservative handling
3333                  * than fixed offset ones.
3334                  */
3335                 err = check_stack_write_var_off(env, state,
3336                                                 ptr_regno, off, size,
3337                                                 value_regno, insn_idx);
3338         }
3339         return err;
3340 }
3341
3342 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
3343                                  int off, int size, enum bpf_access_type type)
3344 {
3345         struct bpf_reg_state *regs = cur_regs(env);
3346         struct bpf_map *map = regs[regno].map_ptr;
3347         u32 cap = bpf_map_flags_to_cap(map);
3348
3349         if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
3350                 verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
3351                         map->value_size, off, size);
3352                 return -EACCES;
3353         }
3354
3355         if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
3356                 verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
3357                         map->value_size, off, size);
3358                 return -EACCES;
3359         }
3360
3361         return 0;
3362 }
3363
3364 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
3365 static int __check_mem_access(struct bpf_verifier_env *env, int regno,
3366                               int off, int size, u32 mem_size,
3367                               bool zero_size_allowed)
3368 {
3369         bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
3370         struct bpf_reg_state *reg;
3371
3372         if (off >= 0 && size_ok && (u64)off + size <= mem_size)
3373                 return 0;
3374
3375         reg = &cur_regs(env)[regno];
3376         switch (reg->type) {
3377         case PTR_TO_MAP_KEY:
3378                 verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
3379                         mem_size, off, size);
3380                 break;
3381         case PTR_TO_MAP_VALUE:
3382                 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
3383                         mem_size, off, size);
3384                 break;
3385         case PTR_TO_PACKET:
3386         case PTR_TO_PACKET_META:
3387         case PTR_TO_PACKET_END:
3388                 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
3389                         off, size, regno, reg->id, off, mem_size);
3390                 break;
3391         case PTR_TO_MEM:
3392         default:
3393                 verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
3394                         mem_size, off, size);
3395         }
3396
3397         return -EACCES;
3398 }
3399
3400 /* check read/write into a memory region with possible variable offset */
3401 static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
3402                                    int off, int size, u32 mem_size,
3403                                    bool zero_size_allowed)
3404 {
3405         struct bpf_verifier_state *vstate = env->cur_state;
3406         struct bpf_func_state *state = vstate->frame[vstate->curframe];
3407         struct bpf_reg_state *reg = &state->regs[regno];
3408         int err;
3409
3410         /* We may have adjusted the register pointing to memory region, so we
3411          * need to try adding each of min_value and max_value to off
3412          * to make sure our theoretical access will be safe.
3413          *
3414          * The minimum value is only important with signed
3415          * comparisons where we can't assume the floor of a
3416          * value is 0.  If we are using signed variables for our
3417          * index'es we need to make sure that whatever we use
3418          * will have a set floor within our range.
3419          */
3420         if (reg->smin_value < 0 &&
3421             (reg->smin_value == S64_MIN ||
3422              (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
3423               reg->smin_value + off < 0)) {
3424                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3425                         regno);
3426                 return -EACCES;
3427         }
3428         err = __check_mem_access(env, regno, reg->smin_value + off, size,
3429                                  mem_size, zero_size_allowed);
3430         if (err) {
3431                 verbose(env, "R%d min value is outside of the allowed memory range\n",
3432                         regno);
3433                 return err;
3434         }
3435
3436         /* If we haven't set a max value then we need to bail since we can't be
3437          * sure we won't do bad things.
3438          * If reg->umax_value + off could overflow, treat that as unbounded too.
3439          */
3440         if (reg->umax_value >= BPF_MAX_VAR_OFF) {
3441                 verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
3442                         regno);
3443                 return -EACCES;
3444         }
3445         err = __check_mem_access(env, regno, reg->umax_value + off, size,
3446                                  mem_size, zero_size_allowed);
3447         if (err) {
3448                 verbose(env, "R%d max value is outside of the allowed memory range\n",
3449                         regno);
3450                 return err;
3451         }
3452
3453         return 0;
3454 }
3455
3456 /* check read/write into a map element with possible variable offset */
3457 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
3458                             int off, int size, bool zero_size_allowed)
3459 {
3460         struct bpf_verifier_state *vstate = env->cur_state;
3461         struct bpf_func_state *state = vstate->frame[vstate->curframe];
3462         struct bpf_reg_state *reg = &state->regs[regno];
3463         struct bpf_map *map = reg->map_ptr;
3464         int err;
3465
3466         err = check_mem_region_access(env, regno, off, size, map->value_size,
3467                                       zero_size_allowed);
3468         if (err)
3469                 return err;
3470
3471         if (map_value_has_spin_lock(map)) {
3472                 u32 lock = map->spin_lock_off;
3473
3474                 /* if any part of struct bpf_spin_lock can be touched by
3475                  * load/store reject this program.
3476                  * To check that [x1, x2) overlaps with [y1, y2)
3477                  * it is sufficient to check x1 < y2 && y1 < x2.
3478                  */
3479                 if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
3480                      lock < reg->umax_value + off + size) {
3481                         verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
3482                         return -EACCES;
3483                 }
3484         }
3485         if (map_value_has_timer(map)) {
3486                 u32 t = map->timer_off;
3487
3488                 if (reg->smin_value + off < t + sizeof(struct bpf_timer) &&
3489                      t < reg->umax_value + off + size) {
3490                         verbose(env, "bpf_timer cannot be accessed directly by load/store\n");
3491                         return -EACCES;
3492                 }
3493         }
3494         return err;
3495 }
3496
3497 #define MAX_PACKET_OFF 0xffff
3498
3499 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
3500                                        const struct bpf_call_arg_meta *meta,
3501                                        enum bpf_access_type t)
3502 {
3503         enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
3504
3505         switch (prog_type) {
3506         /* Program types only with direct read access go here! */
3507         case BPF_PROG_TYPE_LWT_IN:
3508         case BPF_PROG_TYPE_LWT_OUT:
3509         case BPF_PROG_TYPE_LWT_SEG6LOCAL:
3510         case BPF_PROG_TYPE_SK_REUSEPORT:
3511         case BPF_PROG_TYPE_FLOW_DISSECTOR:
3512         case BPF_PROG_TYPE_CGROUP_SKB:
3513                 if (t == BPF_WRITE)
3514                         return false;
3515                 fallthrough;
3516
3517         /* Program types with direct read + write access go here! */
3518         case BPF_PROG_TYPE_SCHED_CLS:
3519         case BPF_PROG_TYPE_SCHED_ACT:
3520         case BPF_PROG_TYPE_XDP:
3521         case BPF_PROG_TYPE_LWT_XMIT:
3522         case BPF_PROG_TYPE_SK_SKB:
3523         case BPF_PROG_TYPE_SK_MSG:
3524                 if (meta)
3525                         return meta->pkt_access;
3526
3527                 env->seen_direct_write = true;
3528                 return true;
3529
3530         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3531                 if (t == BPF_WRITE)
3532                         env->seen_direct_write = true;
3533
3534                 return true;
3535
3536         default:
3537                 return false;
3538         }
3539 }
3540
3541 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
3542                                int size, bool zero_size_allowed)
3543 {
3544         struct bpf_reg_state *regs = cur_regs(env);
3545         struct bpf_reg_state *reg = &regs[regno];
3546         int err;
3547
3548         /* We may have added a variable offset to the packet pointer; but any
3549          * reg->range we have comes after that.  We are only checking the fixed
3550          * offset.
3551          */
3552
3553         /* We don't allow negative numbers, because we aren't tracking enough
3554          * detail to prove they're safe.
3555          */
3556         if (reg->smin_value < 0) {
3557                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3558                         regno);
3559                 return -EACCES;
3560         }
3561
3562         err = reg->range < 0 ? -EINVAL :
3563               __check_mem_access(env, regno, off, size, reg->range,
3564                                  zero_size_allowed);
3565         if (err) {
3566                 verbose(env, "R%d offset is outside of the packet\n", regno);
3567                 return err;
3568         }
3569
3570         /* __check_mem_access has made sure "off + size - 1" is within u16.
3571          * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
3572          * otherwise find_good_pkt_pointers would have refused to set range info
3573          * that __check_mem_access would have rejected this pkt access.
3574          * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
3575          */
3576         env->prog->aux->max_pkt_offset =
3577                 max_t(u32, env->prog->aux->max_pkt_offset,
3578                       off + reg->umax_value + size - 1);
3579
3580         return err;
3581 }
3582
3583 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
3584 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
3585                             enum bpf_access_type t, enum bpf_reg_type *reg_type,
3586                             struct btf **btf, u32 *btf_id)
3587 {
3588         struct bpf_insn_access_aux info = {
3589                 .reg_type = *reg_type,
3590                 .log = &env->log,
3591         };
3592
3593         if (env->ops->is_valid_access &&
3594             env->ops->is_valid_access(off, size, t, env->prog, &info)) {
3595                 /* A non zero info.ctx_field_size indicates that this field is a
3596                  * candidate for later verifier transformation to load the whole
3597                  * field and then apply a mask when accessed with a narrower
3598                  * access than actual ctx access size. A zero info.ctx_field_size
3599                  * will only allow for whole field access and rejects any other
3600                  * type of narrower access.
3601                  */
3602                 *reg_type = info.reg_type;
3603
3604                 if (base_type(*reg_type) == PTR_TO_BTF_ID) {
3605                         *btf = info.btf;
3606                         *btf_id = info.btf_id;
3607                 } else {
3608                         env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
3609                 }
3610                 /* remember the offset of last byte accessed in ctx */
3611                 if (env->prog->aux->max_ctx_offset < off + size)
3612                         env->prog->aux->max_ctx_offset = off + size;
3613                 return 0;
3614         }
3615
3616         verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
3617         return -EACCES;
3618 }
3619
3620 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
3621                                   int size)
3622 {
3623         if (size < 0 || off < 0 ||
3624             (u64)off + size > sizeof(struct bpf_flow_keys)) {
3625                 verbose(env, "invalid access to flow keys off=%d size=%d\n",
3626                         off, size);
3627                 return -EACCES;
3628         }
3629         return 0;
3630 }
3631
3632 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
3633                              u32 regno, int off, int size,
3634                              enum bpf_access_type t)
3635 {
3636         struct bpf_reg_state *regs = cur_regs(env);
3637         struct bpf_reg_state *reg = &regs[regno];
3638         struct bpf_insn_access_aux info = {};
3639         bool valid;
3640
3641         if (reg->smin_value < 0) {
3642                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3643                         regno);
3644                 return -EACCES;
3645         }
3646
3647         switch (reg->type) {
3648         case PTR_TO_SOCK_COMMON:
3649                 valid = bpf_sock_common_is_valid_access(off, size, t, &info);
3650                 break;
3651         case PTR_TO_SOCKET:
3652                 valid = bpf_sock_is_valid_access(off, size, t, &info);
3653                 break;
3654         case PTR_TO_TCP_SOCK:
3655                 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
3656                 break;
3657         case PTR_TO_XDP_SOCK:
3658                 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
3659                 break;
3660         default:
3661                 valid = false;
3662         }
3663
3664
3665         if (valid) {
3666                 env->insn_aux_data[insn_idx].ctx_field_size =
3667                         info.ctx_field_size;
3668                 return 0;
3669         }
3670
3671         verbose(env, "R%d invalid %s access off=%d size=%d\n",
3672                 regno, reg_type_str(env, reg->type), off, size);
3673
3674         return -EACCES;
3675 }
3676
3677 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
3678 {
3679         return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
3680 }
3681
3682 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
3683 {
3684         const struct bpf_reg_state *reg = reg_state(env, regno);
3685
3686         return reg->type == PTR_TO_CTX;
3687 }
3688
3689 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
3690 {
3691         const struct bpf_reg_state *reg = reg_state(env, regno);
3692
3693         return type_is_sk_pointer(reg->type);
3694 }
3695
3696 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
3697 {
3698         const struct bpf_reg_state *reg = reg_state(env, regno);
3699
3700         return type_is_pkt_pointer(reg->type);
3701 }
3702
3703 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
3704 {
3705         const struct bpf_reg_state *reg = reg_state(env, regno);
3706
3707         /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
3708         return reg->type == PTR_TO_FLOW_KEYS;
3709 }
3710
3711 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
3712                                    const struct bpf_reg_state *reg,
3713                                    int off, int size, bool strict)
3714 {
3715         struct tnum reg_off;
3716         int ip_align;
3717
3718         /* Byte size accesses are always allowed. */
3719         if (!strict || size == 1)
3720                 return 0;
3721
3722         /* For platforms that do not have a Kconfig enabling
3723          * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
3724          * NET_IP_ALIGN is universally set to '2'.  And on platforms
3725          * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
3726          * to this code only in strict mode where we want to emulate
3727          * the NET_IP_ALIGN==2 checking.  Therefore use an
3728          * unconditional IP align value of '2'.
3729          */
3730         ip_align = 2;
3731
3732         reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
3733         if (!tnum_is_aligned(reg_off, size)) {
3734                 char tn_buf[48];
3735
3736                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3737                 verbose(env,
3738                         "misaligned packet access off %d+%s+%d+%d size %d\n",
3739                         ip_align, tn_buf, reg->off, off, size);
3740                 return -EACCES;
3741         }
3742
3743         return 0;
3744 }
3745
3746 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
3747                                        const struct bpf_reg_state *reg,
3748                                        const char *pointer_desc,
3749                                        int off, int size, bool strict)
3750 {
3751         struct tnum reg_off;
3752
3753         /* Byte size accesses are always allowed. */
3754         if (!strict || size == 1)
3755                 return 0;
3756
3757         reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
3758         if (!tnum_is_aligned(reg_off, size)) {
3759                 char tn_buf[48];
3760
3761                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3762                 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
3763                         pointer_desc, tn_buf, reg->off, off, size);
3764                 return -EACCES;
3765         }
3766
3767         return 0;
3768 }
3769
3770 static int check_ptr_alignment(struct bpf_verifier_env *env,
3771                                const struct bpf_reg_state *reg, int off,
3772                                int size, bool strict_alignment_once)
3773 {
3774         bool strict = env->strict_alignment || strict_alignment_once;
3775         const char *pointer_desc = "";
3776
3777         switch (reg->type) {
3778         case PTR_TO_PACKET:
3779         case PTR_TO_PACKET_META:
3780                 /* Special case, because of NET_IP_ALIGN. Given metadata sits
3781                  * right in front, treat it the very same way.
3782                  */
3783                 return check_pkt_ptr_alignment(env, reg, off, size, strict);
3784         case PTR_TO_FLOW_KEYS:
3785                 pointer_desc = "flow keys ";
3786                 break;
3787         case PTR_TO_MAP_KEY:
3788                 pointer_desc = "key ";
3789                 break;
3790         case PTR_TO_MAP_VALUE:
3791                 pointer_desc = "value ";
3792                 break;
3793         case PTR_TO_CTX:
3794                 pointer_desc = "context ";
3795                 break;
3796         case PTR_TO_STACK:
3797                 pointer_desc = "stack ";
3798                 /* The stack spill tracking logic in check_stack_write_fixed_off()
3799                  * and check_stack_read_fixed_off() relies on stack accesses being
3800                  * aligned.
3801                  */
3802                 strict = true;
3803                 break;
3804         case PTR_TO_SOCKET:
3805                 pointer_desc = "sock ";
3806                 break;
3807         case PTR_TO_SOCK_COMMON:
3808                 pointer_desc = "sock_common ";
3809                 break;
3810         case PTR_TO_TCP_SOCK:
3811                 pointer_desc = "tcp_sock ";
3812                 break;
3813         case PTR_TO_XDP_SOCK:
3814                 pointer_desc = "xdp_sock ";
3815                 break;
3816         default:
3817                 break;
3818         }
3819         return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
3820                                            strict);
3821 }
3822
3823 static int update_stack_depth(struct bpf_verifier_env *env,
3824                               const struct bpf_func_state *func,
3825                               int off)
3826 {
3827         u16 stack = env->subprog_info[func->subprogno].stack_depth;
3828
3829         if (stack >= -off)
3830                 return 0;
3831
3832         /* update known max for given subprogram */
3833         env->subprog_info[func->subprogno].stack_depth = -off;
3834         return 0;
3835 }
3836
3837 /* starting from main bpf function walk all instructions of the function
3838  * and recursively walk all callees that given function can call.
3839  * Ignore jump and exit insns.
3840  * Since recursion is prevented by check_cfg() this algorithm
3841  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
3842  */
3843 static int check_max_stack_depth(struct bpf_verifier_env *env)
3844 {
3845         int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
3846         struct bpf_subprog_info *subprog = env->subprog_info;
3847         struct bpf_insn *insn = env->prog->insnsi;
3848         bool tail_call_reachable = false;
3849         int ret_insn[MAX_CALL_FRAMES];
3850         int ret_prog[MAX_CALL_FRAMES];
3851         int j;
3852
3853 process_func:
3854         /* protect against potential stack overflow that might happen when
3855          * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
3856          * depth for such case down to 256 so that the worst case scenario
3857          * would result in 8k stack size (32 which is tailcall limit * 256 =
3858          * 8k).
3859          *
3860          * To get the idea what might happen, see an example:
3861          * func1 -> sub rsp, 128
3862          *  subfunc1 -> sub rsp, 256
3863          *  tailcall1 -> add rsp, 256
3864          *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
3865          *   subfunc2 -> sub rsp, 64
3866          *   subfunc22 -> sub rsp, 128
3867          *   tailcall2 -> add rsp, 128
3868          *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
3869          *
3870          * tailcall will unwind the current stack frame but it will not get rid
3871          * of caller's stack as shown on the example above.
3872          */
3873         if (idx && subprog[idx].has_tail_call && depth >= 256) {
3874                 verbose(env,
3875                         "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
3876                         depth);
3877                 return -EACCES;
3878         }
3879         /* round up to 32-bytes, since this is granularity
3880          * of interpreter stack size
3881          */
3882         depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
3883         if (depth > MAX_BPF_STACK) {
3884                 verbose(env, "combined stack size of %d calls is %d. Too large\n",
3885                         frame + 1, depth);
3886                 return -EACCES;
3887         }
3888 continue_func:
3889         subprog_end = subprog[idx + 1].start;
3890         for (; i < subprog_end; i++) {
3891                 int next_insn;
3892
3893                 if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
3894                         continue;
3895                 /* remember insn and function to return to */
3896                 ret_insn[frame] = i + 1;
3897                 ret_prog[frame] = idx;
3898
3899                 /* find the callee */
3900                 next_insn = i + insn[i].imm + 1;
3901                 idx = find_subprog(env, next_insn);
3902                 if (idx < 0) {
3903                         WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
3904                                   next_insn);
3905                         return -EFAULT;
3906                 }
3907                 if (subprog[idx].is_async_cb) {
3908                         if (subprog[idx].has_tail_call) {
3909                                 verbose(env, "verifier bug. subprog has tail_call and async cb\n");
3910                                 return -EFAULT;
3911                         }
3912                          /* async callbacks don't increase bpf prog stack size */
3913                         continue;
3914                 }
3915                 i = next_insn;
3916
3917                 if (subprog[idx].has_tail_call)
3918                         tail_call_reachable = true;
3919
3920                 frame++;
3921                 if (frame >= MAX_CALL_FRAMES) {
3922                         verbose(env, "the call stack of %d frames is too deep !\n",
3923                                 frame);
3924                         return -E2BIG;
3925                 }
3926                 goto process_func;
3927         }
3928         /* if tail call got detected across bpf2bpf calls then mark each of the
3929          * currently present subprog frames as tail call reachable subprogs;
3930          * this info will be utilized by JIT so that we will be preserving the
3931          * tail call counter throughout bpf2bpf calls combined with tailcalls
3932          */
3933         if (tail_call_reachable)
3934                 for (j = 0; j < frame; j++)
3935                         subprog[ret_prog[j]].tail_call_reachable = true;
3936         if (subprog[0].tail_call_reachable)
3937                 env->prog->aux->tail_call_reachable = true;
3938
3939         /* end of for() loop means the last insn of the 'subprog'
3940          * was reached. Doesn't matter whether it was JA or EXIT
3941          */
3942         if (frame == 0)
3943                 return 0;
3944         depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
3945         frame--;
3946         i = ret_insn[frame];
3947         idx = ret_prog[frame];
3948         goto continue_func;
3949 }
3950
3951 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
3952 static int get_callee_stack_depth(struct bpf_verifier_env *env,
3953                                   const struct bpf_insn *insn, int idx)
3954 {
3955         int start = idx + insn->imm + 1, subprog;
3956
3957         subprog = find_subprog(env, start);
3958         if (subprog < 0) {
3959                 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
3960                           start);
3961                 return -EFAULT;
3962         }
3963         return env->subprog_info[subprog].stack_depth;
3964 }
3965 #endif
3966
3967 static int __check_ptr_off_reg(struct bpf_verifier_env *env,
3968                                const struct bpf_reg_state *reg, int regno,
3969                                bool fixed_off_ok)
3970 {
3971         /* Access to this pointer-typed register or passing it to a helper
3972          * is only allowed in its original, unmodified form.
3973          */
3974
3975         if (!fixed_off_ok && reg->off) {
3976                 verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
3977                         reg_type_str(env, reg->type), regno, reg->off);
3978                 return -EACCES;
3979         }
3980
3981         if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3982                 char tn_buf[48];
3983
3984                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3985                 verbose(env, "variable %s access var_off=%s disallowed\n",
3986                         reg_type_str(env, reg->type), tn_buf);
3987                 return -EACCES;
3988         }
3989
3990         return 0;
3991 }
3992
3993 int check_ptr_off_reg(struct bpf_verifier_env *env,
3994                       const struct bpf_reg_state *reg, int regno)
3995 {
3996         return __check_ptr_off_reg(env, reg, regno, false);
3997 }
3998
3999 static int __check_buffer_access(struct bpf_verifier_env *env,
4000                                  const char *buf_info,
4001                                  const struct bpf_reg_state *reg,
4002                                  int regno, int off, int size)
4003 {
4004         if (off < 0) {
4005                 verbose(env,
4006                         "R%d invalid %s buffer access: off=%d, size=%d\n",
4007                         regno, buf_info, off, size);
4008                 return -EACCES;
4009         }
4010         if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4011                 char tn_buf[48];
4012
4013                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4014                 verbose(env,
4015                         "R%d invalid variable buffer offset: off=%d, var_off=%s\n",
4016                         regno, off, tn_buf);
4017                 return -EACCES;
4018         }
4019
4020         return 0;
4021 }
4022
4023 static int check_tp_buffer_access(struct bpf_verifier_env *env,
4024                                   const struct bpf_reg_state *reg,
4025                                   int regno, int off, int size)
4026 {
4027         int err;
4028
4029         err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
4030         if (err)
4031                 return err;
4032
4033         if (off + size > env->prog->aux->max_tp_access)
4034                 env->prog->aux->max_tp_access = off + size;
4035
4036         return 0;
4037 }
4038
4039 static int check_buffer_access(struct bpf_verifier_env *env,
4040                                const struct bpf_reg_state *reg,
4041                                int regno, int off, int size,
4042                                bool zero_size_allowed,
4043                                const char *buf_info,
4044                                u32 *max_access)
4045 {
4046         int err;
4047
4048         err = __check_buffer_access(env, buf_info, reg, regno, off, size);
4049         if (err)
4050                 return err;
4051
4052         if (off + size > *max_access)
4053                 *max_access = off + size;
4054
4055         return 0;
4056 }
4057
4058 /* BPF architecture zero extends alu32 ops into 64-bit registesr */
4059 static void zext_32_to_64(struct bpf_reg_state *reg)
4060 {
4061         reg->var_off = tnum_subreg(reg->var_off);
4062         __reg_assign_32_into_64(reg);
4063 }
4064
4065 /* truncate register to smaller size (in bytes)
4066  * must be called with size < BPF_REG_SIZE
4067  */
4068 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
4069 {
4070         u64 mask;
4071
4072         /* clear high bits in bit representation */
4073         reg->var_off = tnum_cast(reg->var_off, size);
4074
4075         /* fix arithmetic bounds */
4076         mask = ((u64)1 << (size * 8)) - 1;
4077         if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
4078                 reg->umin_value &= mask;
4079                 reg->umax_value &= mask;
4080         } else {
4081                 reg->umin_value = 0;
4082                 reg->umax_value = mask;
4083         }
4084         reg->smin_value = reg->umin_value;
4085         reg->smax_value = reg->umax_value;
4086
4087         /* If size is smaller than 32bit register the 32bit register
4088          * values are also truncated so we push 64-bit bounds into
4089          * 32-bit bounds. Above were truncated < 32-bits already.
4090          */
4091         if (size >= 4)
4092                 return;
4093         __reg_combine_64_into_32(reg);
4094 }
4095
4096 static bool bpf_map_is_rdonly(const struct bpf_map *map)
4097 {
4098         /* A map is considered read-only if the following condition are true:
4099          *
4100          * 1) BPF program side cannot change any of the map content. The
4101          *    BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
4102          *    and was set at map creation time.
4103          * 2) The map value(s) have been initialized from user space by a
4104          *    loader and then "frozen", such that no new map update/delete
4105          *    operations from syscall side are possible for the rest of
4106          *    the map's lifetime from that point onwards.
4107          * 3) Any parallel/pending map update/delete operations from syscall
4108          *    side have been completed. Only after that point, it's safe to
4109          *    assume that map value(s) are immutable.
4110          */
4111         return (map->map_flags & BPF_F_RDONLY_PROG) &&
4112                READ_ONCE(map->frozen) &&
4113                !bpf_map_write_active(map);
4114 }
4115
4116 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
4117 {
4118         void *ptr;
4119         u64 addr;
4120         int err;
4121
4122         err = map->ops->map_direct_value_addr(map, &addr, off);
4123         if (err)
4124                 return err;
4125         ptr = (void *)(long)addr + off;
4126
4127         switch (size) {
4128         case sizeof(u8):
4129                 *val = (u64)*(u8 *)ptr;
4130                 break;
4131         case sizeof(u16):
4132                 *val = (u64)*(u16 *)ptr;
4133                 break;
4134         case sizeof(u32):
4135                 *val = (u64)*(u32 *)ptr;
4136                 break;
4137         case sizeof(u64):
4138                 *val = *(u64 *)ptr;
4139                 break;
4140         default:
4141                 return -EINVAL;
4142         }
4143         return 0;
4144 }
4145
4146 static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
4147                                    struct bpf_reg_state *regs,
4148                                    int regno, int off, int size,
4149                                    enum bpf_access_type atype,
4150                                    int value_regno)
4151 {
4152         struct bpf_reg_state *reg = regs + regno;
4153         const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
4154         const char *tname = btf_name_by_offset(reg->btf, t->name_off);
4155         u32 btf_id;
4156         int ret;
4157
4158         if (off < 0) {
4159                 verbose(env,
4160                         "R%d is ptr_%s invalid negative access: off=%d\n",
4161                         regno, tname, off);
4162                 return -EACCES;
4163         }
4164         if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4165                 char tn_buf[48];
4166
4167                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4168                 verbose(env,
4169                         "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
4170                         regno, tname, off, tn_buf);
4171                 return -EACCES;
4172         }
4173
4174         if (env->ops->btf_struct_access) {
4175                 ret = env->ops->btf_struct_access(&env->log, reg->btf, t,
4176                                                   off, size, atype, &btf_id);
4177         } else {
4178                 if (atype != BPF_READ) {
4179                         verbose(env, "only read is supported\n");
4180                         return -EACCES;
4181                 }
4182
4183                 ret = btf_struct_access(&env->log, reg->btf, t, off, size,
4184                                         atype, &btf_id);
4185         }
4186
4187         if (ret < 0)
4188                 return ret;
4189
4190         if (atype == BPF_READ && value_regno >= 0)
4191                 mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id);
4192
4193         return 0;
4194 }
4195
4196 static int check_ptr_to_map_access(struct bpf_verifier_env *env,
4197                                    struct bpf_reg_state *regs,
4198                                    int regno, int off, int size,
4199                                    enum bpf_access_type atype,
4200                                    int value_regno)
4201 {
4202         struct bpf_reg_state *reg = regs + regno;
4203         struct bpf_map *map = reg->map_ptr;
4204         const struct btf_type *t;
4205         const char *tname;
4206         u32 btf_id;
4207         int ret;
4208
4209         if (!btf_vmlinux) {
4210                 verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
4211                 return -ENOTSUPP;
4212         }
4213
4214         if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
4215                 verbose(env, "map_ptr access not supported for map type %d\n",
4216                         map->map_type);
4217                 return -ENOTSUPP;
4218         }
4219
4220         t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
4221         tname = btf_name_by_offset(btf_vmlinux, t->name_off);
4222
4223         if (!env->allow_ptr_to_map_access) {
4224                 verbose(env,
4225                         "%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
4226                         tname);
4227                 return -EPERM;
4228         }
4229
4230         if (off < 0) {
4231                 verbose(env, "R%d is %s invalid negative access: off=%d\n",
4232                         regno, tname, off);
4233                 return -EACCES;
4234         }
4235
4236         if (atype != BPF_READ) {
4237                 verbose(env, "only read from %s is supported\n", tname);
4238                 return -EACCES;
4239         }
4240
4241         ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id);
4242         if (ret < 0)
4243                 return ret;
4244
4245         if (value_regno >= 0)
4246                 mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id);
4247
4248         return 0;
4249 }
4250
4251 /* Check that the stack access at the given offset is within bounds. The
4252  * maximum valid offset is -1.
4253  *
4254  * The minimum valid offset is -MAX_BPF_STACK for writes, and
4255  * -state->allocated_stack for reads.
4256  */
4257 static int check_stack_slot_within_bounds(int off,
4258                                           struct bpf_func_state *state,
4259                                           enum bpf_access_type t)
4260 {
4261         int min_valid_off;
4262
4263         if (t == BPF_WRITE)
4264                 min_valid_off = -MAX_BPF_STACK;
4265         else
4266                 min_valid_off = -state->allocated_stack;
4267
4268         if (off < min_valid_off || off > -1)
4269                 return -EACCES;
4270         return 0;
4271 }
4272
4273 /* Check that the stack access at 'regno + off' falls within the maximum stack
4274  * bounds.
4275  *
4276  * 'off' includes `regno->offset`, but not its dynamic part (if any).
4277  */
4278 static int check_stack_access_within_bounds(
4279                 struct bpf_verifier_env *env,
4280                 int regno, int off, int access_size,
4281                 enum stack_access_src src, enum bpf_access_type type)
4282 {
4283         struct bpf_reg_state *regs = cur_regs(env);
4284         struct bpf_reg_state *reg = regs + regno;
4285         struct bpf_func_state *state = func(env, reg);
4286         int min_off, max_off;
4287         int err;
4288         char *err_extra;
4289
4290         if (src == ACCESS_HELPER)
4291                 /* We don't know if helpers are reading or writing (or both). */
4292                 err_extra = " indirect access to";
4293         else if (type == BPF_READ)
4294                 err_extra = " read from";
4295         else
4296                 err_extra = " write to";
4297
4298         if (tnum_is_const(reg->var_off)) {
4299                 min_off = reg->var_off.value + off;
4300                 if (access_size > 0)
4301                         max_off = min_off + access_size - 1;
4302                 else
4303                         max_off = min_off;
4304         } else {
4305                 if (reg->smax_value >= BPF_MAX_VAR_OFF ||
4306                     reg->smin_value <= -BPF_MAX_VAR_OFF) {
4307                         verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
4308                                 err_extra, regno);
4309                         return -EACCES;
4310                 }
4311                 min_off = reg->smin_value + off;
4312                 if (access_size > 0)
4313                         max_off = reg->smax_value + off + access_size - 1;
4314                 else
4315                         max_off = min_off;
4316         }
4317
4318         err = check_stack_slot_within_bounds(min_off, state, type);
4319         if (!err)
4320                 err = check_stack_slot_within_bounds(max_off, state, type);
4321
4322         if (err) {
4323                 if (tnum_is_const(reg->var_off)) {
4324                         verbose(env, "invalid%s stack R%d off=%d size=%d\n",
4325                                 err_extra, regno, off, access_size);
4326                 } else {
4327                         char tn_buf[48];
4328
4329                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4330                         verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
4331                                 err_extra, regno, tn_buf, access_size);
4332                 }
4333         }
4334         return err;
4335 }
4336
4337 /* check whether memory at (regno + off) is accessible for t = (read | write)
4338  * if t==write, value_regno is a register which value is stored into memory
4339  * if t==read, value_regno is a register which will receive the value from memory
4340  * if t==write && value_regno==-1, some unknown value is stored into memory
4341  * if t==read && value_regno==-1, don't care what we read from memory
4342  */
4343 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
4344                             int off, int bpf_size, enum bpf_access_type t,
4345                             int value_regno, bool strict_alignment_once)
4346 {
4347         struct bpf_reg_state *regs = cur_regs(env);
4348         struct bpf_reg_state *reg = regs + regno;
4349         struct bpf_func_state *state;
4350         int size, err = 0;
4351
4352         size = bpf_size_to_bytes(bpf_size);
4353         if (size < 0)
4354                 return size;
4355
4356         /* alignment checks will add in reg->off themselves */
4357         err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
4358         if (err)
4359                 return err;
4360
4361         /* for access checks, reg->off is just part of off */
4362         off += reg->off;
4363
4364         if (reg->type == PTR_TO_MAP_KEY) {
4365                 if (t == BPF_WRITE) {
4366                         verbose(env, "write to change key R%d not allowed\n", regno);
4367                         return -EACCES;
4368                 }
4369
4370                 err = check_mem_region_access(env, regno, off, size,
4371                                               reg->map_ptr->key_size, false);
4372                 if (err)
4373                         return err;
4374                 if (value_regno >= 0)
4375                         mark_reg_unknown(env, regs, value_regno);
4376         } else if (reg->type == PTR_TO_MAP_VALUE) {
4377                 if (t == BPF_WRITE && value_regno >= 0 &&
4378                     is_pointer_value(env, value_regno)) {
4379                         verbose(env, "R%d leaks addr into map\n", value_regno);
4380                         return -EACCES;
4381                 }
4382                 err = check_map_access_type(env, regno, off, size, t);
4383                 if (err)
4384                         return err;
4385                 err = check_map_access(env, regno, off, size, false);
4386                 if (!err && t == BPF_READ && value_regno >= 0) {
4387                         struct bpf_map *map = reg->map_ptr;
4388
4389                         /* if map is read-only, track its contents as scalars */
4390                         if (tnum_is_const(reg->var_off) &&
4391                             bpf_map_is_rdonly(map) &&
4392                             map->ops->map_direct_value_addr) {
4393                                 int map_off = off + reg->var_off.value;
4394                                 u64 val = 0;
4395
4396                                 err = bpf_map_direct_read(map, map_off, size,
4397                                                           &val);
4398                                 if (err)
4399                                         return err;
4400
4401                                 regs[value_regno].type = SCALAR_VALUE;
4402                                 __mark_reg_known(&regs[value_regno], val);
4403                         } else {
4404                                 mark_reg_unknown(env, regs, value_regno);
4405                         }
4406                 }
4407         } else if (base_type(reg->type) == PTR_TO_MEM) {
4408                 bool rdonly_mem = type_is_rdonly_mem(reg->type);
4409
4410                 if (type_may_be_null(reg->type)) {
4411                         verbose(env, "R%d invalid mem access '%s'\n", regno,
4412                                 reg_type_str(env, reg->type));
4413                         return -EACCES;
4414                 }
4415
4416                 if (t == BPF_WRITE && rdonly_mem) {
4417                         verbose(env, "R%d cannot write into %s\n",
4418                                 regno, reg_type_str(env, reg->type));
4419                         return -EACCES;
4420                 }
4421
4422                 if (t == BPF_WRITE && value_regno >= 0 &&
4423                     is_pointer_value(env, value_regno)) {
4424                         verbose(env, "R%d leaks addr into mem\n", value_regno);
4425                         return -EACCES;
4426                 }
4427
4428                 err = check_mem_region_access(env, regno, off, size,
4429                                               reg->mem_size, false);
4430                 if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
4431                         mark_reg_unknown(env, regs, value_regno);
4432         } else if (reg->type == PTR_TO_CTX) {
4433                 enum bpf_reg_type reg_type = SCALAR_VALUE;
4434                 struct btf *btf = NULL;
4435                 u32 btf_id = 0;
4436
4437                 if (t == BPF_WRITE && value_regno >= 0 &&
4438                     is_pointer_value(env, value_regno)) {
4439                         verbose(env, "R%d leaks addr into ctx\n", value_regno);
4440                         return -EACCES;
4441                 }
4442
4443                 err = check_ptr_off_reg(env, reg, regno);
4444                 if (err < 0)
4445                         return err;
4446
4447                 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf, &btf_id);
4448                 if (err)
4449                         verbose_linfo(env, insn_idx, "; ");
4450                 if (!err && t == BPF_READ && value_regno >= 0) {
4451                         /* ctx access returns either a scalar, or a
4452                          * PTR_TO_PACKET[_META,_END]. In the latter
4453                          * case, we know the offset is zero.
4454                          */
4455                         if (reg_type == SCALAR_VALUE) {
4456                                 mark_reg_unknown(env, regs, value_regno);
4457                         } else {
4458                                 mark_reg_known_zero(env, regs,
4459                                                     value_regno);
4460                                 if (type_may_be_null(reg_type))
4461                                         regs[value_regno].id = ++env->id_gen;
4462                                 /* A load of ctx field could have different
4463                                  * actual load size with the one encoded in the
4464                                  * insn. When the dst is PTR, it is for sure not
4465                                  * a sub-register.
4466                                  */
4467                                 regs[value_regno].subreg_def = DEF_NOT_SUBREG;
4468                                 if (base_type(reg_type) == PTR_TO_BTF_ID) {
4469                                         regs[value_regno].btf = btf;
4470                                         regs[value_regno].btf_id = btf_id;
4471                                 }
4472                         }
4473                         regs[value_regno].type = reg_type;
4474                 }
4475
4476         } else if (reg->type == PTR_TO_STACK) {
4477                 /* Basic bounds checks. */
4478                 err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
4479                 if (err)
4480                         return err;
4481
4482                 state = func(env, reg);
4483                 err = update_stack_depth(env, state, off);
4484                 if (err)
4485                         return err;
4486
4487                 if (t == BPF_READ)
4488                         err = check_stack_read(env, regno, off, size,
4489                                                value_regno);
4490                 else
4491                         err = check_stack_write(env, regno, off, size,
4492                                                 value_regno, insn_idx);
4493         } else if (reg_is_pkt_pointer(reg)) {
4494                 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
4495                         verbose(env, "cannot write into packet\n");
4496                         return -EACCES;
4497                 }
4498                 if (t == BPF_WRITE && value_regno >= 0 &&
4499                     is_pointer_value(env, value_regno)) {
4500                         verbose(env, "R%d leaks addr into packet\n",
4501                                 value_regno);
4502                         return -EACCES;
4503                 }
4504                 err = check_packet_access(env, regno, off, size, false);
4505                 if (!err && t == BPF_READ && value_regno >= 0)
4506                         mark_reg_unknown(env, regs, value_regno);
4507         } else if (reg->type == PTR_TO_FLOW_KEYS) {
4508                 if (t == BPF_WRITE && value_regno >= 0 &&
4509                     is_pointer_value(env, value_regno)) {
4510                         verbose(env, "R%d leaks addr into flow keys\n",
4511                                 value_regno);
4512                         return -EACCES;
4513                 }
4514
4515                 err = check_flow_keys_access(env, off, size);
4516                 if (!err && t == BPF_READ && value_regno >= 0)
4517                         mark_reg_unknown(env, regs, value_regno);
4518         } else if (type_is_sk_pointer(reg->type)) {
4519                 if (t == BPF_WRITE) {
4520                         verbose(env, "R%d cannot write into %s\n",
4521                                 regno, reg_type_str(env, reg->type));
4522                         return -EACCES;
4523                 }
4524                 err = check_sock_access(env, insn_idx, regno, off, size, t);
4525                 if (!err && value_regno >= 0)
4526                         mark_reg_unknown(env, regs, value_regno);
4527         } else if (reg->type == PTR_TO_TP_BUFFER) {
4528                 err = check_tp_buffer_access(env, reg, regno, off, size);
4529                 if (!err && t == BPF_READ && value_regno >= 0)
4530                         mark_reg_unknown(env, regs, value_regno);
4531         } else if (reg->type == PTR_TO_BTF_ID) {
4532                 err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
4533                                               value_regno);
4534         } else if (reg->type == CONST_PTR_TO_MAP) {
4535                 err = check_ptr_to_map_access(env, regs, regno, off, size, t,
4536                                               value_regno);
4537         } else if (base_type(reg->type) == PTR_TO_BUF) {
4538                 bool rdonly_mem = type_is_rdonly_mem(reg->type);
4539                 const char *buf_info;
4540                 u32 *max_access;
4541
4542                 if (rdonly_mem) {
4543                         if (t == BPF_WRITE) {
4544                                 verbose(env, "R%d cannot write into %s\n",
4545                                         regno, reg_type_str(env, reg->type));
4546                                 return -EACCES;
4547                         }
4548                         buf_info = "rdonly";
4549                         max_access = &env->prog->aux->max_rdonly_access;
4550                 } else {
4551                         buf_info = "rdwr";
4552                         max_access = &env->prog->aux->max_rdwr_access;
4553                 }
4554
4555                 err = check_buffer_access(env, reg, regno, off, size, false,
4556                                           buf_info, max_access);
4557
4558                 if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
4559                         mark_reg_unknown(env, regs, value_regno);
4560         } else {
4561                 verbose(env, "R%d invalid mem access '%s'\n", regno,
4562                         reg_type_str(env, reg->type));
4563                 return -EACCES;
4564         }
4565
4566         if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
4567             regs[value_regno].type == SCALAR_VALUE) {
4568                 /* b/h/w load zero-extends, mark upper bits as known 0 */
4569                 coerce_reg_to_size(&regs[value_regno], size);
4570         }
4571         return err;
4572 }
4573
4574 static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
4575 {
4576         int load_reg;
4577         int err;
4578
4579         switch (insn->imm) {
4580         case BPF_ADD:
4581         case BPF_ADD | BPF_FETCH:
4582         case BPF_AND:
4583         case BPF_AND | BPF_FETCH:
4584         case BPF_OR:
4585         case BPF_OR | BPF_FETCH:
4586         case BPF_XOR:
4587         case BPF_XOR | BPF_FETCH:
4588         case BPF_XCHG:
4589         case BPF_CMPXCHG:
4590                 break;
4591         default:
4592                 verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
4593                 return -EINVAL;
4594         }
4595
4596         if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
4597                 verbose(env, "invalid atomic operand size\n");
4598                 return -EINVAL;
4599         }
4600
4601         /* check src1 operand */
4602         err = check_reg_arg(env, insn->src_reg, SRC_OP);
4603         if (err)
4604                 return err;
4605
4606         /* check src2 operand */
4607         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4608         if (err)
4609                 return err;
4610
4611         if (insn->imm == BPF_CMPXCHG) {
4612                 /* Check comparison of R0 with memory location */
4613                 const u32 aux_reg = BPF_REG_0;
4614
4615                 err = check_reg_arg(env, aux_reg, SRC_OP);
4616                 if (err)
4617                         return err;
4618
4619                 if (is_pointer_value(env, aux_reg)) {
4620                         verbose(env, "R%d leaks addr into mem\n", aux_reg);
4621                         return -EACCES;
4622                 }
4623         }
4624
4625         if (is_pointer_value(env, insn->src_reg)) {
4626                 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
4627                 return -EACCES;
4628         }
4629
4630         if (is_ctx_reg(env, insn->dst_reg) ||
4631             is_pkt_reg(env, insn->dst_reg) ||
4632             is_flow_key_reg(env, insn->dst_reg) ||
4633             is_sk_reg(env, insn->dst_reg)) {
4634                 verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
4635                         insn->dst_reg,
4636                         reg_type_str(env, reg_state(env, insn->dst_reg)->type));
4637                 return -EACCES;
4638         }
4639
4640         if (insn->imm & BPF_FETCH) {
4641                 if (insn->imm == BPF_CMPXCHG)
4642                         load_reg = BPF_REG_0;
4643                 else
4644                         load_reg = insn->src_reg;
4645
4646                 /* check and record load of old value */
4647                 err = check_reg_arg(env, load_reg, DST_OP);
4648                 if (err)
4649                         return err;
4650         } else {
4651                 /* This instruction accesses a memory location but doesn't
4652                  * actually load it into a register.
4653                  */
4654                 load_reg = -1;
4655         }
4656
4657         /* Check whether we can read the memory, with second call for fetch
4658          * case to simulate the register fill.
4659          */
4660         err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4661                                BPF_SIZE(insn->code), BPF_READ, -1, true);
4662         if (!err && load_reg >= 0)
4663                 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4664                                        BPF_SIZE(insn->code), BPF_READ, load_reg,
4665                                        true);
4666         if (err)
4667                 return err;
4668
4669         /* Check whether we can write into the same memory. */
4670         err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4671                                BPF_SIZE(insn->code), BPF_WRITE, -1, true);
4672         if (err)
4673                 return err;
4674
4675         return 0;
4676 }
4677
4678 /* When register 'regno' is used to read the stack (either directly or through
4679  * a helper function) make sure that it's within stack boundary and, depending
4680  * on the access type, that all elements of the stack are initialized.
4681  *
4682  * 'off' includes 'regno->off', but not its dynamic part (if any).
4683  *
4684  * All registers that have been spilled on the stack in the slots within the
4685  * read offsets are marked as read.
4686  */
4687 static int check_stack_range_initialized(
4688                 struct bpf_verifier_env *env, int regno, int off,
4689                 int access_size, bool zero_size_allowed,
4690                 enum stack_access_src type, struct bpf_call_arg_meta *meta)
4691 {
4692         struct bpf_reg_state *reg = reg_state(env, regno);
4693         struct bpf_func_state *state = func(env, reg);
4694         int err, min_off, max_off, i, j, slot, spi;
4695         char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
4696         enum bpf_access_type bounds_check_type;
4697         /* Some accesses can write anything into the stack, others are
4698          * read-only.
4699          */
4700         bool clobber = false;
4701
4702         if (access_size == 0 && !zero_size_allowed) {
4703                 verbose(env, "invalid zero-sized read\n");
4704                 return -EACCES;
4705         }
4706
4707         if (type == ACCESS_HELPER) {
4708                 /* The bounds checks for writes are more permissive than for
4709                  * reads. However, if raw_mode is not set, we'll do extra
4710                  * checks below.
4711                  */
4712                 bounds_check_type = BPF_WRITE;
4713                 clobber = true;
4714         } else {
4715                 bounds_check_type = BPF_READ;
4716         }
4717         err = check_stack_access_within_bounds(env, regno, off, access_size,
4718                                                type, bounds_check_type);
4719         if (err)
4720                 return err;
4721
4722
4723         if (tnum_is_const(reg->var_off)) {
4724                 min_off = max_off = reg->var_off.value + off;
4725         } else {
4726                 /* Variable offset is prohibited for unprivileged mode for
4727                  * simplicity since it requires corresponding support in
4728                  * Spectre masking for stack ALU.
4729                  * See also retrieve_ptr_limit().
4730                  */
4731                 if (!env->bypass_spec_v1) {
4732                         char tn_buf[48];
4733
4734                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4735                         verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
4736                                 regno, err_extra, tn_buf);
4737                         return -EACCES;
4738                 }
4739                 /* Only initialized buffer on stack is allowed to be accessed
4740                  * with variable offset. With uninitialized buffer it's hard to
4741                  * guarantee that whole memory is marked as initialized on
4742                  * helper return since specific bounds are unknown what may
4743                  * cause uninitialized stack leaking.
4744                  */
4745                 if (meta && meta->raw_mode)
4746                         meta = NULL;
4747
4748                 min_off = reg->smin_value + off;
4749                 max_off = reg->smax_value + off;
4750         }
4751
4752         if (meta && meta->raw_mode) {
4753                 meta->access_size = access_size;
4754                 meta->regno = regno;
4755                 return 0;
4756         }
4757
4758         for (i = min_off; i < max_off + access_size; i++) {
4759                 u8 *stype;
4760
4761                 slot = -i - 1;
4762                 spi = slot / BPF_REG_SIZE;
4763                 if (state->allocated_stack <= slot)
4764                         goto err;
4765                 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
4766                 if (*stype == STACK_MISC)
4767                         goto mark;
4768                 if (*stype == STACK_ZERO) {
4769                         if (clobber) {
4770                                 /* helper can write anything into the stack */
4771                                 *stype = STACK_MISC;
4772                         }
4773                         goto mark;
4774                 }
4775
4776                 if (is_spilled_reg(&state->stack[spi]) &&
4777                     state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
4778                         goto mark;
4779
4780                 if (is_spilled_reg(&state->stack[spi]) &&
4781                     (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
4782                      env->allow_ptr_leaks)) {
4783                         if (clobber) {
4784                                 __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
4785                                 for (j = 0; j < BPF_REG_SIZE; j++)
4786                                         scrub_spilled_slot(&state->stack[spi].slot_type[j]);
4787                         }
4788                         goto mark;
4789                 }
4790
4791 err:
4792                 if (tnum_is_const(reg->var_off)) {
4793                         verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
4794                                 err_extra, regno, min_off, i - min_off, access_size);
4795                 } else {
4796                         char tn_buf[48];
4797
4798                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4799                         verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
4800                                 err_extra, regno, tn_buf, i - min_off, access_size);
4801                 }
4802                 return -EACCES;
4803 mark:
4804                 /* reading any byte out of 8-byte 'spill_slot' will cause
4805                  * the whole slot to be marked as 'read'
4806                  */
4807                 mark_reg_read(env, &state->stack[spi].spilled_ptr,
4808                               state->stack[spi].spilled_ptr.parent,
4809                               REG_LIVE_READ64);
4810         }
4811         return update_stack_depth(env, state, min_off);
4812 }
4813
4814 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
4815                                    int access_size, bool zero_size_allowed,
4816                                    struct bpf_call_arg_meta *meta)
4817 {
4818         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4819         const char *buf_info;
4820         u32 *max_access;
4821
4822         switch (base_type(reg->type)) {
4823         case PTR_TO_PACKET:
4824         case PTR_TO_PACKET_META:
4825                 return check_packet_access(env, regno, reg->off, access_size,
4826                                            zero_size_allowed);
4827         case PTR_TO_MAP_KEY:
4828                 return check_mem_region_access(env, regno, reg->off, access_size,
4829                                                reg->map_ptr->key_size, false);
4830         case PTR_TO_MAP_VALUE:
4831                 if (check_map_access_type(env, regno, reg->off, access_size,
4832                                           meta && meta->raw_mode ? BPF_WRITE :
4833                                           BPF_READ))
4834                         return -EACCES;
4835                 return check_map_access(env, regno, reg->off, access_size,
4836                                         zero_size_allowed);
4837         case PTR_TO_MEM:
4838                 return check_mem_region_access(env, regno, reg->off,
4839                                                access_size, reg->mem_size,
4840                                                zero_size_allowed);
4841         case PTR_TO_BUF:
4842                 if (type_is_rdonly_mem(reg->type)) {
4843                         if (meta && meta->raw_mode)
4844                                 return -EACCES;
4845
4846                         buf_info = "rdonly";
4847                         max_access = &env->prog->aux->max_rdonly_access;
4848                 } else {
4849                         buf_info = "rdwr";
4850                         max_access = &env->prog->aux->max_rdwr_access;
4851                 }
4852                 return check_buffer_access(env, reg, regno, reg->off,
4853                                            access_size, zero_size_allowed,
4854                                            buf_info, max_access);
4855         case PTR_TO_STACK:
4856                 return check_stack_range_initialized(
4857                                 env,
4858                                 regno, reg->off, access_size,
4859                                 zero_size_allowed, ACCESS_HELPER, meta);
4860         default: /* scalar_value or invalid ptr */
4861                 /* Allow zero-byte read from NULL, regardless of pointer type */
4862                 if (zero_size_allowed && access_size == 0 &&
4863                     register_is_null(reg))
4864                         return 0;
4865
4866                 verbose(env, "R%d type=%s ", regno,
4867                         reg_type_str(env, reg->type));
4868                 verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
4869                 return -EACCES;
4870         }
4871 }
4872
4873 static int check_mem_size_reg(struct bpf_verifier_env *env,
4874                               struct bpf_reg_state *reg, u32 regno,
4875                               bool zero_size_allowed,
4876                               struct bpf_call_arg_meta *meta)
4877 {
4878         int err;
4879
4880         /* This is used to refine r0 return value bounds for helpers
4881          * that enforce this value as an upper bound on return values.
4882          * See do_refine_retval_range() for helpers that can refine
4883          * the return value. C type of helper is u32 so we pull register
4884          * bound from umax_value however, if negative verifier errors
4885          * out. Only upper bounds can be learned because retval is an
4886          * int type and negative retvals are allowed.
4887          */
4888         if (meta)
4889                 meta->msize_max_value = reg->umax_value;
4890
4891         /* The register is SCALAR_VALUE; the access check
4892          * happens using its boundaries.
4893          */
4894         if (!tnum_is_const(reg->var_off))
4895                 /* For unprivileged variable accesses, disable raw
4896                  * mode so that the program is required to
4897                  * initialize all the memory that the helper could
4898                  * just partially fill up.
4899                  */
4900                 meta = NULL;
4901
4902         if (reg->smin_value < 0) {
4903                 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
4904                         regno);
4905                 return -EACCES;
4906         }
4907
4908         if (reg->umin_value == 0) {
4909                 err = check_helper_mem_access(env, regno - 1, 0,
4910                                               zero_size_allowed,
4911                                               meta);
4912                 if (err)
4913                         return err;
4914         }
4915
4916         if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
4917                 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
4918                         regno);
4919                 return -EACCES;
4920         }
4921         err = check_helper_mem_access(env, regno - 1,
4922                                       reg->umax_value,
4923                                       zero_size_allowed, meta);
4924         if (!err)
4925                 err = mark_chain_precision(env, regno);
4926         return err;
4927 }
4928
4929 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
4930                    u32 regno, u32 mem_size)
4931 {
4932         if (register_is_null(reg))
4933                 return 0;
4934
4935         if (type_may_be_null(reg->type)) {
4936                 /* Assuming that the register contains a value check if the memory
4937                  * access is safe. Temporarily save and restore the register's state as
4938                  * the conversion shouldn't be visible to a caller.
4939                  */
4940                 const struct bpf_reg_state saved_reg = *reg;
4941                 int rv;
4942
4943                 mark_ptr_not_null_reg(reg);
4944                 rv = check_helper_mem_access(env, regno, mem_size, true, NULL);
4945                 *reg = saved_reg;
4946                 return rv;
4947         }
4948
4949         return check_helper_mem_access(env, regno, mem_size, true, NULL);
4950 }
4951
4952 int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
4953                              u32 regno)
4954 {
4955         struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
4956         bool may_be_null = type_may_be_null(mem_reg->type);
4957         struct bpf_reg_state saved_reg;
4958         int err;
4959
4960         WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
4961
4962         if (may_be_null) {
4963                 saved_reg = *mem_reg;
4964                 mark_ptr_not_null_reg(mem_reg);
4965         }
4966
4967         err = check_mem_size_reg(env, reg, regno, true, NULL);
4968
4969         if (may_be_null)
4970                 *mem_reg = saved_reg;
4971         return err;
4972 }
4973
4974 /* Implementation details:
4975  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
4976  * Two bpf_map_lookups (even with the same key) will have different reg->id.
4977  * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
4978  * value_or_null->value transition, since the verifier only cares about
4979  * the range of access to valid map value pointer and doesn't care about actual
4980  * address of the map element.
4981  * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
4982  * reg->id > 0 after value_or_null->value transition. By doing so
4983  * two bpf_map_lookups will be considered two different pointers that
4984  * point to different bpf_spin_locks.
4985  * The verifier allows taking only one bpf_spin_lock at a time to avoid
4986  * dead-locks.
4987  * Since only one bpf_spin_lock is allowed the checks are simpler than
4988  * reg_is_refcounted() logic. The verifier needs to remember only
4989  * one spin_lock instead of array of acquired_refs.
4990  * cur_state->active_spin_lock remembers which map value element got locked
4991  * and clears it after bpf_spin_unlock.
4992  */
4993 static int process_spin_lock(struct bpf_verifier_env *env, int regno,
4994                              bool is_lock)
4995 {
4996         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4997         struct bpf_verifier_state *cur = env->cur_state;
4998         bool is_const = tnum_is_const(reg->var_off);
4999         struct bpf_map *map = reg->map_ptr;
5000         u64 val = reg->var_off.value;
5001
5002         if (!is_const) {
5003                 verbose(env,
5004                         "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
5005                         regno);
5006                 return -EINVAL;
5007         }
5008         if (!map->btf) {
5009                 verbose(env,
5010                         "map '%s' has to have BTF in order to use bpf_spin_lock\n",
5011                         map->name);
5012                 return -EINVAL;
5013         }
5014         if (!map_value_has_spin_lock(map)) {
5015                 if (map->spin_lock_off == -E2BIG)
5016                         verbose(env,
5017                                 "map '%s' has more than one 'struct bpf_spin_lock'\n",
5018                                 map->name);
5019                 else if (map->spin_lock_off == -ENOENT)
5020                         verbose(env,
5021                                 "map '%s' doesn't have 'struct bpf_spin_lock'\n",
5022                                 map->name);
5023                 else
5024                         verbose(env,
5025                                 "map '%s' is not a struct type or bpf_spin_lock is mangled\n",
5026                                 map->name);
5027                 return -EINVAL;
5028         }
5029         if (map->spin_lock_off != val + reg->off) {
5030                 verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
5031                         val + reg->off);
5032                 return -EINVAL;
5033         }
5034         if (is_lock) {
5035                 if (cur->active_spin_lock) {
5036                         verbose(env,
5037                                 "Locking two bpf_spin_locks are not allowed\n");
5038                         return -EINVAL;
5039                 }
5040                 cur->active_spin_lock = reg->id;
5041         } else {
5042                 if (!cur->active_spin_lock) {
5043                         verbose(env, "bpf_spin_unlock without taking a lock\n");
5044                         return -EINVAL;
5045                 }
5046                 if (cur->active_spin_lock != reg->id) {
5047                         verbose(env, "bpf_spin_unlock of different lock\n");
5048                         return -EINVAL;
5049                 }
5050                 cur->active_spin_lock = 0;
5051         }
5052         return 0;
5053 }
5054
5055 static int process_timer_func(struct bpf_verifier_env *env, int regno,
5056                               struct bpf_call_arg_meta *meta)
5057 {
5058         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5059         bool is_const = tnum_is_const(reg->var_off);
5060         struct bpf_map *map = reg->map_ptr;
5061         u64 val = reg->var_off.value;
5062
5063         if (!is_const) {
5064                 verbose(env,
5065                         "R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
5066                         regno);
5067                 return -EINVAL;
5068         }
5069         if (!map->btf) {
5070                 verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
5071                         map->name);
5072                 return -EINVAL;
5073         }
5074         if (!map_value_has_timer(map)) {
5075                 if (map->timer_off == -E2BIG)
5076                         verbose(env,
5077                                 "map '%s' has more than one 'struct bpf_timer'\n",
5078                                 map->name);
5079                 else if (map->timer_off == -ENOENT)
5080                         verbose(env,
5081                                 "map '%s' doesn't have 'struct bpf_timer'\n",
5082                                 map->name);
5083                 else
5084                         verbose(env,
5085                                 "map '%s' is not a struct type or bpf_timer is mangled\n",
5086                                 map->name);
5087                 return -EINVAL;
5088         }
5089         if (map->timer_off != val + reg->off) {
5090                 verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
5091                         val + reg->off, map->timer_off);
5092                 return -EINVAL;
5093         }
5094         if (meta->map_ptr) {
5095                 verbose(env, "verifier bug. Two map pointers in a timer helper\n");
5096                 return -EFAULT;
5097         }
5098         meta->map_uid = reg->map_uid;
5099         meta->map_ptr = map;
5100         return 0;
5101 }
5102
5103 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
5104 {
5105         return base_type(type) == ARG_PTR_TO_MEM ||
5106                base_type(type) == ARG_PTR_TO_UNINIT_MEM;
5107 }
5108
5109 static bool arg_type_is_mem_size(enum bpf_arg_type type)
5110 {
5111         return type == ARG_CONST_SIZE ||
5112                type == ARG_CONST_SIZE_OR_ZERO;
5113 }
5114
5115 static bool arg_type_is_alloc_size(enum bpf_arg_type type)
5116 {
5117         return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
5118 }
5119
5120 static bool arg_type_is_int_ptr(enum bpf_arg_type type)
5121 {
5122         return type == ARG_PTR_TO_INT ||
5123                type == ARG_PTR_TO_LONG;
5124 }
5125
5126 static int int_ptr_type_to_size(enum bpf_arg_type type)
5127 {
5128         if (type == ARG_PTR_TO_INT)
5129                 return sizeof(u32);
5130         else if (type == ARG_PTR_TO_LONG)
5131                 return sizeof(u64);
5132
5133         return -EINVAL;
5134 }
5135
5136 static int resolve_map_arg_type(struct bpf_verifier_env *env,
5137                                  const struct bpf_call_arg_meta *meta,
5138                                  enum bpf_arg_type *arg_type)
5139 {
5140         if (!meta->map_ptr) {
5141                 /* kernel subsystem misconfigured verifier */
5142                 verbose(env, "invalid map_ptr to access map->type\n");
5143                 return -EACCES;
5144         }
5145
5146         switch (meta->map_ptr->map_type) {
5147         case BPF_MAP_TYPE_SOCKMAP:
5148         case BPF_MAP_TYPE_SOCKHASH:
5149                 if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
5150                         *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
5151                 } else {
5152                         verbose(env, "invalid arg_type for sockmap/sockhash\n");
5153                         return -EINVAL;
5154                 }
5155                 break;
5156         case BPF_MAP_TYPE_BLOOM_FILTER:
5157                 if (meta->func_id == BPF_FUNC_map_peek_elem)
5158                         *arg_type = ARG_PTR_TO_MAP_VALUE;
5159                 break;
5160         default:
5161                 break;
5162         }
5163         return 0;
5164 }
5165
5166 struct bpf_reg_types {
5167         const enum bpf_reg_type types[10];
5168         u32 *btf_id;
5169 };
5170
5171 static const struct bpf_reg_types map_key_value_types = {
5172         .types = {
5173                 PTR_TO_STACK,
5174                 PTR_TO_PACKET,
5175                 PTR_TO_PACKET_META,
5176                 PTR_TO_MAP_KEY,
5177                 PTR_TO_MAP_VALUE,
5178         },
5179 };
5180
5181 static const struct bpf_reg_types sock_types = {
5182         .types = {
5183                 PTR_TO_SOCK_COMMON,
5184                 PTR_TO_SOCKET,
5185                 PTR_TO_TCP_SOCK,
5186                 PTR_TO_XDP_SOCK,
5187         },
5188 };
5189
#ifdef CONFIG_NET
/* Register types accepted for ARG_PTR_TO_BTF_ID_SOCK_COMMON: the socket
 * pointer types plus PTR_TO_BTF_ID. For a PTR_TO_BTF_ID register,
 * check_reg_type() matches against the sock_common BTF ID stored here
 * unless the helper prototype names its own.
 */
static const struct bpf_reg_types btf_id_sock_common_types = {
        .types = {
                PTR_TO_SOCK_COMMON, PTR_TO_SOCKET,
                PTR_TO_TCP_SOCK, PTR_TO_XDP_SOCK,
                PTR_TO_BTF_ID,
        },
        .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
};
#endif
5202
5203 static const struct bpf_reg_types mem_types = {
5204         .types = {
5205                 PTR_TO_STACK,
5206                 PTR_TO_PACKET,
5207                 PTR_TO_PACKET_META,
5208                 PTR_TO_MAP_KEY,
5209                 PTR_TO_MAP_VALUE,
5210                 PTR_TO_MEM,
5211                 PTR_TO_MEM | MEM_ALLOC,
5212                 PTR_TO_BUF,
5213         },
5214 };
5215
5216 static const struct bpf_reg_types int_ptr_types = {
5217         .types = {
5218                 PTR_TO_STACK,
5219                 PTR_TO_PACKET,
5220                 PTR_TO_PACKET_META,
5221                 PTR_TO_MAP_KEY,
5222                 PTR_TO_MAP_VALUE,
5223         },
5224 };
5225
5226 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
5227 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
5228 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
5229 static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } };
5230 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
5231 static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
5232 static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
5233 static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
5234 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
5235 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
5236 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
5237 static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
5238
5239 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
5240         [ARG_PTR_TO_MAP_KEY]            = &map_key_value_types,
5241         [ARG_PTR_TO_MAP_VALUE]          = &map_key_value_types,
5242         [ARG_PTR_TO_UNINIT_MAP_VALUE]   = &map_key_value_types,
5243         [ARG_CONST_SIZE]                = &scalar_types,
5244         [ARG_CONST_SIZE_OR_ZERO]        = &scalar_types,
5245         [ARG_CONST_ALLOC_SIZE_OR_ZERO]  = &scalar_types,
5246         [ARG_CONST_MAP_PTR]             = &const_map_ptr_types,
5247         [ARG_PTR_TO_CTX]                = &context_types,
5248         [ARG_PTR_TO_SOCK_COMMON]        = &sock_types,
5249 #ifdef CONFIG_NET
5250         [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
5251 #endif
5252         [ARG_PTR_TO_SOCKET]             = &fullsock_types,
5253         [ARG_PTR_TO_BTF_ID]             = &btf_ptr_types,
5254         [ARG_PTR_TO_SPIN_LOCK]          = &spin_lock_types,
5255         [ARG_PTR_TO_MEM]                = &mem_types,
5256         [ARG_PTR_TO_UNINIT_MEM]         = &mem_types,
5257         [ARG_PTR_TO_ALLOC_MEM]          = &alloc_mem_types,
5258         [ARG_PTR_TO_INT]                = &int_ptr_types,
5259         [ARG_PTR_TO_LONG]               = &int_ptr_types,
5260         [ARG_PTR_TO_PERCPU_BTF_ID]      = &percpu_btf_ptr_types,
5261         [ARG_PTR_TO_FUNC]               = &func_ptr_types,
5262         [ARG_PTR_TO_STACK]              = &stack_ptr_types,
5263         [ARG_PTR_TO_CONST_STR]          = &const_str_ptr_types,
5264         [ARG_PTR_TO_TIMER]              = &timer_types,
5265 };
5266
5267 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
5268                           enum bpf_arg_type arg_type,
5269                           const u32 *arg_btf_id)
5270 {
5271         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5272         enum bpf_reg_type expected, type = reg->type;
5273         const struct bpf_reg_types *compatible;
5274         int i, j;
5275
5276         compatible = compatible_reg_types[base_type(arg_type)];
5277         if (!compatible) {
5278                 verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
5279                 return -EFAULT;
5280         }
5281
5282         /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
5283          * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
5284          *
5285          * Same for MAYBE_NULL:
5286          *
5287          * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
5288          * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
5289          *
5290          * Therefore we fold these flags depending on the arg_type before comparison.
5291          */
5292         if (arg_type & MEM_RDONLY)
5293                 type &= ~MEM_RDONLY;
5294         if (arg_type & PTR_MAYBE_NULL)
5295                 type &= ~PTR_MAYBE_NULL;
5296
5297         for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
5298                 expected = compatible->types[i];
5299                 if (expected == NOT_INIT)
5300                         break;
5301
5302                 if (type == expected)
5303                         goto found;
5304         }
5305
5306         verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
5307         for (j = 0; j + 1 < i; j++)
5308                 verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
5309         verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
5310         return -EACCES;
5311
5312 found:
5313         if (reg->type == PTR_TO_BTF_ID) {
5314                 if (!arg_btf_id) {
5315                         if (!compatible->btf_id) {
5316                                 verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
5317                                 return -EFAULT;
5318                         }
5319                         arg_btf_id = compatible->btf_id;
5320                 }
5321
5322                 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
5323                                           btf_vmlinux, *arg_btf_id)) {
5324                         verbose(env, "R%d is of type %s but %s is expected\n",
5325                                 regno, kernel_type_name(reg->btf, reg->btf_id),
5326                                 kernel_type_name(btf_vmlinux, *arg_btf_id));
5327                         return -EACCES;
5328                 }
5329         }
5330
5331         return 0;
5332 }
5333
5334 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
5335                           struct bpf_call_arg_meta *meta,
5336                           const struct bpf_func_proto *fn)
5337 {
5338         u32 regno = BPF_REG_1 + arg;
5339         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5340         enum bpf_arg_type arg_type = fn->arg_type[arg];
5341         enum bpf_reg_type type = reg->type;
5342         int err = 0;
5343
5344         if (arg_type == ARG_DONTCARE)
5345                 return 0;
5346
5347         err = check_reg_arg(env, regno, SRC_OP);
5348         if (err)
5349                 return err;
5350
5351         if (arg_type == ARG_ANYTHING) {
5352                 if (is_pointer_value(env, regno)) {
5353                         verbose(env, "R%d leaks addr into helper function\n",
5354                                 regno);
5355                         return -EACCES;
5356                 }
5357                 return 0;
5358         }
5359
5360         if (type_is_pkt_pointer(type) &&
5361             !may_access_direct_pkt_data(env, meta, BPF_READ)) {
5362                 verbose(env, "helper access to the packet is not allowed\n");
5363                 return -EACCES;
5364         }
5365
5366         if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE ||
5367             base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) {
5368                 err = resolve_map_arg_type(env, meta, &arg_type);
5369                 if (err)
5370                         return err;
5371         }
5372
5373         if (register_is_null(reg) && type_may_be_null(arg_type))
5374                 /* A NULL register has a SCALAR_VALUE type, so skip
5375                  * type checking.
5376                  */
5377                 goto skip_type_check;
5378
5379         err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]);
5380         if (err)
5381                 return err;
5382
5383         switch ((u32)type) {
5384         case SCALAR_VALUE:
5385         /* Pointer types where reg offset is explicitly allowed: */
5386         case PTR_TO_PACKET:
5387         case PTR_TO_PACKET_META:
5388         case PTR_TO_MAP_KEY:
5389         case PTR_TO_MAP_VALUE:
5390         case PTR_TO_MEM:
5391         case PTR_TO_MEM | MEM_RDONLY:
5392         case PTR_TO_MEM | MEM_ALLOC:
5393         case PTR_TO_BUF:
5394         case PTR_TO_BUF | MEM_RDONLY:
5395         case PTR_TO_STACK:
5396                 /* Some of the argument types nevertheless require a
5397                  * zero register offset.
5398                  */
5399                 if (arg_type == ARG_PTR_TO_ALLOC_MEM)
5400                         goto force_off_check;
5401                 break;
5402         /* All the rest must be rejected: */
5403         default:
5404 force_off_check:
5405                 err = __check_ptr_off_reg(env, reg, regno,
5406                                           type == PTR_TO_BTF_ID);
5407                 if (err < 0)
5408                         return err;
5409                 break;
5410         }
5411
5412 skip_type_check:
5413         if (reg->ref_obj_id) {
5414                 if (meta->ref_obj_id) {
5415                         verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
5416                                 regno, reg->ref_obj_id,
5417                                 meta->ref_obj_id);
5418                         return -EFAULT;
5419                 }
5420                 meta->ref_obj_id = reg->ref_obj_id;
5421         }
5422
5423         if (arg_type == ARG_CONST_MAP_PTR) {
5424                 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
5425                 if (meta->map_ptr) {
5426                         /* Use map_uid (which is unique id of inner map) to reject:
5427                          * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
5428                          * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
5429                          * if (inner_map1 && inner_map2) {
5430                          *     timer = bpf_map_lookup_elem(inner_map1);
5431                          *     if (timer)
5432                          *         // mismatch would have been allowed
5433                          *         bpf_timer_init(timer, inner_map2);
5434                          * }
5435                          *
5436                          * Comparing map_ptr is enough to distinguish normal and outer maps.
5437                          */
5438                         if (meta->map_ptr != reg->map_ptr ||
5439                             meta->map_uid != reg->map_uid) {
5440                                 verbose(env,
5441                                         "timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
5442                                         meta->map_uid, reg->map_uid);
5443                                 return -EINVAL;
5444                         }
5445                 }
5446                 meta->map_ptr = reg->map_ptr;
5447                 meta->map_uid = reg->map_uid;
5448         } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
5449                 /* bpf_map_xxx(..., map_ptr, ..., key) call:
5450                  * check that [key, key + map->key_size) are within
5451                  * stack limits and initialized
5452                  */
5453                 if (!meta->map_ptr) {
5454                         /* in function declaration map_ptr must come before
5455                          * map_key, so that it's verified and known before
5456                          * we have to check map_key here. Otherwise it means
5457                          * that kernel subsystem misconfigured verifier
5458                          */
5459                         verbose(env, "invalid map_ptr to access map->key\n");
5460                         return -EACCES;
5461                 }
5462                 err = check_helper_mem_access(env, regno,
5463                                               meta->map_ptr->key_size, false,
5464                                               NULL);
5465         } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE ||
5466                    base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) {
5467                 if (type_may_be_null(arg_type) && register_is_null(reg))
5468                         return 0;
5469
5470                 /* bpf_map_xxx(..., map_ptr, ..., value) call:
5471                  * check [value, value + map->value_size) validity
5472                  */
5473                 if (!meta->map_ptr) {
5474                         /* kernel subsystem misconfigured verifier */
5475                         verbose(env, "invalid map_ptr to access map->value\n");
5476                         return -EACCES;
5477                 }
5478                 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
5479                 err = check_helper_mem_access(env, regno,
5480                                               meta->map_ptr->value_size, false,
5481                                               meta);
5482         } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
5483                 if (!reg->btf_id) {
5484                         verbose(env, "Helper has invalid btf_id in R%d\n", regno);
5485                         return -EACCES;
5486                 }
5487                 meta->ret_btf = reg->btf;
5488                 meta->ret_btf_id = reg->btf_id;
5489         } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
5490                 if (meta->func_id == BPF_FUNC_spin_lock) {
5491                         if (process_spin_lock(env, regno, true))
5492                                 return -EACCES;
5493                 } else if (meta->func_id == BPF_FUNC_spin_unlock) {
5494                         if (process_spin_lock(env, regno, false))
5495                                 return -EACCES;
5496                 } else {
5497                         verbose(env, "verifier internal error\n");
5498                         return -EFAULT;
5499                 }
5500         } else if (arg_type == ARG_PTR_TO_TIMER) {
5501                 if (process_timer_func(env, regno, meta))
5502                         return -EACCES;
5503         } else if (arg_type == ARG_PTR_TO_FUNC) {
5504                 meta->subprogno = reg->subprogno;
5505         } else if (arg_type_is_mem_ptr(arg_type)) {
5506                 /* The access to this pointer is only checked when we hit the
5507                  * next is_mem_size argument below.
5508                  */
5509                 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM);
5510         } else if (arg_type_is_mem_size(arg_type)) {
5511                 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
5512
5513                 err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta);
5514         } else if (arg_type_is_alloc_size(arg_type)) {
5515                 if (!tnum_is_const(reg->var_off)) {
5516                         verbose(env, "R%d is not a known constant'\n",
5517                                 regno);
5518                         return -EACCES;
5519                 }
5520                 meta->mem_size = reg->var_off.value;
5521         } else if (arg_type_is_int_ptr(arg_type)) {
5522                 int size = int_ptr_type_to_size(arg_type);
5523
5524                 err = check_helper_mem_access(env, regno, size, false, meta);
5525                 if (err)
5526                         return err;
5527                 err = check_ptr_alignment(env, reg, 0, size, true);
5528         } else if (arg_type == ARG_PTR_TO_CONST_STR) {
5529                 struct bpf_map *map = reg->map_ptr;
5530                 int map_off;
5531                 u64 map_addr;
5532                 char *str_ptr;
5533
5534                 if (!bpf_map_is_rdonly(map)) {
5535                         verbose(env, "R%d does not point to a readonly map'\n", regno);
5536                         return -EACCES;
5537                 }
5538
5539                 if (!tnum_is_const(reg->var_off)) {
5540                         verbose(env, "R%d is not a constant address'\n", regno);
5541                         return -EACCES;
5542                 }
5543
5544                 if (!map->ops->map_direct_value_addr) {
5545                         verbose(env, "no direct value access support for this map type\n");
5546                         return -EACCES;
5547                 }
5548
5549                 err = check_map_access(env, regno, reg->off,
5550                                        map->value_size - reg->off, false);
5551                 if (err)
5552                         return err;
5553
5554                 map_off = reg->off + reg->var_off.value;
5555                 err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
5556                 if (err) {
5557                         verbose(env, "direct value access on string failed\n");
5558                         return err;
5559                 }
5560
5561                 str_ptr = (char *)(long)(map_addr);
5562                 if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
5563                         verbose(env, "string is not zero-terminated\n");
5564                         return -EINVAL;
5565                 }
5566         }
5567
5568         return err;
5569 }
5570
5571 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
5572 {
5573         enum bpf_attach_type eatype = env->prog->expected_attach_type;
5574         enum bpf_prog_type type = resolve_prog_type(env->prog);
5575
5576         if (func_id != BPF_FUNC_map_update_elem)
5577                 return false;
5578
5579         /* It's not possible to get access to a locked struct sock in these
5580          * contexts, so updating is safe.
5581          */
5582         switch (type) {
5583         case BPF_PROG_TYPE_TRACING:
5584                 if (eatype == BPF_TRACE_ITER)
5585                         return true;
5586                 break;
5587         case BPF_PROG_TYPE_SOCKET_FILTER:
5588         case BPF_PROG_TYPE_SCHED_CLS:
5589         case BPF_PROG_TYPE_SCHED_ACT:
5590         case BPF_PROG_TYPE_XDP:
5591         case BPF_PROG_TYPE_SK_REUSEPORT:
5592         case BPF_PROG_TYPE_FLOW_DISSECTOR:
5593         case BPF_PROG_TYPE_SK_LOOKUP:
5594                 return true;
5595         default:
5596                 break;
5597         }
5598
5599         verbose(env, "cannot update sockmap in this context\n");
5600         return false;
5601 }
5602
5603 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
5604 {
5605         return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
5606 }
5607
5608 static int check_map_func_compatibility(struct bpf_verifier_env *env,
5609                                         struct bpf_map *map, int func_id)
5610 {
5611         if (!map)
5612                 return 0;
5613
5614         /* We need a two way check, first is from map perspective ... */
5615         switch (map->map_type) {
5616         case BPF_MAP_TYPE_PROG_ARRAY:
5617                 if (func_id != BPF_FUNC_tail_call)
5618                         goto error;
5619                 break;
5620         case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
5621                 if (func_id != BPF_FUNC_perf_event_read &&
5622                     func_id != BPF_FUNC_perf_event_output &&
5623                     func_id != BPF_FUNC_skb_output &&
5624                     func_id != BPF_FUNC_perf_event_read_value &&
5625                     func_id != BPF_FUNC_xdp_output)
5626                         goto error;
5627                 break;
5628         case BPF_MAP_TYPE_RINGBUF:
5629                 if (func_id != BPF_FUNC_ringbuf_output &&
5630                     func_id != BPF_FUNC_ringbuf_reserve &&
5631                     func_id != BPF_FUNC_ringbuf_query)
5632                         goto error;
5633                 break;
5634         case BPF_MAP_TYPE_STACK_TRACE:
5635                 if (func_id != BPF_FUNC_get_stackid)
5636                         goto error;
5637                 break;
5638         case BPF_MAP_TYPE_CGROUP_ARRAY:
5639                 if (func_id != BPF_FUNC_skb_under_cgroup &&
5640                     func_id != BPF_FUNC_current_task_under_cgroup)
5641                         goto error;
5642                 break;
5643         case BPF_MAP_TYPE_CGROUP_STORAGE:
5644         case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
5645                 if (func_id != BPF_FUNC_get_local_storage)
5646                         goto error;
5647                 break;
5648         case BPF_MAP_TYPE_DEVMAP:
5649         case BPF_MAP_TYPE_DEVMAP_HASH:
5650                 if (func_id != BPF_FUNC_redirect_map &&
5651                     func_id != BPF_FUNC_map_lookup_elem)
5652                         goto error;
5653                 break;
5654         /* Restrict bpf side of cpumap and xskmap, open when use-cases
5655          * appear.
5656          */
5657         case BPF_MAP_TYPE_CPUMAP:
5658                 if (func_id != BPF_FUNC_redirect_map)
5659                         goto error;
5660                 break;
5661         case BPF_MAP_TYPE_XSKMAP:
5662                 if (func_id != BPF_FUNC_redirect_map &&
5663                     func_id != BPF_FUNC_map_lookup_elem)
5664                         goto error;
5665                 break;
5666         case BPF_MAP_TYPE_ARRAY_OF_MAPS:
5667         case BPF_MAP_TYPE_HASH_OF_MAPS:
5668                 if (func_id != BPF_FUNC_map_lookup_elem)
5669                         goto error;
5670                 break;
5671         case BPF_MAP_TYPE_SOCKMAP:
5672                 if (func_id != BPF_FUNC_sk_redirect_map &&
5673                     func_id != BPF_FUNC_sock_map_update &&
5674                     func_id != BPF_FUNC_map_delete_elem &&
5675                     func_id != BPF_FUNC_msg_redirect_map &&
5676                     func_id != BPF_FUNC_sk_select_reuseport &&
5677                     func_id != BPF_FUNC_map_lookup_elem &&
5678                     !may_update_sockmap(env, func_id))
5679                         goto error;
5680                 break;
5681         case BPF_MAP_TYPE_SOCKHASH:
5682                 if (func_id != BPF_FUNC_sk_redirect_hash &&
5683                     func_id != BPF_FUNC_sock_hash_update &&
5684                     func_id != BPF_FUNC_map_delete_elem &&
5685                     func_id != BPF_FUNC_msg_redirect_hash &&
5686                     func_id != BPF_FUNC_sk_select_reuseport &&
5687                     func_id != BPF_FUNC_map_lookup_elem &&
5688                     !may_update_sockmap(env, func_id))
5689                         goto error;
5690                 break;
5691         case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
5692                 if (func_id != BPF_FUNC_sk_select_reuseport)
5693                         goto error;
5694                 break;
5695         case BPF_MAP_TYPE_QUEUE:
5696         case BPF_MAP_TYPE_STACK:
5697                 if (func_id != BPF_FUNC_map_peek_elem &&
5698                     func_id != BPF_FUNC_map_pop_elem &&
5699                     func_id != BPF_FUNC_map_push_elem)
5700                         goto error;
5701                 break;
5702         case BPF_MAP_TYPE_SK_STORAGE:
5703                 if (func_id != BPF_FUNC_sk_storage_get &&
5704                     func_id != BPF_FUNC_sk_storage_delete)
5705                         goto error;
5706                 break;
5707         case BPF_MAP_TYPE_INODE_STORAGE:
5708                 if (func_id != BPF_FUNC_inode_storage_get &&
5709                     func_id != BPF_FUNC_inode_storage_delete)
5710                         goto error;
5711                 break;
5712         case BPF_MAP_TYPE_TASK_STORAGE:
5713                 if (func_id != BPF_FUNC_task_storage_get &&
5714                     func_id != BPF_FUNC_task_storage_delete)
5715                         goto error;
5716                 break;
5717         case BPF_MAP_TYPE_BLOOM_FILTER:
5718                 if (func_id != BPF_FUNC_map_peek_elem &&
5719                     func_id != BPF_FUNC_map_push_elem)
5720                         goto error;
5721                 break;
5722         default:
5723                 break;
5724         }
5725
5726         /* ... and second from the function itself. */
5727         switch (func_id) {
5728         case BPF_FUNC_tail_call:
5729                 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
5730                         goto error;
5731                 if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
5732                         verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
5733                         return -EINVAL;
5734                 }
5735                 break;
5736         case BPF_FUNC_perf_event_read:
5737         case BPF_FUNC_perf_event_output:
5738         case BPF_FUNC_perf_event_read_value:
5739         case BPF_FUNC_skb_output:
5740         case BPF_FUNC_xdp_output:
5741                 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
5742                         goto error;
5743                 break;
5744         case BPF_FUNC_ringbuf_output:
5745         case BPF_FUNC_ringbuf_reserve:
5746         case BPF_FUNC_ringbuf_query:
5747                 if (map->map_type != BPF_MAP_TYPE_RINGBUF)
5748                         goto error;
5749                 break;
5750         case BPF_FUNC_get_stackid:
5751                 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
5752                         goto error;
5753                 break;
5754         case BPF_FUNC_current_task_under_cgroup:
5755         case BPF_FUNC_skb_under_cgroup:
5756                 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
5757                         goto error;
5758                 break;
5759         case BPF_FUNC_redirect_map:
5760                 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
5761                     map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
5762                     map->map_type != BPF_MAP_TYPE_CPUMAP &&
5763                     map->map_type != BPF_MAP_TYPE_XSKMAP)
5764                         goto error;
5765                 break;
5766         case BPF_FUNC_sk_redirect_map:
5767         case BPF_FUNC_msg_redirect_map:
5768         case BPF_FUNC_sock_map_update:
5769                 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
5770                         goto error;
5771                 break;
5772         case BPF_FUNC_sk_redirect_hash:
5773         case BPF_FUNC_msg_redirect_hash:
5774         case BPF_FUNC_sock_hash_update:
5775                 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
5776                         goto error;
5777                 break;
5778         case BPF_FUNC_get_local_storage:
5779                 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
5780                     map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
5781                         goto error;
5782                 break;
5783         case BPF_FUNC_sk_select_reuseport:
5784                 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
5785                     map->map_type != BPF_MAP_TYPE_SOCKMAP &&
5786                     map->map_type != BPF_MAP_TYPE_SOCKHASH)
5787                         goto error;
5788                 break;
5789         case BPF_FUNC_map_pop_elem:
5790                 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
5791                     map->map_type != BPF_MAP_TYPE_STACK)
5792                         goto error;
5793                 break;
5794         case BPF_FUNC_map_peek_elem:
5795         case BPF_FUNC_map_push_elem:
5796                 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
5797                     map->map_type != BPF_MAP_TYPE_STACK &&
5798                     map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
5799                         goto error;
5800                 break;
5801         case BPF_FUNC_sk_storage_get:
5802         case BPF_FUNC_sk_storage_delete:
5803                 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
5804                         goto error;
5805                 break;
5806         case BPF_FUNC_inode_storage_get:
5807         case BPF_FUNC_inode_storage_delete:
5808                 if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
5809                         goto error;
5810                 break;
5811         case BPF_FUNC_task_storage_get:
5812         case BPF_FUNC_task_storage_delete:
5813                 if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
5814                         goto error;
5815                 break;
5816         default:
5817                 break;
5818         }
5819
5820         return 0;
5821 error:
5822         verbose(env, "cannot pass map_type %d into func %s#%d\n",
5823                 map->map_type, func_id_name(func_id), func_id);
5824         return -EINVAL;
5825 }
5826
5827 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
5828 {
5829         int count = 0;
5830
5831         if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
5832                 count++;
5833         if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
5834                 count++;
5835         if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
5836                 count++;
5837         if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
5838                 count++;
5839         if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
5840                 count++;
5841
5842         /* We only support one arg being in raw mode at the moment,
5843          * which is sufficient for the helper functions we have
5844          * right now.
5845          */
5846         return count <= 1;
5847 }
5848
5849 static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
5850                                     enum bpf_arg_type arg_next)
5851 {
5852         return (arg_type_is_mem_ptr(arg_curr) &&
5853                 !arg_type_is_mem_size(arg_next)) ||
5854                (!arg_type_is_mem_ptr(arg_curr) &&
5855                 arg_type_is_mem_size(arg_next));
5856 }
5857
5858 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
5859 {
5860         /* bpf_xxx(..., buf, len) call will access 'len'
5861          * bytes from memory 'buf'. Both arg types need
5862          * to be paired, so make sure there's no buggy
5863          * helper function specification.
5864          */
5865         if (arg_type_is_mem_size(fn->arg1_type) ||
5866             arg_type_is_mem_ptr(fn->arg5_type)  ||
5867             check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
5868             check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
5869             check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
5870             check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
5871                 return false;
5872
5873         return true;
5874 }
5875
5876 static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
5877 {
5878         int count = 0;
5879
5880         if (arg_type_may_be_refcounted(fn->arg1_type))
5881                 count++;
5882         if (arg_type_may_be_refcounted(fn->arg2_type))
5883                 count++;
5884         if (arg_type_may_be_refcounted(fn->arg3_type))
5885                 count++;
5886         if (arg_type_may_be_refcounted(fn->arg4_type))
5887                 count++;
5888         if (arg_type_may_be_refcounted(fn->arg5_type))
5889                 count++;
5890
5891         /* A reference acquiring function cannot acquire
5892          * another refcounted ptr.
5893          */
5894         if (may_be_acquire_function(func_id) && count)
5895                 return false;
5896
5897         /* We only support one arg being unreferenced at the moment,
5898          * which is sufficient for the helper functions we have right now.
5899          */
5900         return count <= 1;
5901 }
5902
5903 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
5904 {
5905         int i;
5906
5907         for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
5908                 if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
5909                         return false;
5910
5911                 if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
5912                         return false;
5913         }
5914
5915         return true;
5916 }
5917
5918 static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
5919 {
5920         return check_raw_mode_ok(fn) &&
5921                check_arg_pair_ok(fn) &&
5922                check_btf_id_ok(fn) &&
5923                check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
5924 }
5925
5926 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
5927  * are now invalid, so turn them into unknown SCALAR_VALUE.
5928  */
5929 static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
5930                                      struct bpf_func_state *state)
5931 {
5932         struct bpf_reg_state *regs = state->regs, *reg;
5933         int i;
5934
5935         for (i = 0; i < MAX_BPF_REG; i++)
5936                 if (reg_is_pkt_pointer_any(&regs[i]))
5937                         mark_reg_unknown(env, regs, i);
5938
5939         bpf_for_each_spilled_reg(i, state, reg) {
5940                 if (!reg)
5941                         continue;
5942                 if (reg_is_pkt_pointer_any(reg))
5943                         __mark_reg_unknown(env, reg);
5944         }
5945 }
5946
5947 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
5948 {
5949         struct bpf_verifier_state *vstate = env->cur_state;
5950         int i;
5951
5952         for (i = 0; i <= vstate->curframe; i++)
5953                 __clear_all_pkt_pointers(env, vstate->frame[i]);
5954 }
5955
/* Special negative values that mark_pkt_end() stores in the 'range' field
 * of a PTR_TO_PACKET register to record how it compares against pkt_end.
 */
enum {
	/* stored when range_open is false: pkt >= pkt_end */
	AT_PKT_END = -1,
	/* stored when range_open is true: pkt > pkt_end */
	BEYOND_PKT_END = -2,
};
5960
5961 static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
5962 {
5963         struct bpf_func_state *state = vstate->frame[vstate->curframe];
5964         struct bpf_reg_state *reg = &state->regs[regn];
5965
5966         if (reg->type != PTR_TO_PACKET)
5967                 /* PTR_TO_PACKET_META is not supported yet */
5968                 return;
5969
5970         /* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
5971          * How far beyond pkt_end it goes is unknown.
5972          * if (!range_open) it's the case of pkt >= pkt_end
5973          * if (range_open) it's the case of pkt > pkt_end
5974          * hence this pointer is at least 1 byte bigger than pkt_end
5975          */
5976         if (range_open)
5977                 reg->range = BEYOND_PKT_END;
5978         else
5979                 reg->range = AT_PKT_END;
5980 }
5981
5982 static void release_reg_references(struct bpf_verifier_env *env,
5983                                    struct bpf_func_state *state,
5984                                    int ref_obj_id)
5985 {
5986         struct bpf_reg_state *regs = state->regs, *reg;
5987         int i;
5988
5989         for (i = 0; i < MAX_BPF_REG; i++)
5990                 if (regs[i].ref_obj_id == ref_obj_id)
5991                         mark_reg_unknown(env, regs, i);
5992
5993         bpf_for_each_spilled_reg(i, state, reg) {
5994                 if (!reg)
5995                         continue;
5996                 if (reg->ref_obj_id == ref_obj_id)
5997                         __mark_reg_unknown(env, reg);
5998         }
5999 }
6000
6001 /* The pointer with the specified id has released its reference to kernel
6002  * resources. Identify all copies of the same pointer and clear the reference.
6003  */
6004 static int release_reference(struct bpf_verifier_env *env,
6005                              int ref_obj_id)
6006 {
6007         struct bpf_verifier_state *vstate = env->cur_state;
6008         int err;
6009         int i;
6010
6011         err = release_reference_state(cur_func(env), ref_obj_id);
6012         if (err)
6013                 return err;
6014
6015         for (i = 0; i <= vstate->curframe; i++)
6016                 release_reg_references(env, vstate->frame[i], ref_obj_id);
6017
6018         return 0;
6019 }
6020
6021 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
6022                                     struct bpf_reg_state *regs)
6023 {
6024         int i;
6025
6026         /* after the call registers r0 - r5 were scratched */
6027         for (i = 0; i < CALLER_SAVED_REGS; i++) {
6028                 mark_reg_not_init(env, regs, caller_saved[i]);
6029                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
6030         }
6031 }
6032
/* Callback type used by __check_func_call() to fill in the callee frame's
 * state from the caller frame's state for the call at 'insn_idx'.
 * Returns 0 on success; any non-zero value makes __check_func_call()
 * fail with that value.
 */
typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
				   struct bpf_func_state *caller,
				   struct bpf_func_state *callee,
				   int insn_idx);
6037
6038 static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
6039                              int *insn_idx, int subprog,
6040                              set_callee_state_fn set_callee_state_cb)
6041 {
6042         struct bpf_verifier_state *state = env->cur_state;
6043         struct bpf_func_info_aux *func_info_aux;
6044         struct bpf_func_state *caller, *callee;
6045         int err;
6046         bool is_global = false;
6047
6048         if (state->curframe + 1 >= MAX_CALL_FRAMES) {
6049                 verbose(env, "the call stack of %d frames is too deep\n",
6050                         state->curframe + 2);
6051                 return -E2BIG;
6052         }
6053
6054         caller = state->frame[state->curframe];
6055         if (state->frame[state->curframe + 1]) {
6056                 verbose(env, "verifier bug. Frame %d already allocated\n",
6057                         state->curframe + 1);
6058                 return -EFAULT;
6059         }
6060
6061         func_info_aux = env->prog->aux->func_info_aux;
6062         if (func_info_aux)
6063                 is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
6064         err = btf_check_subprog_arg_match(env, subprog, caller->regs);
6065         if (err == -EFAULT)
6066                 return err;
6067         if (is_global) {
6068                 if (err) {
6069                         verbose(env, "Caller passes invalid args into func#%d\n",
6070                                 subprog);
6071                         return err;
6072                 } else {
6073                         if (env->log.level & BPF_LOG_LEVEL)
6074                                 verbose(env,
6075                                         "Func#%d is global and valid. Skipping.\n",
6076                                         subprog);
6077                         clear_caller_saved_regs(env, caller->regs);
6078
6079                         /* All global functions return a 64-bit SCALAR_VALUE */
6080                         mark_reg_unknown(env, caller->regs, BPF_REG_0);
6081                         caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
6082
6083                         /* continue with next insn after call */
6084                         return 0;
6085                 }
6086         }
6087
6088         if (insn->code == (BPF_JMP | BPF_CALL) &&
6089             insn->src_reg == 0 &&
6090             insn->imm == BPF_FUNC_timer_set_callback) {
6091                 struct bpf_verifier_state *async_cb;
6092
6093                 /* there is no real recursion here. timer callbacks are async */
6094                 env->subprog_info[subprog].is_async_cb = true;
6095                 async_cb = push_async_cb(env, env->subprog_info[subprog].start,
6096                                          *insn_idx, subprog);
6097                 if (!async_cb)
6098                         return -EFAULT;
6099                 callee = async_cb->frame[0];
6100                 callee->async_entry_cnt = caller->async_entry_cnt + 1;
6101
6102                 /* Convert bpf_timer_set_callback() args into timer callback args */
6103                 err = set_callee_state_cb(env, caller, callee, *insn_idx);
6104                 if (err)
6105                         return err;
6106
6107                 clear_caller_saved_regs(env, caller->regs);
6108                 mark_reg_unknown(env, caller->regs, BPF_REG_0);
6109                 caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
6110                 /* continue with next insn after call */
6111                 return 0;
6112         }
6113
6114         callee = kzalloc(sizeof(*callee), GFP_KERNEL);
6115         if (!callee)
6116                 return -ENOMEM;
6117         state->frame[state->curframe + 1] = callee;
6118
6119         /* callee cannot access r0, r6 - r9 for reading and has to write
6120          * into its own stack before reading from it.
6121          * callee can read/write into caller's stack
6122          */
6123         init_func_state(env, callee,
6124                         /* remember the callsite, it will be used by bpf_exit */
6125                         *insn_idx /* callsite */,
6126                         state->curframe + 1 /* frameno within this callchain */,
6127                         subprog /* subprog number within this prog */);
6128
6129         /* Transfer references to the callee */
6130         err = copy_reference_state(callee, caller);
6131         if (err)
6132                 return err;
6133
6134         err = set_callee_state_cb(env, caller, callee, *insn_idx);
6135         if (err)
6136                 return err;
6137
6138         clear_caller_saved_regs(env, caller->regs);
6139
6140         /* only increment it after check_reg_arg() finished */
6141         state->curframe++;
6142
6143         /* and go analyze first insn of the callee */
6144         *insn_idx = env->subprog_info[subprog].start - 1;
6145
6146         if (env->log.level & BPF_LOG_LEVEL) {
6147                 verbose(env, "caller:\n");
6148                 print_verifier_state(env, caller, true);
6149                 verbose(env, "callee:\n");
6150                 print_verifier_state(env, callee, true);
6151         }
6152         return 0;
6153 }
6154
6155 int map_set_for_each_callback_args(struct bpf_verifier_env *env,
6156                                    struct bpf_func_state *caller,
6157                                    struct bpf_func_state *callee)
6158 {
6159         /* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
6160          *      void *callback_ctx, u64 flags);
6161          * callback_fn(struct bpf_map *map, void *key, void *value,
6162          *      void *callback_ctx);
6163          */
6164         callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
6165
6166         callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
6167         __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
6168         callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
6169
6170         callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
6171         __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
6172         callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
6173
6174         /* pointer to stack or null */
6175         callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
6176
6177         /* unused */
6178         __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6179         return 0;
6180 }
6181
6182 static int set_callee_state(struct bpf_verifier_env *env,
6183                             struct bpf_func_state *caller,
6184                             struct bpf_func_state *callee, int insn_idx)
6185 {
6186         int i;
6187
6188         /* copy r1 - r5 args that callee can access.  The copy includes parent
6189          * pointers, which connects us up to the liveness chain
6190          */
6191         for (i = BPF_REG_1; i <= BPF_REG_5; i++)
6192                 callee->regs[i] = caller->regs[i];
6193         return 0;
6194 }
6195
6196 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
6197                            int *insn_idx)
6198 {
6199         int subprog, target_insn;
6200
6201         target_insn = *insn_idx + insn->imm + 1;
6202         subprog = find_subprog(env, target_insn);
6203         if (subprog < 0) {
6204                 verbose(env, "verifier bug. No program starts at insn %d\n",
6205                         target_insn);
6206                 return -EFAULT;
6207         }
6208
6209         return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
6210 }
6211
6212 static int set_map_elem_callback_state(struct bpf_verifier_env *env,
6213                                        struct bpf_func_state *caller,
6214                                        struct bpf_func_state *callee,
6215                                        int insn_idx)
6216 {
6217         struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
6218         struct bpf_map *map;
6219         int err;
6220
6221         if (bpf_map_ptr_poisoned(insn_aux)) {
6222                 verbose(env, "tail_call abusing map_ptr\n");
6223                 return -EINVAL;
6224         }
6225
6226         map = BPF_MAP_PTR(insn_aux->map_ptr_state);
6227         if (!map->ops->map_set_for_each_callback_args ||
6228             !map->ops->map_for_each_callback) {
6229                 verbose(env, "callback function not allowed for map\n");
6230                 return -ENOTSUPP;
6231         }
6232
6233         err = map->ops->map_set_for_each_callback_args(env, caller, callee);
6234         if (err)
6235                 return err;
6236
6237         callee->in_callback_fn = true;
6238         return 0;
6239 }
6240
6241 static int set_loop_callback_state(struct bpf_verifier_env *env,
6242                                    struct bpf_func_state *caller,
6243                                    struct bpf_func_state *callee,
6244                                    int insn_idx)
6245 {
6246         /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
6247          *          u64 flags);
6248          * callback_fn(u32 index, void *callback_ctx);
6249          */
6250         callee->regs[BPF_REG_1].type = SCALAR_VALUE;
6251         callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
6252
6253         /* unused */
6254         __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
6255         __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
6256         __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6257
6258         callee->in_callback_fn = true;
6259         return 0;
6260 }
6261
6262 static int set_timer_callback_state(struct bpf_verifier_env *env,
6263                                     struct bpf_func_state *caller,
6264                                     struct bpf_func_state *callee,
6265                                     int insn_idx)
6266 {
6267         struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
6268
6269         /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
6270          * callback_fn(struct bpf_map *map, void *key, void *value);
6271          */
6272         callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
6273         __mark_reg_known_zero(&callee->regs[BPF_REG_1]);
6274         callee->regs[BPF_REG_1].map_ptr = map_ptr;
6275
6276         callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
6277         __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
6278         callee->regs[BPF_REG_2].map_ptr = map_ptr;
6279
6280         callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
6281         __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
6282         callee->regs[BPF_REG_3].map_ptr = map_ptr;
6283
6284         /* unused */
6285         __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
6286         __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6287         callee->in_async_callback_fn = true;
6288         return 0;
6289 }
6290
6291 static int set_find_vma_callback_state(struct bpf_verifier_env *env,
6292                                        struct bpf_func_state *caller,
6293                                        struct bpf_func_state *callee,
6294                                        int insn_idx)
6295 {
6296         /* bpf_find_vma(struct task_struct *task, u64 addr,
6297          *               void *callback_fn, void *callback_ctx, u64 flags)
6298          * (callback_fn)(struct task_struct *task,
6299          *               struct vm_area_struct *vma, void *callback_ctx);
6300          */
6301         callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
6302
6303         callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
6304         __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
6305         callee->regs[BPF_REG_2].btf =  btf_vmlinux;
6306         callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA],
6307
6308         /* pointer to stack or null */
6309         callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
6310
6311         /* unused */
6312         __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
6313         __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6314         callee->in_callback_fn = true;
6315         return 0;
6316 }
6317
6318 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
6319 {
6320         struct bpf_verifier_state *state = env->cur_state;
6321         struct bpf_func_state *caller, *callee;
6322         struct bpf_reg_state *r0;
6323         int err;
6324
6325         callee = state->frame[state->curframe];
6326         r0 = &callee->regs[BPF_REG_0];
6327         if (r0->type == PTR_TO_STACK) {
6328                 /* technically it's ok to return caller's stack pointer
6329                  * (or caller's caller's pointer) back to the caller,
6330                  * since these pointers are valid. Only current stack
6331                  * pointer will be invalid as soon as function exits,
6332                  * but let's be conservative
6333                  */
6334                 verbose(env, "cannot return stack pointer to the caller\n");
6335                 return -EINVAL;
6336         }
6337
6338         state->curframe--;
6339         caller = state->frame[state->curframe];
6340         if (callee->in_callback_fn) {
6341                 /* enforce R0 return value range [0, 1]. */
6342                 struct tnum range = tnum_range(0, 1);
6343
6344                 if (r0->type != SCALAR_VALUE) {
6345                         verbose(env, "R0 not a scalar value\n");
6346                         return -EACCES;
6347                 }
6348                 if (!tnum_in(range, r0->var_off)) {
6349                         verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
6350                         return -EINVAL;
6351                 }
6352         } else {
6353                 /* return to the caller whatever r0 had in the callee */
6354                 caller->regs[BPF_REG_0] = *r0;
6355         }
6356
6357         /* Transfer references to the caller */
6358         err = copy_reference_state(caller, callee);
6359         if (err)
6360                 return err;
6361
6362         *insn_idx = callee->callsite + 1;
6363         if (env->log.level & BPF_LOG_LEVEL) {
6364                 verbose(env, "returning from callee:\n");
6365                 print_verifier_state(env, callee, true);
6366                 verbose(env, "to caller at %d:\n", *insn_idx);
6367                 print_verifier_state(env, caller, true);
6368         }
6369         /* clear everything in the callee */
6370         free_func_state(callee);
6371         state->frame[state->curframe + 1] = NULL;
6372         return 0;
6373 }
6374
6375 static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
6376                                    int func_id,
6377                                    struct bpf_call_arg_meta *meta)
6378 {
6379         struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
6380
6381         if (ret_type != RET_INTEGER ||
6382             (func_id != BPF_FUNC_get_stack &&
6383              func_id != BPF_FUNC_get_task_stack &&
6384              func_id != BPF_FUNC_probe_read_str &&
6385              func_id != BPF_FUNC_probe_read_kernel_str &&
6386              func_id != BPF_FUNC_probe_read_user_str))
6387                 return;
6388
6389         ret_reg->smax_value = meta->msize_max_value;
6390         ret_reg->s32_max_value = meta->msize_max_value;
6391         ret_reg->smin_value = -MAX_ERRNO;
6392         ret_reg->s32_min_value = -MAX_ERRNO;
6393         __reg_deduce_bounds(ret_reg);
6394         __reg_bound_offset(ret_reg);
6395         __update_reg_bounds(ret_reg);
6396 }
6397
6398 static int
6399 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
6400                 int func_id, int insn_idx)
6401 {
6402         struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
6403         struct bpf_map *map = meta->map_ptr;
6404
6405         if (func_id != BPF_FUNC_tail_call &&
6406             func_id != BPF_FUNC_map_lookup_elem &&
6407             func_id != BPF_FUNC_map_update_elem &&
6408             func_id != BPF_FUNC_map_delete_elem &&
6409             func_id != BPF_FUNC_map_push_elem &&
6410             func_id != BPF_FUNC_map_pop_elem &&
6411             func_id != BPF_FUNC_map_peek_elem &&
6412             func_id != BPF_FUNC_for_each_map_elem &&
6413             func_id != BPF_FUNC_redirect_map)
6414                 return 0;
6415
6416         if (map == NULL) {
6417                 verbose(env, "kernel subsystem misconfigured verifier\n");
6418                 return -EINVAL;
6419         }
6420
6421         /* In case of read-only, some additional restrictions
6422          * need to be applied in order to prevent altering the
6423          * state of the map from program side.
6424          */
6425         if ((map->map_flags & BPF_F_RDONLY_PROG) &&
6426             (func_id == BPF_FUNC_map_delete_elem ||
6427              func_id == BPF_FUNC_map_update_elem ||
6428              func_id == BPF_FUNC_map_push_elem ||
6429              func_id == BPF_FUNC_map_pop_elem)) {
6430                 verbose(env, "write into map forbidden\n");
6431                 return -EACCES;
6432         }
6433
6434         if (!BPF_MAP_PTR(aux->map_ptr_state))
6435                 bpf_map_ptr_store(aux, meta->map_ptr,
6436                                   !meta->map_ptr->bypass_spec_v1);
6437         else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
6438                 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
6439                                   !meta->map_ptr->bypass_spec_v1);
6440         return 0;
6441 }
6442
6443 static int
6444 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
6445                 int func_id, int insn_idx)
6446 {
6447         struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
6448         struct bpf_reg_state *regs = cur_regs(env), *reg;
6449         struct bpf_map *map = meta->map_ptr;
6450         struct tnum range;
6451         u64 val;
6452         int err;
6453
6454         if (func_id != BPF_FUNC_tail_call)
6455                 return 0;
6456         if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
6457                 verbose(env, "kernel subsystem misconfigured verifier\n");
6458                 return -EINVAL;
6459         }
6460
6461         range = tnum_range(0, map->max_entries - 1);
6462         reg = &regs[BPF_REG_3];
6463
6464         if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
6465                 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
6466                 return 0;
6467         }
6468
6469         err = mark_chain_precision(env, BPF_REG_3);
6470         if (err)
6471                 return err;
6472
6473         val = reg->var_off.value;
6474         if (bpf_map_key_unseen(aux))
6475                 bpf_map_key_store(aux, val);
6476         else if (!bpf_map_key_poisoned(aux) &&
6477                   bpf_map_key_immediate(aux) != val)
6478                 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
6479         return 0;
6480 }
6481
6482 static int check_reference_leak(struct bpf_verifier_env *env)
6483 {
6484         struct bpf_func_state *state = cur_func(env);
6485         int i;
6486
6487         for (i = 0; i < state->acquired_refs; i++) {
6488                 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
6489                         state->refs[i].id, state->refs[i].insn_idx);
6490         }
6491         return state->acquired_refs ? -EINVAL : 0;
6492 }
6493
6494 static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
6495                                    struct bpf_reg_state *regs)
6496 {
6497         struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
6498         struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
6499         struct bpf_map *fmt_map = fmt_reg->map_ptr;
6500         int err, fmt_map_off, num_args;
6501         u64 fmt_addr;
6502         char *fmt;
6503
6504         /* data must be an array of u64 */
6505         if (data_len_reg->var_off.value % 8)
6506                 return -EINVAL;
6507         num_args = data_len_reg->var_off.value / 8;
6508
6509         /* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
6510          * and map_direct_value_addr is set.
6511          */
6512         fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
6513         err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
6514                                                   fmt_map_off);
6515         if (err) {
6516                 verbose(env, "verifier bug\n");
6517                 return -EFAULT;
6518         }
6519         fmt = (char *)(long)fmt_addr + fmt_map_off;
6520
6521         /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
6522          * can focus on validating the format specifiers.
6523          */
6524         err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args);
6525         if (err < 0)
6526                 verbose(env, "Invalid format string\n");
6527
6528         return err;
6529 }
6530
6531 static int check_get_func_ip(struct bpf_verifier_env *env)
6532 {
6533         enum bpf_prog_type type = resolve_prog_type(env->prog);
6534         int func_id = BPF_FUNC_get_func_ip;
6535
6536         if (type == BPF_PROG_TYPE_TRACING) {
6537                 if (!bpf_prog_has_trampoline(env->prog)) {
6538                         verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
6539                                 func_id_name(func_id), func_id);
6540                         return -ENOTSUPP;
6541                 }
6542                 return 0;
6543         } else if (type == BPF_PROG_TYPE_KPROBE) {
6544                 return 0;
6545         }
6546
6547         verbose(env, "func %s#%d not supported for program type %d\n",
6548                 func_id_name(func_id), func_id, type);
6549         return -ENOTSUPP;
6550 }
6551
6552 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
6553                              int *insn_idx_p)
6554 {
6555         const struct bpf_func_proto *fn = NULL;
6556         enum bpf_return_type ret_type;
6557         enum bpf_type_flag ret_flag;
6558         struct bpf_reg_state *regs;
6559         struct bpf_call_arg_meta meta;
6560         int insn_idx = *insn_idx_p;
6561         bool changes_data;
6562         int i, err, func_id;
6563
6564         /* find function prototype */
6565         func_id = insn->imm;
6566         if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
6567                 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
6568                         func_id);
6569                 return -EINVAL;
6570         }
6571
6572         if (env->ops->get_func_proto)
6573                 fn = env->ops->get_func_proto(func_id, env->prog);
6574         if (!fn) {
6575                 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
6576                         func_id);
6577                 return -EINVAL;
6578         }
6579
6580         /* eBPF programs must be GPL compatible to use GPL-ed functions */
6581         if (!env->prog->gpl_compatible && fn->gpl_only) {
6582                 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
6583                 return -EINVAL;
6584         }
6585
6586         if (fn->allowed && !fn->allowed(env->prog)) {
6587                 verbose(env, "helper call is not allowed in probe\n");
6588                 return -EINVAL;
6589         }
6590
6591         /* With LD_ABS/IND some JITs save/restore skb from r1. */
6592         changes_data = bpf_helper_changes_pkt_data(fn->func);
6593         if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
6594                 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
6595                         func_id_name(func_id), func_id);
6596                 return -EINVAL;
6597         }
6598
6599         memset(&meta, 0, sizeof(meta));
6600         meta.pkt_access = fn->pkt_access;
6601
6602         err = check_func_proto(fn, func_id);
6603         if (err) {
6604                 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
6605                         func_id_name(func_id), func_id);
6606                 return err;
6607         }
6608
6609         meta.func_id = func_id;
6610         /* check args */
6611         for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
6612                 err = check_func_arg(env, i, &meta, fn);
6613                 if (err)
6614                         return err;
6615         }
6616
6617         err = record_func_map(env, &meta, func_id, insn_idx);
6618         if (err)
6619                 return err;
6620
6621         err = record_func_key(env, &meta, func_id, insn_idx);
6622         if (err)
6623                 return err;
6624
6625         /* Mark slots with STACK_MISC in case of raw mode, stack offset
6626          * is inferred from register state.
6627          */
6628         for (i = 0; i < meta.access_size; i++) {
6629                 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
6630                                        BPF_WRITE, -1, false);
6631                 if (err)
6632                         return err;
6633         }
6634
6635         if (is_release_function(func_id)) {
6636                 err = release_reference(env, meta.ref_obj_id);
6637                 if (err) {
6638                         verbose(env, "func %s#%d reference has not been acquired before\n",
6639                                 func_id_name(func_id), func_id);
6640                         return err;
6641                 }
6642         }
6643
6644         regs = cur_regs(env);
6645
6646         switch (func_id) {
6647         case BPF_FUNC_tail_call:
6648                 err = check_reference_leak(env);
6649                 if (err) {
6650                         verbose(env, "tail_call would lead to reference leak\n");
6651                         return err;
6652                 }
6653                 break;
6654         case BPF_FUNC_get_local_storage:
6655                 /* check that flags argument in get_local_storage(map, flags) is 0,
6656                  * this is required because get_local_storage() can't return an error.
6657                  */
6658                 if (!register_is_null(&regs[BPF_REG_2])) {
6659                         verbose(env, "get_local_storage() doesn't support non-zero flags\n");
6660                         return -EINVAL;
6661                 }
6662                 break;
6663         case BPF_FUNC_for_each_map_elem:
6664                 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
6665                                         set_map_elem_callback_state);
6666                 break;
6667         case BPF_FUNC_timer_set_callback:
6668                 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
6669                                         set_timer_callback_state);
6670                 break;
6671         case BPF_FUNC_find_vma:
6672                 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
6673                                         set_find_vma_callback_state);
6674                 break;
6675         case BPF_FUNC_snprintf:
6676                 err = check_bpf_snprintf_call(env, regs);
6677                 break;
6678         case BPF_FUNC_loop:
6679                 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
6680                                         set_loop_callback_state);
6681                 break;
6682         }
6683
6684         if (err)
6685                 return err;
6686
6687         /* reset caller saved regs */
6688         for (i = 0; i < CALLER_SAVED_REGS; i++) {
6689                 mark_reg_not_init(env, regs, caller_saved[i]);
6690                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
6691         }
6692
6693         /* helper call returns 64-bit value. */
6694         regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
6695
6696         /* update return register (already marked as written above) */
6697         ret_type = fn->ret_type;
6698         ret_flag = type_flag(fn->ret_type);
6699         if (ret_type == RET_INTEGER) {
6700                 /* sets type to SCALAR_VALUE */
6701                 mark_reg_unknown(env, regs, BPF_REG_0);
6702         } else if (ret_type == RET_VOID) {
6703                 regs[BPF_REG_0].type = NOT_INIT;
6704         } else if (base_type(ret_type) == RET_PTR_TO_MAP_VALUE) {
6705                 /* There is no offset yet applied, variable or fixed */
6706                 mark_reg_known_zero(env, regs, BPF_REG_0);
6707                 /* remember map_ptr, so that check_map_access()
6708                  * can check 'value_size' boundary of memory access
6709                  * to map element returned from bpf_map_lookup_elem()
6710                  */
6711                 if (meta.map_ptr == NULL) {
6712                         verbose(env,
6713                                 "kernel subsystem misconfigured verifier\n");
6714                         return -EINVAL;
6715                 }
6716                 regs[BPF_REG_0].map_ptr = meta.map_ptr;
6717                 regs[BPF_REG_0].map_uid = meta.map_uid;
6718                 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
6719                 if (!type_may_be_null(ret_type) &&
6720                     map_value_has_spin_lock(meta.map_ptr)) {
6721                         regs[BPF_REG_0].id = ++env->id_gen;
6722                 }
6723         } else if (base_type(ret_type) == RET_PTR_TO_SOCKET) {
6724                 mark_reg_known_zero(env, regs, BPF_REG_0);
6725                 regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
6726         } else if (base_type(ret_type) == RET_PTR_TO_SOCK_COMMON) {
6727                 mark_reg_known_zero(env, regs, BPF_REG_0);
6728                 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
6729         } else if (base_type(ret_type) == RET_PTR_TO_TCP_SOCK) {
6730                 mark_reg_known_zero(env, regs, BPF_REG_0);
6731                 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
6732         } else if (base_type(ret_type) == RET_PTR_TO_ALLOC_MEM) {
6733                 mark_reg_known_zero(env, regs, BPF_REG_0);
6734                 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
6735                 regs[BPF_REG_0].mem_size = meta.mem_size;
6736         } else if (base_type(ret_type) == RET_PTR_TO_MEM_OR_BTF_ID) {
6737                 const struct btf_type *t;
6738
6739                 mark_reg_known_zero(env, regs, BPF_REG_0);
6740                 t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
6741                 if (!btf_type_is_struct(t)) {
6742                         u32 tsize;
6743                         const struct btf_type *ret;
6744                         const char *tname;
6745
6746                         /* resolve the type size of ksym. */
6747                         ret = btf_resolve_size(meta.ret_btf, t, &tsize);
6748                         if (IS_ERR(ret)) {
6749                                 tname = btf_name_by_offset(meta.ret_btf, t->name_off);
6750                                 verbose(env, "unable to resolve the size of type '%s': %ld\n",
6751                                         tname, PTR_ERR(ret));
6752                                 return -EINVAL;
6753                         }
6754                         regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
6755                         regs[BPF_REG_0].mem_size = tsize;
6756                 } else {
6757                         /* MEM_RDONLY may be carried from ret_flag, but it
6758                          * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
6759                          * it will confuse the check of PTR_TO_BTF_ID in
6760                          * check_mem_access().
6761                          */
6762                         ret_flag &= ~MEM_RDONLY;
6763
6764                         regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
6765                         regs[BPF_REG_0].btf = meta.ret_btf;
6766                         regs[BPF_REG_0].btf_id = meta.ret_btf_id;
6767                 }
6768         } else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) {
6769                 int ret_btf_id;
6770
6771                 mark_reg_known_zero(env, regs, BPF_REG_0);
6772                 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
6773                 ret_btf_id = *fn->ret_btf_id;
6774                 if (ret_btf_id == 0) {
6775                         verbose(env, "invalid return type %u of func %s#%d\n",
6776                                 base_type(ret_type), func_id_name(func_id),
6777                                 func_id);
6778                         return -EINVAL;
6779                 }
6780                 /* current BPF helper definitions are only coming from
6781                  * built-in code with type IDs from  vmlinux BTF
6782                  */
6783                 regs[BPF_REG_0].btf = btf_vmlinux;
6784                 regs[BPF_REG_0].btf_id = ret_btf_id;
6785         } else {
6786                 verbose(env, "unknown return type %u of func %s#%d\n",
6787                         base_type(ret_type), func_id_name(func_id), func_id);
6788                 return -EINVAL;
6789         }
6790
6791         if (type_may_be_null(regs[BPF_REG_0].type))
6792                 regs[BPF_REG_0].id = ++env->id_gen;
6793
6794         if (is_ptr_cast_function(func_id)) {
6795                 /* For release_reference() */
6796                 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
6797         } else if (is_acquire_function(func_id, meta.map_ptr)) {
6798                 int id = acquire_reference_state(env, insn_idx);
6799
6800                 if (id < 0)
6801                         return id;
6802                 /* For mark_ptr_or_null_reg() */
6803                 regs[BPF_REG_0].id = id;
6804                 /* For release_reference() */
6805                 regs[BPF_REG_0].ref_obj_id = id;
6806         }
6807
6808         do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
6809
6810         err = check_map_func_compatibility(env, meta.map_ptr, func_id);
6811         if (err)
6812                 return err;
6813
6814         if ((func_id == BPF_FUNC_get_stack ||
6815              func_id == BPF_FUNC_get_task_stack) &&
6816             !env->prog->has_callchain_buf) {
6817                 const char *err_str;
6818
6819 #ifdef CONFIG_PERF_EVENTS
6820                 err = get_callchain_buffers(sysctl_perf_event_max_stack);
6821                 err_str = "cannot get callchain buffer for func %s#%d\n";
6822 #else
6823                 err = -ENOTSUPP;
6824                 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
6825 #endif
6826                 if (err) {
6827                         verbose(env, err_str, func_id_name(func_id), func_id);
6828                         return err;
6829                 }
6830
6831                 env->prog->has_callchain_buf = true;
6832         }
6833
6834         if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
6835                 env->prog->call_get_stack = true;
6836
6837         if (func_id == BPF_FUNC_get_func_ip) {
6838                 if (check_get_func_ip(env))
6839                         return -ENOTSUPP;
6840                 env->prog->call_get_func_ip = true;
6841         }
6842
6843         if (changes_data)
6844                 clear_all_pkt_pointers(env);
6845         return 0;
6846 }
6847
6848 /* mark_btf_func_reg_size() is used when the reg size is determined by
6849  * the BTF func_proto's return value size and argument.
6850  */
6851 static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
6852                                    size_t reg_size)
6853 {
6854         struct bpf_reg_state *reg = &cur_regs(env)[regno];
6855
6856         if (regno == BPF_REG_0) {
6857                 /* Function return value */
6858                 reg->live |= REG_LIVE_WRITTEN;
6859                 reg->subreg_def = reg_size == sizeof(u64) ?
6860                         DEF_NOT_SUBREG : env->insn_idx + 1;
6861         } else {
6862                 /* Function argument */
6863                 if (reg_size == sizeof(u64)) {
6864                         mark_insn_zext(env, reg);
6865                         mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
6866                 } else {
6867                         mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
6868                 }
6869         }
6870 }
6871
6872 static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
6873                             int *insn_idx_p)
6874 {
6875         const struct btf_type *t, *func, *func_proto, *ptr_type;
6876         struct bpf_reg_state *regs = cur_regs(env);
6877         const char *func_name, *ptr_type_name;
6878         u32 i, nargs, func_id, ptr_type_id;
6879         int err, insn_idx = *insn_idx_p;
6880         const struct btf_param *args;
6881         struct btf *desc_btf;
6882         bool acq;
6883
6884         /* skip for now, but return error when we find this in fixup_kfunc_call */
6885         if (!insn->imm)
6886                 return 0;
6887
6888         desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off);
6889         if (IS_ERR(desc_btf))
6890                 return PTR_ERR(desc_btf);
6891
6892         func_id = insn->imm;
6893         func = btf_type_by_id(desc_btf, func_id);
6894         func_name = btf_name_by_offset(desc_btf, func->name_off);
6895         func_proto = btf_type_by_id(desc_btf, func->type);
6896
6897         if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
6898                                       BTF_KFUNC_TYPE_CHECK, func_id)) {
6899                 verbose(env, "calling kernel function %s is not allowed\n",
6900                         func_name);
6901                 return -EACCES;
6902         }
6903
6904         acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
6905                                         BTF_KFUNC_TYPE_ACQUIRE, func_id);
6906
6907         /* Check the arguments */
6908         err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs);
6909         if (err < 0)
6910                 return err;
6911         /* In case of release function, we get register number of refcounted
6912          * PTR_TO_BTF_ID back from btf_check_kfunc_arg_match, do the release now
6913          */
6914         if (err) {
6915                 err = release_reference(env, regs[err].ref_obj_id);
6916                 if (err) {
6917                         verbose(env, "kfunc %s#%d reference has not been acquired before\n",
6918                                 func_name, func_id);
6919                         return err;
6920                 }
6921         }
6922
6923         for (i = 0; i < CALLER_SAVED_REGS; i++)
6924                 mark_reg_not_init(env, regs, caller_saved[i]);
6925
6926         /* Check return type */
6927         t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
6928
6929         if (acq && !btf_type_is_ptr(t)) {
6930                 verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
6931                 return -EINVAL;
6932         }
6933
6934         if (btf_type_is_scalar(t)) {
6935                 mark_reg_unknown(env, regs, BPF_REG_0);
6936                 mark_btf_func_reg_size(env, BPF_REG_0, t->size);
6937         } else if (btf_type_is_ptr(t)) {
6938                 ptr_type = btf_type_skip_modifiers(desc_btf, t->type,
6939                                                    &ptr_type_id);
6940                 if (!btf_type_is_struct(ptr_type)) {
6941                         ptr_type_name = btf_name_by_offset(desc_btf,
6942                                                            ptr_type->name_off);
6943                         verbose(env, "kernel function %s returns pointer type %s %s is not supported\n",
6944                                 func_name, btf_type_str(ptr_type),
6945                                 ptr_type_name);
6946                         return -EINVAL;
6947                 }
6948                 mark_reg_known_zero(env, regs, BPF_REG_0);
6949                 regs[BPF_REG_0].btf = desc_btf;
6950                 regs[BPF_REG_0].type = PTR_TO_BTF_ID;
6951                 regs[BPF_REG_0].btf_id = ptr_type_id;
6952                 if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
6953                                               BTF_KFUNC_TYPE_RET_NULL, func_id)) {
6954                         regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
6955                         /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
6956                         regs[BPF_REG_0].id = ++env->id_gen;
6957                 }
6958                 mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
6959                 if (acq) {
6960                         int id = acquire_reference_state(env, insn_idx);
6961
6962                         if (id < 0)
6963                                 return id;
6964                         regs[BPF_REG_0].id = id;
6965                         regs[BPF_REG_0].ref_obj_id = id;
6966                 }
6967         } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
6968
6969         nargs = btf_type_vlen(func_proto);
6970         args = (const struct btf_param *)(func_proto + 1);
6971         for (i = 0; i < nargs; i++) {
6972                 u32 regno = i + 1;
6973
6974                 t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
6975                 if (btf_type_is_ptr(t))
6976                         mark_btf_func_reg_size(env, regno, sizeof(void *));
6977                 else
6978                         /* scalar. ensured by btf_check_kfunc_arg_match() */
6979                         mark_btf_func_reg_size(env, regno, t->size);
6980         }
6981
6982         return 0;
6983 }
6984
6985 static bool signed_add_overflows(s64 a, s64 b)
6986 {
6987         /* Do the add in u64, where overflow is well-defined */
6988         s64 res = (s64)((u64)a + (u64)b);
6989
6990         if (b < 0)
6991                 return res > a;
6992         return res < a;
6993 }
6994
6995 static bool signed_add32_overflows(s32 a, s32 b)
6996 {
6997         /* Do the add in u32, where overflow is well-defined */
6998         s32 res = (s32)((u32)a + (u32)b);
6999
7000         if (b < 0)
7001                 return res > a;
7002         return res < a;
7003 }
7004
7005 static bool signed_sub_overflows(s64 a, s64 b)
7006 {
7007         /* Do the sub in u64, where overflow is well-defined */
7008         s64 res = (s64)((u64)a - (u64)b);
7009
7010         if (b < 0)
7011                 return res < a;
7012         return res > a;
7013 }
7014
7015 static bool signed_sub32_overflows(s32 a, s32 b)
7016 {
7017         /* Do the sub in u32, where overflow is well-defined */
7018         s32 res = (s32)((u32)a - (u32)b);
7019
7020         if (b < 0)
7021                 return res < a;
7022         return res > a;
7023 }
7024
7025 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
7026                                   const struct bpf_reg_state *reg,
7027                                   enum bpf_reg_type type)
7028 {
7029         bool known = tnum_is_const(reg->var_off);
7030         s64 val = reg->var_off.value;
7031         s64 smin = reg->smin_value;
7032
7033         if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
7034                 verbose(env, "math between %s pointer and %lld is not allowed\n",
7035                         reg_type_str(env, type), val);
7036                 return false;
7037         }
7038
7039         if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
7040                 verbose(env, "%s pointer offset %d is not allowed\n",
7041                         reg_type_str(env, type), reg->off);
7042                 return false;
7043         }
7044
7045         if (smin == S64_MIN) {
7046                 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
7047                         reg_type_str(env, type));
7048                 return false;
7049         }
7050
7051         if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
7052                 verbose(env, "value %lld makes %s pointer be out of bounds\n",
7053                         smin, reg_type_str(env, type));
7054                 return false;
7055         }
7056
7057         return true;
7058 }
7059
7060 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
7061 {
7062         return &env->insn_aux_data[env->insn_idx];
7063 }
7064
/* Negative reason codes returned by the pointer ALU sanitation helpers and
 * translated into a log message by sanitize_err().
 */
enum {
        /* offset scalar has mixed signed bounds */
        REASON_BOUNDS = -1,
        /* pointer type has no ALU limit defined */
        REASON_TYPE = -2,
        /* different paths disagree on alu state or limit */
        REASON_PATHS = -3,
        /* computed limit is beyond the pointer's bounds */
        REASON_LIMIT = -4,
        /* speculative state could not be pushed */
        REASON_STACK = -5,
};
7072
7073 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
7074                               u32 *alu_limit, bool mask_to_left)
7075 {
7076         u32 max = 0, ptr_limit = 0;
7077
7078         switch (ptr_reg->type) {
7079         case PTR_TO_STACK:
7080                 /* Offset 0 is out-of-bounds, but acceptable start for the
7081                  * left direction, see BPF_REG_FP. Also, unknown scalar
7082                  * offset where we would need to deal with min/max bounds is
7083                  * currently prohibited for unprivileged.
7084                  */
7085                 max = MAX_BPF_STACK + mask_to_left;
7086                 ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
7087                 break;
7088         case PTR_TO_MAP_VALUE:
7089                 max = ptr_reg->map_ptr->value_size;
7090                 ptr_limit = (mask_to_left ?
7091                              ptr_reg->smin_value :
7092                              ptr_reg->umax_value) + ptr_reg->off;
7093                 break;
7094         default:
7095                 return REASON_TYPE;
7096         }
7097
7098         if (ptr_limit >= max)
7099                 return REASON_LIMIT;
7100         *alu_limit = ptr_limit;
7101         return 0;
7102 }
7103
7104 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
7105                                     const struct bpf_insn *insn)
7106 {
7107         return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
7108 }
7109
7110 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
7111                                        u32 alu_state, u32 alu_limit)
7112 {
7113         /* If we arrived here from different branches with different
7114          * state or limits to sanitize, then this won't work.
7115          */
7116         if (aux->alu_state &&
7117             (aux->alu_state != alu_state ||
7118              aux->alu_limit != alu_limit))
7119                 return REASON_PATHS;
7120
7121         /* Corresponding fixup done in do_misc_fixups(). */
7122         aux->alu_state = alu_state;
7123         aux->alu_limit = alu_limit;
7124         return 0;
7125 }
7126
7127 static int sanitize_val_alu(struct bpf_verifier_env *env,
7128                             struct bpf_insn *insn)
7129 {
7130         struct bpf_insn_aux_data *aux = cur_aux(env);
7131
7132         if (can_skip_alu_sanitation(env, insn))
7133                 return 0;
7134
7135         return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
7136 }
7137
7138 static bool sanitize_needed(u8 opcode)
7139 {
7140         return opcode == BPF_ADD || opcode == BPF_SUB;
7141 }
7142
7143 struct bpf_sanitize_info {
7144         struct bpf_insn_aux_data aux;
7145         bool mask_to_left;
7146 };
7147
7148 static struct bpf_verifier_state *
7149 sanitize_speculative_path(struct bpf_verifier_env *env,
7150                           const struct bpf_insn *insn,
7151                           u32 next_idx, u32 curr_idx)
7152 {
7153         struct bpf_verifier_state *branch;
7154         struct bpf_reg_state *regs;
7155
7156         branch = push_stack(env, next_idx, curr_idx, true);
7157         if (branch && insn) {
7158                 regs = branch->frame[branch->curframe]->regs;
7159                 if (BPF_SRC(insn->code) == BPF_K) {
7160                         mark_reg_unknown(env, regs, insn->dst_reg);
7161                 } else if (BPF_SRC(insn->code) == BPF_X) {
7162                         mark_reg_unknown(env, regs, insn->dst_reg);
7163                         mark_reg_unknown(env, regs, insn->src_reg);
7164                 }
7165         }
7166         return branch;
7167 }
7168
7169 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
7170                             struct bpf_insn *insn,
7171                             const struct bpf_reg_state *ptr_reg,
7172                             const struct bpf_reg_state *off_reg,
7173                             struct bpf_reg_state *dst_reg,
7174                             struct bpf_sanitize_info *info,
7175                             const bool commit_window)
7176 {
7177         struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
7178         struct bpf_verifier_state *vstate = env->cur_state;
7179         bool off_is_imm = tnum_is_const(off_reg->var_off);
7180         bool off_is_neg = off_reg->smin_value < 0;
7181         bool ptr_is_dst_reg = ptr_reg == dst_reg;
7182         u8 opcode = BPF_OP(insn->code);
7183         u32 alu_state, alu_limit;
7184         struct bpf_reg_state tmp;
7185         bool ret;
7186         int err;
7187
7188         if (can_skip_alu_sanitation(env, insn))
7189                 return 0;
7190
7191         /* We already marked aux for masking from non-speculative
7192          * paths, thus we got here in the first place. We only care
7193          * to explore bad access from here.
7194          */
7195         if (vstate->speculative)
7196                 goto do_sim;
7197
7198         if (!commit_window) {
7199                 if (!tnum_is_const(off_reg->var_off) &&
7200                     (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
7201                         return REASON_BOUNDS;
7202
7203                 info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
7204                                      (opcode == BPF_SUB && !off_is_neg);
7205         }
7206
7207         err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
7208         if (err < 0)
7209                 return err;
7210
7211         if (commit_window) {
7212                 /* In commit phase we narrow the masking window based on
7213                  * the observed pointer move after the simulated operation.
7214                  */
7215                 alu_state = info->aux.alu_state;
7216                 alu_limit = abs(info->aux.alu_limit - alu_limit);
7217         } else {
7218                 alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
7219                 alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
7220                 alu_state |= ptr_is_dst_reg ?
7221                              BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
7222
7223                 /* Limit pruning on unknown scalars to enable deep search for
7224                  * potential masking differences from other program paths.
7225                  */
7226                 if (!off_is_imm)
7227                         env->explore_alu_limits = true;
7228         }
7229
7230         err = update_alu_sanitation_state(aux, alu_state, alu_limit);
7231         if (err < 0)
7232                 return err;
7233 do_sim:
7234         /* If we're in commit phase, we're done here given we already
7235          * pushed the truncated dst_reg into the speculative verification
7236          * stack.
7237          *
7238          * Also, when register is a known constant, we rewrite register-based
7239          * operation to immediate-based, and thus do not need masking (and as
7240          * a consequence, do not need to simulate the zero-truncation either).
7241          */
7242         if (commit_window || off_is_imm)
7243                 return 0;
7244
7245         /* Simulate and find potential out-of-bounds access under
7246          * speculative execution from truncation as a result of
7247          * masking when off was not within expected range. If off
7248          * sits in dst, then we temporarily need to move ptr there
7249          * to simulate dst (== 0) +/-= ptr. Needed, for example,
7250          * for cases where we use K-based arithmetic in one direction
7251          * and truncated reg-based in the other in order to explore
7252          * bad access.
7253          */
7254         if (!ptr_is_dst_reg) {
7255                 tmp = *dst_reg;
7256                 *dst_reg = *ptr_reg;
7257         }
7258         ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
7259                                         env->insn_idx);
7260         if (!ptr_is_dst_reg && ret)
7261                 *dst_reg = tmp;
7262         return !ret ? REASON_STACK : 0;
7263 }
7264
7265 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
7266 {
7267         struct bpf_verifier_state *vstate = env->cur_state;
7268
7269         /* If we simulate paths under speculation, we don't update the
7270          * insn as 'seen' such that when we verify unreachable paths in
7271          * the non-speculative domain, sanitize_dead_code() can still
7272          * rewrite/sanitize them.
7273          */
7274         if (!vstate->speculative)
7275                 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
7276 }
7277
7278 static int sanitize_err(struct bpf_verifier_env *env,
7279                         const struct bpf_insn *insn, int reason,
7280                         const struct bpf_reg_state *off_reg,
7281                         const struct bpf_reg_state *dst_reg)
7282 {
7283         static const char *err = "pointer arithmetic with it prohibited for !root";
7284         const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
7285         u32 dst = insn->dst_reg, src = insn->src_reg;
7286
7287         switch (reason) {
7288         case REASON_BOUNDS:
7289                 verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
7290                         off_reg == dst_reg ? dst : src, err);
7291                 break;
7292         case REASON_TYPE:
7293                 verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
7294                         off_reg == dst_reg ? src : dst, err);
7295                 break;
7296         case REASON_PATHS:
7297                 verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
7298                         dst, op, err);
7299                 break;
7300         case REASON_LIMIT:
7301                 verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
7302                         dst, op, err);
7303                 break;
7304         case REASON_STACK:
7305                 verbose(env, "R%d could not be pushed for speculative verification, %s\n",
7306                         dst, err);
7307                 break;
7308         default:
7309                 verbose(env, "verifier internal error: unknown reason (%d)\n",
7310                         reason);
7311                 break;
7312         }
7313
7314         return -EACCES;
7315 }
7316
7317 /* Check that the result of stack pointer arithmetic falls within stack limits
7318  * and that 'reg' doesn't have a variable offset.
7319  *
7320  * Variable offset is prohibited for unprivileged mode for simplicity since it
7321  * requires corresponding support in Spectre masking for stack ALU.  See also
7322  * retrieve_ptr_limit().
7323  *
7324  * 'regno' is only used to name the register in the log messages.
7325  * 'off' includes 'reg->off'.
      *
      * Returns 0 if 'reg->var_off' is a constant and -MAX_BPF_STACK <= off < 0;
      * otherwise logs the reason and returns -EACCES.
7326  */
7327 static int check_stack_access_for_ptr_arithmetic(
7328                                 struct bpf_verifier_env *env,
7329                                 int regno,
7330                                 const struct bpf_reg_state *reg,
7331                                 int off)
7332 {
7333         if (!tnum_is_const(reg->var_off)) {
7334                 char tn_buf[48];
7335
                     /* Render var_off as text for the log message only. */
7336                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
7337                 verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
7338                         regno, tn_buf, off);
7339                 return -EACCES;
7340         }
7341
             /* Accepted offsets are strictly negative and no lower than
              * -MAX_BPF_STACK.
              */
7342         if (off >= 0 || off < -MAX_BPF_STACK) {
7343                 verbose(env, "R%d stack pointer arithmetic goes out of range, "
7344                         "prohibited for !root; off=%d\n", regno, off);
7345                 return -EACCES;
7346         }
7347
7348         return 0;
7349 }
7350
     /* Check that the pointer held in 'dst_reg', the destination register of the
      * ALU instruction 'insn', is still within bounds.  The check is skipped
      * entirely when env->bypass_spec_v1 is set.
      *
      * Only PTR_TO_STACK and PTR_TO_MAP_VALUE registers are checked; any other
      * register type is accepted as is.
      *
      * Returns 0 if the check passes or is skipped, -EACCES otherwise.
      */
7351 static int sanitize_check_bounds(struct bpf_verifier_env *env,
7352                                  const struct bpf_insn *insn,
7353                                  const struct bpf_reg_state *dst_reg)
7354 {
7355         u32 dst = insn->dst_reg;
7356
7357         /* For unprivileged we require that resulting offset must be in bounds
7358          * in order to be able to sanitize access later on.
7359          */
7360         if (env->bypass_spec_v1)
7361                 return 0;
7362
7363         switch (dst_reg->type) {
7364         case PTR_TO_STACK:
                     /* The offset handed over is the fixed offset plus
                      * var_off.value; the callee itself rejects a non-constant
                      * var_off.
                      */
7365                 if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
7366                                         dst_reg->off + dst_reg->var_off.value))
7367                         return -EACCES;
7368                 break;
7369         case PTR_TO_MAP_VALUE:
                     /* NOTE(review): presumably this probes a 1-byte access at
                      * dst_reg->off -- confirm against check_map_access().
                      */
7370                 if (check_map_access(env, dst, dst_reg->off, 1, false)) {
7371                         verbose(env, "R%d pointer arithmetic of map value goes out of range, "
7372                                 "prohibited for !root\n", dst);
7373                         return -EACCES;
7374                 }
7375                 break;
7376         default:
7377                 break;
7378         }
7379
7380         return 0;
7381 }
7382
7383 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
7384  * Caller should also handle BPF_MOV case separately.
7385  * If we return -EACCES, caller may want to try again treating pointer as a
7386  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
      * NOTE(review): most verbose() calls in this function are not conditional on
      * env->allow_ptr_leaks, so the sentence above looks stale -- confirm.
      *
      * 'ptr_reg' is the pointer operand and 'off_reg' the scalar operand.  The
      * result is written to regs[insn->dst_reg] of the current frame, which may be
      * the same register as 'off_reg' ('scalar += pointer').
      *
      * Only 64-bit BPF_ADD and BPF_SUB produce a pointer here.  Returns 0 on
      * success (including the cases where the destination is just marked as an
      * unknown scalar), -EACCES for prohibited operations or a failed bounds
      * check, -EINVAL if check_reg_sane_offset() rejects an operand or the
      * result, or the return value of sanitize_err() when sanitize_ptr_alu()
      * fails.
7387  */
7388 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
7389                                    struct bpf_insn *insn,
7390                                    const struct bpf_reg_state *ptr_reg,
7391                                    const struct bpf_reg_state *off_reg)
7392 {
7393         struct bpf_verifier_state *vstate = env->cur_state;
7394         struct bpf_func_state *state = vstate->frame[vstate->curframe];
7395         struct bpf_reg_state *regs = state->regs, *dst_reg;
7396         bool known = tnum_is_const(off_reg->var_off);
7397         s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
7398             smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
7399         u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
7400             umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
7401         struct bpf_sanitize_info info = {};
7402         u8 opcode = BPF_OP(insn->code);
7403         u32 dst = insn->dst_reg;
7404         int ret;
7405
7406         dst_reg = &regs[dst];
7407
7408         if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
7409             smin_val > smax_val || umin_val > umax_val) {
7410                 /* Taint dst register if offset had invalid bounds derived from
7411                  * e.g. dead branches.
7412                  */
7413                 __mark_reg_unknown(env, dst_reg);
7414                 return 0;
7415         }
7416
7417         if (BPF_CLASS(insn->code) != BPF_ALU64) {
7418                 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
7419                 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
7420                         __mark_reg_unknown(env, dst_reg);
7421                         return 0;
7422                 }
7423
7424                 verbose(env,
7425                         "R%d 32-bit pointer arithmetic prohibited\n",
7426                         dst);
7427                 return -EACCES;
7428         }
7429
7430         if (ptr_reg->type & PTR_MAYBE_NULL) {
7431                 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
7432                         dst, reg_type_str(env, ptr_reg->type));
7433                 return -EACCES;
7434         }
7435
             /* Pointer types on which arithmetic is rejected outright.  The only
              * exception is adding the constant 0 to a CONST_PTR_TO_MAP.
              */
7436         switch (base_type(ptr_reg->type)) {
7437         case CONST_PTR_TO_MAP:
7438                 /* smin_val represents the known value */
7439                 if (known && smin_val == 0 && opcode == BPF_ADD)
7440                         break;
7441                 fallthrough;
7442         case PTR_TO_PACKET_END:
7443         case PTR_TO_SOCKET:
7444         case PTR_TO_SOCK_COMMON:
7445         case PTR_TO_TCP_SOCK:
7446         case PTR_TO_XDP_SOCK:
7447                 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
7448                         dst, reg_type_str(env, ptr_reg->type));
7449                 return -EACCES;
7450         default:
7451                 break;
7452         }
7453
7454         /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
7455          * The id may be overwritten later if we create a new variable offset.
7456          */
7457         dst_reg->type = ptr_reg->type;
7458         dst_reg->id = ptr_reg->id;
7459
7460         if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
7461             !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
7462                 return -EINVAL;
7463
7464         /* pointer types do not carry 32-bit bounds at the moment. */
7465         __mark_reg32_unbounded(dst_reg);
7466
             /* First sanitation pass, made before the bounds below are computed;
              * 'info' is handed to the second pass at the end of the function.
              */
7467         if (sanitize_needed(opcode)) {
7468                 ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
7469                                        &info, false);
7470                 if (ret < 0)
7471                         return sanitize_err(env, insn, ret, off_reg, dst_reg);
7472         }
7473
7474         switch (opcode) {
7475         case BPF_ADD:
7476                 /* We can take a fixed offset as long as it doesn't overflow
7477                  * the s32 'off' field
7478                  */
7479                 if (known && (ptr_reg->off + smin_val ==
7480                               (s64)(s32)(ptr_reg->off + smin_val))) {
7481                         /* pointer += K.  Accumulate it into fixed offset */
7482                         dst_reg->smin_value = smin_ptr;
7483                         dst_reg->smax_value = smax_ptr;
7484                         dst_reg->umin_value = umin_ptr;
7485                         dst_reg->umax_value = umax_ptr;
7486                         dst_reg->var_off = ptr_reg->var_off;
7487                         dst_reg->off = ptr_reg->off + smin_val;
7488                         dst_reg->raw = ptr_reg->raw;
7489                         break;
7490                 }
7491                 /* A new variable offset is created.  Note that off_reg->off
7492                  * == 0, since it's a scalar.
7493                  * dst_reg gets the pointer type and since some positive
7494                  * integer value was added to the pointer, give it a new 'id'
7495                  * if it's a PTR_TO_PACKET.
7496                  * this creates a new 'base' pointer, off_reg (variable) gets
7497                  * added into the variable offset, and we copy the fixed offset
7498                  * from ptr_reg.
7499                  */
7500                 if (signed_add_overflows(smin_ptr, smin_val) ||
7501                     signed_add_overflows(smax_ptr, smax_val)) {
7502                         dst_reg->smin_value = S64_MIN;
7503                         dst_reg->smax_value = S64_MAX;
7504                 } else {
7505                         dst_reg->smin_value = smin_ptr + smin_val;
7506                         dst_reg->smax_value = smax_ptr + smax_val;
7507                 }
                     /* Unsigned wrap-around shows up as a sum below an addend. */
7508                 if (umin_ptr + umin_val < umin_ptr ||
7509                     umax_ptr + umax_val < umax_ptr) {
7510                         dst_reg->umin_value = 0;
7511                         dst_reg->umax_value = U64_MAX;
7512                 } else {
7513                         dst_reg->umin_value = umin_ptr + umin_val;
7514                         dst_reg->umax_value = umax_ptr + umax_val;
7515                 }
7516                 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
7517                 dst_reg->off = ptr_reg->off;
7518                 dst_reg->raw = ptr_reg->raw;
7519                 if (reg_is_pkt_pointer(ptr_reg)) {
7520                         dst_reg->id = ++env->id_gen;
7521                         /* something was added to pkt_ptr, set range to zero */
7522                         memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
7523                 }
7524                 break;
7525         case BPF_SUB:
7526                 if (dst_reg == off_reg) {
7527                         /* scalar -= pointer.  Creates an unknown scalar */
7528                         verbose(env, "R%d tried to subtract pointer from scalar\n",
7529                                 dst);
7530                         return -EACCES;
7531                 }
7532                 /* We don't allow subtraction from FP, because (according to
7533                  * test_verifier.c test "invalid fp arithmetic") JITs might not
7534                  * be able to deal with it.
7535                  */
7536                 if (ptr_reg->type == PTR_TO_STACK) {
7537                         verbose(env, "R%d subtraction from stack pointer prohibited\n",
7538                                 dst);
7539                         return -EACCES;
7540                 }
7541                 if (known && (ptr_reg->off - smin_val ==
7542                               (s64)(s32)(ptr_reg->off - smin_val))) {
7543                         /* pointer -= K.  Subtract it from fixed offset */
7544                         dst_reg->smin_value = smin_ptr;
7545                         dst_reg->smax_value = smax_ptr;
7546                         dst_reg->umin_value = umin_ptr;
7547                         dst_reg->umax_value = umax_ptr;
7548                         dst_reg->var_off = ptr_reg->var_off;
7549                         dst_reg->id = ptr_reg->id;
7550                         dst_reg->off = ptr_reg->off - smin_val;
7551                         dst_reg->raw = ptr_reg->raw;
7552                         break;
7553                 }
7554                 /* A new variable offset is created.  If the subtrahend is known
7555                  * nonnegative, then any reg->range we had before is still good.
7556                  */
7557                 if (signed_sub_overflows(smin_ptr, smax_val) ||
7558                     signed_sub_overflows(smax_ptr, smin_val)) {
7559                         /* Overflow possible, we know nothing */
7560                         dst_reg->smin_value = S64_MIN;
7561                         dst_reg->smax_value = S64_MAX;
7562                 } else {
7563                         dst_reg->smin_value = smin_ptr - smax_val;
7564                         dst_reg->smax_value = smax_ptr - smin_val;
7565                 }
7566                 if (umin_ptr < umax_val) {
7567                         /* Overflow possible, we know nothing */
7568                         dst_reg->umin_value = 0;
7569                         dst_reg->umax_value = U64_MAX;
7570                 } else {
7571                         /* Cannot overflow (as long as bounds are consistent) */
7572                         dst_reg->umin_value = umin_ptr - umax_val;
7573                         dst_reg->umax_value = umax_ptr - umin_val;
7574                 }
7575                 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
7576                 dst_reg->off = ptr_reg->off;
7577                 dst_reg->raw = ptr_reg->raw;
7578                 if (reg_is_pkt_pointer(ptr_reg)) {
7579                         dst_reg->id = ++env->id_gen;
7580                         /* a negative subtrahend adds to pkt_ptr, set range to zero */
7581                         if (smin_val < 0)
7582                                 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
7583                 }
7584                 break;
7585         case BPF_AND:
7586         case BPF_OR:
7587         case BPF_XOR:
7588                 /* bitwise ops on pointers are troublesome, prohibit. */
7589                 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
7590                         dst, bpf_alu_string[opcode >> 4]);
7591                 return -EACCES;
7592         default:
7593                 /* other operators (e.g. MUL,LSH) produce non-pointer results */
7594                 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
7595                         dst, bpf_alu_string[opcode >> 4]);
7596                 return -EACCES;
7597         }
7598
7599         if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
7600                 return -EINVAL;
7601
7602         __update_reg_bounds(dst_reg);
7603         __reg_deduce_bounds(dst_reg);
7604         __reg_bound_offset(dst_reg);
7605
7606         if (sanitize_check_bounds(env, insn, dst_reg) < 0)
7607                 return -EACCES;
             /* Second sanitation pass, now on the computed dst_reg and with the
              * 'info' filled in by the first pass.
              */
7608         if (sanitize_needed(opcode)) {
7609                 ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
7610                                        &info, true);
7611                 if (ret < 0)
7612                         return sanitize_err(env, insn, ret, off_reg, dst_reg);
7613         }
7614
7615         return 0;
7616 }
7617
     /* Update the 32-bit signed and unsigned bounds of 'dst_reg' for
      * dst_reg += src_reg.  A pair of bounds that could overflow is reset to its
      * full range.  var_off and the 64-bit bounds are not touched here.
      */
7618 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
7619                                  struct bpf_reg_state *src_reg)
7620 {
7621         s32 smin_val = src_reg->s32_min_value;
7622         s32 smax_val = src_reg->s32_max_value;
7623         u32 umin_val = src_reg->u32_min_value;
7624         u32 umax_val = src_reg->u32_max_value;
7625
7626         if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
7627             signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
7628                 dst_reg->s32_min_value = S32_MIN;
7629                 dst_reg->s32_max_value = S32_MAX;
7630         } else {
7631                 dst_reg->s32_min_value += smin_val;
7632                 dst_reg->s32_max_value += smax_val;
7633         }
             /* Unsigned wrap-around shows up as a sum below an addend. */
7634         if (dst_reg->u32_min_value + umin_val < umin_val ||
7635             dst_reg->u32_max_value + umax_val < umax_val) {
7636                 dst_reg->u32_min_value = 0;
7637                 dst_reg->u32_max_value = U32_MAX;
7638         } else {
7639                 dst_reg->u32_min_value += umin_val;
7640                 dst_reg->u32_max_value += umax_val;
7641         }
7642 }
7643
     /* Update the 64-bit signed and unsigned bounds of 'dst_reg' for
      * dst_reg += src_reg.  A pair of bounds that could overflow is reset to its
      * full range.  var_off and the 32-bit bounds are not touched here.
      */
7644 static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
7645                                struct bpf_reg_state *src_reg)
7646 {
7647         s64 smin_val = src_reg->smin_value;
7648         s64 smax_val = src_reg->smax_value;
7649         u64 umin_val = src_reg->umin_value;
7650         u64 umax_val = src_reg->umax_value;
7651
7652         if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
7653             signed_add_overflows(dst_reg->smax_value, smax_val)) {
7654                 dst_reg->smin_value = S64_MIN;
7655                 dst_reg->smax_value = S64_MAX;
7656         } else {
7657                 dst_reg->smin_value += smin_val;
7658                 dst_reg->smax_value += smax_val;
7659         }
             /* Unsigned wrap-around shows up as a sum below an addend. */
7660         if (dst_reg->umin_value + umin_val < umin_val ||
7661             dst_reg->umax_value + umax_val < umax_val) {
7662                 dst_reg->umin_value = 0;
7663                 dst_reg->umax_value = U64_MAX;
7664         } else {
7665                 dst_reg->umin_value += umin_val;
7666                 dst_reg->umax_value += umax_val;
7667         }
7668 }
7669
     /* Update the 32-bit signed and unsigned bounds of 'dst_reg' for
      * dst_reg -= src_reg.  The new minimum is dst's minimum minus src's maximum
      * and the new maximum is dst's maximum minus src's minimum; a pair of bounds
      * that could overflow is reset to its full range.  var_off and the 64-bit
      * bounds are not touched here.
      */
7670 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
7671                                  struct bpf_reg_state *src_reg)
7672 {
7673         s32 smin_val = src_reg->s32_min_value;
7674         s32 smax_val = src_reg->s32_max_value;
7675         u32 umin_val = src_reg->u32_min_value;
7676         u32 umax_val = src_reg->u32_max_value;
7677
7678         if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
7679             signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
7680                 /* Overflow possible, we know nothing */
7681                 dst_reg->s32_min_value = S32_MIN;
7682                 dst_reg->s32_max_value = S32_MAX;
7683         } else {
7684                 dst_reg->s32_min_value -= smax_val;
7685                 dst_reg->s32_max_value -= smin_val;
7686         }
7687         if (dst_reg->u32_min_value < umax_val) {
7688                 /* Overflow possible, we know nothing */
7689                 dst_reg->u32_min_value = 0;
7690                 dst_reg->u32_max_value = U32_MAX;
7691         } else {
7692                 /* Cannot overflow (as long as bounds are consistent) */
7693                 dst_reg->u32_min_value -= umax_val;
7694                 dst_reg->u32_max_value -= umin_val;
7695         }
7696 }
7697
     /* Update the 64-bit signed and unsigned bounds of 'dst_reg' for
      * dst_reg -= src_reg.  The new minimum is dst's minimum minus src's maximum
      * and the new maximum is dst's maximum minus src's minimum; a pair of bounds
      * that could overflow is reset to its full range.  var_off and the 32-bit
      * bounds are not touched here.
      */
7698 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
7699                                struct bpf_reg_state *src_reg)
7700 {
7701         s64 smin_val = src_reg->smin_value;
7702         s64 smax_val = src_reg->smax_value;
7703         u64 umin_val = src_reg->umin_value;
7704         u64 umax_val = src_reg->umax_value;
7705
7706         if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
7707             signed_sub_overflows(dst_reg->smax_value, smin_val)) {
7708                 /* Overflow possible, we know nothing */
7709                 dst_reg->smin_value = S64_MIN;
7710                 dst_reg->smax_value = S64_MAX;
7711         } else {
7712                 dst_reg->smin_value -= smax_val;
7713                 dst_reg->smax_value -= smin_val;
7714         }
7715         if (dst_reg->umin_value < umax_val) {
7716                 /* Overflow possible, we know nothing */
7717                 dst_reg->umin_value = 0;
7718                 dst_reg->umax_value = U64_MAX;
7719         } else {
7720                 /* Cannot overflow (as long as bounds are consistent) */
7721                 dst_reg->umin_value -= umax_val;
7722                 dst_reg->umax_value -= umin_val;
7723         }
7724 }
7725
     /* Update the 32-bit bounds of 'dst_reg' for dst_reg *= src_reg.
      *
      * Bounds are only tracked when neither operand's signed minimum is negative
      * and neither unsigned maximum exceeds U16_MAX; otherwise
      * __mark_reg32_unbounded() is called on dst_reg.  var_off is not touched
      * here.
      */
7726 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
7727                                  struct bpf_reg_state *src_reg)
7728 {
7729         s32 smin_val = src_reg->s32_min_value;
7730         u32 umin_val = src_reg->u32_min_value;
7731         u32 umax_val = src_reg->u32_max_value;
7732
7733         if (smin_val < 0 || dst_reg->s32_min_value < 0) {
7734                 /* Ain't nobody got time to multiply that sign */
7735                 __mark_reg32_unbounded(dst_reg);
7736                 return;
7737         }
7738         /* Both values are positive, so we can work with unsigned and
7739          * copy the result to signed (unless it exceeds S32_MAX).
7740          */
7741         if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
7742                 /* Potential overflow, we know nothing */
7743                 __mark_reg32_unbounded(dst_reg);
7744                 return;
7745         }
             /* Both maxima fit in 16 bits, so the products below fit in 32 bits. */
7746         dst_reg->u32_min_value *= umin_val;
7747         dst_reg->u32_max_value *= umax_val;
7748         if (dst_reg->u32_max_value > S32_MAX) {
7749                 /* Overflow possible, we know nothing */
7750                 dst_reg->s32_min_value = S32_MIN;
7751                 dst_reg->s32_max_value = S32_MAX;
7752         } else {
7753                 dst_reg->s32_min_value = dst_reg->u32_min_value;
7754                 dst_reg->s32_max_value = dst_reg->u32_max_value;
7755         }
7756 }
7757
     /* Update the 64-bit bounds of 'dst_reg' for dst_reg *= src_reg.
      *
      * Bounds are only tracked when neither operand's signed minimum is negative
      * and neither unsigned maximum exceeds U32_MAX; otherwise
      * __mark_reg64_unbounded() is called on dst_reg.  var_off is not touched
      * here.
      */
7758 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
7759                                struct bpf_reg_state *src_reg)
7760 {
7761         s64 smin_val = src_reg->smin_value;
7762         u64 umin_val = src_reg->umin_value;
7763         u64 umax_val = src_reg->umax_value;
7764
7765         if (smin_val < 0 || dst_reg->smin_value < 0) {
7766                 /* Ain't nobody got time to multiply that sign */
7767                 __mark_reg64_unbounded(dst_reg);
7768                 return;
7769         }
7770         /* Both values are positive, so we can work with unsigned and
7771          * copy the result to signed (unless it exceeds S64_MAX).
7772          */
7773         if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
7774                 /* Potential overflow, we know nothing */
7775                 __mark_reg64_unbounded(dst_reg);
7776                 return;
7777         }
             /* Both maxima fit in 32 bits, so the products below fit in 64 bits. */
7778         dst_reg->umin_value *= umin_val;
7779         dst_reg->umax_value *= umax_val;
7780         if (dst_reg->umax_value > S64_MAX) {
7781                 /* Overflow possible, we know nothing */
7782                 dst_reg->smin_value = S64_MIN;
7783                 dst_reg->smax_value = S64_MAX;
7784         } else {
7785                 dst_reg->smin_value = dst_reg->umin_value;
7786                 dst_reg->smax_value = dst_reg->umax_value;
7787         }
7788 }
7789
     /* Update the 32-bit bounds of 'dst_reg' for dst_reg &= src_reg.
      *
      * The bounds are derived from the low 32 bits of dst_reg->var_off.
      * NOTE(review): this presumably relies on the caller having already stored
      * the tnum of the AND result in dst_reg->var_off -- confirm against caller.
      *
      * If the subregisters of both operands are constants, dst_reg is marked as
      * the known 32-bit value var32_off.value and nothing else is done.
      */
7790 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
7791                                  struct bpf_reg_state *src_reg)
7792 {
7793         bool src_known = tnum_subreg_is_const(src_reg->var_off);
7794         bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
7795         struct tnum var32_off = tnum_subreg(dst_reg->var_off);
7796         s32 smin_val = src_reg->s32_min_value;
7797         u32 umax_val = src_reg->u32_max_value;
7798
7799         if (src_known && dst_known) {
7800                 __mark_reg32_known(dst_reg, var32_off.value);
7801                 return;
7802         }
7803
7804         /* We get our minimum from the var_off, since that's inherently
7805          * bitwise.  Our maximum is the minimum of the operands' maxima.
7806          */
7807         dst_reg->u32_min_value = var32_off.value;
7808         dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
7809         if (dst_reg->s32_min_value < 0 || smin_val < 0) {
7810                 /* Lose signed bounds when ANDing negative numbers,
7811                  * ain't nobody got time for that.
7812                  */
7813                 dst_reg->s32_min_value = S32_MIN;
7814                 dst_reg->s32_max_value = S32_MAX;
7815         } else {
7816                 /* ANDing two positives gives a positive, so safe to
7817                  * cast result into s32.
7818                  */
7819                 dst_reg->s32_min_value = dst_reg->u32_min_value;
7820                 dst_reg->s32_max_value = dst_reg->u32_max_value;
7821         }
7822 }
7823
     /* Update the 64-bit bounds of 'dst_reg' for dst_reg &= src_reg.
      *
      * The bounds are derived from dst_reg->var_off.
      * NOTE(review): this presumably relies on the caller having already stored
      * the tnum of the AND result in dst_reg->var_off -- confirm against caller.
      *
      * If both operands are constants, dst_reg is marked as the known value
      * dst_reg->var_off.value and nothing else is done.
      */
7824 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
7825                                struct bpf_reg_state *src_reg)
7826 {
7827         bool src_known = tnum_is_const(src_reg->var_off);
7828         bool dst_known = tnum_is_const(dst_reg->var_off);
7829         s64 smin_val = src_reg->smin_value;
7830         u64 umax_val = src_reg->umax_value;
7831
7832         if (src_known && dst_known) {
7833                 __mark_reg_known(dst_reg, dst_reg->var_off.value);
7834                 return;
7835         }
7836
7837         /* We get our minimum from the var_off, since that's inherently
7838          * bitwise.  Our maximum is the minimum of the operands' maxima.
7839          */
7840         dst_reg->umin_value = dst_reg->var_off.value;
7841         dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
7842         if (dst_reg->smin_value < 0 || smin_val < 0) {
7843                 /* Lose signed bounds when ANDing negative numbers,
7844                  * ain't nobody got time for that.
7845                  */
7846                 dst_reg->smin_value = S64_MIN;
7847                 dst_reg->smax_value = S64_MAX;
7848         } else {
7849                 /* ANDing two positives gives a positive, so safe to
7850                  * cast result into s64.
7851                  */
7852                 dst_reg->smin_value = dst_reg->umin_value;
7853                 dst_reg->smax_value = dst_reg->umax_value;
7854         }
7855         /* We may learn something more from the var_off */
7856         __update_reg_bounds(dst_reg);
7857 }
7858
     /* Update the 32-bit bounds of 'dst_reg' for dst_reg |= src_reg.
      *
      * The bounds are derived from the low 32 bits of dst_reg->var_off.
      * NOTE(review): this presumably relies on the caller having already stored
      * the tnum of the OR result in dst_reg->var_off -- confirm against caller.
      *
      * If the subregisters of both operands are constants, dst_reg is marked as
      * the known 32-bit value var32_off.value and nothing else is done.
      */
7859 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
7860                                 struct bpf_reg_state *src_reg)
7861 {
7862         bool src_known = tnum_subreg_is_const(src_reg->var_off);
7863         bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
7864         struct tnum var32_off = tnum_subreg(dst_reg->var_off);
7865         s32 smin_val = src_reg->s32_min_value;
7866         u32 umin_val = src_reg->u32_min_value;
7867
7868         if (src_known && dst_known) {
7869                 __mark_reg32_known(dst_reg, var32_off.value);
7870                 return;
7871         }
7872
7873         /* We get our maximum from the var_off, and our minimum is the
7874          * maximum of the operands' minima
7875          */
7876         dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
7877         dst_reg->u32_max_value = var32_off.value | var32_off.mask;
7878         if (dst_reg->s32_min_value < 0 || smin_val < 0) {
7879                 /* Lose signed bounds when ORing negative numbers,
7880                  * ain't nobody got time for that.
7881                  */
7882                 dst_reg->s32_min_value = S32_MIN;
7883                 dst_reg->s32_max_value = S32_MAX;
7884         } else {
7885                 /* ORing two positives gives a positive, so safe to
7886                  * cast result into s32.
7887                  */
7888                 dst_reg->s32_min_value = dst_reg->u32_min_value;
7889                 dst_reg->s32_max_value = dst_reg->u32_max_value;
7890         }
7891 }
7892
     /* Update the 64-bit bounds of 'dst_reg' for dst_reg |= src_reg.
      *
      * The bounds are derived from dst_reg->var_off.
      * NOTE(review): this presumably relies on the caller having already stored
      * the tnum of the OR result in dst_reg->var_off -- confirm against caller.
      *
      * If both operands are constants, dst_reg is marked as the known value
      * dst_reg->var_off.value and nothing else is done.
      */
7893 static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
7894                               struct bpf_reg_state *src_reg)
7895 {
7896         bool src_known = tnum_is_const(src_reg->var_off);
7897         bool dst_known = tnum_is_const(dst_reg->var_off);
7898         s64 smin_val = src_reg->smin_value;
7899         u64 umin_val = src_reg->umin_value;
7900
7901         if (src_known && dst_known) {
7902                 __mark_reg_known(dst_reg, dst_reg->var_off.value);
7903                 return;
7904         }
7905
7906         /* We get our maximum from the var_off, and our minimum is the
7907          * maximum of the operands' minima
7908          */
7909         dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
7910         dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
7911         if (dst_reg->smin_value < 0 || smin_val < 0) {
7912                 /* Lose signed bounds when ORing negative numbers,
7913                  * ain't nobody got time for that.
7914                  */
7915                 dst_reg->smin_value = S64_MIN;
7916                 dst_reg->smax_value = S64_MAX;
7917         } else {
7918                 /* ORing two positives gives a positive, so safe to
7919                  * cast result into s64.
7920                  */
7921                 dst_reg->smin_value = dst_reg->umin_value;
7922                 dst_reg->smax_value = dst_reg->umax_value;
7923         }
7924         /* We may learn something more from the var_off */
7925         __update_reg_bounds(dst_reg);
7926 }
7927
     /* Update the 32-bit bounds of 'dst_reg' for dst_reg ^= src_reg.
      *
      * Both unsigned bounds are derived from the low 32 bits of
      * dst_reg->var_off.
      * NOTE(review): this presumably relies on the caller having already stored
      * the tnum of the XOR result in dst_reg->var_off -- confirm against caller.
      *
      * If the subregisters of both operands are constants, dst_reg is marked as
      * the known 32-bit value var32_off.value and nothing else is done.
      */
7928 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
7929                                  struct bpf_reg_state *src_reg)
7930 {
7931         bool src_known = tnum_subreg_is_const(src_reg->var_off);
7932         bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
7933         struct tnum var32_off = tnum_subreg(dst_reg->var_off);
7934         s32 smin_val = src_reg->s32_min_value;
7935
7936         if (src_known && dst_known) {
7937                 __mark_reg32_known(dst_reg, var32_off.value);
7938                 return;
7939         }
7940
7941         /* We get both minimum and maximum from the var32_off. */
7942         dst_reg->u32_min_value = var32_off.value;
7943         dst_reg->u32_max_value = var32_off.value | var32_off.mask;
7944
7945         if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
7946                 /* XORing two positive sign numbers gives a positive,
7947                  * so safe to cast u32 result into s32.
7948                  */
7949                 dst_reg->s32_min_value = dst_reg->u32_min_value;
7950                 dst_reg->s32_max_value = dst_reg->u32_max_value;
7951         } else {
                     /* A possibly negative operand: signed bounds are unknown. */
7952                 dst_reg->s32_min_value = S32_MIN;
7953                 dst_reg->s32_max_value = S32_MAX;
7954         }
7955 }
7956
     /* Update the 64-bit bounds of 'dst_reg' for dst_reg ^= src_reg.
      *
      * Both unsigned bounds are derived from dst_reg->var_off, which is expected
      * to already describe the XOR result when this function is entered.
      *
      * If both operands are constants, dst_reg is marked as the known value
      * dst_reg->var_off.value and nothing else is done.
      */
7957 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
7958                                struct bpf_reg_state *src_reg)
7959 {
7960         bool src_known = tnum_is_const(src_reg->var_off);
7961         bool dst_known = tnum_is_const(dst_reg->var_off);
7962         s64 smin_val = src_reg->smin_value;
7963
7964         if (src_known && dst_known) {
7965                 /* dst_reg->var_off.value has been updated earlier */
7966                 __mark_reg_known(dst_reg, dst_reg->var_off.value);
7967                 return;
7968         }
7969
7970         /* We get both minimum and maximum from the var_off. */
7971         dst_reg->umin_value = dst_reg->var_off.value;
7972         dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
7973
7974         if (dst_reg->smin_value >= 0 && smin_val >= 0) {
7975                 /* XORing two positive sign numbers gives a positive,
7976                  * so safe to cast u64 result into s64.
7977                  */
7978                 dst_reg->smin_value = dst_reg->umin_value;
7979                 dst_reg->smax_value = dst_reg->umax_value;
7980         } else {
                     /* A possibly negative operand: signed bounds are unknown. */
7981                 dst_reg->smin_value = S64_MIN;
7982                 dst_reg->smax_value = S64_MAX;
7983         }
7984
             /* We may learn something more from the var_off */
7985         __update_reg_bounds(dst_reg);
7986 }
7987
     /* Update the 32-bit bounds of 'dst_reg' for a left shift whose shift amount
      * lies in [umin_val, umax_val].
      *
      * The signed 32-bit bounds are always reset to their full range.  The
      * unsigned bounds are shifted (minimum by umin_val, maximum by umax_val)
      * unless the shift could move a set bit out of the top of the 32-bit value,
      * in which case they are reset to [0, U32_MAX].  var_off is not touched
      * here.
      */
7988 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
7989                                    u64 umin_val, u64 umax_val)
7990 {
7991         /* We lose all sign bit information (except what we can pick
7992          * up from var_off)
7993          */
7994         dst_reg->s32_min_value = S32_MIN;
7995         dst_reg->s32_max_value = S32_MAX;
7996         /* If we might shift our top bit out, then we know nothing */
             /* The 'umax_val > 31' test comes first so that (31 - umax_val) is
              * only evaluated when it cannot wrap around.
              */
7997         if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
7998                 dst_reg->u32_min_value = 0;
7999                 dst_reg->u32_max_value = U32_MAX;
8000         } else {
8001                 dst_reg->u32_min_value <<= umin_val;
8002                 dst_reg->u32_max_value <<= umax_val;
8003         }
8004 }
8005
     /* Update 'dst_reg' for a 32-bit left shift by 'src_reg': the 32-bit bounds
      * via __scalar32_min_max_lsh(), and var_off from the low 32 bits of the old
      * var_off shifted left by src_reg's minimum 32-bit value.  The 64-bit bounds
      * are marked unbounded.
      */
8006 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
8007                                  struct bpf_reg_state *src_reg)
8008 {
8009         u32 umax_val = src_reg->u32_max_value;
8010         u32 umin_val = src_reg->u32_min_value;
8011         /* u32 alu operation will zext upper bits */
8012         struct tnum subreg = tnum_subreg(dst_reg->var_off);
8013
8014         __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
8015         dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
8016         /* Not required but being careful mark reg64 bounds as unknown so
8017          * that we are forced to pick them up from tnum and zext later and
8018          * if some path skips this step we are still safe.
8019          */
8020         __mark_reg64_unbounded(dst_reg);
8021         __update_reg32_bounds(dst_reg);
8022 }
8023
     /* Update the 64-bit bounds of 'dst_reg' for a left shift whose shift amount
      * lies in [umin_val, umax_val].
      *
      * The signed bounds are only tracked for a shift by exactly 32 with a
      * non-negative 32-bit bound; otherwise they are reset to S64_MIN/S64_MAX.
      * The unsigned bounds are shifted (minimum by umin_val, maximum by umax_val)
      * unless the top bit might be shifted out, in which case they are reset to
      * [0, U64_MAX].  The 32-bit bounds and var_off are read but not written.
      *
      * NOTE(review): unlike the 32-bit helper there is no 'umax_val > 63' guard
      * before (63 - umax_val); presumably the caller guarantees umax_val < 64 --
      * confirm against caller.
      */
8024 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
8025                                    u64 umin_val, u64 umax_val)
8026 {
8027         /* Special case <<32 because it is a common compiler pattern to sign
8028          * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
8029          * positive we know this shift will also be positive so we can track
8030          * bounds correctly. Otherwise we lose all sign bit information except
8031          * what we can pick up from var_off. Perhaps we can generalize this
8032          * later to shifts of any length.
8033          */
8034         if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
8035                 dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
8036         else
8037                 dst_reg->smax_value = S64_MAX;
8038
8039         if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
8040                 dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
8041         else
8042                 dst_reg->smin_value = S64_MIN;
8043
8044         /* If we might shift our top bit out, then we know nothing */
8045         if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
8046                 dst_reg->umin_value = 0;
8047                 dst_reg->umax_value = U64_MAX;
8048         } else {
8049                 dst_reg->umin_value <<= umin_val;
8050                 dst_reg->umax_value <<= umax_val;
8051         }
8052 }
8053
8054 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
8055                                struct bpf_reg_state *src_reg)
8056 {
8057         u64 umax_val = src_reg->umax_value;
8058         u64 umin_val = src_reg->umin_value;
8059
8060         /* scalar64 calc uses 32bit unshifted bounds so must be called first */
8061         __scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
8062         __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
8063
8064         dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
8065         /* We may learn something more from the var_off */
8066         __update_reg_bounds(dst_reg);
8067 }
8068
8069 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
8070                                  struct bpf_reg_state *src_reg)
8071 {
8072         struct tnum subreg = tnum_subreg(dst_reg->var_off);
8073         u32 umax_val = src_reg->u32_max_value;
8074         u32 umin_val = src_reg->u32_min_value;
8075
8076         /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
8077          * be negative, then either:
8078          * 1) src_reg might be zero, so the sign bit of the result is
8079          *    unknown, so we lose our signed bounds
8080          * 2) it's known negative, thus the unsigned bounds capture the
8081          *    signed bounds
8082          * 3) the signed bounds cross zero, so they tell us nothing
8083          *    about the result
8084          * If the value in dst_reg is known nonnegative, then again the
8085          * unsigned bounds capture the signed bounds.
8086          * Thus, in all cases it suffices to blow away our signed bounds
8087          * and rely on inferring new ones from the unsigned bounds and
8088          * var_off of the result.
8089          */
8090         dst_reg->s32_min_value = S32_MIN;
8091         dst_reg->s32_max_value = S32_MAX;
8092
8093         dst_reg->var_off = tnum_rshift(subreg, umin_val);
8094         dst_reg->u32_min_value >>= umax_val;
8095         dst_reg->u32_max_value >>= umin_val;
8096
8097         __mark_reg64_unbounded(dst_reg);
8098         __update_reg32_bounds(dst_reg);
8099 }
8100
8101 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
8102                                struct bpf_reg_state *src_reg)
8103 {
8104         u64 umax_val = src_reg->umax_value;
8105         u64 umin_val = src_reg->umin_value;
8106
8107         /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
8108          * be negative, then either:
8109          * 1) src_reg might be zero, so the sign bit of the result is
8110          *    unknown, so we lose our signed bounds
8111          * 2) it's known negative, thus the unsigned bounds capture the
8112          *    signed bounds
8113          * 3) the signed bounds cross zero, so they tell us nothing
8114          *    about the result
8115          * If the value in dst_reg is known nonnegative, then again the
8116          * unsigned bounds capture the signed bounds.
8117          * Thus, in all cases it suffices to blow away our signed bounds
8118          * and rely on inferring new ones from the unsigned bounds and
8119          * var_off of the result.
8120          */
8121         dst_reg->smin_value = S64_MIN;
8122         dst_reg->smax_value = S64_MAX;
8123         dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
8124         dst_reg->umin_value >>= umax_val;
8125         dst_reg->umax_value >>= umin_val;
8126
8127         /* Its not easy to operate on alu32 bounds here because it depends
8128          * on bits being shifted in. Take easy way out and mark unbounded
8129          * so we can recalculate later from tnum.
8130          */
8131         __mark_reg32_unbounded(dst_reg);
8132         __update_reg_bounds(dst_reg);
8133 }
8134
8135 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
8136                                   struct bpf_reg_state *src_reg)
8137 {
8138         u64 umin_val = src_reg->u32_min_value;
8139
8140         /* Upon reaching here, src_known is true and
8141          * umax_val is equal to umin_val.
8142          */
8143         dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
8144         dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
8145
8146         dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
8147
8148         /* blow away the dst_reg umin_value/umax_value and rely on
8149          * dst_reg var_off to refine the result.
8150          */
8151         dst_reg->u32_min_value = 0;
8152         dst_reg->u32_max_value = U32_MAX;
8153
8154         __mark_reg64_unbounded(dst_reg);
8155         __update_reg32_bounds(dst_reg);
8156 }
8157
8158 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
8159                                 struct bpf_reg_state *src_reg)
8160 {
8161         u64 umin_val = src_reg->umin_value;
8162
8163         /* Upon reaching here, src_known is true and umax_val is equal
8164          * to umin_val.
8165          */
8166         dst_reg->smin_value >>= umin_val;
8167         dst_reg->smax_value >>= umin_val;
8168
8169         dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
8170
8171         /* blow away the dst_reg umin_value/umax_value and rely on
8172          * dst_reg var_off to refine the result.
8173          */
8174         dst_reg->umin_value = 0;
8175         dst_reg->umax_value = U64_MAX;
8176
8177         /* Its not easy to operate on alu32 bounds here because it depends
8178          * on bits being shifted in from upper 32-bits. Take easy way out
8179          * and mark unbounded so we can recalculate later from tnum.
8180          */
8181         __mark_reg32_unbounded(dst_reg);
8182         __update_reg_bounds(dst_reg);
8183 }
8184
8185 /* WARNING: This function does calculations on 64-bit values, but the actual
8186  * execution may occur on 32-bit values. Therefore, things like bitshifts
8187  * need extra checks in the 32-bit case.
8188  */
8189 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
8190                                       struct bpf_insn *insn,
8191                                       struct bpf_reg_state *dst_reg,
8192                                       struct bpf_reg_state src_reg)
8193 {
8194         struct bpf_reg_state *regs = cur_regs(env);
8195         u8 opcode = BPF_OP(insn->code);
8196         bool src_known;
8197         s64 smin_val, smax_val;
8198         u64 umin_val, umax_val;
8199         s32 s32_min_val, s32_max_val;
8200         u32 u32_min_val, u32_max_val;
8201         u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
8202         bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
8203         int ret;
8204
8205         smin_val = src_reg.smin_value;
8206         smax_val = src_reg.smax_value;
8207         umin_val = src_reg.umin_value;
8208         umax_val = src_reg.umax_value;
8209
8210         s32_min_val = src_reg.s32_min_value;
8211         s32_max_val = src_reg.s32_max_value;
8212         u32_min_val = src_reg.u32_min_value;
8213         u32_max_val = src_reg.u32_max_value;
8214
8215         if (alu32) {
8216                 src_known = tnum_subreg_is_const(src_reg.var_off);
8217                 if ((src_known &&
8218                      (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
8219                     s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
8220                         /* Taint dst register if offset had invalid bounds
8221                          * derived from e.g. dead branches.
8222                          */
8223                         __mark_reg_unknown(env, dst_reg);
8224                         return 0;
8225                 }
8226         } else {
8227                 src_known = tnum_is_const(src_reg.var_off);
8228                 if ((src_known &&
8229                      (smin_val != smax_val || umin_val != umax_val)) ||
8230                     smin_val > smax_val || umin_val > umax_val) {
8231                         /* Taint dst register if offset had invalid bounds
8232                          * derived from e.g. dead branches.
8233                          */
8234                         __mark_reg_unknown(env, dst_reg);
8235                         return 0;
8236                 }
8237         }
8238
8239         if (!src_known &&
8240             opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
8241                 __mark_reg_unknown(env, dst_reg);
8242                 return 0;
8243         }
8244
8245         if (sanitize_needed(opcode)) {
8246                 ret = sanitize_val_alu(env, insn);
8247                 if (ret < 0)
8248                         return sanitize_err(env, insn, ret, NULL, NULL);
8249         }
8250
8251         /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
8252          * There are two classes of instructions: The first class we track both
8253          * alu32 and alu64 sign/unsigned bounds independently this provides the
8254          * greatest amount of precision when alu operations are mixed with jmp32
8255          * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_ADD,
8256          * and BPF_OR. This is possible because these ops have fairly easy to
8257          * understand and calculate behavior in both 32-bit and 64-bit alu ops.
8258          * See alu32 verifier tests for examples. The second class of
8259          * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
8260          * with regards to tracking sign/unsigned bounds because the bits may
8261          * cross subreg boundaries in the alu64 case. When this happens we mark
8262          * the reg unbounded in the subreg bound space and use the resulting
8263          * tnum to calculate an approximation of the sign/unsigned bounds.
8264          */
8265         switch (opcode) {
8266         case BPF_ADD:
8267                 scalar32_min_max_add(dst_reg, &src_reg);
8268                 scalar_min_max_add(dst_reg, &src_reg);
8269                 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
8270                 break;
8271         case BPF_SUB:
8272                 scalar32_min_max_sub(dst_reg, &src_reg);
8273                 scalar_min_max_sub(dst_reg, &src_reg);
8274                 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
8275                 break;
8276         case BPF_MUL:
8277                 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
8278                 scalar32_min_max_mul(dst_reg, &src_reg);
8279                 scalar_min_max_mul(dst_reg, &src_reg);
8280                 break;
8281         case BPF_AND:
8282                 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
8283                 scalar32_min_max_and(dst_reg, &src_reg);
8284                 scalar_min_max_and(dst_reg, &src_reg);
8285                 break;
8286         case BPF_OR:
8287                 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
8288                 scalar32_min_max_or(dst_reg, &src_reg);
8289                 scalar_min_max_or(dst_reg, &src_reg);
8290                 break;
8291         case BPF_XOR:
8292                 dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
8293                 scalar32_min_max_xor(dst_reg, &src_reg);
8294                 scalar_min_max_xor(dst_reg, &src_reg);
8295                 break;
8296         case BPF_LSH:
8297                 if (umax_val >= insn_bitness) {
8298                         /* Shifts greater than 31 or 63 are undefined.
8299                          * This includes shifts by a negative number.
8300                          */
8301                         mark_reg_unknown(env, regs, insn->dst_reg);
8302                         break;
8303                 }
8304                 if (alu32)
8305                         scalar32_min_max_lsh(dst_reg, &src_reg);
8306                 else
8307                         scalar_min_max_lsh(dst_reg, &src_reg);
8308                 break;
8309         case BPF_RSH:
8310                 if (umax_val >= insn_bitness) {
8311                         /* Shifts greater than 31 or 63 are undefined.
8312                          * This includes shifts by a negative number.
8313                          */
8314                         mark_reg_unknown(env, regs, insn->dst_reg);
8315                         break;
8316                 }
8317                 if (alu32)
8318                         scalar32_min_max_rsh(dst_reg, &src_reg);
8319                 else
8320                         scalar_min_max_rsh(dst_reg, &src_reg);
8321                 break;
8322         case BPF_ARSH:
8323                 if (umax_val >= insn_bitness) {
8324                         /* Shifts greater than 31 or 63 are undefined.
8325                          * This includes shifts by a negative number.
8326                          */
8327                         mark_reg_unknown(env, regs, insn->dst_reg);
8328                         break;
8329                 }
8330                 if (alu32)
8331                         scalar32_min_max_arsh(dst_reg, &src_reg);
8332                 else
8333                         scalar_min_max_arsh(dst_reg, &src_reg);
8334                 break;
8335         default:
8336                 mark_reg_unknown(env, regs, insn->dst_reg);
8337                 break;
8338         }
8339
8340         /* ALU32 ops are zero extended into 64bit register */
8341         if (alu32)
8342                 zext_32_to_64(dst_reg);
8343
8344         __update_reg_bounds(dst_reg);
8345         __reg_deduce_bounds(dst_reg);
8346         __reg_bound_offset(dst_reg);
8347         return 0;
8348 }
8349
8350 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
8351  * and var_off.
8352  */
8353 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
8354                                    struct bpf_insn *insn)
8355 {
8356         struct bpf_verifier_state *vstate = env->cur_state;
8357         struct bpf_func_state *state = vstate->frame[vstate->curframe];
8358         struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
8359         struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
8360         u8 opcode = BPF_OP(insn->code);
8361         int err;
8362
8363         dst_reg = &regs[insn->dst_reg];
8364         src_reg = NULL;
8365         if (dst_reg->type != SCALAR_VALUE)
8366                 ptr_reg = dst_reg;
8367         else
8368                 /* Make sure ID is cleared otherwise dst_reg min/max could be
8369                  * incorrectly propagated into other registers by find_equal_scalars()
8370                  */
8371                 dst_reg->id = 0;
8372         if (BPF_SRC(insn->code) == BPF_X) {
8373                 src_reg = &regs[insn->src_reg];
8374                 if (src_reg->type != SCALAR_VALUE) {
8375                         if (dst_reg->type != SCALAR_VALUE) {
8376                                 /* Combining two pointers by any ALU op yields
8377                                  * an arbitrary scalar. Disallow all math except
8378                                  * pointer subtraction
8379                                  */
8380                                 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
8381                                         mark_reg_unknown(env, regs, insn->dst_reg);
8382                                         return 0;
8383                                 }
8384                                 verbose(env, "R%d pointer %s pointer prohibited\n",
8385                                         insn->dst_reg,
8386                                         bpf_alu_string[opcode >> 4]);
8387                                 return -EACCES;
8388                         } else {
8389                                 /* scalar += pointer
8390                                  * This is legal, but we have to reverse our
8391                                  * src/dest handling in computing the range
8392                                  */
8393                                 err = mark_chain_precision(env, insn->dst_reg);
8394                                 if (err)
8395                                         return err;
8396                                 return adjust_ptr_min_max_vals(env, insn,
8397                                                                src_reg, dst_reg);
8398                         }
8399                 } else if (ptr_reg) {
8400                         /* pointer += scalar */
8401                         err = mark_chain_precision(env, insn->src_reg);
8402                         if (err)
8403                                 return err;
8404                         return adjust_ptr_min_max_vals(env, insn,
8405                                                        dst_reg, src_reg);
8406                 }
8407         } else {
8408                 /* Pretend the src is a reg with a known value, since we only
8409                  * need to be able to read from this state.
8410                  */
8411                 off_reg.type = SCALAR_VALUE;
8412                 __mark_reg_known(&off_reg, insn->imm);
8413                 src_reg = &off_reg;
8414                 if (ptr_reg) /* pointer += K */
8415                         return adjust_ptr_min_max_vals(env, insn,
8416                                                        ptr_reg, src_reg);
8417         }
8418
8419         /* Got here implies adding two SCALAR_VALUEs */
8420         if (WARN_ON_ONCE(ptr_reg)) {
8421                 print_verifier_state(env, state, true);
8422                 verbose(env, "verifier internal error: unexpected ptr_reg\n");
8423                 return -EINVAL;
8424         }
8425         if (WARN_ON(!src_reg)) {
8426                 print_verifier_state(env, state, true);
8427                 verbose(env, "verifier internal error: no src_reg\n");
8428                 return -EINVAL;
8429         }
8430         return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
8431 }
8432
8433 /* check validity of 32-bit and 64-bit arithmetic operations */
8434 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
8435 {
8436         struct bpf_reg_state *regs = cur_regs(env);
8437         u8 opcode = BPF_OP(insn->code);
8438         int err;
8439
8440         if (opcode == BPF_END || opcode == BPF_NEG) {
8441                 if (opcode == BPF_NEG) {
8442                         if (BPF_SRC(insn->code) != 0 ||
8443                             insn->src_reg != BPF_REG_0 ||
8444                             insn->off != 0 || insn->imm != 0) {
8445                                 verbose(env, "BPF_NEG uses reserved fields\n");
8446                                 return -EINVAL;
8447                         }
8448                 } else {
8449                         if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
8450                             (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
8451                             BPF_CLASS(insn->code) == BPF_ALU64) {
8452                                 verbose(env, "BPF_END uses reserved fields\n");
8453                                 return -EINVAL;
8454                         }
8455                 }
8456
8457                 /* check src operand */
8458                 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
8459                 if (err)
8460                         return err;
8461
8462                 if (is_pointer_value(env, insn->dst_reg)) {
8463                         verbose(env, "R%d pointer arithmetic prohibited\n",
8464                                 insn->dst_reg);
8465                         return -EACCES;
8466                 }
8467
8468                 /* check dest operand */
8469                 err = check_reg_arg(env, insn->dst_reg, DST_OP);
8470                 if (err)
8471                         return err;
8472
8473         } else if (opcode == BPF_MOV) {
8474
8475                 if (BPF_SRC(insn->code) == BPF_X) {
8476                         if (insn->imm != 0 || insn->off != 0) {
8477                                 verbose(env, "BPF_MOV uses reserved fields\n");
8478                                 return -EINVAL;
8479                         }
8480
8481                         /* check src operand */
8482                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
8483                         if (err)
8484                                 return err;
8485                 } else {
8486                         if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
8487                                 verbose(env, "BPF_MOV uses reserved fields\n");
8488                                 return -EINVAL;
8489                         }
8490                 }
8491
8492                 /* check dest operand, mark as required later */
8493                 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
8494                 if (err)
8495                         return err;
8496
8497                 if (BPF_SRC(insn->code) == BPF_X) {
8498                         struct bpf_reg_state *src_reg = regs + insn->src_reg;
8499                         struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
8500
8501                         if (BPF_CLASS(insn->code) == BPF_ALU64) {
8502                                 /* case: R1 = R2
8503                                  * copy register state to dest reg
8504                                  */
8505                                 if (src_reg->type == SCALAR_VALUE && !src_reg->id)
8506                                         /* Assign src and dst registers the same ID
8507                                          * that will be used by find_equal_scalars()
8508                                          * to propagate min/max range.
8509                                          */
8510                                         src_reg->id = ++env->id_gen;
8511                                 *dst_reg = *src_reg;
8512                                 dst_reg->live |= REG_LIVE_WRITTEN;
8513                                 dst_reg->subreg_def = DEF_NOT_SUBREG;
8514                         } else {
8515                                 /* R1 = (u32) R2 */
8516                                 if (is_pointer_value(env, insn->src_reg)) {
8517                                         verbose(env,
8518                                                 "R%d partial copy of pointer\n",
8519                                                 insn->src_reg);
8520                                         return -EACCES;
8521                                 } else if (src_reg->type == SCALAR_VALUE) {
8522                                         *dst_reg = *src_reg;
8523                                         /* Make sure ID is cleared otherwise
8524                                          * dst_reg min/max could be incorrectly
8525                                          * propagated into src_reg by find_equal_scalars()
8526                                          */
8527                                         dst_reg->id = 0;
8528                                         dst_reg->live |= REG_LIVE_WRITTEN;
8529                                         dst_reg->subreg_def = env->insn_idx + 1;
8530                                 } else {
8531                                         mark_reg_unknown(env, regs,
8532                                                          insn->dst_reg);
8533                                 }
8534                                 zext_32_to_64(dst_reg);
8535
8536                                 __update_reg_bounds(dst_reg);
8537                                 __reg_deduce_bounds(dst_reg);
8538                                 __reg_bound_offset(dst_reg);
8539                         }
8540                 } else {
8541                         /* case: R = imm
8542                          * remember the value we stored into this reg
8543                          */
8544                         /* clear any state __mark_reg_known doesn't set */
8545                         mark_reg_unknown(env, regs, insn->dst_reg);
8546                         regs[insn->dst_reg].type = SCALAR_VALUE;
8547                         if (BPF_CLASS(insn->code) == BPF_ALU64) {
8548                                 __mark_reg_known(regs + insn->dst_reg,
8549                                                  insn->imm);
8550                         } else {
8551                                 __mark_reg_known(regs + insn->dst_reg,
8552                                                  (u32)insn->imm);
8553                         }
8554                 }
8555
8556         } else if (opcode > BPF_END) {
8557                 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
8558                 return -EINVAL;
8559
8560         } else {        /* all other ALU ops: and, sub, xor, add, ... */
8561
8562                 if (BPF_SRC(insn->code) == BPF_X) {
8563                         if (insn->imm != 0 || insn->off != 0) {
8564                                 verbose(env, "BPF_ALU uses reserved fields\n");
8565                                 return -EINVAL;
8566                         }
8567                         /* check src1 operand */
8568                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
8569                         if (err)
8570                                 return err;
8571                 } else {
8572                         if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
8573                                 verbose(env, "BPF_ALU uses reserved fields\n");
8574                                 return -EINVAL;
8575                         }
8576                 }
8577
8578                 /* check src2 operand */
8579                 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
8580                 if (err)
8581                         return err;
8582
8583                 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
8584                     BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
8585                         verbose(env, "div by zero\n");
8586                         return -EINVAL;
8587                 }
8588
8589                 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
8590                      opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
8591                         int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
8592
8593                         if (insn->imm < 0 || insn->imm >= size) {
8594                                 verbose(env, "invalid shift %d\n", insn->imm);
8595                                 return -EINVAL;
8596                         }
8597                 }
8598
8599                 /* check dest operand */
8600                 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
8601                 if (err)
8602                         return err;
8603
8604                 return adjust_reg_min_max_vals(env, insn);
8605         }
8606
8607         return 0;
8608 }
8609
8610 static void __find_good_pkt_pointers(struct bpf_func_state *state,
8611                                      struct bpf_reg_state *dst_reg,
8612                                      enum bpf_reg_type type, int new_range)
8613 {
8614         struct bpf_reg_state *reg;
8615         int i;
8616
8617         for (i = 0; i < MAX_BPF_REG; i++) {
8618                 reg = &state->regs[i];
8619                 if (reg->type == type && reg->id == dst_reg->id)
8620                         /* keep the maximum range already checked */
8621                         reg->range = max(reg->range, new_range);
8622         }
8623
8624         bpf_for_each_spilled_reg(i, state, reg) {
8625                 if (!reg)
8626                         continue;
8627                 if (reg->type == type && reg->id == dst_reg->id)
8628                         reg->range = max(reg->range, new_range);
8629         }
8630 }
8631
8632 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
8633                                    struct bpf_reg_state *dst_reg,
8634                                    enum bpf_reg_type type,
8635                                    bool range_right_open)
8636 {
8637         int new_range, i;
8638
8639         if (dst_reg->off < 0 ||
8640             (dst_reg->off == 0 && range_right_open))
8641                 /* This doesn't give us any range */
8642                 return;
8643
8644         if (dst_reg->umax_value > MAX_PACKET_OFF ||
8645             dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
8646                 /* Risk of overflow.  For instance, ptr + (1<<63) may be less
8647                  * than pkt_end, but that's because it's also less than pkt.
8648                  */
8649                 return;
8650
8651         new_range = dst_reg->off;
8652         if (range_right_open)
8653                 new_range++;
8654
8655         /* Examples for register markings:
8656          *
8657          * pkt_data in dst register:
8658          *
8659          *   r2 = r3;
8660          *   r2 += 8;
8661          *   if (r2 > pkt_end) goto <handle exception>
8662          *   <access okay>
8663          *
8664          *   r2 = r3;
8665          *   r2 += 8;
8666          *   if (r2 < pkt_end) goto <access okay>
8667          *   <handle exception>
8668          *
8669          *   Where:
8670          *     r2 == dst_reg, pkt_end == src_reg
8671          *     r2=pkt(id=n,off=8,r=0)
8672          *     r3=pkt(id=n,off=0,r=0)
8673          *
8674          * pkt_data in src register:
8675          *
8676          *   r2 = r3;
8677          *   r2 += 8;
8678          *   if (pkt_end >= r2) goto <access okay>
8679          *   <handle exception>
8680          *
8681          *   r2 = r3;
8682          *   r2 += 8;
8683          *   if (pkt_end <= r2) goto <handle exception>
8684          *   <access okay>
8685          *
8686          *   Where:
8687          *     pkt_end == dst_reg, r2 == src_reg
8688          *     r2=pkt(id=n,off=8,r=0)
8689          *     r3=pkt(id=n,off=0,r=0)
8690          *
8691          * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
8692          * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
8693          * and [r3, r3 + 8-1) respectively is safe to access depending on
8694          * the check.
8695          */
8696
8697         /* If our ids match, then we must have the same max_value.  And we
8698          * don't care about the other reg's fixed offset, since if it's too big
8699          * the range won't allow anything.
8700          * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
8701          */
8702         for (i = 0; i <= vstate->curframe; i++)
8703                 __find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
8704                                          new_range);
8705 }
8706
8707 static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
8708 {
8709         struct tnum subreg = tnum_subreg(reg->var_off);
8710         s32 sval = (s32)val;
8711
8712         switch (opcode) {
8713         case BPF_JEQ:
8714                 if (tnum_is_const(subreg))
8715                         return !!tnum_equals_const(subreg, val);
8716                 break;
8717         case BPF_JNE:
8718                 if (tnum_is_const(subreg))
8719                         return !tnum_equals_const(subreg, val);
8720                 break;
8721         case BPF_JSET:
8722                 if ((~subreg.mask & subreg.value) & val)
8723                         return 1;
8724                 if (!((subreg.mask | subreg.value) & val))
8725                         return 0;
8726                 break;
8727         case BPF_JGT:
8728                 if (reg->u32_min_value > val)
8729                         return 1;
8730                 else if (reg->u32_max_value <= val)
8731                         return 0;
8732                 break;
8733         case BPF_JSGT:
8734                 if (reg->s32_min_value > sval)
8735                         return 1;
8736                 else if (reg->s32_max_value <= sval)
8737                         return 0;
8738                 break;
8739         case BPF_JLT:
8740                 if (reg->u32_max_value < val)
8741                         return 1;
8742                 else if (reg->u32_min_value >= val)
8743                         return 0;
8744                 break;
8745         case BPF_JSLT:
8746                 if (reg->s32_max_value < sval)
8747                         return 1;
8748                 else if (reg->s32_min_value >= sval)
8749                         return 0;
8750                 break;
8751         case BPF_JGE:
8752                 if (reg->u32_min_value >= val)
8753                         return 1;
8754                 else if (reg->u32_max_value < val)
8755                         return 0;
8756                 break;
8757         case BPF_JSGE:
8758                 if (reg->s32_min_value >= sval)
8759                         return 1;
8760                 else if (reg->s32_max_value < sval)
8761                         return 0;
8762                 break;
8763         case BPF_JLE:
8764                 if (reg->u32_max_value <= val)
8765                         return 1;
8766                 else if (reg->u32_min_value > val)
8767                         return 0;
8768                 break;
8769         case BPF_JSLE:
8770                 if (reg->s32_max_value <= sval)
8771                         return 1;
8772                 else if (reg->s32_min_value > sval)
8773                         return 0;
8774                 break;
8775         }
8776
8777         return -1;
8778 }
8779
8780
8781 static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
8782 {
8783         s64 sval = (s64)val;
8784
8785         switch (opcode) {
8786         case BPF_JEQ:
8787                 if (tnum_is_const(reg->var_off))
8788                         return !!tnum_equals_const(reg->var_off, val);
8789                 break;
8790         case BPF_JNE:
8791                 if (tnum_is_const(reg->var_off))
8792                         return !tnum_equals_const(reg->var_off, val);
8793                 break;
8794         case BPF_JSET:
8795                 if ((~reg->var_off.mask & reg->var_off.value) & val)
8796                         return 1;
8797                 if (!((reg->var_off.mask | reg->var_off.value) & val))
8798                         return 0;
8799                 break;
8800         case BPF_JGT:
8801                 if (reg->umin_value > val)
8802                         return 1;
8803                 else if (reg->umax_value <= val)
8804                         return 0;
8805                 break;
8806         case BPF_JSGT:
8807                 if (reg->smin_value > sval)
8808                         return 1;
8809                 else if (reg->smax_value <= sval)
8810                         return 0;
8811                 break;
8812         case BPF_JLT:
8813                 if (reg->umax_value < val)
8814                         return 1;
8815                 else if (reg->umin_value >= val)
8816                         return 0;
8817                 break;
8818         case BPF_JSLT:
8819                 if (reg->smax_value < sval)
8820                         return 1;
8821                 else if (reg->smin_value >= sval)
8822                         return 0;
8823                 break;
8824         case BPF_JGE:
8825                 if (reg->umin_value >= val)
8826                         return 1;
8827                 else if (reg->umax_value < val)
8828                         return 0;
8829                 break;
8830         case BPF_JSGE:
8831                 if (reg->smin_value >= sval)
8832                         return 1;
8833                 else if (reg->smax_value < sval)
8834                         return 0;
8835                 break;
8836         case BPF_JLE:
8837                 if (reg->umax_value <= val)
8838                         return 1;
8839                 else if (reg->umin_value > val)
8840                         return 0;
8841                 break;
8842         case BPF_JSLE:
8843                 if (reg->smax_value <= sval)
8844                         return 1;
8845                 else if (reg->smin_value > sval)
8846                         return 0;
8847                 break;
8848         }
8849
8850         return -1;
8851 }
8852
8853 /* compute branch direction of the expression "if (reg opcode val) goto target;"
8854  * and return:
8855  *  1 - branch will be taken and "goto target" will be executed
8856  *  0 - branch will not be taken and fall-through to next insn
8857  * -1 - unknown. Example: "if (reg < 5)" is unknown when register value
8858  *      range [0,10]
8859  */
8860 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
8861                            bool is_jmp32)
8862 {
8863         if (__is_pointer_value(false, reg)) {
8864                 if (!reg_type_not_null(reg->type))
8865                         return -1;
8866
8867                 /* If pointer is valid tests against zero will fail so we can
8868                  * use this to direct branch taken.
8869                  */
8870                 if (val != 0)
8871                         return -1;
8872
8873                 switch (opcode) {
8874                 case BPF_JEQ:
8875                         return 0;
8876                 case BPF_JNE:
8877                         return 1;
8878                 default:
8879                         return -1;
8880                 }
8881         }
8882
8883         if (is_jmp32)
8884                 return is_branch32_taken(reg, val, opcode);
8885         return is_branch64_taken(reg, val, opcode);
8886 }
8887
8888 static int flip_opcode(u32 opcode)
8889 {
8890         /* How can we transform "a <op> b" into "b <op> a"? */
8891         static const u8 opcode_flip[16] = {
8892                 /* these stay the same */
8893                 [BPF_JEQ  >> 4] = BPF_JEQ,
8894                 [BPF_JNE  >> 4] = BPF_JNE,
8895                 [BPF_JSET >> 4] = BPF_JSET,
8896                 /* these swap "lesser" and "greater" (L and G in the opcodes) */
8897                 [BPF_JGE  >> 4] = BPF_JLE,
8898                 [BPF_JGT  >> 4] = BPF_JLT,
8899                 [BPF_JLE  >> 4] = BPF_JGE,
8900                 [BPF_JLT  >> 4] = BPF_JGT,
8901                 [BPF_JSGE >> 4] = BPF_JSLE,
8902                 [BPF_JSGT >> 4] = BPF_JSLT,
8903                 [BPF_JSLE >> 4] = BPF_JSGE,
8904                 [BPF_JSLT >> 4] = BPF_JSGT
8905         };
8906         return opcode_flip[opcode >> 4];
8907 }
8908
8909 static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
8910                                    struct bpf_reg_state *src_reg,
8911                                    u8 opcode)
8912 {
8913         struct bpf_reg_state *pkt;
8914
8915         if (src_reg->type == PTR_TO_PACKET_END) {
8916                 pkt = dst_reg;
8917         } else if (dst_reg->type == PTR_TO_PACKET_END) {
8918                 pkt = src_reg;
8919                 opcode = flip_opcode(opcode);
8920         } else {
8921                 return -1;
8922         }
8923
8924         if (pkt->range >= 0)
8925                 return -1;
8926
8927         switch (opcode) {
8928         case BPF_JLE:
8929                 /* pkt <= pkt_end */
8930                 fallthrough;
8931         case BPF_JGT:
8932                 /* pkt > pkt_end */
8933                 if (pkt->range == BEYOND_PKT_END)
8934                         /* pkt has at last one extra byte beyond pkt_end */
8935                         return opcode == BPF_JGT;
8936                 break;
8937         case BPF_JLT:
8938                 /* pkt < pkt_end */
8939                 fallthrough;
8940         case BPF_JGE:
8941                 /* pkt >= pkt_end */
8942                 if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
8943                         return opcode == BPF_JGE;
8944                 break;
8945         }
8946         return -1;
8947 }
8948
8949 /* Adjusts the register min/max values in the case that the dst_reg is the
8950  * variable register that we are working on, and src_reg is a constant or we're
8951  * simply doing a BPF_K check.
8952  * In JEQ/JNE cases we also adjust the var_off values.
8953  */
8954 static void reg_set_min_max(struct bpf_reg_state *true_reg,
8955                             struct bpf_reg_state *false_reg,
8956                             u64 val, u32 val32,
8957                             u8 opcode, bool is_jmp32)
8958 {
8959         struct tnum false_32off = tnum_subreg(false_reg->var_off);
8960         struct tnum false_64off = false_reg->var_off;
8961         struct tnum true_32off = tnum_subreg(true_reg->var_off);
8962         struct tnum true_64off = true_reg->var_off;
8963         s64 sval = (s64)val;
8964         s32 sval32 = (s32)val32;
8965
8966         /* If the dst_reg is a pointer, we can't learn anything about its
8967          * variable offset from the compare (unless src_reg were a pointer into
8968          * the same object, but we don't bother with that.
8969          * Since false_reg and true_reg have the same type by construction, we
8970          * only need to check one of them for pointerness.
8971          */
8972         if (__is_pointer_value(false, false_reg))
8973                 return;
8974
8975         switch (opcode) {
8976         case BPF_JEQ:
8977         case BPF_JNE:
8978         {
8979                 struct bpf_reg_state *reg =
8980                         opcode == BPF_JEQ ? true_reg : false_reg;
8981
8982                 /* JEQ/JNE comparison doesn't change the register equivalence.
8983                  * r1 = r2;
8984                  * if (r1 == 42) goto label;
8985                  * ...
8986                  * label: // here both r1 and r2 are known to be 42.
8987                  *
8988                  * Hence when marking register as known preserve it's ID.
8989                  */
8990                 if (is_jmp32)
8991                         __mark_reg32_known(reg, val32);
8992                 else
8993                         ___mark_reg_known(reg, val);
8994                 break;
8995         }
8996         case BPF_JSET:
8997                 if (is_jmp32) {
8998                         false_32off = tnum_and(false_32off, tnum_const(~val32));
8999                         if (is_power_of_2(val32))
9000                                 true_32off = tnum_or(true_32off,
9001                                                      tnum_const(val32));
9002                 } else {
9003                         false_64off = tnum_and(false_64off, tnum_const(~val));
9004                         if (is_power_of_2(val))
9005                                 true_64off = tnum_or(true_64off,
9006                                                      tnum_const(val));
9007                 }
9008                 break;
9009         case BPF_JGE:
9010         case BPF_JGT:
9011         {
9012                 if (is_jmp32) {
9013                         u32 false_umax = opcode == BPF_JGT ? val32  : val32 - 1;
9014                         u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
9015
9016                         false_reg->u32_max_value = min(false_reg->u32_max_value,
9017                                                        false_umax);
9018                         true_reg->u32_min_value = max(true_reg->u32_min_value,
9019                                                       true_umin);
9020                 } else {
9021                         u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
9022                         u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
9023
9024                         false_reg->umax_value = min(false_reg->umax_value, false_umax);
9025                         true_reg->umin_value = max(true_reg->umin_value, true_umin);
9026                 }
9027                 break;
9028         }
9029         case BPF_JSGE:
9030         case BPF_JSGT:
9031         {
9032                 if (is_jmp32) {
9033                         s32 false_smax = opcode == BPF_JSGT ? sval32    : sval32 - 1;
9034                         s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
9035
9036                         false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
9037                         true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
9038                 } else {
9039                         s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
9040                         s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
9041
9042                         false_reg->smax_value = min(false_reg->smax_value, false_smax);
9043                         true_reg->smin_value = max(true_reg->smin_value, true_smin);
9044                 }
9045                 break;
9046         }
9047         case BPF_JLE:
9048         case BPF_JLT:
9049         {
9050                 if (is_jmp32) {
9051                         u32 false_umin = opcode == BPF_JLT ? val32  : val32 + 1;
9052                         u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
9053
9054                         false_reg->u32_min_value = max(false_reg->u32_min_value,
9055                                                        false_umin);
9056                         true_reg->u32_max_value = min(true_reg->u32_max_value,
9057                                                       true_umax);
9058                 } else {
9059                         u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
9060                         u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
9061
9062                         false_reg->umin_value = max(false_reg->umin_value, false_umin);
9063                         true_reg->umax_value = min(true_reg->umax_value, true_umax);
9064                 }
9065                 break;
9066         }
9067         case BPF_JSLE:
9068         case BPF_JSLT:
9069         {
9070                 if (is_jmp32) {
9071                         s32 false_smin = opcode == BPF_JSLT ? sval32    : sval32 + 1;
9072                         s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
9073
9074                         false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
9075                         true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
9076                 } else {
9077                         s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
9078                         s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
9079
9080                         false_reg->smin_value = max(false_reg->smin_value, false_smin);
9081                         true_reg->smax_value = min(true_reg->smax_value, true_smax);
9082                 }
9083                 break;
9084         }
9085         default:
9086                 return;
9087         }
9088
9089         if (is_jmp32) {
9090                 false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
9091                                              tnum_subreg(false_32off));
9092                 true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
9093                                             tnum_subreg(true_32off));
9094                 __reg_combine_32_into_64(false_reg);
9095                 __reg_combine_32_into_64(true_reg);
9096         } else {
9097                 false_reg->var_off = false_64off;
9098                 true_reg->var_off = true_64off;
9099                 __reg_combine_64_into_32(false_reg);
9100                 __reg_combine_64_into_32(true_reg);
9101         }
9102 }
9103
9104 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
9105  * the variable reg.
9106  */
9107 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
9108                                 struct bpf_reg_state *false_reg,
9109                                 u64 val, u32 val32,
9110                                 u8 opcode, bool is_jmp32)
9111 {
9112         opcode = flip_opcode(opcode);
9113         /* This uses zero as "not present in table"; luckily the zero opcode,
9114          * BPF_JA, can't get here.
9115          */
9116         if (opcode)
9117                 reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
9118 }
9119
9120 /* Regs are known to be equal, so intersect their min/max/var_off */
9121 static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
9122                                   struct bpf_reg_state *dst_reg)
9123 {
9124         src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
9125                                                         dst_reg->umin_value);
9126         src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
9127                                                         dst_reg->umax_value);
9128         src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
9129                                                         dst_reg->smin_value);
9130         src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
9131                                                         dst_reg->smax_value);
9132         src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
9133                                                              dst_reg->var_off);
9134         /* We might have learned new bounds from the var_off. */
9135         __update_reg_bounds(src_reg);
9136         __update_reg_bounds(dst_reg);
9137         /* We might have learned something about the sign bit. */
9138         __reg_deduce_bounds(src_reg);
9139         __reg_deduce_bounds(dst_reg);
9140         /* We might have learned some bits from the bounds. */
9141         __reg_bound_offset(src_reg);
9142         __reg_bound_offset(dst_reg);
9143         /* Intersecting with the old var_off might have improved our bounds
9144          * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
9145          * then new var_off is (0; 0x7f...fc) which improves our umax.
9146          */
9147         __update_reg_bounds(src_reg);
9148         __update_reg_bounds(dst_reg);
9149 }
9150
9151 static void reg_combine_min_max(struct bpf_reg_state *true_src,
9152                                 struct bpf_reg_state *true_dst,
9153                                 struct bpf_reg_state *false_src,
9154                                 struct bpf_reg_state *false_dst,
9155                                 u8 opcode)
9156 {
9157         switch (opcode) {
9158         case BPF_JEQ:
9159                 __reg_combine_min_max(true_src, true_dst);
9160                 break;
9161         case BPF_JNE:
9162                 __reg_combine_min_max(false_src, false_dst);
9163                 break;
9164         }
9165 }
9166
9167 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
9168                                  struct bpf_reg_state *reg, u32 id,
9169                                  bool is_null)
9170 {
9171         if (type_may_be_null(reg->type) && reg->id == id &&
9172             !WARN_ON_ONCE(!reg->id)) {
9173                 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
9174                                  !tnum_equals_const(reg->var_off, 0) ||
9175                                  reg->off)) {
9176                         /* Old offset (both fixed and variable parts) should
9177                          * have been known-zero, because we don't allow pointer
9178                          * arithmetic on pointers that might be NULL. If we
9179                          * see this happening, don't convert the register.
9180                          */
9181                         return;
9182                 }
9183                 if (is_null) {
9184                         reg->type = SCALAR_VALUE;
9185                         /* We don't need id and ref_obj_id from this point
9186                          * onwards anymore, thus we should better reset it,
9187                          * so that state pruning has chances to take effect.
9188                          */
9189                         reg->id = 0;
9190                         reg->ref_obj_id = 0;
9191
9192                         return;
9193                 }
9194
9195                 mark_ptr_not_null_reg(reg);
9196
9197                 if (!reg_may_point_to_spin_lock(reg)) {
9198                         /* For not-NULL ptr, reg->ref_obj_id will be reset
9199                          * in release_reg_references().
9200                          *
9201                          * reg->id is still used by spin_lock ptr. Other
9202                          * than spin_lock ptr type, reg->id can be reset.
9203                          */
9204                         reg->id = 0;
9205                 }
9206         }
9207 }
9208
9209 static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
9210                                     bool is_null)
9211 {
9212         struct bpf_reg_state *reg;
9213         int i;
9214
9215         for (i = 0; i < MAX_BPF_REG; i++)
9216                 mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
9217
9218         bpf_for_each_spilled_reg(i, state, reg) {
9219                 if (!reg)
9220                         continue;
9221                 mark_ptr_or_null_reg(state, reg, id, is_null);
9222         }
9223 }
9224
9225 /* The logic is similar to find_good_pkt_pointers(), both could eventually
9226  * be folded together at some point.
9227  */
9228 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
9229                                   bool is_null)
9230 {
9231         struct bpf_func_state *state = vstate->frame[vstate->curframe];
9232         struct bpf_reg_state *regs = state->regs;
9233         u32 ref_obj_id = regs[regno].ref_obj_id;
9234         u32 id = regs[regno].id;
9235         int i;
9236
9237         if (ref_obj_id && ref_obj_id == id && is_null)
9238                 /* regs[regno] is in the " == NULL" branch.
9239                  * No one could have freed the reference state before
9240                  * doing the NULL check.
9241                  */
9242                 WARN_ON_ONCE(release_reference_state(state, id));
9243
9244         for (i = 0; i <= vstate->curframe; i++)
9245                 __mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
9246 }
9247
9248 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
9249                                    struct bpf_reg_state *dst_reg,
9250                                    struct bpf_reg_state *src_reg,
9251                                    struct bpf_verifier_state *this_branch,
9252                                    struct bpf_verifier_state *other_branch)
9253 {
9254         if (BPF_SRC(insn->code) != BPF_X)
9255                 return false;
9256
9257         /* Pointers are always 64-bit. */
9258         if (BPF_CLASS(insn->code) == BPF_JMP32)
9259                 return false;
9260
9261         switch (BPF_OP(insn->code)) {
9262         case BPF_JGT:
9263                 if ((dst_reg->type == PTR_TO_PACKET &&
9264                      src_reg->type == PTR_TO_PACKET_END) ||
9265                     (dst_reg->type == PTR_TO_PACKET_META &&
9266                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9267                         /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
9268                         find_good_pkt_pointers(this_branch, dst_reg,
9269                                                dst_reg->type, false);
9270                         mark_pkt_end(other_branch, insn->dst_reg, true);
9271                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
9272                             src_reg->type == PTR_TO_PACKET) ||
9273                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9274                             src_reg->type == PTR_TO_PACKET_META)) {
9275                         /* pkt_end > pkt_data', pkt_data > pkt_meta' */
9276                         find_good_pkt_pointers(other_branch, src_reg,
9277                                                src_reg->type, true);
9278                         mark_pkt_end(this_branch, insn->src_reg, false);
9279                 } else {
9280                         return false;
9281                 }
9282                 break;
9283         case BPF_JLT:
9284                 if ((dst_reg->type == PTR_TO_PACKET &&
9285                      src_reg->type == PTR_TO_PACKET_END) ||
9286                     (dst_reg->type == PTR_TO_PACKET_META &&
9287                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9288                         /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
9289                         find_good_pkt_pointers(other_branch, dst_reg,
9290                                                dst_reg->type, true);
9291                         mark_pkt_end(this_branch, insn->dst_reg, false);
9292                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
9293                             src_reg->type == PTR_TO_PACKET) ||
9294                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9295                             src_reg->type == PTR_TO_PACKET_META)) {
9296                         /* pkt_end < pkt_data', pkt_data > pkt_meta' */
9297                         find_good_pkt_pointers(this_branch, src_reg,
9298                                                src_reg->type, false);
9299                         mark_pkt_end(other_branch, insn->src_reg, true);
9300                 } else {
9301                         return false;
9302                 }
9303                 break;
9304         case BPF_JGE:
9305                 if ((dst_reg->type == PTR_TO_PACKET &&
9306                      src_reg->type == PTR_TO_PACKET_END) ||
9307                     (dst_reg->type == PTR_TO_PACKET_META &&
9308                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9309                         /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
9310                         find_good_pkt_pointers(this_branch, dst_reg,
9311                                                dst_reg->type, true);
9312                         mark_pkt_end(other_branch, insn->dst_reg, false);
9313                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
9314                             src_reg->type == PTR_TO_PACKET) ||
9315                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9316                             src_reg->type == PTR_TO_PACKET_META)) {
9317                         /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
9318                         find_good_pkt_pointers(other_branch, src_reg,
9319                                                src_reg->type, false);
9320                         mark_pkt_end(this_branch, insn->src_reg, true);
9321                 } else {
9322                         return false;
9323                 }
9324                 break;
9325         case BPF_JLE:
9326                 if ((dst_reg->type == PTR_TO_PACKET &&
9327                      src_reg->type == PTR_TO_PACKET_END) ||
9328                     (dst_reg->type == PTR_TO_PACKET_META &&
9329                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9330                         /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
9331                         find_good_pkt_pointers(other_branch, dst_reg,
9332                                                dst_reg->type, false);
9333                         mark_pkt_end(this_branch, insn->dst_reg, true);
9334                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
9335                             src_reg->type == PTR_TO_PACKET) ||
9336                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9337                             src_reg->type == PTR_TO_PACKET_META)) {
9338                         /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
9339                         find_good_pkt_pointers(this_branch, src_reg,
9340                                                src_reg->type, true);
9341                         mark_pkt_end(other_branch, insn->src_reg, false);
9342                 } else {
9343                         return false;
9344                 }
9345                 break;
9346         default:
9347                 return false;
9348         }
9349
9350         return true;
9351 }
9352
9353 static void find_equal_scalars(struct bpf_verifier_state *vstate,
9354                                struct bpf_reg_state *known_reg)
9355 {
9356         struct bpf_func_state *state;
9357         struct bpf_reg_state *reg;
9358         int i, j;
9359
9360         for (i = 0; i <= vstate->curframe; i++) {
9361                 state = vstate->frame[i];
9362                 for (j = 0; j < MAX_BPF_REG; j++) {
9363                         reg = &state->regs[j];
9364                         if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
9365                                 *reg = *known_reg;
9366                 }
9367
9368                 bpf_for_each_spilled_reg(j, state, reg) {
9369                         if (!reg)
9370                                 continue;
9371                         if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
9372                                 *reg = *known_reg;
9373                 }
9374         }
9375 }
9376
9377 static int check_cond_jmp_op(struct bpf_verifier_env *env,
9378                              struct bpf_insn *insn, int *insn_idx)
9379 {
9380         struct bpf_verifier_state *this_branch = env->cur_state;
9381         struct bpf_verifier_state *other_branch;
9382         struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
9383         struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
9384         u8 opcode = BPF_OP(insn->code);
9385         bool is_jmp32;
9386         int pred = -1;
9387         int err;
9388
9389         /* Only conditional jumps are expected to reach here. */
9390         if (opcode == BPF_JA || opcode > BPF_JSLE) {
9391                 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
9392                 return -EINVAL;
9393         }
9394
9395         if (BPF_SRC(insn->code) == BPF_X) {
9396                 if (insn->imm != 0) {
9397                         verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
9398                         return -EINVAL;
9399                 }
9400
9401                 /* check src1 operand */
9402                 err = check_reg_arg(env, insn->src_reg, SRC_OP);
9403                 if (err)
9404                         return err;
9405
9406                 if (is_pointer_value(env, insn->src_reg)) {
9407                         verbose(env, "R%d pointer comparison prohibited\n",
9408                                 insn->src_reg);
9409                         return -EACCES;
9410                 }
9411                 src_reg = &regs[insn->src_reg];
9412         } else {
9413                 if (insn->src_reg != BPF_REG_0) {
9414                         verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
9415                         return -EINVAL;
9416                 }
9417         }
9418
9419         /* check src2 operand */
9420         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
9421         if (err)
9422                 return err;
9423
9424         dst_reg = &regs[insn->dst_reg];
9425         is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
9426
9427         if (BPF_SRC(insn->code) == BPF_K) {
9428                 pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
9429         } else if (src_reg->type == SCALAR_VALUE &&
9430                    is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
9431                 pred = is_branch_taken(dst_reg,
9432                                        tnum_subreg(src_reg->var_off).value,
9433                                        opcode,
9434                                        is_jmp32);
9435         } else if (src_reg->type == SCALAR_VALUE &&
9436                    !is_jmp32 && tnum_is_const(src_reg->var_off)) {
9437                 pred = is_branch_taken(dst_reg,
9438                                        src_reg->var_off.value,
9439                                        opcode,
9440                                        is_jmp32);
9441         } else if (reg_is_pkt_pointer_any(dst_reg) &&
9442                    reg_is_pkt_pointer_any(src_reg) &&
9443                    !is_jmp32) {
9444                 pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode);
9445         }
9446
9447         if (pred >= 0) {
9448                 /* If we get here with a dst_reg pointer type it is because
9449                  * above is_branch_taken() special cased the 0 comparison.
9450                  */
9451                 if (!__is_pointer_value(false, dst_reg))
9452                         err = mark_chain_precision(env, insn->dst_reg);
9453                 if (BPF_SRC(insn->code) == BPF_X && !err &&
9454                     !__is_pointer_value(false, src_reg))
9455                         err = mark_chain_precision(env, insn->src_reg);
9456                 if (err)
9457                         return err;
9458         }
9459
9460         if (pred == 1) {
9461                 /* Only follow the goto, ignore fall-through. If needed, push
9462                  * the fall-through branch for simulation under speculative
9463                  * execution.
9464                  */
9465                 if (!env->bypass_spec_v1 &&
9466                     !sanitize_speculative_path(env, insn, *insn_idx + 1,
9467                                                *insn_idx))
9468                         return -EFAULT;
9469                 *insn_idx += insn->off;
9470                 return 0;
9471         } else if (pred == 0) {
9472                 /* Only follow the fall-through branch, since that's where the
9473                  * program will go. If needed, push the goto branch for
9474                  * simulation under speculative execution.
9475                  */
9476                 if (!env->bypass_spec_v1 &&
9477                     !sanitize_speculative_path(env, insn,
9478                                                *insn_idx + insn->off + 1,
9479                                                *insn_idx))
9480                         return -EFAULT;
9481                 return 0;
9482         }
9483
9484         other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
9485                                   false);
9486         if (!other_branch)
9487                 return -EFAULT;
9488         other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
9489
9490         /* detect if we are comparing against a constant value so we can adjust
9491          * our min/max values for our dst register.
9492          * this is only legit if both are scalars (or pointers to the same
9493          * object, I suppose, but we don't support that right now), because
9494          * otherwise the different base pointers mean the offsets aren't
9495          * comparable.
9496          */
9497         if (BPF_SRC(insn->code) == BPF_X) {
9498                 struct bpf_reg_state *src_reg = &regs[insn->src_reg];
9499
9500                 if (dst_reg->type == SCALAR_VALUE &&
9501                     src_reg->type == SCALAR_VALUE) {
9502                         if (tnum_is_const(src_reg->var_off) ||
9503                             (is_jmp32 &&
9504                              tnum_is_const(tnum_subreg(src_reg->var_off))))
9505                                 reg_set_min_max(&other_branch_regs[insn->dst_reg],
9506                                                 dst_reg,
9507                                                 src_reg->var_off.value,
9508                                                 tnum_subreg(src_reg->var_off).value,
9509                                                 opcode, is_jmp32);
9510                         else if (tnum_is_const(dst_reg->var_off) ||
9511                                  (is_jmp32 &&
9512                                   tnum_is_const(tnum_subreg(dst_reg->var_off))))
9513                                 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
9514                                                     src_reg,
9515                                                     dst_reg->var_off.value,
9516                                                     tnum_subreg(dst_reg->var_off).value,
9517                                                     opcode, is_jmp32);
9518                         else if (!is_jmp32 &&
9519                                  (opcode == BPF_JEQ || opcode == BPF_JNE))
9520                                 /* Comparing for equality, we can combine knowledge */
9521                                 reg_combine_min_max(&other_branch_regs[insn->src_reg],
9522                                                     &other_branch_regs[insn->dst_reg],
9523                                                     src_reg, dst_reg, opcode);
9524                         if (src_reg->id &&
9525                             !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
9526                                 find_equal_scalars(this_branch, src_reg);
9527                                 find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
9528                         }
9529
9530                 }
9531         } else if (dst_reg->type == SCALAR_VALUE) {
9532                 reg_set_min_max(&other_branch_regs[insn->dst_reg],
9533                                         dst_reg, insn->imm, (u32)insn->imm,
9534                                         opcode, is_jmp32);
9535         }
9536
9537         if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
9538             !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
9539                 find_equal_scalars(this_branch, dst_reg);
9540                 find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
9541         }
9542
9543         /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
9544          * NOTE: these optimizations below are related with pointer comparison
9545          *       which will never be JMP32.
9546          */
9547         if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
9548             insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
9549             type_may_be_null(dst_reg->type)) {
9550                 /* Mark all identical registers in each branch as either
9551                  * safe or unknown depending R == 0 or R != 0 conditional.
9552                  */
9553                 mark_ptr_or_null_regs(this_branch, insn->dst_reg,
9554                                       opcode == BPF_JNE);
9555                 mark_ptr_or_null_regs(other_branch, insn->dst_reg,
9556                                       opcode == BPF_JEQ);
9557         } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
9558                                            this_branch, other_branch) &&
9559                    is_pointer_value(env, insn->dst_reg)) {
9560                 verbose(env, "R%d pointer comparison prohibited\n",
9561                         insn->dst_reg);
9562                 return -EACCES;
9563         }
9564         if (env->log.level & BPF_LOG_LEVEL)
9565                 print_insn_state(env, this_branch->frame[this_branch->curframe]);
9566         return 0;
9567 }
9568
9569 /* verify BPF_LD_IMM64 instruction */
9570 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
9571 {
9572         struct bpf_insn_aux_data *aux = cur_aux(env);
9573         struct bpf_reg_state *regs = cur_regs(env);
9574         struct bpf_reg_state *dst_reg;
9575         struct bpf_map *map;
9576         int err;
9577
9578         if (BPF_SIZE(insn->code) != BPF_DW) {
9579                 verbose(env, "invalid BPF_LD_IMM insn\n");
9580                 return -EINVAL;
9581         }
9582         if (insn->off != 0) {
9583                 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
9584                 return -EINVAL;
9585         }
9586
9587         err = check_reg_arg(env, insn->dst_reg, DST_OP);
9588         if (err)
9589                 return err;
9590
9591         dst_reg = &regs[insn->dst_reg];
9592         if (insn->src_reg == 0) {
9593                 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
9594
9595                 dst_reg->type = SCALAR_VALUE;
9596                 __mark_reg_known(&regs[insn->dst_reg], imm);
9597                 return 0;
9598         }
9599
9600         /* All special src_reg cases are listed below. From this point onwards
9601          * we either succeed and assign a corresponding dst_reg->type after
9602          * zeroing the offset, or fail and reject the program.
9603          */
9604         mark_reg_known_zero(env, regs, insn->dst_reg);
9605
9606         if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
9607                 dst_reg->type = aux->btf_var.reg_type;
9608                 switch (base_type(dst_reg->type)) {
9609                 case PTR_TO_MEM:
9610                         dst_reg->mem_size = aux->btf_var.mem_size;
9611                         break;
9612                 case PTR_TO_BTF_ID:
9613                 case PTR_TO_PERCPU_BTF_ID:
9614                         dst_reg->btf = aux->btf_var.btf;
9615                         dst_reg->btf_id = aux->btf_var.btf_id;
9616                         break;
9617                 default:
9618                         verbose(env, "bpf verifier is misconfigured\n");
9619                         return -EFAULT;
9620                 }
9621                 return 0;
9622         }
9623
9624         if (insn->src_reg == BPF_PSEUDO_FUNC) {
9625                 struct bpf_prog_aux *aux = env->prog->aux;
9626                 u32 subprogno = find_subprog(env,
9627                                              env->insn_idx + insn->imm + 1);
9628
9629                 if (!aux->func_info) {
9630                         verbose(env, "missing btf func_info\n");
9631                         return -EINVAL;
9632                 }
9633                 if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
9634                         verbose(env, "callback function not static\n");
9635                         return -EINVAL;
9636                 }
9637
9638                 dst_reg->type = PTR_TO_FUNC;
9639                 dst_reg->subprogno = subprogno;
9640                 return 0;
9641         }
9642
9643         map = env->used_maps[aux->map_index];
9644         dst_reg->map_ptr = map;
9645
9646         if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
9647             insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
9648                 dst_reg->type = PTR_TO_MAP_VALUE;
9649                 dst_reg->off = aux->map_off;
9650                 if (map_value_has_spin_lock(map))
9651                         dst_reg->id = ++env->id_gen;
9652         } else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
9653                    insn->src_reg == BPF_PSEUDO_MAP_IDX) {
9654                 dst_reg->type = CONST_PTR_TO_MAP;
9655         } else {
9656                 verbose(env, "bpf verifier is misconfigured\n");
9657                 return -EINVAL;
9658         }
9659
9660         return 0;
9661 }
9662
9663 static bool may_access_skb(enum bpf_prog_type type)
9664 {
9665         switch (type) {
9666         case BPF_PROG_TYPE_SOCKET_FILTER:
9667         case BPF_PROG_TYPE_SCHED_CLS:
9668         case BPF_PROG_TYPE_SCHED_ACT:
9669                 return true;
9670         default:
9671                 return false;
9672         }
9673 }
9674
9675 /* verify safety of LD_ABS|LD_IND instructions:
9676  * - they can only appear in the programs where ctx == skb
9677  * - since they are wrappers of function calls, they scratch R1-R5 registers,
9678  *   preserve R6-R9, and store return value into R0
9679  *
9680  * Implicit input:
9681  *   ctx == skb == R6 == CTX
9682  *
9683  * Explicit input:
9684  *   SRC == any register
9685  *   IMM == 32-bit immediate
9686  *
9687  * Output:
9688  *   R0 - 8/16/32-bit skb data converted to cpu endianness
9689  */
9690 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
9691 {
9692         struct bpf_reg_state *regs = cur_regs(env);
9693         static const int ctx_reg = BPF_REG_6;
9694         u8 mode = BPF_MODE(insn->code);
9695         int i, err;
9696
9697         if (!may_access_skb(resolve_prog_type(env->prog))) {
9698                 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
9699                 return -EINVAL;
9700         }
9701
9702         if (!env->ops->gen_ld_abs) {
9703                 verbose(env, "bpf verifier is misconfigured\n");
9704                 return -EINVAL;
9705         }
9706
9707         if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
9708             BPF_SIZE(insn->code) == BPF_DW ||
9709             (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
9710                 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
9711                 return -EINVAL;
9712         }
9713
9714         /* check whether implicit source operand (register R6) is readable */
9715         err = check_reg_arg(env, ctx_reg, SRC_OP);
9716         if (err)
9717                 return err;
9718
9719         /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
9720          * gen_ld_abs() may terminate the program at runtime, leading to
9721          * reference leak.
9722          */
9723         err = check_reference_leak(env);
9724         if (err) {
9725                 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
9726                 return err;
9727         }
9728
9729         if (env->cur_state->active_spin_lock) {
9730                 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
9731                 return -EINVAL;
9732         }
9733
9734         if (regs[ctx_reg].type != PTR_TO_CTX) {
9735                 verbose(env,
9736                         "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
9737                 return -EINVAL;
9738         }
9739
9740         if (mode == BPF_IND) {
9741                 /* check explicit source operand */
9742                 err = check_reg_arg(env, insn->src_reg, SRC_OP);
9743                 if (err)
9744                         return err;
9745         }
9746
9747         err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
9748         if (err < 0)
9749                 return err;
9750
9751         /* reset caller saved regs to unreadable */
9752         for (i = 0; i < CALLER_SAVED_REGS; i++) {
9753                 mark_reg_not_init(env, regs, caller_saved[i]);
9754                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
9755         }
9756
9757         /* mark destination R0 register as readable, since it contains
9758          * the value fetched from the packet.
9759          * Already marked as written above.
9760          */
9761         mark_reg_unknown(env, regs, BPF_REG_0);
9762         /* ld_abs load up to 32-bit skb data. */
9763         regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
9764         return 0;
9765 }
9766
9767 static int check_return_code(struct bpf_verifier_env *env)
9768 {
9769         struct tnum enforce_attach_type_range = tnum_unknown;
9770         const struct bpf_prog *prog = env->prog;
9771         struct bpf_reg_state *reg;
9772         struct tnum range = tnum_range(0, 1);
9773         enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
9774         int err;
9775         struct bpf_func_state *frame = env->cur_state->frame[0];
9776         const bool is_subprog = frame->subprogno;
9777
9778         /* LSM and struct_ops func-ptr's return type could be "void" */
9779         if (!is_subprog &&
9780             (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
9781              prog_type == BPF_PROG_TYPE_LSM) &&
9782             !prog->aux->attach_func_proto->type)
9783                 return 0;
9784
9785         /* eBPF calling convention is such that R0 is used
9786          * to return the value from eBPF program.
9787          * Make sure that it's readable at this time
9788          * of bpf_exit, which means that program wrote
9789          * something into it earlier
9790          */
9791         err = check_reg_arg(env, BPF_REG_0, SRC_OP);
9792         if (err)
9793                 return err;
9794
9795         if (is_pointer_value(env, BPF_REG_0)) {
9796                 verbose(env, "R0 leaks addr as return value\n");
9797                 return -EACCES;
9798         }
9799
9800         reg = cur_regs(env) + BPF_REG_0;
9801
9802         if (frame->in_async_callback_fn) {
9803                 /* enforce return zero from async callbacks like timer */
9804                 if (reg->type != SCALAR_VALUE) {
9805                         verbose(env, "In async callback the register R0 is not a known value (%s)\n",
9806                                 reg_type_str(env, reg->type));
9807                         return -EINVAL;
9808                 }
9809
9810                 if (!tnum_in(tnum_const(0), reg->var_off)) {
9811                         verbose_invalid_scalar(env, reg, &range, "async callback", "R0");
9812                         return -EINVAL;
9813                 }
9814                 return 0;
9815         }
9816
9817         if (is_subprog) {
9818                 if (reg->type != SCALAR_VALUE) {
9819                         verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
9820                                 reg_type_str(env, reg->type));
9821                         return -EINVAL;
9822                 }
9823                 return 0;
9824         }
9825
9826         switch (prog_type) {
9827         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
9828                 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
9829                     env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
9830                     env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
9831                     env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
9832                     env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
9833                     env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
9834                         range = tnum_range(1, 1);
9835                 if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
9836                     env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
9837                         range = tnum_range(0, 3);
9838                 break;
9839         case BPF_PROG_TYPE_CGROUP_SKB:
9840                 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
9841                         range = tnum_range(0, 3);
9842                         enforce_attach_type_range = tnum_range(2, 3);
9843                 }
9844                 break;
9845         case BPF_PROG_TYPE_CGROUP_SOCK:
9846         case BPF_PROG_TYPE_SOCK_OPS:
9847         case BPF_PROG_TYPE_CGROUP_DEVICE:
9848         case BPF_PROG_TYPE_CGROUP_SYSCTL:
9849         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
9850                 break;
9851         case BPF_PROG_TYPE_RAW_TRACEPOINT:
9852                 if (!env->prog->aux->attach_btf_id)
9853                         return 0;
9854                 range = tnum_const(0);
9855                 break;
9856         case BPF_PROG_TYPE_TRACING:
9857                 switch (env->prog->expected_attach_type) {
9858                 case BPF_TRACE_FENTRY:
9859                 case BPF_TRACE_FEXIT:
9860                         range = tnum_const(0);
9861                         break;
9862                 case BPF_TRACE_RAW_TP:
9863                 case BPF_MODIFY_RETURN:
9864                         return 0;
9865                 case BPF_TRACE_ITER:
9866                         break;
9867                 default:
9868                         return -ENOTSUPP;
9869                 }
9870                 break;
9871         case BPF_PROG_TYPE_SK_LOOKUP:
9872                 range = tnum_range(SK_DROP, SK_PASS);
9873                 break;
9874         case BPF_PROG_TYPE_EXT:
9875                 /* freplace program can return anything as its return value
9876                  * depends on the to-be-replaced kernel func or bpf program.
9877                  */
9878         default:
9879                 return 0;
9880         }
9881
9882         if (reg->type != SCALAR_VALUE) {
9883                 verbose(env, "At program exit the register R0 is not a known value (%s)\n",
9884                         reg_type_str(env, reg->type));
9885                 return -EINVAL;
9886         }
9887
9888         if (!tnum_in(range, reg->var_off)) {
9889                 verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
9890                 return -EINVAL;
9891         }
9892
9893         if (!tnum_is_unknown(enforce_attach_type_range) &&
9894             tnum_in(enforce_attach_type_range, reg->var_off))
9895                 env->prog->enforce_expected_attach_type = 1;
9896         return 0;
9897 }
9898
9899 /* non-recursive DFS pseudo code
9900  * 1  procedure DFS-iterative(G,v):
9901  * 2      label v as discovered
9902  * 3      let S be a stack
9903  * 4      S.push(v)
9904  * 5      while S is not empty
 * 6            t <- S.peek()
9906  * 7            if t is what we're looking for:
9907  * 8                return t
9908  * 9            for all edges e in G.adjacentEdges(t) do
9909  * 10               if edge e is already labelled
9910  * 11                   continue with the next edge
9911  * 12               w <- G.adjacentVertex(t,e)
9912  * 13               if vertex w is not discovered and not explored
9913  * 14                   label e as tree-edge
9914  * 15                   label w as discovered
9915  * 16                   S.push(w)
9916  * 17                   continue at 5
9917  * 18               else if vertex w is discovered
9918  * 19                   label e as back-edge
9919  * 20               else
9920  * 21                   // vertex w is explored
9921  * 22                   label e as forward- or cross-edge
9922  * 23           label t as explored
9923  * 24           S.pop()
9924  *
9925  * convention:
9926  * 0x10 - discovered
9927  * 0x11 - discovered and fall-through edge labelled
9928  * 0x12 - discovered and fall-through and branch edges labelled
9929  * 0x20 - explored
9930  */
9931
/* Values stored in env->cfg.insn_state[] by the CFG walk: a visit state,
 * optionally OR-ed with the label of the last edge handled for that insn.
 */
enum {
        /* edge labels */
        FALLTHROUGH     = 1,
        BRANCH          = 2,
        /* visit states */
        DISCOVERED      = 0x10,
        EXPLORED        = 0x20,
};
9938
9939 static u32 state_htab_size(struct bpf_verifier_env *env)
9940 {
9941         return env->prog->len;
9942 }
9943
9944 static struct bpf_verifier_state_list **explored_state(
9945                                         struct bpf_verifier_env *env,
9946                                         int idx)
9947 {
9948         struct bpf_verifier_state *cur = env->cur_state;
9949         struct bpf_func_state *state = cur->frame[cur->curframe];
9950
9951         return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
9952 }
9953
9954 static void init_explored_state(struct bpf_verifier_env *env, int idx)
9955 {
9956         env->insn_aux_data[idx].prune_point = true;
9957 }
9958
/* Non-error results of push_insn() and visit_insn(). DONE_EXPLORING must
 * stay 0 because callers treat any non-zero result as "stop and return it".
 */
enum {
        DONE_EXPLORING  = 0,    /* nothing left to do for this edge/insn */
        KEEP_EXPLORING  = 1,    /* a new insn was pushed on the stack */
};
9963
9964 /* t, w, e - match pseudo-code above:
9965  * t - index of current instruction
9966  * w - next instruction
9967  * e - edge
9968  */
9969 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
9970                      bool loop_ok)
9971 {
9972         int *insn_stack = env->cfg.insn_stack;
9973         int *insn_state = env->cfg.insn_state;
9974
9975         if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
9976                 return DONE_EXPLORING;
9977
9978         if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
9979                 return DONE_EXPLORING;
9980
9981         if (w < 0 || w >= env->prog->len) {
9982                 verbose_linfo(env, t, "%d: ", t);
9983                 verbose(env, "jump out of range from insn %d to %d\n", t, w);
9984                 return -EINVAL;
9985         }
9986
9987         if (e == BRANCH)
9988                 /* mark branch target for state pruning */
9989                 init_explored_state(env, w);
9990
9991         if (insn_state[w] == 0) {
9992                 /* tree-edge */
9993                 insn_state[t] = DISCOVERED | e;
9994                 insn_state[w] = DISCOVERED;
9995                 if (env->cfg.cur_stack >= env->prog->len)
9996                         return -E2BIG;
9997                 insn_stack[env->cfg.cur_stack++] = w;
9998                 return KEEP_EXPLORING;
9999         } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
10000                 if (loop_ok && env->bpf_capable)
10001                         return DONE_EXPLORING;
10002                 verbose_linfo(env, t, "%d: ", t);
10003                 verbose_linfo(env, w, "%d: ", w);
10004                 verbose(env, "back-edge from insn %d to %d\n", t, w);
10005                 return -EINVAL;
10006         } else if (insn_state[w] == EXPLORED) {
10007                 /* forward- or cross-edge */
10008                 insn_state[t] = DISCOVERED | e;
10009         } else {
10010                 verbose(env, "insn state internal bug\n");
10011                 return -EFAULT;
10012         }
10013         return DONE_EXPLORING;
10014 }
10015
10016 static int visit_func_call_insn(int t, int insn_cnt,
10017                                 struct bpf_insn *insns,
10018                                 struct bpf_verifier_env *env,
10019                                 bool visit_callee)
10020 {
10021         int ret;
10022
10023         ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
10024         if (ret)
10025                 return ret;
10026
10027         if (t + 1 < insn_cnt)
10028                 init_explored_state(env, t + 1);
10029         if (visit_callee) {
10030                 init_explored_state(env, t);
10031                 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
10032                                 /* It's ok to allow recursion from CFG point of
10033                                  * view. __check_func_call() will do the actual
10034                                  * check.
10035                                  */
10036                                 bpf_pseudo_func(insns + t));
10037         }
10038         return ret;
10039 }
10040
10041 /* Visits the instruction at index t and returns one of the following:
10042  *  < 0 - an error occurred
10043  *  DONE_EXPLORING - the instruction was fully explored
10044  *  KEEP_EXPLORING - there is still work to be done before it is fully explored
10045  */
10046 static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
10047 {
10048         struct bpf_insn *insns = env->prog->insnsi;
10049         int ret;
10050
10051         if (bpf_pseudo_func(insns + t))
10052                 return visit_func_call_insn(t, insn_cnt, insns, env, true);
10053
10054         /* All non-branch instructions have a single fall-through edge. */
10055         if (BPF_CLASS(insns[t].code) != BPF_JMP &&
10056             BPF_CLASS(insns[t].code) != BPF_JMP32)
10057                 return push_insn(t, t + 1, FALLTHROUGH, env, false);
10058
10059         switch (BPF_OP(insns[t].code)) {
10060         case BPF_EXIT:
10061                 return DONE_EXPLORING;
10062
10063         case BPF_CALL:
10064                 if (insns[t].imm == BPF_FUNC_timer_set_callback)
10065                         /* Mark this call insn to trigger is_state_visited() check
10066                          * before call itself is processed by __check_func_call().
10067                          * Otherwise new async state will be pushed for further
10068                          * exploration.
10069                          */
10070                         init_explored_state(env, t);
10071                 return visit_func_call_insn(t, insn_cnt, insns, env,
10072                                             insns[t].src_reg == BPF_PSEUDO_CALL);
10073
10074         case BPF_JA:
10075                 if (BPF_SRC(insns[t].code) != BPF_K)
10076                         return -EINVAL;
10077
10078                 /* unconditional jump with single edge */
10079                 ret = push_insn(t, t + insns[t].off + 1, FALLTHROUGH, env,
10080                                 true);
10081                 if (ret)
10082                         return ret;
10083
10084                 /* unconditional jmp is not a good pruning point,
10085                  * but it's marked, since backtracking needs
10086                  * to record jmp history in is_state_visited().
10087                  */
10088                 init_explored_state(env, t + insns[t].off + 1);
10089                 /* tell verifier to check for equivalent states
10090                  * after every call and jump
10091                  */
10092                 if (t + 1 < insn_cnt)
10093                         init_explored_state(env, t + 1);
10094
10095                 return ret;
10096
10097         default:
10098                 /* conditional jump with two edges */
10099                 init_explored_state(env, t);
10100                 ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
10101                 if (ret)
10102                         return ret;
10103
10104                 return push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
10105         }
10106 }
10107
10108 /* non-recursive depth-first-search to detect loops in BPF program
10109  * loop == back-edge in directed graph
10110  */
10111 static int check_cfg(struct bpf_verifier_env *env)
10112 {
10113         int insn_cnt = env->prog->len;
10114         int *insn_stack, *insn_state;
10115         int ret = 0;
10116         int i;
10117
10118         insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
10119         if (!insn_state)
10120                 return -ENOMEM;
10121
10122         insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
10123         if (!insn_stack) {
10124                 kvfree(insn_state);
10125                 return -ENOMEM;
10126         }
10127
10128         insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
10129         insn_stack[0] = 0; /* 0 is the first instruction */
10130         env->cfg.cur_stack = 1;
10131
10132         while (env->cfg.cur_stack > 0) {
10133                 int t = insn_stack[env->cfg.cur_stack - 1];
10134
10135                 ret = visit_insn(t, insn_cnt, env);
10136                 switch (ret) {
10137                 case DONE_EXPLORING:
10138                         insn_state[t] = EXPLORED;
10139                         env->cfg.cur_stack--;
10140                         break;
10141                 case KEEP_EXPLORING:
10142                         break;
10143                 default:
10144                         if (ret > 0) {
10145                                 verbose(env, "visit_insn internal bug\n");
10146                                 ret = -EFAULT;
10147                         }
10148                         goto err_free;
10149                 }
10150         }
10151
10152         if (env->cfg.cur_stack < 0) {
10153                 verbose(env, "pop stack internal bug\n");
10154                 ret = -EFAULT;
10155                 goto err_free;
10156         }
10157
10158         for (i = 0; i < insn_cnt; i++) {
10159                 if (insn_state[i] != EXPLORED) {
10160                         verbose(env, "unreachable insn %d\n", i);
10161                         ret = -EINVAL;
10162                         goto err_free;
10163                 }
10164         }
10165         ret = 0; /* cfg looks good */
10166
10167 err_free:
10168         kvfree(insn_state);
10169         kvfree(insn_stack);
10170         env->cfg.insn_state = env->cfg.insn_stack = NULL;
10171         return ret;
10172 }
10173
10174 static int check_abnormal_return(struct bpf_verifier_env *env)
10175 {
10176         int i;
10177
10178         for (i = 1; i < env->subprog_cnt; i++) {
10179                 if (env->subprog_info[i].has_ld_abs) {
10180                         verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
10181                         return -EINVAL;
10182                 }
10183                 if (env->subprog_info[i].has_tail_call) {
10184                         verbose(env, "tail_call is not allowed in subprogs without BTF\n");
10185                         return -EINVAL;
10186                 }
10187         }
10188         return 0;
10189 }
10190
/* The minimum supported BTF func info size */
#define MIN_BPF_FUNCINFO_SIZE   8
/* Largest record size accepted for func_info; the line_info and core_relo
 * record-size limits in this file are defined in terms of it.
 */
#define MAX_FUNCINFO_REC_SIZE   252

/* Validate the func_info records supplied with the program and, on success,
 * attach a kernel copy to prog->aux.
 *
 * @env:   verifier environment (program, subprog layout, log)
 * @attr:  load attributes; func_info, func_info_cnt and func_info_rec_size
 *         are read here
 * @uattr: pointer to the caller's copy of @attr, used to write back the
 *         record size the kernel expects when a record has a nonzero tail
 *
 * With func_info_cnt == 0 only check_abnormal_return() is run.  Otherwise
 * there must be exactly one record per subprog, and each record is checked
 * for: zeroed tail beyond the kernel's record size, insn_off of 0 for the
 * first record and strictly increasing afterwards, insn_off matching the
 * subprog start, and a type_id naming a BTF func.  Subprogs (index > 0) that
 * use LD_ABS or tail calls must return a small int or an enum.
 *
 * Returns 0 on success (ownership of both arrays moves to prog->aux) or a
 * negative errno, in which case both arrays are freed.
 */
static int check_btf_func(struct bpf_verifier_env *env,
                          const union bpf_attr *attr,
                          bpfptr_t uattr)
{
        const struct btf_type *type, *func_proto, *ret_type;
        u32 i, nfuncs, urec_size, min_size;
        u32 krec_size = sizeof(struct bpf_func_info);
        struct bpf_func_info *krecord;
        struct bpf_func_info_aux *info_aux = NULL;
        struct bpf_prog *prog;
        const struct btf *btf;
        bpfptr_t urecord;
        u32 prev_offset = 0;
        bool scalar_return;
        /* preset so the info_aux allocation failure can jump to err_free */
        int ret = -ENOMEM;

        nfuncs = attr->func_info_cnt;
        if (!nfuncs) {
                if (check_abnormal_return(env))
                        return -EINVAL;
                return 0;
        }

        if (nfuncs != env->subprog_cnt) {
                verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
                return -EINVAL;
        }

        /* record size must be in range and a multiple of 4 bytes */
        urec_size = attr->func_info_rec_size;
        if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
            urec_size > MAX_FUNCINFO_REC_SIZE ||
            urec_size % sizeof(u32)) {
                verbose(env, "invalid func info rec size %u\n", urec_size);
                return -EINVAL;
        }

        prog = env->prog;
        btf = prog->aux->btf;

        urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
        /* copy only as many bytes as both sides know about */
        min_size = min_t(u32, krec_size, urec_size);

        krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
        if (!krecord)
                return -ENOMEM;
        info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
        if (!info_aux)
                goto err_free;

        for (i = 0; i < nfuncs; i++) {
                ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
                if (ret) {
                        if (ret == -E2BIG) {
                                verbose(env, "nonzero tailing record in func info");
                                /* set the size kernel expects so loader can zero
                                 * out the rest of the record.
                                 */
                                if (copy_to_bpfptr_offset(uattr,
                                                          offsetof(union bpf_attr, func_info_rec_size),
                                                          &min_size, sizeof(min_size)))
                                        ret = -EFAULT;
                        }
                        goto err_free;
                }

                if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
                        ret = -EFAULT;
                        goto err_free;
                }

                /* check insn_off */
                /* every "goto err_free" from here to the end of this
                 * iteration relies on ret being -EINVAL
                 */
                ret = -EINVAL;
                if (i == 0) {
                        if (krecord[i].insn_off) {
                                verbose(env,
                                        "nonzero insn_off %u for the first func info record",
                                        krecord[i].insn_off);
                                goto err_free;
                        }
                } else if (krecord[i].insn_off <= prev_offset) {
                        verbose(env,
                                "same or smaller insn offset (%u) than previous func info record (%u)",
                                krecord[i].insn_off, prev_offset);
                        goto err_free;
                }

                /* record i must describe subprog i */
                if (env->subprog_info[i].start != krecord[i].insn_off) {
                        verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
                        goto err_free;
                }

                /* check type_id */
                type = btf_type_by_id(btf, krecord[i].type_id);
                if (!type || !btf_type_is_func(type)) {
                        verbose(env, "invalid type id %d in func info",
                                krecord[i].type_id);
                        goto err_free;
                }
                /* linkage is read from the vlen bits of the func's info word */
                info_aux[i].linkage = BTF_INFO_VLEN(type->info);

                func_proto = btf_type_by_id(btf, type->type);
                if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
                        /* btf_func_check() already verified it during BTF load */
                        goto err_free;
                ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
                scalar_return =
                        btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
                /* "i &&" exempts the main program (record 0) from both checks */
                if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
                        verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
                        goto err_free;
                }
                if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
                        verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
                        goto err_free;
                }

                prev_offset = krecord[i].insn_off;
                /* advance by the user's record size, not the kernel's */
                bpfptr_add(&urecord, urec_size);
        }

        /* success: hand both arrays over to the program */
        prog->aux->func_info = krecord;
        prog->aux->func_info_cnt = nfuncs;
        prog->aux->func_info_aux = info_aux;
        return 0;

err_free:
        kvfree(krecord);
        kfree(info_aux);
        return ret;
}
10325
10326 static void adjust_btf_func(struct bpf_verifier_env *env)
10327 {
10328         struct bpf_prog_aux *aux = env->prog->aux;
10329         int i;
10330
10331         if (!aux->func_info)
10332                 return;
10333
10334         for (i = 0; i < env->subprog_cnt; i++)
10335                 aux->func_info[i].insn_off = env->subprog_info[i].start;
10336 }
10337
/* Smallest accepted line_info record: everything up to and including the
 * line_col member of struct bpf_line_info.
 */
#define MIN_BPF_LINEINFO_SIZE   (offsetof(struct bpf_line_info, line_col) + \
                sizeof(((struct bpf_line_info *)(0))->line_col))
/* Largest accepted line_info record; same limit as for func_info */
#define MAX_LINEINFO_REC_SIZE   MAX_FUNCINFO_REC_SIZE

/* Validate the line_info records supplied with the program and, on success,
 * attach a kernel copy to prog->aux.
 *
 * @env:   verifier environment (program, subprog layout, log)
 * @attr:  load attributes; line_info, line_info_cnt and line_info_rec_size
 *         are read here
 * @uattr: pointer to the caller's copy of @attr, used to write back the
 *         record size the kernel expects when a record has a nonzero tail
 *
 * Each record must have a zeroed tail beyond the kernel's record size, a
 * strictly increasing insn_off below prog->len that lands on an instruction
 * with a nonzero opcode, and line_off/file_name_off values that resolve in
 * the program's BTF.  Every subprog must have a record whose insn_off equals
 * the subprog start; the index of that record is stored in the subprog's
 * linfo_idx.
 *
 * Returns 0 on success (including line_info_cnt == 0) or a negative errno,
 * in which case the kernel copy is freed.
 */
static int check_btf_line(struct bpf_verifier_env *env,
                          const union bpf_attr *attr,
                          bpfptr_t uattr)
{
        u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
        struct bpf_subprog_info *sub;
        struct bpf_line_info *linfo;
        struct bpf_prog *prog;
        const struct btf *btf;
        bpfptr_t ulinfo;
        int err;

        nr_linfo = attr->line_info_cnt;
        if (!nr_linfo)
                return 0;
        /* cap the count so the array's byte size stays within INT_MAX */
        if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
                return -EINVAL;

        /* record size must be in range and a multiple of 4 bytes */
        rec_size = attr->line_info_rec_size;
        if (rec_size < MIN_BPF_LINEINFO_SIZE ||
            rec_size > MAX_LINEINFO_REC_SIZE ||
            rec_size & (sizeof(u32) - 1))
                return -EINVAL;

        /* Need to zero it in case the userspace may
         * pass in a smaller bpf_line_info object.
         */
        linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
                         GFP_KERNEL | __GFP_NOWARN);
        if (!linfo)
                return -ENOMEM;

        prog = env->prog;
        btf = prog->aux->btf;

        /* s is the index of the next subprog still waiting for its record */
        s = 0;
        sub = env->subprog_info;
        ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
        expected_size = sizeof(struct bpf_line_info);
        /* copy only as many bytes as both sides know about */
        ncopy = min_t(u32, expected_size, rec_size);
        for (i = 0; i < nr_linfo; i++) {
                err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
                if (err) {
                        if (err == -E2BIG) {
                                verbose(env, "nonzero tailing record in line_info");
                                /* report the record size the kernel expects */
                                if (copy_to_bpfptr_offset(uattr,
                                                          offsetof(union bpf_attr, line_info_rec_size),
                                                          &expected_size, sizeof(expected_size)))
                                        err = -EFAULT;
                        }
                        goto err_free;
                }

                if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
                        err = -EFAULT;
                        goto err_free;
                }

                /*
                 * Check insn_off to ensure
                 * 1) strictly increasing AND
                 * 2) bounded by prog->len
                 *
                 * The linfo[0].insn_off == 0 check logically falls into
                 * the later "missing bpf_line_info for func..." case
                 * because the first linfo[0].insn_off must be the
                 * first sub also and the first sub must have
                 * subprog_info[0].start == 0.
                 */
                if ((i && linfo[i].insn_off <= prev_offset) ||
                    linfo[i].insn_off >= prog->len) {
                        verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
                                i, linfo[i].insn_off, prev_offset,
                                prog->len);
                        err = -EINVAL;
                        goto err_free;
                }

                /* insn_off was bounds-checked above, so this index is safe */
                if (!prog->insnsi[linfo[i].insn_off].code) {
                        verbose(env,
                                "Invalid insn code at line_info[%u].insn_off\n",
                                i);
                        err = -EINVAL;
                        goto err_free;
                }

                if (!btf_name_by_offset(btf, linfo[i].line_off) ||
                    !btf_name_by_offset(btf, linfo[i].file_name_off)) {
                        verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
                        err = -EINVAL;
                        goto err_free;
                }

                if (s != env->subprog_cnt) {
                        if (linfo[i].insn_off == sub[s].start) {
                                /* this record opens subprog s */
                                sub[s].linfo_idx = i;
                                s++;
                        } else if (sub[s].start < linfo[i].insn_off) {
                                /* records already moved past the start of
                                 * subprog s without one landing on it
                                 */
                                verbose(env, "missing bpf_line_info for func#%u\n", s);
                                err = -EINVAL;
                                goto err_free;
                        }
                }

                prev_offset = linfo[i].insn_off;
                /* advance by the user's record size, not the kernel's */
                bpfptr_add(&ulinfo, rec_size);
        }

        /* records ran out before every subprog was matched */
        if (s != env->subprog_cnt) {
                verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
                        env->subprog_cnt - s, s);
                err = -EINVAL;
                goto err_free;
        }

        /* success: hand the array over to the program */
        prog->aux->linfo = linfo;
        prog->aux->nr_linfo = nr_linfo;

        return 0;

err_free:
        kvfree(linfo);
        return err;
}
10466
10467 #define MIN_CORE_RELO_SIZE      sizeof(struct bpf_core_relo)
10468 #define MAX_CORE_RELO_SIZE      MAX_FUNCINFO_REC_SIZE
10469
10470 static int check_core_relo(struct bpf_verifier_env *env,
10471                            const union bpf_attr *attr,
10472                            bpfptr_t uattr)
10473 {
10474         u32 i, nr_core_relo, ncopy, expected_size, rec_size;
10475         struct bpf_core_relo core_relo = {};
10476         struct bpf_prog *prog = env->prog;
10477         const struct btf *btf = prog->aux->btf;
10478         struct bpf_core_ctx ctx = {
10479                 .log = &env->log,
10480                 .btf = btf,
10481         };
10482         bpfptr_t u_core_relo;
10483         int err;
10484
10485         nr_core_relo = attr->core_relo_cnt;
10486         if (!nr_core_relo)
10487                 return 0;
10488         if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
10489                 return -EINVAL;
10490
10491         rec_size = attr->core_relo_rec_size;
10492         if (rec_size < MIN_CORE_RELO_SIZE ||
10493             rec_size > MAX_CORE_RELO_SIZE ||
10494             rec_size % sizeof(u32))
10495                 return -EINVAL;
10496
10497         u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
10498         expected_size = sizeof(struct bpf_core_relo);
10499         ncopy = min_t(u32, expected_size, rec_size);
10500
10501         /* Unlike func_info and line_info, copy and apply each CO-RE
10502          * relocation record one at a time.
10503          */
10504         for (i = 0; i < nr_core_relo; i++) {
10505                 /* future proofing when sizeof(bpf_core_relo) changes */
10506                 err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
10507                 if (err) {
10508                         if (err == -E2BIG) {
10509                                 verbose(env, "nonzero tailing record in core_relo");
10510                                 if (copy_to_bpfptr_offset(uattr,
10511                                                           offsetof(union bpf_attr, core_relo_rec_size),
10512                                                           &expected_size, sizeof(expected_size)))
10513                                         err = -EFAULT;
10514                         }
10515                         break;
10516                 }
10517
10518                 if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) {
10519                         err = -EFAULT;
10520                         break;
10521                 }
10522
10523                 if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
10524                         verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
10525                                 i, core_relo.insn_off, prog->len);
10526                         err = -EINVAL;
10527                         break;
10528                 }
10529
10530                 err = bpf_core_apply(&ctx, &core_relo, i,
10531                                      &prog->insnsi[core_relo.insn_off / 8]);
10532                 if (err)
10533                         break;
10534                 bpfptr_add(&u_core_relo, rec_size);
10535         }
10536         return err;
10537 }
10538
10539 static int check_btf_info(struct bpf_verifier_env *env,
10540                           const union bpf_attr *attr,
10541                           bpfptr_t uattr)
10542 {
10543         struct btf *btf;
10544         int err;
10545
10546         if (!attr->func_info_cnt && !attr->line_info_cnt) {
10547                 if (check_abnormal_return(env))
10548                         return -EINVAL;
10549                 return 0;
10550         }
10551
10552         btf = btf_get_by_fd(attr->prog_btf_fd);
10553         if (IS_ERR(btf))
10554                 return PTR_ERR(btf);
10555         if (btf_is_kernel(btf)) {
10556                 btf_put(btf);
10557                 return -EACCES;
10558         }
10559         env->prog->aux->btf = btf;
10560
10561         err = check_btf_func(env, attr, uattr);
10562         if (err)
10563                 return err;
10564
10565         err = check_btf_line(env, attr, uattr);
10566         if (err)
10567                 return err;
10568
10569         err = check_core_relo(env, attr, uattr);
10570         if (err)
10571                 return err;
10572
10573         return 0;
10574 }
10575
10576 /* check %cur's range satisfies %old's */
10577 static bool range_within(struct bpf_reg_state *old,
10578                          struct bpf_reg_state *cur)
10579 {
10580         return old->umin_value <= cur->umin_value &&
10581                old->umax_value >= cur->umax_value &&
10582                old->smin_value <= cur->smin_value &&
10583                old->smax_value >= cur->smax_value &&
10584                old->u32_min_value <= cur->u32_min_value &&
10585                old->u32_max_value >= cur->u32_max_value &&
10586                old->s32_min_value <= cur->s32_min_value &&
10587                old->s32_max_value >= cur->s32_max_value;
10588 }
10589
10590 /* If in the old state two registers had the same id, then they need to have
10591  * the same id in the new state as well.  But that id could be different from
10592  * the old state, so we need to track the mapping from old to new ids.
10593  * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
10594  * regs with old id 5 must also have new id 9 for the new state to be safe.  But
10595  * regs with a different old id could still have new id 9, we don't care about
10596  * that.
10597  * So we look through our idmap to see if this old id has been seen before.  If
10598  * so, we require the new id to match; otherwise, we add the id pair to the map.
10599  */
10600 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
10601 {
10602         unsigned int i;
10603
10604         for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
10605                 if (!idmap[i].old) {
10606                         /* Reached an empty slot; haven't seen this id before */
10607                         idmap[i].old = old_id;
10608                         idmap[i].cur = cur_id;
10609                         return true;
10610                 }
10611                 if (idmap[i].old == old_id)
10612                         return idmap[i].cur == cur_id;
10613         }
10614         /* We ran out of idmap slots, which should be impossible */
10615         WARN_ON_ONCE(1);
10616         return false;
10617 }
10618
10619 static void clean_func_state(struct bpf_verifier_env *env,
10620                              struct bpf_func_state *st)
10621 {
10622         enum bpf_reg_liveness live;
10623         int i, j;
10624
10625         for (i = 0; i < BPF_REG_FP; i++) {
10626                 live = st->regs[i].live;
10627                 /* liveness must not touch this register anymore */
10628                 st->regs[i].live |= REG_LIVE_DONE;
10629                 if (!(live & REG_LIVE_READ))
10630                         /* since the register is unused, clear its state
10631                          * to make further comparison simpler
10632                          */
10633                         __mark_reg_not_init(env, &st->regs[i]);
10634         }
10635
10636         for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
10637                 live = st->stack[i].spilled_ptr.live;
10638                 /* liveness must not touch this stack slot anymore */
10639                 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
10640                 if (!(live & REG_LIVE_READ)) {
10641                         __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
10642                         for (j = 0; j < BPF_REG_SIZE; j++)
10643                                 st->stack[i].slot_type[j] = STACK_INVALID;
10644                 }
10645         }
10646 }
10647
10648 static void clean_verifier_state(struct bpf_verifier_env *env,
10649                                  struct bpf_verifier_state *st)
10650 {
10651         int i;
10652
10653         if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
10654                 /* all regs in this state in all frames were already marked */
10655                 return;
10656
10657         for (i = 0; i <= st->curframe; i++)
10658                 clean_func_state(env, st->frame[i]);
10659 }
10660
10661 /* the parentage chains form a tree.
10662  * the verifier states are added to state lists at given insn and
10663  * pushed into state stack for future exploration.
10664  * when the verifier reaches bpf_exit insn some of the verifer states
10665  * stored in the state lists have their final liveness state already,
10666  * but a lot of states will get revised from liveness point of view when
10667  * the verifier explores other branches.
10668  * Example:
10669  * 1: r0 = 1
10670  * 2: if r1 == 100 goto pc+1
10671  * 3: r0 = 2
10672  * 4: exit
10673  * when the verifier reaches exit insn the register r0 in the state list of
10674  * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
10675  * of insn 2 and goes exploring further. At the insn 4 it will walk the
10676  * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
10677  *
10678  * Since the verifier pushes the branch states as it sees them while exploring
10679  * the program the condition of walking the branch instruction for the second
10680  * time means that all states below this branch were already explored and
10681  * their final liveness marks are already propagated.
10682  * Hence when the verifier completes the search of state list in is_state_visited()
10683  * we can call this clean_live_states() function to mark all liveness states
10684  * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
10685  * will not be used.
10686  * This function also clears the registers and stack for states that !READ
10687  * to simplify state merging.
10688  *
10689  * Important note here that walking the same branch instruction in the callee
10690  * doesn't meant that the states are DONE. The verifier has to compare
10691  * the callsites
10692  */
10693 static void clean_live_states(struct bpf_verifier_env *env, int insn,
10694                               struct bpf_verifier_state *cur)
10695 {
10696         struct bpf_verifier_state_list *sl;
10697         int i;
10698
10699         sl = *explored_state(env, insn);
10700         while (sl) {
10701                 if (sl->state.branches)
10702                         goto next;
10703                 if (sl->state.insn_idx != insn ||
10704                     sl->state.curframe != cur->curframe)
10705                         goto next;
10706                 for (i = 0; i <= cur->curframe; i++)
10707                         if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
10708                                 goto next;
10709                 clean_verifier_state(env, &sl->state);
10710 next:
10711                 sl = sl->next;
10712         }
10713 }
10714
/* Returns true if (rold safe implies rcur safe)
 *
 * @env:   verifier environment; only explore_alu_limits is read here
 * @rold:  register from an already explored state
 * @rcur:  the same register in the state being checked now
 * @idmap: old-id -> current-id map shared across the whole state comparison
 *         (may be updated through check_ids())
 *
 * The checks run in a fixed order: a register the old state never read is
 * always safe; PTR_TO_STACK needs a byte-wise match (everything before the
 * 'parent' member) plus the same frame number; any other byte-wise match is
 * safe; after that the decision depends on the base type of @rold.
 */
static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
                    struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
{
        bool equal;

        if (!(rold->live & REG_LIVE_READ))
                /* explored state didn't use this */
                return true;

        /* compare all members laid out before 'parent' */
        equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;

        if (rold->type == PTR_TO_STACK)
                /* two stack pointers are equal only if they're pointing to
                 * the same stack frame, since fp-8 in foo != fp-8 in bar
                 */
                return equal && rold->frameno == rcur->frameno;

        if (equal)
                return true;

        if (rold->type == NOT_INIT)
                /* explored state can't have used this */
                return true;
        if (rcur->type == NOT_INIT)
                return false;
        switch (base_type(rold->type)) {
        case SCALAR_VALUE:
                if (env->explore_alu_limits)
                        return false;
                if (rcur->type == SCALAR_VALUE) {
                        /* neither side is marked precise: accept without
                         * comparing ranges
                         */
                        if (!rold->precise && !rcur->precise)
                                return true;
                        /* new val must satisfy old val knowledge */
                        return range_within(rold, rcur) &&
                               tnum_in(rold->var_off, rcur->var_off);
                } else {
                        /* We're trying to use a pointer in place of a scalar.
                         * Even if the scalar was unbounded, this could lead to
                         * pointer leaks because scalars are allowed to leak
                         * while pointers are not. We could make this safe in
                         * special cases if root is calling us, but it's
                         * probably not worth the hassle.
                         */
                        return false;
                }
        case PTR_TO_MAP_KEY:
        case PTR_TO_MAP_VALUE:
                /* a PTR_TO_MAP_VALUE could be safe to use as a
                 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
                 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
                 * checked, doing so could have affected others with the same
                 * id, and we can't check for that because we lost the id when
                 * we converted to a PTR_TO_MAP_VALUE.
                 */
                if (type_may_be_null(rold->type)) {
                        if (!type_may_be_null(rcur->type))
                                return false;
                        /* everything laid out before 'id' must match exactly */
                        if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
                                return false;
                        /* Check our ids match any regs they're supposed to */
                        return check_ids(rold->id, rcur->id, idmap);
                }

                /* If the new min/max/var_off satisfy the old ones and
                 * everything else matches, we are OK.
                 * 'id' is not compared, since it's only used for maps with
                 * bpf_spin_lock inside map element and in such cases if
                 * the rest of the prog is valid for one map element then
                 * it's valid for all map elements regardless of the key
                 * used in bpf_map_lookup()
                 */
                return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
                       range_within(rold, rcur) &&
                       tnum_in(rold->var_off, rcur->var_off);
        case PTR_TO_PACKET_META:
        case PTR_TO_PACKET:
                if (rcur->type != rold->type)
                        return false;
                /* We must have at least as much range as the old ptr
                 * did, so that any accesses which were safe before are
                 * still safe.  This is true even if old range < old off,
                 * since someone could have accessed through (ptr - k), or
                 * even done ptr -= k in a register, to get a safe access.
                 */
                if (rold->range > rcur->range)
                        return false;
                /* If the offsets don't match, we can't trust our alignment;
                 * nor can we be sure that we won't fall out of range.
                 */
                if (rold->off != rcur->off)
                        return false;
                /* id relations must be preserved */
                if (rold->id && !check_ids(rold->id, rcur->id, idmap))
                        return false;
                /* new val must satisfy old val knowledge */
                return range_within(rold, rcur) &&
                       tnum_in(rold->var_off, rcur->var_off);
        case PTR_TO_CTX:
        case CONST_PTR_TO_MAP:
        case PTR_TO_PACKET_END:
        case PTR_TO_FLOW_KEYS:
        case PTR_TO_SOCKET:
        case PTR_TO_SOCK_COMMON:
        case PTR_TO_TCP_SOCK:
        case PTR_TO_XDP_SOCK:
                /* Only valid matches are exact, which memcmp() above
                 * would have accepted
                 */
                /* these labels share the "return false" under default */
        default:
                /* Don't know what's going on, just say it's not safe */
                return false;
        }

        /* Shouldn't get here; if we do, say it's not safe */
        WARN_ON_ONCE(1);
        return false;
}
10833
10834 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
10835                       struct bpf_func_state *cur, struct bpf_id_pair *idmap)
10836 {
10837         int i, spi;
10838
10839         /* walk slots of the explored stack and ignore any additional
10840          * slots in the current stack, since explored(safe) state
10841          * didn't use them
10842          */
10843         for (i = 0; i < old->allocated_stack; i++) {
10844                 spi = i / BPF_REG_SIZE;
10845
10846                 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
10847                         i += BPF_REG_SIZE - 1;
10848                         /* explored state didn't use this */
10849                         continue;
10850                 }
10851
10852                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
10853                         continue;
10854
10855                 /* explored stack has more populated slots than current stack
10856                  * and these slots were used
10857                  */
10858                 if (i >= cur->allocated_stack)
10859                         return false;
10860
10861                 /* if old state was safe with misc data in the stack
10862                  * it will be safe with zero-initialized stack.
10863                  * The opposite is not true
10864                  */
10865                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
10866                     cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
10867                         continue;
10868                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
10869                     cur->stack[spi].slot_type[i % BPF_REG_SIZE])
10870                         /* Ex: old explored (safe) state has STACK_SPILL in
10871                          * this stack slot, but current has STACK_MISC ->
10872                          * this verifier states are not equivalent,
10873                          * return false to continue verification of this path
10874                          */
10875                         return false;
10876                 if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
10877                         continue;
10878                 if (!is_spilled_reg(&old->stack[spi]))
10879                         continue;
10880                 if (!regsafe(env, &old->stack[spi].spilled_ptr,
10881                              &cur->stack[spi].spilled_ptr, idmap))
10882                         /* when explored and current stack slot are both storing
10883                          * spilled registers, check that stored pointers types
10884                          * are the same as well.
10885                          * Ex: explored safe path could have stored
10886                          * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
10887                          * but current path has stored:
10888                          * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
10889                          * such verifier states are not equivalent.
10890                          * return false to continue verification of this path
10891                          */
10892                         return false;
10893         }
10894         return true;
10895 }
10896
10897 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
10898 {
10899         if (old->acquired_refs != cur->acquired_refs)
10900                 return false;
10901         return !memcmp(old->refs, cur->refs,
10902                        sizeof(*old->refs) * old->acquired_refs);
10903 }
10904
10905 /* compare two verifier states
10906  *
10907  * all states stored in state_list are known to be valid, since
10908  * verifier reached 'bpf_exit' instruction through them
10909  *
10910  * this function is called when verifier exploring different branches of
10911  * execution popped from the state stack. If it sees an old state that has
10912  * more strict register state and more strict stack state then this execution
10913  * branch doesn't need to be explored further, since verifier already
10914  * concluded that more strict state leads to valid finish.
10915  *
10916  * Therefore two states are equivalent if register state is more conservative
10917  * and explored stack state is more conservative than the current one.
10918  * Example:
10919  *       explored                   current
10920  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
10921  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
10922  *
10923  * In other words if current stack state (one being explored) has more
10924  * valid slots than old one that already passed validation, it means
10925  * the verifier can stop exploring and conclude that current state is valid too
10926  *
10927  * Similarly with registers. If explored state has register type as invalid
10928  * whereas register type in current state is meaningful, it means that
10929  * the current state will reach 'bpf_exit' instruction safely
10930  */
10931 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
10932                               struct bpf_func_state *cur)
10933 {
10934         int i;
10935
10936         memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
10937         for (i = 0; i < MAX_BPF_REG; i++)
10938                 if (!regsafe(env, &old->regs[i], &cur->regs[i],
10939                              env->idmap_scratch))
10940                         return false;
10941
10942         if (!stacksafe(env, old, cur, env->idmap_scratch))
10943                 return false;
10944
10945         if (!refsafe(old, cur))
10946                 return false;
10947
10948         return true;
10949 }
10950
10951 static bool states_equal(struct bpf_verifier_env *env,
10952                          struct bpf_verifier_state *old,
10953                          struct bpf_verifier_state *cur)
10954 {
10955         int i;
10956
10957         if (old->curframe != cur->curframe)
10958                 return false;
10959
10960         /* Verification state from speculative execution simulation
10961          * must never prune a non-speculative execution one.
10962          */
10963         if (old->speculative && !cur->speculative)
10964                 return false;
10965
10966         if (old->active_spin_lock != cur->active_spin_lock)
10967                 return false;
10968
10969         /* for states to be equal callsites have to be the same
10970          * and all frame states need to be equivalent
10971          */
10972         for (i = 0; i <= old->curframe; i++) {
10973                 if (old->frame[i]->callsite != cur->frame[i]->callsite)
10974                         return false;
10975                 if (!func_states_equal(env, old->frame[i], cur->frame[i]))
10976                         return false;
10977         }
10978         return true;
10979 }
10980
10981 /* Return 0 if no propagation happened. Return negative error code if error
10982  * happened. Otherwise, return the propagated bit.
10983  */
10984 static int propagate_liveness_reg(struct bpf_verifier_env *env,
10985                                   struct bpf_reg_state *reg,
10986                                   struct bpf_reg_state *parent_reg)
10987 {
10988         u8 parent_flag = parent_reg->live & REG_LIVE_READ;
10989         u8 flag = reg->live & REG_LIVE_READ;
10990         int err;
10991
10992         /* When comes here, read flags of PARENT_REG or REG could be any of
10993          * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
10994          * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
10995          */
10996         if (parent_flag == REG_LIVE_READ64 ||
10997             /* Or if there is no read flag from REG. */
10998             !flag ||
10999             /* Or if the read flag from REG is the same as PARENT_REG. */
11000             parent_flag == flag)
11001                 return 0;
11002
11003         err = mark_reg_read(env, reg, parent_reg, flag);
11004         if (err)
11005                 return err;
11006
11007         return flag;
11008 }
11009
11010 /* A write screens off any subsequent reads; but write marks come from the
11011  * straight-line code between a state and its parent.  When we arrive at an
11012  * equivalent state (jump target or such) we didn't arrive by the straight-line
11013  * code, so read marks in the state must propagate to the parent regardless
11014  * of the state's write marks. That's what 'parent == state->parent' comparison
11015  * in mark_reg_read() is for.
11016  */
11017 static int propagate_liveness(struct bpf_verifier_env *env,
11018                               const struct bpf_verifier_state *vstate,
11019                               struct bpf_verifier_state *vparent)
11020 {
11021         struct bpf_reg_state *state_reg, *parent_reg;
11022         struct bpf_func_state *state, *parent;
11023         int i, frame, err = 0;
11024
11025         if (vparent->curframe != vstate->curframe) {
11026                 WARN(1, "propagate_live: parent frame %d current frame %d\n",
11027                      vparent->curframe, vstate->curframe);
11028                 return -EFAULT;
11029         }
11030         /* Propagate read liveness of registers... */
11031         BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
11032         for (frame = 0; frame <= vstate->curframe; frame++) {
11033                 parent = vparent->frame[frame];
11034                 state = vstate->frame[frame];
11035                 parent_reg = parent->regs;
11036                 state_reg = state->regs;
11037                 /* We don't need to worry about FP liveness, it's read-only */
11038                 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
11039                         err = propagate_liveness_reg(env, &state_reg[i],
11040                                                      &parent_reg[i]);
11041                         if (err < 0)
11042                                 return err;
11043                         if (err == REG_LIVE_READ64)
11044                                 mark_insn_zext(env, &parent_reg[i]);
11045                 }
11046
11047                 /* Propagate stack slots. */
11048                 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
11049                             i < parent->allocated_stack / BPF_REG_SIZE; i++) {
11050                         parent_reg = &parent->stack[i].spilled_ptr;
11051                         state_reg = &state->stack[i].spilled_ptr;
11052                         err = propagate_liveness_reg(env, state_reg,
11053                                                      parent_reg);
11054                         if (err < 0)
11055                                 return err;
11056                 }
11057         }
11058         return 0;
11059 }
11060
11061 /* find precise scalars in the previous equivalent state and
11062  * propagate them into the current state
11063  */
11064 static int propagate_precision(struct bpf_verifier_env *env,
11065                                const struct bpf_verifier_state *old)
11066 {
11067         struct bpf_reg_state *state_reg;
11068         struct bpf_func_state *state;
11069         int i, err = 0;
11070
11071         state = old->frame[old->curframe];
11072         state_reg = state->regs;
11073         for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
11074                 if (state_reg->type != SCALAR_VALUE ||
11075                     !state_reg->precise)
11076                         continue;
11077                 if (env->log.level & BPF_LOG_LEVEL2)
11078                         verbose(env, "propagating r%d\n", i);
11079                 err = mark_chain_precision(env, i);
11080                 if (err < 0)
11081                         return err;
11082         }
11083
11084         for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
11085                 if (!is_spilled_reg(&state->stack[i]))
11086                         continue;
11087                 state_reg = &state->stack[i].spilled_ptr;
11088                 if (state_reg->type != SCALAR_VALUE ||
11089                     !state_reg->precise)
11090                         continue;
11091                 if (env->log.level & BPF_LOG_LEVEL2)
11092                         verbose(env, "propagating fp%d\n",
11093                                 (-i - 1) * BPF_REG_SIZE);
11094                 err = mark_chain_precision_stack(env, i);
11095                 if (err < 0)
11096                         return err;
11097         }
11098         return 0;
11099 }
11100
11101 static bool states_maybe_looping(struct bpf_verifier_state *old,
11102                                  struct bpf_verifier_state *cur)
11103 {
11104         struct bpf_func_state *fold, *fcur;
11105         int i, fr = cur->curframe;
11106
11107         if (old->curframe != fr)
11108                 return false;
11109
11110         fold = old->frame[fr];
11111         fcur = cur->frame[fr];
11112         for (i = 0; i < MAX_BPF_REG; i++)
11113                 if (memcmp(&fold->regs[i], &fcur->regs[i],
11114                            offsetof(struct bpf_reg_state, parent)))
11115                         return false;
11116         return true;
11117 }
11118
11119
/* Search the explored states recorded for @insn_idx for one that makes the
 * current state redundant and, if none is found, possibly record the current
 * state for future pruning.
 *
 * Returns:
 *   1        an equivalent, fully explored state was found; liveness and
 *            precision were propagated and the caller may prune this path
 *   0        no equivalent state; either @insn_idx is not a prune point or
 *            the current state was added to the explored list
 *   -EINVAL  the current state matches a state that still has branches to
 *            explore (infinite loop)
 *   -ENOMEM  allocation of the new state list entry failed
 *   other    whatever propagate_liveness(), propagate_precision(),
 *            push_jmp_history() or copy_verifier_state() returned
 */
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
        struct bpf_verifier_state_list *new_sl;
        struct bpf_verifier_state_list *sl, **pprev;
        struct bpf_verifier_state *cur = env->cur_state, *new;
        int i, j, err, states_cnt = 0;
        bool add_new_state = env->test_state_freq ? true : false;

        cur->last_insn_idx = env->prev_insn_idx;
        if (!env->insn_aux_data[insn_idx].prune_point)
                /* this 'insn_idx' instruction wasn't marked, so we will not
                 * be doing state search here
                 */
                return 0;

        /* bpf progs typically have pruning point every 4 instructions
         * http://vger.kernel.org/bpfconf2019.html#session-1
         * Do not add new state for future pruning if the verifier hasn't seen
         * at least 2 jumps and at least 8 instructions.
         * This heuristic helps decrease 'total_states' and 'peak_states' metric.
         * In tests that amounts to up to 50% reduction into total verifier
         * memory consumption and 20% verifier time speedup.
         */
        if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
            env->insn_processed - env->prev_insn_processed >= 8)
                add_new_state = true;

        /* pprev always points at the link that refers to sl, so that sl can
         * be unlinked in place while walking the list
         */
        pprev = explored_state(env, insn_idx);
        sl = *pprev;

        clean_live_states(env, insn_idx, cur);

        while (sl) {
                states_cnt++;
                if (sl->state.insn_idx != insn_idx)
                        goto next;

                /* states with branches still to explore are only used for
                 * loop detection, never for pruning
                 */
                if (sl->state.branches) {
                        struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];

                        if (frame->in_async_callback_fn &&
                            frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
                                /* Different async_entry_cnt means that the verifier is
                                 * processing another entry into async callback.
                                 * Seeing the same state is not an indication of infinite
                                 * loop or infinite recursion.
                                 * But finding the same state doesn't mean that it's safe
                                 * to stop processing the current state. The previous state
                                 * hasn't yet reached bpf_exit, since state.branches > 0.
                                 * Checking in_async_callback_fn alone is not enough either.
                                 * Since the verifier still needs to catch infinite loops
                                 * inside async callbacks.
                                 */
                        } else if (states_maybe_looping(&sl->state, cur) &&
                                   states_equal(env, &sl->state, cur)) {
                                verbose_linfo(env, insn_idx, "; ");
                                verbose(env, "infinite loop detected at insn %d\n", insn_idx);
                                return -EINVAL;
                        }
                        /* if the verifier is processing a loop, avoid adding new state
                         * too often, since different loop iterations have distinct
                         * states and may not help future pruning.
                         * This threshold shouldn't be too low to make sure that
                         * a loop with large bound will be rejected quickly.
                         * The most abusive loop will be:
                         * r1 += 1
                         * if r1 < 1000000 goto pc-2
                         * 1M insn_processed limit / 100 == 10k peak states.
                         * This threshold shouldn't be too high either, since states
                         * at the end of the loop are likely to be useful in pruning.
                         */
                        if (env->jmps_processed - env->prev_jmps_processed < 20 &&
                            env->insn_processed - env->prev_insn_processed < 100)
                                add_new_state = false;
                        goto miss;
                }
                if (states_equal(env, &sl->state, cur)) {
                        sl->hit_cnt++;
                        /* reached equivalent register/stack state,
                         * prune the search.
                         * Registers read by the continuation are read by us.
                         * If we have any write marks in env->cur_state, they
                         * will prevent corresponding reads in the continuation
                         * from reaching our parent (an explored_state).  Our
                         * own state will get the read marks recorded, but
                         * they'll be immediately forgotten as we're pruning
                         * this state and will pop a new one.
                         */
                        err = propagate_liveness(env, &sl->state, cur);

                        /* if previous state reached the exit with precision and
                         * current state is equivalent to it (except precision marks)
                         * the precision needs to be propagated back in
                         * the current state.
                         */
                        err = err ? : push_jmp_history(env, cur);
                        err = err ? : propagate_precision(env, &sl->state);
                        if (err)
                                return err;
                        return 1;
                }
miss:
                /* when new state is not going to be added do not increase miss count.
                 * Otherwise several loop iterations will remove the state
                 * recorded earlier. The goal of these heuristics is to have
                 * states from some iterations of the loop (some in the beginning
                 * and some at the end) to help pruning.
                 */
                if (add_new_state)
                        sl->miss_cnt++;
                /* heuristic to determine whether this state is beneficial
                 * to keep checking from state equivalence point of view.
                 * Higher numbers increase max_states_per_insn and verification time,
                 * but do not meaningfully decrease insn_processed.
                 */
                if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
                        /* the state is unlikely to be useful. Remove it to
                         * speed up verification
                         */
                        *pprev = sl->next;
                        if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
                                u32 br = sl->state.branches;

                                WARN_ONCE(br,
                                          "BUG live_done but branches_to_explore %d\n",
                                          br);
                                free_verifier_state(&sl->state, false);
                                kfree(sl);
                                env->peak_states--;
                        } else {
                                /* cannot free this state, since parentage chain may
                                 * walk it later. Add it for free_list instead to
                                 * be freed at the end of verification
                                 */
                                sl->next = env->free_list;
                                env->free_list = sl;
                        }
                        /* sl was unlinked: continue with its successor
                         * without advancing pprev
                         */
                        sl = *pprev;
                        continue;
                }
next:
                pprev = &sl->next;
                sl = *pprev;
        }

        if (env->max_states_per_insn < states_cnt)
                env->max_states_per_insn = states_cnt;

        /* without bpf_capable, stop recording new states once the list for
         * this instruction exceeds BPF_COMPLEXITY_LIMIT_STATES
         */
        if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
                return push_jmp_history(env, cur);

        if (!add_new_state)
                return push_jmp_history(env, cur);

        /* There were no equivalent states, remember the current one.
         * Technically the current state is not proven to be safe yet,
         * but it will either reach outer most bpf_exit (which means it's safe)
         * or it will be rejected. When there are no loops the verifier won't be
         * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
         * again on the way to bpf_exit.
         * When looping the sl->state.branches will be > 0 and this state
         * will not be considered for equivalence until branches == 0.
         */
        new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
        if (!new_sl)
                return -ENOMEM;
        env->total_states++;
        env->peak_states++;
        /* restart the jump/insn counters used by the add_new_state heuristics */
        env->prev_jmps_processed = env->jmps_processed;
        env->prev_insn_processed = env->insn_processed;

        /* add new state to the head of linked list */
        new = &new_sl->state;
        err = copy_verifier_state(new, cur);
        if (err) {
                free_verifier_state(new, false);
                kfree(new_sl);
                return err;
        }
        new->insn_idx = insn_idx;
        WARN_ONCE(new->branches != 1,
                  "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);

        /* the recorded copy becomes the parent of the state being explored */
        cur->parent = new;
        cur->first_insn_idx = insn_idx;
        clear_jmp_history(cur);
        new_sl->next = *explored_state(env, insn_idx);
        *explored_state(env, insn_idx) = new_sl;
        /* connect new state to parentage chain. Current frame needs all
         * registers connected. Only r6 - r9 of the callers are alive (pushed
         * to the stack implicitly by JITs) so in callers' frames connect just
         * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
         * the state of the call instruction (with WRITTEN set), and r0 comes
         * from callee with its full parentage chain, anyway.
         */
        /* clear write marks in current state: the writes we did are not writes
         * our child did, so they don't screen off its reads from us.
         * (There are no read marks in current state, because reads always mark
         * their parent and current state never has children yet.  Only
         * explored_states can get read marks.)
         */
        for (j = 0; j <= cur->curframe; j++) {
                for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
                        cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
                for (i = 0; i < BPF_REG_FP; i++)
                        cur->frame[j]->regs[i].live = REG_LIVE_NONE;
        }

        /* all stack frames are accessible from callee, clear them all */
        for (j = 0; j <= cur->curframe; j++) {
                struct bpf_func_state *frame = cur->frame[j];
                struct bpf_func_state *newframe = new->frame[j];

                for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
                        frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
                        frame->stack[i].spilled_ptr.parent =
                                                &newframe->stack[i].spilled_ptr;
                }
        }
        return 0;
}
11341
11342 /* Return true if it's OK to have the same insn return a different type. */
11343 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
11344 {
11345         switch (base_type(type)) {
11346         case PTR_TO_CTX:
11347         case PTR_TO_SOCKET:
11348         case PTR_TO_SOCK_COMMON:
11349         case PTR_TO_TCP_SOCK:
11350         case PTR_TO_XDP_SOCK:
11351         case PTR_TO_BTF_ID:
11352                 return false;
11353         default:
11354                 return true;
11355         }
11356 }
11357
11358 /* If an instruction was previously used with particular pointer types, then we
11359  * need to be careful to avoid cases such as the below, where it may be ok
11360  * for one branch accessing the pointer, but not ok for the other branch:
11361  *
11362  * R1 = sock_ptr
11363  * goto X;
11364  * ...
11365  * R1 = some_other_valid_ptr;
11366  * goto X;
11367  * ...
11368  * R2 = *(u32 *)(R1 + 0);
11369  */
11370 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
11371 {
11372         return src != prev && (!reg_type_mismatch_ok(src) ||
11373                                !reg_type_mismatch_ok(prev));
11374 }
11375
11376 static int do_check(struct bpf_verifier_env *env)
11377 {
11378         bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
11379         struct bpf_verifier_state *state = env->cur_state;
11380         struct bpf_insn *insns = env->prog->insnsi;
11381         struct bpf_reg_state *regs;
11382         int insn_cnt = env->prog->len;
11383         bool do_print_state = false;
11384         int prev_insn_idx = -1;
11385
11386         for (;;) {
11387                 struct bpf_insn *insn;
11388                 u8 class;
11389                 int err;
11390
11391                 env->prev_insn_idx = prev_insn_idx;
11392                 if (env->insn_idx >= insn_cnt) {
11393                         verbose(env, "invalid insn idx %d insn_cnt %d\n",
11394                                 env->insn_idx, insn_cnt);
11395                         return -EFAULT;
11396                 }
11397
11398                 insn = &insns[env->insn_idx];
11399                 class = BPF_CLASS(insn->code);
11400
11401                 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
11402                         verbose(env,
11403                                 "BPF program is too large. Processed %d insn\n",
11404                                 env->insn_processed);
11405                         return -E2BIG;
11406                 }
11407
11408                 err = is_state_visited(env, env->insn_idx);
11409                 if (err < 0)
11410                         return err;
11411                 if (err == 1) {
11412                         /* found equivalent state, can prune the search */
11413                         if (env->log.level & BPF_LOG_LEVEL) {
11414                                 if (do_print_state)
11415                                         verbose(env, "\nfrom %d to %d%s: safe\n",
11416                                                 env->prev_insn_idx, env->insn_idx,
11417                                                 env->cur_state->speculative ?
11418                                                 " (speculative execution)" : "");
11419                                 else
11420                                         verbose(env, "%d: safe\n", env->insn_idx);
11421                         }
11422                         goto process_bpf_exit;
11423                 }
11424
11425                 if (signal_pending(current))
11426                         return -EAGAIN;
11427
11428                 if (need_resched())
11429                         cond_resched();
11430
11431                 if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
11432                         verbose(env, "\nfrom %d to %d%s:",
11433                                 env->prev_insn_idx, env->insn_idx,
11434                                 env->cur_state->speculative ?
11435                                 " (speculative execution)" : "");
11436                         print_verifier_state(env, state->frame[state->curframe], true);
11437                         do_print_state = false;
11438                 }
11439
11440                 if (env->log.level & BPF_LOG_LEVEL) {
11441                         const struct bpf_insn_cbs cbs = {
11442                                 .cb_call        = disasm_kfunc_name,
11443                                 .cb_print       = verbose,
11444                                 .private_data   = env,
11445                         };
11446
11447                         if (verifier_state_scratched(env))
11448                                 print_insn_state(env, state->frame[state->curframe]);
11449
11450                         verbose_linfo(env, env->insn_idx, "; ");
11451                         env->prev_log_len = env->log.len_used;
11452                         verbose(env, "%d: ", env->insn_idx);
11453                         print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
11454                         env->prev_insn_print_len = env->log.len_used - env->prev_log_len;
11455                         env->prev_log_len = env->log.len_used;
11456                 }
11457
11458                 if (bpf_prog_is_dev_bound(env->prog->aux)) {
11459                         err = bpf_prog_offload_verify_insn(env, env->insn_idx,
11460                                                            env->prev_insn_idx);
11461                         if (err)
11462                                 return err;
11463                 }
11464
11465                 regs = cur_regs(env);
11466                 sanitize_mark_insn_seen(env);
11467                 prev_insn_idx = env->insn_idx;
11468
11469                 if (class == BPF_ALU || class == BPF_ALU64) {
11470                         err = check_alu_op(env, insn);
11471                         if (err)
11472                                 return err;
11473
11474                 } else if (class == BPF_LDX) {
11475                         enum bpf_reg_type *prev_src_type, src_reg_type;
11476
11477                         /* check for reserved fields is already done */
11478
11479                         /* check src operand */
11480                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
11481                         if (err)
11482                                 return err;
11483
11484                         err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
11485                         if (err)
11486                                 return err;
11487
11488                         src_reg_type = regs[insn->src_reg].type;
11489
11490                         /* check that memory (src_reg + off) is readable,
11491                          * the state of dst_reg will be updated by this func
11492                          */
11493                         err = check_mem_access(env, env->insn_idx, insn->src_reg,
11494                                                insn->off, BPF_SIZE(insn->code),
11495                                                BPF_READ, insn->dst_reg, false);
11496                         if (err)
11497                                 return err;
11498
11499                         prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
11500
11501                         if (*prev_src_type == NOT_INIT) {
11502                                 /* saw a valid insn
11503                                  * dst_reg = *(u32 *)(src_reg + off)
11504                                  * save type to validate intersecting paths
11505                                  */
11506                                 *prev_src_type = src_reg_type;
11507
11508                         } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
11509                                 /* ABuser program is trying to use the same insn
11510                                  * dst_reg = *(u32*) (src_reg + off)
11511                                  * with different pointer types:
11512                                  * src_reg == ctx in one branch and
11513                                  * src_reg == stack|map in some other branch.
11514                                  * Reject it.
11515                                  */
11516                                 verbose(env, "same insn cannot be used with different pointers\n");
11517                                 return -EINVAL;
11518                         }
11519
11520                 } else if (class == BPF_STX) {
11521                         enum bpf_reg_type *prev_dst_type, dst_reg_type;
11522
11523                         if (BPF_MODE(insn->code) == BPF_ATOMIC) {
11524                                 err = check_atomic(env, env->insn_idx, insn);
11525                                 if (err)
11526                                         return err;
11527                                 env->insn_idx++;
11528                                 continue;
11529                         }
11530
11531                         if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
11532                                 verbose(env, "BPF_STX uses reserved fields\n");
11533                                 return -EINVAL;
11534                         }
11535
11536                         /* check src1 operand */
11537                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
11538                         if (err)
11539                                 return err;
11540                         /* check src2 operand */
11541                         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
11542                         if (err)
11543                                 return err;
11544
11545                         dst_reg_type = regs[insn->dst_reg].type;
11546
11547                         /* check that memory (dst_reg + off) is writeable */
11548                         err = check_mem_access(env, env->insn_idx, insn->dst_reg,
11549                                                insn->off, BPF_SIZE(insn->code),
11550                                                BPF_WRITE, insn->src_reg, false);
11551                         if (err)
11552                                 return err;
11553
11554                         prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
11555
11556                         if (*prev_dst_type == NOT_INIT) {
11557                                 *prev_dst_type = dst_reg_type;
11558                         } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
11559                                 verbose(env, "same insn cannot be used with different pointers\n");
11560                                 return -EINVAL;
11561                         }
11562
11563                 } else if (class == BPF_ST) {
11564                         if (BPF_MODE(insn->code) != BPF_MEM ||
11565                             insn->src_reg != BPF_REG_0) {
11566                                 verbose(env, "BPF_ST uses reserved fields\n");
11567                                 return -EINVAL;
11568                         }
11569                         /* check src operand */
11570                         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
11571                         if (err)
11572                                 return err;
11573
11574                         if (is_ctx_reg(env, insn->dst_reg)) {
11575                                 verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
11576                                         insn->dst_reg,
11577                                         reg_type_str(env, reg_state(env, insn->dst_reg)->type));
11578                                 return -EACCES;
11579                         }
11580
11581                         /* check that memory (dst_reg + off) is writeable */
11582                         err = check_mem_access(env, env->insn_idx, insn->dst_reg,
11583                                                insn->off, BPF_SIZE(insn->code),
11584                                                BPF_WRITE, -1, false);
11585                         if (err)
11586                                 return err;
11587
11588                 } else if (class == BPF_JMP || class == BPF_JMP32) {
11589                         u8 opcode = BPF_OP(insn->code);
11590
11591                         env->jmps_processed++;
11592                         if (opcode == BPF_CALL) {
11593                                 if (BPF_SRC(insn->code) != BPF_K ||
11594                                     (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
11595                                      && insn->off != 0) ||
11596                                     (insn->src_reg != BPF_REG_0 &&
11597                                      insn->src_reg != BPF_PSEUDO_CALL &&
11598                                      insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
11599                                     insn->dst_reg != BPF_REG_0 ||
11600                                     class == BPF_JMP32) {
11601                                         verbose(env, "BPF_CALL uses reserved fields\n");
11602                                         return -EINVAL;
11603                                 }
11604
11605                                 if (env->cur_state->active_spin_lock &&
11606                                     (insn->src_reg == BPF_PSEUDO_CALL ||
11607                                      insn->imm != BPF_FUNC_spin_unlock)) {
11608                                         verbose(env, "function calls are not allowed while holding a lock\n");
11609                                         return -EINVAL;
11610                                 }
11611                                 if (insn->src_reg == BPF_PSEUDO_CALL)
11612                                         err = check_func_call(env, insn, &env->insn_idx);
11613                                 else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
11614                                         err = check_kfunc_call(env, insn, &env->insn_idx);
11615                                 else
11616                                         err = check_helper_call(env, insn, &env->insn_idx);
11617                                 if (err)
11618                                         return err;
11619                         } else if (opcode == BPF_JA) {
11620                                 if (BPF_SRC(insn->code) != BPF_K ||
11621                                     insn->imm != 0 ||
11622                                     insn->src_reg != BPF_REG_0 ||
11623                                     insn->dst_reg != BPF_REG_0 ||
11624                                     class == BPF_JMP32) {
11625                                         verbose(env, "BPF_JA uses reserved fields\n");
11626                                         return -EINVAL;
11627                                 }
11628
11629                                 env->insn_idx += insn->off + 1;
11630                                 continue;
11631
11632                         } else if (opcode == BPF_EXIT) {
11633                                 if (BPF_SRC(insn->code) != BPF_K ||
11634                                     insn->imm != 0 ||
11635                                     insn->src_reg != BPF_REG_0 ||
11636                                     insn->dst_reg != BPF_REG_0 ||
11637                                     class == BPF_JMP32) {
11638                                         verbose(env, "BPF_EXIT uses reserved fields\n");
11639                                         return -EINVAL;
11640                                 }
11641
11642                                 if (env->cur_state->active_spin_lock) {
11643                                         verbose(env, "bpf_spin_unlock is missing\n");
11644                                         return -EINVAL;
11645                                 }
11646
11647                                 if (state->curframe) {
11648                                         /* exit from nested function */
11649                                         err = prepare_func_exit(env, &env->insn_idx);
11650                                         if (err)
11651                                                 return err;
11652                                         do_print_state = true;
11653                                         continue;
11654                                 }
11655
11656                                 err = check_reference_leak(env);
11657                                 if (err)
11658                                         return err;
11659
11660                                 err = check_return_code(env);
11661                                 if (err)
11662                                         return err;
11663 process_bpf_exit:
11664                                 mark_verifier_state_scratched(env);
11665                                 update_branch_counts(env, env->cur_state);
11666                                 err = pop_stack(env, &prev_insn_idx,
11667                                                 &env->insn_idx, pop_log);
11668                                 if (err < 0) {
11669                                         if (err != -ENOENT)
11670                                                 return err;
11671                                         break;
11672                                 } else {
11673                                         do_print_state = true;
11674                                         continue;
11675                                 }
11676                         } else {
11677                                 err = check_cond_jmp_op(env, insn, &env->insn_idx);
11678                                 if (err)
11679                                         return err;
11680                         }
11681                 } else if (class == BPF_LD) {
11682                         u8 mode = BPF_MODE(insn->code);
11683
11684                         if (mode == BPF_ABS || mode == BPF_IND) {
11685                                 err = check_ld_abs(env, insn);
11686                                 if (err)
11687                                         return err;
11688
11689                         } else if (mode == BPF_IMM) {
11690                                 err = check_ld_imm(env, insn);
11691                                 if (err)
11692                                         return err;
11693
11694                                 env->insn_idx++;
11695                                 sanitize_mark_insn_seen(env);
11696                         } else {
11697                                 verbose(env, "invalid BPF_LD mode\n");
11698                                 return -EINVAL;
11699                         }
11700                 } else {
11701                         verbose(env, "unknown insn class %d\n", class);
11702                         return -EINVAL;
11703                 }
11704
11705                 env->insn_idx++;
11706         }
11707
11708         return 0;
11709 }
11710
11711 static int find_btf_percpu_datasec(struct btf *btf)
11712 {
11713         const struct btf_type *t;
11714         const char *tname;
11715         int i, n;
11716
11717         /*
11718          * Both vmlinux and module each have their own ".data..percpu"
11719          * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF
11720          * types to look at only module's own BTF types.
11721          */
11722         n = btf_nr_types(btf);
11723         if (btf_is_module(btf))
11724                 i = btf_nr_types(btf_vmlinux);
11725         else
11726                 i = 1;
11727
11728         for(; i < n; i++) {
11729                 t = btf_type_by_id(btf, i);
11730                 if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
11731                         continue;
11732
11733                 tname = btf_name_by_offset(btf, t->name_off);
11734                 if (!strcmp(tname, ".data..percpu"))
11735                         return i;
11736         }
11737
11738         return -ENOENT;
11739 }
11740
11741 /* replace pseudo btf_id with kernel symbol address */
11742 static int check_pseudo_btf_id(struct bpf_verifier_env *env,
11743                                struct bpf_insn *insn,
11744                                struct bpf_insn_aux_data *aux)
11745 {
11746         const struct btf_var_secinfo *vsi;
11747         const struct btf_type *datasec;
11748         struct btf_mod_pair *btf_mod;
11749         const struct btf_type *t;
11750         const char *sym_name;
11751         bool percpu = false;
11752         u32 type, id = insn->imm;
11753         struct btf *btf;
11754         s32 datasec_id;
11755         u64 addr;
11756         int i, btf_fd, err;
11757
11758         btf_fd = insn[1].imm;
11759         if (btf_fd) {
11760                 btf = btf_get_by_fd(btf_fd);
11761                 if (IS_ERR(btf)) {
11762                         verbose(env, "invalid module BTF object FD specified.\n");
11763                         return -EINVAL;
11764                 }
11765         } else {
11766                 if (!btf_vmlinux) {
11767                         verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
11768                         return -EINVAL;
11769                 }
11770                 btf = btf_vmlinux;
11771                 btf_get(btf);
11772         }
11773
11774         t = btf_type_by_id(btf, id);
11775         if (!t) {
11776                 verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
11777                 err = -ENOENT;
11778                 goto err_put;
11779         }
11780
11781         if (!btf_type_is_var(t)) {
11782                 verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id);
11783                 err = -EINVAL;
11784                 goto err_put;
11785         }
11786
11787         sym_name = btf_name_by_offset(btf, t->name_off);
11788         addr = kallsyms_lookup_name(sym_name);
11789         if (!addr) {
11790                 verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
11791                         sym_name);
11792                 err = -ENOENT;
11793                 goto err_put;
11794         }
11795
11796         datasec_id = find_btf_percpu_datasec(btf);
11797         if (datasec_id > 0) {
11798                 datasec = btf_type_by_id(btf, datasec_id);
11799                 for_each_vsi(i, datasec, vsi) {
11800                         if (vsi->type == id) {
11801                                 percpu = true;
11802                                 break;
11803                         }
11804                 }
11805         }
11806
11807         insn[0].imm = (u32)addr;
11808         insn[1].imm = addr >> 32;
11809
11810         type = t->type;
11811         t = btf_type_skip_modifiers(btf, type, NULL);
11812         if (percpu) {
11813                 aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
11814                 aux->btf_var.btf = btf;
11815                 aux->btf_var.btf_id = type;
11816         } else if (!btf_type_is_struct(t)) {
11817                 const struct btf_type *ret;
11818                 const char *tname;
11819                 u32 tsize;
11820
11821                 /* resolve the type size of ksym. */
11822                 ret = btf_resolve_size(btf, t, &tsize);
11823                 if (IS_ERR(ret)) {
11824                         tname = btf_name_by_offset(btf, t->name_off);
11825                         verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
11826                                 tname, PTR_ERR(ret));
11827                         err = -EINVAL;
11828                         goto err_put;
11829                 }
11830                 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
11831                 aux->btf_var.mem_size = tsize;
11832         } else {
11833                 aux->btf_var.reg_type = PTR_TO_BTF_ID;
11834                 aux->btf_var.btf = btf;
11835                 aux->btf_var.btf_id = type;
11836         }
11837
11838         /* check whether we recorded this BTF (and maybe module) already */
11839         for (i = 0; i < env->used_btf_cnt; i++) {
11840                 if (env->used_btfs[i].btf == btf) {
11841                         btf_put(btf);
11842                         return 0;
11843                 }
11844         }
11845
11846         if (env->used_btf_cnt >= MAX_USED_BTFS) {
11847                 err = -E2BIG;
11848                 goto err_put;
11849         }
11850
11851         btf_mod = &env->used_btfs[env->used_btf_cnt];
11852         btf_mod->btf = btf;
11853         btf_mod->module = NULL;
11854
11855         /* if we reference variables from kernel module, bump its refcount */
11856         if (btf_is_module(btf)) {
11857                 btf_mod->module = btf_try_get_module(btf);
11858                 if (!btf_mod->module) {
11859                         err = -ENXIO;
11860                         goto err_put;
11861                 }
11862         }
11863
11864         env->used_btf_cnt++;
11865
11866         return 0;
11867 err_put:
11868         btf_put(btf);
11869         return err;
11870 }
11871
11872 static int check_map_prealloc(struct bpf_map *map)
11873 {
11874         return (map->map_type != BPF_MAP_TYPE_HASH &&
11875                 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
11876                 map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
11877                 !(map->map_flags & BPF_F_NO_PREALLOC);
11878 }
11879
11880 static bool is_tracing_prog_type(enum bpf_prog_type type)
11881 {
11882         switch (type) {
11883         case BPF_PROG_TYPE_KPROBE:
11884         case BPF_PROG_TYPE_TRACEPOINT:
11885         case BPF_PROG_TYPE_PERF_EVENT:
11886         case BPF_PROG_TYPE_RAW_TRACEPOINT:
11887                 return true;
11888         default:
11889                 return false;
11890         }
11891 }
11892
11893 static bool is_preallocated_map(struct bpf_map *map)
11894 {
11895         if (!check_map_prealloc(map))
11896                 return false;
11897         if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta))
11898                 return false;
11899         return true;
11900 }
11901
11902 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
11903                                         struct bpf_map *map,
11904                                         struct bpf_prog *prog)
11905
11906 {
11907         enum bpf_prog_type prog_type = resolve_prog_type(prog);
11908         /*
11909          * Validate that trace type programs use preallocated hash maps.
11910          *
11911          * For programs attached to PERF events this is mandatory as the
11912          * perf NMI can hit any arbitrary code sequence.
11913          *
11914          * All other trace types using preallocated hash maps are unsafe as
11915          * well because tracepoint or kprobes can be inside locked regions
11916          * of the memory allocator or at a place where a recursion into the
11917          * memory allocator would see inconsistent state.
11918          *
11919          * On RT enabled kernels run-time allocation of all trace type
11920          * programs is strictly prohibited due to lock type constraints. On
11921          * !RT kernels it is allowed for backwards compatibility reasons for
11922          * now, but warnings are emitted so developers are made aware of
11923          * the unsafety and can fix their programs before this is enforced.
11924          */
11925         if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) {
11926                 if (prog_type == BPF_PROG_TYPE_PERF_EVENT) {
11927                         verbose(env, "perf_event programs can only use preallocated hash map\n");
11928                         return -EINVAL;
11929                 }
11930                 if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
11931                         verbose(env, "trace type programs can only use preallocated hash map\n");
11932                         return -EINVAL;
11933                 }
11934                 WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
11935                 verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
11936         }
11937
11938         if (map_value_has_spin_lock(map)) {
11939                 if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
11940                         verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
11941                         return -EINVAL;
11942                 }
11943
11944                 if (is_tracing_prog_type(prog_type)) {
11945                         verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
11946                         return -EINVAL;
11947                 }
11948
11949                 if (prog->aux->sleepable) {
11950                         verbose(env, "sleepable progs cannot use bpf_spin_lock yet\n");
11951                         return -EINVAL;
11952                 }
11953         }
11954
11955         if (map_value_has_timer(map)) {
11956                 if (is_tracing_prog_type(prog_type)) {
11957                         verbose(env, "tracing progs cannot use bpf_timer yet\n");
11958                         return -EINVAL;
11959                 }
11960         }
11961
11962         if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
11963             !bpf_offload_prog_map_match(prog, map)) {
11964                 verbose(env, "offload device mismatch between prog and map\n");
11965                 return -EINVAL;
11966         }
11967
11968         if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
11969                 verbose(env, "bpf_struct_ops map cannot be used in prog\n");
11970                 return -EINVAL;
11971         }
11972
11973         if (prog->aux->sleepable)
11974                 switch (map->map_type) {
11975                 case BPF_MAP_TYPE_HASH:
11976                 case BPF_MAP_TYPE_LRU_HASH:
11977                 case BPF_MAP_TYPE_ARRAY:
11978                 case BPF_MAP_TYPE_PERCPU_HASH:
11979                 case BPF_MAP_TYPE_PERCPU_ARRAY:
11980                 case BPF_MAP_TYPE_LRU_PERCPU_HASH:
11981                 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
11982                 case BPF_MAP_TYPE_HASH_OF_MAPS:
11983                         if (!is_preallocated_map(map)) {
11984                                 verbose(env,
11985                                         "Sleepable programs can only use preallocated maps\n");
11986                                 return -EINVAL;
11987                         }
11988                         break;
11989                 case BPF_MAP_TYPE_RINGBUF:
11990                 case BPF_MAP_TYPE_INODE_STORAGE:
11991                 case BPF_MAP_TYPE_SK_STORAGE:
11992                 case BPF_MAP_TYPE_TASK_STORAGE:
11993                         break;
11994                 default:
11995                         verbose(env,
11996                                 "Sleepable programs can only use array, hash, and ringbuf maps\n");
11997                         return -EINVAL;
11998                 }
11999
12000         return 0;
12001 }
12002
12003 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
12004 {
12005         return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
12006                 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
12007 }
12008
12009 /* find and rewrite pseudo imm in ld_imm64 instructions:
12010  *
12011  * 1. if it accesses map FD, replace it with actual map pointer.
12012  * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
12013  *
12014  * NOTE: btf_vmlinux is required for converting pseudo btf_id.
12015  */
12016 static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
12017 {
12018         struct bpf_insn *insn = env->prog->insnsi;
12019         int insn_cnt = env->prog->len;
12020         int i, j, err;
12021
12022         err = bpf_prog_calc_tag(env->prog);
12023         if (err)
12024                 return err;
12025
12026         for (i = 0; i < insn_cnt; i++, insn++) {
12027                 if (BPF_CLASS(insn->code) == BPF_LDX &&
12028                     (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
12029                         verbose(env, "BPF_LDX uses reserved fields\n");
12030                         return -EINVAL;
12031                 }
12032
12033                 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
12034                         struct bpf_insn_aux_data *aux;
12035                         struct bpf_map *map;
12036                         struct fd f;
12037                         u64 addr;
12038                         u32 fd;
12039
12040                         if (i == insn_cnt - 1 || insn[1].code != 0 ||
12041                             insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
12042                             insn[1].off != 0) {
12043                                 verbose(env, "invalid bpf_ld_imm64 insn\n");
12044                                 return -EINVAL;
12045                         }
12046
12047                         if (insn[0].src_reg == 0)
12048                                 /* valid generic load 64-bit imm */
12049                                 goto next_insn;
12050
12051                         if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
12052                                 aux = &env->insn_aux_data[i];
12053                                 err = check_pseudo_btf_id(env, insn, aux);
12054                                 if (err)
12055                                         return err;
12056                                 goto next_insn;
12057                         }
12058
12059                         if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
12060                                 aux = &env->insn_aux_data[i];
12061                                 aux->ptr_type = PTR_TO_FUNC;
12062                                 goto next_insn;
12063                         }
12064
12065                         /* In final convert_pseudo_ld_imm64() step, this is
12066                          * converted into regular 64-bit imm load insn.
12067                          */
12068                         switch (insn[0].src_reg) {
12069                         case BPF_PSEUDO_MAP_VALUE:
12070                         case BPF_PSEUDO_MAP_IDX_VALUE:
12071                                 break;
12072                         case BPF_PSEUDO_MAP_FD:
12073                         case BPF_PSEUDO_MAP_IDX:
12074                                 if (insn[1].imm == 0)
12075                                         break;
12076                                 fallthrough;
12077                         default:
12078                                 verbose(env, "unrecognized bpf_ld_imm64 insn\n");
12079                                 return -EINVAL;
12080                         }
12081
12082                         switch (insn[0].src_reg) {
12083                         case BPF_PSEUDO_MAP_IDX_VALUE:
12084                         case BPF_PSEUDO_MAP_IDX:
12085                                 if (bpfptr_is_null(env->fd_array)) {
12086                                         verbose(env, "fd_idx without fd_array is invalid\n");
12087                                         return -EPROTO;
12088                                 }
12089                                 if (copy_from_bpfptr_offset(&fd, env->fd_array,
12090                                                             insn[0].imm * sizeof(fd),
12091                                                             sizeof(fd)))
12092                                         return -EFAULT;
12093                                 break;
12094                         default:
12095                                 fd = insn[0].imm;
12096                                 break;
12097                         }
12098
12099                         f = fdget(fd);
12100                         map = __bpf_map_get(f);
12101                         if (IS_ERR(map)) {
12102                                 verbose(env, "fd %d is not pointing to valid bpf_map\n",
12103                                         insn[0].imm);
12104                                 return PTR_ERR(map);
12105                         }
12106
12107                         err = check_map_prog_compatibility(env, map, env->prog);
12108                         if (err) {
12109                                 fdput(f);
12110                                 return err;
12111                         }
12112
12113                         aux = &env->insn_aux_data[i];
12114                         if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
12115                             insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
12116                                 addr = (unsigned long)map;
12117                         } else {
12118                                 u32 off = insn[1].imm;
12119
12120                                 if (off >= BPF_MAX_VAR_OFF) {
12121                                         verbose(env, "direct value offset of %u is not allowed\n", off);
12122                                         fdput(f);
12123                                         return -EINVAL;
12124                                 }
12125
12126                                 if (!map->ops->map_direct_value_addr) {
12127                                         verbose(env, "no direct value access support for this map type\n");
12128                                         fdput(f);
12129                                         return -EINVAL;
12130                                 }
12131
12132                                 err = map->ops->map_direct_value_addr(map, &addr, off);
12133                                 if (err) {
12134                                         verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
12135                                                 map->value_size, off);
12136                                         fdput(f);
12137                                         return err;
12138                                 }
12139
12140                                 aux->map_off = off;
12141                                 addr += off;
12142                         }
12143
12144                         insn[0].imm = (u32)addr;
12145                         insn[1].imm = addr >> 32;
12146
12147                         /* check whether we recorded this map already */
12148                         for (j = 0; j < env->used_map_cnt; j++) {
12149                                 if (env->used_maps[j] == map) {
12150                                         aux->map_index = j;
12151                                         fdput(f);
12152                                         goto next_insn;
12153                                 }
12154                         }
12155
12156                         if (env->used_map_cnt >= MAX_USED_MAPS) {
12157                                 fdput(f);
12158                                 return -E2BIG;
12159                         }
12160
12161                         /* hold the map. If the program is rejected by verifier,
12162                          * the map will be released by release_maps() or it
12163                          * will be used by the valid program until it's unloaded
12164                          * and all maps are released in free_used_maps()
12165                          */
12166                         bpf_map_inc(map);
12167
12168                         aux->map_index = env->used_map_cnt;
12169                         env->used_maps[env->used_map_cnt++] = map;
12170
12171                         if (bpf_map_is_cgroup_storage(map) &&
12172                             bpf_cgroup_storage_assign(env->prog->aux, map)) {
12173                                 verbose(env, "only one cgroup storage of each type is allowed\n");
12174                                 fdput(f);
12175                                 return -EBUSY;
12176                         }
12177
12178                         fdput(f);
12179 next_insn:
12180                         insn++;
12181                         i++;
12182                         continue;
12183                 }
12184
12185                 /* Basic sanity check before we invest more work here. */
12186                 if (!bpf_opcode_in_insntable(insn->code)) {
12187                         verbose(env, "unknown opcode %02x\n", insn->code);
12188                         return -EINVAL;
12189                 }
12190         }
12191
12192         /* now all pseudo BPF_LD_IMM64 instructions load valid
12193          * 'struct bpf_map *' into a register instead of user map_fd.
12194          * These pointers will be used later by verifier to validate map access.
12195          */
12196         return 0;
12197 }
12198
12199 /* drop refcnt of maps used by the rejected program */
12200 static void release_maps(struct bpf_verifier_env *env)
12201 {
12202         __bpf_free_used_maps(env->prog->aux, env->used_maps,
12203                              env->used_map_cnt);
12204 }
12205
12206 /* drop refcnt of maps used by the rejected program */
12207 static void release_btfs(struct bpf_verifier_env *env)
12208 {
12209         __bpf_free_used_btfs(env->prog->aux, env->used_btfs,
12210                              env->used_btf_cnt);
12211 }
12212
12213 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
12214 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
12215 {
12216         struct bpf_insn *insn = env->prog->insnsi;
12217         int insn_cnt = env->prog->len;
12218         int i;
12219
12220         for (i = 0; i < insn_cnt; i++, insn++) {
12221                 if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
12222                         continue;
12223                 if (insn->src_reg == BPF_PSEUDO_FUNC)
12224                         continue;
12225                 insn->src_reg = 0;
12226         }
12227 }
12228
12229 /* single env->prog->insni[off] instruction was replaced with the range
12230  * insni[off, off + cnt).  Adjust corresponding insn_aux_data by copying
12231  * [0, off) and [off, end) to new locations, so the patched range stays zero
12232  */
12233 static void adjust_insn_aux_data(struct bpf_verifier_env *env,
12234                                  struct bpf_insn_aux_data *new_data,
12235                                  struct bpf_prog *new_prog, u32 off, u32 cnt)
12236 {
12237         struct bpf_insn_aux_data *old_data = env->insn_aux_data;
12238         struct bpf_insn *insn = new_prog->insnsi;
12239         u32 old_seen = old_data[off].seen;
12240         u32 prog_len;
12241         int i;
12242
12243         /* aux info at OFF always needs adjustment, no matter fast path
12244          * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
12245          * original insn at old prog.
12246          */
12247         old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
12248
12249         if (cnt == 1)
12250                 return;
12251         prog_len = new_prog->len;
12252
12253         memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
12254         memcpy(new_data + off + cnt - 1, old_data + off,
12255                sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
12256         for (i = off; i < off + cnt - 1; i++) {
12257                 /* Expand insni[off]'s seen count to the patched range. */
12258                 new_data[i].seen = old_seen;
12259                 new_data[i].zext_dst = insn_has_def32(env, insn + i);
12260         }
12261         env->insn_aux_data = new_data;
12262         vfree(old_data);
12263 }
12264
12265 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
12266 {
12267         int i;
12268
12269         if (len == 1)
12270                 return;
12271         /* NOTE: fake 'exit' subprog should be updated as well. */
12272         for (i = 0; i <= env->subprog_cnt; i++) {
12273                 if (env->subprog_info[i].start <= off)
12274                         continue;
12275                 env->subprog_info[i].start += len - 1;
12276         }
12277 }
12278
12279 static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
12280 {
12281         struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
12282         int i, sz = prog->aux->size_poke_tab;
12283         struct bpf_jit_poke_descriptor *desc;
12284
12285         for (i = 0; i < sz; i++) {
12286                 desc = &tab[i];
12287                 if (desc->insn_idx <= off)
12288                         continue;
12289                 desc->insn_idx += len - 1;
12290         }
12291 }
12292
12293 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
12294                                             const struct bpf_insn *patch, u32 len)
12295 {
12296         struct bpf_prog *new_prog;
12297         struct bpf_insn_aux_data *new_data = NULL;
12298
12299         if (len > 1) {
12300                 new_data = vzalloc(array_size(env->prog->len + len - 1,
12301                                               sizeof(struct bpf_insn_aux_data)));
12302                 if (!new_data)
12303                         return NULL;
12304         }
12305
12306         new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
12307         if (IS_ERR(new_prog)) {
12308                 if (PTR_ERR(new_prog) == -ERANGE)
12309                         verbose(env,
12310                                 "insn %d cannot be patched due to 16-bit range\n",
12311                                 env->insn_aux_data[off].orig_idx);
12312                 vfree(new_data);
12313                 return NULL;
12314         }
12315         adjust_insn_aux_data(env, new_data, new_prog, off, len);
12316         adjust_subprog_starts(env, off, len);
12317         adjust_poke_descs(new_prog, off, len);
12318         return new_prog;
12319 }
12320
12321 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
12322                                               u32 off, u32 cnt)
12323 {
12324         int i, j;
12325
12326         /* find first prog starting at or after off (first to remove) */
12327         for (i = 0; i < env->subprog_cnt; i++)
12328                 if (env->subprog_info[i].start >= off)
12329                         break;
12330         /* find first prog starting at or after off + cnt (first to stay) */
12331         for (j = i; j < env->subprog_cnt; j++)
12332                 if (env->subprog_info[j].start >= off + cnt)
12333                         break;
12334         /* if j doesn't start exactly at off + cnt, we are just removing
12335          * the front of previous prog
12336          */
12337         if (env->subprog_info[j].start != off + cnt)
12338                 j--;
12339
12340         if (j > i) {
12341                 struct bpf_prog_aux *aux = env->prog->aux;
12342                 int move;
12343
12344                 /* move fake 'exit' subprog as well */
12345                 move = env->subprog_cnt + 1 - j;
12346
12347                 memmove(env->subprog_info + i,
12348                         env->subprog_info + j,
12349                         sizeof(*env->subprog_info) * move);
12350                 env->subprog_cnt -= j - i;
12351
12352                 /* remove func_info */
12353                 if (aux->func_info) {
12354                         move = aux->func_info_cnt - j;
12355
12356                         memmove(aux->func_info + i,
12357                                 aux->func_info + j,
12358                                 sizeof(*aux->func_info) * move);
12359                         aux->func_info_cnt -= j - i;
12360                         /* func_info->insn_off is set after all code rewrites,
12361                          * in adjust_btf_func() - no need to adjust
12362                          */
12363                 }
12364         } else {
12365                 /* convert i from "first prog to remove" to "first to adjust" */
12366                 if (env->subprog_info[i].start == off)
12367                         i++;
12368         }
12369
12370         /* update fake 'exit' subprog as well */
12371         for (; i <= env->subprog_cnt; i++)
12372                 env->subprog_info[i].start -= cnt;
12373
12374         return 0;
12375 }
12376
12377 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
12378                                       u32 cnt)
12379 {
12380         struct bpf_prog *prog = env->prog;
12381         u32 i, l_off, l_cnt, nr_linfo;
12382         struct bpf_line_info *linfo;
12383
12384         nr_linfo = prog->aux->nr_linfo;
12385         if (!nr_linfo)
12386                 return 0;
12387
12388         linfo = prog->aux->linfo;
12389
12390         /* find first line info to remove, count lines to be removed */
12391         for (i = 0; i < nr_linfo; i++)
12392                 if (linfo[i].insn_off >= off)
12393                         break;
12394
12395         l_off = i;
12396         l_cnt = 0;
12397         for (; i < nr_linfo; i++)
12398                 if (linfo[i].insn_off < off + cnt)
12399                         l_cnt++;
12400                 else
12401                         break;
12402
12403         /* First live insn doesn't match first live linfo, it needs to "inherit"
12404          * last removed linfo.  prog is already modified, so prog->len == off
12405          * means no live instructions after (tail of the program was removed).
12406          */
12407         if (prog->len != off && l_cnt &&
12408             (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
12409                 l_cnt--;
12410                 linfo[--i].insn_off = off + cnt;
12411         }
12412
12413         /* remove the line info which refer to the removed instructions */
12414         if (l_cnt) {
12415                 memmove(linfo + l_off, linfo + i,
12416                         sizeof(*linfo) * (nr_linfo - i));
12417
12418                 prog->aux->nr_linfo -= l_cnt;
12419                 nr_linfo = prog->aux->nr_linfo;
12420         }
12421
12422         /* pull all linfo[i].insn_off >= off + cnt in by cnt */
12423         for (i = l_off; i < nr_linfo; i++)
12424                 linfo[i].insn_off -= cnt;
12425
12426         /* fix up all subprogs (incl. 'exit') which start >= off */
12427         for (i = 0; i <= env->subprog_cnt; i++)
12428                 if (env->subprog_info[i].linfo_idx > l_off) {
12429                         /* program may have started in the removed region but
12430                          * may not be fully removed
12431                          */
12432                         if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
12433                                 env->subprog_info[i].linfo_idx -= l_cnt;
12434                         else
12435                                 env->subprog_info[i].linfo_idx = l_off;
12436                 }
12437
12438         return 0;
12439 }
12440
12441 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
12442 {
12443         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12444         unsigned int orig_prog_len = env->prog->len;
12445         int err;
12446
12447         if (bpf_prog_is_dev_bound(env->prog->aux))
12448                 bpf_prog_offload_remove_insns(env, off, cnt);
12449
12450         err = bpf_remove_insns(env->prog, off, cnt);
12451         if (err)
12452                 return err;
12453
12454         err = adjust_subprog_starts_after_remove(env, off, cnt);
12455         if (err)
12456                 return err;
12457
12458         err = bpf_adj_linfo_after_remove(env, off, cnt);
12459         if (err)
12460                 return err;
12461
12462         memmove(aux_data + off, aux_data + off + cnt,
12463                 sizeof(*aux_data) * (orig_prog_len - off - cnt));
12464
12465         return 0;
12466 }
12467
12468 /* The verifier does more data flow analysis than llvm and will not
12469  * explore branches that are dead at run time. Malicious programs can
12470  * have dead code too. Therefore replace all dead at-run-time code
12471  * with 'ja -1'.
12472  *
12473  * Just nops are not optimal, e.g. if they would sit at the end of the
12474  * program and through another bug we would manage to jump there, then
12475  * we'd execute beyond program memory otherwise. Returning exception
12476  * code also wouldn't work since we can have subprogs where the dead
12477  * code could be located.
12478  */
12479 static void sanitize_dead_code(struct bpf_verifier_env *env)
12480 {
12481         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12482         struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
12483         struct bpf_insn *insn = env->prog->insnsi;
12484         const int insn_cnt = env->prog->len;
12485         int i;
12486
12487         for (i = 0; i < insn_cnt; i++) {
12488                 if (aux_data[i].seen)
12489                         continue;
12490                 memcpy(insn + i, &trap, sizeof(trap));
12491                 aux_data[i].zext_dst = false;
12492         }
12493 }
12494
12495 static bool insn_is_cond_jump(u8 code)
12496 {
12497         u8 op;
12498
12499         if (BPF_CLASS(code) == BPF_JMP32)
12500                 return true;
12501
12502         if (BPF_CLASS(code) != BPF_JMP)
12503                 return false;
12504
12505         op = BPF_OP(code);
12506         return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
12507 }
12508
12509 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
12510 {
12511         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12512         struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
12513         struct bpf_insn *insn = env->prog->insnsi;
12514         const int insn_cnt = env->prog->len;
12515         int i;
12516
12517         for (i = 0; i < insn_cnt; i++, insn++) {
12518                 if (!insn_is_cond_jump(insn->code))
12519                         continue;
12520
12521                 if (!aux_data[i + 1].seen)
12522                         ja.off = insn->off;
12523                 else if (!aux_data[i + 1 + insn->off].seen)
12524                         ja.off = 0;
12525                 else
12526                         continue;
12527
12528                 if (bpf_prog_is_dev_bound(env->prog->aux))
12529                         bpf_prog_offload_replace_insn(env, i, &ja);
12530
12531                 memcpy(insn, &ja, sizeof(ja));
12532         }
12533 }
12534
12535 static int opt_remove_dead_code(struct bpf_verifier_env *env)
12536 {
12537         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12538         int insn_cnt = env->prog->len;
12539         int i, err;
12540
12541         for (i = 0; i < insn_cnt; i++) {
12542                 int j;
12543
12544                 j = 0;
12545                 while (i + j < insn_cnt && !aux_data[i + j].seen)
12546                         j++;
12547                 if (!j)
12548                         continue;
12549
12550                 err = verifier_remove_insns(env, i, j);
12551                 if (err)
12552                         return err;
12553                 insn_cnt = env->prog->len;
12554         }
12555
12556         return 0;
12557 }
12558
12559 static int opt_remove_nops(struct bpf_verifier_env *env)
12560 {
12561         const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
12562         struct bpf_insn *insn = env->prog->insnsi;
12563         int insn_cnt = env->prog->len;
12564         int i, err;
12565
12566         for (i = 0; i < insn_cnt; i++) {
12567                 if (memcmp(&insn[i], &ja, sizeof(ja)))
12568                         continue;
12569
12570                 err = verifier_remove_insns(env, i, 1);
12571                 if (err)
12572                         return err;
12573                 insn_cnt--;
12574                 i--;
12575         }
12576
12577         return 0;
12578 }
12579
12580 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
12581                                          const union bpf_attr *attr)
12582 {
12583         struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
12584         struct bpf_insn_aux_data *aux = env->insn_aux_data;
12585         int i, patch_len, delta = 0, len = env->prog->len;
12586         struct bpf_insn *insns = env->prog->insnsi;
12587         struct bpf_prog *new_prog;
12588         bool rnd_hi32;
12589
12590         rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
12591         zext_patch[1] = BPF_ZEXT_REG(0);
12592         rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
12593         rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
12594         rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
12595         for (i = 0; i < len; i++) {
12596                 int adj_idx = i + delta;
12597                 struct bpf_insn insn;
12598                 int load_reg;
12599
12600                 insn = insns[adj_idx];
12601                 load_reg = insn_def_regno(&insn);
12602                 if (!aux[adj_idx].zext_dst) {
12603                         u8 code, class;
12604                         u32 imm_rnd;
12605
12606                         if (!rnd_hi32)
12607                                 continue;
12608
12609                         code = insn.code;
12610                         class = BPF_CLASS(code);
12611                         if (load_reg == -1)
12612                                 continue;
12613
12614                         /* NOTE: arg "reg" (the fourth one) is only used for
12615                          *       BPF_STX + SRC_OP, so it is safe to pass NULL
12616                          *       here.
12617                          */
12618                         if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
12619                                 if (class == BPF_LD &&
12620                                     BPF_MODE(code) == BPF_IMM)
12621                                         i++;
12622                                 continue;
12623                         }
12624
12625                         /* ctx load could be transformed into wider load. */
12626                         if (class == BPF_LDX &&
12627                             aux[adj_idx].ptr_type == PTR_TO_CTX)
12628                                 continue;
12629
12630                         imm_rnd = get_random_int();
12631                         rnd_hi32_patch[0] = insn;
12632                         rnd_hi32_patch[1].imm = imm_rnd;
12633                         rnd_hi32_patch[3].dst_reg = load_reg;
12634                         patch = rnd_hi32_patch;
12635                         patch_len = 4;
12636                         goto apply_patch_buffer;
12637                 }
12638
12639                 /* Add in an zero-extend instruction if a) the JIT has requested
12640                  * it or b) it's a CMPXCHG.
12641                  *
12642                  * The latter is because: BPF_CMPXCHG always loads a value into
12643                  * R0, therefore always zero-extends. However some archs'
12644                  * equivalent instruction only does this load when the
12645                  * comparison is successful. This detail of CMPXCHG is
12646                  * orthogonal to the general zero-extension behaviour of the
12647                  * CPU, so it's treated independently of bpf_jit_needs_zext.
12648                  */
12649                 if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
12650                         continue;
12651
12652                 if (WARN_ON(load_reg == -1)) {
12653                         verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
12654                         return -EFAULT;
12655                 }
12656
12657                 zext_patch[0] = insn;
12658                 zext_patch[1].dst_reg = load_reg;
12659                 zext_patch[1].src_reg = load_reg;
12660                 patch = zext_patch;
12661                 patch_len = 2;
12662 apply_patch_buffer:
12663                 new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
12664                 if (!new_prog)
12665                         return -ENOMEM;
12666                 env->prog = new_prog;
12667                 insns = new_prog->insnsi;
12668                 aux = env->insn_aux_data;
12669                 delta += patch_len - 1;
12670         }
12671
12672         return 0;
12673 }
12674
12675 /* convert load instructions that access fields of a context type into a
12676  * sequence of instructions that access fields of the underlying structure:
12677  *     struct __sk_buff    -> struct sk_buff
12678  *     struct bpf_sock_ops -> struct sock
12679  */
12680 static int convert_ctx_accesses(struct bpf_verifier_env *env)
12681 {
12682         const struct bpf_verifier_ops *ops = env->ops;
12683         int i, cnt, size, ctx_field_size, delta = 0;
12684         const int insn_cnt = env->prog->len;
12685         struct bpf_insn insn_buf[16], *insn;
12686         u32 target_size, size_default, off;
12687         struct bpf_prog *new_prog;
12688         enum bpf_access_type type;
12689         bool is_narrower_load;
12690
12691         if (ops->gen_prologue || env->seen_direct_write) {
12692                 if (!ops->gen_prologue) {
12693                         verbose(env, "bpf verifier is misconfigured\n");
12694                         return -EINVAL;
12695                 }
12696                 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
12697                                         env->prog);
12698                 if (cnt >= ARRAY_SIZE(insn_buf)) {
12699                         verbose(env, "bpf verifier is misconfigured\n");
12700                         return -EINVAL;
12701                 } else if (cnt) {
12702                         new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
12703                         if (!new_prog)
12704                                 return -ENOMEM;
12705
12706                         env->prog = new_prog;
12707                         delta += cnt - 1;
12708                 }
12709         }
12710
12711         if (bpf_prog_is_dev_bound(env->prog->aux))
12712                 return 0;
12713
12714         insn = env->prog->insnsi + delta;
12715
12716         for (i = 0; i < insn_cnt; i++, insn++) {
12717                 bpf_convert_ctx_access_t convert_ctx_access;
12718                 bool ctx_access;
12719
12720                 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
12721                     insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
12722                     insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
12723                     insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
12724                         type = BPF_READ;
12725                         ctx_access = true;
12726                 } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
12727                            insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
12728                            insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
12729                            insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
12730                            insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
12731                            insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
12732                            insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
12733                            insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
12734                         type = BPF_WRITE;
12735                         ctx_access = BPF_CLASS(insn->code) == BPF_STX;
12736                 } else {
12737                         continue;
12738                 }
12739
12740                 if (type == BPF_WRITE &&
12741                     env->insn_aux_data[i + delta].sanitize_stack_spill) {
12742                         struct bpf_insn patch[] = {
12743                                 *insn,
12744                                 BPF_ST_NOSPEC(),
12745                         };
12746
12747                         cnt = ARRAY_SIZE(patch);
12748                         new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
12749                         if (!new_prog)
12750                                 return -ENOMEM;
12751
12752                         delta    += cnt - 1;
12753                         env->prog = new_prog;
12754                         insn      = new_prog->insnsi + i + delta;
12755                         continue;
12756                 }
12757
12758                 if (!ctx_access)
12759                         continue;
12760
12761                 switch (env->insn_aux_data[i + delta].ptr_type) {
12762                 case PTR_TO_CTX:
12763                         if (!ops->convert_ctx_access)
12764                                 continue;
12765                         convert_ctx_access = ops->convert_ctx_access;
12766                         break;
12767                 case PTR_TO_SOCKET:
12768                 case PTR_TO_SOCK_COMMON:
12769                         convert_ctx_access = bpf_sock_convert_ctx_access;
12770                         break;
12771                 case PTR_TO_TCP_SOCK:
12772                         convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
12773                         break;
12774                 case PTR_TO_XDP_SOCK:
12775                         convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
12776                         break;
12777                 case PTR_TO_BTF_ID:
12778                         if (type == BPF_READ) {
12779                                 insn->code = BPF_LDX | BPF_PROBE_MEM |
12780                                         BPF_SIZE((insn)->code);
12781                                 env->prog->aux->num_exentries++;
12782                         } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
12783                                 verbose(env, "Writes through BTF pointers are not allowed\n");
12784                                 return -EINVAL;
12785                         }
12786                         continue;
12787                 default:
12788                         continue;
12789                 }
12790
12791                 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
12792                 size = BPF_LDST_BYTES(insn);
12793
12794                 /* If the read access is a narrower load of the field,
12795                  * convert to a 4/8-byte load, to minimum program type specific
12796                  * convert_ctx_access changes. If conversion is successful,
12797                  * we will apply proper mask to the result.
12798                  */
12799                 is_narrower_load = size < ctx_field_size;
12800                 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
12801                 off = insn->off;
12802                 if (is_narrower_load) {
12803                         u8 size_code;
12804
12805                         if (type == BPF_WRITE) {
12806                                 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
12807                                 return -EINVAL;
12808                         }
12809
12810                         size_code = BPF_H;
12811                         if (ctx_field_size == 4)
12812                                 size_code = BPF_W;
12813                         else if (ctx_field_size == 8)
12814                                 size_code = BPF_DW;
12815
12816                         insn->off = off & ~(size_default - 1);
12817                         insn->code = BPF_LDX | BPF_MEM | size_code;
12818                 }
12819
12820                 target_size = 0;
12821                 cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
12822                                          &target_size);
12823                 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
12824                     (ctx_field_size && !target_size)) {
12825                         verbose(env, "bpf verifier is misconfigured\n");
12826                         return -EINVAL;
12827                 }
12828
12829                 if (is_narrower_load && size < target_size) {
12830                         u8 shift = bpf_ctx_narrow_access_offset(
12831                                 off, size, size_default) * 8;
12832                         if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
12833                                 verbose(env, "bpf verifier narrow ctx load misconfigured\n");
12834                                 return -EINVAL;
12835                         }
12836                         if (ctx_field_size <= 4) {
12837                                 if (shift)
12838                                         insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
12839                                                                         insn->dst_reg,
12840                                                                         shift);
12841                                 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
12842                                                                 (1 << size * 8) - 1);
12843                         } else {
12844                                 if (shift)
12845                                         insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
12846                                                                         insn->dst_reg,
12847                                                                         shift);
12848                                 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
12849                                                                 (1ULL << size * 8) - 1);
12850                         }
12851                 }
12852
12853                 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
12854                 if (!new_prog)
12855                         return -ENOMEM;
12856
12857                 delta += cnt - 1;
12858
12859                 /* keep walking new program and skip insns we just inserted */
12860                 env->prog = new_prog;
12861                 insn      = new_prog->insnsi + i + delta;
12862         }
12863
12864         return 0;
12865 }
12866
12867 static int jit_subprogs(struct bpf_verifier_env *env)
12868 {
12869         struct bpf_prog *prog = env->prog, **func, *tmp;
12870         int i, j, subprog_start, subprog_end = 0, len, subprog;
12871         struct bpf_map *map_ptr;
12872         struct bpf_insn *insn;
12873         void *old_bpf_func;
12874         int err, num_exentries;
12875
12876         if (env->subprog_cnt <= 1)
12877                 return 0;
12878
12879         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
12880                 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
12881                         continue;
12882
12883                 /* Upon error here we cannot fall back to interpreter but
12884                  * need a hard reject of the program. Thus -EFAULT is
12885                  * propagated in any case.
12886                  */
12887                 subprog = find_subprog(env, i + insn->imm + 1);
12888                 if (subprog < 0) {
12889                         WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
12890                                   i + insn->imm + 1);
12891                         return -EFAULT;
12892                 }
12893                 /* temporarily remember subprog id inside insn instead of
12894                  * aux_data, since next loop will split up all insns into funcs
12895                  */
12896                 insn->off = subprog;
12897                 /* remember original imm in case JIT fails and fallback
12898                  * to interpreter will be needed
12899                  */
12900                 env->insn_aux_data[i].call_imm = insn->imm;
12901                 /* point imm to __bpf_call_base+1 from JITs point of view */
12902                 insn->imm = 1;
12903                 if (bpf_pseudo_func(insn))
12904                         /* jit (e.g. x86_64) may emit fewer instructions
12905                          * if it learns a u32 imm is the same as a u64 imm.
12906                          * Force a non zero here.
12907                          */
12908                         insn[1].imm = 1;
12909         }
12910
12911         err = bpf_prog_alloc_jited_linfo(prog);
12912         if (err)
12913                 goto out_undo_insn;
12914
12915         err = -ENOMEM;
12916         func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
12917         if (!func)
12918                 goto out_undo_insn;
12919
12920         for (i = 0; i < env->subprog_cnt; i++) {
12921                 subprog_start = subprog_end;
12922                 subprog_end = env->subprog_info[i + 1].start;
12923
12924                 len = subprog_end - subprog_start;
12925                 /* bpf_prog_run() doesn't call subprogs directly,
12926                  * hence main prog stats include the runtime of subprogs.
12927                  * subprogs don't have IDs and not reachable via prog_get_next_id
12928                  * func[i]->stats will never be accessed and stays NULL
12929                  */
12930                 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
12931                 if (!func[i])
12932                         goto out_free;
12933                 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
12934                        len * sizeof(struct bpf_insn));
12935                 func[i]->type = prog->type;
12936                 func[i]->len = len;
12937                 if (bpf_prog_calc_tag(func[i]))
12938                         goto out_free;
12939                 func[i]->is_func = 1;
12940                 func[i]->aux->func_idx = i;
12941                 /* Below members will be freed only at prog->aux */
12942                 func[i]->aux->btf = prog->aux->btf;
12943                 func[i]->aux->func_info = prog->aux->func_info;
12944                 func[i]->aux->poke_tab = prog->aux->poke_tab;
12945                 func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
12946
12947                 for (j = 0; j < prog->aux->size_poke_tab; j++) {
12948                         struct bpf_jit_poke_descriptor *poke;
12949
12950                         poke = &prog->aux->poke_tab[j];
12951                         if (poke->insn_idx < subprog_end &&
12952                             poke->insn_idx >= subprog_start)
12953                                 poke->aux = func[i]->aux;
12954                 }
12955
12956                 /* Use bpf_prog_F_tag to indicate functions in stack traces.
12957                  * Long term would need debug info to populate names
12958                  */
12959                 func[i]->aux->name[0] = 'F';
12960                 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
12961                 func[i]->jit_requested = 1;
12962                 func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
12963                 func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
12964                 func[i]->aux->linfo = prog->aux->linfo;
12965                 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
12966                 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
12967                 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
12968                 num_exentries = 0;
12969                 insn = func[i]->insnsi;
12970                 for (j = 0; j < func[i]->len; j++, insn++) {
12971                         if (BPF_CLASS(insn->code) == BPF_LDX &&
12972                             BPF_MODE(insn->code) == BPF_PROBE_MEM)
12973                                 num_exentries++;
12974                 }
12975                 func[i]->aux->num_exentries = num_exentries;
12976                 func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
12977                 func[i] = bpf_int_jit_compile(func[i]);
12978                 if (!func[i]->jited) {
12979                         err = -ENOTSUPP;
12980                         goto out_free;
12981                 }
12982                 cond_resched();
12983         }
12984
12985         /* at this point all bpf functions were successfully JITed
12986          * now populate all bpf_calls with correct addresses and
12987          * run last pass of JIT
12988          */
12989         for (i = 0; i < env->subprog_cnt; i++) {
12990                 insn = func[i]->insnsi;
12991                 for (j = 0; j < func[i]->len; j++, insn++) {
12992                         if (bpf_pseudo_func(insn)) {
12993                                 subprog = insn->off;
12994                                 insn[0].imm = (u32)(long)func[subprog]->bpf_func;
12995                                 insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
12996                                 continue;
12997                         }
12998                         if (!bpf_pseudo_call(insn))
12999                                 continue;
13000                         subprog = insn->off;
13001                         insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
13002                 }
13003
13004                 /* we use the aux data to keep a list of the start addresses
13005                  * of the JITed images for each function in the program
13006                  *
13007                  * for some architectures, such as powerpc64, the imm field
13008                  * might not be large enough to hold the offset of the start
13009                  * address of the callee's JITed image from __bpf_call_base
13010                  *
13011                  * in such cases, we can lookup the start address of a callee
13012                  * by using its subprog id, available from the off field of
13013                  * the call instruction, as an index for this list
13014                  */
13015                 func[i]->aux->func = func;
13016                 func[i]->aux->func_cnt = env->subprog_cnt;
13017         }
13018         for (i = 0; i < env->subprog_cnt; i++) {
13019                 old_bpf_func = func[i]->bpf_func;
13020                 tmp = bpf_int_jit_compile(func[i]);
13021                 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
13022                         verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
13023                         err = -ENOTSUPP;
13024                         goto out_free;
13025                 }
13026                 cond_resched();
13027         }
13028
13029         /* finally lock prog and jit images for all functions and
13030          * populate kallsysm
13031          */
13032         for (i = 0; i < env->subprog_cnt; i++) {
13033                 bpf_prog_lock_ro(func[i]);
13034                 bpf_prog_kallsyms_add(func[i]);
13035         }
13036
13037         /* Last step: make now unused interpreter insns from main
13038          * prog consistent for later dump requests, so they can
13039          * later look the same as if they were interpreted only.
13040          */
13041         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
13042                 if (bpf_pseudo_func(insn)) {
13043                         insn[0].imm = env->insn_aux_data[i].call_imm;
13044                         insn[1].imm = insn->off;
13045                         insn->off = 0;
13046                         continue;
13047                 }
13048                 if (!bpf_pseudo_call(insn))
13049                         continue;
13050                 insn->off = env->insn_aux_data[i].call_imm;
13051                 subprog = find_subprog(env, i + insn->off + 1);
13052                 insn->imm = subprog;
13053         }
13054
13055         prog->jited = 1;
13056         prog->bpf_func = func[0]->bpf_func;
13057         prog->aux->func = func;
13058         prog->aux->func_cnt = env->subprog_cnt;
13059         bpf_prog_jit_attempt_done(prog);
13060         return 0;
13061 out_free:
13062         /* We failed JIT'ing, so at this point we need to unregister poke
13063          * descriptors from subprogs, so that kernel is not attempting to
13064          * patch it anymore as we're freeing the subprog JIT memory.
13065          */
13066         for (i = 0; i < prog->aux->size_poke_tab; i++) {
13067                 map_ptr = prog->aux->poke_tab[i].tail_call.map;
13068                 map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
13069         }
13070         /* At this point we're guaranteed that poke descriptors are not
13071          * live anymore. We can just unlink its descriptor table as it's
13072          * released with the main prog.
13073          */
13074         for (i = 0; i < env->subprog_cnt; i++) {
13075                 if (!func[i])
13076                         continue;
13077                 func[i]->aux->poke_tab = NULL;
13078                 bpf_jit_free(func[i]);
13079         }
13080         kfree(func);
13081 out_undo_insn:
13082         /* cleanup main prog to be interpreted */
13083         prog->jit_requested = 0;
13084         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
13085                 if (!bpf_pseudo_call(insn))
13086                         continue;
13087                 insn->off = 0;
13088                 insn->imm = env->insn_aux_data[i].call_imm;
13089         }
13090         bpf_prog_jit_attempt_done(prog);
13091         return err;
13092 }
13093
13094 static int fixup_call_args(struct bpf_verifier_env *env)
13095 {
13096 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
13097         struct bpf_prog *prog = env->prog;
13098         struct bpf_insn *insn = prog->insnsi;
13099         bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
13100         int i, depth;
13101 #endif
13102         int err = 0;
13103
13104         if (env->prog->jit_requested &&
13105             !bpf_prog_is_dev_bound(env->prog->aux)) {
13106                 err = jit_subprogs(env);
13107                 if (err == 0)
13108                         return 0;
13109                 if (err == -EFAULT)
13110                         return err;
13111         }
13112 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
13113         if (has_kfunc_call) {
13114                 verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
13115                 return -EINVAL;
13116         }
13117         if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
13118                 /* When JIT fails the progs with bpf2bpf calls and tail_calls
13119                  * have to be rejected, since interpreter doesn't support them yet.
13120                  */
13121                 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
13122                 return -EINVAL;
13123         }
13124         for (i = 0; i < prog->len; i++, insn++) {
13125                 if (bpf_pseudo_func(insn)) {
13126                         /* When JIT fails the progs with callback calls
13127                          * have to be rejected, since interpreter doesn't support them yet.
13128                          */
13129                         verbose(env, "callbacks are not allowed in non-JITed programs\n");
13130                         return -EINVAL;
13131                 }
13132
13133                 if (!bpf_pseudo_call(insn))
13134                         continue;
13135                 depth = get_callee_stack_depth(env, insn, i);
13136                 if (depth < 0)
13137                         return depth;
13138                 bpf_patch_call_args(insn, depth);
13139         }
13140         err = 0;
13141 #endif
13142         return err;
13143 }
13144
13145 static int fixup_kfunc_call(struct bpf_verifier_env *env,
13146                             struct bpf_insn *insn)
13147 {
13148         const struct bpf_kfunc_desc *desc;
13149
13150         if (!insn->imm) {
13151                 verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
13152                 return -EINVAL;
13153         }
13154
13155         /* insn->imm has the btf func_id. Replace it with
13156          * an address (relative to __bpf_base_call).
13157          */
13158         desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
13159         if (!desc) {
13160                 verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
13161                         insn->imm);
13162                 return -EFAULT;
13163         }
13164
13165         insn->imm = desc->imm;
13166
13167         return 0;
13168 }
13169
13170 /* Do various post-verification rewrites in a single program pass.
13171  * These rewrites simplify JIT and interpreter implementations.
13172  */
13173 static int do_misc_fixups(struct bpf_verifier_env *env)
13174 {
13175         struct bpf_prog *prog = env->prog;
13176         enum bpf_attach_type eatype = prog->expected_attach_type;
13177         bool expect_blinding = bpf_jit_blinding_enabled(prog);
13178         enum bpf_prog_type prog_type = resolve_prog_type(prog);
13179         struct bpf_insn *insn = prog->insnsi;
13180         const struct bpf_func_proto *fn;
13181         const int insn_cnt = prog->len;
13182         const struct bpf_map_ops *ops;
13183         struct bpf_insn_aux_data *aux;
13184         struct bpf_insn insn_buf[16];
13185         struct bpf_prog *new_prog;
13186         struct bpf_map *map_ptr;
13187         int i, ret, cnt, delta = 0;
13188
13189         for (i = 0; i < insn_cnt; i++, insn++) {
13190                 /* Make divide-by-zero exceptions impossible. */
13191                 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
13192                     insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
13193                     insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
13194                     insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
13195                         bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
13196                         bool isdiv = BPF_OP(insn->code) == BPF_DIV;
13197                         struct bpf_insn *patchlet;
13198                         struct bpf_insn chk_and_div[] = {
13199                                 /* [R,W]x div 0 -> 0 */
13200                                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
13201                                              BPF_JNE | BPF_K, insn->src_reg,
13202                                              0, 2, 0),
13203                                 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
13204                                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
13205                                 *insn,
13206                         };
13207                         struct bpf_insn chk_and_mod[] = {
13208                                 /* [R,W]x mod 0 -> [R,W]x */
13209                                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
13210                                              BPF_JEQ | BPF_K, insn->src_reg,
13211                                              0, 1 + (is64 ? 0 : 1), 0),
13212                                 *insn,
13213                                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
13214                                 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
13215                         };
13216
13217                         patchlet = isdiv ? chk_and_div : chk_and_mod;
13218                         cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
13219                                       ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
13220
13221                         new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
13222                         if (!new_prog)
13223                                 return -ENOMEM;
13224
13225                         delta    += cnt - 1;
13226                         env->prog = prog = new_prog;
13227                         insn      = new_prog->insnsi + i + delta;
13228                         continue;
13229                 }
13230
13231                 /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
13232                 if (BPF_CLASS(insn->code) == BPF_LD &&
13233                     (BPF_MODE(insn->code) == BPF_ABS ||
13234                      BPF_MODE(insn->code) == BPF_IND)) {
13235                         cnt = env->ops->gen_ld_abs(insn, insn_buf);
13236                         if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
13237                                 verbose(env, "bpf verifier is misconfigured\n");
13238                                 return -EINVAL;
13239                         }
13240
13241                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13242                         if (!new_prog)
13243                                 return -ENOMEM;
13244
13245                         delta    += cnt - 1;
13246                         env->prog = prog = new_prog;
13247                         insn      = new_prog->insnsi + i + delta;
13248                         continue;
13249                 }
13250
13251                 /* Rewrite pointer arithmetic to mitigate speculation attacks. */
13252                 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
13253                     insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
13254                         const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
13255                         const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
13256                         struct bpf_insn *patch = &insn_buf[0];
13257                         bool issrc, isneg, isimm;
13258                         u32 off_reg;
13259
13260                         aux = &env->insn_aux_data[i + delta];
13261                         if (!aux->alu_state ||
13262                             aux->alu_state == BPF_ALU_NON_POINTER)
13263                                 continue;
13264
13265                         isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
13266                         issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
13267                                 BPF_ALU_SANITIZE_SRC;
13268                         isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
13269
13270                         off_reg = issrc ? insn->src_reg : insn->dst_reg;
13271                         if (isimm) {
13272                                 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
13273                         } else {
13274                                 if (isneg)
13275                                         *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
13276                                 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
13277                                 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
13278                                 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
13279                                 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
13280                                 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
13281                                 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
13282                         }
13283                         if (!issrc)
13284                                 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
13285                         insn->src_reg = BPF_REG_AX;
13286                         if (isneg)
13287                                 insn->code = insn->code == code_add ?
13288                                              code_sub : code_add;
13289                         *patch++ = *insn;
13290                         if (issrc && isneg && !isimm)
13291                                 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
13292                         cnt = patch - insn_buf;
13293
13294                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13295                         if (!new_prog)
13296                                 return -ENOMEM;
13297
13298                         delta    += cnt - 1;
13299                         env->prog = prog = new_prog;
13300                         insn      = new_prog->insnsi + i + delta;
13301                         continue;
13302                 }
13303
13304                 if (insn->code != (BPF_JMP | BPF_CALL))
13305                         continue;
13306                 if (insn->src_reg == BPF_PSEUDO_CALL)
13307                         continue;
13308                 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
13309                         ret = fixup_kfunc_call(env, insn);
13310                         if (ret)
13311                                 return ret;
13312                         continue;
13313                 }
13314
13315                 if (insn->imm == BPF_FUNC_get_route_realm)
13316                         prog->dst_needed = 1;
13317                 if (insn->imm == BPF_FUNC_get_prandom_u32)
13318                         bpf_user_rnd_init_once();
13319                 if (insn->imm == BPF_FUNC_override_return)
13320                         prog->kprobe_override = 1;
13321                 if (insn->imm == BPF_FUNC_tail_call) {
13322                         /* If we tail call into other programs, we
13323                          * cannot make any assumptions since they can
13324                          * be replaced dynamically during runtime in
13325                          * the program array.
13326                          */
13327                         prog->cb_access = 1;
13328                         if (!allow_tail_call_in_subprogs(env))
13329                                 prog->aux->stack_depth = MAX_BPF_STACK;
13330                         prog->aux->max_pkt_offset = MAX_PACKET_OFF;
13331
13332                         /* mark bpf_tail_call as different opcode to avoid
13333                          * conditional branch in the interpreter for every normal
13334                          * call and to prevent accidental JITing by JIT compiler
13335                          * that doesn't support bpf_tail_call yet
13336                          */
13337                         insn->imm = 0;
13338                         insn->code = BPF_JMP | BPF_TAIL_CALL;
13339
13340                         aux = &env->insn_aux_data[i + delta];
13341                         if (env->bpf_capable && !expect_blinding &&
13342                             prog->jit_requested &&
13343                             !bpf_map_key_poisoned(aux) &&
13344                             !bpf_map_ptr_poisoned(aux) &&
13345                             !bpf_map_ptr_unpriv(aux)) {
13346                                 struct bpf_jit_poke_descriptor desc = {
13347                                         .reason = BPF_POKE_REASON_TAIL_CALL,
13348                                         .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
13349                                         .tail_call.key = bpf_map_key_immediate(aux),
13350                                         .insn_idx = i + delta,
13351                                 };
13352
13353                                 ret = bpf_jit_add_poke_descriptor(prog, &desc);
13354                                 if (ret < 0) {
13355                                         verbose(env, "adding tail call poke descriptor failed\n");
13356                                         return ret;
13357                                 }
13358
13359                                 insn->imm = ret + 1;
13360                                 continue;
13361                         }
13362
13363                         if (!bpf_map_ptr_unpriv(aux))
13364                                 continue;
13365
13366                         /* instead of changing every JIT dealing with tail_call
13367                          * emit two extra insns:
13368                          * if (index >= max_entries) goto out;
13369                          * index &= array->index_mask;
13370                          * to avoid out-of-bounds cpu speculation
13371                          */
13372                         if (bpf_map_ptr_poisoned(aux)) {
13373                                 verbose(env, "tail_call abusing map_ptr\n");
13374                                 return -EINVAL;
13375                         }
13376
13377                         map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
13378                         insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
13379                                                   map_ptr->max_entries, 2);
13380                         insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
13381                                                     container_of(map_ptr,
13382                                                                  struct bpf_array,
13383                                                                  map)->index_mask);
13384                         insn_buf[2] = *insn;
13385                         cnt = 3;
13386                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13387                         if (!new_prog)
13388                                 return -ENOMEM;
13389
13390                         delta    += cnt - 1;
13391                         env->prog = prog = new_prog;
13392                         insn      = new_prog->insnsi + i + delta;
13393                         continue;
13394                 }
13395
13396                 if (insn->imm == BPF_FUNC_timer_set_callback) {
13397                         /* The verifier will process callback_fn as many times as necessary
13398                          * with different maps and the register states prepared by
13399                          * set_timer_callback_state will be accurate.
13400                          *
13401                          * The following use case is valid:
13402                          *   map1 is shared by prog1, prog2, prog3.
13403                          *   prog1 calls bpf_timer_init for some map1 elements
13404                          *   prog2 calls bpf_timer_set_callback for some map1 elements.
13405                          *     Those that were not bpf_timer_init-ed will return -EINVAL.
13406                          *   prog3 calls bpf_timer_start for some map1 elements.
13407                          *     Those that were not both bpf_timer_init-ed and
13408                          *     bpf_timer_set_callback-ed will return -EINVAL.
13409                          */
13410                         struct bpf_insn ld_addrs[2] = {
13411                                 BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
13412                         };
13413
13414                         insn_buf[0] = ld_addrs[0];
13415                         insn_buf[1] = ld_addrs[1];
13416                         insn_buf[2] = *insn;
13417                         cnt = 3;
13418
13419                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13420                         if (!new_prog)
13421                                 return -ENOMEM;
13422
13423                         delta    += cnt - 1;
13424                         env->prog = prog = new_prog;
13425                         insn      = new_prog->insnsi + i + delta;
13426                         goto patch_call_imm;
13427                 }
13428
13429                 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
13430                  * and other inlining handlers are currently limited to 64 bit
13431                  * only.
13432                  */
13433                 if (prog->jit_requested && BITS_PER_LONG == 64 &&
13434                     (insn->imm == BPF_FUNC_map_lookup_elem ||
13435                      insn->imm == BPF_FUNC_map_update_elem ||
13436                      insn->imm == BPF_FUNC_map_delete_elem ||
13437                      insn->imm == BPF_FUNC_map_push_elem   ||
13438                      insn->imm == BPF_FUNC_map_pop_elem    ||
13439                      insn->imm == BPF_FUNC_map_peek_elem   ||
13440                      insn->imm == BPF_FUNC_redirect_map    ||
13441                      insn->imm == BPF_FUNC_for_each_map_elem)) {
13442                         aux = &env->insn_aux_data[i + delta];
13443                         if (bpf_map_ptr_poisoned(aux))
13444                                 goto patch_call_imm;
13445
13446                         map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
13447                         ops = map_ptr->ops;
13448                         if (insn->imm == BPF_FUNC_map_lookup_elem &&
13449                             ops->map_gen_lookup) {
13450                                 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
13451                                 if (cnt == -EOPNOTSUPP)
13452                                         goto patch_map_ops_generic;
13453                                 if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
13454                                         verbose(env, "bpf verifier is misconfigured\n");
13455                                         return -EINVAL;
13456                                 }
13457
13458                                 new_prog = bpf_patch_insn_data(env, i + delta,
13459                                                                insn_buf, cnt);
13460                                 if (!new_prog)
13461                                         return -ENOMEM;
13462
13463                                 delta    += cnt - 1;
13464                                 env->prog = prog = new_prog;
13465                                 insn      = new_prog->insnsi + i + delta;
13466                                 continue;
13467                         }
13468
13469                         BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
13470                                      (void *(*)(struct bpf_map *map, void *key))NULL));
13471                         BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
13472                                      (int (*)(struct bpf_map *map, void *key))NULL));
13473                         BUILD_BUG_ON(!__same_type(ops->map_update_elem,
13474                                      (int (*)(struct bpf_map *map, void *key, void *value,
13475                                               u64 flags))NULL));
13476                         BUILD_BUG_ON(!__same_type(ops->map_push_elem,
13477                                      (int (*)(struct bpf_map *map, void *value,
13478                                               u64 flags))NULL));
13479                         BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
13480                                      (int (*)(struct bpf_map *map, void *value))NULL));
13481                         BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
13482                                      (int (*)(struct bpf_map *map, void *value))NULL));
13483                         BUILD_BUG_ON(!__same_type(ops->map_redirect,
13484                                      (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL));
13485                         BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
13486                                      (int (*)(struct bpf_map *map,
13487                                               bpf_callback_t callback_fn,
13488                                               void *callback_ctx,
13489                                               u64 flags))NULL));
13490
13491 patch_map_ops_generic:
13492                         switch (insn->imm) {
13493                         case BPF_FUNC_map_lookup_elem:
13494                                 insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
13495                                 continue;
13496                         case BPF_FUNC_map_update_elem:
13497                                 insn->imm = BPF_CALL_IMM(ops->map_update_elem);
13498                                 continue;
13499                         case BPF_FUNC_map_delete_elem:
13500                                 insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
13501                                 continue;
13502                         case BPF_FUNC_map_push_elem:
13503                                 insn->imm = BPF_CALL_IMM(ops->map_push_elem);
13504                                 continue;
13505                         case BPF_FUNC_map_pop_elem:
13506                                 insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
13507                                 continue;
13508                         case BPF_FUNC_map_peek_elem:
13509                                 insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
13510                                 continue;
13511                         case BPF_FUNC_redirect_map:
13512                                 insn->imm = BPF_CALL_IMM(ops->map_redirect);
13513                                 continue;
13514                         case BPF_FUNC_for_each_map_elem:
13515                                 insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
13516                                 continue;
13517                         }
13518
13519                         goto patch_call_imm;
13520                 }
13521
13522                 /* Implement bpf_jiffies64 inline. */
13523                 if (prog->jit_requested && BITS_PER_LONG == 64 &&
13524                     insn->imm == BPF_FUNC_jiffies64) {
13525                         struct bpf_insn ld_jiffies_addr[2] = {
13526                                 BPF_LD_IMM64(BPF_REG_0,
13527                                              (unsigned long)&jiffies),
13528                         };
13529
13530                         insn_buf[0] = ld_jiffies_addr[0];
13531                         insn_buf[1] = ld_jiffies_addr[1];
13532                         insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
13533                                                   BPF_REG_0, 0);
13534                         cnt = 3;
13535
13536                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
13537                                                        cnt);
13538                         if (!new_prog)
13539                                 return -ENOMEM;
13540
13541                         delta    += cnt - 1;
13542                         env->prog = prog = new_prog;
13543                         insn      = new_prog->insnsi + i + delta;
13544                         continue;
13545                 }
13546
13547                 /* Implement bpf_get_func_arg inline. */
13548                 if (prog_type == BPF_PROG_TYPE_TRACING &&
13549                     insn->imm == BPF_FUNC_get_func_arg) {
13550                         /* Load nr_args from ctx - 8 */
13551                         insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
13552                         insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
13553                         insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
13554                         insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
13555                         insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
13556                         insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
13557                         insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
13558                         insn_buf[7] = BPF_JMP_A(1);
13559                         insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
13560                         cnt = 9;
13561
13562                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13563                         if (!new_prog)
13564                                 return -ENOMEM;
13565
13566                         delta    += cnt - 1;
13567                         env->prog = prog = new_prog;
13568                         insn      = new_prog->insnsi + i + delta;
13569                         continue;
13570                 }
13571
13572                 /* Implement bpf_get_func_ret inline. */
13573                 if (prog_type == BPF_PROG_TYPE_TRACING &&
13574                     insn->imm == BPF_FUNC_get_func_ret) {
13575                         if (eatype == BPF_TRACE_FEXIT ||
13576                             eatype == BPF_MODIFY_RETURN) {
13577                                 /* Load nr_args from ctx - 8 */
13578                                 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
13579                                 insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
13580                                 insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
13581                                 insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
13582                                 insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
13583                                 insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0);
13584                                 cnt = 6;
13585                         } else {
13586                                 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
13587                                 cnt = 1;
13588                         }
13589
13590                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13591                         if (!new_prog)
13592                                 return -ENOMEM;
13593
13594                         delta    += cnt - 1;
13595                         env->prog = prog = new_prog;
13596                         insn      = new_prog->insnsi + i + delta;
13597                         continue;
13598                 }
13599
13600                 /* Implement get_func_arg_cnt inline. */
13601                 if (prog_type == BPF_PROG_TYPE_TRACING &&
13602                     insn->imm == BPF_FUNC_get_func_arg_cnt) {
13603                         /* Load nr_args from ctx - 8 */
13604                         insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
13605
13606                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
13607                         if (!new_prog)
13608                                 return -ENOMEM;
13609
13610                         env->prog = prog = new_prog;
13611                         insn      = new_prog->insnsi + i + delta;
13612                         continue;
13613                 }
13614
13615                 /* Implement bpf_get_func_ip inline. */
13616                 if (prog_type == BPF_PROG_TYPE_TRACING &&
13617                     insn->imm == BPF_FUNC_get_func_ip) {
13618                         /* Load IP address from ctx - 16 */
13619                         insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
13620
13621                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
13622                         if (!new_prog)
13623                                 return -ENOMEM;
13624
13625                         env->prog = prog = new_prog;
13626                         insn      = new_prog->insnsi + i + delta;
13627                         continue;
13628                 }
13629
13630 patch_call_imm:
13631                 fn = env->ops->get_func_proto(insn->imm, env->prog);
13632                 /* all functions that have prototype and verifier allowed
13633                  * programs to call them, must be real in-kernel functions
13634                  */
13635                 if (!fn->func) {
13636                         verbose(env,
13637                                 "kernel subsystem misconfigured func %s#%d\n",
13638                                 func_id_name(insn->imm), insn->imm);
13639                         return -EFAULT;
13640                 }
13641                 insn->imm = fn->func - __bpf_call_base;
13642         }
13643
13644         /* Since poke tab is now finalized, publish aux to tracker. */
13645         for (i = 0; i < prog->aux->size_poke_tab; i++) {
13646                 map_ptr = prog->aux->poke_tab[i].tail_call.map;
13647                 if (!map_ptr->ops->map_poke_track ||
13648                     !map_ptr->ops->map_poke_untrack ||
13649                     !map_ptr->ops->map_poke_run) {
13650                         verbose(env, "bpf verifier is misconfigured\n");
13651                         return -EINVAL;
13652                 }
13653
13654                 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
13655                 if (ret < 0) {
13656                         verbose(env, "tracking tail call prog failed\n");
13657                         return ret;
13658                 }
13659         }
13660
13661         sort_kfunc_descs_by_imm(env->prog);
13662
13663         return 0;
13664 }
13665
/* Free all verifier state list nodes owned by @env.
 *
 * Two collections are torn down: the singly linked env->free_list, and
 * every bucket chain of the env->explored_states table, which is walked
 * for state_htab_size(env) buckets. For each node, free_verifier_state()
 * is run on the state embedded in the node and then the node itself is
 * kfree()d. Every list head is reset to NULL afterwards.
 *
 * NOTE(review): the second argument (false) presumably tells
 * free_verifier_state() not to free the state object itself, since it
 * lives inside the list node - confirm against its definition.
 */
13666 static void free_states(struct bpf_verifier_env *env)
13667 {
13668         struct bpf_verifier_state_list *sl, *sln;
13669         int i;
13670
13671         sl = env->free_list;
13672         while (sl) {
              /* Fetch the successor before the current node is freed. */
13673                 sln = sl->next;
13674                 free_verifier_state(&sl->state, false);
13675                 kfree(sl);
13676                 sl = sln;
13677         }
13678         env->free_list = NULL;
13679
              /* Without an explored_states table there is nothing more to free. */
13680         if (!env->explored_states)
13681                 return;
13682
              /* Same teardown as above, applied to each bucket chain in turn. */
13683         for (i = 0; i < state_htab_size(env); i++) {
13684                 sl = env->explored_states[i];
13685
13686                 while (sl) {
13687                         sln = sl->next;
13688                         free_verifier_state(&sl->state, false);
13689                         kfree(sl);
13690                         sl = sln;
13691                 }
13692                 env->explored_states[i] = NULL;
13693         }
13694 }
13695
/* Run one verification pass starting from a freshly built verifier state.
 *
 * @env:     verifier environment; env->insn_idx is set by the callers
 *           visible in this file before they call in here.
 * @subprog: index of the subprogram to verify; 0 is passed for the main
 *           program.
 *
 * Allocates the initial bpf_verifier_state with a single frame, seeds the
 * argument registers, calls do_check(), and then releases everything that
 * the pass left behind (current state, pending stack entries, state lists).
 *
 * Return: -ENOMEM if either allocation fails; the non-zero result of
 * btf_prepare_func_args(); -EFAULT from btf_check_subprog_arg_match();
 * otherwise whatever do_check() returned (0 on success).
 */
13696 static int do_check_common(struct bpf_verifier_env *env, int subprog)
13697 {
              /* pop_log is true unless BPF_LOG_LEVEL2 is set; it gates the
               * bpf_vlog_reset() call made after a successful pass below.
               */
13698         bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
13699         struct bpf_verifier_state *state;
13700         struct bpf_reg_state *regs;
13701         int ret, i;
13702
13703         env->prev_linfo = NULL;
13704         env->pass_cnt++;
13705
              /* Both -ENOMEM returns below happen before env->cur_state is
               * assigned, so they do not go through the cleanup at out.
               */
13706         state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
13707         if (!state)
13708                 return -ENOMEM;
              /* kzalloc() already zeroed these two; they are spelled out anyway. */
13709         state->curframe = 0;
13710         state->speculative = false;
13711         state->branches = 1;
13712         state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
13713         if (!state->frame[0]) {
13714                 kfree(state);
13715                 return -ENOMEM;
13716         }
13717         env->cur_state = state;
13718         init_func_state(env, state->frame[0],
13719                         BPF_MAIN_FUNC /* callsite */,
13720                         0 /* frameno */,
13721                         subprog);
13722
13723         regs = state->frame[state->curframe]->regs;
              /* Subprograms and BPF_PROG_TYPE_EXT programs get their argument
               * registers R1-R5 prepared by btf_prepare_func_args(); the loop
               * then normalizes each register according to the type it was given.
               */
13724         if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
13725                 ret = btf_prepare_func_args(env, subprog, regs);
13726                 if (ret)
13727                         goto out;
13728                 for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
13729                         if (regs[i].type == PTR_TO_CTX)
13730                                 mark_reg_known_zero(env, regs, i);
13731                         else if (regs[i].type == SCALAR_VALUE)
13732                                 mark_reg_unknown(env, regs, i);
13733                         else if (base_type(regs[i].type) == PTR_TO_MEM) {
                              /* mem_size is saved and restored around
                               * mark_reg_known_zero() (which presumably
                               * clobbers it), then a fresh id is assigned.
                               */
13734                                 const u32 mem_size = regs[i].mem_size;
13735
13736                                 mark_reg_known_zero(env, regs, i);
13737                                 regs[i].mem_size = mem_size;
13738                                 regs[i].id = ++env->id_gen;
13739                         }
13740                 }
13741         } else {
13742                 /* 1st arg to a function */
13743                 regs[BPF_REG_1].type = PTR_TO_CTX;
13744                 mark_reg_known_zero(env, regs, BPF_REG_1);
13745                 ret = btf_check_subprog_arg_match(env, subprog, regs);
13746                 if (ret == -EFAULT)
13747                         /* unlikely verifier bug. abort.
13748                          * ret == 0 and ret < 0 are sadly acceptable for
13749                          * main() function due to backward compatibility.
13750                          * Like socket filter program may be written as:
13751                          * int bpf_prog(struct pt_regs *ctx)
13752                          * and never dereference that ctx in the program.
13753                          * 'struct pt_regs' is a type mismatch for socket
13754                          * filter that should be using 'struct __sk_buff'.
13755                          */
13756                         goto out;
13757         }
13758
              /* Any other result of the argument check above is overwritten here. */
13759         ret = do_check(env);
13760 out:
13761         /* check for NULL is necessary, since cur_state can be freed inside
13762          * do_check() under memory pressure.
13763          */
13764         if (env->cur_state) {
13765                 free_verifier_state(env->cur_state, true);
13766                 env->cur_state = NULL;
13767         }
              /* Keep popping until pop_stack() returns non-zero.
               * NOTE(review): presumably that signals an empty stack - confirm.
               */
13768         while (!pop_stack(env, NULL, NULL, false));
              /* On success, and only when BPF_LOG_LEVEL2 is not set, reset the log. */
13769         if (!ret && pop_log)
13770                 bpf_vlog_reset(&env->log, 0);
13771         free_states(env);
13772         return ret;
13773 }
13774
13775 /* Verify all global functions in a BPF program one by one based on their BTF.
13776  * All global functions must pass verification. Otherwise the whole program is rejected.
13777  * Consider:
13778  * int bar(int);
13779  * int foo(int f)
13780  * {
13781  *    return bar(f);
13782  * }
13783  * int bar(int b)
13784  * {
13785  *    ...
13786  * }
13787  * foo() will be verified first for R1=any_scalar_value. During verification it
13788  * will be assumed that bar() was already verified successfully and the call to bar()
13789  * from foo() will be checked for type match only. Later bar() will be verified
13790  * independently to check that it's safe for R1=any_scalar_value.
       *
       * Only subprograms with index >= 1 are visited here; index 0 is handled by
       * do_check_main().
       *
       * Return: 0 if the program has no func_info or every global subprogram
       * passed; otherwise the first non-zero result of do_check_common().
13791  */
13792 static int do_check_subprogs(struct bpf_verifier_env *env)
13793 {
13794         struct bpf_prog_aux *aux = env->prog->aux;
13795         int i, ret;
13796
              /* Without func_info there is no linkage information to act on. */
13797         if (!aux->func_info)
13798                 return 0;
13799
13800         for (i = 1; i < env->subprog_cnt; i++) {
                      /* Subprograms without global linkage are skipped. */
13801                 if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
13802                         continue;
13803                 env->insn_idx = env->subprog_info[i].start;
                      /* A subprogram with index >= 1 is not expected to start at insn 0. */
13804                 WARN_ON_ONCE(env->insn_idx == 0);
13805                 ret = do_check_common(env, i);
13806                 if (ret) {
13807                         return ret;
13808                 } else if (env->log.level & BPF_LOG_LEVEL) {
13809                         verbose(env,
13810                                 "Func#%d is safe for any args that match its prototype\n",
13811                                 i);
13812                 }
13813         }
13814         return 0;
13815 }
13816
/* Verify the main program (subprog 0), starting at instruction 0.
 *
 * On success the stack depth recorded for subprog 0 is copied into
 * prog->aux->stack_depth.
 *
 * Return: the result of do_check_common(env, 0).
 */
13817 static int do_check_main(struct bpf_verifier_env *env)
13818 {
13819         int ret;
13820
13821         env->insn_idx = 0;
13822         ret = do_check_common(env, 0);
              /* stack_depth is only published when verification succeeded. */
13823         if (!ret)
13824                 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
13825         return ret;
13826 }
13827
13828
/* Write verification statistics for @env through verbose().
 *
 * When BPF_LOG_STATS is set in env->log.level, two extra lines are emitted:
 * the verification time (env->verification_time divided by 1000 and
 * labelled usec) and the stack depth of every subprogram, joined with a
 * plus sign. The processed-insns summary line is passed to verbose()
 * regardless of BPF_LOG_STATS.
 *
 * NOTE(review): env->verification_time is presumably in nanoseconds, given
 * the division by 1000 and the usec label - confirm where it is recorded.
 */
13829 static void print_verification_stats(struct bpf_verifier_env *env)
13830 {
13831         int i;
13832
13833         if (env->log.level & BPF_LOG_STATS) {
13834                 verbose(env, "verification time %lld usec\n",
13835                         div_u64(env->verification_time, 1000));
13836                 verbose(env, "stack depth ");
13837                 for (i = 0; i < env->subprog_cnt; i++) {
13838                         u32 depth = env->subprog_info[i].stack_depth;
13839
13840                         verbose(env, "%d", depth);
                              /* Separator between entries, none after the last one. */
13841                         if (i + 1 < env->subprog_cnt)
13842                                 verbose(env, "+");
13843                 }
13844                 verbose(env, "\n");
13845         }
13846         verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
13847                 "total_states %d peak_states %d mark_read %d\n",
13848                 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
13849                 env->max_states_per_insn, env->total_states,
13850                 env->peak_states, env->longest_mark_read_walk);
13851 }
13852
/* Validate the attach target of a struct_ops program and bind the verifier
 * to it.
 *
 * prog->aux->attach_btf_id selects the struct_ops description, looked up
 * with bpf_struct_ops_find(), and prog->expected_attach_type is used as the
 * index of the struct member that the program is attached to. On success
 * the func proto and name of that member are stored in prog->aux, and
 * env->ops is switched to st_ops->verifier_ops.
 *
 * Return: 0 on success; -EINVAL if the program is not GPL compatible, the
 * member index is out of range, or btf_type_resolve_func_ptr() yields no
 * func proto for the member; -ENOTSUPP if no struct_ops matches the btf_id;
 * otherwise the non-zero value returned by st_ops->check_member().
 */
13853 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
13854 {
13855         const struct btf_type *t, *func_proto;
13856         const struct bpf_struct_ops *st_ops;
13857         const struct btf_member *member;
13858         struct bpf_prog *prog = env->prog;
13859         u32 btf_id, member_idx;
13860         const char *mname;
13861
13862         if (!prog->gpl_compatible) {
13863                 verbose(env, "struct ops programs must have a GPL compatible license\n");
13864                 return -EINVAL;
13865         }
13866
13867         btf_id = prog->aux->attach_btf_id;
13868         st_ops = bpf_struct_ops_find(btf_id);
13869         if (!st_ops) {
13870                 verbose(env, "attach_btf_id %u is not a supported struct\n",
13871                         btf_id);
13872                 return -ENOTSUPP;
13873         }
13874
13875         t = st_ops->type;
              /* For this program type expected_attach_type carries a member index. */
13876         member_idx = prog->expected_attach_type;
13877         if (member_idx >= btf_type_vlen(t)) {
13878                 verbose(env, "attach to invalid member idx %u of struct %s\n",
13879                         member_idx, st_ops->name);
13880                 return -EINVAL;
13881         }
13882
              /* Member name and type are both resolved against btf_vmlinux. */
13883         member = &btf_type_member(t)[member_idx];
13884         mname = btf_name_by_offset(btf_vmlinux, member->name_off);
13885         func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
13886                                                NULL);
13887         if (!func_proto) {
13888                 verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
13889                         mname, member_idx, st_ops->name);
13890                 return -EINVAL;
13891         }
13892
              /* check_member is an optional hook; its error code is passed through. */
13893         if (st_ops->check_member) {
13894                 int err = st_ops->check_member(t, member);
13895
13896                 if (err) {
13897                         verbose(env, "attach to unsupported member %s of struct %s\n",
13898                                 mname, st_ops->name);
13899                         return err;
13900                 }
13901         }
13902
13903         prog->aux->attach_func_proto = func_proto;
13904         prog->aux->attach_func_name = mname;
13905         env->ops = st_ops->verifier_ops;
13906
13907         return 0;
13908 }
/* Function-name prefix accepted by check_attach_modify_return(). */
13909 #define SECURITY_PREFIX "security_"
13910
/* Check a target function by address and by name.
 *
 * @addr:      address of the target function.
 * @func_name: name of the target function.
 *
 * Return: 0 if @addr is reported by within_error_injection_list() or
 * @func_name starts with SECURITY_PREFIX; -EINVAL otherwise.
 *
 * NOTE(review): presumably this gates BPF_MODIFY_RETURN attachment; the
 * caller is outside the part of the file reviewed here - confirm.
 */
13911 static int check_attach_modify_return(unsigned long addr, const char *func_name)
13912 {
              /* sizeof() - 1 is the prefix length without the terminating NUL. */
13913         if (within_error_injection_list(addr) ||
13914             !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
13915                 return 0;
13916
13917         return -EINVAL;
13918 }
13919
13920 /* list of non-sleepable functions that are otherwise on
13921  * ALLOW_ERROR_INJECTION list
       *
       * The resulting set, btf_non_sleepable_error_inject, is looked up by BTF
       * id in check_non_sleepable_error_inject().
13922  */
13923 BTF_SET_START(btf_non_sleepable_error_inject)
13924 /* Three functions below can be called from sleepable and non-sleepable context.
13925  * Assume non-sleepable from bpf safety point of view.
13926  */
13927 BTF_ID(func, __filemap_add_folio)
13928 BTF_ID(func, should_fail_alloc_page)
13929 BTF_ID(func, should_failslab)
13930 BTF_SET_END(btf_non_sleepable_error_inject)
13931
/* Test whether @btf_id is a member of the btf_non_sleepable_error_inject set.
 *
 * Return: the result of btf_id_set_contains(), i.e. non-zero when @btf_id
 * is in the set and zero when it is not.
 */
13932 static int check_non_sleepable_error_inject(u32 btf_id)
13933 {
13934         return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
13935 }
13936
13937 int bpf_check_attach_target(struct bpf_verifier_log *log,
13938                             const struct bpf_prog *prog,
13939                             const struct bpf_prog *tgt_prog,
13940                             u32 btf_id,
13941                             struct bpf_attach_target_info *tgt_info)
13942 {
13943         bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
13944         const char prefix[] = "btf_trace_";
13945         int ret = 0, subprog = -1, i;
13946         const struct btf_type *t;
13947         bool conservative = true;
13948         const char *tname;
13949         struct btf *btf;
13950         long addr = 0;
13951
13952         if (!btf_id) {
13953                 bpf_log(log, "Tracing programs must provide btf_id\n");
13954                 return -EINVAL;
13955         }
13956         btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
13957         if (!btf) {
13958                 bpf_log(log,
13959                         "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
13960                 return -EINVAL;
13961         }
13962         t = btf_type_by_id(btf, btf_id);
13963         if (!t) {
13964                 bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
13965                 return -EINVAL;
13966         }
13967         tname = btf_name_by_offset(btf, t->name_off);
13968         if (!tname) {
13969                 bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
13970                 return -EINVAL;
13971         }
13972         if (tgt_prog) {
13973                 struct bpf_prog_aux *aux = tgt_prog->aux;
13974
13975                 for (i = 0; i < aux->func_info_cnt; i++)
13976                         if (aux->func_info[i].type_id == btf_id) {
13977                                 subprog = i;
13978                                 break;
13979                         }
13980                 if (subprog == -1) {
13981                         bpf_log(log, "Subprog %s doesn't exist\n", tname);
13982                         return -EINVAL;
13983                 }
13984                 conservative = aux->func_info_aux[subprog].unreliable;
13985                 if (prog_extension) {
13986                         if (conservative) {
13987                                 bpf_log(log,
13988                                         "Cannot replace static functions\n");
13989                                 return -EINVAL;
13990                         }
13991                         if (!prog->jit_requested) {
13992                                 bpf_log(log,
13993                                         "Extension programs should be JITed\n");
13994                                 return -EINVAL;
13995                         }
13996                 }
13997                 if (!tgt_prog->jited) {
13998                         bpf_log(log, "Can attach to only JITed progs\n");
13999                         return -EINVAL;
14000                 }
14001                 if (tgt_prog->type == prog->type) {
14002                         /* Cannot fentry/fexit another fentry/fexit program.
14003                          * Cannot attach program extension to another extension.
14004                          * It's ok to attach fentry/fexit to extension program.
14005                          */
14006                         bpf_log(log, "Cannot recursively attach\n");
14007                         return -EINVAL;
14008                 }
14009                 if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
14010                     prog_extension &&
14011                     (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
14012                      tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
14013                         /* Program extensions can extend all program types
14014                          * except fentry/fexit. The reason is the following.
14015                          * The fentry/fexit programs are used for performance
14016                          * analysis, stats and can be attached to any program
14017                          * type except themselves. When extension program is
14018                          * replacing XDP function it is necessary to allow
14019                          * performance analysis of all functions. Both original
14020                          * XDP program and its program extension. Hence
14021                          * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
14022                          * allowed. If extending of fentry/fexit was allowed it
14023                          * would be possible to create long call chain
14024                          * fentry->extension->fentry->extension beyond
14025                          * reasonable stack size. Hence extending fentry is not
14026                          * allowed.
14027                          */
14028                         bpf_log(log, "Cannot extend fentry/fexit\n");
14029                         return -EINVAL;
14030                 }
14031         } else {
14032                 if (prog_extension) {
14033                         bpf_log(log, "Cannot replace kernel functions\n");
14034                         return -EINVAL;
14035                 }
14036         }
14037
14038         switch (prog->expected_attach_type) {
14039         case BPF_TRACE_RAW_TP:
14040                 if (tgt_prog) {
14041                         bpf_log(log,
14042                                 "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
14043                         return -EINVAL;
14044                 }
14045                 if (!btf_type_is_typedef(t)) {
14046                         bpf_log(log, "attach_btf_id %u is not a typedef\n",
14047                                 btf_id);
14048                         return -EINVAL;
14049                 }
14050                 if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
14051                         bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
14052                                 btf_id, tname);
14053                         return -EINVAL;
14054                 }
14055                 tname += sizeof(prefix) - 1;
14056                 t = btf_type_by_id(btf, t->type);
14057                 if (!btf_type_is_ptr(t))
14058                         /* should never happen in valid vmlinux build */
14059                         return -EINVAL;
14060                 t = btf_type_by_id(btf, t->type);
14061                 if (!btf_type_is_func_proto(t))
14062                         /* should never happen in valid vmlinux build */
14063                         return -EINVAL;
14064
14065                 break;
14066         case BPF_TRACE_ITER:
14067                 if (!btf_type_is_func(t)) {
14068                         bpf_log(log, "attach_btf_id %u is not a function\n",
14069                                 btf_id);
14070                         return -EINVAL;
14071                 }
14072                 t = btf_type_by_id(btf, t->type);
14073                 if (!btf_type_is_func_proto(t))
14074                         return -EINVAL;
14075                 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
14076                 if (ret)
14077                         return ret;
14078                 break;
14079         default:
14080                 if (!prog_extension)
14081                         return -EINVAL;
14082                 fallthrough;
14083         case BPF_MODIFY_RETURN:
14084         case BPF_LSM_MAC:
14085         case BPF_TRACE_FENTRY:
14086         case BPF_TRACE_FEXIT:
14087                 if (!btf_type_is_func(t)) {
14088                         bpf_log(log, "attach_btf_id %u is not a function\n",
14089                                 btf_id);
14090                         return -EINVAL;
14091                 }
14092                 if (prog_extension &&
14093                     btf_check_type_match(log, prog, btf, t))
14094                         return -EINVAL;
14095                 t = btf_type_by_id(btf, t->type);
14096                 if (!btf_type_is_func_proto(t))
14097                         return -EINVAL;
14098
14099                 if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
14100                     (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
14101                      prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
14102                         return -EINVAL;
14103
14104                 if (tgt_prog && conservative)
14105                         t = NULL;
14106
14107                 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
14108                 if (ret < 0)
14109                         return ret;
14110
14111                 if (tgt_prog) {
14112                         if (subprog == 0)
14113                                 addr = (long) tgt_prog->bpf_func;
14114                         else
14115                                 addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
14116                 } else {
14117                         addr = kallsyms_lookup_name(tname);
14118                         if (!addr) {
14119                                 bpf_log(log,
14120                                         "The address of function %s cannot be found\n",
14121                                         tname);
14122                                 return -ENOENT;
14123                         }
14124                 }
14125
14126                 if (prog->aux->sleepable) {
14127                         ret = -EINVAL;
14128                         switch (prog->type) {
14129                         case BPF_PROG_TYPE_TRACING:
14130                                 /* fentry/fexit/fmod_ret progs can be sleepable only if they are
14131                                  * attached to ALLOW_ERROR_INJECTION and are not in denylist.
14132                                  */
14133                                 if (!check_non_sleepable_error_inject(btf_id) &&
14134                                     within_error_injection_list(addr))
14135                                         ret = 0;
14136                                 break;
14137                         case BPF_PROG_TYPE_LSM:
14138                                 /* LSM progs check that they are attached to bpf_lsm_*() funcs.
14139                                  * Only some of them are sleepable.
14140                                  */
14141                                 if (bpf_lsm_is_sleepable_hook(btf_id))
14142                                         ret = 0;
14143                                 break;
14144                         default:
14145                                 break;
14146                         }
14147                         if (ret) {
14148                                 bpf_log(log, "%s is not sleepable\n", tname);
14149                                 return ret;
14150                         }
14151                 } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
14152                         if (tgt_prog) {
14153                                 bpf_log(log, "can't modify return codes of BPF programs\n");
14154                                 return -EINVAL;
14155                         }
14156                         ret = check_attach_modify_return(addr, tname);
14157                         if (ret) {
14158                                 bpf_log(log, "%s() is not modifiable\n", tname);
14159                                 return ret;
14160                         }
14161                 }
14162
14163                 break;
14164         }
14165         tgt_info->tgt_addr = addr;
14166         tgt_info->tgt_name = tname;
14167         tgt_info->tgt_type = t;
14168         return 0;
14169 }
14170
14171 BTF_SET_START(btf_id_deny)
14172 BTF_ID_UNUSED
14173 #ifdef CONFIG_SMP
14174 BTF_ID(func, migrate_disable)
14175 BTF_ID(func, migrate_enable)
14176 #endif
14177 #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
14178 BTF_ID(func, rcu_read_unlock_strict)
14179 #endif
14180 BTF_SET_END(btf_id_deny)
14181
14182 static int check_attach_btf_id(struct bpf_verifier_env *env)
14183 {
14184         struct bpf_prog *prog = env->prog;
14185         struct bpf_prog *tgt_prog = prog->aux->dst_prog;
14186         struct bpf_attach_target_info tgt_info = {};
14187         u32 btf_id = prog->aux->attach_btf_id;
14188         struct bpf_trampoline *tr;
14189         int ret;
14190         u64 key;
14191
14192         if (prog->type == BPF_PROG_TYPE_SYSCALL) {
14193                 if (prog->aux->sleepable)
14194                         /* attach_btf_id checked to be zero already */
14195                         return 0;
14196                 verbose(env, "Syscall programs can only be sleepable\n");
14197                 return -EINVAL;
14198         }
14199
14200         if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
14201             prog->type != BPF_PROG_TYPE_LSM) {
14202                 verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
14203                 return -EINVAL;
14204         }
14205
14206         if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
14207                 return check_struct_ops_btf_id(env);
14208
14209         if (prog->type != BPF_PROG_TYPE_TRACING &&
14210             prog->type != BPF_PROG_TYPE_LSM &&
14211             prog->type != BPF_PROG_TYPE_EXT)
14212                 return 0;
14213
14214         ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
14215         if (ret)
14216                 return ret;
14217
14218         if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
14219                 /* to make freplace equivalent to their targets, they need to
14220                  * inherit env->ops and expected_attach_type for the rest of the
14221                  * verification
14222                  */
14223                 env->ops = bpf_verifier_ops[tgt_prog->type];
14224                 prog->expected_attach_type = tgt_prog->expected_attach_type;
14225         }
14226
14227         /* store info about the attachment target that will be used later */
14228         prog->aux->attach_func_proto = tgt_info.tgt_type;
14229         prog->aux->attach_func_name = tgt_info.tgt_name;
14230
14231         if (tgt_prog) {
14232                 prog->aux->saved_dst_prog_type = tgt_prog->type;
14233                 prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
14234         }
14235
14236         if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
14237                 prog->aux->attach_btf_trace = true;
14238                 return 0;
14239         } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
14240                 if (!bpf_iter_prog_supported(prog))
14241                         return -EINVAL;
14242                 return 0;
14243         }
14244
14245         if (prog->type == BPF_PROG_TYPE_LSM) {
14246                 ret = bpf_lsm_verify_prog(&env->log, prog);
14247                 if (ret < 0)
14248                         return ret;
14249         } else if (prog->type == BPF_PROG_TYPE_TRACING &&
14250                    btf_id_set_contains(&btf_id_deny, btf_id)) {
14251                 return -EINVAL;
14252         }
14253
14254         key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
14255         tr = bpf_trampoline_get(key, &tgt_info);
14256         if (!tr)
14257                 return -ENOMEM;
14258
14259         prog->aux->dst_trampoline = tr;
14260         return 0;
14261 }
14262
14263 struct btf *bpf_get_btf_vmlinux(void)
14264 {
14265         if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
14266                 mutex_lock(&bpf_verifier_lock);
14267                 if (!btf_vmlinux)
14268                         btf_vmlinux = btf_parse_vmlinux();
14269                 mutex_unlock(&bpf_verifier_lock);
14270         }
14271         return btf_vmlinux;
14272 }
14273
14274 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
14275 {
14276         u64 start_time = ktime_get_ns();
14277         struct bpf_verifier_env *env;
14278         struct bpf_verifier_log *log;
14279         int i, len, ret = -EINVAL;
14280         bool is_priv;
14281
14282         /* no program is valid */
14283         if (ARRAY_SIZE(bpf_verifier_ops) == 0)
14284                 return -EINVAL;
14285
14286         /* 'struct bpf_verifier_env' can be global, but since it's not small,
14287          * allocate/free it every time bpf_check() is called
14288          */
14289         env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
14290         if (!env)
14291                 return -ENOMEM;
14292         log = &env->log;
14293
14294         len = (*prog)->len;
14295         env->insn_aux_data =
14296                 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
14297         ret = -ENOMEM;
14298         if (!env->insn_aux_data)
14299                 goto err_free_env;
14300         for (i = 0; i < len; i++)
14301                 env->insn_aux_data[i].orig_idx = i;
14302         env->prog = *prog;
14303         env->ops = bpf_verifier_ops[env->prog->type];
14304         env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
14305         is_priv = bpf_capable();
14306
14307         bpf_get_btf_vmlinux();
14308
14309         /* grab the mutex to protect few globals used by verifier */
14310         if (!is_priv)
14311                 mutex_lock(&bpf_verifier_lock);
14312
14313         if (attr->log_level || attr->log_buf || attr->log_size) {
14314                 /* user requested verbose verifier output
14315                  * and supplied buffer to store the verification trace
14316                  */
14317                 log->level = attr->log_level;
14318                 log->ubuf = (char __user *) (unsigned long) attr->log_buf;
14319                 log->len_total = attr->log_size;
14320
14321                 /* log attributes have to be sane */
14322                 if (!bpf_verifier_log_attr_valid(log)) {
14323                         ret = -EINVAL;
14324                         goto err_unlock;
14325                 }
14326         }
14327
14328         mark_verifier_state_clean(env);
14329
14330         if (IS_ERR(btf_vmlinux)) {
14331                 /* Either gcc or pahole or kernel are broken. */
14332                 verbose(env, "in-kernel BTF is malformed\n");
14333                 ret = PTR_ERR(btf_vmlinux);
14334                 goto skip_full_check;
14335         }
14336
14337         env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
14338         if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
14339                 env->strict_alignment = true;
14340         if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
14341                 env->strict_alignment = false;
14342
14343         env->allow_ptr_leaks = bpf_allow_ptr_leaks();
14344         env->allow_uninit_stack = bpf_allow_uninit_stack();
14345         env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
14346         env->bypass_spec_v1 = bpf_bypass_spec_v1();
14347         env->bypass_spec_v4 = bpf_bypass_spec_v4();
14348         env->bpf_capable = bpf_capable();
14349
14350         if (is_priv)
14351                 env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
14352
14353         env->explored_states = kvcalloc(state_htab_size(env),
14354                                        sizeof(struct bpf_verifier_state_list *),
14355                                        GFP_USER);
14356         ret = -ENOMEM;
14357         if (!env->explored_states)
14358                 goto skip_full_check;
14359
14360         ret = add_subprog_and_kfunc(env);
14361         if (ret < 0)
14362                 goto skip_full_check;
14363
14364         ret = check_subprogs(env);
14365         if (ret < 0)
14366                 goto skip_full_check;
14367
14368         ret = check_btf_info(env, attr, uattr);
14369         if (ret < 0)
14370                 goto skip_full_check;
14371
14372         ret = check_attach_btf_id(env);
14373         if (ret)
14374                 goto skip_full_check;
14375
14376         ret = resolve_pseudo_ldimm64(env);
14377         if (ret < 0)
14378                 goto skip_full_check;
14379
14380         if (bpf_prog_is_dev_bound(env->prog->aux)) {
14381                 ret = bpf_prog_offload_verifier_prep(env->prog);
14382                 if (ret)
14383                         goto skip_full_check;
14384         }
14385
14386         ret = check_cfg(env);
14387         if (ret < 0)
14388                 goto skip_full_check;
14389
14390         ret = do_check_subprogs(env);
14391         ret = ret ?: do_check_main(env);
14392
14393         if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
14394                 ret = bpf_prog_offload_finalize(env);
14395
14396 skip_full_check:
14397         kvfree(env->explored_states);
14398
14399         if (ret == 0)
14400                 ret = check_max_stack_depth(env);
14401
14402         /* instruction rewrites happen after this point */
14403         if (is_priv) {
14404                 if (ret == 0)
14405                         opt_hard_wire_dead_code_branches(env);
14406                 if (ret == 0)
14407                         ret = opt_remove_dead_code(env);
14408                 if (ret == 0)
14409                         ret = opt_remove_nops(env);
14410         } else {
14411                 if (ret == 0)
14412                         sanitize_dead_code(env);
14413         }
14414
14415         if (ret == 0)
14416                 /* program is valid, convert *(u32*)(ctx + off) accesses */
14417                 ret = convert_ctx_accesses(env);
14418
14419         if (ret == 0)
14420                 ret = do_misc_fixups(env);
14421
14422         /* do 32-bit optimization after insn patching has done so those patched
14423          * insns could be handled correctly.
14424          */
14425         if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
14426                 ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
14427                 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
14428                                                                      : false;
14429         }
14430
14431         if (ret == 0)
14432                 ret = fixup_call_args(env);
14433
14434         env->verification_time = ktime_get_ns() - start_time;
14435         print_verification_stats(env);
14436         env->prog->aux->verified_insns = env->insn_processed;
14437
14438         if (log->level && bpf_verifier_log_full(log))
14439                 ret = -ENOSPC;
14440         if (log->level && !log->ubuf) {
14441                 ret = -EFAULT;
14442                 goto err_release_maps;
14443         }
14444
14445         if (ret)
14446                 goto err_release_maps;
14447
14448         if (env->used_map_cnt) {
14449                 /* if program passed verifier, update used_maps in bpf_prog_info */
14450                 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
14451                                                           sizeof(env->used_maps[0]),
14452                                                           GFP_KERNEL);
14453
14454                 if (!env->prog->aux->used_maps) {
14455                         ret = -ENOMEM;
14456                         goto err_release_maps;
14457                 }
14458
14459                 memcpy(env->prog->aux->used_maps, env->used_maps,
14460                        sizeof(env->used_maps[0]) * env->used_map_cnt);
14461                 env->prog->aux->used_map_cnt = env->used_map_cnt;
14462         }
14463         if (env->used_btf_cnt) {
14464                 /* if program passed verifier, update used_btfs in bpf_prog_aux */
14465                 env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
14466                                                           sizeof(env->used_btfs[0]),
14467                                                           GFP_KERNEL);
14468                 if (!env->prog->aux->used_btfs) {
14469                         ret = -ENOMEM;
14470                         goto err_release_maps;
14471                 }
14472
14473                 memcpy(env->prog->aux->used_btfs, env->used_btfs,
14474                        sizeof(env->used_btfs[0]) * env->used_btf_cnt);
14475                 env->prog->aux->used_btf_cnt = env->used_btf_cnt;
14476         }
14477         if (env->used_map_cnt || env->used_btf_cnt) {
14478                 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
14479                  * bpf_ld_imm64 instructions
14480                  */
14481                 convert_pseudo_ld_imm64(env);
14482         }
14483
14484         adjust_btf_func(env);
14485
14486 err_release_maps:
14487         if (!env->prog->aux->used_maps)
14488                 /* if we didn't copy map pointers into bpf_prog_info, release
14489                  * them now. Otherwise free_used_maps() will release them.
14490                  */
14491                 release_maps(env);
14492         if (!env->prog->aux->used_btfs)
14493                 release_btfs(env);
14494
14495         /* extension progs temporarily inherit the attach_type of their targets
14496            for verification purposes, so set it back to zero before returning
14497          */
14498         if (env->prog->type == BPF_PROG_TYPE_EXT)
14499                 env->prog->expected_attach_type = 0;
14500
14501         *prog = env->prog;
14502 err_unlock:
14503         if (!is_priv)
14504                 mutex_unlock(&bpf_verifier_lock);
14505         vfree(env->insn_aux_data);
14506 err_free_env:
14507         kfree(env);
14508         return ret;
14509 }