kernel/bpf/verifier.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
   3  * Copyright (c) 2016 Facebook
   4  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
   5  */
   6 #include <uapi/linux/btf.h>
   7 #include <linux/kernel.h>
   8 #include <linux/types.h>
   9 #include <linux/slab.h>
  10 #include <linux/bpf.h>
  11 #include <linux/btf.h>
  12 #include <linux/bpf_verifier.h>
  13 #include <linux/filter.h>
  14 #include <net/netlink.h>
  15 #include <linux/file.h>
  16 #include <linux/vmalloc.h>
  17 #include <linux/stringify.h>
  18 #include <linux/bsearch.h>
  19 #include <linux/sort.h>
  20 #include <linux/perf_event.h>
  21 #include <linux/ctype.h>
  22 #include <linux/error-injection.h>
  23 #include <linux/bpf_lsm.h>
  24 #include <linux/btf_ids.h>
  25
  26 #include "disasm.h"
  27
  28 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
  29 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
  30         [_id] = & _name ## _verifier_ops,
  31 #define BPF_MAP_TYPE(_id, _ops)
  32 #define BPF_LINK_TYPE(_id, _name)
  33 #include <linux/bpf_types.h>
  34 #undef BPF_PROG_TYPE
  35 #undef BPF_MAP_TYPE
  36 #undef BPF_LINK_TYPE
  37 };
  38
  39 /* bpf_check() is a static code analyzer that walks eBPF program
  40  * instruction by instruction and updates register/stack state.
  41  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
  42  *
  43  * The first pass is depth-first-search to check that the program is a DAG.
  44  * It rejects the following programs:
  45  * - larger than BPF_MAXINSNS insns
  46  * - if loop is present (detected via back-edge)
  47  * - unreachable insns exist (shouldn't be a forest. program = one function)
  48  * - out of bounds or malformed jumps
  49  * The second pass is all possible path descent from the 1st insn.
  50  * Since it's analyzing all paths through the program, the length of the
  51  * analysis is limited to 64k insn, which may be hit even if total number of
  52  * insn is less then 4K, but there are too many branches that change stack/regs.
  53  * Number of 'branches to be analyzed' is limited to 1k
  54  *
  55  * On entry to each instruction, each register has a type, and the instruction
  56  * changes the types of the registers depending on instruction semantics.
  57  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
  58  * copied to R1.
  59  *
  60  * All registers are 64-bit.
  61  * R0 - return register
  62  * R1-R5 argument passing registers
  63  * R6-R9 callee saved registers
  64  * R10 - frame pointer read-only
  65  *
  66  * At the start of BPF program the register R1 contains a pointer to bpf_context
  67  * and has type PTR_TO_CTX.
  68  *
  69  * Verifier tracks arithmetic operations on pointers in case:
  70  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
  71  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
  72  * 1st insn copies R10 (which has FRAME_PTR) type into R1
  73  * and 2nd arithmetic instruction is pattern matched to recognize
  74  * that it wants to construct a pointer to some element within stack.
  75  * So after 2nd insn, the register R1 has type PTR_TO_STACK
  76  * (and -20 constant is saved for further stack bounds checking).
  77  * Meaning that this reg is a pointer to stack plus known immediate constant.
  78  *
  79  * Most of the time the registers have SCALAR_VALUE type, which
  80  * means the register has some value, but it's not a valid pointer.
  81  * (like pointer plus pointer becomes SCALAR_VALUE type)
  82  *
  83  * When verifier sees load or store instructions the type of base register
  84  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
  85  * four pointer types recognized by check_mem_access() function.
  86  *
  87  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
  88  * and the range of [ptr, ptr + map's value_size) is accessible.
  89  *
  90  * registers used to pass values to function calls are checked against
  91  * function argument constraints.
  92  *
  93  * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
  94  * It means that the register type passed to this function must be
  95  * PTR_TO_STACK and it will be used inside the function as
  96  * 'pointer to map element key'
  97  *
  98  * For example the argument constraints for bpf_map_lookup_elem():
  99  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 100  *   .arg1_type = ARG_CONST_MAP_PTR,
 101  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 102  *
 103  * ret_type says that this function returns 'pointer to map elem value or null'
 104  * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 105  * 2nd argument should be a pointer to stack, which will be used inside
 106  * the helper function as a pointer to map element key.
 107  *
 108  * On the kernel side the helper function looks like:
 109  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 110  * {
 111  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 112  *    void *key = (void *) (unsigned long) r2;
 113  *    void *value;
 114  *
 115  *    here kernel can access 'key' and 'map' pointers safely, knowing that
 116  *    [key, key + map->key_size) bytes are valid and were initialized on
 117  *    the stack of eBPF program.
 118  * }
 119  *
 120  * Corresponding eBPF program may look like:
 121  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 122  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 123  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 124  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 125  * here verifier looks at prototype of map_lookup_elem() and sees:
 126  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 127  * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 128  *
 129  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 130  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 131  * and were initialized prior to this call.
 132  * If it's ok, then verifier allows this BPF_CALL insn and looks at
 133  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 134  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 135  * returns either pointer to map value or NULL.
 136  *
 137  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 138  * insn, the register holding that pointer in the true branch changes state to
 139  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 140  * branch. See check_cond_jmp_op().
 141  *
 142  * After the call R0 is set to return type of the function and registers R1-R5
 143  * are set to NOT_INIT to indicate that they are no longer readable.
 144  *
 145  * The following reference types represent a potential reference to a kernel
 146  * resource which, after first being allocated, must be checked and freed by
 147  * the BPF program:
 148  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 149  *
 150  * When the verifier sees a helper call return a reference type, it allocates a
 151  * pointer id for the reference and stores it in the current function state.
 152  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 153  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 154  * passes through a NULL-check conditional. For the branch wherein the state is
 155  * changed to CONST_IMM, the verifier releases the reference.
 156  *
 157  * For each helper function that allocates a reference, such as
 158  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 159  * bpf_sk_release(). When a reference type passes into the release function,
 160  * the verifier also releases the reference. If any unchecked or unreleased
 161  * reference remains at the end of the program, the verifier rejects it.
 162  */
 163
 164 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
 165 struct bpf_verifier_stack_elem {
 166         /* verifer state is 'st'
 167          * before processing instruction 'insn_idx'
 168          * and after processing instruction 'prev_insn_idx'
 169          */
 170         struct bpf_verifier_state st;
 171         int insn_idx;
 172         int prev_insn_idx;
 173         struct bpf_verifier_stack_elem *next;
 174         /* length of verifier log at the time this state was pushed on stack */
 175         u32 log_pos;
 176 };
 177
 178 #define BPF_COMPLEXITY_LIMIT_JMP_SEQ    8192
 179 #define BPF_COMPLEXITY_LIMIT_STATES     64
 180
 181 #define BPF_MAP_KEY_POISON      (1ULL << 63)
 182 #define BPF_MAP_KEY_SEEN        (1ULL << 62)
 183
 184 #define BPF_MAP_PTR_UNPRIV      1UL
 185 #define BPF_MAP_PTR_POISON      ((void *)((0xeB9FUL << 1) +     \
 186                                           POISON_POINTER_DELTA))
 187 #define BPF_MAP_PTR(X)          ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
 188
 189 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
 190 {
 191         return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
 192 }
 193
 194 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
 195 {
 196         return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
 197 }
 198
 199 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
 200                               const struct bpf_map *map, bool unpriv)
 201 {
 202         BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
 203         unpriv |= bpf_map_ptr_unpriv(aux);
 204         aux->map_ptr_state = (unsigned long)map |
 205                              (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
 206 }
 207
 208 static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
 209 {
 210         return aux->map_key_state & BPF_MAP_KEY_POISON;
 211 }
 212
 213 static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
 214 {
 215         return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
 216 }
 217
 218 static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
 219 {
 220         return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
 221 }
 222
 223 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
 224 {
 225         bool poisoned = bpf_map_key_poisoned(aux);
 226
 227         aux->map_key_state = state | BPF_MAP_KEY_SEEN |
 228                              (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
 229 }
 230
 231 static bool bpf_pseudo_call(const struct bpf_insn *insn)
 232 {
 233         return insn->code == (BPF_JMP | BPF_CALL) &&
 234                insn->src_reg == BPF_PSEUDO_CALL;
 235 }
 236
 237 static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
 238 {
 239         return insn->code == (BPF_JMP | BPF_CALL) &&
 240                insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
 241 }
 242
 243 struct bpf_call_arg_meta {
 244         struct bpf_map *map_ptr;
 245         bool raw_mode;
 246         bool pkt_access;
 247         int regno;
 248         int access_size;
 249         int mem_size;
 250         u64 msize_max_value;
 251         int ref_obj_id;
 252         int map_uid;
 253         int func_id;
 254         struct btf *btf;
 255         u32 btf_id;
 256         struct btf *ret_btf;
 257         u32 ret_btf_id;
 258         u32 subprogno;
 259 };
 260
 261 struct btf *btf_vmlinux;
 262
 263 static DEFINE_MUTEX(bpf_verifier_lock);
 264
 265 static const struct bpf_line_info *
 266 find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
 267 {
 268         const struct bpf_line_info *linfo;
 269         const struct bpf_prog *prog;
 270         u32 i, nr_linfo;
 271
 272         prog = env->prog;
 273         nr_linfo = prog->aux->nr_linfo;
 274
 275         if (!nr_linfo || insn_off >= prog->len)
 276                 return NULL;
 277
 278         linfo = prog->aux->linfo;
 279         for (i = 1; i < nr_linfo; i++)
 280                 if (insn_off < linfo[i].insn_off)
 281                         break;
 282
 283         return &linfo[i - 1];
 284 }
 285
 286 void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
 287                        va_list args)
 288 {
 289         unsigned int n;
 290
 291         n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
 292
 293         WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
 294                   "verifier log line truncated - local buffer too short\n");
 295
 296         n = min(log->len_total - log->len_used - 1, n);
 297         log->kbuf[n] = '\0';
 298
 299         if (log->level == BPF_LOG_KERNEL) {
 300                 pr_err("BPF:%s\n", log->kbuf);
 301                 return;
 302         }
 303         if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
 304                 log->len_used += n;
 305         else
 306                 log->ubuf = NULL;
 307 }
 308
 309 static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
 310 {
 311         char zero = 0;
 312
 313         if (!bpf_verifier_log_needed(log))
 314                 return;
 315
 316         log->len_used = new_pos;
 317         if (put_user(zero, log->ubuf + new_pos))
 318                 log->ubuf = NULL;
 319 }
 320
 321 /* log_level controls verbosity level of eBPF verifier.
 322  * bpf_verifier_log_write() is used to dump the verification trace to the log,
 323  * so the user can figure out what's wrong with the program
 324  */
 325 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
 326                                            const char *fmt, ...)
 327 {
 328         va_list args;
 329
 330         if (!bpf_verifier_log_needed(&env->log))
 331                 return;
 332
 333         va_start(args, fmt);
 334         bpf_verifier_vlog(&env->log, fmt, args);
 335         va_end(args);
 336 }
 337 EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
 338
 339 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
 340 {
 341         struct bpf_verifier_env *env = private_data;
 342         va_list args;
 343
 344         if (!bpf_verifier_log_needed(&env->log))
 345                 return;
 346
 347         va_start(args, fmt);
 348         bpf_verifier_vlog(&env->log, fmt, args);
 349         va_end(args);
 350 }
 351
 352 __printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
 353                             const char *fmt, ...)
 354 {
 355         va_list args;
 356
 357         if (!bpf_verifier_log_needed(log))
 358                 return;
 359
 360         va_start(args, fmt);
 361         bpf_verifier_vlog(log, fmt, args);
 362         va_end(args);
 363 }
 364
 365 static const char *ltrim(const char *s)
 366 {
 367         while (isspace(*s))
 368                 s++;
 369
 370         return s;
 371 }
 372
 373 __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
 374                                          u32 insn_off,
 375                                          const char *prefix_fmt, ...)
 376 {
 377         const struct bpf_line_info *linfo;
 378
 379         if (!bpf_verifier_log_needed(&env->log))
 380                 return;
 381
 382         linfo = find_linfo(env, insn_off);
 383         if (!linfo || linfo == env->prev_linfo)
 384                 return;
 385
 386         if (prefix_fmt) {
 387                 va_list args;
 388
 389                 va_start(args, prefix_fmt);
 390                 bpf_verifier_vlog(&env->log, prefix_fmt, args);
 391                 va_end(args);
 392         }
 393
 394         verbose(env, "%s\n",
 395                 ltrim(btf_name_by_offset(env->prog->aux->btf,
 396                                          linfo->line_off)));
 397
 398         env->prev_linfo = linfo;
 399 }
 400
 401 static void verbose_invalid_scalar(struct bpf_verifier_env *env,
 402                                    struct bpf_reg_state *reg,
 403                                    struct tnum *range, const char *ctx,
 404                                    const char *reg_name)
 405 {
 406         char tn_buf[48];
 407
 408         verbose(env, "At %s the register %s ", ctx, reg_name);
 409         if (!tnum_is_unknown(reg->var_off)) {
 410                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 411                 verbose(env, "has value %s", tn_buf);
 412         } else {
 413                 verbose(env, "has unknown scalar value");
 414         }
 415         tnum_strn(tn_buf, sizeof(tn_buf), *range);
 416         verbose(env, " should have been in %s\n", tn_buf);
 417 }
 418
 419 static bool type_is_pkt_pointer(enum bpf_reg_type type)
 420 {
 421         return type == PTR_TO_PACKET ||
 422                type == PTR_TO_PACKET_META;
 423 }
 424
 425 static bool type_is_sk_pointer(enum bpf_reg_type type)
 426 {
 427         return type == PTR_TO_SOCKET ||
 428                 type == PTR_TO_SOCK_COMMON ||
 429                 type == PTR_TO_TCP_SOCK ||
 430                 type == PTR_TO_XDP_SOCK;
 431 }
 432
 433 static bool reg_type_not_null(enum bpf_reg_type type)
 434 {
 435         return type == PTR_TO_SOCKET ||
 436                 type == PTR_TO_TCP_SOCK ||
 437                 type == PTR_TO_MAP_VALUE ||
 438                 type == PTR_TO_MAP_KEY ||
 439                 type == PTR_TO_SOCK_COMMON;
 440 }
 441
 442 static bool reg_type_may_be_null(enum bpf_reg_type type)
 443 {
 444         return type == PTR_TO_MAP_VALUE_OR_NULL ||
 445                type == PTR_TO_SOCKET_OR_NULL ||
 446                type == PTR_TO_SOCK_COMMON_OR_NULL ||
 447                type == PTR_TO_TCP_SOCK_OR_NULL ||
 448                type == PTR_TO_BTF_ID_OR_NULL ||
 449                type == PTR_TO_MEM_OR_NULL ||
 450                type == PTR_TO_RDONLY_BUF_OR_NULL ||
 451                type == PTR_TO_RDWR_BUF_OR_NULL;
 452 }
 453
 454 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
 455 {
 456         return reg->type == PTR_TO_MAP_VALUE &&
 457                 map_value_has_spin_lock(reg->map_ptr);
 458 }
 459
 460 static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
 461 {
 462         return type == PTR_TO_SOCKET ||
 463                 type == PTR_TO_SOCKET_OR_NULL ||
 464                 type == PTR_TO_TCP_SOCK ||
 465                 type == PTR_TO_TCP_SOCK_OR_NULL ||
 466                 type == PTR_TO_MEM ||
 467                 type == PTR_TO_MEM_OR_NULL;
 468 }
 469
 470 static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
 471 {
 472         return type == ARG_PTR_TO_SOCK_COMMON;
 473 }
 474
 475 static bool arg_type_may_be_null(enum bpf_arg_type type)
 476 {
 477         return type == ARG_PTR_TO_MAP_VALUE_OR_NULL ||
 478                type == ARG_PTR_TO_MEM_OR_NULL ||
 479                type == ARG_PTR_TO_CTX_OR_NULL ||
 480                type == ARG_PTR_TO_SOCKET_OR_NULL ||
 481                type == ARG_PTR_TO_ALLOC_MEM_OR_NULL ||
 482                type == ARG_PTR_TO_STACK_OR_NULL;
 483 }
 484
 485 /* Determine whether the function releases some resources allocated by another
 486  * function call. The first reference type argument will be assumed to be
 487  * released by release_reference().
 488  */
 489 static bool is_release_function(enum bpf_func_id func_id)
 490 {
 491         return func_id == BPF_FUNC_sk_release ||
 492                func_id == BPF_FUNC_ringbuf_submit ||
 493                func_id == BPF_FUNC_ringbuf_discard;
 494 }
 495
 496 static bool may_be_acquire_function(enum bpf_func_id func_id)
 497 {
 498         return func_id == BPF_FUNC_sk_lookup_tcp ||
 499                 func_id == BPF_FUNC_sk_lookup_udp ||
 500                 func_id == BPF_FUNC_skc_lookup_tcp ||
 501                 func_id == BPF_FUNC_map_lookup_elem ||
 502                 func_id == BPF_FUNC_ringbuf_reserve;
 503 }
 504
 505 static bool is_acquire_function(enum bpf_func_id func_id,
 506                                 const struct bpf_map *map)
 507 {
 508         enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
 509
 510         if (func_id == BPF_FUNC_sk_lookup_tcp ||
 511             func_id == BPF_FUNC_sk_lookup_udp ||
 512             func_id == BPF_FUNC_skc_lookup_tcp ||
 513             func_id == BPF_FUNC_ringbuf_reserve)
 514                 return true;
 515
 516         if (func_id == BPF_FUNC_map_lookup_elem &&
 517             (map_type == BPF_MAP_TYPE_SOCKMAP ||
 518              map_type == BPF_MAP_TYPE_SOCKHASH))
 519                 return true;
 520
 521         return false;
 522 }
 523
 524 static bool is_ptr_cast_function(enum bpf_func_id func_id)
 525 {
 526         return func_id == BPF_FUNC_tcp_sock ||
 527                 func_id == BPF_FUNC_sk_fullsock ||
 528                 func_id == BPF_FUNC_skc_to_tcp_sock ||
 529                 func_id == BPF_FUNC_skc_to_tcp6_sock ||
 530                 func_id == BPF_FUNC_skc_to_udp6_sock ||
 531                 func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
 532                 func_id == BPF_FUNC_skc_to_tcp_request_sock;
 533 }
 534
 535 static bool is_cmpxchg_insn(const struct bpf_insn *insn)
 536 {
 537         return BPF_CLASS(insn->code) == BPF_STX &&
 538                BPF_MODE(insn->code) == BPF_ATOMIC &&
 539                insn->imm == BPF_CMPXCHG;
 540 }
 541
 542 /* string representation of 'enum bpf_reg_type' */
 543 static const char * const reg_type_str[] = {
 544         [NOT_INIT]              = "?",
 545         [SCALAR_VALUE]          = "inv",
 546         [PTR_TO_CTX]            = "ctx",
 547         [CONST_PTR_TO_MAP]      = "map_ptr",
 548         [PTR_TO_MAP_VALUE]      = "map_value",
 549         [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
 550         [PTR_TO_STACK]          = "fp",
 551         [PTR_TO_PACKET]         = "pkt",
 552         [PTR_TO_PACKET_META]    = "pkt_meta",
 553         [PTR_TO_PACKET_END]     = "pkt_end",
 554         [PTR_TO_FLOW_KEYS]      = "flow_keys",
 555         [PTR_TO_SOCKET]         = "sock",
 556         [PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
 557         [PTR_TO_SOCK_COMMON]    = "sock_common",
 558         [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
 559         [PTR_TO_TCP_SOCK]       = "tcp_sock",
 560         [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
 561         [PTR_TO_TP_BUFFER]      = "tp_buffer",
 562         [PTR_TO_XDP_SOCK]       = "xdp_sock",
 563         [PTR_TO_BTF_ID]         = "ptr_",
 564         [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_",
 565         [PTR_TO_PERCPU_BTF_ID]  = "percpu_ptr_",
 566         [PTR_TO_MEM]            = "mem",
 567         [PTR_TO_MEM_OR_NULL]    = "mem_or_null",
 568         [PTR_TO_RDONLY_BUF]     = "rdonly_buf",
 569         [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
 570         [PTR_TO_RDWR_BUF]       = "rdwr_buf",
 571         [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
 572         [PTR_TO_FUNC]           = "func",
 573         [PTR_TO_MAP_KEY]        = "map_key",
 574 };
 575
 576 static char slot_type_char[] = {
 577         [STACK_INVALID] = '?',
 578         [STACK_SPILL]   = 'r',
 579         [STACK_MISC]    = 'm',
 580         [STACK_ZERO]    = '0',
 581 };
 582
 583 static void print_liveness(struct bpf_verifier_env *env,
 584                            enum bpf_reg_liveness live)
 585 {
 586         if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
 587             verbose(env, "_");
 588         if (live & REG_LIVE_READ)
 589                 verbose(env, "r");
 590         if (live & REG_LIVE_WRITTEN)
 591                 verbose(env, "w");
 592         if (live & REG_LIVE_DONE)
 593                 verbose(env, "D");
 594 }
 595
 596 static struct bpf_func_state *func(struct bpf_verifier_env *env,
 597                                    const struct bpf_reg_state *reg)
 598 {
 599         struct bpf_verifier_state *cur = env->cur_state;
 600
 601         return cur->frame[reg->frameno];
 602 }
 603
 604 static const char *kernel_type_name(const struct btf* btf, u32 id)
 605 {
 606         return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
 607 }
 608
 609 /* The reg state of a pointer or a bounded scalar was saved when
 610  * it was spilled to the stack.
 611  */
 612 static bool is_spilled_reg(const struct bpf_stack_state *stack)
 613 {
 614         return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
 615 }
 616
 617 static void scrub_spilled_slot(u8 *stype)
 618 {
 619         if (*stype != STACK_INVALID)
 620                 *stype = STACK_MISC;
 621 }
 622
 623 static void print_verifier_state(struct bpf_verifier_env *env,
 624                                  const struct bpf_func_state *state)
 625 {
 626         const struct bpf_reg_state *reg;
 627         enum bpf_reg_type t;
 628         int i;
 629
 630         if (state->frameno)
 631                 verbose(env, " frame%d:", state->frameno);
 632         for (i = 0; i < MAX_BPF_REG; i++) {
 633                 reg = &state->regs[i];
 634                 t = reg->type;
 635                 if (t == NOT_INIT)
 636                         continue;
 637                 verbose(env, " R%d", i);
 638                 print_liveness(env, reg->live);
 639                 verbose(env, "=%s", reg_type_str[t]);
 640                 if (t == SCALAR_VALUE && reg->precise)
 641                         verbose(env, "P");
 642                 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
 643                     tnum_is_const(reg->var_off)) {
 644                         /* reg->off should be 0 for SCALAR_VALUE */
 645                         verbose(env, "%lld", reg->var_off.value + reg->off);
 646                 } else {
 647                         if (t == PTR_TO_BTF_ID ||
 648                             t == PTR_TO_BTF_ID_OR_NULL ||
 649                             t == PTR_TO_PERCPU_BTF_ID)
 650                                 verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
 651                         verbose(env, "(id=%d", reg->id);
 652                         if (reg_type_may_be_refcounted_or_null(t))
 653                                 verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
 654                         if (t != SCALAR_VALUE)
 655                                 verbose(env, ",off=%d", reg->off);
 656                         if (type_is_pkt_pointer(t))
 657                                 verbose(env, ",r=%d", reg->range);
 658                         else if (t == CONST_PTR_TO_MAP ||
 659                                  t == PTR_TO_MAP_KEY ||
 660                                  t == PTR_TO_MAP_VALUE ||
 661                                  t == PTR_TO_MAP_VALUE_OR_NULL)
 662                                 verbose(env, ",ks=%d,vs=%d",
 663                                         reg->map_ptr->key_size,
 664                                         reg->map_ptr->value_size);
 665                         if (tnum_is_const(reg->var_off)) {
 666                                 /* Typically an immediate SCALAR_VALUE, but
 667                                  * could be a pointer whose offset is too big
 668                                  * for reg->off
 669                                  */
 670                                 verbose(env, ",imm=%llx", reg->var_off.value);
 671                         } else {
 672                                 if (reg->smin_value != reg->umin_value &&
 673                                     reg->smin_value != S64_MIN)
 674                                         verbose(env, ",smin_value=%lld",
 675                                                 (long long)reg->smin_value);
 676                                 if (reg->smax_value != reg->umax_value &&
 677                                     reg->smax_value != S64_MAX)
 678                                         verbose(env, ",smax_value=%lld",
 679                                                 (long long)reg->smax_value);
 680                                 if (reg->umin_value != 0)
 681                                         verbose(env, ",umin_value=%llu",
 682                                                 (unsigned long long)reg->umin_value);
 683                                 if (reg->umax_value != U64_MAX)
 684                                         verbose(env, ",umax_value=%llu",
 685                                                 (unsigned long long)reg->umax_value);
 686                                 if (!tnum_is_unknown(reg->var_off)) {
 687                                         char tn_buf[48];
 688
 689                                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 690                                         verbose(env, ",var_off=%s", tn_buf);
 691                                 }
 692                                 if (reg->s32_min_value != reg->smin_value &&
 693                                     reg->s32_min_value != S32_MIN)
 694                                         verbose(env, ",s32_min_value=%d",
 695                                                 (int)(reg->s32_min_value));
 696                                 if (reg->s32_max_value != reg->smax_value &&
 697                                     reg->s32_max_value != S32_MAX)
 698                                         verbose(env, ",s32_max_value=%d",
 699                                                 (int)(reg->s32_max_value));
 700                                 if (reg->u32_min_value != reg->umin_value &&
 701                                     reg->u32_min_value != U32_MIN)
 702                                         verbose(env, ",u32_min_value=%d",
 703                                                 (int)(reg->u32_min_value));
 704                                 if (reg->u32_max_value != reg->umax_value &&
 705                                     reg->u32_max_value != U32_MAX)
 706                                         verbose(env, ",u32_max_value=%d",
 707                                                 (int)(reg->u32_max_value));
 708                         }
 709                         verbose(env, ")");
 710                 }
 711         }
 712         for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
 713                 char types_buf[BPF_REG_SIZE + 1];
 714                 bool valid = false;
 715                 int j;
 716
 717                 for (j = 0; j < BPF_REG_SIZE; j++) {
 718                         if (state->stack[i].slot_type[j] != STACK_INVALID)
 719                                 valid = true;
 720                         types_buf[j] = slot_type_char[
 721                                         state->stack[i].slot_type[j]];
 722                 }
 723                 types_buf[BPF_REG_SIZE] = 0;
 724                 if (!valid)
 725                         continue;
 726                 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
 727                 print_liveness(env, state->stack[i].spilled_ptr.live);
 728                 if (is_spilled_reg(&state->stack[i])) {
 729                         reg = &state->stack[i].spilled_ptr;
 730                         t = reg->type;
 731                         verbose(env, "=%s", reg_type_str[t]);
 732                         if (t == SCALAR_VALUE && reg->precise)
 733                                 verbose(env, "P");
 734                         if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
 735                                 verbose(env, "%lld", reg->var_off.value + reg->off);
 736                 } else {
 737                         verbose(env, "=%s", types_buf);
 738                 }
 739         }
 740         if (state->acquired_refs && state->refs[0].id) {
 741                 verbose(env, " refs=%d", state->refs[0].id);
 742                 for (i = 1; i < state->acquired_refs; i++)
 743                         if (state->refs[i].id)
 744                                 verbose(env, ",%d", state->refs[i].id);
 745         }
 746         if (state->in_callback_fn)
 747                 verbose(env, " cb");
 748         if (state->in_async_callback_fn)
 749                 verbose(env, " async_cb");
 750         verbose(env, "\n");
 751 }
 752
 753 /* copy array src of length n * size bytes to dst. dst is reallocated if it's too
 754  * small to hold src. This is different from krealloc since we don't want to preserve
 755  * the contents of dst.
 756  *
 757  * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
 758  * not be allocated.
 759  */
 760 static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
 761 {
 762         size_t bytes;
 763
 764         if (ZERO_OR_NULL_PTR(src))
 765                 goto out;
 766
 767         if (unlikely(check_mul_overflow(n, size, &bytes)))
 768                 return NULL;
 769
 770         if (ksize(dst) < bytes) {
 771                 kfree(dst);
 772                 dst = kmalloc_track_caller(bytes, flags);
 773                 if (!dst)
 774                         return NULL;
 775         }
 776
 777         memcpy(dst, src, bytes);
 778 out:
 779         return dst ? dst : ZERO_SIZE_PTR;
 780 }
 781
 782 /* resize an array from old_n items to new_n items. the array is reallocated if it's too
 783  * small to hold new_n items. new items are zeroed out if the array grows.
 784  *
 785  * Contrary to krealloc_array, does not free arr if new_n is zero.
 786  */
 787 static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
 788 {
 789         if (!new_n || old_n == new_n)
 790                 goto out;
 791
 792         arr = krealloc_array(arr, new_n, size, GFP_KERNEL);
 793         if (!arr)
 794                 return NULL;
 795
 796         if (new_n > old_n)
 797                 memset(arr + old_n * size, 0, (new_n - old_n) * size);
 798
 799 out:
 800         return arr ? arr : ZERO_SIZE_PTR;
 801 }
 802
 803 static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
 804 {
 805         dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
 806                                sizeof(struct bpf_reference_state), GFP_KERNEL);
 807         if (!dst->refs)
 808                 return -ENOMEM;
 809
 810         dst->acquired_refs = src->acquired_refs;
 811         return 0;
 812 }
 813
 814 static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
 815 {
 816         size_t n = src->allocated_stack / BPF_REG_SIZE;
 817
 818         dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
 819                                 GFP_KERNEL);
 820         if (!dst->stack)
 821                 return -ENOMEM;
 822
 823         dst->allocated_stack = src->allocated_stack;
 824         return 0;
 825 }
 826
 827 static int resize_reference_state(struct bpf_func_state *state, size_t n)
 828 {
 829         state->refs = realloc_array(state->refs, state->acquired_refs, n,
 830                                     sizeof(struct bpf_reference_state));
 831         if (!state->refs)
 832                 return -ENOMEM;
 833
 834         state->acquired_refs = n;
 835         return 0;
 836 }
 837
 838 static int grow_stack_state(struct bpf_func_state *state, int size)
 839 {
 840         size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
 841
 842         if (old_n >= n)
 843                 return 0;
 844
 845         state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
 846         if (!state->stack)
 847                 return -ENOMEM;
 848
 849         state->allocated_stack = size;
 850         return 0;
 851 }
 852
 853 /* Acquire a pointer id from the env and update the state->refs to include
 854  * this new pointer reference.
 855  * On success, returns a valid pointer id to associate with the register
 856  * On failure, returns a negative errno.
 857  */
 858 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
 859 {
 860         struct bpf_func_state *state = cur_func(env);
 861         int new_ofs = state->acquired_refs;
 862         int id, err;
 863
 864         err = resize_reference_state(state, state->acquired_refs + 1);
 865         if (err)
 866                 return err;
 867         id = ++env->id_gen;
 868         state->refs[new_ofs].id = id;
 869         state->refs[new_ofs].insn_idx = insn_idx;
 870
 871         return id;
 872 }
 873
 874 /* release function corresponding to acquire_reference_state(). Idempotent. */
 875 static int release_reference_state(struct bpf_func_state *state, int ptr_id)
 876 {
 877         int i, last_idx;
 878
 879         last_idx = state->acquired_refs - 1;
 880         for (i = 0; i < state->acquired_refs; i++) {
 881                 if (state->refs[i].id == ptr_id) {
 882                         if (last_idx && i != last_idx)
 883                                 memcpy(&state->refs[i], &state->refs[last_idx],
 884                                        sizeof(*state->refs));
 885                         memset(&state->refs[last_idx], 0, sizeof(*state->refs));
 886                         state->acquired_refs--;
 887                         return 0;
 888                 }
 889         }
 890         return -EINVAL;
 891 }
 892
 893 static void free_func_state(struct bpf_func_state *state)
 894 {
 895         if (!state)
 896                 return;
 897         kfree(state->refs);
 898         kfree(state->stack);
 899         kfree(state);
 900 }
 901
 902 static void clear_jmp_history(struct bpf_verifier_state *state)
 903 {
 904         kfree(state->jmp_history);
 905         state->jmp_history = NULL;
 906         state->jmp_history_cnt = 0;
 907 }
 908
 909 static void free_verifier_state(struct bpf_verifier_state *state,
 910                                 bool free_self)
 911 {
 912         int i;
 913
 914         for (i = 0; i <= state->curframe; i++) {
 915                 free_func_state(state->frame[i]);
 916                 state->frame[i] = NULL;
 917         }
 918         clear_jmp_history(state);
 919         if (free_self)
 920                 kfree(state);
 921 }
 922
 923 /* copy verifier state from src to dst growing dst stack space
 924  * when necessary to accommodate larger src stack
 925  */
 926 static int copy_func_state(struct bpf_func_state *dst,
 927                            const struct bpf_func_state *src)
 928 {
 929         int err;
 930
 931         memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
 932         err = copy_reference_state(dst, src);
 933         if (err)
 934                 return err;
 935         return copy_stack_state(dst, src);
 936 }
 937
 938 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 939                                const struct bpf_verifier_state *src)
 940 {
 941         struct bpf_func_state *dst;
 942         int i, err;
 943
 944         dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
 945                                             src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
 946                                             GFP_USER);
 947         if (!dst_state->jmp_history)
 948                 return -ENOMEM;
 949         dst_state->jmp_history_cnt = src->jmp_history_cnt;
 950
 951         /* if dst has more stack frames then src frame, free them */
 952         for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
 953                 free_func_state(dst_state->frame[i]);
 954                 dst_state->frame[i] = NULL;
 955         }
 956         dst_state->speculative = src->speculative;
 957         dst_state->curframe = src->curframe;
 958         dst_state->active_spin_lock = src->active_spin_lock;
 959         dst_state->branches = src->branches;
 960         dst_state->parent = src->parent;
 961         dst_state->first_insn_idx = src->first_insn_idx;
 962         dst_state->last_insn_idx = src->last_insn_idx;
 963         for (i = 0; i <= src->curframe; i++) {
 964                 dst = dst_state->frame[i];
 965                 if (!dst) {
 966                         dst = kzalloc(sizeof(*dst), GFP_KERNEL);
 967                         if (!dst)
 968                                 return -ENOMEM;
 969                         dst_state->frame[i] = dst;
 970                 }
 971                 err = copy_func_state(dst, src->frame[i]);
 972                 if (err)
 973                         return err;
 974         }
 975         return 0;
 976 }
 977
 978 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
 979 {
 980         while (st) {
 981                 u32 br = --st->branches;
 982
 983                 /* WARN_ON(br > 1) technically makes sense here,
 984                  * but see comment in push_stack(), hence:
 985                  */
 986                 WARN_ONCE((int)br < 0,
 987                           "BUG update_branch_counts:branches_to_explore=%d\n",
 988                           br);
 989                 if (br)
 990                         break;
 991                 st = st->parent;
 992         }
 993 }
 994
 995 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
 996                      int *insn_idx, bool pop_log)
 997 {
 998         struct bpf_verifier_state *cur = env->cur_state;
 999         struct bpf_verifier_stack_elem *elem, *head = env->head;
1000         int err;
1001
1002         if (env->head == NULL)
1003                 return -ENOENT;
1004
1005         if (cur) {
1006                 err = copy_verifier_state(cur, &head->st);
1007                 if (err)
1008                         return err;
1009         }
1010         if (pop_log)
1011                 bpf_vlog_reset(&env->log, head->log_pos);
1012         if (insn_idx)
1013                 *insn_idx = head->insn_idx;
1014         if (prev_insn_idx)
1015                 *prev_insn_idx = head->prev_insn_idx;
1016         elem = head->next;
1017         free_verifier_state(&head->st, false);
1018         kfree(head);
1019         env->head = elem;
1020         env->stack_size--;
1021         return 0;
1022 }
1023
1024 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
1025                                              int insn_idx, int prev_insn_idx,
1026                                              bool speculative)
1027 {
1028         struct bpf_verifier_state *cur = env->cur_state;
1029         struct bpf_verifier_stack_elem *elem;
1030         int err;
1031
1032         elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1033         if (!elem)
1034                 goto err;
1035
1036         elem->insn_idx = insn_idx;
1037         elem->prev_insn_idx = prev_insn_idx;
1038         elem->next = env->head;
1039         elem->log_pos = env->log.len_used;
1040         env->head = elem;
1041         env->stack_size++;
1042         err = copy_verifier_state(&elem->st, cur);
1043         if (err)
1044                 goto err;
1045         elem->st.speculative |= speculative;
1046         if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1047                 verbose(env, "The sequence of %d jumps is too complex.\n",
1048                         env->stack_size);
1049                 goto err;
1050         }
1051         if (elem->st.parent) {
1052                 ++elem->st.parent->branches;
1053                 /* WARN_ON(branches > 2) technically makes sense here,
1054                  * but
1055                  * 1. speculative states will bump 'branches' for non-branch
1056                  * instructions
1057                  * 2. is_state_visited() heuristics may decide not to create
1058                  * a new state for a sequence of branches and all such current
1059                  * and cloned states will be pointing to a single parent state
1060                  * which might have large 'branches' count.
1061                  */
1062         }
1063         return &elem->st;
1064 err:
1065         free_verifier_state(env->cur_state, true);
1066         env->cur_state = NULL;
1067         /* pop all elements and return */
1068         while (!pop_stack(env, NULL, NULL, false));
1069         return NULL;
1070 }
1071
1072 #define CALLER_SAVED_REGS 6
1073 static const int caller_saved[CALLER_SAVED_REGS] = {
1074         BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
1075 };
1076
1077 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1078                                 struct bpf_reg_state *reg);
1079
1080 /* This helper doesn't clear reg->id */
1081 static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1082 {
1083         reg->var_off = tnum_const(imm);
1084         reg->smin_value = (s64)imm;
1085         reg->smax_value = (s64)imm;
1086         reg->umin_value = imm;
1087         reg->umax_value = imm;
1088
1089         reg->s32_min_value = (s32)imm;
1090         reg->s32_max_value = (s32)imm;
1091         reg->u32_min_value = (u32)imm;
1092         reg->u32_max_value = (u32)imm;
1093 }
1094
1095 /* Mark the unknown part of a register (variable offset or scalar value) as
1096  * known to have the value @imm.
1097  */
1098 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1099 {
1100         /* Clear id, off, and union(map_ptr, range) */
1101         memset(((u8 *)reg) + sizeof(reg->type), 0,
1102                offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1103         ___mark_reg_known(reg, imm);
1104 }
1105
1106 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1107 {
1108         reg->var_off = tnum_const_subreg(reg->var_off, imm);
1109         reg->s32_min_value = (s32)imm;
1110         reg->s32_max_value = (s32)imm;
1111         reg->u32_min_value = (u32)imm;
1112         reg->u32_max_value = (u32)imm;
1113 }
1114
1115 /* Mark the 'variable offset' part of a register as zero.  This should be
1116  * used only on registers holding a pointer type.
1117  */
1118 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
1119 {
1120         __mark_reg_known(reg, 0);
1121 }
1122
1123 static void __mark_reg_const_zero(struct bpf_reg_state *reg)
1124 {
1125         __mark_reg_known(reg, 0);
1126         reg->type = SCALAR_VALUE;
1127 }
1128
1129 static void mark_reg_known_zero(struct bpf_verifier_env *env,
1130                                 struct bpf_reg_state *regs, u32 regno)
1131 {
1132         if (WARN_ON(regno >= MAX_BPF_REG)) {
1133                 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
1134                 /* Something bad happened, let's kill all regs */
1135                 for (regno = 0; regno < MAX_BPF_REG; regno++)
1136                         __mark_reg_not_init(env, regs + regno);
1137                 return;
1138         }
1139         __mark_reg_known_zero(regs + regno);
1140 }
1141
1142 static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
1143 {
1144         switch (reg->type) {
1145         case PTR_TO_MAP_VALUE_OR_NULL: {
1146                 const struct bpf_map *map = reg->map_ptr;
1147
1148                 if (map->inner_map_meta) {
1149                         reg->type = CONST_PTR_TO_MAP;
1150                         reg->map_ptr = map->inner_map_meta;
1151                         /* transfer reg's id which is unique for every map_lookup_elem
1152                          * as UID of the inner map.
1153                          */
1154                         if (map_value_has_timer(map->inner_map_meta))
1155                                 reg->map_uid = reg->id;
1156                 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
1157                         reg->type = PTR_TO_XDP_SOCK;
1158                 } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
1159                            map->map_type == BPF_MAP_TYPE_SOCKHASH) {
1160                         reg->type = PTR_TO_SOCKET;
1161                 } else {
1162                         reg->type = PTR_TO_MAP_VALUE;
1163                 }
1164                 break;
1165         }
1166         case PTR_TO_SOCKET_OR_NULL:
1167                 reg->type = PTR_TO_SOCKET;
1168                 break;
1169         case PTR_TO_SOCK_COMMON_OR_NULL:
1170                 reg->type = PTR_TO_SOCK_COMMON;
1171                 break;
1172         case PTR_TO_TCP_SOCK_OR_NULL:
1173                 reg->type = PTR_TO_TCP_SOCK;
1174                 break;
1175         case PTR_TO_BTF_ID_OR_NULL:
1176                 reg->type = PTR_TO_BTF_ID;
1177                 break;
1178         case PTR_TO_MEM_OR_NULL:
1179                 reg->type = PTR_TO_MEM;
1180                 break;
1181         case PTR_TO_RDONLY_BUF_OR_NULL:
1182                 reg->type = PTR_TO_RDONLY_BUF;
1183                 break;
1184         case PTR_TO_RDWR_BUF_OR_NULL:
1185                 reg->type = PTR_TO_RDWR_BUF;
1186                 break;
1187         default:
1188                 WARN_ONCE(1, "unknown nullable register type");
1189         }
1190 }
1191
1192 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1193 {
1194         return type_is_pkt_pointer(reg->type);
1195 }
1196
1197 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1198 {
1199         return reg_is_pkt_pointer(reg) ||
1200                reg->type == PTR_TO_PACKET_END;
1201 }
1202
1203 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1204 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1205                                     enum bpf_reg_type which)
1206 {
1207         /* The register can already have a range from prior markings.
1208          * This is fine as long as it hasn't been advanced from its
1209          * origin.
1210          */
1211         return reg->type == which &&
1212                reg->id == 0 &&
1213                reg->off == 0 &&
1214                tnum_equals_const(reg->var_off, 0);
1215 }
1216
1217 /* Reset the min/max bounds of a register */
1218 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1219 {
1220         reg->smin_value = S64_MIN;
1221         reg->smax_value = S64_MAX;
1222         reg->umin_value = 0;
1223         reg->umax_value = U64_MAX;
1224
1225         reg->s32_min_value = S32_MIN;
1226         reg->s32_max_value = S32_MAX;
1227         reg->u32_min_value = 0;
1228         reg->u32_max_value = U32_MAX;
1229 }
1230
1231 static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1232 {
1233         reg->smin_value = S64_MIN;
1234         reg->smax_value = S64_MAX;
1235         reg->umin_value = 0;
1236         reg->umax_value = U64_MAX;
1237 }
1238
1239 static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1240 {
1241         reg->s32_min_value = S32_MIN;
1242         reg->s32_max_value = S32_MAX;
1243         reg->u32_min_value = 0;
1244         reg->u32_max_value = U32_MAX;
1245 }
1246
1247 static void __update_reg32_bounds(struct bpf_reg_state *reg)
1248 {
1249         struct tnum var32_off = tnum_subreg(reg->var_off);
1250
1251         /* min signed is max(sign bit) | min(other bits) */
1252         reg->s32_min_value = max_t(s32, reg->s32_min_value,
1253                         var32_off.value | (var32_off.mask & S32_MIN));
1254         /* max signed is min(sign bit) | max(other bits) */
1255         reg->s32_max_value = min_t(s32, reg->s32_max_value,
1256                         var32_off.value | (var32_off.mask & S32_MAX));
1257         reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1258         reg->u32_max_value = min(reg->u32_max_value,
1259                                  (u32)(var32_off.value | var32_off.mask));
1260 }
1261
1262 static void __update_reg64_bounds(struct bpf_reg_state *reg)
1263 {
1264         /* min signed is max(sign bit) | min(other bits) */
1265         reg->smin_value = max_t(s64, reg->smin_value,
1266                                 reg->var_off.value | (reg->var_off.mask & S64_MIN));
1267         /* max signed is min(sign bit) | max(other bits) */
1268         reg->smax_value = min_t(s64, reg->smax_value,
1269                                 reg->var_off.value | (reg->var_off.mask & S64_MAX));
1270         reg->umin_value = max(reg->umin_value, reg->var_off.value);
1271         reg->umax_value = min(reg->umax_value,
1272                               reg->var_off.value | reg->var_off.mask);
1273 }
1274
1275 static void __update_reg_bounds(struct bpf_reg_state *reg)
1276 {
1277         __update_reg32_bounds(reg);
1278         __update_reg64_bounds(reg);
1279 }
1280
1281 /* Uses signed min/max values to inform unsigned, and vice-versa */
1282 static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
1283 {
1284         /* Learn sign from signed bounds.
1285          * If we cannot cross the sign boundary, then signed and unsigned bounds
1286          * are the same, so combine.  This works even in the negative case, e.g.
1287          * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1288          */
1289         if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
1290                 reg->s32_min_value = reg->u32_min_value =
1291                         max_t(u32, reg->s32_min_value, reg->u32_min_value);
1292                 reg->s32_max_value = reg->u32_max_value =
1293                         min_t(u32, reg->s32_max_value, reg->u32_max_value);
1294                 return;
1295         }
1296         /* Learn sign from unsigned bounds.  Signed bounds cross the sign
1297          * boundary, so we must be careful.
1298          */
1299         if ((s32)reg->u32_max_value >= 0) {
1300                 /* Positive.  We can't learn anything from the smin, but smax
1301                  * is positive, hence safe.
1302                  */
1303                 reg->s32_min_value = reg->u32_min_value;
1304                 reg->s32_max_value = reg->u32_max_value =
1305                         min_t(u32, reg->s32_max_value, reg->u32_max_value);
1306         } else if ((s32)reg->u32_min_value < 0) {
1307                 /* Negative.  We can't learn anything from the smax, but smin
1308                  * is negative, hence safe.
1309                  */
1310                 reg->s32_min_value = reg->u32_min_value =
1311                         max_t(u32, reg->s32_min_value, reg->u32_min_value);
1312                 reg->s32_max_value = reg->u32_max_value;
1313         }
1314 }
1315
1316 static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
1317 {
1318         /* Learn sign from signed bounds.
1319          * If we cannot cross the sign boundary, then signed and unsigned bounds
1320          * are the same, so combine.  This works even in the negative case, e.g.
1321          * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1322          */
1323         if (reg->smin_value >= 0 || reg->smax_value < 0) {
1324                 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1325                                                           reg->umin_value);
1326                 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1327                                                           reg->umax_value);
1328                 return;
1329         }
1330         /* Learn sign from unsigned bounds.  Signed bounds cross the sign
1331          * boundary, so we must be careful.
1332          */
1333         if ((s64)reg->umax_value >= 0) {
1334                 /* Positive.  We can't learn anything from the smin, but smax
1335                  * is positive, hence safe.
1336                  */
1337                 reg->smin_value = reg->umin_value;
1338                 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1339                                                           reg->umax_value);
1340         } else if ((s64)reg->umin_value < 0) {
1341                 /* Negative.  We can't learn anything from the smax, but smin
1342                  * is negative, hence safe.
1343                  */
1344                 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1345                                                           reg->umin_value);
1346                 reg->smax_value = reg->umax_value;
1347         }
1348 }
1349
1350 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
1351 {
1352         __reg32_deduce_bounds(reg);
1353         __reg64_deduce_bounds(reg);
1354 }
1355
1356 /* Attempts to improve var_off based on unsigned min/max information */
1357 static void __reg_bound_offset(struct bpf_reg_state *reg)
1358 {
1359         struct tnum var64_off = tnum_intersect(reg->var_off,
1360                                                tnum_range(reg->umin_value,
1361                                                           reg->umax_value));
1362         struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
1363                                                 tnum_range(reg->u32_min_value,
1364                                                            reg->u32_max_value));
1365
1366         reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
1367 }
1368
1369 static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
1370 {
1371         reg->umin_value = reg->u32_min_value;
1372         reg->umax_value = reg->u32_max_value;
1373         /* Attempt to pull 32-bit signed bounds into 64-bit bounds
1374          * but must be positive otherwise set to worse case bounds
1375          * and refine later from tnum.
1376          */
1377         if (reg->s32_min_value >= 0 && reg->s32_max_value >= 0)
1378                 reg->smax_value = reg->s32_max_value;
1379         else
1380                 reg->smax_value = U32_MAX;
1381         if (reg->s32_min_value >= 0)
1382                 reg->smin_value = reg->s32_min_value;
1383         else
1384                 reg->smin_value = 0;
1385 }
1386
1387 static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
1388 {
1389         /* special case when 64-bit register has upper 32-bit register
1390          * zeroed. Typically happens after zext or <<32, >>32 sequence
1391          * allowing us to use 32-bit bounds directly,
1392          */
1393         if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
1394                 __reg_assign_32_into_64(reg);
1395         } else {
1396                 /* Otherwise the best we can do is push lower 32bit known and
1397                  * unknown bits into register (var_off set from jmp logic)
1398                  * then learn as much as possible from the 64-bit tnum
1399                  * known and unknown bits. The previous smin/smax bounds are
1400                  * invalid here because of jmp32 compare so mark them unknown
1401                  * so they do not impact tnum bounds calculation.
1402                  */
1403                 __mark_reg64_unbounded(reg);
1404                 __update_reg_bounds(reg);
1405         }
1406
1407         /* Intersecting with the old var_off might have improved our bounds
1408          * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1409          * then new var_off is (0; 0x7f...fc) which improves our umax.
1410          */
1411         __reg_deduce_bounds(reg);
1412         __reg_bound_offset(reg);
1413         __update_reg_bounds(reg);
1414 }
1415
1416 static bool __reg64_bound_s32(s64 a)
1417 {
1418         return a >= S32_MIN && a <= S32_MAX;
1419 }
1420
1421 static bool __reg64_bound_u32(u64 a)
1422 {
1423         return a >= U32_MIN && a <= U32_MAX;
1424 }
1425
1426 static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
1427 {
1428         __mark_reg32_unbounded(reg);
1429
1430         if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
1431                 reg->s32_min_value = (s32)reg->smin_value;
1432                 reg->s32_max_value = (s32)reg->smax_value;
1433         }
1434         if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
1435                 reg->u32_min_value = (u32)reg->umin_value;
1436                 reg->u32_max_value = (u32)reg->umax_value;
1437         }
1438
1439         /* Intersecting with the old var_off might have improved our bounds
1440          * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1441          * then new var_off is (0; 0x7f...fc) which improves our umax.
1442          */
1443         __reg_deduce_bounds(reg);
1444         __reg_bound_offset(reg);
1445         __update_reg_bounds(reg);
1446 }
1447
1448 /* Mark a register as having a completely unknown (scalar) value. */
1449 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
1450                                struct bpf_reg_state *reg)
1451 {
1452         /*
1453          * Clear type, id, off, and union(map_ptr, range) and
1454          * padding between 'type' and union
1455          */
1456         memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
1457         reg->type = SCALAR_VALUE;
1458         reg->var_off = tnum_unknown;
1459         reg->frameno = 0;
1460         reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
1461         __mark_reg_unbounded(reg);
1462 }
1463
1464 static void mark_reg_unknown(struct bpf_verifier_env *env,
1465                              struct bpf_reg_state *regs, u32 regno)
1466 {
1467         if (WARN_ON(regno >= MAX_BPF_REG)) {
1468                 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
1469                 /* Something bad happened, let's kill all regs except FP */
1470                 for (regno = 0; regno < BPF_REG_FP; regno++)
1471                         __mark_reg_not_init(env, regs + regno);
1472                 return;
1473         }
1474         __mark_reg_unknown(env, regs + regno);
1475 }
1476
1477 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1478                                 struct bpf_reg_state *reg)
1479 {
1480         __mark_reg_unknown(env, reg);
1481         reg->type = NOT_INIT;
1482 }
1483
1484 static void mark_reg_not_init(struct bpf_verifier_env *env,
1485                               struct bpf_reg_state *regs, u32 regno)
1486 {
1487         if (WARN_ON(regno >= MAX_BPF_REG)) {
1488                 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
1489                 /* Something bad happened, let's kill all regs except FP */
1490                 for (regno = 0; regno < BPF_REG_FP; regno++)
1491                         __mark_reg_not_init(env, regs + regno);
1492                 return;
1493         }
1494         __mark_reg_not_init(env, regs + regno);
1495 }
1496
1497 static void mark_btf_ld_reg(struct bpf_verifier_env *env,
1498                             struct bpf_reg_state *regs, u32 regno,
1499                             enum bpf_reg_type reg_type,
1500                             struct btf *btf, u32 btf_id)
1501 {
1502         if (reg_type == SCALAR_VALUE) {
1503                 mark_reg_unknown(env, regs, regno);
1504                 return;
1505         }
1506         mark_reg_known_zero(env, regs, regno);
1507         regs[regno].type = PTR_TO_BTF_ID;
1508         regs[regno].btf = btf;
1509         regs[regno].btf_id = btf_id;
1510 }
1511
1512 #define DEF_NOT_SUBREG  (0)
1513 static void init_reg_state(struct bpf_verifier_env *env,
1514                            struct bpf_func_state *state)
1515 {
1516         struct bpf_reg_state *regs = state->regs;
1517         int i;
1518
1519         for (i = 0; i < MAX_BPF_REG; i++) {
1520                 mark_reg_not_init(env, regs, i);
1521                 regs[i].live = REG_LIVE_NONE;
1522                 regs[i].parent = NULL;
1523                 regs[i].subreg_def = DEF_NOT_SUBREG;
1524         }
1525
1526         /* frame pointer */
1527         regs[BPF_REG_FP].type = PTR_TO_STACK;
1528         mark_reg_known_zero(env, regs, BPF_REG_FP);
1529         regs[BPF_REG_FP].frameno = state->frameno;
1530 }
1531
1532 #define BPF_MAIN_FUNC (-1)
1533 static void init_func_state(struct bpf_verifier_env *env,
1534                             struct bpf_func_state *state,
1535                             int callsite, int frameno, int subprogno)
1536 {
1537         state->callsite = callsite;
1538         state->frameno = frameno;
1539         state->subprogno = subprogno;
1540         init_reg_state(env, state);
1541 }
1542
1543 /* Similar to push_stack(), but for async callbacks */
1544 static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
1545                                                 int insn_idx, int prev_insn_idx,
1546                                                 int subprog)
1547 {
1548         struct bpf_verifier_stack_elem *elem;
1549         struct bpf_func_state *frame;
1550
1551         elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
1552         if (!elem)
1553                 goto err;
1554
1555         elem->insn_idx = insn_idx;
1556         elem->prev_insn_idx = prev_insn_idx;
1557         elem->next = env->head;
1558         elem->log_pos = env->log.len_used;
1559         env->head = elem;
1560         env->stack_size++;
1561         if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
1562                 verbose(env,
1563                         "The sequence of %d jumps is too complex for async cb.\n",
1564                         env->stack_size);
1565                 goto err;
1566         }
1567         /* Unlike push_stack() do not copy_verifier_state().
1568          * The caller state doesn't matter.
1569          * This is async callback. It starts in a fresh stack.
1570          * Initialize it similar to do_check_common().
1571          */
1572         elem->st.branches = 1;
1573         frame = kzalloc(sizeof(*frame), GFP_KERNEL);
1574         if (!frame)
1575                 goto err;
1576         init_func_state(env, frame,
1577                         BPF_MAIN_FUNC /* callsite */,
1578                         0 /* frameno within this callchain */,
1579                         subprog /* subprog number within this prog */);
1580         elem->st.frame[0] = frame;
1581         return &elem->st;
1582 err:
1583         free_verifier_state(env->cur_state, true);
1584         env->cur_state = NULL;
1585         /* pop all elements and return */
1586         while (!pop_stack(env, NULL, NULL, false));
1587         return NULL;
1588 }
1589
1590
1591 enum reg_arg_type {
1592         SRC_OP,         /* register is used as source operand */
1593         DST_OP,         /* register is used as destination operand */
1594         DST_OP_NO_MARK  /* same as above, check only, don't mark */
1595 };
1596
1597 static int cmp_subprogs(const void *a, const void *b)
1598 {
1599         return ((struct bpf_subprog_info *)a)->start -
1600                ((struct bpf_subprog_info *)b)->start;
1601 }
1602
1603 static int find_subprog(struct bpf_verifier_env *env, int off)
1604 {
1605         struct bpf_subprog_info *p;
1606
1607         p = bsearch(&off, env->subprog_info, env->subprog_cnt,
1608                     sizeof(env->subprog_info[0]), cmp_subprogs);
1609         if (!p)
1610                 return -ENOENT;
1611         return p - env->subprog_info;
1612
1613 }
1614
1615 static int add_subprog(struct bpf_verifier_env *env, int off)
1616 {
1617         int insn_cnt = env->prog->len;
1618         int ret;
1619
1620         if (off >= insn_cnt || off < 0) {
1621                 verbose(env, "call to invalid destination\n");
1622                 return -EINVAL;
1623         }
1624         ret = find_subprog(env, off);
1625         if (ret >= 0)
1626                 return ret;
1627         if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
1628                 verbose(env, "too many subprograms\n");
1629                 return -E2BIG;
1630         }
1631         /* determine subprog starts. The end is one before the next starts */
1632         env->subprog_info[env->subprog_cnt++].start = off;
1633         sort(env->subprog_info, env->subprog_cnt,
1634              sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
1635         return env->subprog_cnt - 1;
1636 }
1637
1638 #define MAX_KFUNC_DESCS 256
1639 #define MAX_KFUNC_BTFS  256
1640
1641 struct bpf_kfunc_desc {
1642         struct btf_func_model func_model;
1643         u32 func_id;
1644         s32 imm;
1645         u16 offset;
1646 };
1647
1648 struct bpf_kfunc_btf {
1649         struct btf *btf;
1650         struct module *module;
1651         u16 offset;
1652 };
1653
1654 struct bpf_kfunc_desc_tab {
1655         struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
1656         u32 nr_descs;
1657 };
1658
1659 struct bpf_kfunc_btf_tab {
1660         struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
1661         u32 nr_descs;
1662 };
1663
1664 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
1665 {
1666         const struct bpf_kfunc_desc *d0 = a;
1667         const struct bpf_kfunc_desc *d1 = b;
1668
1669         /* func_id is not greater than BTF_MAX_TYPE */
1670         return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
1671 }
1672
1673 static int kfunc_btf_cmp_by_off(const void *a, const void *b)
1674 {
1675         const struct bpf_kfunc_btf *d0 = a;
1676         const struct bpf_kfunc_btf *d1 = b;
1677
1678         return d0->offset - d1->offset;
1679 }
1680
1681 static const struct bpf_kfunc_desc *
1682 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
1683 {
1684         struct bpf_kfunc_desc desc = {
1685                 .func_id = func_id,
1686                 .offset = offset,
1687         };
1688         struct bpf_kfunc_desc_tab *tab;
1689
1690         tab = prog->aux->kfunc_tab;
1691         return bsearch(&desc, tab->descs, tab->nr_descs,
1692                        sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
1693 }
1694
1695 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
1696                                          s16 offset, struct module **btf_modp)
1697 {
1698         struct bpf_kfunc_btf kf_btf = { .offset = offset };
1699         struct bpf_kfunc_btf_tab *tab;
1700         struct bpf_kfunc_btf *b;
1701         struct module *mod;
1702         struct btf *btf;
1703         int btf_fd;
1704
1705         tab = env->prog->aux->kfunc_btf_tab;
1706         b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
1707                     sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
1708         if (!b) {
1709                 if (tab->nr_descs == MAX_KFUNC_BTFS) {
1710                         verbose(env, "too many different module BTFs\n");
1711                         return ERR_PTR(-E2BIG);
1712                 }
1713
1714                 if (bpfptr_is_null(env->fd_array)) {
1715                         verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
1716                         return ERR_PTR(-EPROTO);
1717                 }
1718
1719                 if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
1720                                             offset * sizeof(btf_fd),
1721                                             sizeof(btf_fd)))
1722                         return ERR_PTR(-EFAULT);
1723
1724                 btf = btf_get_by_fd(btf_fd);
1725                 if (IS_ERR(btf)) {
1726                         verbose(env, "invalid module BTF fd specified\n");
1727                         return btf;
1728                 }
1729
1730                 if (!btf_is_module(btf)) {
1731                         verbose(env, "BTF fd for kfunc is not a module BTF\n");
1732                         btf_put(btf);
1733                         return ERR_PTR(-EINVAL);
1734                 }
1735
1736                 mod = btf_try_get_module(btf);
1737                 if (!mod) {
1738                         btf_put(btf);
1739                         return ERR_PTR(-ENXIO);
1740                 }
1741
1742                 b = &tab->descs[tab->nr_descs++];
1743                 b->btf = btf;
1744                 b->module = mod;
1745                 b->offset = offset;
1746
1747                 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
1748                      kfunc_btf_cmp_by_off, NULL);
1749         }
1750         if (btf_modp)
1751                 *btf_modp = b->module;
1752         return b->btf;
1753 }
1754
1755 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
1756 {
1757         if (!tab)
1758                 return;
1759
1760         while (tab->nr_descs--) {
1761                 module_put(tab->descs[tab->nr_descs].module);
1762                 btf_put(tab->descs[tab->nr_descs].btf);
1763         }
1764         kfree(tab);
1765 }
1766
1767 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
1768                                        u32 func_id, s16 offset,
1769                                        struct module **btf_modp)
1770 {
1771         if (offset) {
1772                 if (offset < 0) {
1773                         /* In the future, this can be allowed to increase limit
1774                          * of fd index into fd_array, interpreted as u16.
1775                          */
1776                         verbose(env, "negative offset disallowed for kernel module function call\n");
1777                         return ERR_PTR(-EINVAL);
1778                 }
1779
1780                 return __find_kfunc_desc_btf(env, offset, btf_modp);
1781         }
1782         return btf_vmlinux ?: ERR_PTR(-ENOENT);
1783 }
1784
1785 static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
1786 {
1787         const struct btf_type *func, *func_proto;
1788         struct bpf_kfunc_btf_tab *btf_tab;
1789         struct bpf_kfunc_desc_tab *tab;
1790         struct bpf_prog_aux *prog_aux;
1791         struct bpf_kfunc_desc *desc;
1792         const char *func_name;
1793         struct btf *desc_btf;
1794         unsigned long addr;
1795         int err;
1796
1797         prog_aux = env->prog->aux;
1798         tab = prog_aux->kfunc_tab;
1799         btf_tab = prog_aux->kfunc_btf_tab;
1800         if (!tab) {
1801                 if (!btf_vmlinux) {
1802                         verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
1803                         return -ENOTSUPP;
1804                 }
1805
1806                 if (!env->prog->jit_requested) {
1807                         verbose(env, "JIT is required for calling kernel function\n");
1808                         return -ENOTSUPP;
1809                 }
1810
1811                 if (!bpf_jit_supports_kfunc_call()) {
1812                         verbose(env, "JIT does not support calling kernel function\n");
1813                         return -ENOTSUPP;
1814                 }
1815
1816                 if (!env->prog->gpl_compatible) {
1817                         verbose(env, "cannot call kernel function from non-GPL compatible program\n");
1818                         return -EINVAL;
1819                 }
1820
1821                 tab = kzalloc(sizeof(*tab), GFP_KERNEL);
1822                 if (!tab)
1823                         return -ENOMEM;
1824                 prog_aux->kfunc_tab = tab;
1825         }
1826
1827         /* func_id == 0 is always invalid, but instead of returning an error, be
1828          * conservative and wait until the code elimination pass before returning
1829          * error, so that invalid calls that get pruned out can be in BPF programs
1830          * loaded from userspace.  It is also required that offset be untouched
1831          * for such calls.
1832          */
1833         if (!func_id && !offset)
1834                 return 0;
1835
1836         if (!btf_tab && offset) {
1837                 btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
1838                 if (!btf_tab)
1839                         return -ENOMEM;
1840                 prog_aux->kfunc_btf_tab = btf_tab;
1841         }
1842
1843         desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL);
1844         if (IS_ERR(desc_btf)) {
1845                 verbose(env, "failed to find BTF for kernel function\n");
1846                 return PTR_ERR(desc_btf);
1847         }
1848
1849         if (find_kfunc_desc(env->prog, func_id, offset))
1850                 return 0;
1851
1852         if (tab->nr_descs == MAX_KFUNC_DESCS) {
1853                 verbose(env, "too many different kernel function calls\n");
1854                 return -E2BIG;
1855         }
1856
1857         func = btf_type_by_id(desc_btf, func_id);
1858         if (!func || !btf_type_is_func(func)) {
1859                 verbose(env, "kernel btf_id %u is not a function\n",
1860                         func_id);
1861                 return -EINVAL;
1862         }
1863         func_proto = btf_type_by_id(desc_btf, func->type);
1864         if (!func_proto || !btf_type_is_func_proto(func_proto)) {
1865                 verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
1866                         func_id);
1867                 return -EINVAL;
1868         }
1869
1870         func_name = btf_name_by_offset(desc_btf, func->name_off);
1871         addr = kallsyms_lookup_name(func_name);
1872         if (!addr) {
1873                 verbose(env, "cannot find address for kernel function %s\n",
1874                         func_name);
1875                 return -EINVAL;
1876         }
1877
1878         desc = &tab->descs[tab->nr_descs++];
1879         desc->func_id = func_id;
1880         desc->imm = BPF_CALL_IMM(addr);
1881         desc->offset = offset;
1882         err = btf_distill_func_proto(&env->log, desc_btf,
1883                                      func_proto, func_name,
1884                                      &desc->func_model);
1885         if (!err)
1886                 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
1887                      kfunc_desc_cmp_by_id_off, NULL);
1888         return err;
1889 }
1890
1891 static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
1892 {
1893         const struct bpf_kfunc_desc *d0 = a;
1894         const struct bpf_kfunc_desc *d1 = b;
1895
1896         if (d0->imm > d1->imm)
1897                 return 1;
1898         else if (d0->imm < d1->imm)
1899                 return -1;
1900         return 0;
1901 }
1902
1903 static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
1904 {
1905         struct bpf_kfunc_desc_tab *tab;
1906
1907         tab = prog->aux->kfunc_tab;
1908         if (!tab)
1909                 return;
1910
1911         sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
1912              kfunc_desc_cmp_by_imm, NULL);
1913 }
1914
1915 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
1916 {
1917         return !!prog->aux->kfunc_tab;
1918 }
1919
1920 const struct btf_func_model *
1921 bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
1922                          const struct bpf_insn *insn)
1923 {
1924         const struct bpf_kfunc_desc desc = {
1925                 .imm = insn->imm,
1926         };
1927         const struct bpf_kfunc_desc *res;
1928         struct bpf_kfunc_desc_tab *tab;
1929
1930         tab = prog->aux->kfunc_tab;
1931         res = bsearch(&desc, tab->descs, tab->nr_descs,
1932                       sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm);
1933
1934         return res ? &res->func_model : NULL;
1935 }
1936
1937 static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
1938 {
1939         struct bpf_subprog_info *subprog = env->subprog_info;
1940         struct bpf_insn *insn = env->prog->insnsi;
1941         int i, ret, insn_cnt = env->prog->len;
1942
1943         /* Add entry function. */
1944         ret = add_subprog(env, 0);
1945         if (ret)
1946                 return ret;
1947
1948         for (i = 0; i < insn_cnt; i++, insn++) {
1949                 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
1950                     !bpf_pseudo_kfunc_call(insn))
1951                         continue;
1952
1953                 if (!env->bpf_capable) {
1954                         verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
1955                         return -EPERM;
1956                 }
1957
1958                 if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
1959                         ret = add_subprog(env, i + insn->imm + 1);
1960                 else
1961                         ret = add_kfunc_call(env, insn->imm, insn->off);
1962
1963                 if (ret < 0)
1964                         return ret;
1965         }
1966
1967         /* Add a fake 'exit' subprog which could simplify subprog iteration
1968          * logic. 'subprog_cnt' should not be increased.
1969          */
1970         subprog[env->subprog_cnt].start = insn_cnt;
1971
1972         if (env->log.level & BPF_LOG_LEVEL2)
1973                 for (i = 0; i < env->subprog_cnt; i++)
1974                         verbose(env, "func#%d @%d\n", i, subprog[i].start);
1975
1976         return 0;
1977 }
1978
1979 static int check_subprogs(struct bpf_verifier_env *env)
1980 {
1981         int i, subprog_start, subprog_end, off, cur_subprog = 0;
1982         struct bpf_subprog_info *subprog = env->subprog_info;
1983         struct bpf_insn *insn = env->prog->insnsi;
1984         int insn_cnt = env->prog->len;
1985
1986         /* now check that all jumps are within the same subprog */
1987         subprog_start = subprog[cur_subprog].start;
1988         subprog_end = subprog[cur_subprog + 1].start;
1989         for (i = 0; i < insn_cnt; i++) {
1990                 u8 code = insn[i].code;
1991
1992                 if (code == (BPF_JMP | BPF_CALL) &&
1993                     insn[i].imm == BPF_FUNC_tail_call &&
1994                     insn[i].src_reg != BPF_PSEUDO_CALL)
1995                         subprog[cur_subprog].has_tail_call = true;
1996                 if (BPF_CLASS(code) == BPF_LD &&
1997                     (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
1998                         subprog[cur_subprog].has_ld_abs = true;
1999                 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
2000                         goto next;
2001                 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
2002                         goto next;
2003                 off = i + insn[i].off + 1;
2004                 if (off < subprog_start || off >= subprog_end) {
2005                         verbose(env, "jump out of range from insn %d to %d\n", i, off);
2006                         return -EINVAL;
2007                 }
2008 next:
2009                 if (i == subprog_end - 1) {
2010                         /* to avoid fall-through from one subprog into another
2011                          * the last insn of the subprog should be either exit
2012                          * or unconditional jump back
2013                          */
2014                         if (code != (BPF_JMP | BPF_EXIT) &&
2015                             code != (BPF_JMP | BPF_JA)) {
2016                                 verbose(env, "last insn is not an exit or jmp\n");
2017                                 return -EINVAL;
2018                         }
2019                         subprog_start = subprog_end;
2020                         cur_subprog++;
2021                         if (cur_subprog < env->subprog_cnt)
2022                                 subprog_end = subprog[cur_subprog + 1].start;
2023                 }
2024         }
2025         return 0;
2026 }
2027
2028 /* Parentage chain of this register (or stack slot) should take care of all
2029  * issues like callee-saved registers, stack slot allocation time, etc.
2030  */
2031 static int mark_reg_read(struct bpf_verifier_env *env,
2032                          const struct bpf_reg_state *state,
2033                          struct bpf_reg_state *parent, u8 flag)
2034 {
2035         bool writes = parent == state->parent; /* Observe write marks */
2036         int cnt = 0;
2037
2038         while (parent) {
2039                 /* if read wasn't screened by an earlier write ... */
2040                 if (writes && state->live & REG_LIVE_WRITTEN)
2041                         break;
2042                 if (parent->live & REG_LIVE_DONE) {
2043                         verbose(env, "verifier BUG type %s var_off %lld off %d\n",
2044                                 reg_type_str[parent->type],
2045                                 parent->var_off.value, parent->off);
2046                         return -EFAULT;
2047                 }
2048                 /* The first condition is more likely to be true than the
2049                  * second, checked it first.
2050                  */
2051                 if ((parent->live & REG_LIVE_READ) == flag ||
2052                     parent->live & REG_LIVE_READ64)
2053                         /* The parentage chain never changes and
2054                          * this parent was already marked as LIVE_READ.
2055                          * There is no need to keep walking the chain again and
2056                          * keep re-marking all parents as LIVE_READ.
2057                          * This case happens when the same register is read
2058                          * multiple times without writes into it in-between.
2059                          * Also, if parent has the stronger REG_LIVE_READ64 set,
2060                          * then no need to set the weak REG_LIVE_READ32.
2061                          */
2062                         break;
2063                 /* ... then we depend on parent's value */
2064                 parent->live |= flag;
2065                 /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
2066                 if (flag == REG_LIVE_READ64)
2067                         parent->live &= ~REG_LIVE_READ32;
2068                 state = parent;
2069                 parent = state->parent;
2070                 writes = true;
2071                 cnt++;
2072         }
2073
2074         if (env->longest_mark_read_walk < cnt)
2075                 env->longest_mark_read_walk = cnt;
2076         return 0;
2077 }
2078
2079 /* This function is supposed to be used by the following 32-bit optimization
2080  * code only. It returns TRUE if the source or destination register operates
2081  * on 64-bit, otherwise return FALSE.
2082  */
2083 static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
2084                      u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
2085 {
2086         u8 code, class, op;
2087
2088         code = insn->code;
2089         class = BPF_CLASS(code);
2090         op = BPF_OP(code);
2091         if (class == BPF_JMP) {
2092                 /* BPF_EXIT for "main" will reach here. Return TRUE
2093                  * conservatively.
2094                  */
2095                 if (op == BPF_EXIT)
2096                         return true;
2097                 if (op == BPF_CALL) {
2098                         /* BPF to BPF call will reach here because of marking
2099                          * caller saved clobber with DST_OP_NO_MARK for which we
2100                          * don't care the register def because they are anyway
2101                          * marked as NOT_INIT already.
2102                          */
2103                         if (insn->src_reg == BPF_PSEUDO_CALL)
2104                                 return false;
2105                         /* Helper call will reach here because of arg type
2106                          * check, conservatively return TRUE.
2107                          */
2108                         if (t == SRC_OP)
2109                                 return true;
2110
2111                         return false;
2112                 }
2113         }
2114
2115         if (class == BPF_ALU64 || class == BPF_JMP ||
2116             /* BPF_END always use BPF_ALU class. */
2117             (class == BPF_ALU && op == BPF_END && insn->imm == 64))
2118                 return true;
2119
2120         if (class == BPF_ALU || class == BPF_JMP32)
2121                 return false;
2122
2123         if (class == BPF_LDX) {
2124                 if (t != SRC_OP)
2125                         return BPF_SIZE(code) == BPF_DW;
2126                 /* LDX source must be ptr. */
2127                 return true;
2128         }
2129
2130         if (class == BPF_STX) {
2131                 /* BPF_STX (including atomic variants) has multiple source
2132                  * operands, one of which is a ptr. Check whether the caller is
2133                  * asking about it.
2134                  */
2135                 if (t == SRC_OP && reg->type != SCALAR_VALUE)
2136                         return true;
2137                 return BPF_SIZE(code) == BPF_DW;
2138         }
2139
2140         if (class == BPF_LD) {
2141                 u8 mode = BPF_MODE(code);
2142
2143                 /* LD_IMM64 */
2144                 if (mode == BPF_IMM)
2145                         return true;
2146
2147                 /* Both LD_IND and LD_ABS return 32-bit data. */
2148                 if (t != SRC_OP)
2149                         return  false;
2150
2151                 /* Implicit ctx ptr. */
2152                 if (regno == BPF_REG_6)
2153                         return true;
2154
2155                 /* Explicit source could be any width. */
2156                 return true;
2157         }
2158
2159         if (class == BPF_ST)
2160                 /* The only source register for BPF_ST is a ptr. */
2161                 return true;
2162
2163         /* Conservatively return true at default. */
2164         return true;
2165 }
2166
2167 /* Return the regno defined by the insn, or -1. */
2168 static int insn_def_regno(const struct bpf_insn *insn)
2169 {
2170         switch (BPF_CLASS(insn->code)) {
2171         case BPF_JMP:
2172         case BPF_JMP32:
2173         case BPF_ST:
2174                 return -1;
2175         case BPF_STX:
2176                 if (BPF_MODE(insn->code) == BPF_ATOMIC &&
2177                     (insn->imm & BPF_FETCH)) {
2178                         if (insn->imm == BPF_CMPXCHG)
2179                                 return BPF_REG_0;
2180                         else
2181                                 return insn->src_reg;
2182                 } else {
2183                         return -1;
2184                 }
2185         default:
2186                 return insn->dst_reg;
2187         }
2188 }
2189
2190 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
2191 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
2192 {
2193         int dst_reg = insn_def_regno(insn);
2194
2195         if (dst_reg == -1)
2196                 return false;
2197
2198         return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
2199 }
2200
2201 static void mark_insn_zext(struct bpf_verifier_env *env,
2202                            struct bpf_reg_state *reg)
2203 {
2204         s32 def_idx = reg->subreg_def;
2205
2206         if (def_idx == DEF_NOT_SUBREG)
2207                 return;
2208
2209         env->insn_aux_data[def_idx - 1].zext_dst = true;
2210         /* The dst will be zero extended, so won't be sub-register anymore. */
2211         reg->subreg_def = DEF_NOT_SUBREG;
2212 }
2213
2214 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
2215                          enum reg_arg_type t)
2216 {
2217         struct bpf_verifier_state *vstate = env->cur_state;
2218         struct bpf_func_state *state = vstate->frame[vstate->curframe];
2219         struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
2220         struct bpf_reg_state *reg, *regs = state->regs;
2221         bool rw64;
2222
2223         if (regno >= MAX_BPF_REG) {
2224                 verbose(env, "R%d is invalid\n", regno);
2225                 return -EINVAL;
2226         }
2227
2228         reg = &regs[regno];
2229         rw64 = is_reg64(env, insn, regno, reg, t);
2230         if (t == SRC_OP) {
2231                 /* check whether register used as source operand can be read */
2232                 if (reg->type == NOT_INIT) {
2233                         verbose(env, "R%d !read_ok\n", regno);
2234                         return -EACCES;
2235                 }
2236                 /* We don't need to worry about FP liveness because it's read-only */
2237                 if (regno == BPF_REG_FP)
2238                         return 0;
2239
2240                 if (rw64)
2241                         mark_insn_zext(env, reg);
2242
2243                 return mark_reg_read(env, reg, reg->parent,
2244                                      rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
2245         } else {
2246                 /* check whether register used as dest operand can be written to */
2247                 if (regno == BPF_REG_FP) {
2248                         verbose(env, "frame pointer is read only\n");
2249                         return -EACCES;
2250                 }
2251                 reg->live |= REG_LIVE_WRITTEN;
2252                 reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
2253                 if (t == DST_OP)
2254                         mark_reg_unknown(env, regs, regno);
2255         }
2256         return 0;
2257 }
2258
2259 /* for any branch, call, exit record the history of jmps in the given state */
2260 static int push_jmp_history(struct bpf_verifier_env *env,
2261                             struct bpf_verifier_state *cur)
2262 {
2263         u32 cnt = cur->jmp_history_cnt;
2264         struct bpf_idx_pair *p;
2265
2266         cnt++;
2267         p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
2268         if (!p)
2269                 return -ENOMEM;
2270         p[cnt - 1].idx = env->insn_idx;
2271         p[cnt - 1].prev_idx = env->prev_insn_idx;
2272         cur->jmp_history = p;
2273         cur->jmp_history_cnt = cnt;
2274         return 0;
2275 }
2276
2277 /* Backtrack one insn at a time. If idx is not at the top of recorded
2278  * history then previous instruction came from straight line execution.
2279  */
2280 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
2281                              u32 *history)
2282 {
2283         u32 cnt = *history;
2284
2285         if (cnt && st->jmp_history[cnt - 1].idx == i) {
2286                 i = st->jmp_history[cnt - 1].prev_idx;
2287                 (*history)--;
2288         } else {
2289                 i--;
2290         }
2291         return i;
2292 }
2293
2294 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
2295 {
2296         const struct btf_type *func;
2297         struct btf *desc_btf;
2298
2299         if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
2300                 return NULL;
2301
2302         desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL);
2303         if (IS_ERR(desc_btf))
2304                 return "<error>";
2305
2306         func = btf_type_by_id(desc_btf, insn->imm);
2307         return btf_name_by_offset(desc_btf, func->name_off);
2308 }
2309
2310 /* For given verifier state backtrack_insn() is called from the last insn to
2311  * the first insn. Its purpose is to compute a bitmask of registers and
2312  * stack slots that needs precision in the parent verifier state.
2313  */
2314 static int backtrack_insn(struct bpf_verifier_env *env, int idx,
2315                           u32 *reg_mask, u64 *stack_mask)
2316 {
2317         const struct bpf_insn_cbs cbs = {
2318                 .cb_call        = disasm_kfunc_name,
2319                 .cb_print       = verbose,
2320                 .private_data   = env,
2321         };
2322         struct bpf_insn *insn = env->prog->insnsi + idx;
2323         u8 class = BPF_CLASS(insn->code);
2324         u8 opcode = BPF_OP(insn->code);
2325         u8 mode = BPF_MODE(insn->code);
2326         u32 dreg = 1u << insn->dst_reg;
2327         u32 sreg = 1u << insn->src_reg;
2328         u32 spi;
2329
2330         if (insn->code == 0)
2331                 return 0;
2332         if (env->log.level & BPF_LOG_LEVEL) {
2333                 verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
2334                 verbose(env, "%d: ", idx);
2335                 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
2336         }
2337
2338         if (class == BPF_ALU || class == BPF_ALU64) {
2339                 if (!(*reg_mask & dreg))
2340                         return 0;
2341                 if (opcode == BPF_MOV) {
2342                         if (BPF_SRC(insn->code) == BPF_X) {
2343                                 /* dreg = sreg
2344                                  * dreg needs precision after this insn
2345                                  * sreg needs precision before this insn
2346                                  */
2347                                 *reg_mask &= ~dreg;
2348                                 *reg_mask |= sreg;
2349                         } else {
2350                                 /* dreg = K
2351                                  * dreg needs precision after this insn.
2352                                  * Corresponding register is already marked
2353                                  * as precise=true in this verifier state.
2354                                  * No further markings in parent are necessary
2355                                  */
2356                                 *reg_mask &= ~dreg;
2357                         }
2358                 } else {
2359                         if (BPF_SRC(insn->code) == BPF_X) {
2360                                 /* dreg += sreg
2361                                  * both dreg and sreg need precision
2362                                  * before this insn
2363                                  */
2364                                 *reg_mask |= sreg;
2365                         } /* else dreg += K
2366                            * dreg still needs precision before this insn
2367                            */
2368                 }
2369         } else if (class == BPF_LDX) {
2370                 if (!(*reg_mask & dreg))
2371                         return 0;
2372                 *reg_mask &= ~dreg;
2373
2374                 /* scalars can only be spilled into stack w/o losing precision.
2375                  * Load from any other memory can be zero extended.
2376                  * The desire to keep that precision is already indicated
2377                  * by 'precise' mark in corresponding register of this state.
2378                  * No further tracking necessary.
2379                  */
2380                 if (insn->src_reg != BPF_REG_FP)
2381                         return 0;
2382
2383                 /* dreg = *(u64 *)[fp - off] was a fill from the stack.
2384                  * that [fp - off] slot contains scalar that needs to be
2385                  * tracked with precision
2386                  */
2387                 spi = (-insn->off - 1) / BPF_REG_SIZE;
2388                 if (spi >= 64) {
2389                         verbose(env, "BUG spi %d\n", spi);
2390                         WARN_ONCE(1, "verifier backtracking bug");
2391                         return -EFAULT;
2392                 }
2393                 *stack_mask |= 1ull << spi;
2394         } else if (class == BPF_STX || class == BPF_ST) {
2395                 if (*reg_mask & dreg)
2396                         /* stx & st shouldn't be using _scalar_ dst_reg
2397                          * to access memory. It means backtracking
2398                          * encountered a case of pointer subtraction.
2399                          */
2400                         return -ENOTSUPP;
2401                 /* scalars can only be spilled into stack */
2402                 if (insn->dst_reg != BPF_REG_FP)
2403                         return 0;
2404                 spi = (-insn->off - 1) / BPF_REG_SIZE;
2405                 if (spi >= 64) {
2406                         verbose(env, "BUG spi %d\n", spi);
2407                         WARN_ONCE(1, "verifier backtracking bug");
2408                         return -EFAULT;
2409                 }
2410                 if (!(*stack_mask & (1ull << spi)))
2411                         return 0;
2412                 *stack_mask &= ~(1ull << spi);
2413                 if (class == BPF_STX)
2414                         *reg_mask |= sreg;
2415         } else if (class == BPF_JMP || class == BPF_JMP32) {
2416                 if (opcode == BPF_CALL) {
2417                         if (insn->src_reg == BPF_PSEUDO_CALL)
2418                                 return -ENOTSUPP;
2419                         /* regular helper call sets R0 */
2420                         *reg_mask &= ~1;
2421                         if (*reg_mask & 0x3f) {
2422                                 /* if backtracing was looking for registers R1-R5
2423                                  * they should have been found already.
2424                                  */
2425                                 verbose(env, "BUG regs %x\n", *reg_mask);
2426                                 WARN_ONCE(1, "verifier backtracking bug");
2427                                 return -EFAULT;
2428                         }
2429                 } else if (opcode == BPF_EXIT) {
2430                         return -ENOTSUPP;
2431                 }
2432         } else if (class == BPF_LD) {
2433                 if (!(*reg_mask & dreg))
2434                         return 0;
2435                 *reg_mask &= ~dreg;
2436                 /* It's ld_imm64 or ld_abs or ld_ind.
2437                  * For ld_imm64 no further tracking of precision
2438                  * into parent is necessary
2439                  */
2440                 if (mode == BPF_IND || mode == BPF_ABS)
2441                         /* to be analyzed */
2442                         return -ENOTSUPP;
2443         }
2444         return 0;
2445 }
2446
2447 /* the scalar precision tracking algorithm:
2448  * . at the start all registers have precise=false.
2449  * . scalar ranges are tracked as normal through alu and jmp insns.
2450  * . once precise value of the scalar register is used in:
2451  *   .  ptr + scalar alu
2452  *   . if (scalar cond K|scalar)
2453  *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
2454  *   backtrack through the verifier states and mark all registers and
2455  *   stack slots with spilled constants that these scalar regisers
2456  *   should be precise.
2457  * . during state pruning two registers (or spilled stack slots)
2458  *   are equivalent if both are not precise.
2459  *
2460  * Note the verifier cannot simply walk register parentage chain,
2461  * since many different registers and stack slots could have been
2462  * used to compute single precise scalar.
2463  *
2464  * The approach of starting with precise=true for all registers and then
2465  * backtrack to mark a register as not precise when the verifier detects
2466  * that program doesn't care about specific value (e.g., when helper
2467  * takes register as ARG_ANYTHING parameter) is not safe.
2468  *
2469  * It's ok to walk single parentage chain of the verifier states.
2470  * It's possible that this backtracking will go all the way till 1st insn.
2471  * All other branches will be explored for needing precision later.
2472  *
2473  * The backtracking needs to deal with cases like:
2474  *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
2475  * r9 -= r8
2476  * r5 = r9
2477  * if r5 > 0x79f goto pc+7
2478  *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
2479  * r5 += 1
2480  * ...
2481  * call bpf_perf_event_output#25
2482  *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
2483  *
2484  * and this case:
2485  * r6 = 1
2486  * call foo // uses callee's r6 inside to compute r0
2487  * r0 += r6
2488  * if r0 == 0 goto
2489  *
2490  * to track above reg_mask/stack_mask needs to be independent for each frame.
2491  *
2492  * Also if parent's curframe > frame where backtracking started,
2493  * the verifier need to mark registers in both frames, otherwise callees
2494  * may incorrectly prune callers. This is similar to
2495  * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
2496  *
2497  * For now backtracking falls back into conservative marking.
2498  */
2499 static void mark_all_scalars_precise(struct bpf_verifier_env *env,
2500                                      struct bpf_verifier_state *st)
2501 {
2502         struct bpf_func_state *func;
2503         struct bpf_reg_state *reg;
2504         int i, j;
2505
2506         /* big hammer: mark all scalars precise in this path.
2507          * pop_stack may still get !precise scalars.
2508          */
2509         for (; st; st = st->parent)
2510                 for (i = 0; i <= st->curframe; i++) {
2511                         func = st->frame[i];
2512                         for (j = 0; j < BPF_REG_FP; j++) {
2513                                 reg = &func->regs[j];
2514                                 if (reg->type != SCALAR_VALUE)
2515                                         continue;
2516                                 reg->precise = true;
2517                         }
2518                         for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
2519                                 if (!is_spilled_reg(&func->stack[j]))
2520                                         continue;
2521                                 reg = &func->stack[j].spilled_ptr;
2522                                 if (reg->type != SCALAR_VALUE)
2523                                         continue;
2524                                 reg->precise = true;
2525                         }
2526                 }
2527 }
2528
2529 static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
2530                                   int spi)
2531 {
2532         struct bpf_verifier_state *st = env->cur_state;
2533         int first_idx = st->first_insn_idx;
2534         int last_idx = env->insn_idx;
2535         struct bpf_func_state *func;
2536         struct bpf_reg_state *reg;
2537         u32 reg_mask = regno >= 0 ? 1u << regno : 0;
2538         u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
2539         bool skip_first = true;
2540         bool new_marks = false;
2541         int i, err;
2542
2543         if (!env->bpf_capable)
2544                 return 0;
2545
2546         func = st->frame[st->curframe];
2547         if (regno >= 0) {
2548                 reg = &func->regs[regno];
2549                 if (reg->type != SCALAR_VALUE) {
2550                         WARN_ONCE(1, "backtracing misuse");
2551                         return -EFAULT;
2552                 }
2553                 if (!reg->precise)
2554                         new_marks = true;
2555                 else
2556                         reg_mask = 0;
2557                 reg->precise = true;
2558         }
2559
2560         while (spi >= 0) {
2561                 if (!is_spilled_reg(&func->stack[spi])) {
2562                         stack_mask = 0;
2563                         break;
2564                 }
2565                 reg = &func->stack[spi].spilled_ptr;
2566                 if (reg->type != SCALAR_VALUE) {
2567                         stack_mask = 0;
2568                         break;
2569                 }
2570                 if (!reg->precise)
2571                         new_marks = true;
2572                 else
2573                         stack_mask = 0;
2574                 reg->precise = true;
2575                 break;
2576         }
2577
2578         if (!new_marks)
2579                 return 0;
2580         if (!reg_mask && !stack_mask)
2581                 return 0;
2582         for (;;) {
2583                 DECLARE_BITMAP(mask, 64);
2584                 u32 history = st->jmp_history_cnt;
2585
2586                 if (env->log.level & BPF_LOG_LEVEL)
2587                         verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
2588                 for (i = last_idx;;) {
2589                         if (skip_first) {
2590                                 err = 0;
2591                                 skip_first = false;
2592                         } else {
2593                                 err = backtrack_insn(env, i, &reg_mask, &stack_mask);
2594                         }
2595                         if (err == -ENOTSUPP) {
2596                                 mark_all_scalars_precise(env, st);
2597                                 return 0;
2598                         } else if (err) {
2599                                 return err;
2600                         }
2601                         if (!reg_mask && !stack_mask)
2602                                 /* Found assignment(s) into tracked register in this state.
2603                                  * Since this state is already marked, just return.
2604                                  * Nothing to be tracked further in the parent state.
2605                                  */
2606                                 return 0;
2607                         if (i == first_idx)
2608                                 break;
2609                         i = get_prev_insn_idx(st, i, &history);
2610                         if (i >= env->prog->len) {
2611                                 /* This can happen if backtracking reached insn 0
2612                                  * and there are still reg_mask or stack_mask
2613                                  * to backtrack.
2614                                  * It means the backtracking missed the spot where
2615                                  * particular register was initialized with a constant.
2616                                  */
2617                                 verbose(env, "BUG backtracking idx %d\n", i);
2618                                 WARN_ONCE(1, "verifier backtracking bug");
2619                                 return -EFAULT;
2620                         }
2621                 }
2622                 st = st->parent;
2623                 if (!st)
2624                         break;
2625
2626                 new_marks = false;
2627                 func = st->frame[st->curframe];
2628                 bitmap_from_u64(mask, reg_mask);
2629                 for_each_set_bit(i, mask, 32) {
2630                         reg = &func->regs[i];
2631                         if (reg->type != SCALAR_VALUE) {
2632                                 reg_mask &= ~(1u << i);
2633                                 continue;
2634                         }
2635                         if (!reg->precise)
2636                                 new_marks = true;
2637                         reg->precise = true;
2638                 }
2639
2640                 bitmap_from_u64(mask, stack_mask);
2641                 for_each_set_bit(i, mask, 64) {
2642                         if (i >= func->allocated_stack / BPF_REG_SIZE) {
2643                                 /* the sequence of instructions:
2644                                  * 2: (bf) r3 = r10
2645                                  * 3: (7b) *(u64 *)(r3 -8) = r0
2646                                  * 4: (79) r4 = *(u64 *)(r10 -8)
2647                                  * doesn't contain jmps. It's backtracked
2648                                  * as a single block.
2649                                  * During backtracking insn 3 is not recognized as
2650                                  * stack access, so at the end of backtracking
2651                                  * stack slot fp-8 is still marked in stack_mask.
2652                                  * However the parent state may not have accessed
2653                                  * fp-8 and it's "unallocated" stack space.
2654                                  * In such case fallback to conservative.
2655                                  */
2656                                 mark_all_scalars_precise(env, st);
2657                                 return 0;
2658                         }
2659
2660                         if (!is_spilled_reg(&func->stack[i])) {
2661                                 stack_mask &= ~(1ull << i);
2662                                 continue;
2663                         }
2664                         reg = &func->stack[i].spilled_ptr;
2665                         if (reg->type != SCALAR_VALUE) {
2666                                 stack_mask &= ~(1ull << i);
2667                                 continue;
2668                         }
2669                         if (!reg->precise)
2670                                 new_marks = true;
2671                         reg->precise = true;
2672                 }
2673                 if (env->log.level & BPF_LOG_LEVEL) {
2674                         print_verifier_state(env, func);
2675                         verbose(env, "parent %s regs=%x stack=%llx marks\n",
2676                                 new_marks ? "didn't have" : "already had",
2677                                 reg_mask, stack_mask);
2678                 }
2679
2680                 if (!reg_mask && !stack_mask)
2681                         break;
2682                 if (!new_marks)
2683                         break;
2684
2685                 last_idx = st->last_insn_idx;
2686                 first_idx = st->first_insn_idx;
2687         }
2688         return 0;
2689 }
2690
2691 static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
2692 {
2693         return __mark_chain_precision(env, regno, -1);
2694 }
2695
2696 static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
2697 {
2698         return __mark_chain_precision(env, -1, spi);
2699 }
2700
2701 static bool is_spillable_regtype(enum bpf_reg_type type)
2702 {
2703         switch (type) {
2704         case PTR_TO_MAP_VALUE:
2705         case PTR_TO_MAP_VALUE_OR_NULL:
2706         case PTR_TO_STACK:
2707         case PTR_TO_CTX:
2708         case PTR_TO_PACKET:
2709         case PTR_TO_PACKET_META:
2710         case PTR_TO_PACKET_END:
2711         case PTR_TO_FLOW_KEYS:
2712         case CONST_PTR_TO_MAP:
2713         case PTR_TO_SOCKET:
2714         case PTR_TO_SOCKET_OR_NULL:
2715         case PTR_TO_SOCK_COMMON:
2716         case PTR_TO_SOCK_COMMON_OR_NULL:
2717         case PTR_TO_TCP_SOCK:
2718         case PTR_TO_TCP_SOCK_OR_NULL:
2719         case PTR_TO_XDP_SOCK:
2720         case PTR_TO_BTF_ID:
2721         case PTR_TO_BTF_ID_OR_NULL:
2722         case PTR_TO_RDONLY_BUF:
2723         case PTR_TO_RDONLY_BUF_OR_NULL:
2724         case PTR_TO_RDWR_BUF:
2725         case PTR_TO_RDWR_BUF_OR_NULL:
2726         case PTR_TO_PERCPU_BTF_ID:
2727         case PTR_TO_MEM:
2728         case PTR_TO_MEM_OR_NULL:
2729         case PTR_TO_FUNC:
2730         case PTR_TO_MAP_KEY:
2731                 return true;
2732         default:
2733                 return false;
2734         }
2735 }
2736
2737 /* Does this register contain a constant zero? */
2738 static bool register_is_null(struct bpf_reg_state *reg)
2739 {
2740         return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
2741 }
2742
2743 static bool register_is_const(struct bpf_reg_state *reg)
2744 {
2745         return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
2746 }
2747
2748 static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
2749 {
2750         return tnum_is_unknown(reg->var_off) &&
2751                reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
2752                reg->umin_value == 0 && reg->umax_value == U64_MAX &&
2753                reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
2754                reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
2755 }
2756
2757 static bool register_is_bounded(struct bpf_reg_state *reg)
2758 {
2759         return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
2760 }
2761
2762 static bool __is_pointer_value(bool allow_ptr_leaks,
2763                                const struct bpf_reg_state *reg)
2764 {
2765         if (allow_ptr_leaks)
2766                 return false;
2767
2768         return reg->type != SCALAR_VALUE;
2769 }
2770
2771 static void save_register_state(struct bpf_func_state *state,
2772                                 int spi, struct bpf_reg_state *reg,
2773                                 int size)
2774 {
2775         int i;
2776
2777         state->stack[spi].spilled_ptr = *reg;
2778         if (size == BPF_REG_SIZE)
2779                 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2780
2781         for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
2782                 state->stack[spi].slot_type[i - 1] = STACK_SPILL;
2783
2784         /* size < 8 bytes spill */
2785         for (; i; i--)
2786                 scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
2787 }
2788
2789 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
2790  * stack boundary and alignment are checked in check_mem_access()
2791  */
2792 static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
2793                                        /* stack frame we're writing to */
2794                                        struct bpf_func_state *state,
2795                                        int off, int size, int value_regno,
2796                                        int insn_idx)
2797 {
2798         struct bpf_func_state *cur; /* state of the current function */
2799         int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
2800         u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
2801         struct bpf_reg_state *reg = NULL;
2802
2803         err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
2804         if (err)
2805                 return err;
2806         /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
2807          * so it's aligned access and [off, off + size) are within stack limits
2808          */
2809         if (!env->allow_ptr_leaks &&
2810             state->stack[spi].slot_type[0] == STACK_SPILL &&
2811             size != BPF_REG_SIZE) {
2812                 verbose(env, "attempt to corrupt spilled pointer on stack\n");
2813                 return -EACCES;
2814         }
2815
2816         cur = env->cur_state->frame[env->cur_state->curframe];
2817         if (value_regno >= 0)
2818                 reg = &cur->regs[value_regno];
2819         if (!env->bypass_spec_v4) {
2820                 bool sanitize = reg && is_spillable_regtype(reg->type);
2821
2822                 for (i = 0; i < size; i++) {
2823                         if (state->stack[spi].slot_type[i] == STACK_INVALID) {
2824                                 sanitize = true;
2825                                 break;
2826                         }
2827                 }
2828
2829                 if (sanitize)
2830                         env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
2831         }
2832
2833         if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
2834             !register_is_null(reg) && env->bpf_capable) {
2835                 if (dst_reg != BPF_REG_FP) {
2836                         /* The backtracking logic can only recognize explicit
2837                          * stack slot address like [fp - 8]. Other spill of
2838                          * scalar via different register has to be conservative.
2839                          * Backtrack from here and mark all registers as precise
2840                          * that contributed into 'reg' being a constant.
2841                          */
2842                         err = mark_chain_precision(env, value_regno);
2843                         if (err)
2844                                 return err;
2845                 }
2846                 save_register_state(state, spi, reg, size);
2847         } else if (reg && is_spillable_regtype(reg->type)) {
2848                 /* register containing pointer is being spilled into stack */
2849                 if (size != BPF_REG_SIZE) {
2850                         verbose_linfo(env, insn_idx, "; ");
2851                         verbose(env, "invalid size of register spill\n");
2852                         return -EACCES;
2853                 }
2854                 if (state != cur && reg->type == PTR_TO_STACK) {
2855                         verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
2856                         return -EINVAL;
2857                 }
2858                 save_register_state(state, spi, reg, size);
2859         } else {
2860                 u8 type = STACK_MISC;
2861
2862                 /* regular write of data into stack destroys any spilled ptr */
2863                 state->stack[spi].spilled_ptr.type = NOT_INIT;
2864                 /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
2865                 if (is_spilled_reg(&state->stack[spi]))
2866                         for (i = 0; i < BPF_REG_SIZE; i++)
2867                                 scrub_spilled_slot(&state->stack[spi].slot_type[i]);
2868
2869                 /* only mark the slot as written if all 8 bytes were written
2870                  * otherwise read propagation may incorrectly stop too soon
2871                  * when stack slots are partially written.
2872                  * This heuristic means that read propagation will be
2873                  * conservative, since it will add reg_live_read marks
2874                  * to stack slots all the way to first state when programs
2875                  * writes+reads less than 8 bytes
2876                  */
2877                 if (size == BPF_REG_SIZE)
2878                         state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2879
2880                 /* when we zero initialize stack slots mark them as such */
2881                 if (reg && register_is_null(reg)) {
2882                         /* backtracking doesn't work for STACK_ZERO yet. */
2883                         err = mark_chain_precision(env, value_regno);
2884                         if (err)
2885                                 return err;
2886                         type = STACK_ZERO;
2887                 }
2888
2889                 /* Mark slots affected by this stack write. */
2890                 for (i = 0; i < size; i++)
2891                         state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
2892                                 type;
2893         }
2894         return 0;
2895 }
2896
2897 /* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
2898  * known to contain a variable offset.
2899  * This function checks whether the write is permitted and conservatively
2900  * tracks the effects of the write, considering that each stack slot in the
2901  * dynamic range is potentially written to.
2902  *
2903  * 'off' includes 'regno->off'.
2904  * 'value_regno' can be -1, meaning that an unknown value is being written to
2905  * the stack.
2906  *
2907  * Spilled pointers in range are not marked as written because we don't know
2908  * what's going to be actually written. This means that read propagation for
2909  * future reads cannot be terminated by this write.
2910  *
2911  * For privileged programs, uninitialized stack slots are considered
2912  * initialized by this write (even though we don't know exactly what offsets
2913  * are going to be written to). The idea is that we don't want the verifier to
2914  * reject future reads that access slots written to through variable offsets.
2915  */
2916 static int check_stack_write_var_off(struct bpf_verifier_env *env,
2917                                      /* func where register points to */
2918                                      struct bpf_func_state *state,
2919                                      int ptr_regno, int off, int size,
2920                                      int value_regno, int insn_idx)
2921 {
2922         struct bpf_func_state *cur; /* state of the current function */
2923         int min_off, max_off;
2924         int i, err;
2925         struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
2926         bool writing_zero = false;
2927         /* set if the fact that we're writing a zero is used to let any
2928          * stack slots remain STACK_ZERO
2929          */
2930         bool zero_used = false;
2931
2932         cur = env->cur_state->frame[env->cur_state->curframe];
2933         ptr_reg = &cur->regs[ptr_regno];
2934         min_off = ptr_reg->smin_value + off;
2935         max_off = ptr_reg->smax_value + off + size;
2936         if (value_regno >= 0)
2937                 value_reg = &cur->regs[value_regno];
2938         if (value_reg && register_is_null(value_reg))
2939                 writing_zero = true;
2940
2941         err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
2942         if (err)
2943                 return err;
2944
2945
2946         /* Variable offset writes destroy any spilled pointers in range. */
2947         for (i = min_off; i < max_off; i++) {
2948                 u8 new_type, *stype;
2949                 int slot, spi;
2950
2951                 slot = -i - 1;
2952                 spi = slot / BPF_REG_SIZE;
2953                 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
2954
2955                 if (!env->allow_ptr_leaks
2956                                 && *stype != NOT_INIT
2957                                 && *stype != SCALAR_VALUE) {
2958                         /* Reject the write if there's are spilled pointers in
2959                          * range. If we didn't reject here, the ptr status
2960                          * would be erased below (even though not all slots are
2961                          * actually overwritten), possibly opening the door to
2962                          * leaks.
2963                          */
2964                         verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
2965                                 insn_idx, i);
2966                         return -EINVAL;
2967                 }
2968
2969                 /* Erase all spilled pointers. */
2970                 state->stack[spi].spilled_ptr.type = NOT_INIT;
2971
2972                 /* Update the slot type. */
2973                 new_type = STACK_MISC;
2974                 if (writing_zero && *stype == STACK_ZERO) {
2975                         new_type = STACK_ZERO;
2976                         zero_used = true;
2977                 }
2978                 /* If the slot is STACK_INVALID, we check whether it's OK to
2979                  * pretend that it will be initialized by this write. The slot
2980                  * might not actually be written to, and so if we mark it as
2981                  * initialized future reads might leak uninitialized memory.
2982                  * For privileged programs, we will accept such reads to slots
2983                  * that may or may not be written because, if we're reject
2984                  * them, the error would be too confusing.
2985                  */
2986                 if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
2987                         verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
2988                                         insn_idx, i);
2989                         return -EINVAL;
2990                 }
2991                 *stype = new_type;
2992         }
2993         if (zero_used) {
2994                 /* backtracking doesn't work for STACK_ZERO yet. */
2995                 err = mark_chain_precision(env, value_regno);
2996                 if (err)
2997                         return err;
2998         }
2999         return 0;
3000 }
3001
3002 /* When register 'dst_regno' is assigned some values from stack[min_off,
3003  * max_off), we set the register's type according to the types of the
3004  * respective stack slots. If all the stack values are known to be zeros, then
3005  * so is the destination reg. Otherwise, the register is considered to be
3006  * SCALAR. This function does not deal with register filling; the caller must
3007  * ensure that all spilled registers in the stack range have been marked as
3008  * read.
3009  */
3010 static void mark_reg_stack_read(struct bpf_verifier_env *env,
3011                                 /* func where src register points to */
3012                                 struct bpf_func_state *ptr_state,
3013                                 int min_off, int max_off, int dst_regno)
3014 {
3015         struct bpf_verifier_state *vstate = env->cur_state;
3016         struct bpf_func_state *state = vstate->frame[vstate->curframe];
3017         int i, slot, spi;
3018         u8 *stype;
3019         int zeros = 0;
3020
3021         for (i = min_off; i < max_off; i++) {
3022                 slot = -i - 1;
3023                 spi = slot / BPF_REG_SIZE;
3024                 stype = ptr_state->stack[spi].slot_type;
3025                 if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
3026                         break;
3027                 zeros++;
3028         }
3029         if (zeros == max_off - min_off) {
3030                 /* any access_size read into register is zero extended,
3031                  * so the whole register == const_zero
3032                  */
3033                 __mark_reg_const_zero(&state->regs[dst_regno]);
3034                 /* backtracking doesn't support STACK_ZERO yet,
3035                  * so mark it precise here, so that later
3036                  * backtracking can stop here.
3037                  * Backtracking may not need this if this register
3038                  * doesn't participate in pointer adjustment.
3039                  * Forward propagation of precise flag is not
3040                  * necessary either. This mark is only to stop
3041                  * backtracking. Any register that contributed
3042                  * to const 0 was marked precise before spill.
3043                  */
3044                 state->regs[dst_regno].precise = true;
3045         } else {
3046                 /* have read misc data from the stack */
3047                 mark_reg_unknown(env, state->regs, dst_regno);
3048         }
3049         state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3050 }
3051
3052 /* Read the stack at 'off' and put the results into the register indicated by
3053  * 'dst_regno'. It handles reg filling if the addressed stack slot is a
3054  * spilled reg.
3055  *
3056  * 'dst_regno' can be -1, meaning that the read value is not going to a
3057  * register.
3058  *
3059  * The access is assumed to be within the current stack bounds.
3060  */
3061 static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
3062                                       /* func where src register points to */
3063                                       struct bpf_func_state *reg_state,
3064                                       int off, int size, int dst_regno)
3065 {
3066         struct bpf_verifier_state *vstate = env->cur_state;
3067         struct bpf_func_state *state = vstate->frame[vstate->curframe];
3068         int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
3069         struct bpf_reg_state *reg;
3070         u8 *stype, type;
3071
3072         stype = reg_state->stack[spi].slot_type;
3073         reg = &reg_state->stack[spi].spilled_ptr;
3074
3075         if (is_spilled_reg(&reg_state->stack[spi])) {
3076                 u8 spill_size = 1;
3077
3078                 for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
3079                         spill_size++;
3080
3081                 if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
3082                         if (reg->type != SCALAR_VALUE) {
3083                                 verbose_linfo(env, env->insn_idx, "; ");
3084                                 verbose(env, "invalid size of register fill\n");
3085                                 return -EACCES;
3086                         }
3087
3088                         mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3089                         if (dst_regno < 0)
3090                                 return 0;
3091
3092                         if (!(off % BPF_REG_SIZE) && size == spill_size) {
3093                                 /* The earlier check_reg_arg() has decided the
3094                                  * subreg_def for this insn.  Save it first.
3095                                  */
3096                                 s32 subreg_def = state->regs[dst_regno].subreg_def;
3097
3098                                 state->regs[dst_regno] = *reg;
3099                                 state->regs[dst_regno].subreg_def = subreg_def;
3100                         } else {
3101                                 for (i = 0; i < size; i++) {
3102                                         type = stype[(slot - i) % BPF_REG_SIZE];
3103                                         if (type == STACK_SPILL)
3104                                                 continue;
3105                                         if (type == STACK_MISC)
3106                                                 continue;
3107                                         verbose(env, "invalid read from stack off %d+%d size %d\n",
3108                                                 off, i, size);
3109                                         return -EACCES;
3110                                 }
3111                                 mark_reg_unknown(env, state->regs, dst_regno);
3112                         }
3113                         state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3114                         return 0;
3115                 }
3116
3117                 if (dst_regno >= 0) {
3118                         /* restore register state from stack */
3119                         state->regs[dst_regno] = *reg;
3120                         /* mark reg as written since spilled pointer state likely
3121                          * has its liveness marks cleared by is_state_visited()
3122                          * which resets stack/reg liveness for state transitions
3123                          */
3124                         state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
3125                 } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
3126                         /* If dst_regno==-1, the caller is asking us whether
3127                          * it is acceptable to use this value as a SCALAR_VALUE
3128                          * (e.g. for XADD).
3129                          * We must not allow unprivileged callers to do that
3130                          * with spilled pointers.
3131                          */
3132                         verbose(env, "leaking pointer from stack off %d\n",
3133                                 off);
3134                         return -EACCES;
3135                 }
3136                 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3137         } else {
3138                 for (i = 0; i < size; i++) {
3139                         type = stype[(slot - i) % BPF_REG_SIZE];
3140                         if (type == STACK_MISC)
3141                                 continue;
3142                         if (type == STACK_ZERO)
3143                                 continue;
3144                         verbose(env, "invalid read from stack off %d+%d size %d\n",
3145                                 off, i, size);
3146                         return -EACCES;
3147                 }
3148                 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
3149                 if (dst_regno >= 0)
3150                         mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
3151         }
3152         return 0;
3153 }
3154
3155 enum stack_access_src {
3156         ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
3157         ACCESS_HELPER = 2,  /* the access is performed by a helper */
3158 };
3159
3160 static int check_stack_range_initialized(struct bpf_verifier_env *env,
3161                                          int regno, int off, int access_size,
3162                                          bool zero_size_allowed,
3163                                          enum stack_access_src type,
3164                                          struct bpf_call_arg_meta *meta);
3165
3166 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
3167 {
3168         return cur_regs(env) + regno;
3169 }
3170
3171 /* Read the stack at 'ptr_regno + off' and put the result into the register
3172  * 'dst_regno'.
3173  * 'off' includes the pointer register's fixed offset(i.e. 'ptr_regno.off'),
3174  * but not its variable offset.
3175  * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
3176  *
3177  * As opposed to check_stack_read_fixed_off, this function doesn't deal with
3178  * filling registers (i.e. reads of spilled register cannot be detected when
3179  * the offset is not fixed). We conservatively mark 'dst_regno' as containing
3180  * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
3181  * offset; for a fixed offset check_stack_read_fixed_off should be used
3182  * instead.
3183  */
3184 static int check_stack_read_var_off(struct bpf_verifier_env *env,
3185                                     int ptr_regno, int off, int size, int dst_regno)
3186 {
3187         /* The state of the source register. */
3188         struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3189         struct bpf_func_state *ptr_state = func(env, reg);
3190         int err;
3191         int min_off, max_off;
3192
3193         /* Note that we pass a NULL meta, so raw access will not be permitted.
3194          */
3195         err = check_stack_range_initialized(env, ptr_regno, off, size,
3196                                             false, ACCESS_DIRECT, NULL);
3197         if (err)
3198                 return err;
3199
3200         min_off = reg->smin_value + off;
3201         max_off = reg->smax_value + off;
3202         mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
3203         return 0;
3204 }
3205
3206 /* check_stack_read dispatches to check_stack_read_fixed_off or
3207  * check_stack_read_var_off.
3208  *
3209  * The caller must ensure that the offset falls within the allocated stack
3210  * bounds.
3211  *
3212  * 'dst_regno' is a register which will receive the value from the stack. It
3213  * can be -1, meaning that the read value is not going to a register.
3214  */
3215 static int check_stack_read(struct bpf_verifier_env *env,
3216                             int ptr_regno, int off, int size,
3217                             int dst_regno)
3218 {
3219         struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3220         struct bpf_func_state *state = func(env, reg);
3221         int err;
3222         /* Some accesses are only permitted with a static offset. */
3223         bool var_off = !tnum_is_const(reg->var_off);
3224
3225         /* The offset is required to be static when reads don't go to a
3226          * register, in order to not leak pointers (see
3227          * check_stack_read_fixed_off).
3228          */
3229         if (dst_regno < 0 && var_off) {
3230                 char tn_buf[48];
3231
3232                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3233                 verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
3234                         tn_buf, off, size);
3235                 return -EACCES;
3236         }
3237         /* Variable offset is prohibited for unprivileged mode for simplicity
3238          * since it requires corresponding support in Spectre masking for stack
3239          * ALU. See also retrieve_ptr_limit().
3240          */
3241         if (!env->bypass_spec_v1 && var_off) {
3242                 char tn_buf[48];
3243
3244                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3245                 verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
3246                                 ptr_regno, tn_buf);
3247                 return -EACCES;
3248         }
3249
3250         if (!var_off) {
3251                 off += reg->var_off.value;
3252                 err = check_stack_read_fixed_off(env, state, off, size,
3253                                                  dst_regno);
3254         } else {
3255                 /* Variable offset stack reads need more conservative handling
3256                  * than fixed offset ones. Note that dst_regno >= 0 on this
3257                  * branch.
3258                  */
3259                 err = check_stack_read_var_off(env, ptr_regno, off, size,
3260                                                dst_regno);
3261         }
3262         return err;
3263 }
3264
3265
3266 /* check_stack_write dispatches to check_stack_write_fixed_off or
3267  * check_stack_write_var_off.
3268  *
3269  * 'ptr_regno' is the register used as a pointer into the stack.
3270  * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
3271  * 'value_regno' is the register whose value we're writing to the stack. It can
3272  * be -1, meaning that we're not writing from a register.
3273  *
3274  * The caller must ensure that the offset falls within the maximum stack size.
3275  */
3276 static int check_stack_write(struct bpf_verifier_env *env,
3277                              int ptr_regno, int off, int size,
3278                              int value_regno, int insn_idx)
3279 {
3280         struct bpf_reg_state *reg = reg_state(env, ptr_regno);
3281         struct bpf_func_state *state = func(env, reg);
3282         int err;
3283
3284         if (tnum_is_const(reg->var_off)) {
3285                 off += reg->var_off.value;
3286                 err = check_stack_write_fixed_off(env, state, off, size,
3287                                                   value_regno, insn_idx);
3288         } else {
3289                 /* Variable offset stack reads need more conservative handling
3290                  * than fixed offset ones.
3291                  */
3292                 err = check_stack_write_var_off(env, state,
3293                                                 ptr_regno, off, size,
3294                                                 value_regno, insn_idx);
3295         }
3296         return err;
3297 }
3298
3299 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
3300                                  int off, int size, enum bpf_access_type type)
3301 {
3302         struct bpf_reg_state *regs = cur_regs(env);
3303         struct bpf_map *map = regs[regno].map_ptr;
3304         u32 cap = bpf_map_flags_to_cap(map);
3305
3306         if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
3307                 verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
3308                         map->value_size, off, size);
3309                 return -EACCES;
3310         }
3311
3312         if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
3313                 verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
3314                         map->value_size, off, size);
3315                 return -EACCES;
3316         }
3317
3318         return 0;
3319 }
3320
3321 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
3322 static int __check_mem_access(struct bpf_verifier_env *env, int regno,
3323                               int off, int size, u32 mem_size,
3324                               bool zero_size_allowed)
3325 {
3326         bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
3327         struct bpf_reg_state *reg;
3328
3329         if (off >= 0 && size_ok && (u64)off + size <= mem_size)
3330                 return 0;
3331
3332         reg = &cur_regs(env)[regno];
3333         switch (reg->type) {
3334         case PTR_TO_MAP_KEY:
3335                 verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
3336                         mem_size, off, size);
3337                 break;
3338         case PTR_TO_MAP_VALUE:
3339                 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
3340                         mem_size, off, size);
3341                 break;
3342         case PTR_TO_PACKET:
3343         case PTR_TO_PACKET_META:
3344         case PTR_TO_PACKET_END:
3345                 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
3346                         off, size, regno, reg->id, off, mem_size);
3347                 break;
3348         case PTR_TO_MEM:
3349         default:
3350                 verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
3351                         mem_size, off, size);
3352         }
3353
3354         return -EACCES;
3355 }
3356
3357 /* check read/write into a memory region with possible variable offset */
3358 static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
3359                                    int off, int size, u32 mem_size,
3360                                    bool zero_size_allowed)
3361 {
3362         struct bpf_verifier_state *vstate = env->cur_state;
3363         struct bpf_func_state *state = vstate->frame[vstate->curframe];
3364         struct bpf_reg_state *reg = &state->regs[regno];
3365         int err;
3366
3367         /* We may have adjusted the register pointing to memory region, so we
3368          * need to try adding each of min_value and max_value to off
3369          * to make sure our theoretical access will be safe.
3370          */
3371         if (env->log.level & BPF_LOG_LEVEL)
3372                 print_verifier_state(env, state);
3373
3374         /* The minimum value is only important with signed
3375          * comparisons where we can't assume the floor of a
3376          * value is 0.  If we are using signed variables for our
3377          * index'es we need to make sure that whatever we use
3378          * will have a set floor within our range.
3379          */
3380         if (reg->smin_value < 0 &&
3381             (reg->smin_value == S64_MIN ||
3382              (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
3383               reg->smin_value + off < 0)) {
3384                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3385                         regno);
3386                 return -EACCES;
3387         }
3388         err = __check_mem_access(env, regno, reg->smin_value + off, size,
3389                                  mem_size, zero_size_allowed);
3390         if (err) {
3391                 verbose(env, "R%d min value is outside of the allowed memory range\n",
3392                         regno);
3393                 return err;
3394         }
3395
3396         /* If we haven't set a max value then we need to bail since we can't be
3397          * sure we won't do bad things.
3398          * If reg->umax_value + off could overflow, treat that as unbounded too.
3399          */
3400         if (reg->umax_value >= BPF_MAX_VAR_OFF) {
3401                 verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
3402                         regno);
3403                 return -EACCES;
3404         }
3405         err = __check_mem_access(env, regno, reg->umax_value + off, size,
3406                                  mem_size, zero_size_allowed);
3407         if (err) {
3408                 verbose(env, "R%d max value is outside of the allowed memory range\n",
3409                         regno);
3410                 return err;
3411         }
3412
3413         return 0;
3414 }
3415
3416 /* check read/write into a map element with possible variable offset */
3417 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
3418                             int off, int size, bool zero_size_allowed)
3419 {
3420         struct bpf_verifier_state *vstate = env->cur_state;
3421         struct bpf_func_state *state = vstate->frame[vstate->curframe];
3422         struct bpf_reg_state *reg = &state->regs[regno];
3423         struct bpf_map *map = reg->map_ptr;
3424         int err;
3425
3426         err = check_mem_region_access(env, regno, off, size, map->value_size,
3427                                       zero_size_allowed);
3428         if (err)
3429                 return err;
3430
3431         if (map_value_has_spin_lock(map)) {
3432                 u32 lock = map->spin_lock_off;
3433
3434                 /* if any part of struct bpf_spin_lock can be touched by
3435                  * load/store reject this program.
3436                  * To check that [x1, x2) overlaps with [y1, y2)
3437                  * it is sufficient to check x1 < y2 && y1 < x2.
3438                  */
3439                 if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
3440                      lock < reg->umax_value + off + size) {
3441                         verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
3442                         return -EACCES;
3443                 }
3444         }
3445         if (map_value_has_timer(map)) {
3446                 u32 t = map->timer_off;
3447
3448                 if (reg->smin_value + off < t + sizeof(struct bpf_timer) &&
3449                      t < reg->umax_value + off + size) {
3450                         verbose(env, "bpf_timer cannot be accessed directly by load/store\n");
3451                         return -EACCES;
3452                 }
3453         }
3454         return err;
3455 }
3456
3457 #define MAX_PACKET_OFF 0xffff
3458
3459 static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
3460 {
3461         return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
3462 }
3463
3464 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
3465                                        const struct bpf_call_arg_meta *meta,
3466                                        enum bpf_access_type t)
3467 {
3468         enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
3469
3470         switch (prog_type) {
3471         /* Program types only with direct read access go here! */
3472         case BPF_PROG_TYPE_LWT_IN:
3473         case BPF_PROG_TYPE_LWT_OUT:
3474         case BPF_PROG_TYPE_LWT_SEG6LOCAL:
3475         case BPF_PROG_TYPE_SK_REUSEPORT:
3476         case BPF_PROG_TYPE_FLOW_DISSECTOR:
3477         case BPF_PROG_TYPE_CGROUP_SKB:
3478                 if (t == BPF_WRITE)
3479                         return false;
3480                 fallthrough;
3481
3482         /* Program types with direct read + write access go here! */
3483         case BPF_PROG_TYPE_SCHED_CLS:
3484         case BPF_PROG_TYPE_SCHED_ACT:
3485         case BPF_PROG_TYPE_XDP:
3486         case BPF_PROG_TYPE_LWT_XMIT:
3487         case BPF_PROG_TYPE_SK_SKB:
3488         case BPF_PROG_TYPE_SK_MSG:
3489                 if (meta)
3490                         return meta->pkt_access;
3491
3492                 env->seen_direct_write = true;
3493                 return true;
3494
3495         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3496                 if (t == BPF_WRITE)
3497                         env->seen_direct_write = true;
3498
3499                 return true;
3500
3501         default:
3502                 return false;
3503         }
3504 }
3505
3506 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
3507                                int size, bool zero_size_allowed)
3508 {
3509         struct bpf_reg_state *regs = cur_regs(env);
3510         struct bpf_reg_state *reg = &regs[regno];
3511         int err;
3512
3513         /* We may have added a variable offset to the packet pointer; but any
3514          * reg->range we have comes after that.  We are only checking the fixed
3515          * offset.
3516          */
3517
3518         /* We don't allow negative numbers, because we aren't tracking enough
3519          * detail to prove they're safe.
3520          */
3521         if (reg->smin_value < 0) {
3522                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3523                         regno);
3524                 return -EACCES;
3525         }
3526
3527         err = reg->range < 0 ? -EINVAL :
3528               __check_mem_access(env, regno, off, size, reg->range,
3529                                  zero_size_allowed);
3530         if (err) {
3531                 verbose(env, "R%d offset is outside of the packet\n", regno);
3532                 return err;
3533         }
3534
3535         /* __check_mem_access has made sure "off + size - 1" is within u16.
3536          * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
3537          * otherwise find_good_pkt_pointers would have refused to set range info
3538          * that __check_mem_access would have rejected this pkt access.
3539          * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
3540          */
3541         env->prog->aux->max_pkt_offset =
3542                 max_t(u32, env->prog->aux->max_pkt_offset,
3543                       off + reg->umax_value + size - 1);
3544
3545         return err;
3546 }
3547
3548 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
3549 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
3550                             enum bpf_access_type t, enum bpf_reg_type *reg_type,
3551                             struct btf **btf, u32 *btf_id)
3552 {
3553         struct bpf_insn_access_aux info = {
3554                 .reg_type = *reg_type,
3555                 .log = &env->log,
3556         };
3557
3558         if (env->ops->is_valid_access &&
3559             env->ops->is_valid_access(off, size, t, env->prog, &info)) {
3560                 /* A non zero info.ctx_field_size indicates that this field is a
3561                  * candidate for later verifier transformation to load the whole
3562                  * field and then apply a mask when accessed with a narrower
3563                  * access than actual ctx access size. A zero info.ctx_field_size
3564                  * will only allow for whole field access and rejects any other
3565                  * type of narrower access.
3566                  */
3567                 *reg_type = info.reg_type;
3568
3569                 if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL) {
3570                         *btf = info.btf;
3571                         *btf_id = info.btf_id;
3572                 } else {
3573                         env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
3574                 }
3575                 /* remember the offset of last byte accessed in ctx */
3576                 if (env->prog->aux->max_ctx_offset < off + size)
3577                         env->prog->aux->max_ctx_offset = off + size;
3578                 return 0;
3579         }
3580
3581         verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
3582         return -EACCES;
3583 }
3584
3585 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
3586                                   int size)
3587 {
3588         if (size < 0 || off < 0 ||
3589             (u64)off + size > sizeof(struct bpf_flow_keys)) {
3590                 verbose(env, "invalid access to flow keys off=%d size=%d\n",
3591                         off, size);
3592                 return -EACCES;
3593         }
3594         return 0;
3595 }
3596
3597 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
3598                              u32 regno, int off, int size,
3599                              enum bpf_access_type t)
3600 {
3601         struct bpf_reg_state *regs = cur_regs(env);
3602         struct bpf_reg_state *reg = &regs[regno];
3603         struct bpf_insn_access_aux info = {};
3604         bool valid;
3605
3606         if (reg->smin_value < 0) {
3607                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3608                         regno);
3609                 return -EACCES;
3610         }
3611
3612         switch (reg->type) {
3613         case PTR_TO_SOCK_COMMON:
3614                 valid = bpf_sock_common_is_valid_access(off, size, t, &info);
3615                 break;
3616         case PTR_TO_SOCKET:
3617                 valid = bpf_sock_is_valid_access(off, size, t, &info);
3618                 break;
3619         case PTR_TO_TCP_SOCK:
3620                 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
3621                 break;
3622         case PTR_TO_XDP_SOCK:
3623                 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
3624                 break;
3625         default:
3626                 valid = false;
3627         }
3628
3629
3630         if (valid) {
3631                 env->insn_aux_data[insn_idx].ctx_field_size =
3632                         info.ctx_field_size;
3633                 return 0;
3634         }
3635
3636         verbose(env, "R%d invalid %s access off=%d size=%d\n",
3637                 regno, reg_type_str[reg->type], off, size);
3638
3639         return -EACCES;
3640 }
3641
3642 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
3643 {
3644         return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
3645 }
3646
3647 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
3648 {
3649         const struct bpf_reg_state *reg = reg_state(env, regno);
3650
3651         return reg->type == PTR_TO_CTX;
3652 }
3653
3654 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
3655 {
3656         const struct bpf_reg_state *reg = reg_state(env, regno);
3657
3658         return type_is_sk_pointer(reg->type);
3659 }
3660
3661 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
3662 {
3663         const struct bpf_reg_state *reg = reg_state(env, regno);
3664
3665         return type_is_pkt_pointer(reg->type);
3666 }
3667
3668 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
3669 {
3670         const struct bpf_reg_state *reg = reg_state(env, regno);
3671
3672         /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
3673         return reg->type == PTR_TO_FLOW_KEYS;
3674 }
3675
3676 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
3677                                    const struct bpf_reg_state *reg,
3678                                    int off, int size, bool strict)
3679 {
3680         struct tnum reg_off;
3681         int ip_align;
3682
3683         /* Byte size accesses are always allowed. */
3684         if (!strict || size == 1)
3685                 return 0;
3686
3687         /* For platforms that do not have a Kconfig enabling
3688          * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
3689          * NET_IP_ALIGN is universally set to '2'.  And on platforms
3690          * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
3691          * to this code only in strict mode where we want to emulate
3692          * the NET_IP_ALIGN==2 checking.  Therefore use an
3693          * unconditional IP align value of '2'.
3694          */
3695         ip_align = 2;
3696
3697         reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
3698         if (!tnum_is_aligned(reg_off, size)) {
3699                 char tn_buf[48];
3700
3701                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3702                 verbose(env,
3703                         "misaligned packet access off %d+%s+%d+%d size %d\n",
3704                         ip_align, tn_buf, reg->off, off, size);
3705                 return -EACCES;
3706         }
3707
3708         return 0;
3709 }
3710
3711 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
3712                                        const struct bpf_reg_state *reg,
3713                                        const char *pointer_desc,
3714                                        int off, int size, bool strict)
3715 {
3716         struct tnum reg_off;
3717
3718         /* Byte size accesses are always allowed. */
3719         if (!strict || size == 1)
3720                 return 0;
3721
3722         reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
3723         if (!tnum_is_aligned(reg_off, size)) {
3724                 char tn_buf[48];
3725
3726                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3727                 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
3728                         pointer_desc, tn_buf, reg->off, off, size);
3729                 return -EACCES;
3730         }
3731
3732         return 0;
3733 }
3734
3735 static int check_ptr_alignment(struct bpf_verifier_env *env,
3736                                const struct bpf_reg_state *reg, int off,
3737                                int size, bool strict_alignment_once)
3738 {
3739         bool strict = env->strict_alignment || strict_alignment_once;
3740         const char *pointer_desc = "";
3741
3742         switch (reg->type) {
3743         case PTR_TO_PACKET:
3744         case PTR_TO_PACKET_META:
3745                 /* Special case, because of NET_IP_ALIGN. Given metadata sits
3746                  * right in front, treat it the very same way.
3747                  */
3748                 return check_pkt_ptr_alignment(env, reg, off, size, strict);
3749         case PTR_TO_FLOW_KEYS:
3750                 pointer_desc = "flow keys ";
3751                 break;
3752         case PTR_TO_MAP_KEY:
3753                 pointer_desc = "key ";
3754                 break;
3755         case PTR_TO_MAP_VALUE:
3756                 pointer_desc = "value ";
3757                 break;
3758         case PTR_TO_CTX:
3759                 pointer_desc = "context ";
3760                 break;
3761         case PTR_TO_STACK:
3762                 pointer_desc = "stack ";
3763                 /* The stack spill tracking logic in check_stack_write_fixed_off()
3764                  * and check_stack_read_fixed_off() relies on stack accesses being
3765                  * aligned.
3766                  */
3767                 strict = true;
3768                 break;
3769         case PTR_TO_SOCKET:
3770                 pointer_desc = "sock ";
3771                 break;
3772         case PTR_TO_SOCK_COMMON:
3773                 pointer_desc = "sock_common ";
3774                 break;
3775         case PTR_TO_TCP_SOCK:
3776                 pointer_desc = "tcp_sock ";
3777                 break;
3778         case PTR_TO_XDP_SOCK:
3779                 pointer_desc = "xdp_sock ";
3780                 break;
3781         default:
3782                 break;
3783         }
3784         return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
3785                                            strict);
3786 }
3787
3788 static int update_stack_depth(struct bpf_verifier_env *env,
3789                               const struct bpf_func_state *func,
3790                               int off)
3791 {
3792         u16 stack = env->subprog_info[func->subprogno].stack_depth;
3793
3794         if (stack >= -off)
3795                 return 0;
3796
3797         /* update known max for given subprogram */
3798         env->subprog_info[func->subprogno].stack_depth = -off;
3799         return 0;
3800 }
3801
3802 /* starting from main bpf function walk all instructions of the function
3803  * and recursively walk all callees that given function can call.
3804  * Ignore jump and exit insns.
3805  * Since recursion is prevented by check_cfg() this algorithm
3806  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
3807  */
3808 static int check_max_stack_depth(struct bpf_verifier_env *env)
3809 {
3810         int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
3811         struct bpf_subprog_info *subprog = env->subprog_info;
3812         struct bpf_insn *insn = env->prog->insnsi;
3813         bool tail_call_reachable = false;
3814         int ret_insn[MAX_CALL_FRAMES];
3815         int ret_prog[MAX_CALL_FRAMES];
3816         int j;
3817
3818 process_func:
3819         /* protect against potential stack overflow that might happen when
3820          * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
3821          * depth for such case down to 256 so that the worst case scenario
3822          * would result in 8k stack size (32 which is tailcall limit * 256 =
3823          * 8k).
3824          *
3825          * To get the idea what might happen, see an example:
3826          * func1 -> sub rsp, 128
3827          *  subfunc1 -> sub rsp, 256
3828          *  tailcall1 -> add rsp, 256
3829          *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
3830          *   subfunc2 -> sub rsp, 64
3831          *   subfunc22 -> sub rsp, 128
3832          *   tailcall2 -> add rsp, 128
3833          *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
3834          *
3835          * tailcall will unwind the current stack frame but it will not get rid
3836          * of caller's stack as shown on the example above.
3837          */
3838         if (idx && subprog[idx].has_tail_call && depth >= 256) {
3839                 verbose(env,
3840                         "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
3841                         depth);
3842                 return -EACCES;
3843         }
3844         /* round up to 32-bytes, since this is granularity
3845          * of interpreter stack size
3846          */
3847         depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
3848         if (depth > MAX_BPF_STACK) {
3849                 verbose(env, "combined stack size of %d calls is %d. Too large\n",
3850                         frame + 1, depth);
3851                 return -EACCES;
3852         }
3853 continue_func:
3854         subprog_end = subprog[idx + 1].start;
3855         for (; i < subprog_end; i++) {
3856                 int next_insn;
3857
3858                 if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
3859                         continue;
3860                 /* remember insn and function to return to */
3861                 ret_insn[frame] = i + 1;
3862                 ret_prog[frame] = idx;
3863
3864                 /* find the callee */
3865                 next_insn = i + insn[i].imm + 1;
3866                 idx = find_subprog(env, next_insn);
3867                 if (idx < 0) {
3868                         WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
3869                                   next_insn);
3870                         return -EFAULT;
3871                 }
3872                 if (subprog[idx].is_async_cb) {
3873                         if (subprog[idx].has_tail_call) {
3874                                 verbose(env, "verifier bug. subprog has tail_call and async cb\n");
3875                                 return -EFAULT;
3876                         }
3877                          /* async callbacks don't increase bpf prog stack size */
3878                         continue;
3879                 }
3880                 i = next_insn;
3881
3882                 if (subprog[idx].has_tail_call)
3883                         tail_call_reachable = true;
3884
3885                 frame++;
3886                 if (frame >= MAX_CALL_FRAMES) {
3887                         verbose(env, "the call stack of %d frames is too deep !\n",
3888                                 frame);
3889                         return -E2BIG;
3890                 }
3891                 goto process_func;
3892         }
3893         /* if tail call got detected across bpf2bpf calls then mark each of the
3894          * currently present subprog frames as tail call reachable subprogs;
3895          * this info will be utilized by JIT so that we will be preserving the
3896          * tail call counter throughout bpf2bpf calls combined with tailcalls
3897          */
3898         if (tail_call_reachable)
3899                 for (j = 0; j < frame; j++)
3900                         subprog[ret_prog[j]].tail_call_reachable = true;
3901         if (subprog[0].tail_call_reachable)
3902                 env->prog->aux->tail_call_reachable = true;
3903
3904         /* end of for() loop means the last insn of the 'subprog'
3905          * was reached. Doesn't matter whether it was JA or EXIT
3906          */
3907         if (frame == 0)
3908                 return 0;
3909         depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
3910         frame--;
3911         i = ret_insn[frame];
3912         idx = ret_prog[frame];
3913         goto continue_func;
3914 }
3915
3916 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
3917 static int get_callee_stack_depth(struct bpf_verifier_env *env,
3918                                   const struct bpf_insn *insn, int idx)
3919 {
3920         int start = idx + insn->imm + 1, subprog;
3921
3922         subprog = find_subprog(env, start);
3923         if (subprog < 0) {
3924                 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
3925                           start);
3926                 return -EFAULT;
3927         }
3928         return env->subprog_info[subprog].stack_depth;
3929 }
3930 #endif
3931
3932 int check_ctx_reg(struct bpf_verifier_env *env,
3933                   const struct bpf_reg_state *reg, int regno)
3934 {
3935         /* Access to ctx or passing it to a helper is only allowed in
3936          * its original, unmodified form.
3937          */
3938
3939         if (reg->off) {
3940                 verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
3941                         regno, reg->off);
3942                 return -EACCES;
3943         }
3944
3945         if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3946                 char tn_buf[48];
3947
3948                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3949                 verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
3950                 return -EACCES;
3951         }
3952
3953         return 0;
3954 }
3955
3956 static int __check_buffer_access(struct bpf_verifier_env *env,
3957                                  const char *buf_info,
3958                                  const struct bpf_reg_state *reg,
3959                                  int regno, int off, int size)
3960 {
3961         if (off < 0) {
3962                 verbose(env,
3963                         "R%d invalid %s buffer access: off=%d, size=%d\n",
3964                         regno, buf_info, off, size);
3965                 return -EACCES;
3966         }
3967         if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3968                 char tn_buf[48];
3969
3970                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3971                 verbose(env,
3972                         "R%d invalid variable buffer offset: off=%d, var_off=%s\n",
3973                         regno, off, tn_buf);
3974                 return -EACCES;
3975         }
3976
3977         return 0;
3978 }
3979
3980 static int check_tp_buffer_access(struct bpf_verifier_env *env,
3981                                   const struct bpf_reg_state *reg,
3982                                   int regno, int off, int size)
3983 {
3984         int err;
3985
3986         err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
3987         if (err)
3988                 return err;
3989
3990         if (off + size > env->prog->aux->max_tp_access)
3991                 env->prog->aux->max_tp_access = off + size;
3992
3993         return 0;
3994 }
3995
3996 static int check_buffer_access(struct bpf_verifier_env *env,
3997                                const struct bpf_reg_state *reg,
3998                                int regno, int off, int size,
3999                                bool zero_size_allowed,
4000                                const char *buf_info,
4001                                u32 *max_access)
4002 {
4003         int err;
4004
4005         err = __check_buffer_access(env, buf_info, reg, regno, off, size);
4006         if (err)
4007                 return err;
4008
4009         if (off + size > *max_access)
4010                 *max_access = off + size;
4011
4012         return 0;
4013 }
4014
4015 /* BPF architecture zero extends alu32 ops into 64-bit registesr */
4016 static void zext_32_to_64(struct bpf_reg_state *reg)
4017 {
4018         reg->var_off = tnum_subreg(reg->var_off);
4019         __reg_assign_32_into_64(reg);
4020 }
4021
4022 /* truncate register to smaller size (in bytes)
4023  * must be called with size < BPF_REG_SIZE
4024  */
4025 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
4026 {
4027         u64 mask;
4028
4029         /* clear high bits in bit representation */
4030         reg->var_off = tnum_cast(reg->var_off, size);
4031
4032         /* fix arithmetic bounds */
4033         mask = ((u64)1 << (size * 8)) - 1;
4034         if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
4035                 reg->umin_value &= mask;
4036                 reg->umax_value &= mask;
4037         } else {
4038                 reg->umin_value = 0;
4039                 reg->umax_value = mask;
4040         }
4041         reg->smin_value = reg->umin_value;
4042         reg->smax_value = reg->umax_value;
4043
4044         /* If size is smaller than 32bit register the 32bit register
4045          * values are also truncated so we push 64-bit bounds into
4046          * 32-bit bounds. Above were truncated < 32-bits already.
4047          */
4048         if (size >= 4)
4049                 return;
4050         __reg_combine_64_into_32(reg);
4051 }
4052
4053 static bool bpf_map_is_rdonly(const struct bpf_map *map)
4054 {
4055         /* A map is considered read-only if the following condition are true:
4056          *
4057          * 1) BPF program side cannot change any of the map content. The
4058          *    BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
4059          *    and was set at map creation time.
4060          * 2) The map value(s) have been initialized from user space by a
4061          *    loader and then "frozen", such that no new map update/delete
4062          *    operations from syscall side are possible for the rest of
4063          *    the map's lifetime from that point onwards.
4064          * 3) Any parallel/pending map update/delete operations from syscall
4065          *    side have been completed. Only after that point, it's safe to
4066          *    assume that map value(s) are immutable.
4067          */
4068         return (map->map_flags & BPF_F_RDONLY_PROG) &&
4069                READ_ONCE(map->frozen) &&
4070                !bpf_map_write_active(map);
4071 }
4072
4073 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
4074 {
4075         void *ptr;
4076         u64 addr;
4077         int err;
4078
4079         err = map->ops->map_direct_value_addr(map, &addr, off);
4080         if (err)
4081                 return err;
4082         ptr = (void *)(long)addr + off;
4083
4084         switch (size) {
4085         case sizeof(u8):
4086                 *val = (u64)*(u8 *)ptr;
4087                 break;
4088         case sizeof(u16):
4089                 *val = (u64)*(u16 *)ptr;
4090                 break;
4091         case sizeof(u32):
4092                 *val = (u64)*(u32 *)ptr;
4093                 break;
4094         case sizeof(u64):
4095                 *val = *(u64 *)ptr;
4096                 break;
4097         default:
4098                 return -EINVAL;
4099         }
4100         return 0;
4101 }
4102
4103 static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
4104                                    struct bpf_reg_state *regs,
4105                                    int regno, int off, int size,
4106                                    enum bpf_access_type atype,
4107                                    int value_regno)
4108 {
4109         struct bpf_reg_state *reg = regs + regno;
4110         const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
4111         const char *tname = btf_name_by_offset(reg->btf, t->name_off);
4112         u32 btf_id;
4113         int ret;
4114
4115         if (off < 0) {
4116                 verbose(env,
4117                         "R%d is ptr_%s invalid negative access: off=%d\n",
4118                         regno, tname, off);
4119                 return -EACCES;
4120         }
4121         if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4122                 char tn_buf[48];
4123
4124                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4125                 verbose(env,
4126                         "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
4127                         regno, tname, off, tn_buf);
4128                 return -EACCES;
4129         }
4130
4131         if (env->ops->btf_struct_access) {
4132                 ret = env->ops->btf_struct_access(&env->log, reg->btf, t,
4133                                                   off, size, atype, &btf_id);
4134         } else {
4135                 if (atype != BPF_READ) {
4136                         verbose(env, "only read is supported\n");
4137                         return -EACCES;
4138                 }
4139
4140                 ret = btf_struct_access(&env->log, reg->btf, t, off, size,
4141                                         atype, &btf_id);
4142         }
4143
4144         if (ret < 0)
4145                 return ret;
4146
4147         if (atype == BPF_READ && value_regno >= 0)
4148                 mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id);
4149
4150         return 0;
4151 }
4152
4153 static int check_ptr_to_map_access(struct bpf_verifier_env *env,
4154                                    struct bpf_reg_state *regs,
4155                                    int regno, int off, int size,
4156                                    enum bpf_access_type atype,
4157                                    int value_regno)
4158 {
4159         struct bpf_reg_state *reg = regs + regno;
4160         struct bpf_map *map = reg->map_ptr;
4161         const struct btf_type *t;
4162         const char *tname;
4163         u32 btf_id;
4164         int ret;
4165
4166         if (!btf_vmlinux) {
4167                 verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
4168                 return -ENOTSUPP;
4169         }
4170
4171         if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
4172                 verbose(env, "map_ptr access not supported for map type %d\n",
4173                         map->map_type);
4174                 return -ENOTSUPP;
4175         }
4176
4177         t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
4178         tname = btf_name_by_offset(btf_vmlinux, t->name_off);
4179
4180         if (!env->allow_ptr_to_map_access) {
4181                 verbose(env,
4182                         "%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
4183                         tname);
4184                 return -EPERM;
4185         }
4186
4187         if (off < 0) {
4188                 verbose(env, "R%d is %s invalid negative access: off=%d\n",
4189                         regno, tname, off);
4190                 return -EACCES;
4191         }
4192
4193         if (atype != BPF_READ) {
4194                 verbose(env, "only read from %s is supported\n", tname);
4195                 return -EACCES;
4196         }
4197
4198         ret = btf_struct_access(&env->log, btf_vmlinux, t, off, size, atype, &btf_id);
4199         if (ret < 0)
4200                 return ret;
4201
4202         if (value_regno >= 0)
4203                 mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id);
4204
4205         return 0;
4206 }
4207
4208 /* Check that the stack access at the given offset is within bounds. The
4209  * maximum valid offset is -1.
4210  *
4211  * The minimum valid offset is -MAX_BPF_STACK for writes, and
4212  * -state->allocated_stack for reads.
4213  */
4214 static int check_stack_slot_within_bounds(int off,
4215                                           struct bpf_func_state *state,
4216                                           enum bpf_access_type t)
4217 {
4218         int min_valid_off;
4219
4220         if (t == BPF_WRITE)
4221                 min_valid_off = -MAX_BPF_STACK;
4222         else
4223                 min_valid_off = -state->allocated_stack;
4224
4225         if (off < min_valid_off || off > -1)
4226                 return -EACCES;
4227         return 0;
4228 }
4229
4230 /* Check that the stack access at 'regno + off' falls within the maximum stack
4231  * bounds.
4232  *
4233  * 'off' includes `regno->offset`, but not its dynamic part (if any).
4234  */
4235 static int check_stack_access_within_bounds(
4236                 struct bpf_verifier_env *env,
4237                 int regno, int off, int access_size,
4238                 enum stack_access_src src, enum bpf_access_type type)
4239 {
4240         struct bpf_reg_state *regs = cur_regs(env);
4241         struct bpf_reg_state *reg = regs + regno;
4242         struct bpf_func_state *state = func(env, reg);
4243         int min_off, max_off;
4244         int err;
4245         char *err_extra;
4246
4247         if (src == ACCESS_HELPER)
4248                 /* We don't know if helpers are reading or writing (or both). */
4249                 err_extra = " indirect access to";
4250         else if (type == BPF_READ)
4251                 err_extra = " read from";
4252         else
4253                 err_extra = " write to";
4254
4255         if (tnum_is_const(reg->var_off)) {
4256                 min_off = reg->var_off.value + off;
4257                 if (access_size > 0)
4258                         max_off = min_off + access_size - 1;
4259                 else
4260                         max_off = min_off;
4261         } else {
4262                 if (reg->smax_value >= BPF_MAX_VAR_OFF ||
4263                     reg->smin_value <= -BPF_MAX_VAR_OFF) {
4264                         verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
4265                                 err_extra, regno);
4266                         return -EACCES;
4267                 }
4268                 min_off = reg->smin_value + off;
4269                 if (access_size > 0)
4270                         max_off = reg->smax_value + off + access_size - 1;
4271                 else
4272                         max_off = min_off;
4273         }
4274
4275         err = check_stack_slot_within_bounds(min_off, state, type);
4276         if (!err)
4277                 err = check_stack_slot_within_bounds(max_off, state, type);
4278
4279         if (err) {
4280                 if (tnum_is_const(reg->var_off)) {
4281                         verbose(env, "invalid%s stack R%d off=%d size=%d\n",
4282                                 err_extra, regno, off, access_size);
4283                 } else {
4284                         char tn_buf[48];
4285
4286                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4287                         verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
4288                                 err_extra, regno, tn_buf, access_size);
4289                 }
4290         }
4291         return err;
4292 }
4293
4294 /* check whether memory at (regno + off) is accessible for t = (read | write)
4295  * if t==write, value_regno is a register which value is stored into memory
4296  * if t==read, value_regno is a register which will receive the value from memory
4297  * if t==write && value_regno==-1, some unknown value is stored into memory
4298  * if t==read && value_regno==-1, don't care what we read from memory
4299  */
4300 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
4301                             int off, int bpf_size, enum bpf_access_type t,
4302                             int value_regno, bool strict_alignment_once)
4303 {
4304         struct bpf_reg_state *regs = cur_regs(env);
4305         struct bpf_reg_state *reg = regs + regno;
4306         struct bpf_func_state *state;
4307         int size, err = 0;
4308
4309         size = bpf_size_to_bytes(bpf_size);
4310         if (size < 0)
4311                 return size;
4312
4313         /* alignment checks will add in reg->off themselves */
4314         err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
4315         if (err)
4316                 return err;
4317
4318         /* for access checks, reg->off is just part of off */
4319         off += reg->off;
4320
4321         if (reg->type == PTR_TO_MAP_KEY) {
4322                 if (t == BPF_WRITE) {
4323                         verbose(env, "write to change key R%d not allowed\n", regno);
4324                         return -EACCES;
4325                 }
4326
4327                 err = check_mem_region_access(env, regno, off, size,
4328                                               reg->map_ptr->key_size, false);
4329                 if (err)
4330                         return err;
4331                 if (value_regno >= 0)
4332                         mark_reg_unknown(env, regs, value_regno);
4333         } else if (reg->type == PTR_TO_MAP_VALUE) {
4334                 if (t == BPF_WRITE && value_regno >= 0 &&
4335                     is_pointer_value(env, value_regno)) {
4336                         verbose(env, "R%d leaks addr into map\n", value_regno);
4337                         return -EACCES;
4338                 }
4339                 err = check_map_access_type(env, regno, off, size, t);
4340                 if (err)
4341                         return err;
4342                 err = check_map_access(env, regno, off, size, false);
4343                 if (!err && t == BPF_READ && value_regno >= 0) {
4344                         struct bpf_map *map = reg->map_ptr;
4345
4346                         /* if map is read-only, track its contents as scalars */
4347                         if (tnum_is_const(reg->var_off) &&
4348                             bpf_map_is_rdonly(map) &&
4349                             map->ops->map_direct_value_addr) {
4350                                 int map_off = off + reg->var_off.value;
4351                                 u64 val = 0;
4352
4353                                 err = bpf_map_direct_read(map, map_off, size,
4354                                                           &val);
4355                                 if (err)
4356                                         return err;
4357
4358                                 regs[value_regno].type = SCALAR_VALUE;
4359                                 __mark_reg_known(&regs[value_regno], val);
4360                         } else {
4361                                 mark_reg_unknown(env, regs, value_regno);
4362                         }
4363                 }
4364         } else if (reg->type == PTR_TO_MEM) {
4365                 if (t == BPF_WRITE && value_regno >= 0 &&
4366                     is_pointer_value(env, value_regno)) {
4367                         verbose(env, "R%d leaks addr into mem\n", value_regno);
4368                         return -EACCES;
4369                 }
4370                 err = check_mem_region_access(env, regno, off, size,
4371                                               reg->mem_size, false);
4372                 if (!err && t == BPF_READ && value_regno >= 0)
4373                         mark_reg_unknown(env, regs, value_regno);
4374         } else if (reg->type == PTR_TO_CTX) {
4375                 enum bpf_reg_type reg_type = SCALAR_VALUE;
4376                 struct btf *btf = NULL;
4377                 u32 btf_id = 0;
4378
4379                 if (t == BPF_WRITE && value_regno >= 0 &&
4380                     is_pointer_value(env, value_regno)) {
4381                         verbose(env, "R%d leaks addr into ctx\n", value_regno);
4382                         return -EACCES;
4383                 }
4384
4385                 err = check_ctx_reg(env, reg, regno);
4386                 if (err < 0)
4387                         return err;
4388
4389                 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf, &btf_id);
4390                 if (err)
4391                         verbose_linfo(env, insn_idx, "; ");
4392                 if (!err && t == BPF_READ && value_regno >= 0) {
4393                         /* ctx access returns either a scalar, or a
4394                          * PTR_TO_PACKET[_META,_END]. In the latter
4395                          * case, we know the offset is zero.
4396                          */
4397                         if (reg_type == SCALAR_VALUE) {
4398                                 mark_reg_unknown(env, regs, value_regno);
4399                         } else {
4400                                 mark_reg_known_zero(env, regs,
4401                                                     value_regno);
4402                                 if (reg_type_may_be_null(reg_type))
4403                                         regs[value_regno].id = ++env->id_gen;
4404                                 /* A load of ctx field could have different
4405                                  * actual load size with the one encoded in the
4406                                  * insn. When the dst is PTR, it is for sure not
4407                                  * a sub-register.
4408                                  */
4409                                 regs[value_regno].subreg_def = DEF_NOT_SUBREG;
4410                                 if (reg_type == PTR_TO_BTF_ID ||
4411                                     reg_type == PTR_TO_BTF_ID_OR_NULL) {
4412                                         regs[value_regno].btf = btf;
4413                                         regs[value_regno].btf_id = btf_id;
4414                                 }
4415                         }
4416                         regs[value_regno].type = reg_type;
4417                 }
4418
4419         } else if (reg->type == PTR_TO_STACK) {
4420                 /* Basic bounds checks. */
4421                 err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
4422                 if (err)
4423                         return err;
4424
4425                 state = func(env, reg);
4426                 err = update_stack_depth(env, state, off);
4427                 if (err)
4428                         return err;
4429
4430                 if (t == BPF_READ)
4431                         err = check_stack_read(env, regno, off, size,
4432                                                value_regno);
4433                 else
4434                         err = check_stack_write(env, regno, off, size,
4435                                                 value_regno, insn_idx);
4436         } else if (reg_is_pkt_pointer(reg)) {
4437                 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
4438                         verbose(env, "cannot write into packet\n");
4439                         return -EACCES;
4440                 }
4441                 if (t == BPF_WRITE && value_regno >= 0 &&
4442                     is_pointer_value(env, value_regno)) {
4443                         verbose(env, "R%d leaks addr into packet\n",
4444                                 value_regno);
4445                         return -EACCES;
4446                 }
4447                 err = check_packet_access(env, regno, off, size, false);
4448                 if (!err && t == BPF_READ && value_regno >= 0)
4449                         mark_reg_unknown(env, regs, value_regno);
4450         } else if (reg->type == PTR_TO_FLOW_KEYS) {
4451                 if (t == BPF_WRITE && value_regno >= 0 &&
4452                     is_pointer_value(env, value_regno)) {
4453                         verbose(env, "R%d leaks addr into flow keys\n",
4454                                 value_regno);
4455                         return -EACCES;
4456                 }
4457
4458                 err = check_flow_keys_access(env, off, size);
4459                 if (!err && t == BPF_READ && value_regno >= 0)
4460                         mark_reg_unknown(env, regs, value_regno);
4461         } else if (type_is_sk_pointer(reg->type)) {
4462                 if (t == BPF_WRITE) {
4463                         verbose(env, "R%d cannot write into %s\n",
4464                                 regno, reg_type_str[reg->type]);
4465                         return -EACCES;
4466                 }
4467                 err = check_sock_access(env, insn_idx, regno, off, size, t);
4468                 if (!err && value_regno >= 0)
4469                         mark_reg_unknown(env, regs, value_regno);
4470         } else if (reg->type == PTR_TO_TP_BUFFER) {
4471                 err = check_tp_buffer_access(env, reg, regno, off, size);
4472                 if (!err && t == BPF_READ && value_regno >= 0)
4473                         mark_reg_unknown(env, regs, value_regno);
4474         } else if (reg->type == PTR_TO_BTF_ID) {
4475                 err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
4476                                               value_regno);
4477         } else if (reg->type == CONST_PTR_TO_MAP) {
4478                 err = check_ptr_to_map_access(env, regs, regno, off, size, t,
4479                                               value_regno);
4480         } else if (reg->type == PTR_TO_RDONLY_BUF) {
4481                 if (t == BPF_WRITE) {
4482                         verbose(env, "R%d cannot write into %s\n",
4483                                 regno, reg_type_str[reg->type]);
4484                         return -EACCES;
4485                 }
4486                 err = check_buffer_access(env, reg, regno, off, size, false,
4487                                           "rdonly",
4488                                           &env->prog->aux->max_rdonly_access);
4489                 if (!err && value_regno >= 0)
4490                         mark_reg_unknown(env, regs, value_regno);
4491         } else if (reg->type == PTR_TO_RDWR_BUF) {
4492                 err = check_buffer_access(env, reg, regno, off, size, false,
4493                                           "rdwr",
4494                                           &env->prog->aux->max_rdwr_access);
4495                 if (!err && t == BPF_READ && value_regno >= 0)
4496                         mark_reg_unknown(env, regs, value_regno);
4497         } else {
4498                 verbose(env, "R%d invalid mem access '%s'\n", regno,
4499                         reg_type_str[reg->type]);
4500                 return -EACCES;
4501         }
4502
4503         if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
4504             regs[value_regno].type == SCALAR_VALUE) {
4505                 /* b/h/w load zero-extends, mark upper bits as known 0 */
4506                 coerce_reg_to_size(&regs[value_regno], size);
4507         }
4508         return err;
4509 }
4510
4511 static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
4512 {
4513         int load_reg;
4514         int err;
4515
4516         switch (insn->imm) {
4517         case BPF_ADD:
4518         case BPF_ADD | BPF_FETCH:
4519         case BPF_AND:
4520         case BPF_AND | BPF_FETCH:
4521         case BPF_OR:
4522         case BPF_OR | BPF_FETCH:
4523         case BPF_XOR:
4524         case BPF_XOR | BPF_FETCH:
4525         case BPF_XCHG:
4526         case BPF_CMPXCHG:
4527                 break;
4528         default:
4529                 verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
4530                 return -EINVAL;
4531         }
4532
4533         if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
4534                 verbose(env, "invalid atomic operand size\n");
4535                 return -EINVAL;
4536         }
4537
4538         /* check src1 operand */
4539         err = check_reg_arg(env, insn->src_reg, SRC_OP);
4540         if (err)
4541                 return err;
4542
4543         /* check src2 operand */
4544         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4545         if (err)
4546                 return err;
4547
4548         if (insn->imm == BPF_CMPXCHG) {
4549                 /* Check comparison of R0 with memory location */
4550                 err = check_reg_arg(env, BPF_REG_0, SRC_OP);
4551                 if (err)
4552                         return err;
4553         }
4554
4555         if (is_pointer_value(env, insn->src_reg)) {
4556                 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
4557                 return -EACCES;
4558         }
4559
4560         if (is_ctx_reg(env, insn->dst_reg) ||
4561             is_pkt_reg(env, insn->dst_reg) ||
4562             is_flow_key_reg(env, insn->dst_reg) ||
4563             is_sk_reg(env, insn->dst_reg)) {
4564                 verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
4565                         insn->dst_reg,
4566                         reg_type_str[reg_state(env, insn->dst_reg)->type]);
4567                 return -EACCES;
4568         }
4569
4570         if (insn->imm & BPF_FETCH) {
4571                 if (insn->imm == BPF_CMPXCHG)
4572                         load_reg = BPF_REG_0;
4573                 else
4574                         load_reg = insn->src_reg;
4575
4576                 /* check and record load of old value */
4577                 err = check_reg_arg(env, load_reg, DST_OP);
4578                 if (err)
4579                         return err;
4580         } else {
4581                 /* This instruction accesses a memory location but doesn't
4582                  * actually load it into a register.
4583                  */
4584                 load_reg = -1;
4585         }
4586
4587         /* Check whether we can read the memory, with second call for fetch
4588          * case to simulate the register fill.
4589          */
4590         err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4591                                BPF_SIZE(insn->code), BPF_READ, -1, true);
4592         if (!err && load_reg >= 0)
4593                 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4594                                        BPF_SIZE(insn->code), BPF_READ, load_reg,
4595                                        true);
4596         if (err)
4597                 return err;
4598
4599         /* Check whether we can write into the same memory. */
4600         err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
4601                                BPF_SIZE(insn->code), BPF_WRITE, -1, true);
4602         if (err)
4603                 return err;
4604
4605         return 0;
4606 }
4607
4608 /* When register 'regno' is used to read the stack (either directly or through
4609  * a helper function) make sure that it's within stack boundary and, depending
4610  * on the access type, that all elements of the stack are initialized.
4611  *
4612  * 'off' includes 'regno->off', but not its dynamic part (if any).
4613  *
4614  * All registers that have been spilled on the stack in the slots within the
4615  * read offsets are marked as read.
4616  */
4617 static int check_stack_range_initialized(
4618                 struct bpf_verifier_env *env, int regno, int off,
4619                 int access_size, bool zero_size_allowed,
4620                 enum stack_access_src type, struct bpf_call_arg_meta *meta)
4621 {
4622         struct bpf_reg_state *reg = reg_state(env, regno);
4623         struct bpf_func_state *state = func(env, reg);
4624         int err, min_off, max_off, i, j, slot, spi;
4625         char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
4626         enum bpf_access_type bounds_check_type;
4627         /* Some accesses can write anything into the stack, others are
4628          * read-only.
4629          */
4630         bool clobber = false;
4631
4632         if (access_size == 0 && !zero_size_allowed) {
4633                 verbose(env, "invalid zero-sized read\n");
4634                 return -EACCES;
4635         }
4636
4637         if (type == ACCESS_HELPER) {
4638                 /* The bounds checks for writes are more permissive than for
4639                  * reads. However, if raw_mode is not set, we'll do extra
4640                  * checks below.
4641                  */
4642                 bounds_check_type = BPF_WRITE;
4643                 clobber = true;
4644         } else {
4645                 bounds_check_type = BPF_READ;
4646         }
4647         err = check_stack_access_within_bounds(env, regno, off, access_size,
4648                                                type, bounds_check_type);
4649         if (err)
4650                 return err;
4651
4652
4653         if (tnum_is_const(reg->var_off)) {
4654                 min_off = max_off = reg->var_off.value + off;
4655         } else {
4656                 /* Variable offset is prohibited for unprivileged mode for
4657                  * simplicity since it requires corresponding support in
4658                  * Spectre masking for stack ALU.
4659                  * See also retrieve_ptr_limit().
4660                  */
4661                 if (!env->bypass_spec_v1) {
4662                         char tn_buf[48];
4663
4664                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4665                         verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
4666                                 regno, err_extra, tn_buf);
4667                         return -EACCES;
4668                 }
4669                 /* Only initialized buffer on stack is allowed to be accessed
4670                  * with variable offset. With uninitialized buffer it's hard to
4671                  * guarantee that whole memory is marked as initialized on
4672                  * helper return since specific bounds are unknown what may
4673                  * cause uninitialized stack leaking.
4674                  */
4675                 if (meta && meta->raw_mode)
4676                         meta = NULL;
4677
4678                 min_off = reg->smin_value + off;
4679                 max_off = reg->smax_value + off;
4680         }
4681
4682         if (meta && meta->raw_mode) {
4683                 meta->access_size = access_size;
4684                 meta->regno = regno;
4685                 return 0;
4686         }
4687
4688         for (i = min_off; i < max_off + access_size; i++) {
4689                 u8 *stype;
4690
4691                 slot = -i - 1;
4692                 spi = slot / BPF_REG_SIZE;
4693                 if (state->allocated_stack <= slot)
4694                         goto err;
4695                 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
4696                 if (*stype == STACK_MISC)
4697                         goto mark;
4698                 if (*stype == STACK_ZERO) {
4699                         if (clobber) {
4700                                 /* helper can write anything into the stack */
4701                                 *stype = STACK_MISC;
4702                         }
4703                         goto mark;
4704                 }
4705
4706                 if (is_spilled_reg(&state->stack[spi]) &&
4707                     state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
4708                         goto mark;
4709
4710                 if (is_spilled_reg(&state->stack[spi]) &&
4711                     (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
4712                      env->allow_ptr_leaks)) {
4713                         if (clobber) {
4714                                 __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
4715                                 for (j = 0; j < BPF_REG_SIZE; j++)
4716                                         scrub_spilled_slot(&state->stack[spi].slot_type[j]);
4717                         }
4718                         goto mark;
4719                 }
4720
4721 err:
4722                 if (tnum_is_const(reg->var_off)) {
4723                         verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
4724                                 err_extra, regno, min_off, i - min_off, access_size);
4725                 } else {
4726                         char tn_buf[48];
4727
4728                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4729                         verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
4730                                 err_extra, regno, tn_buf, i - min_off, access_size);
4731                 }
4732                 return -EACCES;
4733 mark:
4734                 /* reading any byte out of 8-byte 'spill_slot' will cause
4735                  * the whole slot to be marked as 'read'
4736                  */
4737                 mark_reg_read(env, &state->stack[spi].spilled_ptr,
4738                               state->stack[spi].spilled_ptr.parent,
4739                               REG_LIVE_READ64);
4740         }
4741         return update_stack_depth(env, state, min_off);
4742 }
4743
4744 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
4745                                    int access_size, bool zero_size_allowed,
4746                                    struct bpf_call_arg_meta *meta)
4747 {
4748         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4749
4750         switch (reg->type) {
4751         case PTR_TO_PACKET:
4752         case PTR_TO_PACKET_META:
4753                 return check_packet_access(env, regno, reg->off, access_size,
4754                                            zero_size_allowed);
4755         case PTR_TO_MAP_KEY:
4756                 return check_mem_region_access(env, regno, reg->off, access_size,
4757                                                reg->map_ptr->key_size, false);
4758         case PTR_TO_MAP_VALUE:
4759                 if (check_map_access_type(env, regno, reg->off, access_size,
4760                                           meta && meta->raw_mode ? BPF_WRITE :
4761                                           BPF_READ))
4762                         return -EACCES;
4763                 return check_map_access(env, regno, reg->off, access_size,
4764                                         zero_size_allowed);
4765         case PTR_TO_MEM:
4766                 return check_mem_region_access(env, regno, reg->off,
4767                                                access_size, reg->mem_size,
4768                                                zero_size_allowed);
4769         case PTR_TO_RDONLY_BUF:
4770                 if (meta && meta->raw_mode)
4771                         return -EACCES;
4772                 return check_buffer_access(env, reg, regno, reg->off,
4773                                            access_size, zero_size_allowed,
4774                                            "rdonly",
4775                                            &env->prog->aux->max_rdonly_access);
4776         case PTR_TO_RDWR_BUF:
4777                 return check_buffer_access(env, reg, regno, reg->off,
4778                                            access_size, zero_size_allowed,
4779                                            "rdwr",
4780                                            &env->prog->aux->max_rdwr_access);
4781         case PTR_TO_STACK:
4782                 return check_stack_range_initialized(
4783                                 env,
4784                                 regno, reg->off, access_size,
4785                                 zero_size_allowed, ACCESS_HELPER, meta);
4786         default: /* scalar_value or invalid ptr */
4787                 /* Allow zero-byte read from NULL, regardless of pointer type */
4788                 if (zero_size_allowed && access_size == 0 &&
4789                     register_is_null(reg))
4790                         return 0;
4791
4792                 verbose(env, "R%d type=%s expected=%s\n", regno,
4793                         reg_type_str[reg->type],
4794                         reg_type_str[PTR_TO_STACK]);
4795                 return -EACCES;
4796         }
4797 }
4798
4799 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
4800                    u32 regno, u32 mem_size)
4801 {
4802         if (register_is_null(reg))
4803                 return 0;
4804
4805         if (reg_type_may_be_null(reg->type)) {
4806                 /* Assuming that the register contains a value check if the memory
4807                  * access is safe. Temporarily save and restore the register's state as
4808                  * the conversion shouldn't be visible to a caller.
4809                  */
4810                 const struct bpf_reg_state saved_reg = *reg;
4811                 int rv;
4812
4813                 mark_ptr_not_null_reg(reg);
4814                 rv = check_helper_mem_access(env, regno, mem_size, true, NULL);
4815                 *reg = saved_reg;
4816                 return rv;
4817         }
4818
4819         return check_helper_mem_access(env, regno, mem_size, true, NULL);
4820 }
4821
4822 /* Implementation details:
4823  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
4824  * Two bpf_map_lookups (even with the same key) will have different reg->id.
4825  * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
4826  * value_or_null->value transition, since the verifier only cares about
4827  * the range of access to valid map value pointer and doesn't care about actual
4828  * address of the map element.
4829  * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
4830  * reg->id > 0 after value_or_null->value transition. By doing so
4831  * two bpf_map_lookups will be considered two different pointers that
4832  * point to different bpf_spin_locks.
4833  * The verifier allows taking only one bpf_spin_lock at a time to avoid
4834  * dead-locks.
4835  * Since only one bpf_spin_lock is allowed the checks are simpler than
4836  * reg_is_refcounted() logic. The verifier needs to remember only
4837  * one spin_lock instead of array of acquired_refs.
4838  * cur_state->active_spin_lock remembers which map value element got locked
4839  * and clears it after bpf_spin_unlock.
4840  */
4841 static int process_spin_lock(struct bpf_verifier_env *env, int regno,
4842                              bool is_lock)
4843 {
4844         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4845         struct bpf_verifier_state *cur = env->cur_state;
4846         bool is_const = tnum_is_const(reg->var_off);
4847         struct bpf_map *map = reg->map_ptr;
4848         u64 val = reg->var_off.value;
4849
4850         if (!is_const) {
4851                 verbose(env,
4852                         "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
4853                         regno);
4854                 return -EINVAL;
4855         }
4856         if (!map->btf) {
4857                 verbose(env,
4858                         "map '%s' has to have BTF in order to use bpf_spin_lock\n",
4859                         map->name);
4860                 return -EINVAL;
4861         }
4862         if (!map_value_has_spin_lock(map)) {
4863                 if (map->spin_lock_off == -E2BIG)
4864                         verbose(env,
4865                                 "map '%s' has more than one 'struct bpf_spin_lock'\n",
4866                                 map->name);
4867                 else if (map->spin_lock_off == -ENOENT)
4868                         verbose(env,
4869                                 "map '%s' doesn't have 'struct bpf_spin_lock'\n",
4870                                 map->name);
4871                 else
4872                         verbose(env,
4873                                 "map '%s' is not a struct type or bpf_spin_lock is mangled\n",
4874                                 map->name);
4875                 return -EINVAL;
4876         }
4877         if (map->spin_lock_off != val + reg->off) {
4878                 verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
4879                         val + reg->off);
4880                 return -EINVAL;
4881         }
4882         if (is_lock) {
4883                 if (cur->active_spin_lock) {
4884                         verbose(env,
4885                                 "Locking two bpf_spin_locks are not allowed\n");
4886                         return -EINVAL;
4887                 }
4888                 cur->active_spin_lock = reg->id;
4889         } else {
4890                 if (!cur->active_spin_lock) {
4891                         verbose(env, "bpf_spin_unlock without taking a lock\n");
4892                         return -EINVAL;
4893                 }
4894                 if (cur->active_spin_lock != reg->id) {
4895                         verbose(env, "bpf_spin_unlock of different lock\n");
4896                         return -EINVAL;
4897                 }
4898                 cur->active_spin_lock = 0;
4899         }
4900         return 0;
4901 }
4902
4903 static int process_timer_func(struct bpf_verifier_env *env, int regno,
4904                               struct bpf_call_arg_meta *meta)
4905 {
4906         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4907         bool is_const = tnum_is_const(reg->var_off);
4908         struct bpf_map *map = reg->map_ptr;
4909         u64 val = reg->var_off.value;
4910
4911         if (!is_const) {
4912                 verbose(env,
4913                         "R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
4914                         regno);
4915                 return -EINVAL;
4916         }
4917         if (!map->btf) {
4918                 verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
4919                         map->name);
4920                 return -EINVAL;
4921         }
4922         if (!map_value_has_timer(map)) {
4923                 if (map->timer_off == -E2BIG)
4924                         verbose(env,
4925                                 "map '%s' has more than one 'struct bpf_timer'\n",
4926                                 map->name);
4927                 else if (map->timer_off == -ENOENT)
4928                         verbose(env,
4929                                 "map '%s' doesn't have 'struct bpf_timer'\n",
4930                                 map->name);
4931                 else
4932                         verbose(env,
4933                                 "map '%s' is not a struct type or bpf_timer is mangled\n",
4934                                 map->name);
4935                 return -EINVAL;
4936         }
4937         if (map->timer_off != val + reg->off) {
4938                 verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
4939                         val + reg->off, map->timer_off);
4940                 return -EINVAL;
4941         }
4942         if (meta->map_ptr) {
4943                 verbose(env, "verifier bug. Two map pointers in a timer helper\n");
4944                 return -EFAULT;
4945         }
4946         meta->map_uid = reg->map_uid;
4947         meta->map_ptr = map;
4948         return 0;
4949 }
4950
4951 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
4952 {
4953         return type == ARG_PTR_TO_MEM ||
4954                type == ARG_PTR_TO_MEM_OR_NULL ||
4955                type == ARG_PTR_TO_UNINIT_MEM;
4956 }
4957
4958 static bool arg_type_is_mem_size(enum bpf_arg_type type)
4959 {
4960         return type == ARG_CONST_SIZE ||
4961                type == ARG_CONST_SIZE_OR_ZERO;
4962 }
4963
4964 static bool arg_type_is_alloc_size(enum bpf_arg_type type)
4965 {
4966         return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
4967 }
4968
4969 static bool arg_type_is_int_ptr(enum bpf_arg_type type)
4970 {
4971         return type == ARG_PTR_TO_INT ||
4972                type == ARG_PTR_TO_LONG;
4973 }
4974
4975 static int int_ptr_type_to_size(enum bpf_arg_type type)
4976 {
4977         if (type == ARG_PTR_TO_INT)
4978                 return sizeof(u32);
4979         else if (type == ARG_PTR_TO_LONG)
4980                 return sizeof(u64);
4981
4982         return -EINVAL;
4983 }
4984
4985 static int resolve_map_arg_type(struct bpf_verifier_env *env,
4986                                  const struct bpf_call_arg_meta *meta,
4987                                  enum bpf_arg_type *arg_type)
4988 {
4989         if (!meta->map_ptr) {
4990                 /* kernel subsystem misconfigured verifier */
4991                 verbose(env, "invalid map_ptr to access map->type\n");
4992                 return -EACCES;
4993         }
4994
4995         switch (meta->map_ptr->map_type) {
4996         case BPF_MAP_TYPE_SOCKMAP:
4997         case BPF_MAP_TYPE_SOCKHASH:
4998                 if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
4999                         *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
5000                 } else {
5001                         verbose(env, "invalid arg_type for sockmap/sockhash\n");
5002                         return -EINVAL;
5003                 }
5004                 break;
5005         case BPF_MAP_TYPE_BLOOM_FILTER:
5006                 if (meta->func_id == BPF_FUNC_map_peek_elem)
5007                         *arg_type = ARG_PTR_TO_MAP_VALUE;
5008                 break;
5009         default:
5010                 break;
5011         }
5012         return 0;
5013 }
5014
5015 struct bpf_reg_types {
5016         const enum bpf_reg_type types[10];
5017         u32 *btf_id;
5018 };
5019
5020 static const struct bpf_reg_types map_key_value_types = {
5021         .types = {
5022                 PTR_TO_STACK,
5023                 PTR_TO_PACKET,
5024                 PTR_TO_PACKET_META,
5025                 PTR_TO_MAP_KEY,
5026                 PTR_TO_MAP_VALUE,
5027         },
5028 };
5029
5030 static const struct bpf_reg_types sock_types = {
5031         .types = {
5032                 PTR_TO_SOCK_COMMON,
5033                 PTR_TO_SOCKET,
5034                 PTR_TO_TCP_SOCK,
5035                 PTR_TO_XDP_SOCK,
5036         },
5037 };
5038
5039 #ifdef CONFIG_NET
5040 static const struct bpf_reg_types btf_id_sock_common_types = {
5041         .types = {
5042                 PTR_TO_SOCK_COMMON,
5043                 PTR_TO_SOCKET,
5044                 PTR_TO_TCP_SOCK,
5045                 PTR_TO_XDP_SOCK,
5046                 PTR_TO_BTF_ID,
5047         },
5048         .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
5049 };
5050 #endif
5051
5052 static const struct bpf_reg_types mem_types = {
5053         .types = {
5054                 PTR_TO_STACK,
5055                 PTR_TO_PACKET,
5056                 PTR_TO_PACKET_META,
5057                 PTR_TO_MAP_KEY,
5058                 PTR_TO_MAP_VALUE,
5059                 PTR_TO_MEM,
5060                 PTR_TO_RDONLY_BUF,
5061                 PTR_TO_RDWR_BUF,
5062         },
5063 };
5064
5065 static const struct bpf_reg_types int_ptr_types = {
5066         .types = {
5067                 PTR_TO_STACK,
5068                 PTR_TO_PACKET,
5069                 PTR_TO_PACKET_META,
5070                 PTR_TO_MAP_KEY,
5071                 PTR_TO_MAP_VALUE,
5072         },
5073 };
5074
5075 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
5076 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
5077 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
5078 static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
5079 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
5080 static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
5081 static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
5082 static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
5083 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
5084 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
5085 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
5086 static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
5087
5088 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
5089         [ARG_PTR_TO_MAP_KEY]            = &map_key_value_types,
5090         [ARG_PTR_TO_MAP_VALUE]          = &map_key_value_types,
5091         [ARG_PTR_TO_UNINIT_MAP_VALUE]   = &map_key_value_types,
5092         [ARG_PTR_TO_MAP_VALUE_OR_NULL]  = &map_key_value_types,
5093         [ARG_CONST_SIZE]                = &scalar_types,
5094         [ARG_CONST_SIZE_OR_ZERO]        = &scalar_types,
5095         [ARG_CONST_ALLOC_SIZE_OR_ZERO]  = &scalar_types,
5096         [ARG_CONST_MAP_PTR]             = &const_map_ptr_types,
5097         [ARG_PTR_TO_CTX]                = &context_types,
5098         [ARG_PTR_TO_CTX_OR_NULL]        = &context_types,
5099         [ARG_PTR_TO_SOCK_COMMON]        = &sock_types,
5100 #ifdef CONFIG_NET
5101         [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
5102 #endif
5103         [ARG_PTR_TO_SOCKET]             = &fullsock_types,
5104         [ARG_PTR_TO_SOCKET_OR_NULL]     = &fullsock_types,
5105         [ARG_PTR_TO_BTF_ID]             = &btf_ptr_types,
5106         [ARG_PTR_TO_SPIN_LOCK]          = &spin_lock_types,
5107         [ARG_PTR_TO_MEM]                = &mem_types,
5108         [ARG_PTR_TO_MEM_OR_NULL]        = &mem_types,
5109         [ARG_PTR_TO_UNINIT_MEM]         = &mem_types,
5110         [ARG_PTR_TO_ALLOC_MEM]          = &alloc_mem_types,
5111         [ARG_PTR_TO_ALLOC_MEM_OR_NULL]  = &alloc_mem_types,
5112         [ARG_PTR_TO_INT]                = &int_ptr_types,
5113         [ARG_PTR_TO_LONG]               = &int_ptr_types,
5114         [ARG_PTR_TO_PERCPU_BTF_ID]      = &percpu_btf_ptr_types,
5115         [ARG_PTR_TO_FUNC]               = &func_ptr_types,
5116         [ARG_PTR_TO_STACK_OR_NULL]      = &stack_ptr_types,
5117         [ARG_PTR_TO_CONST_STR]          = &const_str_ptr_types,
5118         [ARG_PTR_TO_TIMER]              = &timer_types,
5119 };
5120
5121 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
5122                           enum bpf_arg_type arg_type,
5123                           const u32 *arg_btf_id)
5124 {
5125         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5126         enum bpf_reg_type expected, type = reg->type;
5127         const struct bpf_reg_types *compatible;
5128         int i, j;
5129
5130         compatible = compatible_reg_types[arg_type];
5131         if (!compatible) {
5132                 verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
5133                 return -EFAULT;
5134         }
5135
5136         for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
5137                 expected = compatible->types[i];
5138                 if (expected == NOT_INIT)
5139                         break;
5140
5141                 if (type == expected)
5142                         goto found;
5143         }
5144
5145         verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]);
5146         for (j = 0; j + 1 < i; j++)
5147                 verbose(env, "%s, ", reg_type_str[compatible->types[j]]);
5148         verbose(env, "%s\n", reg_type_str[compatible->types[j]]);
5149         return -EACCES;
5150
5151 found:
5152         if (type == PTR_TO_BTF_ID) {
5153                 if (!arg_btf_id) {
5154                         if (!compatible->btf_id) {
5155                                 verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
5156                                 return -EFAULT;
5157                         }
5158                         arg_btf_id = compatible->btf_id;
5159                 }
5160
5161                 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
5162                                           btf_vmlinux, *arg_btf_id)) {
5163                         verbose(env, "R%d is of type %s but %s is expected\n",
5164                                 regno, kernel_type_name(reg->btf, reg->btf_id),
5165                                 kernel_type_name(btf_vmlinux, *arg_btf_id));
5166                         return -EACCES;
5167                 }
5168
5169                 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
5170                         verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
5171                                 regno);
5172                         return -EACCES;
5173                 }
5174         }
5175
5176         return 0;
5177 }
5178
5179 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
5180                           struct bpf_call_arg_meta *meta,
5181                           const struct bpf_func_proto *fn)
5182 {
5183         u32 regno = BPF_REG_1 + arg;
5184         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
5185         enum bpf_arg_type arg_type = fn->arg_type[arg];
5186         enum bpf_reg_type type = reg->type;
5187         int err = 0;
5188
5189         if (arg_type == ARG_DONTCARE)
5190                 return 0;
5191
5192         err = check_reg_arg(env, regno, SRC_OP);
5193         if (err)
5194                 return err;
5195
5196         if (arg_type == ARG_ANYTHING) {
5197                 if (is_pointer_value(env, regno)) {
5198                         verbose(env, "R%d leaks addr into helper function\n",
5199                                 regno);
5200                         return -EACCES;
5201                 }
5202                 return 0;
5203         }
5204
5205         if (type_is_pkt_pointer(type) &&
5206             !may_access_direct_pkt_data(env, meta, BPF_READ)) {
5207                 verbose(env, "helper access to the packet is not allowed\n");
5208                 return -EACCES;
5209         }
5210
5211         if (arg_type == ARG_PTR_TO_MAP_VALUE ||
5212             arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
5213             arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
5214                 err = resolve_map_arg_type(env, meta, &arg_type);
5215                 if (err)
5216                         return err;
5217         }
5218
5219         if (register_is_null(reg) && arg_type_may_be_null(arg_type))
5220                 /* A NULL register has a SCALAR_VALUE type, so skip
5221                  * type checking.
5222                  */
5223                 goto skip_type_check;
5224
5225         err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]);
5226         if (err)
5227                 return err;
5228
5229         if (type == PTR_TO_CTX) {
5230                 err = check_ctx_reg(env, reg, regno);
5231                 if (err < 0)
5232                         return err;
5233         }
5234
5235 skip_type_check:
5236         if (reg->ref_obj_id) {
5237                 if (meta->ref_obj_id) {
5238                         verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
5239                                 regno, reg->ref_obj_id,
5240                                 meta->ref_obj_id);
5241                         return -EFAULT;
5242                 }
5243                 meta->ref_obj_id = reg->ref_obj_id;
5244         }
5245
5246         if (arg_type == ARG_CONST_MAP_PTR) {
5247                 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
5248                 if (meta->map_ptr) {
5249                         /* Use map_uid (which is unique id of inner map) to reject:
5250                          * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
5251                          * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
5252                          * if (inner_map1 && inner_map2) {
5253                          *     timer = bpf_map_lookup_elem(inner_map1);
5254                          *     if (timer)
5255                          *         // mismatch would have been allowed
5256                          *         bpf_timer_init(timer, inner_map2);
5257                          * }
5258                          *
5259                          * Comparing map_ptr is enough to distinguish normal and outer maps.
5260                          */
5261                         if (meta->map_ptr != reg->map_ptr ||
5262                             meta->map_uid != reg->map_uid) {
5263                                 verbose(env,
5264                                         "timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
5265                                         meta->map_uid, reg->map_uid);
5266                                 return -EINVAL;
5267                         }
5268                 }
5269                 meta->map_ptr = reg->map_ptr;
5270                 meta->map_uid = reg->map_uid;
5271         } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
5272                 /* bpf_map_xxx(..., map_ptr, ..., key) call:
5273                  * check that [key, key + map->key_size) are within
5274                  * stack limits and initialized
5275                  */
5276                 if (!meta->map_ptr) {
5277                         /* in function declaration map_ptr must come before
5278                          * map_key, so that it's verified and known before
5279                          * we have to check map_key here. Otherwise it means
5280                          * that kernel subsystem misconfigured verifier
5281                          */
5282                         verbose(env, "invalid map_ptr to access map->key\n");
5283                         return -EACCES;
5284                 }
5285                 err = check_helper_mem_access(env, regno,
5286                                               meta->map_ptr->key_size, false,
5287                                               NULL);
5288         } else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
5289                    (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL &&
5290                     !register_is_null(reg)) ||
5291                    arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
5292                 /* bpf_map_xxx(..., map_ptr, ..., value) call:
5293                  * check [value, value + map->value_size) validity
5294                  */
5295                 if (!meta->map_ptr) {
5296                         /* kernel subsystem misconfigured verifier */
5297                         verbose(env, "invalid map_ptr to access map->value\n");
5298                         return -EACCES;
5299                 }
5300                 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
5301                 err = check_helper_mem_access(env, regno,
5302                                               meta->map_ptr->value_size, false,
5303                                               meta);
5304         } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
5305                 if (!reg->btf_id) {
5306                         verbose(env, "Helper has invalid btf_id in R%d\n", regno);
5307                         return -EACCES;
5308                 }
5309                 meta->ret_btf = reg->btf;
5310                 meta->ret_btf_id = reg->btf_id;
5311         } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
5312                 if (meta->func_id == BPF_FUNC_spin_lock) {
5313                         if (process_spin_lock(env, regno, true))
5314                                 return -EACCES;
5315                 } else if (meta->func_id == BPF_FUNC_spin_unlock) {
5316                         if (process_spin_lock(env, regno, false))
5317                                 return -EACCES;
5318                 } else {
5319                         verbose(env, "verifier internal error\n");
5320                         return -EFAULT;
5321                 }
5322         } else if (arg_type == ARG_PTR_TO_TIMER) {
5323                 if (process_timer_func(env, regno, meta))
5324                         return -EACCES;
5325         } else if (arg_type == ARG_PTR_TO_FUNC) {
5326                 meta->subprogno = reg->subprogno;
5327         } else if (arg_type_is_mem_ptr(arg_type)) {
5328                 /* The access to this pointer is only checked when we hit the
5329                  * next is_mem_size argument below.
5330                  */
5331                 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM);
5332         } else if (arg_type_is_mem_size(arg_type)) {
5333                 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
5334
5335                 /* This is used to refine r0 return value bounds for helpers
5336                  * that enforce this value as an upper bound on return values.
5337                  * See do_refine_retval_range() for helpers that can refine
5338                  * the return value. C type of helper is u32 so we pull register
5339                  * bound from umax_value however, if negative verifier errors
5340                  * out. Only upper bounds can be learned because retval is an
5341                  * int type and negative retvals are allowed.
5342                  */
5343                 meta->msize_max_value = reg->umax_value;
5344
5345                 /* The register is SCALAR_VALUE; the access check
5346                  * happens using its boundaries.
5347                  */
5348                 if (!tnum_is_const(reg->var_off))
5349                         /* For unprivileged variable accesses, disable raw
5350                          * mode so that the program is required to
5351                          * initialize all the memory that the helper could
5352                          * just partially fill up.
5353                          */
5354                         meta = NULL;
5355
5356                 if (reg->smin_value < 0) {
5357                         verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
5358                                 regno);
5359                         return -EACCES;
5360                 }
5361
5362                 if (reg->umin_value == 0) {
5363                         err = check_helper_mem_access(env, regno - 1, 0,
5364                                                       zero_size_allowed,
5365                                                       meta);
5366                         if (err)
5367                                 return err;
5368                 }
5369
5370                 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
5371                         verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
5372                                 regno);
5373                         return -EACCES;
5374                 }
5375                 err = check_helper_mem_access(env, regno - 1,
5376                                               reg->umax_value,
5377                                               zero_size_allowed, meta);
5378                 if (!err)
5379                         err = mark_chain_precision(env, regno);
5380         } else if (arg_type_is_alloc_size(arg_type)) {
5381                 if (!tnum_is_const(reg->var_off)) {
5382                         verbose(env, "R%d is not a known constant'\n",
5383                                 regno);
5384                         return -EACCES;
5385                 }
5386                 meta->mem_size = reg->var_off.value;
5387         } else if (arg_type_is_int_ptr(arg_type)) {
5388                 int size = int_ptr_type_to_size(arg_type);
5389
5390                 err = check_helper_mem_access(env, regno, size, false, meta);
5391                 if (err)
5392                         return err;
5393                 err = check_ptr_alignment(env, reg, 0, size, true);
5394         } else if (arg_type == ARG_PTR_TO_CONST_STR) {
5395                 struct bpf_map *map = reg->map_ptr;
5396                 int map_off;
5397                 u64 map_addr;
5398                 char *str_ptr;
5399
5400                 if (!bpf_map_is_rdonly(map)) {
5401                         verbose(env, "R%d does not point to a readonly map'\n", regno);
5402                         return -EACCES;
5403                 }
5404
5405                 if (!tnum_is_const(reg->var_off)) {
5406                         verbose(env, "R%d is not a constant address'\n", regno);
5407                         return -EACCES;
5408                 }
5409
5410                 if (!map->ops->map_direct_value_addr) {
5411                         verbose(env, "no direct value access support for this map type\n");
5412                         return -EACCES;
5413                 }
5414
5415                 err = check_map_access(env, regno, reg->off,
5416                                        map->value_size - reg->off, false);
5417                 if (err)
5418                         return err;
5419
5420                 map_off = reg->off + reg->var_off.value;
5421                 err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
5422                 if (err) {
5423                         verbose(env, "direct value access on string failed\n");
5424                         return err;
5425                 }
5426
5427                 str_ptr = (char *)(long)(map_addr);
5428                 if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
5429                         verbose(env, "string is not zero-terminated\n");
5430                         return -EINVAL;
5431                 }
5432         }
5433
5434         return err;
5435 }
5436
5437 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
5438 {
5439         enum bpf_attach_type eatype = env->prog->expected_attach_type;
5440         enum bpf_prog_type type = resolve_prog_type(env->prog);
5441
5442         if (func_id != BPF_FUNC_map_update_elem)
5443                 return false;
5444
5445         /* It's not possible to get access to a locked struct sock in these
5446          * contexts, so updating is safe.
5447          */
5448         switch (type) {
5449         case BPF_PROG_TYPE_TRACING:
5450                 if (eatype == BPF_TRACE_ITER)
5451                         return true;
5452                 break;
5453         case BPF_PROG_TYPE_SOCKET_FILTER:
5454         case BPF_PROG_TYPE_SCHED_CLS:
5455         case BPF_PROG_TYPE_SCHED_ACT:
5456         case BPF_PROG_TYPE_XDP:
5457         case BPF_PROG_TYPE_SK_REUSEPORT:
5458         case BPF_PROG_TYPE_FLOW_DISSECTOR:
5459         case BPF_PROG_TYPE_SK_LOOKUP:
5460                 return true;
5461         default:
5462                 break;
5463         }
5464
5465         verbose(env, "cannot update sockmap in this context\n");
5466         return false;
5467 }
5468
5469 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
5470 {
5471         return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
5472 }
5473
5474 static int check_map_func_compatibility(struct bpf_verifier_env *env,
5475                                         struct bpf_map *map, int func_id)
5476 {
5477         if (!map)
5478                 return 0;
5479
5480         /* We need a two way check, first is from map perspective ... */
5481         switch (map->map_type) {
5482         case BPF_MAP_TYPE_PROG_ARRAY:
5483                 if (func_id != BPF_FUNC_tail_call)
5484                         goto error;
5485                 break;
5486         case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
5487                 if (func_id != BPF_FUNC_perf_event_read &&
5488                     func_id != BPF_FUNC_perf_event_output &&
5489                     func_id != BPF_FUNC_skb_output &&
5490                     func_id != BPF_FUNC_perf_event_read_value &&
5491                     func_id != BPF_FUNC_xdp_output)
5492                         goto error;
5493                 break;
5494         case BPF_MAP_TYPE_RINGBUF:
5495                 if (func_id != BPF_FUNC_ringbuf_output &&
5496                     func_id != BPF_FUNC_ringbuf_reserve &&
5497                     func_id != BPF_FUNC_ringbuf_query)
5498                         goto error;
5499                 break;
5500         case BPF_MAP_TYPE_STACK_TRACE:
5501                 if (func_id != BPF_FUNC_get_stackid)
5502                         goto error;
5503                 break;
5504         case BPF_MAP_TYPE_CGROUP_ARRAY:
5505                 if (func_id != BPF_FUNC_skb_under_cgroup &&
5506                     func_id != BPF_FUNC_current_task_under_cgroup)
5507                         goto error;
5508                 break;
5509         case BPF_MAP_TYPE_CGROUP_STORAGE:
5510         case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
5511                 if (func_id != BPF_FUNC_get_local_storage)
5512                         goto error;
5513                 break;
5514         case BPF_MAP_TYPE_DEVMAP:
5515         case BPF_MAP_TYPE_DEVMAP_HASH:
5516                 if (func_id != BPF_FUNC_redirect_map &&
5517                     func_id != BPF_FUNC_map_lookup_elem)
5518                         goto error;
5519                 break;
5520         /* Restrict bpf side of cpumap and xskmap, open when use-cases
5521          * appear.
5522          */
5523         case BPF_MAP_TYPE_CPUMAP:
5524                 if (func_id != BPF_FUNC_redirect_map)
5525                         goto error;
5526                 break;
5527         case BPF_MAP_TYPE_XSKMAP:
5528                 if (func_id != BPF_FUNC_redirect_map &&
5529                     func_id != BPF_FUNC_map_lookup_elem)
5530                         goto error;
5531                 break;
5532         case BPF_MAP_TYPE_ARRAY_OF_MAPS:
5533         case BPF_MAP_TYPE_HASH_OF_MAPS:
5534                 if (func_id != BPF_FUNC_map_lookup_elem)
5535                         goto error;
5536                 break;
5537         case BPF_MAP_TYPE_SOCKMAP:
5538                 if (func_id != BPF_FUNC_sk_redirect_map &&
5539                     func_id != BPF_FUNC_sock_map_update &&
5540                     func_id != BPF_FUNC_map_delete_elem &&
5541                     func_id != BPF_FUNC_msg_redirect_map &&
5542                     func_id != BPF_FUNC_sk_select_reuseport &&
5543                     func_id != BPF_FUNC_map_lookup_elem &&
5544                     !may_update_sockmap(env, func_id))
5545                         goto error;
5546                 break;
5547         case BPF_MAP_TYPE_SOCKHASH:
5548                 if (func_id != BPF_FUNC_sk_redirect_hash &&
5549                     func_id != BPF_FUNC_sock_hash_update &&
5550                     func_id != BPF_FUNC_map_delete_elem &&
5551                     func_id != BPF_FUNC_msg_redirect_hash &&
5552                     func_id != BPF_FUNC_sk_select_reuseport &&
5553                     func_id != BPF_FUNC_map_lookup_elem &&
5554                     !may_update_sockmap(env, func_id))
5555                         goto error;
5556                 break;
5557         case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
5558                 if (func_id != BPF_FUNC_sk_select_reuseport)
5559                         goto error;
5560                 break;
5561         case BPF_MAP_TYPE_QUEUE:
5562         case BPF_MAP_TYPE_STACK:
5563                 if (func_id != BPF_FUNC_map_peek_elem &&
5564                     func_id != BPF_FUNC_map_pop_elem &&
5565                     func_id != BPF_FUNC_map_push_elem)
5566                         goto error;
5567                 break;
5568         case BPF_MAP_TYPE_SK_STORAGE:
5569                 if (func_id != BPF_FUNC_sk_storage_get &&
5570                     func_id != BPF_FUNC_sk_storage_delete)
5571                         goto error;
5572                 break;
5573         case BPF_MAP_TYPE_INODE_STORAGE:
5574                 if (func_id != BPF_FUNC_inode_storage_get &&
5575                     func_id != BPF_FUNC_inode_storage_delete)
5576                         goto error;
5577                 break;
5578         case BPF_MAP_TYPE_TASK_STORAGE:
5579                 if (func_id != BPF_FUNC_task_storage_get &&
5580                     func_id != BPF_FUNC_task_storage_delete)
5581                         goto error;
5582                 break;
5583         case BPF_MAP_TYPE_BLOOM_FILTER:
5584                 if (func_id != BPF_FUNC_map_peek_elem &&
5585                     func_id != BPF_FUNC_map_push_elem)
5586                         goto error;
5587                 break;
5588         default:
5589                 break;
5590         }
5591
5592         /* ... and second from the function itself. */
5593         switch (func_id) {
5594         case BPF_FUNC_tail_call:
5595                 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
5596                         goto error;
5597                 if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
5598                         verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
5599                         return -EINVAL;
5600                 }
5601                 break;
5602         case BPF_FUNC_perf_event_read:
5603         case BPF_FUNC_perf_event_output:
5604         case BPF_FUNC_perf_event_read_value:
5605         case BPF_FUNC_skb_output:
5606         case BPF_FUNC_xdp_output:
5607                 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
5608                         goto error;
5609                 break;
5610         case BPF_FUNC_ringbuf_output:
5611         case BPF_FUNC_ringbuf_reserve:
5612         case BPF_FUNC_ringbuf_query:
5613                 if (map->map_type != BPF_MAP_TYPE_RINGBUF)
5614                         goto error;
5615                 break;
5616         case BPF_FUNC_get_stackid:
5617                 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
5618                         goto error;
5619                 break;
5620         case BPF_FUNC_current_task_under_cgroup:
5621         case BPF_FUNC_skb_under_cgroup:
5622                 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
5623                         goto error;
5624                 break;
5625         case BPF_FUNC_redirect_map:
5626                 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
5627                     map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
5628                     map->map_type != BPF_MAP_TYPE_CPUMAP &&
5629                     map->map_type != BPF_MAP_TYPE_XSKMAP)
5630                         goto error;
5631                 break;
5632         case BPF_FUNC_sk_redirect_map:
5633         case BPF_FUNC_msg_redirect_map:
5634         case BPF_FUNC_sock_map_update:
5635                 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
5636                         goto error;
5637                 break;
5638         case BPF_FUNC_sk_redirect_hash:
5639         case BPF_FUNC_msg_redirect_hash:
5640         case BPF_FUNC_sock_hash_update:
5641                 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
5642                         goto error;
5643                 break;
5644         case BPF_FUNC_get_local_storage:
5645                 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
5646                     map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
5647                         goto error;
5648                 break;
5649         case BPF_FUNC_sk_select_reuseport:
5650                 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
5651                     map->map_type != BPF_MAP_TYPE_SOCKMAP &&
5652                     map->map_type != BPF_MAP_TYPE_SOCKHASH)
5653                         goto error;
5654                 break;
5655         case BPF_FUNC_map_pop_elem:
5656                 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
5657                     map->map_type != BPF_MAP_TYPE_STACK)
5658                         goto error;
5659                 break;
5660         case BPF_FUNC_map_peek_elem:
5661         case BPF_FUNC_map_push_elem:
5662                 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
5663                     map->map_type != BPF_MAP_TYPE_STACK &&
5664                     map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
5665                         goto error;
5666                 break;
5667         case BPF_FUNC_sk_storage_get:
5668         case BPF_FUNC_sk_storage_delete:
5669                 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
5670                         goto error;
5671                 break;
5672         case BPF_FUNC_inode_storage_get:
5673         case BPF_FUNC_inode_storage_delete:
5674                 if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
5675                         goto error;
5676                 break;
5677         case BPF_FUNC_task_storage_get:
5678         case BPF_FUNC_task_storage_delete:
5679                 if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
5680                         goto error;
5681                 break;
5682         default:
5683                 break;
5684         }
5685
5686         return 0;
5687 error:
5688         verbose(env, "cannot pass map_type %d into func %s#%d\n",
5689                 map->map_type, func_id_name(func_id), func_id);
5690         return -EINVAL;
5691 }
5692
5693 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
5694 {
5695         int count = 0;
5696
5697         if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
5698                 count++;
5699         if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
5700                 count++;
5701         if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
5702                 count++;
5703         if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
5704                 count++;
5705         if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
5706                 count++;
5707
5708         /* We only support one arg being in raw mode at the moment,
5709          * which is sufficient for the helper functions we have
5710          * right now.
5711          */
5712         return count <= 1;
5713 }
5714
5715 static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
5716                                     enum bpf_arg_type arg_next)
5717 {
5718         return (arg_type_is_mem_ptr(arg_curr) &&
5719                 !arg_type_is_mem_size(arg_next)) ||
5720                (!arg_type_is_mem_ptr(arg_curr) &&
5721                 arg_type_is_mem_size(arg_next));
5722 }
5723
5724 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
5725 {
5726         /* bpf_xxx(..., buf, len) call will access 'len'
5727          * bytes from memory 'buf'. Both arg types need
5728          * to be paired, so make sure there's no buggy
5729          * helper function specification.
5730          */
5731         if (arg_type_is_mem_size(fn->arg1_type) ||
5732             arg_type_is_mem_ptr(fn->arg5_type)  ||
5733             check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
5734             check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
5735             check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
5736             check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
5737                 return false;
5738
5739         return true;
5740 }
5741
5742 static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
5743 {
5744         int count = 0;
5745
5746         if (arg_type_may_be_refcounted(fn->arg1_type))
5747                 count++;
5748         if (arg_type_may_be_refcounted(fn->arg2_type))
5749                 count++;
5750         if (arg_type_may_be_refcounted(fn->arg3_type))
5751                 count++;
5752         if (arg_type_may_be_refcounted(fn->arg4_type))
5753                 count++;
5754         if (arg_type_may_be_refcounted(fn->arg5_type))
5755                 count++;
5756
5757         /* A reference acquiring function cannot acquire
5758          * another refcounted ptr.
5759          */
5760         if (may_be_acquire_function(func_id) && count)
5761                 return false;
5762
5763         /* We only support one arg being unreferenced at the moment,
5764          * which is sufficient for the helper functions we have right now.
5765          */
5766         return count <= 1;
5767 }
5768
5769 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
5770 {
5771         int i;
5772
5773         for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
5774                 if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
5775                         return false;
5776
5777                 if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
5778                         return false;
5779         }
5780
5781         return true;
5782 }
5783
5784 static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
5785 {
5786         return check_raw_mode_ok(fn) &&
5787                check_arg_pair_ok(fn) &&
5788                check_btf_id_ok(fn) &&
5789                check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
5790 }
5791
5792 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
5793  * are now invalid, so turn them into unknown SCALAR_VALUE.
5794  */
5795 static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
5796                                      struct bpf_func_state *state)
5797 {
5798         struct bpf_reg_state *regs = state->regs, *reg;
5799         int i;
5800
5801         for (i = 0; i < MAX_BPF_REG; i++)
5802                 if (reg_is_pkt_pointer_any(&regs[i]))
5803                         mark_reg_unknown(env, regs, i);
5804
5805         bpf_for_each_spilled_reg(i, state, reg) {
5806                 if (!reg)
5807                         continue;
5808                 if (reg_is_pkt_pointer_any(reg))
5809                         __mark_reg_unknown(env, reg);
5810         }
5811 }
5812
5813 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
5814 {
5815         struct bpf_verifier_state *vstate = env->cur_state;
5816         int i;
5817
5818         for (i = 0; i <= vstate->curframe; i++)
5819                 __clear_all_pkt_pointers(env, vstate->frame[i]);
5820 }
5821
5822 enum {
5823         AT_PKT_END = -1,
5824         BEYOND_PKT_END = -2,
5825 };
5826
5827 static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
5828 {
5829         struct bpf_func_state *state = vstate->frame[vstate->curframe];
5830         struct bpf_reg_state *reg = &state->regs[regn];
5831
5832         if (reg->type != PTR_TO_PACKET)
5833                 /* PTR_TO_PACKET_META is not supported yet */
5834                 return;
5835
5836         /* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
5837          * How far beyond pkt_end it goes is unknown.
5838          * if (!range_open) it's the case of pkt >= pkt_end
5839          * if (range_open) it's the case of pkt > pkt_end
5840          * hence this pointer is at least 1 byte bigger than pkt_end
5841          */
5842         if (range_open)
5843                 reg->range = BEYOND_PKT_END;
5844         else
5845                 reg->range = AT_PKT_END;
5846 }
5847
5848 static void release_reg_references(struct bpf_verifier_env *env,
5849                                    struct bpf_func_state *state,
5850                                    int ref_obj_id)
5851 {
5852         struct bpf_reg_state *regs = state->regs, *reg;
5853         int i;
5854
5855         for (i = 0; i < MAX_BPF_REG; i++)
5856                 if (regs[i].ref_obj_id == ref_obj_id)
5857                         mark_reg_unknown(env, regs, i);
5858
5859         bpf_for_each_spilled_reg(i, state, reg) {
5860                 if (!reg)
5861                         continue;
5862                 if (reg->ref_obj_id == ref_obj_id)
5863                         __mark_reg_unknown(env, reg);
5864         }
5865 }
5866
5867 /* The pointer with the specified id has released its reference to kernel
5868  * resources. Identify all copies of the same pointer and clear the reference.
5869  */
5870 static int release_reference(struct bpf_verifier_env *env,
5871                              int ref_obj_id)
5872 {
5873         struct bpf_verifier_state *vstate = env->cur_state;
5874         int err;
5875         int i;
5876
5877         err = release_reference_state(cur_func(env), ref_obj_id);
5878         if (err)
5879                 return err;
5880
5881         for (i = 0; i <= vstate->curframe; i++)
5882                 release_reg_references(env, vstate->frame[i], ref_obj_id);
5883
5884         return 0;
5885 }
5886
5887 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
5888                                     struct bpf_reg_state *regs)
5889 {
5890         int i;
5891
5892         /* after the call registers r0 - r5 were scratched */
5893         for (i = 0; i < CALLER_SAVED_REGS; i++) {
5894                 mark_reg_not_init(env, regs, caller_saved[i]);
5895                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
5896         }
5897 }
5898
5899 typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
5900                                    struct bpf_func_state *caller,
5901                                    struct bpf_func_state *callee,
5902                                    int insn_idx);
5903
5904 static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
5905                              int *insn_idx, int subprog,
5906                              set_callee_state_fn set_callee_state_cb)
5907 {
5908         struct bpf_verifier_state *state = env->cur_state;
5909         struct bpf_func_info_aux *func_info_aux;
5910         struct bpf_func_state *caller, *callee;
5911         int err;
5912         bool is_global = false;
5913
5914         if (state->curframe + 1 >= MAX_CALL_FRAMES) {
5915                 verbose(env, "the call stack of %d frames is too deep\n",
5916                         state->curframe + 2);
5917                 return -E2BIG;
5918         }
5919
5920         caller = state->frame[state->curframe];
5921         if (state->frame[state->curframe + 1]) {
5922                 verbose(env, "verifier bug. Frame %d already allocated\n",
5923                         state->curframe + 1);
5924                 return -EFAULT;
5925         }
5926
5927         func_info_aux = env->prog->aux->func_info_aux;
5928         if (func_info_aux)
5929                 is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
5930         err = btf_check_subprog_arg_match(env, subprog, caller->regs);
5931         if (err == -EFAULT)
5932                 return err;
5933         if (is_global) {
5934                 if (err) {
5935                         verbose(env, "Caller passes invalid args into func#%d\n",
5936                                 subprog);
5937                         return err;
5938                 } else {
5939                         if (env->log.level & BPF_LOG_LEVEL)
5940                                 verbose(env,
5941                                         "Func#%d is global and valid. Skipping.\n",
5942                                         subprog);
5943                         clear_caller_saved_regs(env, caller->regs);
5944
5945                         /* All global functions return a 64-bit SCALAR_VALUE */
5946                         mark_reg_unknown(env, caller->regs, BPF_REG_0);
5947                         caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
5948
5949                         /* continue with next insn after call */
5950                         return 0;
5951                 }
5952         }
5953
5954         if (insn->code == (BPF_JMP | BPF_CALL) &&
5955             insn->imm == BPF_FUNC_timer_set_callback) {
5956                 struct bpf_verifier_state *async_cb;
5957
5958                 /* there is no real recursion here. timer callbacks are async */
5959                 env->subprog_info[subprog].is_async_cb = true;
5960                 async_cb = push_async_cb(env, env->subprog_info[subprog].start,
5961                                          *insn_idx, subprog);
5962                 if (!async_cb)
5963                         return -EFAULT;
5964                 callee = async_cb->frame[0];
5965                 callee->async_entry_cnt = caller->async_entry_cnt + 1;
5966
5967                 /* Convert bpf_timer_set_callback() args into timer callback args */
5968                 err = set_callee_state_cb(env, caller, callee, *insn_idx);
5969                 if (err)
5970                         return err;
5971
5972                 clear_caller_saved_regs(env, caller->regs);
5973                 mark_reg_unknown(env, caller->regs, BPF_REG_0);
5974                 caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
5975                 /* continue with next insn after call */
5976                 return 0;
5977         }
5978
5979         callee = kzalloc(sizeof(*callee), GFP_KERNEL);
5980         if (!callee)
5981                 return -ENOMEM;
5982         state->frame[state->curframe + 1] = callee;
5983
5984         /* callee cannot access r0, r6 - r9 for reading and has to write
5985          * into its own stack before reading from it.
5986          * callee can read/write into caller's stack
5987          */
5988         init_func_state(env, callee,
5989                         /* remember the callsite, it will be used by bpf_exit */
5990                         *insn_idx /* callsite */,
5991                         state->curframe + 1 /* frameno within this callchain */,
5992                         subprog /* subprog number within this prog */);
5993
5994         /* Transfer references to the callee */
5995         err = copy_reference_state(callee, caller);
5996         if (err)
5997                 return err;
5998
5999         err = set_callee_state_cb(env, caller, callee, *insn_idx);
6000         if (err)
6001                 return err;
6002
6003         clear_caller_saved_regs(env, caller->regs);
6004
6005         /* only increment it after check_reg_arg() finished */
6006         state->curframe++;
6007
6008         /* and go analyze first insn of the callee */
6009         *insn_idx = env->subprog_info[subprog].start - 1;
6010
6011         if (env->log.level & BPF_LOG_LEVEL) {
6012                 verbose(env, "caller:\n");
6013                 print_verifier_state(env, caller);
6014                 verbose(env, "callee:\n");
6015                 print_verifier_state(env, callee);
6016         }
6017         return 0;
6018 }
6019
6020 int map_set_for_each_callback_args(struct bpf_verifier_env *env,
6021                                    struct bpf_func_state *caller,
6022                                    struct bpf_func_state *callee)
6023 {
6024         /* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
6025          *      void *callback_ctx, u64 flags);
6026          * callback_fn(struct bpf_map *map, void *key, void *value,
6027          *      void *callback_ctx);
6028          */
6029         callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
6030
6031         callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
6032         __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
6033         callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
6034
6035         callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
6036         __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
6037         callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
6038
6039         /* pointer to stack or null */
6040         callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
6041
6042         /* unused */
6043         __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6044         return 0;
6045 }
6046
6047 static int set_callee_state(struct bpf_verifier_env *env,
6048                             struct bpf_func_state *caller,
6049                             struct bpf_func_state *callee, int insn_idx)
6050 {
6051         int i;
6052
6053         /* copy r1 - r5 args that callee can access.  The copy includes parent
6054          * pointers, which connects us up to the liveness chain
6055          */
6056         for (i = BPF_REG_1; i <= BPF_REG_5; i++)
6057                 callee->regs[i] = caller->regs[i];
6058         return 0;
6059 }
6060
6061 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
6062                            int *insn_idx)
6063 {
6064         int subprog, target_insn;
6065
6066         target_insn = *insn_idx + insn->imm + 1;
6067         subprog = find_subprog(env, target_insn);
6068         if (subprog < 0) {
6069                 verbose(env, "verifier bug. No program starts at insn %d\n",
6070                         target_insn);
6071                 return -EFAULT;
6072         }
6073
6074         return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
6075 }
6076
6077 static int set_map_elem_callback_state(struct bpf_verifier_env *env,
6078                                        struct bpf_func_state *caller,
6079                                        struct bpf_func_state *callee,
6080                                        int insn_idx)
6081 {
6082         struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
6083         struct bpf_map *map;
6084         int err;
6085
6086         if (bpf_map_ptr_poisoned(insn_aux)) {
6087                 verbose(env, "tail_call abusing map_ptr\n");
6088                 return -EINVAL;
6089         }
6090
6091         map = BPF_MAP_PTR(insn_aux->map_ptr_state);
6092         if (!map->ops->map_set_for_each_callback_args ||
6093             !map->ops->map_for_each_callback) {
6094                 verbose(env, "callback function not allowed for map\n");
6095                 return -ENOTSUPP;
6096         }
6097
6098         err = map->ops->map_set_for_each_callback_args(env, caller, callee);
6099         if (err)
6100                 return err;
6101
6102         callee->in_callback_fn = true;
6103         return 0;
6104 }
6105
6106 static int set_timer_callback_state(struct bpf_verifier_env *env,
6107                                     struct bpf_func_state *caller,
6108                                     struct bpf_func_state *callee,
6109                                     int insn_idx)
6110 {
6111         struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
6112
6113         /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
6114          * callback_fn(struct bpf_map *map, void *key, void *value);
6115          */
6116         callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
6117         __mark_reg_known_zero(&callee->regs[BPF_REG_1]);
6118         callee->regs[BPF_REG_1].map_ptr = map_ptr;
6119
6120         callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
6121         __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
6122         callee->regs[BPF_REG_2].map_ptr = map_ptr;
6123
6124         callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
6125         __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
6126         callee->regs[BPF_REG_3].map_ptr = map_ptr;
6127
6128         /* unused */
6129         __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
6130         __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
6131         callee->in_async_callback_fn = true;
6132         return 0;
6133 }
6134
6135 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
6136 {
6137         struct bpf_verifier_state *state = env->cur_state;
6138         struct bpf_func_state *caller, *callee;
6139         struct bpf_reg_state *r0;
6140         int err;
6141
6142         callee = state->frame[state->curframe];
6143         r0 = &callee->regs[BPF_REG_0];
6144         if (r0->type == PTR_TO_STACK) {
6145                 /* technically it's ok to return caller's stack pointer
6146                  * (or caller's caller's pointer) back to the caller,
6147                  * since these pointers are valid. Only current stack
6148                  * pointer will be invalid as soon as function exits,
6149                  * but let's be conservative
6150                  */
6151                 verbose(env, "cannot return stack pointer to the caller\n");
6152                 return -EINVAL;
6153         }
6154
6155         state->curframe--;
6156         caller = state->frame[state->curframe];
6157         if (callee->in_callback_fn) {
6158                 /* enforce R0 return value range [0, 1]. */
6159                 struct tnum range = tnum_range(0, 1);
6160
6161                 if (r0->type != SCALAR_VALUE) {
6162                         verbose(env, "R0 not a scalar value\n");
6163                         return -EACCES;
6164                 }
6165                 if (!tnum_in(range, r0->var_off)) {
6166                         verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
6167                         return -EINVAL;
6168                 }
6169         } else {
6170                 /* return to the caller whatever r0 had in the callee */
6171                 caller->regs[BPF_REG_0] = *r0;
6172         }
6173
6174         /* Transfer references to the caller */
6175         err = copy_reference_state(caller, callee);
6176         if (err)
6177                 return err;
6178
6179         *insn_idx = callee->callsite + 1;
6180         if (env->log.level & BPF_LOG_LEVEL) {
6181                 verbose(env, "returning from callee:\n");
6182                 print_verifier_state(env, callee);
6183                 verbose(env, "to caller at %d:\n", *insn_idx);
6184                 print_verifier_state(env, caller);
6185         }
6186         /* clear everything in the callee */
6187         free_func_state(callee);
6188         state->frame[state->curframe + 1] = NULL;
6189         return 0;
6190 }
6191
6192 static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
6193                                    int func_id,
6194                                    struct bpf_call_arg_meta *meta)
6195 {
6196         struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
6197
6198         if (ret_type != RET_INTEGER ||
6199             (func_id != BPF_FUNC_get_stack &&
6200              func_id != BPF_FUNC_get_task_stack &&
6201              func_id != BPF_FUNC_probe_read_str &&
6202              func_id != BPF_FUNC_probe_read_kernel_str &&
6203              func_id != BPF_FUNC_probe_read_user_str))
6204                 return;
6205
6206         ret_reg->smax_value = meta->msize_max_value;
6207         ret_reg->s32_max_value = meta->msize_max_value;
6208         ret_reg->smin_value = -MAX_ERRNO;
6209         ret_reg->s32_min_value = -MAX_ERRNO;
6210         __reg_deduce_bounds(ret_reg);
6211         __reg_bound_offset(ret_reg);
6212         __update_reg_bounds(ret_reg);
6213 }
6214
6215 static int
6216 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
6217                 int func_id, int insn_idx)
6218 {
6219         struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
6220         struct bpf_map *map = meta->map_ptr;
6221
6222         if (func_id != BPF_FUNC_tail_call &&
6223             func_id != BPF_FUNC_map_lookup_elem &&
6224             func_id != BPF_FUNC_map_update_elem &&
6225             func_id != BPF_FUNC_map_delete_elem &&
6226             func_id != BPF_FUNC_map_push_elem &&
6227             func_id != BPF_FUNC_map_pop_elem &&
6228             func_id != BPF_FUNC_map_peek_elem &&
6229             func_id != BPF_FUNC_for_each_map_elem &&
6230             func_id != BPF_FUNC_redirect_map)
6231                 return 0;
6232
6233         if (map == NULL) {
6234                 verbose(env, "kernel subsystem misconfigured verifier\n");
6235                 return -EINVAL;
6236         }
6237
6238         /* In case of read-only, some additional restrictions
6239          * need to be applied in order to prevent altering the
6240          * state of the map from program side.
6241          */
6242         if ((map->map_flags & BPF_F_RDONLY_PROG) &&
6243             (func_id == BPF_FUNC_map_delete_elem ||
6244              func_id == BPF_FUNC_map_update_elem ||
6245              func_id == BPF_FUNC_map_push_elem ||
6246              func_id == BPF_FUNC_map_pop_elem)) {
6247                 verbose(env, "write into map forbidden\n");
6248                 return -EACCES;
6249         }
6250
6251         if (!BPF_MAP_PTR(aux->map_ptr_state))
6252                 bpf_map_ptr_store(aux, meta->map_ptr,
6253                                   !meta->map_ptr->bypass_spec_v1);
6254         else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
6255                 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
6256                                   !meta->map_ptr->bypass_spec_v1);
6257         return 0;
6258 }
6259
6260 static int
6261 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
6262                 int func_id, int insn_idx)
6263 {
6264         struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
6265         struct bpf_reg_state *regs = cur_regs(env), *reg;
6266         struct bpf_map *map = meta->map_ptr;
6267         struct tnum range;
6268         u64 val;
6269         int err;
6270
6271         if (func_id != BPF_FUNC_tail_call)
6272                 return 0;
6273         if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
6274                 verbose(env, "kernel subsystem misconfigured verifier\n");
6275                 return -EINVAL;
6276         }
6277
6278         range = tnum_range(0, map->max_entries - 1);
6279         reg = &regs[BPF_REG_3];
6280
6281         if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
6282                 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
6283                 return 0;
6284         }
6285
6286         err = mark_chain_precision(env, BPF_REG_3);
6287         if (err)
6288                 return err;
6289
6290         val = reg->var_off.value;
6291         if (bpf_map_key_unseen(aux))
6292                 bpf_map_key_store(aux, val);
6293         else if (!bpf_map_key_poisoned(aux) &&
6294                   bpf_map_key_immediate(aux) != val)
6295                 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
6296         return 0;
6297 }
6298
6299 static int check_reference_leak(struct bpf_verifier_env *env)
6300 {
6301         struct bpf_func_state *state = cur_func(env);
6302         int i;
6303
6304         for (i = 0; i < state->acquired_refs; i++) {
6305                 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
6306                         state->refs[i].id, state->refs[i].insn_idx);
6307         }
6308         return state->acquired_refs ? -EINVAL : 0;
6309 }
6310
6311 static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
6312                                    struct bpf_reg_state *regs)
6313 {
6314         struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
6315         struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
6316         struct bpf_map *fmt_map = fmt_reg->map_ptr;
6317         int err, fmt_map_off, num_args;
6318         u64 fmt_addr;
6319         char *fmt;
6320
6321         /* data must be an array of u64 */
6322         if (data_len_reg->var_off.value % 8)
6323                 return -EINVAL;
6324         num_args = data_len_reg->var_off.value / 8;
6325
6326         /* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
6327          * and map_direct_value_addr is set.
6328          */
6329         fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
6330         err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
6331                                                   fmt_map_off);
6332         if (err) {
6333                 verbose(env, "verifier bug\n");
6334                 return -EFAULT;
6335         }
6336         fmt = (char *)(long)fmt_addr + fmt_map_off;
6337
6338         /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
6339          * can focus on validating the format specifiers.
6340          */
6341         err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args);
6342         if (err < 0)
6343                 verbose(env, "Invalid format string\n");
6344
6345         return err;
6346 }
6347
6348 static int check_get_func_ip(struct bpf_verifier_env *env)
6349 {
6350         enum bpf_attach_type eatype = env->prog->expected_attach_type;
6351         enum bpf_prog_type type = resolve_prog_type(env->prog);
6352         int func_id = BPF_FUNC_get_func_ip;
6353
6354         if (type == BPF_PROG_TYPE_TRACING) {
6355                 if (eatype != BPF_TRACE_FENTRY && eatype != BPF_TRACE_FEXIT &&
6356                     eatype != BPF_MODIFY_RETURN) {
6357                         verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
6358                                 func_id_name(func_id), func_id);
6359                         return -ENOTSUPP;
6360                 }
6361                 return 0;
6362         } else if (type == BPF_PROG_TYPE_KPROBE) {
6363                 return 0;
6364         }
6365
6366         verbose(env, "func %s#%d not supported for program type %d\n",
6367                 func_id_name(func_id), func_id, type);
6368         return -ENOTSUPP;
6369 }
6370
6371 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
6372                              int *insn_idx_p)
6373 {
6374         const struct bpf_func_proto *fn = NULL;
6375         struct bpf_reg_state *regs;
6376         struct bpf_call_arg_meta meta;
6377         int insn_idx = *insn_idx_p;
6378         bool changes_data;
6379         int i, err, func_id;
6380
6381         /* find function prototype */
6382         func_id = insn->imm;
6383         if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
6384                 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
6385                         func_id);
6386                 return -EINVAL;
6387         }
6388
6389         if (env->ops->get_func_proto)
6390                 fn = env->ops->get_func_proto(func_id, env->prog);
6391         if (!fn) {
6392                 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
6393                         func_id);
6394                 return -EINVAL;
6395         }
6396
6397         /* eBPF programs must be GPL compatible to use GPL-ed functions */
6398         if (!env->prog->gpl_compatible && fn->gpl_only) {
6399                 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
6400                 return -EINVAL;
6401         }
6402
6403         if (fn->allowed && !fn->allowed(env->prog)) {
6404                 verbose(env, "helper call is not allowed in probe\n");
6405                 return -EINVAL;
6406         }
6407
6408         /* With LD_ABS/IND some JITs save/restore skb from r1. */
6409         changes_data = bpf_helper_changes_pkt_data(fn->func);
6410         if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
6411                 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
6412                         func_id_name(func_id), func_id);
6413                 return -EINVAL;
6414         }
6415
6416         memset(&meta, 0, sizeof(meta));
6417         meta.pkt_access = fn->pkt_access;
6418
6419         err = check_func_proto(fn, func_id);
6420         if (err) {
6421                 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
6422                         func_id_name(func_id), func_id);
6423                 return err;
6424         }
6425
6426         meta.func_id = func_id;
6427         /* check args */
6428         for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
6429                 err = check_func_arg(env, i, &meta, fn);
6430                 if (err)
6431                         return err;
6432         }
6433
6434         err = record_func_map(env, &meta, func_id, insn_idx);
6435         if (err)
6436                 return err;
6437
6438         err = record_func_key(env, &meta, func_id, insn_idx);
6439         if (err)
6440                 return err;
6441
6442         /* Mark slots with STACK_MISC in case of raw mode, stack offset
6443          * is inferred from register state.
6444          */
6445         for (i = 0; i < meta.access_size; i++) {
6446                 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
6447                                        BPF_WRITE, -1, false);
6448                 if (err)
6449                         return err;
6450         }
6451
6452         if (func_id == BPF_FUNC_tail_call) {
6453                 err = check_reference_leak(env);
6454                 if (err) {
6455                         verbose(env, "tail_call would lead to reference leak\n");
6456                         return err;
6457                 }
6458         } else if (is_release_function(func_id)) {
6459                 err = release_reference(env, meta.ref_obj_id);
6460                 if (err) {
6461                         verbose(env, "func %s#%d reference has not been acquired before\n",
6462                                 func_id_name(func_id), func_id);
6463                         return err;
6464                 }
6465         }
6466
6467         regs = cur_regs(env);
6468
6469         /* check that flags argument in get_local_storage(map, flags) is 0,
6470          * this is required because get_local_storage() can't return an error.
6471          */
6472         if (func_id == BPF_FUNC_get_local_storage &&
6473             !register_is_null(&regs[BPF_REG_2])) {
6474                 verbose(env, "get_local_storage() doesn't support non-zero flags\n");
6475                 return -EINVAL;
6476         }
6477
6478         if (func_id == BPF_FUNC_for_each_map_elem) {
6479                 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
6480                                         set_map_elem_callback_state);
6481                 if (err < 0)
6482                         return -EINVAL;
6483         }
6484
6485         if (func_id == BPF_FUNC_timer_set_callback) {
6486                 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
6487                                         set_timer_callback_state);
6488                 if (err < 0)
6489                         return -EINVAL;
6490         }
6491
6492         if (func_id == BPF_FUNC_snprintf) {
6493                 err = check_bpf_snprintf_call(env, regs);
6494                 if (err < 0)
6495                         return err;
6496         }
6497
6498         /* reset caller saved regs */
6499         for (i = 0; i < CALLER_SAVED_REGS; i++) {
6500                 mark_reg_not_init(env, regs, caller_saved[i]);
6501                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
6502         }
6503
6504         /* helper call returns 64-bit value. */
6505         regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
6506
6507         /* update return register (already marked as written above) */
6508         if (fn->ret_type == RET_INTEGER) {
6509                 /* sets type to SCALAR_VALUE */
6510                 mark_reg_unknown(env, regs, BPF_REG_0);
6511         } else if (fn->ret_type == RET_VOID) {
6512                 regs[BPF_REG_0].type = NOT_INIT;
6513         } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
6514                    fn->ret_type == RET_PTR_TO_MAP_VALUE) {
6515                 /* There is no offset yet applied, variable or fixed */
6516                 mark_reg_known_zero(env, regs, BPF_REG_0);
6517                 /* remember map_ptr, so that check_map_access()
6518                  * can check 'value_size' boundary of memory access
6519                  * to map element returned from bpf_map_lookup_elem()
6520                  */
6521                 if (meta.map_ptr == NULL) {
6522                         verbose(env,
6523                                 "kernel subsystem misconfigured verifier\n");
6524                         return -EINVAL;
6525                 }
6526                 regs[BPF_REG_0].map_ptr = meta.map_ptr;
6527                 regs[BPF_REG_0].map_uid = meta.map_uid;
6528                 if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
6529                         regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
6530                         if (map_value_has_spin_lock(meta.map_ptr))
6531                                 regs[BPF_REG_0].id = ++env->id_gen;
6532                 } else {
6533                         regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
6534                 }
6535         } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
6536                 mark_reg_known_zero(env, regs, BPF_REG_0);
6537                 regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
6538         } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
6539                 mark_reg_known_zero(env, regs, BPF_REG_0);
6540                 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
6541         } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
6542                 mark_reg_known_zero(env, regs, BPF_REG_0);
6543                 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
6544         } else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) {
6545                 mark_reg_known_zero(env, regs, BPF_REG_0);
6546                 regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
6547                 regs[BPF_REG_0].mem_size = meta.mem_size;
6548         } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL ||
6549                    fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) {
6550                 const struct btf_type *t;
6551
6552                 mark_reg_known_zero(env, regs, BPF_REG_0);
6553                 t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
6554                 if (!btf_type_is_struct(t)) {
6555                         u32 tsize;
6556                         const struct btf_type *ret;
6557                         const char *tname;
6558
6559                         /* resolve the type size of ksym. */
6560                         ret = btf_resolve_size(meta.ret_btf, t, &tsize);
6561                         if (IS_ERR(ret)) {
6562                                 tname = btf_name_by_offset(meta.ret_btf, t->name_off);
6563                                 verbose(env, "unable to resolve the size of type '%s': %ld\n",
6564                                         tname, PTR_ERR(ret));
6565                                 return -EINVAL;
6566                         }
6567                         regs[BPF_REG_0].type =
6568                                 fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
6569                                 PTR_TO_MEM : PTR_TO_MEM_OR_NULL;
6570                         regs[BPF_REG_0].mem_size = tsize;
6571                 } else {
6572                         regs[BPF_REG_0].type =
6573                                 fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
6574                                 PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
6575                         regs[BPF_REG_0].btf = meta.ret_btf;
6576                         regs[BPF_REG_0].btf_id = meta.ret_btf_id;
6577                 }
6578         } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL ||
6579                    fn->ret_type == RET_PTR_TO_BTF_ID) {
6580                 int ret_btf_id;
6581
6582                 mark_reg_known_zero(env, regs, BPF_REG_0);
6583                 regs[BPF_REG_0].type = fn->ret_type == RET_PTR_TO_BTF_ID ?
6584                                                      PTR_TO_BTF_ID :
6585                                                      PTR_TO_BTF_ID_OR_NULL;
6586                 ret_btf_id = *fn->ret_btf_id;
6587                 if (ret_btf_id == 0) {
6588                         verbose(env, "invalid return type %d of func %s#%d\n",
6589                                 fn->ret_type, func_id_name(func_id), func_id);
6590                         return -EINVAL;
6591                 }
6592                 /* current BPF helper definitions are only coming from
6593                  * built-in code with type IDs from  vmlinux BTF
6594                  */
6595                 regs[BPF_REG_0].btf = btf_vmlinux;
6596                 regs[BPF_REG_0].btf_id = ret_btf_id;
6597         } else {
6598                 verbose(env, "unknown return type %d of func %s#%d\n",
6599                         fn->ret_type, func_id_name(func_id), func_id);
6600                 return -EINVAL;
6601         }
6602
6603         if (reg_type_may_be_null(regs[BPF_REG_0].type))
6604                 regs[BPF_REG_0].id = ++env->id_gen;
6605
6606         if (is_ptr_cast_function(func_id)) {
6607                 /* For release_reference() */
6608                 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
6609         } else if (is_acquire_function(func_id, meta.map_ptr)) {
6610                 int id = acquire_reference_state(env, insn_idx);
6611
6612                 if (id < 0)
6613                         return id;
6614                 /* For mark_ptr_or_null_reg() */
6615                 regs[BPF_REG_0].id = id;
6616                 /* For release_reference() */
6617                 regs[BPF_REG_0].ref_obj_id = id;
6618         }
6619
6620         do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
6621
6622         err = check_map_func_compatibility(env, meta.map_ptr, func_id);
6623         if (err)
6624                 return err;
6625
6626         if ((func_id == BPF_FUNC_get_stack ||
6627              func_id == BPF_FUNC_get_task_stack) &&
6628             !env->prog->has_callchain_buf) {
6629                 const char *err_str;
6630
6631 #ifdef CONFIG_PERF_EVENTS
6632                 err = get_callchain_buffers(sysctl_perf_event_max_stack);
6633                 err_str = "cannot get callchain buffer for func %s#%d\n";
6634 #else
6635                 err = -ENOTSUPP;
6636                 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
6637 #endif
6638                 if (err) {
6639                         verbose(env, err_str, func_id_name(func_id), func_id);
6640                         return err;
6641                 }
6642
6643                 env->prog->has_callchain_buf = true;
6644         }
6645
6646         if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
6647                 env->prog->call_get_stack = true;
6648
6649         if (func_id == BPF_FUNC_get_func_ip) {
6650                 if (check_get_func_ip(env))
6651                         return -ENOTSUPP;
6652                 env->prog->call_get_func_ip = true;
6653         }
6654
6655         if (changes_data)
6656                 clear_all_pkt_pointers(env);
6657         return 0;
6658 }
6659
6660 /* mark_btf_func_reg_size() is used when the reg size is determined by
6661  * the BTF func_proto's return value size and argument.
6662  */
6663 static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
6664                                    size_t reg_size)
6665 {
6666         struct bpf_reg_state *reg = &cur_regs(env)[regno];
6667
6668         if (regno == BPF_REG_0) {
6669                 /* Function return value */
6670                 reg->live |= REG_LIVE_WRITTEN;
6671                 reg->subreg_def = reg_size == sizeof(u64) ?
6672                         DEF_NOT_SUBREG : env->insn_idx + 1;
6673         } else {
6674                 /* Function argument */
6675                 if (reg_size == sizeof(u64)) {
6676                         mark_insn_zext(env, reg);
6677                         mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
6678                 } else {
6679                         mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
6680                 }
6681         }
6682 }
6683
6684 static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
6685 {
6686         const struct btf_type *t, *func, *func_proto, *ptr_type;
6687         struct bpf_reg_state *regs = cur_regs(env);
6688         const char *func_name, *ptr_type_name;
6689         u32 i, nargs, func_id, ptr_type_id;
6690         struct module *btf_mod = NULL;
6691         const struct btf_param *args;
6692         struct btf *desc_btf;
6693         int err;
6694
6695         /* skip for now, but return error when we find this in fixup_kfunc_call */
6696         if (!insn->imm)
6697                 return 0;
6698
6699         desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod);
6700         if (IS_ERR(desc_btf))
6701                 return PTR_ERR(desc_btf);
6702
6703         func_id = insn->imm;
6704         func = btf_type_by_id(desc_btf, func_id);
6705         func_name = btf_name_by_offset(desc_btf, func->name_off);
6706         func_proto = btf_type_by_id(desc_btf, func->type);
6707
6708         if (!env->ops->check_kfunc_call ||
6709             !env->ops->check_kfunc_call(func_id, btf_mod)) {
6710                 verbose(env, "calling kernel function %s is not allowed\n",
6711                         func_name);
6712                 return -EACCES;
6713         }
6714
6715         /* Check the arguments */
6716         err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs);
6717         if (err)
6718                 return err;
6719
6720         for (i = 0; i < CALLER_SAVED_REGS; i++)
6721                 mark_reg_not_init(env, regs, caller_saved[i]);
6722
6723         /* Check return type */
6724         t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
6725         if (btf_type_is_scalar(t)) {
6726                 mark_reg_unknown(env, regs, BPF_REG_0);
6727                 mark_btf_func_reg_size(env, BPF_REG_0, t->size);
6728         } else if (btf_type_is_ptr(t)) {
6729                 ptr_type = btf_type_skip_modifiers(desc_btf, t->type,
6730                                                    &ptr_type_id);
6731                 if (!btf_type_is_struct(ptr_type)) {
6732                         ptr_type_name = btf_name_by_offset(desc_btf,
6733                                                            ptr_type->name_off);
6734                         verbose(env, "kernel function %s returns pointer type %s %s is not supported\n",
6735                                 func_name, btf_type_str(ptr_type),
6736                                 ptr_type_name);
6737                         return -EINVAL;
6738                 }
6739                 mark_reg_known_zero(env, regs, BPF_REG_0);
6740                 regs[BPF_REG_0].btf = desc_btf;
6741                 regs[BPF_REG_0].type = PTR_TO_BTF_ID;
6742                 regs[BPF_REG_0].btf_id = ptr_type_id;
6743                 mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
6744         } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
6745
6746         nargs = btf_type_vlen(func_proto);
6747         args = (const struct btf_param *)(func_proto + 1);
6748         for (i = 0; i < nargs; i++) {
6749                 u32 regno = i + 1;
6750
6751                 t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
6752                 if (btf_type_is_ptr(t))
6753                         mark_btf_func_reg_size(env, regno, sizeof(void *));
6754                 else
6755                         /* scalar. ensured by btf_check_kfunc_arg_match() */
6756                         mark_btf_func_reg_size(env, regno, t->size);
6757         }
6758
6759         return 0;
6760 }
6761
6762 static bool signed_add_overflows(s64 a, s64 b)
6763 {
6764         /* Do the add in u64, where overflow is well-defined */
6765         s64 res = (s64)((u64)a + (u64)b);
6766
6767         if (b < 0)
6768                 return res > a;
6769         return res < a;
6770 }
6771
6772 static bool signed_add32_overflows(s32 a, s32 b)
6773 {
6774         /* Do the add in u32, where overflow is well-defined */
6775         s32 res = (s32)((u32)a + (u32)b);
6776
6777         if (b < 0)
6778                 return res > a;
6779         return res < a;
6780 }
6781
6782 static bool signed_sub_overflows(s64 a, s64 b)
6783 {
6784         /* Do the sub in u64, where overflow is well-defined */
6785         s64 res = (s64)((u64)a - (u64)b);
6786
6787         if (b < 0)
6788                 return res < a;
6789         return res > a;
6790 }
6791
6792 static bool signed_sub32_overflows(s32 a, s32 b)
6793 {
6794         /* Do the sub in u32, where overflow is well-defined */
6795         s32 res = (s32)((u32)a - (u32)b);
6796
6797         if (b < 0)
6798                 return res < a;
6799         return res > a;
6800 }
6801
6802 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
6803                                   const struct bpf_reg_state *reg,
6804                                   enum bpf_reg_type type)
6805 {
6806         bool known = tnum_is_const(reg->var_off);
6807         s64 val = reg->var_off.value;
6808         s64 smin = reg->smin_value;
6809
6810         if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
6811                 verbose(env, "math between %s pointer and %lld is not allowed\n",
6812                         reg_type_str[type], val);
6813                 return false;
6814         }
6815
6816         if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
6817                 verbose(env, "%s pointer offset %d is not allowed\n",
6818                         reg_type_str[type], reg->off);
6819                 return false;
6820         }
6821
6822         if (smin == S64_MIN) {
6823                 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
6824                         reg_type_str[type]);
6825                 return false;
6826         }
6827
6828         if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
6829                 verbose(env, "value %lld makes %s pointer be out of bounds\n",
6830                         smin, reg_type_str[type]);
6831                 return false;
6832         }
6833
6834         return true;
6835 }
6836
6837 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
6838 {
6839         return &env->insn_aux_data[env->insn_idx];
6840 }
6841
6842 enum {
6843         REASON_BOUNDS   = -1,
6844         REASON_TYPE     = -2,
6845         REASON_PATHS    = -3,
6846         REASON_LIMIT    = -4,
6847         REASON_STACK    = -5,
6848 };
6849
6850 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
6851                               u32 *alu_limit, bool mask_to_left)
6852 {
6853         u32 max = 0, ptr_limit = 0;
6854
6855         switch (ptr_reg->type) {
6856         case PTR_TO_STACK:
6857                 /* Offset 0 is out-of-bounds, but acceptable start for the
6858                  * left direction, see BPF_REG_FP. Also, unknown scalar
6859                  * offset where we would need to deal with min/max bounds is
6860                  * currently prohibited for unprivileged.
6861                  */
6862                 max = MAX_BPF_STACK + mask_to_left;
6863                 ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
6864                 break;
6865         case PTR_TO_MAP_VALUE:
6866                 max = ptr_reg->map_ptr->value_size;
6867                 ptr_limit = (mask_to_left ?
6868                              ptr_reg->smin_value :
6869                              ptr_reg->umax_value) + ptr_reg->off;
6870                 break;
6871         default:
6872                 return REASON_TYPE;
6873         }
6874
6875         if (ptr_limit >= max)
6876                 return REASON_LIMIT;
6877         *alu_limit = ptr_limit;
6878         return 0;
6879 }
6880
6881 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
6882                                     const struct bpf_insn *insn)
6883 {
6884         return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
6885 }
6886
6887 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
6888                                        u32 alu_state, u32 alu_limit)
6889 {
6890         /* If we arrived here from different branches with different
6891          * state or limits to sanitize, then this won't work.
6892          */
6893         if (aux->alu_state &&
6894             (aux->alu_state != alu_state ||
6895              aux->alu_limit != alu_limit))
6896                 return REASON_PATHS;
6897
6898         /* Corresponding fixup done in do_misc_fixups(). */
6899         aux->alu_state = alu_state;
6900         aux->alu_limit = alu_limit;
6901         return 0;
6902 }
6903
6904 static int sanitize_val_alu(struct bpf_verifier_env *env,
6905                             struct bpf_insn *insn)
6906 {
6907         struct bpf_insn_aux_data *aux = cur_aux(env);
6908
6909         if (can_skip_alu_sanitation(env, insn))
6910                 return 0;
6911
6912         return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
6913 }
6914
6915 static bool sanitize_needed(u8 opcode)
6916 {
6917         return opcode == BPF_ADD || opcode == BPF_SUB;
6918 }
6919
6920 struct bpf_sanitize_info {
6921         struct bpf_insn_aux_data aux;
6922         bool mask_to_left;
6923 };
6924
6925 static struct bpf_verifier_state *
6926 sanitize_speculative_path(struct bpf_verifier_env *env,
6927                           const struct bpf_insn *insn,
6928                           u32 next_idx, u32 curr_idx)
6929 {
6930         struct bpf_verifier_state *branch;
6931         struct bpf_reg_state *regs;
6932
6933         branch = push_stack(env, next_idx, curr_idx, true);
6934         if (branch && insn) {
6935                 regs = branch->frame[branch->curframe]->regs;
6936                 if (BPF_SRC(insn->code) == BPF_K) {
6937                         mark_reg_unknown(env, regs, insn->dst_reg);
6938                 } else if (BPF_SRC(insn->code) == BPF_X) {
6939                         mark_reg_unknown(env, regs, insn->dst_reg);
6940                         mark_reg_unknown(env, regs, insn->src_reg);
6941                 }
6942         }
6943         return branch;
6944 }
6945
6946 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
6947                             struct bpf_insn *insn,
6948                             const struct bpf_reg_state *ptr_reg,
6949                             const struct bpf_reg_state *off_reg,
6950                             struct bpf_reg_state *dst_reg,
6951                             struct bpf_sanitize_info *info,
6952                             const bool commit_window)
6953 {
6954         struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
6955         struct bpf_verifier_state *vstate = env->cur_state;
6956         bool off_is_imm = tnum_is_const(off_reg->var_off);
6957         bool off_is_neg = off_reg->smin_value < 0;
6958         bool ptr_is_dst_reg = ptr_reg == dst_reg;
6959         u8 opcode = BPF_OP(insn->code);
6960         u32 alu_state, alu_limit;
6961         struct bpf_reg_state tmp;
6962         bool ret;
6963         int err;
6964
6965         if (can_skip_alu_sanitation(env, insn))
6966                 return 0;
6967
6968         /* We already marked aux for masking from non-speculative
6969          * paths, thus we got here in the first place. We only care
6970          * to explore bad access from here.
6971          */
6972         if (vstate->speculative)
6973                 goto do_sim;
6974
6975         if (!commit_window) {
6976                 if (!tnum_is_const(off_reg->var_off) &&
6977                     (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
6978                         return REASON_BOUNDS;
6979
6980                 info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
6981                                      (opcode == BPF_SUB && !off_is_neg);
6982         }
6983
6984         err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
6985         if (err < 0)
6986                 return err;
6987
6988         if (commit_window) {
6989                 /* In commit phase we narrow the masking window based on
6990                  * the observed pointer move after the simulated operation.
6991                  */
6992                 alu_state = info->aux.alu_state;
6993                 alu_limit = abs(info->aux.alu_limit - alu_limit);
6994         } else {
6995                 alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
6996                 alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
6997                 alu_state |= ptr_is_dst_reg ?
6998                              BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
6999
7000                 /* Limit pruning on unknown scalars to enable deep search for
7001                  * potential masking differences from other program paths.
7002                  */
7003                 if (!off_is_imm)
7004                         env->explore_alu_limits = true;
7005         }
7006
7007         err = update_alu_sanitation_state(aux, alu_state, alu_limit);
7008         if (err < 0)
7009                 return err;
7010 do_sim:
7011         /* If we're in commit phase, we're done here given we already
7012          * pushed the truncated dst_reg into the speculative verification
7013          * stack.
7014          *
7015          * Also, when register is a known constant, we rewrite register-based
7016          * operation to immediate-based, and thus do not need masking (and as
7017          * a consequence, do not need to simulate the zero-truncation either).
7018          */
7019         if (commit_window || off_is_imm)
7020                 return 0;
7021
7022         /* Simulate and find potential out-of-bounds access under
7023          * speculative execution from truncation as a result of
7024          * masking when off was not within expected range. If off
7025          * sits in dst, then we temporarily need to move ptr there
7026          * to simulate dst (== 0) +/-= ptr. Needed, for example,
7027          * for cases where we use K-based arithmetic in one direction
7028          * and truncated reg-based in the other in order to explore
7029          * bad access.
7030          */
7031         if (!ptr_is_dst_reg) {
7032                 tmp = *dst_reg;
7033                 *dst_reg = *ptr_reg;
7034         }
7035         ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
7036                                         env->insn_idx);
7037         if (!ptr_is_dst_reg && ret)
7038                 *dst_reg = tmp;
7039         return !ret ? REASON_STACK : 0;
7040 }
7041
7042 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
7043 {
7044         struct bpf_verifier_state *vstate = env->cur_state;
7045
7046         /* If we simulate paths under speculation, we don't update the
7047          * insn as 'seen' such that when we verify unreachable paths in
7048          * the non-speculative domain, sanitize_dead_code() can still
7049          * rewrite/sanitize them.
7050          */
7051         if (!vstate->speculative)
7052                 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
7053 }
7054
7055 static int sanitize_err(struct bpf_verifier_env *env,
7056                         const struct bpf_insn *insn, int reason,
7057                         const struct bpf_reg_state *off_reg,
7058                         const struct bpf_reg_state *dst_reg)
7059 {
7060         static const char *err = "pointer arithmetic with it prohibited for !root";
7061         const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
7062         u32 dst = insn->dst_reg, src = insn->src_reg;
7063
7064         switch (reason) {
7065         case REASON_BOUNDS:
7066                 verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
7067                         off_reg == dst_reg ? dst : src, err);
7068                 break;
7069         case REASON_TYPE:
7070                 verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
7071                         off_reg == dst_reg ? src : dst, err);
7072                 break;
7073         case REASON_PATHS:
7074                 verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
7075                         dst, op, err);
7076                 break;
7077         case REASON_LIMIT:
7078                 verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
7079                         dst, op, err);
7080                 break;
7081         case REASON_STACK:
7082                 verbose(env, "R%d could not be pushed for speculative verification, %s\n",
7083                         dst, err);
7084                 break;
7085         default:
7086                 verbose(env, "verifier internal error: unknown reason (%d)\n",
7087                         reason);
7088                 break;
7089         }
7090
7091         return -EACCES;
7092 }
7093
7094 /* check that stack access falls within stack limits and that 'reg' doesn't
7095  * have a variable offset.
7096  *
7097  * Variable offset is prohibited for unprivileged mode for simplicity since it
7098  * requires corresponding support in Spectre masking for stack ALU.  See also
7099  * retrieve_ptr_limit().
7100  *
7101  *
7102  * 'off' includes 'reg->off'.
7103  */
7104 static int check_stack_access_for_ptr_arithmetic(
7105                                 struct bpf_verifier_env *env,
7106                                 int regno,
7107                                 const struct bpf_reg_state *reg,
7108                                 int off)
7109 {
7110         if (!tnum_is_const(reg->var_off)) {
7111                 char tn_buf[48];
7112
7113                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
7114                 verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
7115                         regno, tn_buf, off);
7116                 return -EACCES;
7117         }
7118
7119         if (off >= 0 || off < -MAX_BPF_STACK) {
7120                 verbose(env, "R%d stack pointer arithmetic goes out of range, "
7121                         "prohibited for !root; off=%d\n", regno, off);
7122                 return -EACCES;
7123         }
7124
7125         return 0;
7126 }
7127
7128 static int sanitize_check_bounds(struct bpf_verifier_env *env,
7129                                  const struct bpf_insn *insn,
7130                                  const struct bpf_reg_state *dst_reg)
7131 {
7132         u32 dst = insn->dst_reg;
7133
7134         /* For unprivileged we require that resulting offset must be in bounds
7135          * in order to be able to sanitize access later on.
7136          */
7137         if (env->bypass_spec_v1)
7138                 return 0;
7139
7140         switch (dst_reg->type) {
7141         case PTR_TO_STACK:
7142                 if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
7143                                         dst_reg->off + dst_reg->var_off.value))
7144                         return -EACCES;
7145                 break;
7146         case PTR_TO_MAP_VALUE:
7147                 if (check_map_access(env, dst, dst_reg->off, 1, false)) {
7148                         verbose(env, "R%d pointer arithmetic of map value goes out of range, "
7149                                 "prohibited for !root\n", dst);
7150                         return -EACCES;
7151                 }
7152                 break;
7153         default:
7154                 break;
7155         }
7156
7157         return 0;
7158 }
7159
7160 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
7161  * Caller should also handle BPF_MOV case separately.
7162  * If we return -EACCES, caller may want to try again treating pointer as a
7163  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
7164  */
7165 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
7166                                    struct bpf_insn *insn,
7167                                    const struct bpf_reg_state *ptr_reg,
7168                                    const struct bpf_reg_state *off_reg)
7169 {
7170         struct bpf_verifier_state *vstate = env->cur_state;
7171         struct bpf_func_state *state = vstate->frame[vstate->curframe];
7172         struct bpf_reg_state *regs = state->regs, *dst_reg;
7173         bool known = tnum_is_const(off_reg->var_off);
7174         s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
7175             smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
7176         u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
7177             umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
7178         struct bpf_sanitize_info info = {};
7179         u8 opcode = BPF_OP(insn->code);
7180         u32 dst = insn->dst_reg;
7181         int ret;
7182
7183         dst_reg = &regs[dst];
7184
7185         if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
7186             smin_val > smax_val || umin_val > umax_val) {
7187                 /* Taint dst register if offset had invalid bounds derived from
7188                  * e.g. dead branches.
7189                  */
7190                 __mark_reg_unknown(env, dst_reg);
7191                 return 0;
7192         }
7193
7194         if (BPF_CLASS(insn->code) != BPF_ALU64) {
7195                 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
7196                 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
7197                         __mark_reg_unknown(env, dst_reg);
7198                         return 0;
7199                 }
7200
7201                 verbose(env,
7202                         "R%d 32-bit pointer arithmetic prohibited\n",
7203                         dst);
7204                 return -EACCES;
7205         }
7206
7207         switch (ptr_reg->type) {
7208         case PTR_TO_MAP_VALUE_OR_NULL:
7209                 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
7210                         dst, reg_type_str[ptr_reg->type]);
7211                 return -EACCES;
7212         case CONST_PTR_TO_MAP:
7213                 /* smin_val represents the known value */
7214                 if (known && smin_val == 0 && opcode == BPF_ADD)
7215                         break;
7216                 fallthrough;
7217         case PTR_TO_PACKET_END:
7218         case PTR_TO_SOCKET:
7219         case PTR_TO_SOCKET_OR_NULL:
7220         case PTR_TO_SOCK_COMMON:
7221         case PTR_TO_SOCK_COMMON_OR_NULL:
7222         case PTR_TO_TCP_SOCK:
7223         case PTR_TO_TCP_SOCK_OR_NULL:
7224         case PTR_TO_XDP_SOCK:
7225                 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
7226                         dst, reg_type_str[ptr_reg->type]);
7227                 return -EACCES;
7228         default:
7229                 break;
7230         }
7231
7232         /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
7233          * The id may be overwritten later if we create a new variable offset.
7234          */
7235         dst_reg->type = ptr_reg->type;
7236         dst_reg->id = ptr_reg->id;
7237
7238         if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
7239             !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
7240                 return -EINVAL;
7241
7242         /* pointer types do not carry 32-bit bounds at the moment. */
7243         __mark_reg32_unbounded(dst_reg);
7244
7245         if (sanitize_needed(opcode)) {
7246                 ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
7247                                        &info, false);
7248                 if (ret < 0)
7249                         return sanitize_err(env, insn, ret, off_reg, dst_reg);
7250         }
7251
7252         switch (opcode) {
7253         case BPF_ADD:
7254                 /* We can take a fixed offset as long as it doesn't overflow
7255                  * the s32 'off' field
7256                  */
7257                 if (known && (ptr_reg->off + smin_val ==
7258                               (s64)(s32)(ptr_reg->off + smin_val))) {
7259                         /* pointer += K.  Accumulate it into fixed offset */
7260                         dst_reg->smin_value = smin_ptr;
7261                         dst_reg->smax_value = smax_ptr;
7262                         dst_reg->umin_value = umin_ptr;
7263                         dst_reg->umax_value = umax_ptr;
7264                         dst_reg->var_off = ptr_reg->var_off;
7265                         dst_reg->off = ptr_reg->off + smin_val;
7266                         dst_reg->raw = ptr_reg->raw;
7267                         break;
7268                 }
7269                 /* A new variable offset is created.  Note that off_reg->off
7270                  * == 0, since it's a scalar.
7271                  * dst_reg gets the pointer type and since some positive
7272                  * integer value was added to the pointer, give it a new 'id'
7273                  * if it's a PTR_TO_PACKET.
7274                  * this creates a new 'base' pointer, off_reg (variable) gets
7275                  * added into the variable offset, and we copy the fixed offset
7276                  * from ptr_reg.
7277                  */
7278                 if (signed_add_overflows(smin_ptr, smin_val) ||
7279                     signed_add_overflows(smax_ptr, smax_val)) {
7280                         dst_reg->smin_value = S64_MIN;
7281                         dst_reg->smax_value = S64_MAX;
7282                 } else {
7283                         dst_reg->smin_value = smin_ptr + smin_val;
7284                         dst_reg->smax_value = smax_ptr + smax_val;
7285                 }
7286                 if (umin_ptr + umin_val < umin_ptr ||
7287                     umax_ptr + umax_val < umax_ptr) {
7288                         dst_reg->umin_value = 0;
7289                         dst_reg->umax_value = U64_MAX;
7290                 } else {
7291                         dst_reg->umin_value = umin_ptr + umin_val;
7292                         dst_reg->umax_value = umax_ptr + umax_val;
7293                 }
7294                 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
7295                 dst_reg->off = ptr_reg->off;
7296                 dst_reg->raw = ptr_reg->raw;
7297                 if (reg_is_pkt_pointer(ptr_reg)) {
7298                         dst_reg->id = ++env->id_gen;
7299                         /* something was added to pkt_ptr, set range to zero */
7300                         memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
7301                 }
7302                 break;
7303         case BPF_SUB:
7304                 if (dst_reg == off_reg) {
7305                         /* scalar -= pointer.  Creates an unknown scalar */
7306                         verbose(env, "R%d tried to subtract pointer from scalar\n",
7307                                 dst);
7308                         return -EACCES;
7309                 }
7310                 /* We don't allow subtraction from FP, because (according to
7311                  * test_verifier.c test "invalid fp arithmetic", JITs might not
7312                  * be able to deal with it.
7313                  */
7314                 if (ptr_reg->type == PTR_TO_STACK) {
7315                         verbose(env, "R%d subtraction from stack pointer prohibited\n",
7316                                 dst);
7317                         return -EACCES;
7318                 }
7319                 if (known && (ptr_reg->off - smin_val ==
7320                               (s64)(s32)(ptr_reg->off - smin_val))) {
7321                         /* pointer -= K.  Subtract it from fixed offset */
7322                         dst_reg->smin_value = smin_ptr;
7323                         dst_reg->smax_value = smax_ptr;
7324                         dst_reg->umin_value = umin_ptr;
7325                         dst_reg->umax_value = umax_ptr;
7326                         dst_reg->var_off = ptr_reg->var_off;
7327                         dst_reg->id = ptr_reg->id;
7328                         dst_reg->off = ptr_reg->off - smin_val;
7329                         dst_reg->raw = ptr_reg->raw;
7330                         break;
7331                 }
7332                 /* A new variable offset is created.  If the subtrahend is known
7333                  * nonnegative, then any reg->range we had before is still good.
7334                  */
7335                 if (signed_sub_overflows(smin_ptr, smax_val) ||
7336                     signed_sub_overflows(smax_ptr, smin_val)) {
7337                         /* Overflow possible, we know nothing */
7338                         dst_reg->smin_value = S64_MIN;
7339                         dst_reg->smax_value = S64_MAX;
7340                 } else {
7341                         dst_reg->smin_value = smin_ptr - smax_val;
7342                         dst_reg->smax_value = smax_ptr - smin_val;
7343                 }
7344                 if (umin_ptr < umax_val) {
7345                         /* Overflow possible, we know nothing */
7346                         dst_reg->umin_value = 0;
7347                         dst_reg->umax_value = U64_MAX;
7348                 } else {
7349                         /* Cannot overflow (as long as bounds are consistent) */
7350                         dst_reg->umin_value = umin_ptr - umax_val;
7351                         dst_reg->umax_value = umax_ptr - umin_val;
7352                 }
7353                 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
7354                 dst_reg->off = ptr_reg->off;
7355                 dst_reg->raw = ptr_reg->raw;
7356                 if (reg_is_pkt_pointer(ptr_reg)) {
7357                         dst_reg->id = ++env->id_gen;
7358                         /* something was added to pkt_ptr, set range to zero */
7359                         if (smin_val < 0)
7360                                 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
7361                 }
7362                 break;
7363         case BPF_AND:
7364         case BPF_OR:
7365         case BPF_XOR:
7366                 /* bitwise ops on pointers are troublesome, prohibit. */
7367                 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
7368                         dst, bpf_alu_string[opcode >> 4]);
7369                 return -EACCES;
7370         default:
7371                 /* other operators (e.g. MUL,LSH) produce non-pointer results */
7372                 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
7373                         dst, bpf_alu_string[opcode >> 4]);
7374                 return -EACCES;
7375         }
7376
7377         if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
7378                 return -EINVAL;
7379
7380         __update_reg_bounds(dst_reg);
7381         __reg_deduce_bounds(dst_reg);
7382         __reg_bound_offset(dst_reg);
7383
7384         if (sanitize_check_bounds(env, insn, dst_reg) < 0)
7385                 return -EACCES;
7386         if (sanitize_needed(opcode)) {
7387                 ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
7388                                        &info, true);
7389                 if (ret < 0)
7390                         return sanitize_err(env, insn, ret, off_reg, dst_reg);
7391         }
7392
7393         return 0;
7394 }
7395
7396 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
7397                                  struct bpf_reg_state *src_reg)
7398 {
7399         s32 smin_val = src_reg->s32_min_value;
7400         s32 smax_val = src_reg->s32_max_value;
7401         u32 umin_val = src_reg->u32_min_value;
7402         u32 umax_val = src_reg->u32_max_value;
7403
7404         if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
7405             signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
7406                 dst_reg->s32_min_value = S32_MIN;
7407                 dst_reg->s32_max_value = S32_MAX;
7408         } else {
7409                 dst_reg->s32_min_value += smin_val;
7410                 dst_reg->s32_max_value += smax_val;
7411         }
7412         if (dst_reg->u32_min_value + umin_val < umin_val ||
7413             dst_reg->u32_max_value + umax_val < umax_val) {
7414                 dst_reg->u32_min_value = 0;
7415                 dst_reg->u32_max_value = U32_MAX;
7416         } else {
7417                 dst_reg->u32_min_value += umin_val;
7418                 dst_reg->u32_max_value += umax_val;
7419         }
7420 }
7421
7422 static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
7423                                struct bpf_reg_state *src_reg)
7424 {
7425         s64 smin_val = src_reg->smin_value;
7426         s64 smax_val = src_reg->smax_value;
7427         u64 umin_val = src_reg->umin_value;
7428         u64 umax_val = src_reg->umax_value;
7429
7430         if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
7431             signed_add_overflows(dst_reg->smax_value, smax_val)) {
7432                 dst_reg->smin_value = S64_MIN;
7433                 dst_reg->smax_value = S64_MAX;
7434         } else {
7435                 dst_reg->smin_value += smin_val;
7436                 dst_reg->smax_value += smax_val;
7437         }
7438         if (dst_reg->umin_value + umin_val < umin_val ||
7439             dst_reg->umax_value + umax_val < umax_val) {
7440                 dst_reg->umin_value = 0;
7441                 dst_reg->umax_value = U64_MAX;
7442         } else {
7443                 dst_reg->umin_value += umin_val;
7444                 dst_reg->umax_value += umax_val;
7445         }
7446 }
7447
7448 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
7449                                  struct bpf_reg_state *src_reg)
7450 {
7451         s32 smin_val = src_reg->s32_min_value;
7452         s32 smax_val = src_reg->s32_max_value;
7453         u32 umin_val = src_reg->u32_min_value;
7454         u32 umax_val = src_reg->u32_max_value;
7455
7456         if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
7457             signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
7458                 /* Overflow possible, we know nothing */
7459                 dst_reg->s32_min_value = S32_MIN;
7460                 dst_reg->s32_max_value = S32_MAX;
7461         } else {
7462                 dst_reg->s32_min_value -= smax_val;
7463                 dst_reg->s32_max_value -= smin_val;
7464         }
7465         if (dst_reg->u32_min_value < umax_val) {
7466                 /* Overflow possible, we know nothing */
7467                 dst_reg->u32_min_value = 0;
7468                 dst_reg->u32_max_value = U32_MAX;
7469         } else {
7470                 /* Cannot overflow (as long as bounds are consistent) */
7471                 dst_reg->u32_min_value -= umax_val;
7472                 dst_reg->u32_max_value -= umin_val;
7473         }
7474 }
7475
7476 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
7477                                struct bpf_reg_state *src_reg)
7478 {
7479         s64 smin_val = src_reg->smin_value;
7480         s64 smax_val = src_reg->smax_value;
7481         u64 umin_val = src_reg->umin_value;
7482         u64 umax_val = src_reg->umax_value;
7483
7484         if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
7485             signed_sub_overflows(dst_reg->smax_value, smin_val)) {
7486                 /* Overflow possible, we know nothing */
7487                 dst_reg->smin_value = S64_MIN;
7488                 dst_reg->smax_value = S64_MAX;
7489         } else {
7490                 dst_reg->smin_value -= smax_val;
7491                 dst_reg->smax_value -= smin_val;
7492         }
7493         if (dst_reg->umin_value < umax_val) {
7494                 /* Overflow possible, we know nothing */
7495                 dst_reg->umin_value = 0;
7496                 dst_reg->umax_value = U64_MAX;
7497         } else {
7498                 /* Cannot overflow (as long as bounds are consistent) */
7499                 dst_reg->umin_value -= umax_val;
7500                 dst_reg->umax_value -= umin_val;
7501         }
7502 }
7503
7504 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
7505                                  struct bpf_reg_state *src_reg)
7506 {
7507         s32 smin_val = src_reg->s32_min_value;
7508         u32 umin_val = src_reg->u32_min_value;
7509         u32 umax_val = src_reg->u32_max_value;
7510
7511         if (smin_val < 0 || dst_reg->s32_min_value < 0) {
7512                 /* Ain't nobody got time to multiply that sign */
7513                 __mark_reg32_unbounded(dst_reg);
7514                 return;
7515         }
7516         /* Both values are positive, so we can work with unsigned and
7517          * copy the result to signed (unless it exceeds S32_MAX).
7518          */
7519         if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
7520                 /* Potential overflow, we know nothing */
7521                 __mark_reg32_unbounded(dst_reg);
7522                 return;
7523         }
7524         dst_reg->u32_min_value *= umin_val;
7525         dst_reg->u32_max_value *= umax_val;
7526         if (dst_reg->u32_max_value > S32_MAX) {
7527                 /* Overflow possible, we know nothing */
7528                 dst_reg->s32_min_value = S32_MIN;
7529                 dst_reg->s32_max_value = S32_MAX;
7530         } else {
7531                 dst_reg->s32_min_value = dst_reg->u32_min_value;
7532                 dst_reg->s32_max_value = dst_reg->u32_max_value;
7533         }
7534 }
7535
7536 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
7537                                struct bpf_reg_state *src_reg)
7538 {
7539         s64 smin_val = src_reg->smin_value;
7540         u64 umin_val = src_reg->umin_value;
7541         u64 umax_val = src_reg->umax_value;
7542
7543         if (smin_val < 0 || dst_reg->smin_value < 0) {
7544                 /* Ain't nobody got time to multiply that sign */
7545                 __mark_reg64_unbounded(dst_reg);
7546                 return;
7547         }
7548         /* Both values are positive, so we can work with unsigned and
7549          * copy the result to signed (unless it exceeds S64_MAX).
7550          */
7551         if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
7552                 /* Potential overflow, we know nothing */
7553                 __mark_reg64_unbounded(dst_reg);
7554                 return;
7555         }
7556         dst_reg->umin_value *= umin_val;
7557         dst_reg->umax_value *= umax_val;
7558         if (dst_reg->umax_value > S64_MAX) {
7559                 /* Overflow possible, we know nothing */
7560                 dst_reg->smin_value = S64_MIN;
7561                 dst_reg->smax_value = S64_MAX;
7562         } else {
7563                 dst_reg->smin_value = dst_reg->umin_value;
7564                 dst_reg->smax_value = dst_reg->umax_value;
7565         }
7566 }
7567
7568 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
7569                                  struct bpf_reg_state *src_reg)
7570 {
7571         bool src_known = tnum_subreg_is_const(src_reg->var_off);
7572         bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
7573         struct tnum var32_off = tnum_subreg(dst_reg->var_off);
7574         s32 smin_val = src_reg->s32_min_value;
7575         u32 umax_val = src_reg->u32_max_value;
7576
7577         if (src_known && dst_known) {
7578                 __mark_reg32_known(dst_reg, var32_off.value);
7579                 return;
7580         }
7581
7582         /* We get our minimum from the var_off, since that's inherently
7583          * bitwise.  Our maximum is the minimum of the operands' maxima.
7584          */
7585         dst_reg->u32_min_value = var32_off.value;
7586         dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
7587         if (dst_reg->s32_min_value < 0 || smin_val < 0) {
7588                 /* Lose signed bounds when ANDing negative numbers,
7589                  * ain't nobody got time for that.
7590                  */
7591                 dst_reg->s32_min_value = S32_MIN;
7592                 dst_reg->s32_max_value = S32_MAX;
7593         } else {
7594                 /* ANDing two positives gives a positive, so safe to
7595                  * cast result into s64.
7596                  */
7597                 dst_reg->s32_min_value = dst_reg->u32_min_value;
7598                 dst_reg->s32_max_value = dst_reg->u32_max_value;
7599         }
7600 }
7601
7602 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
7603                                struct bpf_reg_state *src_reg)
7604 {
7605         bool src_known = tnum_is_const(src_reg->var_off);
7606         bool dst_known = tnum_is_const(dst_reg->var_off);
7607         s64 smin_val = src_reg->smin_value;
7608         u64 umax_val = src_reg->umax_value;
7609
7610         if (src_known && dst_known) {
7611                 __mark_reg_known(dst_reg, dst_reg->var_off.value);
7612                 return;
7613         }
7614
7615         /* We get our minimum from the var_off, since that's inherently
7616          * bitwise.  Our maximum is the minimum of the operands' maxima.
7617          */
7618         dst_reg->umin_value = dst_reg->var_off.value;
7619         dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
7620         if (dst_reg->smin_value < 0 || smin_val < 0) {
7621                 /* Lose signed bounds when ANDing negative numbers,
7622                  * ain't nobody got time for that.
7623                  */
7624                 dst_reg->smin_value = S64_MIN;
7625                 dst_reg->smax_value = S64_MAX;
7626         } else {
7627                 /* ANDing two positives gives a positive, so safe to
7628                  * cast result into s64.
7629                  */
7630                 dst_reg->smin_value = dst_reg->umin_value;
7631                 dst_reg->smax_value = dst_reg->umax_value;
7632         }
7633         /* We may learn something more from the var_off */
7634         __update_reg_bounds(dst_reg);
7635 }
7636
7637 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
7638                                 struct bpf_reg_state *src_reg)
7639 {
7640         bool src_known = tnum_subreg_is_const(src_reg->var_off);
7641         bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
7642         struct tnum var32_off = tnum_subreg(dst_reg->var_off);
7643         s32 smin_val = src_reg->s32_min_value;
7644         u32 umin_val = src_reg->u32_min_value;
7645
7646         if (src_known && dst_known) {
7647                 __mark_reg32_known(dst_reg, var32_off.value);
7648                 return;
7649         }
7650
7651         /* We get our maximum from the var_off, and our minimum is the
7652          * maximum of the operands' minima
7653          */
7654         dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
7655         dst_reg->u32_max_value = var32_off.value | var32_off.mask;
7656         if (dst_reg->s32_min_value < 0 || smin_val < 0) {
7657                 /* Lose signed bounds when ORing negative numbers,
7658                  * ain't nobody got time for that.
7659                  */
7660                 dst_reg->s32_min_value = S32_MIN;
7661                 dst_reg->s32_max_value = S32_MAX;
7662         } else {
7663                 /* ORing two positives gives a positive, so safe to
7664                  * cast result into s64.
7665                  */
7666                 dst_reg->s32_min_value = dst_reg->u32_min_value;
7667                 dst_reg->s32_max_value = dst_reg->u32_max_value;
7668         }
7669 }
7670
7671 static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
7672                               struct bpf_reg_state *src_reg)
7673 {
7674         bool src_known = tnum_is_const(src_reg->var_off);
7675         bool dst_known = tnum_is_const(dst_reg->var_off);
7676         s64 smin_val = src_reg->smin_value;
7677         u64 umin_val = src_reg->umin_value;
7678
7679         if (src_known && dst_known) {
7680                 __mark_reg_known(dst_reg, dst_reg->var_off.value);
7681                 return;
7682         }
7683
7684         /* We get our maximum from the var_off, and our minimum is the
7685          * maximum of the operands' minima
7686          */
7687         dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
7688         dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
7689         if (dst_reg->smin_value < 0 || smin_val < 0) {
7690                 /* Lose signed bounds when ORing negative numbers,
7691                  * ain't nobody got time for that.
7692                  */
7693                 dst_reg->smin_value = S64_MIN;
7694                 dst_reg->smax_value = S64_MAX;
7695         } else {
7696                 /* ORing two positives gives a positive, so safe to
7697                  * cast result into s64.
7698                  */
7699                 dst_reg->smin_value = dst_reg->umin_value;
7700                 dst_reg->smax_value = dst_reg->umax_value;
7701         }
7702         /* We may learn something more from the var_off */
7703         __update_reg_bounds(dst_reg);
7704 }
7705
7706 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
7707                                  struct bpf_reg_state *src_reg)
7708 {
7709         bool src_known = tnum_subreg_is_const(src_reg->var_off);
7710         bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
7711         struct tnum var32_off = tnum_subreg(dst_reg->var_off);
7712         s32 smin_val = src_reg->s32_min_value;
7713
7714         if (src_known && dst_known) {
7715                 __mark_reg32_known(dst_reg, var32_off.value);
7716                 return;
7717         }
7718
7719         /* We get both minimum and maximum from the var32_off. */
7720         dst_reg->u32_min_value = var32_off.value;
7721         dst_reg->u32_max_value = var32_off.value | var32_off.mask;
7722
7723         if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
7724                 /* XORing two positive sign numbers gives a positive,
7725                  * so safe to cast u32 result into s32.
7726                  */
7727                 dst_reg->s32_min_value = dst_reg->u32_min_value;
7728                 dst_reg->s32_max_value = dst_reg->u32_max_value;
7729         } else {
7730                 dst_reg->s32_min_value = S32_MIN;
7731                 dst_reg->s32_max_value = S32_MAX;
7732         }
7733 }
7734
7735 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
7736                                struct bpf_reg_state *src_reg)
7737 {
7738         bool src_known = tnum_is_const(src_reg->var_off);
7739         bool dst_known = tnum_is_const(dst_reg->var_off);
7740         s64 smin_val = src_reg->smin_value;
7741
7742         if (src_known && dst_known) {
7743                 /* dst_reg->var_off.value has been updated earlier */
7744                 __mark_reg_known(dst_reg, dst_reg->var_off.value);
7745                 return;
7746         }
7747
7748         /* We get both minimum and maximum from the var_off. */
7749         dst_reg->umin_value = dst_reg->var_off.value;
7750         dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
7751
7752         if (dst_reg->smin_value >= 0 && smin_val >= 0) {
7753                 /* XORing two positive sign numbers gives a positive,
7754                  * so safe to cast u64 result into s64.
7755                  */
7756                 dst_reg->smin_value = dst_reg->umin_value;
7757                 dst_reg->smax_value = dst_reg->umax_value;
7758         } else {
7759                 dst_reg->smin_value = S64_MIN;
7760                 dst_reg->smax_value = S64_MAX;
7761         }
7762
7763         __update_reg_bounds(dst_reg);
7764 }
7765
7766 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
7767                                    u64 umin_val, u64 umax_val)
7768 {
7769         /* We lose all sign bit information (except what we can pick
7770          * up from var_off)
7771          */
7772         dst_reg->s32_min_value = S32_MIN;
7773         dst_reg->s32_max_value = S32_MAX;
7774         /* If we might shift our top bit out, then we know nothing */
7775         if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
7776                 dst_reg->u32_min_value = 0;
7777                 dst_reg->u32_max_value = U32_MAX;
7778         } else {
7779                 dst_reg->u32_min_value <<= umin_val;
7780                 dst_reg->u32_max_value <<= umax_val;
7781         }
7782 }
7783
7784 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
7785                                  struct bpf_reg_state *src_reg)
7786 {
7787         u32 umax_val = src_reg->u32_max_value;
7788         u32 umin_val = src_reg->u32_min_value;
7789         /* u32 alu operation will zext upper bits */
7790         struct tnum subreg = tnum_subreg(dst_reg->var_off);
7791
7792         __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
7793         dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
7794         /* Not required but being careful mark reg64 bounds as unknown so
7795          * that we are forced to pick them up from tnum and zext later and
7796          * if some path skips this step we are still safe.
7797          */
7798         __mark_reg64_unbounded(dst_reg);
7799         __update_reg32_bounds(dst_reg);
7800 }
7801
7802 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
7803                                    u64 umin_val, u64 umax_val)
7804 {
7805         /* Special case <<32 because it is a common compiler pattern to sign
7806          * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
7807          * positive we know this shift will also be positive so we can track
7808          * bounds correctly. Otherwise we lose all sign bit information except
7809          * what we can pick up from var_off. Perhaps we can generalize this
7810          * later to shifts of any length.
7811          */
7812         if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
7813                 dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
7814         else
7815                 dst_reg->smax_value = S64_MAX;
7816
7817         if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
7818                 dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
7819         else
7820                 dst_reg->smin_value = S64_MIN;
7821
7822         /* If we might shift our top bit out, then we know nothing */
7823         if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
7824                 dst_reg->umin_value = 0;
7825                 dst_reg->umax_value = U64_MAX;
7826         } else {
7827                 dst_reg->umin_value <<= umin_val;
7828                 dst_reg->umax_value <<= umax_val;
7829         }
7830 }
7831
7832 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
7833                                struct bpf_reg_state *src_reg)
7834 {
7835         u64 umax_val = src_reg->umax_value;
7836         u64 umin_val = src_reg->umin_value;
7837
7838         /* scalar64 calc uses 32bit unshifted bounds so must be called first */
7839         __scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
7840         __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
7841
7842         dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
7843         /* We may learn something more from the var_off */
7844         __update_reg_bounds(dst_reg);
7845 }
7846
7847 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
7848                                  struct bpf_reg_state *src_reg)
7849 {
7850         struct tnum subreg = tnum_subreg(dst_reg->var_off);
7851         u32 umax_val = src_reg->u32_max_value;
7852         u32 umin_val = src_reg->u32_min_value;
7853
7854         /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
7855          * be negative, then either:
7856          * 1) src_reg might be zero, so the sign bit of the result is
7857          *    unknown, so we lose our signed bounds
7858          * 2) it's known negative, thus the unsigned bounds capture the
7859          *    signed bounds
7860          * 3) the signed bounds cross zero, so they tell us nothing
7861          *    about the result
7862          * If the value in dst_reg is known nonnegative, then again the
7863          * unsigned bounds capture the signed bounds.
7864          * Thus, in all cases it suffices to blow away our signed bounds
7865          * and rely on inferring new ones from the unsigned bounds and
7866          * var_off of the result.
7867          */
7868         dst_reg->s32_min_value = S32_MIN;
7869         dst_reg->s32_max_value = S32_MAX;
7870
7871         dst_reg->var_off = tnum_rshift(subreg, umin_val);
7872         dst_reg->u32_min_value >>= umax_val;
7873         dst_reg->u32_max_value >>= umin_val;
7874
7875         __mark_reg64_unbounded(dst_reg);
7876         __update_reg32_bounds(dst_reg);
7877 }
7878
7879 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
7880                                struct bpf_reg_state *src_reg)
7881 {
7882         u64 umax_val = src_reg->umax_value;
7883         u64 umin_val = src_reg->umin_value;
7884
7885         /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
7886          * be negative, then either:
7887          * 1) src_reg might be zero, so the sign bit of the result is
7888          *    unknown, so we lose our signed bounds
7889          * 2) it's known negative, thus the unsigned bounds capture the
7890          *    signed bounds
7891          * 3) the signed bounds cross zero, so they tell us nothing
7892          *    about the result
7893          * If the value in dst_reg is known nonnegative, then again the
7894          * unsigned bounds capture the signed bounds.
7895          * Thus, in all cases it suffices to blow away our signed bounds
7896          * and rely on inferring new ones from the unsigned bounds and
7897          * var_off of the result.
7898          */
7899         dst_reg->smin_value = S64_MIN;
7900         dst_reg->smax_value = S64_MAX;
7901         dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
7902         dst_reg->umin_value >>= umax_val;
7903         dst_reg->umax_value >>= umin_val;
7904
7905         /* Its not easy to operate on alu32 bounds here because it depends
7906          * on bits being shifted in. Take easy way out and mark unbounded
7907          * so we can recalculate later from tnum.
7908          */
7909         __mark_reg32_unbounded(dst_reg);
7910         __update_reg_bounds(dst_reg);
7911 }
7912
7913 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
7914                                   struct bpf_reg_state *src_reg)
7915 {
7916         u64 umin_val = src_reg->u32_min_value;
7917
7918         /* Upon reaching here, src_known is true and
7919          * umax_val is equal to umin_val.
7920          */
7921         dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
7922         dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
7923
7924         dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
7925
7926         /* blow away the dst_reg umin_value/umax_value and rely on
7927          * dst_reg var_off to refine the result.
7928          */
7929         dst_reg->u32_min_value = 0;
7930         dst_reg->u32_max_value = U32_MAX;
7931
7932         __mark_reg64_unbounded(dst_reg);
7933         __update_reg32_bounds(dst_reg);
7934 }
7935
7936 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
7937                                 struct bpf_reg_state *src_reg)
7938 {
7939         u64 umin_val = src_reg->umin_value;
7940
7941         /* Upon reaching here, src_known is true and umax_val is equal
7942          * to umin_val.
7943          */
7944         dst_reg->smin_value >>= umin_val;
7945         dst_reg->smax_value >>= umin_val;
7946
7947         dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
7948
7949         /* blow away the dst_reg umin_value/umax_value and rely on
7950          * dst_reg var_off to refine the result.
7951          */
7952         dst_reg->umin_value = 0;
7953         dst_reg->umax_value = U64_MAX;
7954
7955         /* Its not easy to operate on alu32 bounds here because it depends
7956          * on bits being shifted in from upper 32-bits. Take easy way out
7957          * and mark unbounded so we can recalculate later from tnum.
7958          */
7959         __mark_reg32_unbounded(dst_reg);
7960         __update_reg_bounds(dst_reg);
7961 }
7962
7963 /* WARNING: This function does calculations on 64-bit values, but the actual
7964  * execution may occur on 32-bit values. Therefore, things like bitshifts
7965  * need extra checks in the 32-bit case.
7966  */
7967 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
7968                                       struct bpf_insn *insn,
7969                                       struct bpf_reg_state *dst_reg,
7970                                       struct bpf_reg_state src_reg)
7971 {
7972         struct bpf_reg_state *regs = cur_regs(env);
7973         u8 opcode = BPF_OP(insn->code);
7974         bool src_known;
7975         s64 smin_val, smax_val;
7976         u64 umin_val, umax_val;
7977         s32 s32_min_val, s32_max_val;
7978         u32 u32_min_val, u32_max_val;
7979         u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
7980         bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
7981         int ret;
7982
7983         smin_val = src_reg.smin_value;
7984         smax_val = src_reg.smax_value;
7985         umin_val = src_reg.umin_value;
7986         umax_val = src_reg.umax_value;
7987
7988         s32_min_val = src_reg.s32_min_value;
7989         s32_max_val = src_reg.s32_max_value;
7990         u32_min_val = src_reg.u32_min_value;
7991         u32_max_val = src_reg.u32_max_value;
7992
7993         if (alu32) {
7994                 src_known = tnum_subreg_is_const(src_reg.var_off);
7995                 if ((src_known &&
7996                      (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
7997                     s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
7998                         /* Taint dst register if offset had invalid bounds
7999                          * derived from e.g. dead branches.
8000                          */
8001                         __mark_reg_unknown(env, dst_reg);
8002                         return 0;
8003                 }
8004         } else {
8005                 src_known = tnum_is_const(src_reg.var_off);
8006                 if ((src_known &&
8007                      (smin_val != smax_val || umin_val != umax_val)) ||
8008                     smin_val > smax_val || umin_val > umax_val) {
8009                         /* Taint dst register if offset had invalid bounds
8010                          * derived from e.g. dead branches.
8011                          */
8012                         __mark_reg_unknown(env, dst_reg);
8013                         return 0;
8014                 }
8015         }
8016
8017         if (!src_known &&
8018             opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
8019                 __mark_reg_unknown(env, dst_reg);
8020                 return 0;
8021         }
8022
8023         if (sanitize_needed(opcode)) {
8024                 ret = sanitize_val_alu(env, insn);
8025                 if (ret < 0)
8026                         return sanitize_err(env, insn, ret, NULL, NULL);
8027         }
8028
8029         /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
8030          * There are two classes of instructions: The first class we track both
8031          * alu32 and alu64 sign/unsigned bounds independently this provides the
8032          * greatest amount of precision when alu operations are mixed with jmp32
8033          * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_ADD,
8034          * and BPF_OR. This is possible because these ops have fairly easy to
8035          * understand and calculate behavior in both 32-bit and 64-bit alu ops.
8036          * See alu32 verifier tests for examples. The second class of
8037          * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
8038          * with regards to tracking sign/unsigned bounds because the bits may
8039          * cross subreg boundaries in the alu64 case. When this happens we mark
8040          * the reg unbounded in the subreg bound space and use the resulting
8041          * tnum to calculate an approximation of the sign/unsigned bounds.
8042          */
8043         switch (opcode) {
8044         case BPF_ADD:
8045                 scalar32_min_max_add(dst_reg, &src_reg);
8046                 scalar_min_max_add(dst_reg, &src_reg);
8047                 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
8048                 break;
8049         case BPF_SUB:
8050                 scalar32_min_max_sub(dst_reg, &src_reg);
8051                 scalar_min_max_sub(dst_reg, &src_reg);
8052                 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
8053                 break;
8054         case BPF_MUL:
8055                 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
8056                 scalar32_min_max_mul(dst_reg, &src_reg);
8057                 scalar_min_max_mul(dst_reg, &src_reg);
8058                 break;
8059         case BPF_AND:
8060                 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
8061                 scalar32_min_max_and(dst_reg, &src_reg);
8062                 scalar_min_max_and(dst_reg, &src_reg);
8063                 break;
8064         case BPF_OR:
8065                 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
8066                 scalar32_min_max_or(dst_reg, &src_reg);
8067                 scalar_min_max_or(dst_reg, &src_reg);
8068                 break;
8069         case BPF_XOR:
8070                 dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
8071                 scalar32_min_max_xor(dst_reg, &src_reg);
8072                 scalar_min_max_xor(dst_reg, &src_reg);
8073                 break;
8074         case BPF_LSH:
8075                 if (umax_val >= insn_bitness) {
8076                         /* Shifts greater than 31 or 63 are undefined.
8077                          * This includes shifts by a negative number.
8078                          */
8079                         mark_reg_unknown(env, regs, insn->dst_reg);
8080                         break;
8081                 }
8082                 if (alu32)
8083                         scalar32_min_max_lsh(dst_reg, &src_reg);
8084                 else
8085                         scalar_min_max_lsh(dst_reg, &src_reg);
8086                 break;
8087         case BPF_RSH:
8088                 if (umax_val >= insn_bitness) {
8089                         /* Shifts greater than 31 or 63 are undefined.
8090                          * This includes shifts by a negative number.
8091                          */
8092                         mark_reg_unknown(env, regs, insn->dst_reg);
8093                         break;
8094                 }
8095                 if (alu32)
8096                         scalar32_min_max_rsh(dst_reg, &src_reg);
8097                 else
8098                         scalar_min_max_rsh(dst_reg, &src_reg);
8099                 break;
8100         case BPF_ARSH:
8101                 if (umax_val >= insn_bitness) {
8102                         /* Shifts greater than 31 or 63 are undefined.
8103                          * This includes shifts by a negative number.
8104                          */
8105                         mark_reg_unknown(env, regs, insn->dst_reg);
8106                         break;
8107                 }
8108                 if (alu32)
8109                         scalar32_min_max_arsh(dst_reg, &src_reg);
8110                 else
8111                         scalar_min_max_arsh(dst_reg, &src_reg);
8112                 break;
8113         default:
8114                 mark_reg_unknown(env, regs, insn->dst_reg);
8115                 break;
8116         }
8117
8118         /* ALU32 ops are zero extended into 64bit register */
8119         if (alu32)
8120                 zext_32_to_64(dst_reg);
8121
8122         __update_reg_bounds(dst_reg);
8123         __reg_deduce_bounds(dst_reg);
8124         __reg_bound_offset(dst_reg);
8125         return 0;
8126 }
8127
8128 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
8129  * and var_off.
8130  */
8131 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
8132                                    struct bpf_insn *insn)
8133 {
8134         struct bpf_verifier_state *vstate = env->cur_state;
8135         struct bpf_func_state *state = vstate->frame[vstate->curframe];
8136         struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
8137         struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
8138         u8 opcode = BPF_OP(insn->code);
8139         int err;
8140
8141         dst_reg = &regs[insn->dst_reg];
8142         src_reg = NULL;
8143         if (dst_reg->type != SCALAR_VALUE)
8144                 ptr_reg = dst_reg;
8145         else
8146                 /* Make sure ID is cleared otherwise dst_reg min/max could be
8147                  * incorrectly propagated into other registers by find_equal_scalars()
8148                  */
8149                 dst_reg->id = 0;
8150         if (BPF_SRC(insn->code) == BPF_X) {
8151                 src_reg = &regs[insn->src_reg];
8152                 if (src_reg->type != SCALAR_VALUE) {
8153                         if (dst_reg->type != SCALAR_VALUE) {
8154                                 /* Combining two pointers by any ALU op yields
8155                                  * an arbitrary scalar. Disallow all math except
8156                                  * pointer subtraction
8157                                  */
8158                                 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
8159                                         mark_reg_unknown(env, regs, insn->dst_reg);
8160                                         return 0;
8161                                 }
8162                                 verbose(env, "R%d pointer %s pointer prohibited\n",
8163                                         insn->dst_reg,
8164                                         bpf_alu_string[opcode >> 4]);
8165                                 return -EACCES;
8166                         } else {
8167                                 /* scalar += pointer
8168                                  * This is legal, but we have to reverse our
8169                                  * src/dest handling in computing the range
8170                                  */
8171                                 err = mark_chain_precision(env, insn->dst_reg);
8172                                 if (err)
8173                                         return err;
8174                                 return adjust_ptr_min_max_vals(env, insn,
8175                                                                src_reg, dst_reg);
8176                         }
8177                 } else if (ptr_reg) {
8178                         /* pointer += scalar */
8179                         err = mark_chain_precision(env, insn->src_reg);
8180                         if (err)
8181                                 return err;
8182                         return adjust_ptr_min_max_vals(env, insn,
8183                                                        dst_reg, src_reg);
8184                 }
8185         } else {
8186                 /* Pretend the src is a reg with a known value, since we only
8187                  * need to be able to read from this state.
8188                  */
8189                 off_reg.type = SCALAR_VALUE;
8190                 __mark_reg_known(&off_reg, insn->imm);
8191                 src_reg = &off_reg;
8192                 if (ptr_reg) /* pointer += K */
8193                         return adjust_ptr_min_max_vals(env, insn,
8194                                                        ptr_reg, src_reg);
8195         }
8196
8197         /* Got here implies adding two SCALAR_VALUEs */
8198         if (WARN_ON_ONCE(ptr_reg)) {
8199                 print_verifier_state(env, state);
8200                 verbose(env, "verifier internal error: unexpected ptr_reg\n");
8201                 return -EINVAL;
8202         }
8203         if (WARN_ON(!src_reg)) {
8204                 print_verifier_state(env, state);
8205                 verbose(env, "verifier internal error: no src_reg\n");
8206                 return -EINVAL;
8207         }
8208         return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
8209 }
8210
8211 /* check validity of 32-bit and 64-bit arithmetic operations */
8212 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
8213 {
8214         struct bpf_reg_state *regs = cur_regs(env);
8215         u8 opcode = BPF_OP(insn->code);
8216         int err;
8217
8218         if (opcode == BPF_END || opcode == BPF_NEG) {
8219                 if (opcode == BPF_NEG) {
8220                         if (BPF_SRC(insn->code) != 0 ||
8221                             insn->src_reg != BPF_REG_0 ||
8222                             insn->off != 0 || insn->imm != 0) {
8223                                 verbose(env, "BPF_NEG uses reserved fields\n");
8224                                 return -EINVAL;
8225                         }
8226                 } else {
8227                         if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
8228                             (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
8229                             BPF_CLASS(insn->code) == BPF_ALU64) {
8230                                 verbose(env, "BPF_END uses reserved fields\n");
8231                                 return -EINVAL;
8232                         }
8233                 }
8234
8235                 /* check src operand */
8236                 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
8237                 if (err)
8238                         return err;
8239
8240                 if (is_pointer_value(env, insn->dst_reg)) {
8241                         verbose(env, "R%d pointer arithmetic prohibited\n",
8242                                 insn->dst_reg);
8243                         return -EACCES;
8244                 }
8245
8246                 /* check dest operand */
8247                 err = check_reg_arg(env, insn->dst_reg, DST_OP);
8248                 if (err)
8249                         return err;
8250
8251         } else if (opcode == BPF_MOV) {
8252
8253                 if (BPF_SRC(insn->code) == BPF_X) {
8254                         if (insn->imm != 0 || insn->off != 0) {
8255                                 verbose(env, "BPF_MOV uses reserved fields\n");
8256                                 return -EINVAL;
8257                         }
8258
8259                         /* check src operand */
8260                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
8261                         if (err)
8262                                 return err;
8263                 } else {
8264                         if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
8265                                 verbose(env, "BPF_MOV uses reserved fields\n");
8266                                 return -EINVAL;
8267                         }
8268                 }
8269
8270                 /* check dest operand, mark as required later */
8271                 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
8272                 if (err)
8273                         return err;
8274
8275                 if (BPF_SRC(insn->code) == BPF_X) {
8276                         struct bpf_reg_state *src_reg = regs + insn->src_reg;
8277                         struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
8278
8279                         if (BPF_CLASS(insn->code) == BPF_ALU64) {
8280                                 /* case: R1 = R2
8281                                  * copy register state to dest reg
8282                                  */
8283                                 if (src_reg->type == SCALAR_VALUE && !src_reg->id)
8284                                         /* Assign src and dst registers the same ID
8285                                          * that will be used by find_equal_scalars()
8286                                          * to propagate min/max range.
8287                                          */
8288                                         src_reg->id = ++env->id_gen;
8289                                 *dst_reg = *src_reg;
8290                                 dst_reg->live |= REG_LIVE_WRITTEN;
8291                                 dst_reg->subreg_def = DEF_NOT_SUBREG;
8292                         } else {
8293                                 /* R1 = (u32) R2 */
8294                                 if (is_pointer_value(env, insn->src_reg)) {
8295                                         verbose(env,
8296                                                 "R%d partial copy of pointer\n",
8297                                                 insn->src_reg);
8298                                         return -EACCES;
8299                                 } else if (src_reg->type == SCALAR_VALUE) {
8300                                         *dst_reg = *src_reg;
8301                                         /* Make sure ID is cleared otherwise
8302                                          * dst_reg min/max could be incorrectly
8303                                          * propagated into src_reg by find_equal_scalars()
8304                                          */
8305                                         dst_reg->id = 0;
8306                                         dst_reg->live |= REG_LIVE_WRITTEN;
8307                                         dst_reg->subreg_def = env->insn_idx + 1;
8308                                 } else {
8309                                         mark_reg_unknown(env, regs,
8310                                                          insn->dst_reg);
8311                                 }
8312                                 zext_32_to_64(dst_reg);
8313                         }
8314                 } else {
8315                         /* case: R = imm
8316                          * remember the value we stored into this reg
8317                          */
8318                         /* clear any state __mark_reg_known doesn't set */
8319                         mark_reg_unknown(env, regs, insn->dst_reg);
8320                         regs[insn->dst_reg].type = SCALAR_VALUE;
8321                         if (BPF_CLASS(insn->code) == BPF_ALU64) {
8322                                 __mark_reg_known(regs + insn->dst_reg,
8323                                                  insn->imm);
8324                         } else {
8325                                 __mark_reg_known(regs + insn->dst_reg,
8326                                                  (u32)insn->imm);
8327                         }
8328                 }
8329
8330         } else if (opcode > BPF_END) {
8331                 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
8332                 return -EINVAL;
8333
8334         } else {        /* all other ALU ops: and, sub, xor, add, ... */
8335
8336                 if (BPF_SRC(insn->code) == BPF_X) {
8337                         if (insn->imm != 0 || insn->off != 0) {
8338                                 verbose(env, "BPF_ALU uses reserved fields\n");
8339                                 return -EINVAL;
8340                         }
8341                         /* check src1 operand */
8342                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
8343                         if (err)
8344                                 return err;
8345                 } else {
8346                         if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
8347                                 verbose(env, "BPF_ALU uses reserved fields\n");
8348                                 return -EINVAL;
8349                         }
8350                 }
8351
8352                 /* check src2 operand */
8353                 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
8354                 if (err)
8355                         return err;
8356
8357                 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
8358                     BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
8359                         verbose(env, "div by zero\n");
8360                         return -EINVAL;
8361                 }
8362
8363                 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
8364                      opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
8365                         int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
8366
8367                         if (insn->imm < 0 || insn->imm >= size) {
8368                                 verbose(env, "invalid shift %d\n", insn->imm);
8369                                 return -EINVAL;
8370                         }
8371                 }
8372
8373                 /* check dest operand */
8374                 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
8375                 if (err)
8376                         return err;
8377
8378                 return adjust_reg_min_max_vals(env, insn);
8379         }
8380
8381         return 0;
8382 }
8383
8384 static void __find_good_pkt_pointers(struct bpf_func_state *state,
8385                                      struct bpf_reg_state *dst_reg,
8386                                      enum bpf_reg_type type, int new_range)
8387 {
8388         struct bpf_reg_state *reg;
8389         int i;
8390
8391         for (i = 0; i < MAX_BPF_REG; i++) {
8392                 reg = &state->regs[i];
8393                 if (reg->type == type && reg->id == dst_reg->id)
8394                         /* keep the maximum range already checked */
8395                         reg->range = max(reg->range, new_range);
8396         }
8397
8398         bpf_for_each_spilled_reg(i, state, reg) {
8399                 if (!reg)
8400                         continue;
8401                 if (reg->type == type && reg->id == dst_reg->id)
8402                         reg->range = max(reg->range, new_range);
8403         }
8404 }
8405
8406 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
8407                                    struct bpf_reg_state *dst_reg,
8408                                    enum bpf_reg_type type,
8409                                    bool range_right_open)
8410 {
8411         int new_range, i;
8412
8413         if (dst_reg->off < 0 ||
8414             (dst_reg->off == 0 && range_right_open))
8415                 /* This doesn't give us any range */
8416                 return;
8417
8418         if (dst_reg->umax_value > MAX_PACKET_OFF ||
8419             dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
8420                 /* Risk of overflow.  For instance, ptr + (1<<63) may be less
8421                  * than pkt_end, but that's because it's also less than pkt.
8422                  */
8423                 return;
8424
8425         new_range = dst_reg->off;
8426         if (range_right_open)
8427                 new_range++;
8428
8429         /* Examples for register markings:
8430          *
8431          * pkt_data in dst register:
8432          *
8433          *   r2 = r3;
8434          *   r2 += 8;
8435          *   if (r2 > pkt_end) goto <handle exception>
8436          *   <access okay>
8437          *
8438          *   r2 = r3;
8439          *   r2 += 8;
8440          *   if (r2 < pkt_end) goto <access okay>
8441          *   <handle exception>
8442          *
8443          *   Where:
8444          *     r2 == dst_reg, pkt_end == src_reg
8445          *     r2=pkt(id=n,off=8,r=0)
8446          *     r3=pkt(id=n,off=0,r=0)
8447          *
8448          * pkt_data in src register:
8449          *
8450          *   r2 = r3;
8451          *   r2 += 8;
8452          *   if (pkt_end >= r2) goto <access okay>
8453          *   <handle exception>
8454          *
8455          *   r2 = r3;
8456          *   r2 += 8;
8457          *   if (pkt_end <= r2) goto <handle exception>
8458          *   <access okay>
8459          *
8460          *   Where:
8461          *     pkt_end == dst_reg, r2 == src_reg
8462          *     r2=pkt(id=n,off=8,r=0)
8463          *     r3=pkt(id=n,off=0,r=0)
8464          *
8465          * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
8466          * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
8467          * and [r3, r3 + 8-1) respectively is safe to access depending on
8468          * the check.
8469          */
8470
8471         /* If our ids match, then we must have the same max_value.  And we
8472          * don't care about the other reg's fixed offset, since if it's too big
8473          * the range won't allow anything.
8474          * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
8475          */
8476         for (i = 0; i <= vstate->curframe; i++)
8477                 __find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
8478                                          new_range);
8479 }
8480
8481 static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
8482 {
8483         struct tnum subreg = tnum_subreg(reg->var_off);
8484         s32 sval = (s32)val;
8485
8486         switch (opcode) {
8487         case BPF_JEQ:
8488                 if (tnum_is_const(subreg))
8489                         return !!tnum_equals_const(subreg, val);
8490                 break;
8491         case BPF_JNE:
8492                 if (tnum_is_const(subreg))
8493                         return !tnum_equals_const(subreg, val);
8494                 break;
8495         case BPF_JSET:
8496                 if ((~subreg.mask & subreg.value) & val)
8497                         return 1;
8498                 if (!((subreg.mask | subreg.value) & val))
8499                         return 0;
8500                 break;
8501         case BPF_JGT:
8502                 if (reg->u32_min_value > val)
8503                         return 1;
8504                 else if (reg->u32_max_value <= val)
8505                         return 0;
8506                 break;
8507         case BPF_JSGT:
8508                 if (reg->s32_min_value > sval)
8509                         return 1;
8510                 else if (reg->s32_max_value <= sval)
8511                         return 0;
8512                 break;
8513         case BPF_JLT:
8514                 if (reg->u32_max_value < val)
8515                         return 1;
8516                 else if (reg->u32_min_value >= val)
8517                         return 0;
8518                 break;
8519         case BPF_JSLT:
8520                 if (reg->s32_max_value < sval)
8521                         return 1;
8522                 else if (reg->s32_min_value >= sval)
8523                         return 0;
8524                 break;
8525         case BPF_JGE:
8526                 if (reg->u32_min_value >= val)
8527                         return 1;
8528                 else if (reg->u32_max_value < val)
8529                         return 0;
8530                 break;
8531         case BPF_JSGE:
8532                 if (reg->s32_min_value >= sval)
8533                         return 1;
8534                 else if (reg->s32_max_value < sval)
8535                         return 0;
8536                 break;
8537         case BPF_JLE:
8538                 if (reg->u32_max_value <= val)
8539                         return 1;
8540                 else if (reg->u32_min_value > val)
8541                         return 0;
8542                 break;
8543         case BPF_JSLE:
8544                 if (reg->s32_max_value <= sval)
8545                         return 1;
8546                 else if (reg->s32_min_value > sval)
8547                         return 0;
8548                 break;
8549         }
8550
8551         return -1;
8552 }
8553
8554
8555 static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
8556 {
8557         s64 sval = (s64)val;
8558
8559         switch (opcode) {
8560         case BPF_JEQ:
8561                 if (tnum_is_const(reg->var_off))
8562                         return !!tnum_equals_const(reg->var_off, val);
8563                 break;
8564         case BPF_JNE:
8565                 if (tnum_is_const(reg->var_off))
8566                         return !tnum_equals_const(reg->var_off, val);
8567                 break;
8568         case BPF_JSET:
8569                 if ((~reg->var_off.mask & reg->var_off.value) & val)
8570                         return 1;
8571                 if (!((reg->var_off.mask | reg->var_off.value) & val))
8572                         return 0;
8573                 break;
8574         case BPF_JGT:
8575                 if (reg->umin_value > val)
8576                         return 1;
8577                 else if (reg->umax_value <= val)
8578                         return 0;
8579                 break;
8580         case BPF_JSGT:
8581                 if (reg->smin_value > sval)
8582                         return 1;
8583                 else if (reg->smax_value <= sval)
8584                         return 0;
8585                 break;
8586         case BPF_JLT:
8587                 if (reg->umax_value < val)
8588                         return 1;
8589                 else if (reg->umin_value >= val)
8590                         return 0;
8591                 break;
8592         case BPF_JSLT:
8593                 if (reg->smax_value < sval)
8594                         return 1;
8595                 else if (reg->smin_value >= sval)
8596                         return 0;
8597                 break;
8598         case BPF_JGE:
8599                 if (reg->umin_value >= val)
8600                         return 1;
8601                 else if (reg->umax_value < val)
8602                         return 0;
8603                 break;
8604         case BPF_JSGE:
8605                 if (reg->smin_value >= sval)
8606                         return 1;
8607                 else if (reg->smax_value < sval)
8608                         return 0;
8609                 break;
8610         case BPF_JLE:
8611                 if (reg->umax_value <= val)
8612                         return 1;
8613                 else if (reg->umin_value > val)
8614                         return 0;
8615                 break;
8616         case BPF_JSLE:
8617                 if (reg->smax_value <= sval)
8618                         return 1;
8619                 else if (reg->smin_value > sval)
8620                         return 0;
8621                 break;
8622         }
8623
8624         return -1;
8625 }
8626
8627 /* compute branch direction of the expression "if (reg opcode val) goto target;"
8628  * and return:
8629  *  1 - branch will be taken and "goto target" will be executed
8630  *  0 - branch will not be taken and fall-through to next insn
8631  * -1 - unknown. Example: "if (reg < 5)" is unknown when register value
8632  *      range [0,10]
8633  */
8634 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
8635                            bool is_jmp32)
8636 {
8637         if (__is_pointer_value(false, reg)) {
8638                 if (!reg_type_not_null(reg->type))
8639                         return -1;
8640
8641                 /* If pointer is valid tests against zero will fail so we can
8642                  * use this to direct branch taken.
8643                  */
8644                 if (val != 0)
8645                         return -1;
8646
8647                 switch (opcode) {
8648                 case BPF_JEQ:
8649                         return 0;
8650                 case BPF_JNE:
8651                         return 1;
8652                 default:
8653                         return -1;
8654                 }
8655         }
8656
8657         if (is_jmp32)
8658                 return is_branch32_taken(reg, val, opcode);
8659         return is_branch64_taken(reg, val, opcode);
8660 }
8661
8662 static int flip_opcode(u32 opcode)
8663 {
8664         /* How can we transform "a <op> b" into "b <op> a"? */
8665         static const u8 opcode_flip[16] = {
8666                 /* these stay the same */
8667                 [BPF_JEQ  >> 4] = BPF_JEQ,
8668                 [BPF_JNE  >> 4] = BPF_JNE,
8669                 [BPF_JSET >> 4] = BPF_JSET,
8670                 /* these swap "lesser" and "greater" (L and G in the opcodes) */
8671                 [BPF_JGE  >> 4] = BPF_JLE,
8672                 [BPF_JGT  >> 4] = BPF_JLT,
8673                 [BPF_JLE  >> 4] = BPF_JGE,
8674                 [BPF_JLT  >> 4] = BPF_JGT,
8675                 [BPF_JSGE >> 4] = BPF_JSLE,
8676                 [BPF_JSGT >> 4] = BPF_JSLT,
8677                 [BPF_JSLE >> 4] = BPF_JSGE,
8678                 [BPF_JSLT >> 4] = BPF_JSGT
8679         };
8680         return opcode_flip[opcode >> 4];
8681 }
8682
8683 static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
8684                                    struct bpf_reg_state *src_reg,
8685                                    u8 opcode)
8686 {
8687         struct bpf_reg_state *pkt;
8688
8689         if (src_reg->type == PTR_TO_PACKET_END) {
8690                 pkt = dst_reg;
8691         } else if (dst_reg->type == PTR_TO_PACKET_END) {
8692                 pkt = src_reg;
8693                 opcode = flip_opcode(opcode);
8694         } else {
8695                 return -1;
8696         }
8697
8698         if (pkt->range >= 0)
8699                 return -1;
8700
8701         switch (opcode) {
8702         case BPF_JLE:
8703                 /* pkt <= pkt_end */
8704                 fallthrough;
8705         case BPF_JGT:
8706                 /* pkt > pkt_end */
8707                 if (pkt->range == BEYOND_PKT_END)
8708                         /* pkt has at last one extra byte beyond pkt_end */
8709                         return opcode == BPF_JGT;
8710                 break;
8711         case BPF_JLT:
8712                 /* pkt < pkt_end */
8713                 fallthrough;
8714         case BPF_JGE:
8715                 /* pkt >= pkt_end */
8716                 if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
8717                         return opcode == BPF_JGE;
8718                 break;
8719         }
8720         return -1;
8721 }
8722
8723 /* Adjusts the register min/max values in the case that the dst_reg is the
8724  * variable register that we are working on, and src_reg is a constant or we're
8725  * simply doing a BPF_K check.
8726  * In JEQ/JNE cases we also adjust the var_off values.
8727  */
8728 static void reg_set_min_max(struct bpf_reg_state *true_reg,
8729                             struct bpf_reg_state *false_reg,
8730                             u64 val, u32 val32,
8731                             u8 opcode, bool is_jmp32)
8732 {
8733         struct tnum false_32off = tnum_subreg(false_reg->var_off);
8734         struct tnum false_64off = false_reg->var_off;
8735         struct tnum true_32off = tnum_subreg(true_reg->var_off);
8736         struct tnum true_64off = true_reg->var_off;
8737         s64 sval = (s64)val;
8738         s32 sval32 = (s32)val32;
8739
8740         /* If the dst_reg is a pointer, we can't learn anything about its
8741          * variable offset from the compare (unless src_reg were a pointer into
8742          * the same object, but we don't bother with that.
8743          * Since false_reg and true_reg have the same type by construction, we
8744          * only need to check one of them for pointerness.
8745          */
8746         if (__is_pointer_value(false, false_reg))
8747                 return;
8748
8749         switch (opcode) {
8750         case BPF_JEQ:
8751         case BPF_JNE:
8752         {
8753                 struct bpf_reg_state *reg =
8754                         opcode == BPF_JEQ ? true_reg : false_reg;
8755
8756                 /* JEQ/JNE comparison doesn't change the register equivalence.
8757                  * r1 = r2;
8758                  * if (r1 == 42) goto label;
8759                  * ...
8760                  * label: // here both r1 and r2 are known to be 42.
8761                  *
8762                  * Hence when marking register as known preserve it's ID.
8763                  */
8764                 if (is_jmp32)
8765                         __mark_reg32_known(reg, val32);
8766                 else
8767                         ___mark_reg_known(reg, val);
8768                 break;
8769         }
8770         case BPF_JSET:
8771                 if (is_jmp32) {
8772                         false_32off = tnum_and(false_32off, tnum_const(~val32));
8773                         if (is_power_of_2(val32))
8774                                 true_32off = tnum_or(true_32off,
8775                                                      tnum_const(val32));
8776                 } else {
8777                         false_64off = tnum_and(false_64off, tnum_const(~val));
8778                         if (is_power_of_2(val))
8779                                 true_64off = tnum_or(true_64off,
8780                                                      tnum_const(val));
8781                 }
8782                 break;
8783         case BPF_JGE:
8784         case BPF_JGT:
8785         {
8786                 if (is_jmp32) {
8787                         u32 false_umax = opcode == BPF_JGT ? val32  : val32 - 1;
8788                         u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
8789
8790                         false_reg->u32_max_value = min(false_reg->u32_max_value,
8791                                                        false_umax);
8792                         true_reg->u32_min_value = max(true_reg->u32_min_value,
8793                                                       true_umin);
8794                 } else {
8795                         u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
8796                         u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
8797
8798                         false_reg->umax_value = min(false_reg->umax_value, false_umax);
8799                         true_reg->umin_value = max(true_reg->umin_value, true_umin);
8800                 }
8801                 break;
8802         }
8803         case BPF_JSGE:
8804         case BPF_JSGT:
8805         {
8806                 if (is_jmp32) {
8807                         s32 false_smax = opcode == BPF_JSGT ? sval32    : sval32 - 1;
8808                         s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
8809
8810                         false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
8811                         true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
8812                 } else {
8813                         s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
8814                         s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
8815
8816                         false_reg->smax_value = min(false_reg->smax_value, false_smax);
8817                         true_reg->smin_value = max(true_reg->smin_value, true_smin);
8818                 }
8819                 break;
8820         }
8821         case BPF_JLE:
8822         case BPF_JLT:
8823         {
8824                 if (is_jmp32) {
8825                         u32 false_umin = opcode == BPF_JLT ? val32  : val32 + 1;
8826                         u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
8827
8828                         false_reg->u32_min_value = max(false_reg->u32_min_value,
8829                                                        false_umin);
8830                         true_reg->u32_max_value = min(true_reg->u32_max_value,
8831                                                       true_umax);
8832                 } else {
8833                         u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
8834                         u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
8835
8836                         false_reg->umin_value = max(false_reg->umin_value, false_umin);
8837                         true_reg->umax_value = min(true_reg->umax_value, true_umax);
8838                 }
8839                 break;
8840         }
8841         case BPF_JSLE:
8842         case BPF_JSLT:
8843         {
8844                 if (is_jmp32) {
8845                         s32 false_smin = opcode == BPF_JSLT ? sval32    : sval32 + 1;
8846                         s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
8847
8848                         false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
8849                         true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
8850                 } else {
8851                         s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
8852                         s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
8853
8854                         false_reg->smin_value = max(false_reg->smin_value, false_smin);
8855                         true_reg->smax_value = min(true_reg->smax_value, true_smax);
8856                 }
8857                 break;
8858         }
8859         default:
8860                 return;
8861         }
8862
8863         if (is_jmp32) {
8864                 false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
8865                                              tnum_subreg(false_32off));
8866                 true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
8867                                             tnum_subreg(true_32off));
8868                 __reg_combine_32_into_64(false_reg);
8869                 __reg_combine_32_into_64(true_reg);
8870         } else {
8871                 false_reg->var_off = false_64off;
8872                 true_reg->var_off = true_64off;
8873                 __reg_combine_64_into_32(false_reg);
8874                 __reg_combine_64_into_32(true_reg);
8875         }
8876 }
8877
8878 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
8879  * the variable reg.
8880  */
8881 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
8882                                 struct bpf_reg_state *false_reg,
8883                                 u64 val, u32 val32,
8884                                 u8 opcode, bool is_jmp32)
8885 {
8886         opcode = flip_opcode(opcode);
8887         /* This uses zero as "not present in table"; luckily the zero opcode,
8888          * BPF_JA, can't get here.
8889          */
8890         if (opcode)
8891                 reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
8892 }
8893
8894 /* Regs are known to be equal, so intersect their min/max/var_off */
8895 static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
8896                                   struct bpf_reg_state *dst_reg)
8897 {
8898         src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
8899                                                         dst_reg->umin_value);
8900         src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
8901                                                         dst_reg->umax_value);
8902         src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
8903                                                         dst_reg->smin_value);
8904         src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
8905                                                         dst_reg->smax_value);
8906         src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
8907                                                              dst_reg->var_off);
8908         /* We might have learned new bounds from the var_off. */
8909         __update_reg_bounds(src_reg);
8910         __update_reg_bounds(dst_reg);
8911         /* We might have learned something about the sign bit. */
8912         __reg_deduce_bounds(src_reg);
8913         __reg_deduce_bounds(dst_reg);
8914         /* We might have learned some bits from the bounds. */
8915         __reg_bound_offset(src_reg);
8916         __reg_bound_offset(dst_reg);
8917         /* Intersecting with the old var_off might have improved our bounds
8918          * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
8919          * then new var_off is (0; 0x7f...fc) which improves our umax.
8920          */
8921         __update_reg_bounds(src_reg);
8922         __update_reg_bounds(dst_reg);
8923 }
8924
8925 static void reg_combine_min_max(struct bpf_reg_state *true_src,
8926                                 struct bpf_reg_state *true_dst,
8927                                 struct bpf_reg_state *false_src,
8928                                 struct bpf_reg_state *false_dst,
8929                                 u8 opcode)
8930 {
8931         switch (opcode) {
8932         case BPF_JEQ:
8933                 __reg_combine_min_max(true_src, true_dst);
8934                 break;
8935         case BPF_JNE:
8936                 __reg_combine_min_max(false_src, false_dst);
8937                 break;
8938         }
8939 }
8940
8941 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
8942                                  struct bpf_reg_state *reg, u32 id,
8943                                  bool is_null)
8944 {
8945         if (reg_type_may_be_null(reg->type) && reg->id == id &&
8946             !WARN_ON_ONCE(!reg->id)) {
8947                 /* Old offset (both fixed and variable parts) should
8948                  * have been known-zero, because we don't allow pointer
8949                  * arithmetic on pointers that might be NULL.
8950                  */
8951                 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
8952                                  !tnum_equals_const(reg->var_off, 0) ||
8953                                  reg->off)) {
8954                         __mark_reg_known_zero(reg);
8955                         reg->off = 0;
8956                 }
8957                 if (is_null) {
8958                         reg->type = SCALAR_VALUE;
8959                         /* We don't need id and ref_obj_id from this point
8960                          * onwards anymore, thus we should better reset it,
8961                          * so that state pruning has chances to take effect.
8962                          */
8963                         reg->id = 0;
8964                         reg->ref_obj_id = 0;
8965
8966                         return;
8967                 }
8968
8969                 mark_ptr_not_null_reg(reg);
8970
8971                 if (!reg_may_point_to_spin_lock(reg)) {
8972                         /* For not-NULL ptr, reg->ref_obj_id will be reset
8973                          * in release_reg_references().
8974                          *
8975                          * reg->id is still used by spin_lock ptr. Other
8976                          * than spin_lock ptr type, reg->id can be reset.
8977                          */
8978                         reg->id = 0;
8979                 }
8980         }
8981 }
8982
8983 static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
8984                                     bool is_null)
8985 {
8986         struct bpf_reg_state *reg;
8987         int i;
8988
8989         for (i = 0; i < MAX_BPF_REG; i++)
8990                 mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
8991
8992         bpf_for_each_spilled_reg(i, state, reg) {
8993                 if (!reg)
8994                         continue;
8995                 mark_ptr_or_null_reg(state, reg, id, is_null);
8996         }
8997 }
8998
8999 /* The logic is similar to find_good_pkt_pointers(), both could eventually
9000  * be folded together at some point.
9001  */
9002 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
9003                                   bool is_null)
9004 {
9005         struct bpf_func_state *state = vstate->frame[vstate->curframe];
9006         struct bpf_reg_state *regs = state->regs;
9007         u32 ref_obj_id = regs[regno].ref_obj_id;
9008         u32 id = regs[regno].id;
9009         int i;
9010
9011         if (ref_obj_id && ref_obj_id == id && is_null)
9012                 /* regs[regno] is in the " == NULL" branch.
9013                  * No one could have freed the reference state before
9014                  * doing the NULL check.
9015                  */
9016                 WARN_ON_ONCE(release_reference_state(state, id));
9017
9018         for (i = 0; i <= vstate->curframe; i++)
9019                 __mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
9020 }
9021
9022 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
9023                                    struct bpf_reg_state *dst_reg,
9024                                    struct bpf_reg_state *src_reg,
9025                                    struct bpf_verifier_state *this_branch,
9026                                    struct bpf_verifier_state *other_branch)
9027 {
9028         if (BPF_SRC(insn->code) != BPF_X)
9029                 return false;
9030
9031         /* Pointers are always 64-bit. */
9032         if (BPF_CLASS(insn->code) == BPF_JMP32)
9033                 return false;
9034
9035         switch (BPF_OP(insn->code)) {
9036         case BPF_JGT:
9037                 if ((dst_reg->type == PTR_TO_PACKET &&
9038                      src_reg->type == PTR_TO_PACKET_END) ||
9039                     (dst_reg->type == PTR_TO_PACKET_META &&
9040                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9041                         /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
9042                         find_good_pkt_pointers(this_branch, dst_reg,
9043                                                dst_reg->type, false);
9044                         mark_pkt_end(other_branch, insn->dst_reg, true);
9045                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
9046                             src_reg->type == PTR_TO_PACKET) ||
9047                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9048                             src_reg->type == PTR_TO_PACKET_META)) {
9049                         /* pkt_end > pkt_data', pkt_data > pkt_meta' */
9050                         find_good_pkt_pointers(other_branch, src_reg,
9051                                                src_reg->type, true);
9052                         mark_pkt_end(this_branch, insn->src_reg, false);
9053                 } else {
9054                         return false;
9055                 }
9056                 break;
9057         case BPF_JLT:
9058                 if ((dst_reg->type == PTR_TO_PACKET &&
9059                      src_reg->type == PTR_TO_PACKET_END) ||
9060                     (dst_reg->type == PTR_TO_PACKET_META &&
9061                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9062                         /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
9063                         find_good_pkt_pointers(other_branch, dst_reg,
9064                                                dst_reg->type, true);
9065                         mark_pkt_end(this_branch, insn->dst_reg, false);
9066                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
9067                             src_reg->type == PTR_TO_PACKET) ||
9068                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9069                             src_reg->type == PTR_TO_PACKET_META)) {
9070                         /* pkt_end < pkt_data', pkt_data > pkt_meta' */
9071                         find_good_pkt_pointers(this_branch, src_reg,
9072                                                src_reg->type, false);
9073                         mark_pkt_end(other_branch, insn->src_reg, true);
9074                 } else {
9075                         return false;
9076                 }
9077                 break;
9078         case BPF_JGE:
9079                 if ((dst_reg->type == PTR_TO_PACKET &&
9080                      src_reg->type == PTR_TO_PACKET_END) ||
9081                     (dst_reg->type == PTR_TO_PACKET_META &&
9082                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9083                         /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
9084                         find_good_pkt_pointers(this_branch, dst_reg,
9085                                                dst_reg->type, true);
9086                         mark_pkt_end(other_branch, insn->dst_reg, false);
9087                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
9088                             src_reg->type == PTR_TO_PACKET) ||
9089                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9090                             src_reg->type == PTR_TO_PACKET_META)) {
9091                         /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
9092                         find_good_pkt_pointers(other_branch, src_reg,
9093                                                src_reg->type, false);
9094                         mark_pkt_end(this_branch, insn->src_reg, true);
9095                 } else {
9096                         return false;
9097                 }
9098                 break;
9099         case BPF_JLE:
9100                 if ((dst_reg->type == PTR_TO_PACKET &&
9101                      src_reg->type == PTR_TO_PACKET_END) ||
9102                     (dst_reg->type == PTR_TO_PACKET_META &&
9103                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
9104                         /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
9105                         find_good_pkt_pointers(other_branch, dst_reg,
9106                                                dst_reg->type, false);
9107                         mark_pkt_end(this_branch, insn->dst_reg, true);
9108                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
9109                             src_reg->type == PTR_TO_PACKET) ||
9110                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
9111                             src_reg->type == PTR_TO_PACKET_META)) {
9112                         /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
9113                         find_good_pkt_pointers(this_branch, src_reg,
9114                                                src_reg->type, true);
9115                         mark_pkt_end(other_branch, insn->src_reg, false);
9116                 } else {
9117                         return false;
9118                 }
9119                 break;
9120         default:
9121                 return false;
9122         }
9123
9124         return true;
9125 }
9126
9127 static void find_equal_scalars(struct bpf_verifier_state *vstate,
9128                                struct bpf_reg_state *known_reg)
9129 {
9130         struct bpf_func_state *state;
9131         struct bpf_reg_state *reg;
9132         int i, j;
9133
9134         for (i = 0; i <= vstate->curframe; i++) {
9135                 state = vstate->frame[i];
9136                 for (j = 0; j < MAX_BPF_REG; j++) {
9137                         reg = &state->regs[j];
9138                         if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
9139                                 *reg = *known_reg;
9140                 }
9141
9142                 bpf_for_each_spilled_reg(j, state, reg) {
9143                         if (!reg)
9144                                 continue;
9145                         if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
9146                                 *reg = *known_reg;
9147                 }
9148         }
9149 }
9150
9151 static int check_cond_jmp_op(struct bpf_verifier_env *env,
9152                              struct bpf_insn *insn, int *insn_idx)
9153 {
9154         struct bpf_verifier_state *this_branch = env->cur_state;
9155         struct bpf_verifier_state *other_branch;
9156         struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
9157         struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
9158         u8 opcode = BPF_OP(insn->code);
9159         bool is_jmp32;
9160         int pred = -1;
9161         int err;
9162
9163         /* Only conditional jumps are expected to reach here. */
9164         if (opcode == BPF_JA || opcode > BPF_JSLE) {
9165                 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
9166                 return -EINVAL;
9167         }
9168
9169         if (BPF_SRC(insn->code) == BPF_X) {
9170                 if (insn->imm != 0) {
9171                         verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
9172                         return -EINVAL;
9173                 }
9174
9175                 /* check src1 operand */
9176                 err = check_reg_arg(env, insn->src_reg, SRC_OP);
9177                 if (err)
9178                         return err;
9179
9180                 if (is_pointer_value(env, insn->src_reg)) {
9181                         verbose(env, "R%d pointer comparison prohibited\n",
9182                                 insn->src_reg);
9183                         return -EACCES;
9184                 }
9185                 src_reg = &regs[insn->src_reg];
9186         } else {
9187                 if (insn->src_reg != BPF_REG_0) {
9188                         verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
9189                         return -EINVAL;
9190                 }
9191         }
9192
9193         /* check src2 operand */
9194         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
9195         if (err)
9196                 return err;
9197
9198         dst_reg = &regs[insn->dst_reg];
9199         is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
9200
9201         if (BPF_SRC(insn->code) == BPF_K) {
9202                 pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
9203         } else if (src_reg->type == SCALAR_VALUE &&
9204                    is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
9205                 pred = is_branch_taken(dst_reg,
9206                                        tnum_subreg(src_reg->var_off).value,
9207                                        opcode,
9208                                        is_jmp32);
9209         } else if (src_reg->type == SCALAR_VALUE &&
9210                    !is_jmp32 && tnum_is_const(src_reg->var_off)) {
9211                 pred = is_branch_taken(dst_reg,
9212                                        src_reg->var_off.value,
9213                                        opcode,
9214                                        is_jmp32);
9215         } else if (reg_is_pkt_pointer_any(dst_reg) &&
9216                    reg_is_pkt_pointer_any(src_reg) &&
9217                    !is_jmp32) {
9218                 pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode);
9219         }
9220
9221         if (pred >= 0) {
9222                 /* If we get here with a dst_reg pointer type it is because
9223                  * above is_branch_taken() special cased the 0 comparison.
9224                  */
9225                 if (!__is_pointer_value(false, dst_reg))
9226                         err = mark_chain_precision(env, insn->dst_reg);
9227                 if (BPF_SRC(insn->code) == BPF_X && !err &&
9228                     !__is_pointer_value(false, src_reg))
9229                         err = mark_chain_precision(env, insn->src_reg);
9230                 if (err)
9231                         return err;
9232         }
9233
9234         if (pred == 1) {
9235                 /* Only follow the goto, ignore fall-through. If needed, push
9236                  * the fall-through branch for simulation under speculative
9237                  * execution.
9238                  */
9239                 if (!env->bypass_spec_v1 &&
9240                     !sanitize_speculative_path(env, insn, *insn_idx + 1,
9241                                                *insn_idx))
9242                         return -EFAULT;
9243                 *insn_idx += insn->off;
9244                 return 0;
9245         } else if (pred == 0) {
9246                 /* Only follow the fall-through branch, since that's where the
9247                  * program will go. If needed, push the goto branch for
9248                  * simulation under speculative execution.
9249                  */
9250                 if (!env->bypass_spec_v1 &&
9251                     !sanitize_speculative_path(env, insn,
9252                                                *insn_idx + insn->off + 1,
9253                                                *insn_idx))
9254                         return -EFAULT;
9255                 return 0;
9256         }
9257
9258         other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
9259                                   false);
9260         if (!other_branch)
9261                 return -EFAULT;
9262         other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
9263
9264         /* detect if we are comparing against a constant value so we can adjust
9265          * our min/max values for our dst register.
9266          * this is only legit if both are scalars (or pointers to the same
9267          * object, I suppose, but we don't support that right now), because
9268          * otherwise the different base pointers mean the offsets aren't
9269          * comparable.
9270          */
9271         if (BPF_SRC(insn->code) == BPF_X) {
9272                 struct bpf_reg_state *src_reg = &regs[insn->src_reg];
9273
9274                 if (dst_reg->type == SCALAR_VALUE &&
9275                     src_reg->type == SCALAR_VALUE) {
9276                         if (tnum_is_const(src_reg->var_off) ||
9277                             (is_jmp32 &&
9278                              tnum_is_const(tnum_subreg(src_reg->var_off))))
9279                                 reg_set_min_max(&other_branch_regs[insn->dst_reg],
9280                                                 dst_reg,
9281                                                 src_reg->var_off.value,
9282                                                 tnum_subreg(src_reg->var_off).value,
9283                                                 opcode, is_jmp32);
9284                         else if (tnum_is_const(dst_reg->var_off) ||
9285                                  (is_jmp32 &&
9286                                   tnum_is_const(tnum_subreg(dst_reg->var_off))))
9287                                 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
9288                                                     src_reg,
9289                                                     dst_reg->var_off.value,
9290                                                     tnum_subreg(dst_reg->var_off).value,
9291                                                     opcode, is_jmp32);
9292                         else if (!is_jmp32 &&
9293                                  (opcode == BPF_JEQ || opcode == BPF_JNE))
9294                                 /* Comparing for equality, we can combine knowledge */
9295                                 reg_combine_min_max(&other_branch_regs[insn->src_reg],
9296                                                     &other_branch_regs[insn->dst_reg],
9297                                                     src_reg, dst_reg, opcode);
9298                         if (src_reg->id &&
9299                             !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
9300                                 find_equal_scalars(this_branch, src_reg);
9301                                 find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
9302                         }
9303
9304                 }
9305         } else if (dst_reg->type == SCALAR_VALUE) {
9306                 reg_set_min_max(&other_branch_regs[insn->dst_reg],
9307                                         dst_reg, insn->imm, (u32)insn->imm,
9308                                         opcode, is_jmp32);
9309         }
9310
9311         if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
9312             !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
9313                 find_equal_scalars(this_branch, dst_reg);
9314                 find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
9315         }
9316
9317         /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
9318          * NOTE: these optimizations below are related with pointer comparison
9319          *       which will never be JMP32.
9320          */
9321         if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
9322             insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
9323             reg_type_may_be_null(dst_reg->type)) {
9324                 /* Mark all identical registers in each branch as either
9325                  * safe or unknown depending R == 0 or R != 0 conditional.
9326                  */
9327                 mark_ptr_or_null_regs(this_branch, insn->dst_reg,
9328                                       opcode == BPF_JNE);
9329                 mark_ptr_or_null_regs(other_branch, insn->dst_reg,
9330                                       opcode == BPF_JEQ);
9331         } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
9332                                            this_branch, other_branch) &&
9333                    is_pointer_value(env, insn->dst_reg)) {
9334                 verbose(env, "R%d pointer comparison prohibited\n",
9335                         insn->dst_reg);
9336                 return -EACCES;
9337         }
9338         if (env->log.level & BPF_LOG_LEVEL)
9339                 print_verifier_state(env, this_branch->frame[this_branch->curframe]);
9340         return 0;
9341 }
9342
9343 /* verify BPF_LD_IMM64 instruction */
9344 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
9345 {
9346         struct bpf_insn_aux_data *aux = cur_aux(env);
9347         struct bpf_reg_state *regs = cur_regs(env);
9348         struct bpf_reg_state *dst_reg;
9349         struct bpf_map *map;
9350         int err;
9351
9352         if (BPF_SIZE(insn->code) != BPF_DW) {
9353                 verbose(env, "invalid BPF_LD_IMM insn\n");
9354                 return -EINVAL;
9355         }
9356         if (insn->off != 0) {
9357                 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
9358                 return -EINVAL;
9359         }
9360
9361         err = check_reg_arg(env, insn->dst_reg, DST_OP);
9362         if (err)
9363                 return err;
9364
9365         dst_reg = &regs[insn->dst_reg];
9366         if (insn->src_reg == 0) {
9367                 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
9368
9369                 dst_reg->type = SCALAR_VALUE;
9370                 __mark_reg_known(&regs[insn->dst_reg], imm);
9371                 return 0;
9372         }
9373
9374         if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
9375                 mark_reg_known_zero(env, regs, insn->dst_reg);
9376
9377                 dst_reg->type = aux->btf_var.reg_type;
9378                 switch (dst_reg->type) {
9379                 case PTR_TO_MEM:
9380                         dst_reg->mem_size = aux->btf_var.mem_size;
9381                         break;
9382                 case PTR_TO_BTF_ID:
9383                 case PTR_TO_PERCPU_BTF_ID:
9384                         dst_reg->btf = aux->btf_var.btf;
9385                         dst_reg->btf_id = aux->btf_var.btf_id;
9386                         break;
9387                 default:
9388                         verbose(env, "bpf verifier is misconfigured\n");
9389                         return -EFAULT;
9390                 }
9391                 return 0;
9392         }
9393
9394         if (insn->src_reg == BPF_PSEUDO_FUNC) {
9395                 struct bpf_prog_aux *aux = env->prog->aux;
9396                 u32 subprogno = find_subprog(env,
9397                                              env->insn_idx + insn->imm + 1);
9398
9399                 if (!aux->func_info) {
9400                         verbose(env, "missing btf func_info\n");
9401                         return -EINVAL;
9402                 }
9403                 if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
9404                         verbose(env, "callback function not static\n");
9405                         return -EINVAL;
9406                 }
9407
9408                 dst_reg->type = PTR_TO_FUNC;
9409                 dst_reg->subprogno = subprogno;
9410                 return 0;
9411         }
9412
9413         map = env->used_maps[aux->map_index];
9414         mark_reg_known_zero(env, regs, insn->dst_reg);
9415         dst_reg->map_ptr = map;
9416
9417         if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
9418             insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
9419                 dst_reg->type = PTR_TO_MAP_VALUE;
9420                 dst_reg->off = aux->map_off;
9421                 if (map_value_has_spin_lock(map))
9422                         dst_reg->id = ++env->id_gen;
9423         } else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
9424                    insn->src_reg == BPF_PSEUDO_MAP_IDX) {
9425                 dst_reg->type = CONST_PTR_TO_MAP;
9426         } else {
9427                 verbose(env, "bpf verifier is misconfigured\n");
9428                 return -EINVAL;
9429         }
9430
9431         return 0;
9432 }
9433
9434 static bool may_access_skb(enum bpf_prog_type type)
9435 {
9436         switch (type) {
9437         case BPF_PROG_TYPE_SOCKET_FILTER:
9438         case BPF_PROG_TYPE_SCHED_CLS:
9439         case BPF_PROG_TYPE_SCHED_ACT:
9440                 return true;
9441         default:
9442                 return false;
9443         }
9444 }
9445
9446 /* verify safety of LD_ABS|LD_IND instructions:
9447  * - they can only appear in the programs where ctx == skb
9448  * - since they are wrappers of function calls, they scratch R1-R5 registers,
9449  *   preserve R6-R9, and store return value into R0
9450  *
9451  * Implicit input:
9452  *   ctx == skb == R6 == CTX
9453  *
9454  * Explicit input:
9455  *   SRC == any register
9456  *   IMM == 32-bit immediate
9457  *
9458  * Output:
9459  *   R0 - 8/16/32-bit skb data converted to cpu endianness
9460  */
9461 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
9462 {
9463         struct bpf_reg_state *regs = cur_regs(env);
9464         static const int ctx_reg = BPF_REG_6;
9465         u8 mode = BPF_MODE(insn->code);
9466         int i, err;
9467
9468         if (!may_access_skb(resolve_prog_type(env->prog))) {
9469                 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
9470                 return -EINVAL;
9471         }
9472
9473         if (!env->ops->gen_ld_abs) {
9474                 verbose(env, "bpf verifier is misconfigured\n");
9475                 return -EINVAL;
9476         }
9477
9478         if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
9479             BPF_SIZE(insn->code) == BPF_DW ||
9480             (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
9481                 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
9482                 return -EINVAL;
9483         }
9484
9485         /* check whether implicit source operand (register R6) is readable */
9486         err = check_reg_arg(env, ctx_reg, SRC_OP);
9487         if (err)
9488                 return err;
9489
9490         /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
9491          * gen_ld_abs() may terminate the program at runtime, leading to
9492          * reference leak.
9493          */
9494         err = check_reference_leak(env);
9495         if (err) {
9496                 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
9497                 return err;
9498         }
9499
9500         if (env->cur_state->active_spin_lock) {
9501                 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
9502                 return -EINVAL;
9503         }
9504
9505         if (regs[ctx_reg].type != PTR_TO_CTX) {
9506                 verbose(env,
9507                         "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
9508                 return -EINVAL;
9509         }
9510
9511         if (mode == BPF_IND) {
9512                 /* check explicit source operand */
9513                 err = check_reg_arg(env, insn->src_reg, SRC_OP);
9514                 if (err)
9515                         return err;
9516         }
9517
9518         err = check_ctx_reg(env, &regs[ctx_reg], ctx_reg);
9519         if (err < 0)
9520                 return err;
9521
9522         /* reset caller saved regs to unreadable */
9523         for (i = 0; i < CALLER_SAVED_REGS; i++) {
9524                 mark_reg_not_init(env, regs, caller_saved[i]);
9525                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
9526         }
9527
9528         /* mark destination R0 register as readable, since it contains
9529          * the value fetched from the packet.
9530          * Already marked as written above.
9531          */
9532         mark_reg_unknown(env, regs, BPF_REG_0);
9533         /* ld_abs load up to 32-bit skb data. */
9534         regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
9535         return 0;
9536 }
9537
9538 static int check_return_code(struct bpf_verifier_env *env)
9539 {
9540         struct tnum enforce_attach_type_range = tnum_unknown;
9541         const struct bpf_prog *prog = env->prog;
9542         struct bpf_reg_state *reg;
9543         struct tnum range = tnum_range(0, 1);
9544         enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
9545         int err;
9546         struct bpf_func_state *frame = env->cur_state->frame[0];
9547         const bool is_subprog = frame->subprogno;
9548
9549         /* LSM and struct_ops func-ptr's return type could be "void" */
9550         if (!is_subprog &&
9551             (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
9552              prog_type == BPF_PROG_TYPE_LSM) &&
9553             !prog->aux->attach_func_proto->type)
9554                 return 0;
9555
9556         /* eBPF calling convention is such that R0 is used
9557          * to return the value from eBPF program.
9558          * Make sure that it's readable at this time
9559          * of bpf_exit, which means that program wrote
9560          * something into it earlier
9561          */
9562         err = check_reg_arg(env, BPF_REG_0, SRC_OP);
9563         if (err)
9564                 return err;
9565
9566         if (is_pointer_value(env, BPF_REG_0)) {
9567                 verbose(env, "R0 leaks addr as return value\n");
9568                 return -EACCES;
9569         }
9570
9571         reg = cur_regs(env) + BPF_REG_0;
9572
9573         if (frame->in_async_callback_fn) {
9574                 /* enforce return zero from async callbacks like timer */
9575                 if (reg->type != SCALAR_VALUE) {
9576                         verbose(env, "In async callback the register R0 is not a known value (%s)\n",
9577                                 reg_type_str[reg->type]);
9578                         return -EINVAL;
9579                 }
9580
9581                 if (!tnum_in(tnum_const(0), reg->var_off)) {
9582                         verbose_invalid_scalar(env, reg, &range, "async callback", "R0");
9583                         return -EINVAL;
9584                 }
9585                 return 0;
9586         }
9587
9588         if (is_subprog) {
9589                 if (reg->type != SCALAR_VALUE) {
9590                         verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
9591                                 reg_type_str[reg->type]);
9592                         return -EINVAL;
9593                 }
9594                 return 0;
9595         }
9596
9597         switch (prog_type) {
9598         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
9599                 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
9600                     env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
9601                     env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
9602                     env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
9603                     env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
9604                     env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
9605                         range = tnum_range(1, 1);
9606                 if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
9607                     env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
9608                         range = tnum_range(0, 3);
9609                 break;
9610         case BPF_PROG_TYPE_CGROUP_SKB:
9611                 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
9612                         range = tnum_range(0, 3);
9613                         enforce_attach_type_range = tnum_range(2, 3);
9614                 }
9615                 break;
9616         case BPF_PROG_TYPE_CGROUP_SOCK:
9617         case BPF_PROG_TYPE_SOCK_OPS:
9618         case BPF_PROG_TYPE_CGROUP_DEVICE:
9619         case BPF_PROG_TYPE_CGROUP_SYSCTL:
9620         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
9621                 break;
9622         case BPF_PROG_TYPE_RAW_TRACEPOINT:
9623                 if (!env->prog->aux->attach_btf_id)
9624                         return 0;
9625                 range = tnum_const(0);
9626                 break;
9627         case BPF_PROG_TYPE_TRACING:
9628                 switch (env->prog->expected_attach_type) {
9629                 case BPF_TRACE_FENTRY:
9630                 case BPF_TRACE_FEXIT:
9631                         range = tnum_const(0);
9632                         break;
9633                 case BPF_TRACE_RAW_TP:
9634                 case BPF_MODIFY_RETURN:
9635                         return 0;
9636                 case BPF_TRACE_ITER:
9637                         break;
9638                 default:
9639                         return -ENOTSUPP;
9640                 }
9641                 break;
9642         case BPF_PROG_TYPE_SK_LOOKUP:
9643                 range = tnum_range(SK_DROP, SK_PASS);
9644                 break;
9645         case BPF_PROG_TYPE_EXT:
9646                 /* freplace program can return anything as its return value
9647                  * depends on the to-be-replaced kernel func or bpf program.
9648                  */
9649         default:
9650                 return 0;
9651         }
9652
9653         if (reg->type != SCALAR_VALUE) {
9654                 verbose(env, "At program exit the register R0 is not a known value (%s)\n",
9655                         reg_type_str[reg->type]);
9656                 return -EINVAL;
9657         }
9658
9659         if (!tnum_in(range, reg->var_off)) {
9660                 verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
9661                 return -EINVAL;
9662         }
9663
9664         if (!tnum_is_unknown(enforce_attach_type_range) &&
9665             tnum_in(enforce_attach_type_range, reg->var_off))
9666                 env->prog->enforce_expected_attach_type = 1;
9667         return 0;
9668 }
9669
9670 /* non-recursive DFS pseudo code
9671  * 1  procedure DFS-iterative(G,v):
9672  * 2      label v as discovered
9673  * 3      let S be a stack
9674  * 4      S.push(v)
9675  * 5      while S is not empty
9676  * 6            t <- S.pop()
9677  * 7            if t is what we're looking for:
9678  * 8                return t
9679  * 9            for all edges e in G.adjacentEdges(t) do
9680  * 10               if edge e is already labelled
9681  * 11                   continue with the next edge
9682  * 12               w <- G.adjacentVertex(t,e)
9683  * 13               if vertex w is not discovered and not explored
9684  * 14                   label e as tree-edge
9685  * 15                   label w as discovered
9686  * 16                   S.push(w)
9687  * 17                   continue at 5
9688  * 18               else if vertex w is discovered
9689  * 19                   label e as back-edge
9690  * 20               else
9691  * 21                   // vertex w is explored
9692  * 22                   label e as forward- or cross-edge
9693  * 23           label t as explored
9694  * 24           S.pop()
9695  *
9696  * convention:
9697  * 0x10 - discovered
9698  * 0x11 - discovered and fall-through edge labelled
9699  * 0x12 - discovered and fall-through and branch edges labelled
9700  * 0x20 - explored
9701  */
9702
9703 enum {
9704         DISCOVERED = 0x10,
9705         EXPLORED = 0x20,
9706         FALLTHROUGH = 1,
9707         BRANCH = 2,
9708 };
9709
9710 static u32 state_htab_size(struct bpf_verifier_env *env)
9711 {
9712         return env->prog->len;
9713 }
9714
9715 static struct bpf_verifier_state_list **explored_state(
9716                                         struct bpf_verifier_env *env,
9717                                         int idx)
9718 {
9719         struct bpf_verifier_state *cur = env->cur_state;
9720         struct bpf_func_state *state = cur->frame[cur->curframe];
9721
9722         return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
9723 }
9724
9725 static void init_explored_state(struct bpf_verifier_env *env, int idx)
9726 {
9727         env->insn_aux_data[idx].prune_point = true;
9728 }
9729
9730 enum {
9731         DONE_EXPLORING = 0,
9732         KEEP_EXPLORING = 1,
9733 };
9734
9735 /* t, w, e - match pseudo-code above:
9736  * t - index of current instruction
9737  * w - next instruction
9738  * e - edge
9739  */
9740 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
9741                      bool loop_ok)
9742 {
9743         int *insn_stack = env->cfg.insn_stack;
9744         int *insn_state = env->cfg.insn_state;
9745
9746         if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
9747                 return DONE_EXPLORING;
9748
9749         if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
9750                 return DONE_EXPLORING;
9751
9752         if (w < 0 || w >= env->prog->len) {
9753                 verbose_linfo(env, t, "%d: ", t);
9754                 verbose(env, "jump out of range from insn %d to %d\n", t, w);
9755                 return -EINVAL;
9756         }
9757
9758         if (e == BRANCH)
9759                 /* mark branch target for state pruning */
9760                 init_explored_state(env, w);
9761
9762         if (insn_state[w] == 0) {
9763                 /* tree-edge */
9764                 insn_state[t] = DISCOVERED | e;
9765                 insn_state[w] = DISCOVERED;
9766                 if (env->cfg.cur_stack >= env->prog->len)
9767                         return -E2BIG;
9768                 insn_stack[env->cfg.cur_stack++] = w;
9769                 return KEEP_EXPLORING;
9770         } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
9771                 if (loop_ok && env->bpf_capable)
9772                         return DONE_EXPLORING;
9773                 verbose_linfo(env, t, "%d: ", t);
9774                 verbose_linfo(env, w, "%d: ", w);
9775                 verbose(env, "back-edge from insn %d to %d\n", t, w);
9776                 return -EINVAL;
9777         } else if (insn_state[w] == EXPLORED) {
9778                 /* forward- or cross-edge */
9779                 insn_state[t] = DISCOVERED | e;
9780         } else {
9781                 verbose(env, "insn state internal bug\n");
9782                 return -EFAULT;
9783         }
9784         return DONE_EXPLORING;
9785 }
9786
9787 static int visit_func_call_insn(int t, int insn_cnt,
9788                                 struct bpf_insn *insns,
9789                                 struct bpf_verifier_env *env,
9790                                 bool visit_callee)
9791 {
9792         int ret;
9793
9794         ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
9795         if (ret)
9796                 return ret;
9797
9798         if (t + 1 < insn_cnt)
9799                 init_explored_state(env, t + 1);
9800         if (visit_callee) {
9801                 init_explored_state(env, t);
9802                 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
9803                                 /* It's ok to allow recursion from CFG point of
9804                                  * view. __check_func_call() will do the actual
9805                                  * check.
9806                                  */
9807                                 bpf_pseudo_func(insns + t));
9808         }
9809         return ret;
9810 }
9811
9812 /* Visits the instruction at index t and returns one of the following:
9813  *  < 0 - an error occurred
9814  *  DONE_EXPLORING - the instruction was fully explored
9815  *  KEEP_EXPLORING - there is still work to be done before it is fully explored
9816  */
9817 static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
9818 {
9819         struct bpf_insn *insns = env->prog->insnsi;
9820         int ret;
9821
9822         if (bpf_pseudo_func(insns + t))
9823                 return visit_func_call_insn(t, insn_cnt, insns, env, true);
9824
9825         /* All non-branch instructions have a single fall-through edge. */
9826         if (BPF_CLASS(insns[t].code) != BPF_JMP &&
9827             BPF_CLASS(insns[t].code) != BPF_JMP32)
9828                 return push_insn(t, t + 1, FALLTHROUGH, env, false);
9829
9830         switch (BPF_OP(insns[t].code)) {
9831         case BPF_EXIT:
9832                 return DONE_EXPLORING;
9833
9834         case BPF_CALL:
9835                 if (insns[t].imm == BPF_FUNC_timer_set_callback)
9836                         /* Mark this call insn to trigger is_state_visited() check
9837                          * before call itself is processed by __check_func_call().
9838                          * Otherwise new async state will be pushed for further
9839                          * exploration.
9840                          */
9841                         init_explored_state(env, t);
9842                 return visit_func_call_insn(t, insn_cnt, insns, env,
9843                                             insns[t].src_reg == BPF_PSEUDO_CALL);
9844
9845         case BPF_JA:
9846                 if (BPF_SRC(insns[t].code) != BPF_K)
9847                         return -EINVAL;
9848
9849                 /* unconditional jump with single edge */
9850                 ret = push_insn(t, t + insns[t].off + 1, FALLTHROUGH, env,
9851                                 true);
9852                 if (ret)
9853                         return ret;
9854
9855                 /* unconditional jmp is not a good pruning point,
9856                  * but it's marked, since backtracking needs
9857                  * to record jmp history in is_state_visited().
9858                  */
9859                 init_explored_state(env, t + insns[t].off + 1);
9860                 /* tell verifier to check for equivalent states
9861                  * after every call and jump
9862                  */
9863                 if (t + 1 < insn_cnt)
9864                         init_explored_state(env, t + 1);
9865
9866                 return ret;
9867
9868         default:
9869                 /* conditional jump with two edges */
9870                 init_explored_state(env, t);
9871                 ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
9872                 if (ret)
9873                         return ret;
9874
9875                 return push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
9876         }
9877 }
9878
9879 /* non-recursive depth-first-search to detect loops in BPF program
9880  * loop == back-edge in directed graph
9881  */
9882 static int check_cfg(struct bpf_verifier_env *env)
9883 {
9884         int insn_cnt = env->prog->len;
9885         int *insn_stack, *insn_state;
9886         int ret = 0;
9887         int i;
9888
9889         insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
9890         if (!insn_state)
9891                 return -ENOMEM;
9892
9893         insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
9894         if (!insn_stack) {
9895                 kvfree(insn_state);
9896                 return -ENOMEM;
9897         }
9898
9899         insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
9900         insn_stack[0] = 0; /* 0 is the first instruction */
9901         env->cfg.cur_stack = 1;
9902
9903         while (env->cfg.cur_stack > 0) {
9904                 int t = insn_stack[env->cfg.cur_stack - 1];
9905
9906                 ret = visit_insn(t, insn_cnt, env);
9907                 switch (ret) {
9908                 case DONE_EXPLORING:
9909                         insn_state[t] = EXPLORED;
9910                         env->cfg.cur_stack--;
9911                         break;
9912                 case KEEP_EXPLORING:
9913                         break;
9914                 default:
9915                         if (ret > 0) {
9916                                 verbose(env, "visit_insn internal bug\n");
9917                                 ret = -EFAULT;
9918                         }
9919                         goto err_free;
9920                 }
9921         }
9922
9923         if (env->cfg.cur_stack < 0) {
9924                 verbose(env, "pop stack internal bug\n");
9925                 ret = -EFAULT;
9926                 goto err_free;
9927         }
9928
9929         for (i = 0; i < insn_cnt; i++) {
9930                 if (insn_state[i] != EXPLORED) {
9931                         verbose(env, "unreachable insn %d\n", i);
9932                         ret = -EINVAL;
9933                         goto err_free;
9934                 }
9935         }
9936         ret = 0; /* cfg looks good */
9937
9938 err_free:
9939         kvfree(insn_state);
9940         kvfree(insn_stack);
9941         env->cfg.insn_state = env->cfg.insn_stack = NULL;
9942         return ret;
9943 }
9944
9945 static int check_abnormal_return(struct bpf_verifier_env *env)
9946 {
9947         int i;
9948
9949         for (i = 1; i < env->subprog_cnt; i++) {
9950                 if (env->subprog_info[i].has_ld_abs) {
9951                         verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
9952                         return -EINVAL;
9953                 }
9954                 if (env->subprog_info[i].has_tail_call) {
9955                         verbose(env, "tail_call is not allowed in subprogs without BTF\n");
9956                         return -EINVAL;
9957                 }
9958         }
9959         return 0;
9960 }
9961
9962 /* The minimum supported BTF func info size */
9963 #define MIN_BPF_FUNCINFO_SIZE   8
9964 #define MAX_FUNCINFO_REC_SIZE   252
9965
9966 static int check_btf_func(struct bpf_verifier_env *env,
9967                           const union bpf_attr *attr,
9968                           bpfptr_t uattr)
9969 {
9970         const struct btf_type *type, *func_proto, *ret_type;
9971         u32 i, nfuncs, urec_size, min_size;
9972         u32 krec_size = sizeof(struct bpf_func_info);
9973         struct bpf_func_info *krecord;
9974         struct bpf_func_info_aux *info_aux = NULL;
9975         struct bpf_prog *prog;
9976         const struct btf *btf;
9977         bpfptr_t urecord;
9978         u32 prev_offset = 0;
9979         bool scalar_return;
9980         int ret = -ENOMEM;
9981
9982         nfuncs = attr->func_info_cnt;
9983         if (!nfuncs) {
9984                 if (check_abnormal_return(env))
9985                         return -EINVAL;
9986                 return 0;
9987         }
9988
9989         if (nfuncs != env->subprog_cnt) {
9990                 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
9991                 return -EINVAL;
9992         }
9993
9994         urec_size = attr->func_info_rec_size;
9995         if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
9996             urec_size > MAX_FUNCINFO_REC_SIZE ||
9997             urec_size % sizeof(u32)) {
9998                 verbose(env, "invalid func info rec size %u\n", urec_size);
9999                 return -EINVAL;
10000         }
10001
10002         prog = env->prog;
10003         btf = prog->aux->btf;
10004
10005         urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
10006         min_size = min_t(u32, krec_size, urec_size);
10007
10008         krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
10009         if (!krecord)
10010                 return -ENOMEM;
10011         info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
10012         if (!info_aux)
10013                 goto err_free;
10014
10015         for (i = 0; i < nfuncs; i++) {
10016                 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
10017                 if (ret) {
10018                         if (ret == -E2BIG) {
10019                                 verbose(env, "nonzero tailing record in func info");
10020                                 /* set the size kernel expects so loader can zero
10021                                  * out the rest of the record.
10022                                  */
10023                                 if (copy_to_bpfptr_offset(uattr,
10024                                                           offsetof(union bpf_attr, func_info_rec_size),
10025                                                           &min_size, sizeof(min_size)))
10026                                         ret = -EFAULT;
10027                         }
10028                         goto err_free;
10029                 }
10030
10031                 if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
10032                         ret = -EFAULT;
10033                         goto err_free;
10034                 }
10035
10036                 /* check insn_off */
10037                 ret = -EINVAL;
10038                 if (i == 0) {
10039                         if (krecord[i].insn_off) {
10040                                 verbose(env,
10041                                         "nonzero insn_off %u for the first func info record",
10042                                         krecord[i].insn_off);
10043                                 goto err_free;
10044                         }
10045                 } else if (krecord[i].insn_off <= prev_offset) {
10046                         verbose(env,
10047                                 "same or smaller insn offset (%u) than previous func info record (%u)",
10048                                 krecord[i].insn_off, prev_offset);
10049                         goto err_free;
10050                 }
10051
10052                 if (env->subprog_info[i].start != krecord[i].insn_off) {
10053                         verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
10054                         goto err_free;
10055                 }
10056
10057                 /* check type_id */
10058                 type = btf_type_by_id(btf, krecord[i].type_id);
10059                 if (!type || !btf_type_is_func(type)) {
10060                         verbose(env, "invalid type id %d in func info",
10061                                 krecord[i].type_id);
10062                         goto err_free;
10063                 }
10064                 info_aux[i].linkage = BTF_INFO_VLEN(type->info);
10065
10066                 func_proto = btf_type_by_id(btf, type->type);
10067                 if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
10068                         /* btf_func_check() already verified it during BTF load */
10069                         goto err_free;
10070                 ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
10071                 scalar_return =
10072                         btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
10073                 if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
10074                         verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
10075                         goto err_free;
10076                 }
10077                 if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
10078                         verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
10079                         goto err_free;
10080                 }
10081
10082                 prev_offset = krecord[i].insn_off;
10083                 bpfptr_add(&urecord, urec_size);
10084         }
10085
10086         prog->aux->func_info = krecord;
10087         prog->aux->func_info_cnt = nfuncs;
10088         prog->aux->func_info_aux = info_aux;
10089         return 0;
10090
10091 err_free:
10092         kvfree(krecord);
10093         kfree(info_aux);
10094         return ret;
10095 }
10096
10097 static void adjust_btf_func(struct bpf_verifier_env *env)
10098 {
10099         struct bpf_prog_aux *aux = env->prog->aux;
10100         int i;
10101
10102         if (!aux->func_info)
10103                 return;
10104
10105         for (i = 0; i < env->subprog_cnt; i++)
10106                 aux->func_info[i].insn_off = env->subprog_info[i].start;
10107 }
10108
10109 #define MIN_BPF_LINEINFO_SIZE   (offsetof(struct bpf_line_info, line_col) + \
10110                 sizeof(((struct bpf_line_info *)(0))->line_col))
10111 #define MAX_LINEINFO_REC_SIZE   MAX_FUNCINFO_REC_SIZE
10112
10113 static int check_btf_line(struct bpf_verifier_env *env,
10114                           const union bpf_attr *attr,
10115                           bpfptr_t uattr)
10116 {
10117         u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
10118         struct bpf_subprog_info *sub;
10119         struct bpf_line_info *linfo;
10120         struct bpf_prog *prog;
10121         const struct btf *btf;
10122         bpfptr_t ulinfo;
10123         int err;
10124
10125         nr_linfo = attr->line_info_cnt;
10126         if (!nr_linfo)
10127                 return 0;
10128         if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
10129                 return -EINVAL;
10130
10131         rec_size = attr->line_info_rec_size;
10132         if (rec_size < MIN_BPF_LINEINFO_SIZE ||
10133             rec_size > MAX_LINEINFO_REC_SIZE ||
10134             rec_size & (sizeof(u32) - 1))
10135                 return -EINVAL;
10136
10137         /* Need to zero it in case the userspace may
10138          * pass in a smaller bpf_line_info object.
10139          */
10140         linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
10141                          GFP_KERNEL | __GFP_NOWARN);
10142         if (!linfo)
10143                 return -ENOMEM;
10144
10145         prog = env->prog;
10146         btf = prog->aux->btf;
10147
10148         s = 0;
10149         sub = env->subprog_info;
10150         ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
10151         expected_size = sizeof(struct bpf_line_info);
10152         ncopy = min_t(u32, expected_size, rec_size);
10153         for (i = 0; i < nr_linfo; i++) {
10154                 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
10155                 if (err) {
10156                         if (err == -E2BIG) {
10157                                 verbose(env, "nonzero tailing record in line_info");
10158                                 if (copy_to_bpfptr_offset(uattr,
10159                                                           offsetof(union bpf_attr, line_info_rec_size),
10160                                                           &expected_size, sizeof(expected_size)))
10161                                         err = -EFAULT;
10162                         }
10163                         goto err_free;
10164                 }
10165
10166                 if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
10167                         err = -EFAULT;
10168                         goto err_free;
10169                 }
10170
10171                 /*
10172                  * Check insn_off to ensure
10173                  * 1) strictly increasing AND
10174                  * 2) bounded by prog->len
10175                  *
10176                  * The linfo[0].insn_off == 0 check logically falls into
10177                  * the later "missing bpf_line_info for func..." case
10178                  * because the first linfo[0].insn_off must be the
10179                  * first sub also and the first sub must have
10180                  * subprog_info[0].start == 0.
10181                  */
10182                 if ((i && linfo[i].insn_off <= prev_offset) ||
10183                     linfo[i].insn_off >= prog->len) {
10184                         verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
10185                                 i, linfo[i].insn_off, prev_offset,
10186                                 prog->len);
10187                         err = -EINVAL;
10188                         goto err_free;
10189                 }
10190
10191                 if (!prog->insnsi[linfo[i].insn_off].code) {
10192                         verbose(env,
10193                                 "Invalid insn code at line_info[%u].insn_off\n",
10194                                 i);
10195                         err = -EINVAL;
10196                         goto err_free;
10197                 }
10198
10199                 if (!btf_name_by_offset(btf, linfo[i].line_off) ||
10200                     !btf_name_by_offset(btf, linfo[i].file_name_off)) {
10201                         verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
10202                         err = -EINVAL;
10203                         goto err_free;
10204                 }
10205
10206                 if (s != env->subprog_cnt) {
10207                         if (linfo[i].insn_off == sub[s].start) {
10208                                 sub[s].linfo_idx = i;
10209                                 s++;
10210                         } else if (sub[s].start < linfo[i].insn_off) {
10211                                 verbose(env, "missing bpf_line_info for func#%u\n", s);
10212                                 err = -EINVAL;
10213                                 goto err_free;
10214                         }
10215                 }
10216
10217                 prev_offset = linfo[i].insn_off;
10218                 bpfptr_add(&ulinfo, rec_size);
10219         }
10220
10221         if (s != env->subprog_cnt) {
10222                 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
10223                         env->subprog_cnt - s, s);
10224                 err = -EINVAL;
10225                 goto err_free;
10226         }
10227
10228         prog->aux->linfo = linfo;
10229         prog->aux->nr_linfo = nr_linfo;
10230
10231         return 0;
10232
10233 err_free:
10234         kvfree(linfo);
10235         return err;
10236 }
10237
10238 static int check_btf_info(struct bpf_verifier_env *env,
10239                           const union bpf_attr *attr,
10240                           bpfptr_t uattr)
10241 {
10242         struct btf *btf;
10243         int err;
10244
10245         if (!attr->func_info_cnt && !attr->line_info_cnt) {
10246                 if (check_abnormal_return(env))
10247                         return -EINVAL;
10248                 return 0;
10249         }
10250
10251         btf = btf_get_by_fd(attr->prog_btf_fd);
10252         if (IS_ERR(btf))
10253                 return PTR_ERR(btf);
10254         if (btf_is_kernel(btf)) {
10255                 btf_put(btf);
10256                 return -EACCES;
10257         }
10258         env->prog->aux->btf = btf;
10259
10260         err = check_btf_func(env, attr, uattr);
10261         if (err)
10262                 return err;
10263
10264         err = check_btf_line(env, attr, uattr);
10265         if (err)
10266                 return err;
10267
10268         return 0;
10269 }
10270
10271 /* check %cur's range satisfies %old's */
10272 static bool range_within(struct bpf_reg_state *old,
10273                          struct bpf_reg_state *cur)
10274 {
10275         return old->umin_value <= cur->umin_value &&
10276                old->umax_value >= cur->umax_value &&
10277                old->smin_value <= cur->smin_value &&
10278                old->smax_value >= cur->smax_value &&
10279                old->u32_min_value <= cur->u32_min_value &&
10280                old->u32_max_value >= cur->u32_max_value &&
10281                old->s32_min_value <= cur->s32_min_value &&
10282                old->s32_max_value >= cur->s32_max_value;
10283 }
10284
10285 /* If in the old state two registers had the same id, then they need to have
10286  * the same id in the new state as well.  But that id could be different from
10287  * the old state, so we need to track the mapping from old to new ids.
10288  * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
10289  * regs with old id 5 must also have new id 9 for the new state to be safe.  But
10290  * regs with a different old id could still have new id 9, we don't care about
10291  * that.
10292  * So we look through our idmap to see if this old id has been seen before.  If
10293  * so, we require the new id to match; otherwise, we add the id pair to the map.
10294  */
10295 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
10296 {
10297         unsigned int i;
10298
10299         for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
10300                 if (!idmap[i].old) {
10301                         /* Reached an empty slot; haven't seen this id before */
10302                         idmap[i].old = old_id;
10303                         idmap[i].cur = cur_id;
10304                         return true;
10305                 }
10306                 if (idmap[i].old == old_id)
10307                         return idmap[i].cur == cur_id;
10308         }
10309         /* We ran out of idmap slots, which should be impossible */
10310         WARN_ON_ONCE(1);
10311         return false;
10312 }
10313
10314 static void clean_func_state(struct bpf_verifier_env *env,
10315                              struct bpf_func_state *st)
10316 {
10317         enum bpf_reg_liveness live;
10318         int i, j;
10319
10320         for (i = 0; i < BPF_REG_FP; i++) {
10321                 live = st->regs[i].live;
10322                 /* liveness must not touch this register anymore */
10323                 st->regs[i].live |= REG_LIVE_DONE;
10324                 if (!(live & REG_LIVE_READ))
10325                         /* since the register is unused, clear its state
10326                          * to make further comparison simpler
10327                          */
10328                         __mark_reg_not_init(env, &st->regs[i]);
10329         }
10330
10331         for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
10332                 live = st->stack[i].spilled_ptr.live;
10333                 /* liveness must not touch this stack slot anymore */
10334                 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
10335                 if (!(live & REG_LIVE_READ)) {
10336                         __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
10337                         for (j = 0; j < BPF_REG_SIZE; j++)
10338                                 st->stack[i].slot_type[j] = STACK_INVALID;
10339                 }
10340         }
10341 }
10342
10343 static void clean_verifier_state(struct bpf_verifier_env *env,
10344                                  struct bpf_verifier_state *st)
10345 {
10346         int i;
10347
10348         if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
10349                 /* all regs in this state in all frames were already marked */
10350                 return;
10351
10352         for (i = 0; i <= st->curframe; i++)
10353                 clean_func_state(env, st->frame[i]);
10354 }
10355
10356 /* the parentage chains form a tree.
10357  * the verifier states are added to state lists at given insn and
10358  * pushed into state stack for future exploration.
10359  * when the verifier reaches bpf_exit insn some of the verifer states
10360  * stored in the state lists have their final liveness state already,
10361  * but a lot of states will get revised from liveness point of view when
10362  * the verifier explores other branches.
10363  * Example:
10364  * 1: r0 = 1
10365  * 2: if r1 == 100 goto pc+1
10366  * 3: r0 = 2
10367  * 4: exit
10368  * when the verifier reaches exit insn the register r0 in the state list of
10369  * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
10370  * of insn 2 and goes exploring further. At the insn 4 it will walk the
10371  * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
10372  *
10373  * Since the verifier pushes the branch states as it sees them while exploring
10374  * the program the condition of walking the branch instruction for the second
10375  * time means that all states below this branch were already explored and
10376  * their final liveness marks are already propagated.
10377  * Hence when the verifier completes the search of state list in is_state_visited()
10378  * we can call this clean_live_states() function to mark all liveness states
10379  * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
10380  * will not be used.
10381  * This function also clears the registers and stack for states that !READ
10382  * to simplify state merging.
10383  *
10384  * Important note here that walking the same branch instruction in the callee
10385  * doesn't meant that the states are DONE. The verifier has to compare
10386  * the callsites
10387  */
10388 static void clean_live_states(struct bpf_verifier_env *env, int insn,
10389                               struct bpf_verifier_state *cur)
10390 {
10391         struct bpf_verifier_state_list *sl;
10392         int i;
10393
10394         sl = *explored_state(env, insn);
10395         while (sl) {
10396                 if (sl->state.branches)
10397                         goto next;
10398                 if (sl->state.insn_idx != insn ||
10399                     sl->state.curframe != cur->curframe)
10400                         goto next;
10401                 for (i = 0; i <= cur->curframe; i++)
10402                         if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
10403                                 goto next;
10404                 clean_verifier_state(env, &sl->state);
10405 next:
10406                 sl = sl->next;
10407         }
10408 }
10409
10410 /* Returns true if (rold safe implies rcur safe) */
10411 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
10412                     struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
10413 {
10414         bool equal;
10415
10416         if (!(rold->live & REG_LIVE_READ))
10417                 /* explored state didn't use this */
10418                 return true;
10419
10420         equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
10421
10422         if (rold->type == PTR_TO_STACK)
10423                 /* two stack pointers are equal only if they're pointing to
10424                  * the same stack frame, since fp-8 in foo != fp-8 in bar
10425                  */
10426                 return equal && rold->frameno == rcur->frameno;
10427
10428         if (equal)
10429                 return true;
10430
10431         if (rold->type == NOT_INIT)
10432                 /* explored state can't have used this */
10433                 return true;
10434         if (rcur->type == NOT_INIT)
10435                 return false;
10436         switch (rold->type) {
10437         case SCALAR_VALUE:
10438                 if (env->explore_alu_limits)
10439                         return false;
10440                 if (rcur->type == SCALAR_VALUE) {
10441                         if (!rold->precise && !rcur->precise)
10442                                 return true;
10443                         /* new val must satisfy old val knowledge */
10444                         return range_within(rold, rcur) &&
10445                                tnum_in(rold->var_off, rcur->var_off);
10446                 } else {
10447                         /* We're trying to use a pointer in place of a scalar.
10448                          * Even if the scalar was unbounded, this could lead to
10449                          * pointer leaks because scalars are allowed to leak
10450                          * while pointers are not. We could make this safe in
10451                          * special cases if root is calling us, but it's
10452                          * probably not worth the hassle.
10453                          */
10454                         return false;
10455                 }
10456         case PTR_TO_MAP_KEY:
10457         case PTR_TO_MAP_VALUE:
10458                 /* If the new min/max/var_off satisfy the old ones and
10459                  * everything else matches, we are OK.
10460                  * 'id' is not compared, since it's only used for maps with
10461                  * bpf_spin_lock inside map element and in such cases if
10462                  * the rest of the prog is valid for one map element then
10463                  * it's valid for all map elements regardless of the key
10464                  * used in bpf_map_lookup()
10465                  */
10466                 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
10467                        range_within(rold, rcur) &&
10468                        tnum_in(rold->var_off, rcur->var_off);
10469         case PTR_TO_MAP_VALUE_OR_NULL:
10470                 /* a PTR_TO_MAP_VALUE could be safe to use as a
10471                  * PTR_TO_MAP_VALUE_OR_NULL into the same map.
10472                  * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
10473                  * checked, doing so could have affected others with the same
10474                  * id, and we can't check for that because we lost the id when
10475                  * we converted to a PTR_TO_MAP_VALUE.
10476                  */
10477                 if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
10478                         return false;
10479                 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
10480                         return false;
10481                 /* Check our ids match any regs they're supposed to */
10482                 return check_ids(rold->id, rcur->id, idmap);
10483         case PTR_TO_PACKET_META:
10484         case PTR_TO_PACKET:
10485                 if (rcur->type != rold->type)
10486                         return false;
10487                 /* We must have at least as much range as the old ptr
10488                  * did, so that any accesses which were safe before are
10489                  * still safe.  This is true even if old range < old off,
10490                  * since someone could have accessed through (ptr - k), or
10491                  * even done ptr -= k in a register, to get a safe access.
10492                  */
10493                 if (rold->range > rcur->range)
10494                         return false;
10495                 /* If the offsets don't match, we can't trust our alignment;
10496                  * nor can we be sure that we won't fall out of range.
10497                  */
10498                 if (rold->off != rcur->off)
10499                         return false;
10500                 /* id relations must be preserved */
10501                 if (rold->id && !check_ids(rold->id, rcur->id, idmap))
10502                         return false;
10503                 /* new val must satisfy old val knowledge */
10504                 return range_within(rold, rcur) &&
10505                        tnum_in(rold->var_off, rcur->var_off);
10506         case PTR_TO_CTX:
10507         case CONST_PTR_TO_MAP:
10508         case PTR_TO_PACKET_END:
10509         case PTR_TO_FLOW_KEYS:
10510         case PTR_TO_SOCKET:
10511         case PTR_TO_SOCKET_OR_NULL:
10512         case PTR_TO_SOCK_COMMON:
10513         case PTR_TO_SOCK_COMMON_OR_NULL:
10514         case PTR_TO_TCP_SOCK:
10515         case PTR_TO_TCP_SOCK_OR_NULL:
10516         case PTR_TO_XDP_SOCK:
10517                 /* Only valid matches are exact, which memcmp() above
10518                  * would have accepted
10519                  */
10520         default:
10521                 /* Don't know what's going on, just say it's not safe */
10522                 return false;
10523         }
10524
10525         /* Shouldn't get here; if we do, say it's not safe */
10526         WARN_ON_ONCE(1);
10527         return false;
10528 }
10529
10530 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
10531                       struct bpf_func_state *cur, struct bpf_id_pair *idmap)
10532 {
10533         int i, spi;
10534
10535         /* walk slots of the explored stack and ignore any additional
10536          * slots in the current stack, since explored(safe) state
10537          * didn't use them
10538          */
10539         for (i = 0; i < old->allocated_stack; i++) {
10540                 spi = i / BPF_REG_SIZE;
10541
10542                 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
10543                         i += BPF_REG_SIZE - 1;
10544                         /* explored state didn't use this */
10545                         continue;
10546                 }
10547
10548                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
10549                         continue;
10550
10551                 /* explored stack has more populated slots than current stack
10552                  * and these slots were used
10553                  */
10554                 if (i >= cur->allocated_stack)
10555                         return false;
10556
10557                 /* if old state was safe with misc data in the stack
10558                  * it will be safe with zero-initialized stack.
10559                  * The opposite is not true
10560                  */
10561                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
10562                     cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
10563                         continue;
10564                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
10565                     cur->stack[spi].slot_type[i % BPF_REG_SIZE])
10566                         /* Ex: old explored (safe) state has STACK_SPILL in
10567                          * this stack slot, but current has STACK_MISC ->
10568                          * this verifier states are not equivalent,
10569                          * return false to continue verification of this path
10570                          */
10571                         return false;
10572                 if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
10573                         continue;
10574                 if (!is_spilled_reg(&old->stack[spi]))
10575                         continue;
10576                 if (!regsafe(env, &old->stack[spi].spilled_ptr,
10577                              &cur->stack[spi].spilled_ptr, idmap))
10578                         /* when explored and current stack slot are both storing
10579                          * spilled registers, check that stored pointers types
10580                          * are the same as well.
10581                          * Ex: explored safe path could have stored
10582                          * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
10583                          * but current path has stored:
10584                          * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
10585                          * such verifier states are not equivalent.
10586                          * return false to continue verification of this path
10587                          */
10588                         return false;
10589         }
10590         return true;
10591 }
10592
10593 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
10594 {
10595         if (old->acquired_refs != cur->acquired_refs)
10596                 return false;
10597         return !memcmp(old->refs, cur->refs,
10598                        sizeof(*old->refs) * old->acquired_refs);
10599 }
10600
10601 /* compare two verifier states
10602  *
10603  * all states stored in state_list are known to be valid, since
10604  * verifier reached 'bpf_exit' instruction through them
10605  *
10606  * this function is called when verifier exploring different branches of
10607  * execution popped from the state stack. If it sees an old state that has
10608  * more strict register state and more strict stack state then this execution
10609  * branch doesn't need to be explored further, since verifier already
10610  * concluded that more strict state leads to valid finish.
10611  *
10612  * Therefore two states are equivalent if register state is more conservative
10613  * and explored stack state is more conservative than the current one.
10614  * Example:
10615  *       explored                   current
10616  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
10617  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
10618  *
10619  * In other words if current stack state (one being explored) has more
10620  * valid slots than old one that already passed validation, it means
10621  * the verifier can stop exploring and conclude that current state is valid too
10622  *
10623  * Similarly with registers. If explored state has register type as invalid
10624  * whereas register type in current state is meaningful, it means that
10625  * the current state will reach 'bpf_exit' instruction safely
10626  */
10627 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
10628                               struct bpf_func_state *cur)
10629 {
10630         int i;
10631
10632         memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
10633         for (i = 0; i < MAX_BPF_REG; i++)
10634                 if (!regsafe(env, &old->regs[i], &cur->regs[i],
10635                              env->idmap_scratch))
10636                         return false;
10637
10638         if (!stacksafe(env, old, cur, env->idmap_scratch))
10639                 return false;
10640
10641         if (!refsafe(old, cur))
10642                 return false;
10643
10644         return true;
10645 }
10646
10647 static bool states_equal(struct bpf_verifier_env *env,
10648                          struct bpf_verifier_state *old,
10649                          struct bpf_verifier_state *cur)
10650 {
10651         int i;
10652
10653         if (old->curframe != cur->curframe)
10654                 return false;
10655
10656         /* Verification state from speculative execution simulation
10657          * must never prune a non-speculative execution one.
10658          */
10659         if (old->speculative && !cur->speculative)
10660                 return false;
10661
10662         if (old->active_spin_lock != cur->active_spin_lock)
10663                 return false;
10664
10665         /* for states to be equal callsites have to be the same
10666          * and all frame states need to be equivalent
10667          */
10668         for (i = 0; i <= old->curframe; i++) {
10669                 if (old->frame[i]->callsite != cur->frame[i]->callsite)
10670                         return false;
10671                 if (!func_states_equal(env, old->frame[i], cur->frame[i]))
10672                         return false;
10673         }
10674         return true;
10675 }
10676
10677 /* Return 0 if no propagation happened. Return negative error code if error
10678  * happened. Otherwise, return the propagated bit.
10679  */
10680 static int propagate_liveness_reg(struct bpf_verifier_env *env,
10681                                   struct bpf_reg_state *reg,
10682                                   struct bpf_reg_state *parent_reg)
10683 {
10684         u8 parent_flag = parent_reg->live & REG_LIVE_READ;
10685         u8 flag = reg->live & REG_LIVE_READ;
10686         int err;
10687
10688         /* When comes here, read flags of PARENT_REG or REG could be any of
10689          * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
10690          * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
10691          */
10692         if (parent_flag == REG_LIVE_READ64 ||
10693             /* Or if there is no read flag from REG. */
10694             !flag ||
10695             /* Or if the read flag from REG is the same as PARENT_REG. */
10696             parent_flag == flag)
10697                 return 0;
10698
10699         err = mark_reg_read(env, reg, parent_reg, flag);
10700         if (err)
10701                 return err;
10702
10703         return flag;
10704 }
10705
10706 /* A write screens off any subsequent reads; but write marks come from the
10707  * straight-line code between a state and its parent.  When we arrive at an
10708  * equivalent state (jump target or such) we didn't arrive by the straight-line
10709  * code, so read marks in the state must propagate to the parent regardless
10710  * of the state's write marks. That's what 'parent == state->parent' comparison
10711  * in mark_reg_read() is for.
10712  */
10713 static int propagate_liveness(struct bpf_verifier_env *env,
10714                               const struct bpf_verifier_state *vstate,
10715                               struct bpf_verifier_state *vparent)
10716 {
10717         struct bpf_reg_state *state_reg, *parent_reg;
10718         struct bpf_func_state *state, *parent;
10719         int i, frame, err = 0;
10720
10721         if (vparent->curframe != vstate->curframe) {
10722                 WARN(1, "propagate_live: parent frame %d current frame %d\n",
10723                      vparent->curframe, vstate->curframe);
10724                 return -EFAULT;
10725         }
10726         /* Propagate read liveness of registers... */
10727         BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
10728         for (frame = 0; frame <= vstate->curframe; frame++) {
10729                 parent = vparent->frame[frame];
10730                 state = vstate->frame[frame];
10731                 parent_reg = parent->regs;
10732                 state_reg = state->regs;
10733                 /* We don't need to worry about FP liveness, it's read-only */
10734                 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
10735                         err = propagate_liveness_reg(env, &state_reg[i],
10736                                                      &parent_reg[i]);
10737                         if (err < 0)
10738                                 return err;
10739                         if (err == REG_LIVE_READ64)
10740                                 mark_insn_zext(env, &parent_reg[i]);
10741                 }
10742
10743                 /* Propagate stack slots. */
10744                 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
10745                             i < parent->allocated_stack / BPF_REG_SIZE; i++) {
10746                         parent_reg = &parent->stack[i].spilled_ptr;
10747                         state_reg = &state->stack[i].spilled_ptr;
10748                         err = propagate_liveness_reg(env, state_reg,
10749                                                      parent_reg);
10750                         if (err < 0)
10751                                 return err;
10752                 }
10753         }
10754         return 0;
10755 }
10756
10757 /* find precise scalars in the previous equivalent state and
10758  * propagate them into the current state
10759  */
10760 static int propagate_precision(struct bpf_verifier_env *env,
10761                                const struct bpf_verifier_state *old)
10762 {
10763         struct bpf_reg_state *state_reg;
10764         struct bpf_func_state *state;
10765         int i, err = 0;
10766
10767         state = old->frame[old->curframe];
10768         state_reg = state->regs;
10769         for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
10770                 if (state_reg->type != SCALAR_VALUE ||
10771                     !state_reg->precise)
10772                         continue;
10773                 if (env->log.level & BPF_LOG_LEVEL2)
10774                         verbose(env, "propagating r%d\n", i);
10775                 err = mark_chain_precision(env, i);
10776                 if (err < 0)
10777                         return err;
10778         }
10779
10780         for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
10781                 if (!is_spilled_reg(&state->stack[i]))
10782                         continue;
10783                 state_reg = &state->stack[i].spilled_ptr;
10784                 if (state_reg->type != SCALAR_VALUE ||
10785                     !state_reg->precise)
10786                         continue;
10787                 if (env->log.level & BPF_LOG_LEVEL2)
10788                         verbose(env, "propagating fp%d\n",
10789                                 (-i - 1) * BPF_REG_SIZE);
10790                 err = mark_chain_precision_stack(env, i);
10791                 if (err < 0)
10792                         return err;
10793         }
10794         return 0;
10795 }
10796
10797 static bool states_maybe_looping(struct bpf_verifier_state *old,
10798                                  struct bpf_verifier_state *cur)
10799 {
10800         struct bpf_func_state *fold, *fcur;
10801         int i, fr = cur->curframe;
10802
10803         if (old->curframe != fr)
10804                 return false;
10805
10806         fold = old->frame[fr];
10807         fcur = cur->frame[fr];
10808         for (i = 0; i < MAX_BPF_REG; i++)
10809                 if (memcmp(&fold->regs[i], &fcur->regs[i],
10810                            offsetof(struct bpf_reg_state, parent)))
10811                         return false;
10812         return true;
10813 }
10814
10815
10816 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
10817 {
10818         struct bpf_verifier_state_list *new_sl;
10819         struct bpf_verifier_state_list *sl, **pprev;
10820         struct bpf_verifier_state *cur = env->cur_state, *new;
10821         int i, j, err, states_cnt = 0;
10822         bool add_new_state = env->test_state_freq ? true : false;
10823
10824         cur->last_insn_idx = env->prev_insn_idx;
10825         if (!env->insn_aux_data[insn_idx].prune_point)
10826                 /* this 'insn_idx' instruction wasn't marked, so we will not
10827                  * be doing state search here
10828                  */
10829                 return 0;
10830
10831         /* bpf progs typically have pruning point every 4 instructions
10832          * http://vger.kernel.org/bpfconf2019.html#session-1
10833          * Do not add new state for future pruning if the verifier hasn't seen
10834          * at least 2 jumps and at least 8 instructions.
10835          * This heuristics helps decrease 'total_states' and 'peak_states' metric.
10836          * In tests that amounts to up to 50% reduction into total verifier
10837          * memory consumption and 20% verifier time speedup.
10838          */
10839         if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
10840             env->insn_processed - env->prev_insn_processed >= 8)
10841                 add_new_state = true;
10842
10843         pprev = explored_state(env, insn_idx);
10844         sl = *pprev;
10845
10846         clean_live_states(env, insn_idx, cur);
10847
10848         while (sl) {
10849                 states_cnt++;
10850                 if (sl->state.insn_idx != insn_idx)
10851                         goto next;
10852
10853                 if (sl->state.branches) {
10854                         struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
10855
10856                         if (frame->in_async_callback_fn &&
10857                             frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
10858                                 /* Different async_entry_cnt means that the verifier is
10859                                  * processing another entry into async callback.
10860                                  * Seeing the same state is not an indication of infinite
10861                                  * loop or infinite recursion.
10862                                  * But finding the same state doesn't mean that it's safe
10863                                  * to stop processing the current state. The previous state
10864                                  * hasn't yet reached bpf_exit, since state.branches > 0.
10865                                  * Checking in_async_callback_fn alone is not enough either.
10866                                  * Since the verifier still needs to catch infinite loops
10867                                  * inside async callbacks.
10868                                  */
10869                         } else if (states_maybe_looping(&sl->state, cur) &&
10870                                    states_equal(env, &sl->state, cur)) {
10871                                 verbose_linfo(env, insn_idx, "; ");
10872                                 verbose(env, "infinite loop detected at insn %d\n", insn_idx);
10873                                 return -EINVAL;
10874                         }
10875                         /* if the verifier is processing a loop, avoid adding new state
10876                          * too often, since different loop iterations have distinct
10877                          * states and may not help future pruning.
10878                          * This threshold shouldn't be too low to make sure that
10879                          * a loop with large bound will be rejected quickly.
10880                          * The most abusive loop will be:
10881                          * r1 += 1
10882                          * if r1 < 1000000 goto pc-2
10883                          * 1M insn_procssed limit / 100 == 10k peak states.
10884                          * This threshold shouldn't be too high either, since states
10885                          * at the end of the loop are likely to be useful in pruning.
10886                          */
10887                         if (env->jmps_processed - env->prev_jmps_processed < 20 &&
10888                             env->insn_processed - env->prev_insn_processed < 100)
10889                                 add_new_state = false;
10890                         goto miss;
10891                 }
10892                 if (states_equal(env, &sl->state, cur)) {
10893                         sl->hit_cnt++;
10894                         /* reached equivalent register/stack state,
10895                          * prune the search.
10896                          * Registers read by the continuation are read by us.
10897                          * If we have any write marks in env->cur_state, they
10898                          * will prevent corresponding reads in the continuation
10899                          * from reaching our parent (an explored_state).  Our
10900                          * own state will get the read marks recorded, but
10901                          * they'll be immediately forgotten as we're pruning
10902                          * this state and will pop a new one.
10903                          */
10904                         err = propagate_liveness(env, &sl->state, cur);
10905
10906                         /* if previous state reached the exit with precision and
10907                          * current state is equivalent to it (except precsion marks)
10908                          * the precision needs to be propagated back in
10909                          * the current state.
10910                          */
10911                         err = err ? : push_jmp_history(env, cur);
10912                         err = err ? : propagate_precision(env, &sl->state);
10913                         if (err)
10914                                 return err;
10915                         return 1;
10916                 }
10917 miss:
10918                 /* when new state is not going to be added do not increase miss count.
10919                  * Otherwise several loop iterations will remove the state
10920                  * recorded earlier. The goal of these heuristics is to have
10921                  * states from some iterations of the loop (some in the beginning
10922                  * and some at the end) to help pruning.
10923                  */
10924                 if (add_new_state)
10925                         sl->miss_cnt++;
10926                 /* heuristic to determine whether this state is beneficial
10927                  * to keep checking from state equivalence point of view.
10928                  * Higher numbers increase max_states_per_insn and verification time,
10929                  * but do not meaningfully decrease insn_processed.
10930                  */
10931                 if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
10932                         /* the state is unlikely to be useful. Remove it to
10933                          * speed up verification
10934                          */
10935                         *pprev = sl->next;
10936                         if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
10937                                 u32 br = sl->state.branches;
10938
10939                                 WARN_ONCE(br,
10940                                           "BUG live_done but branches_to_explore %d\n",
10941                                           br);
10942                                 free_verifier_state(&sl->state, false);
10943                                 kfree(sl);
10944                                 env->peak_states--;
10945                         } else {
10946                                 /* cannot free this state, since parentage chain may
10947                                  * walk it later. Add it for free_list instead to
10948                                  * be freed at the end of verification
10949                                  */
10950                                 sl->next = env->free_list;
10951                                 env->free_list = sl;
10952                         }
10953                         sl = *pprev;
10954                         continue;
10955                 }
10956 next:
10957                 pprev = &sl->next;
10958                 sl = *pprev;
10959         }
10960
10961         if (env->max_states_per_insn < states_cnt)
10962                 env->max_states_per_insn = states_cnt;
10963
10964         if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
10965                 return push_jmp_history(env, cur);
10966
10967         if (!add_new_state)
10968                 return push_jmp_history(env, cur);
10969
10970         /* There were no equivalent states, remember the current one.
10971          * Technically the current state is not proven to be safe yet,
10972          * but it will either reach outer most bpf_exit (which means it's safe)
10973          * or it will be rejected. When there are no loops the verifier won't be
10974          * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
10975          * again on the way to bpf_exit.
10976          * When looping the sl->state.branches will be > 0 and this state
10977          * will not be considered for equivalence until branches == 0.
10978          */
10979         new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
10980         if (!new_sl)
10981                 return -ENOMEM;
10982         env->total_states++;
10983         env->peak_states++;
10984         env->prev_jmps_processed = env->jmps_processed;
10985         env->prev_insn_processed = env->insn_processed;
10986
10987         /* add new state to the head of linked list */
10988         new = &new_sl->state;
10989         err = copy_verifier_state(new, cur);
10990         if (err) {
10991                 free_verifier_state(new, false);
10992                 kfree(new_sl);
10993                 return err;
10994         }
10995         new->insn_idx = insn_idx;
10996         WARN_ONCE(new->branches != 1,
10997                   "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
10998
10999         cur->parent = new;
11000         cur->first_insn_idx = insn_idx;
11001         clear_jmp_history(cur);
11002         new_sl->next = *explored_state(env, insn_idx);
11003         *explored_state(env, insn_idx) = new_sl;
11004         /* connect new state to parentage chain. Current frame needs all
11005          * registers connected. Only r6 - r9 of the callers are alive (pushed
11006          * to the stack implicitly by JITs) so in callers' frames connect just
11007          * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
11008          * the state of the call instruction (with WRITTEN set), and r0 comes
11009          * from callee with its full parentage chain, anyway.
11010          */
11011         /* clear write marks in current state: the writes we did are not writes
11012          * our child did, so they don't screen off its reads from us.
11013          * (There are no read marks in current state, because reads always mark
11014          * their parent and current state never has children yet.  Only
11015          * explored_states can get read marks.)
11016          */
11017         for (j = 0; j <= cur->curframe; j++) {
11018                 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
11019                         cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
11020                 for (i = 0; i < BPF_REG_FP; i++)
11021                         cur->frame[j]->regs[i].live = REG_LIVE_NONE;
11022         }
11023
11024         /* all stack frames are accessible from callee, clear them all */
11025         for (j = 0; j <= cur->curframe; j++) {
11026                 struct bpf_func_state *frame = cur->frame[j];
11027                 struct bpf_func_state *newframe = new->frame[j];
11028
11029                 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
11030                         frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
11031                         frame->stack[i].spilled_ptr.parent =
11032                                                 &newframe->stack[i].spilled_ptr;
11033                 }
11034         }
11035         return 0;
11036 }
11037
11038 /* Return true if it's OK to have the same insn return a different type. */
11039 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
11040 {
11041         switch (type) {
11042         case PTR_TO_CTX:
11043         case PTR_TO_SOCKET:
11044         case PTR_TO_SOCKET_OR_NULL:
11045         case PTR_TO_SOCK_COMMON:
11046         case PTR_TO_SOCK_COMMON_OR_NULL:
11047         case PTR_TO_TCP_SOCK:
11048         case PTR_TO_TCP_SOCK_OR_NULL:
11049         case PTR_TO_XDP_SOCK:
11050         case PTR_TO_BTF_ID:
11051         case PTR_TO_BTF_ID_OR_NULL:
11052                 return false;
11053         default:
11054                 return true;
11055         }
11056 }
11057
11058 /* If an instruction was previously used with particular pointer types, then we
11059  * need to be careful to avoid cases such as the below, where it may be ok
11060  * for one branch accessing the pointer, but not ok for the other branch:
11061  *
11062  * R1 = sock_ptr
11063  * goto X;
11064  * ...
11065  * R1 = some_other_valid_ptr;
11066  * goto X;
11067  * ...
11068  * R2 = *(u32 *)(R1 + 0);
11069  */
11070 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
11071 {
11072         return src != prev && (!reg_type_mismatch_ok(src) ||
11073                                !reg_type_mismatch_ok(prev));
11074 }
11075
11076 static int do_check(struct bpf_verifier_env *env)
11077 {
11078         bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
11079         struct bpf_verifier_state *state = env->cur_state;
11080         struct bpf_insn *insns = env->prog->insnsi;
11081         struct bpf_reg_state *regs;
11082         int insn_cnt = env->prog->len;
11083         bool do_print_state = false;
11084         int prev_insn_idx = -1;
11085
11086         for (;;) {
11087                 struct bpf_insn *insn;
11088                 u8 class;
11089                 int err;
11090
11091                 env->prev_insn_idx = prev_insn_idx;
11092                 if (env->insn_idx >= insn_cnt) {
11093                         verbose(env, "invalid insn idx %d insn_cnt %d\n",
11094                                 env->insn_idx, insn_cnt);
11095                         return -EFAULT;
11096                 }
11097
11098                 insn = &insns[env->insn_idx];
11099                 class = BPF_CLASS(insn->code);
11100
11101                 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
11102                         verbose(env,
11103                                 "BPF program is too large. Processed %d insn\n",
11104                                 env->insn_processed);
11105                         return -E2BIG;
11106                 }
11107
11108                 err = is_state_visited(env, env->insn_idx);
11109                 if (err < 0)
11110                         return err;
11111                 if (err == 1) {
11112                         /* found equivalent state, can prune the search */
11113                         if (env->log.level & BPF_LOG_LEVEL) {
11114                                 if (do_print_state)
11115                                         verbose(env, "\nfrom %d to %d%s: safe\n",
11116                                                 env->prev_insn_idx, env->insn_idx,
11117                                                 env->cur_state->speculative ?
11118                                                 " (speculative execution)" : "");
11119                                 else
11120                                         verbose(env, "%d: safe\n", env->insn_idx);
11121                         }
11122                         goto process_bpf_exit;
11123                 }
11124
11125                 if (signal_pending(current))
11126                         return -EAGAIN;
11127
11128                 if (need_resched())
11129                         cond_resched();
11130
11131                 if (env->log.level & BPF_LOG_LEVEL2 ||
11132                     (env->log.level & BPF_LOG_LEVEL && do_print_state)) {
11133                         if (env->log.level & BPF_LOG_LEVEL2)
11134                                 verbose(env, "%d:", env->insn_idx);
11135                         else
11136                                 verbose(env, "\nfrom %d to %d%s:",
11137                                         env->prev_insn_idx, env->insn_idx,
11138                                         env->cur_state->speculative ?
11139                                         " (speculative execution)" : "");
11140                         print_verifier_state(env, state->frame[state->curframe]);
11141                         do_print_state = false;
11142                 }
11143
11144                 if (env->log.level & BPF_LOG_LEVEL) {
11145                         const struct bpf_insn_cbs cbs = {
11146                                 .cb_call        = disasm_kfunc_name,
11147                                 .cb_print       = verbose,
11148                                 .private_data   = env,
11149                         };
11150
11151                         verbose_linfo(env, env->insn_idx, "; ");
11152                         verbose(env, "%d: ", env->insn_idx);
11153                         print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
11154                 }
11155
11156                 if (bpf_prog_is_dev_bound(env->prog->aux)) {
11157                         err = bpf_prog_offload_verify_insn(env, env->insn_idx,
11158                                                            env->prev_insn_idx);
11159                         if (err)
11160                                 return err;
11161                 }
11162
11163                 regs = cur_regs(env);
11164                 sanitize_mark_insn_seen(env);
11165                 prev_insn_idx = env->insn_idx;
11166
11167                 if (class == BPF_ALU || class == BPF_ALU64) {
11168                         err = check_alu_op(env, insn);
11169                         if (err)
11170                                 return err;
11171
11172                 } else if (class == BPF_LDX) {
11173                         enum bpf_reg_type *prev_src_type, src_reg_type;
11174
11175                         /* check for reserved fields is already done */
11176
11177                         /* check src operand */
11178                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
11179                         if (err)
11180                                 return err;
11181
11182                         err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
11183                         if (err)
11184                                 return err;
11185
11186                         src_reg_type = regs[insn->src_reg].type;
11187
11188                         /* check that memory (src_reg + off) is readable,
11189                          * the state of dst_reg will be updated by this func
11190                          */
11191                         err = check_mem_access(env, env->insn_idx, insn->src_reg,
11192                                                insn->off, BPF_SIZE(insn->code),
11193                                                BPF_READ, insn->dst_reg, false);
11194                         if (err)
11195                                 return err;
11196
11197                         prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
11198
11199                         if (*prev_src_type == NOT_INIT) {
11200                                 /* saw a valid insn
11201                                  * dst_reg = *(u32 *)(src_reg + off)
11202                                  * save type to validate intersecting paths
11203                                  */
11204                                 *prev_src_type = src_reg_type;
11205
11206                         } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
11207                                 /* ABuser program is trying to use the same insn
11208                                  * dst_reg = *(u32*) (src_reg + off)
11209                                  * with different pointer types:
11210                                  * src_reg == ctx in one branch and
11211                                  * src_reg == stack|map in some other branch.
11212                                  * Reject it.
11213                                  */
11214                                 verbose(env, "same insn cannot be used with different pointers\n");
11215                                 return -EINVAL;
11216                         }
11217
11218                 } else if (class == BPF_STX) {
11219                         enum bpf_reg_type *prev_dst_type, dst_reg_type;
11220
11221                         if (BPF_MODE(insn->code) == BPF_ATOMIC) {
11222                                 err = check_atomic(env, env->insn_idx, insn);
11223                                 if (err)
11224                                         return err;
11225                                 env->insn_idx++;
11226                                 continue;
11227                         }
11228
11229                         if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
11230                                 verbose(env, "BPF_STX uses reserved fields\n");
11231                                 return -EINVAL;
11232                         }
11233
11234                         /* check src1 operand */
11235                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
11236                         if (err)
11237                                 return err;
11238                         /* check src2 operand */
11239                         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
11240                         if (err)
11241                                 return err;
11242
11243                         dst_reg_type = regs[insn->dst_reg].type;
11244
11245                         /* check that memory (dst_reg + off) is writeable */
11246                         err = check_mem_access(env, env->insn_idx, insn->dst_reg,
11247                                                insn->off, BPF_SIZE(insn->code),
11248                                                BPF_WRITE, insn->src_reg, false);
11249                         if (err)
11250                                 return err;
11251
11252                         prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
11253
11254                         if (*prev_dst_type == NOT_INIT) {
11255                                 *prev_dst_type = dst_reg_type;
11256                         } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
11257                                 verbose(env, "same insn cannot be used with different pointers\n");
11258                                 return -EINVAL;
11259                         }
11260
11261                 } else if (class == BPF_ST) {
11262                         if (BPF_MODE(insn->code) != BPF_MEM ||
11263                             insn->src_reg != BPF_REG_0) {
11264                                 verbose(env, "BPF_ST uses reserved fields\n");
11265                                 return -EINVAL;
11266                         }
11267                         /* check src operand */
11268                         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
11269                         if (err)
11270                                 return err;
11271
11272                         if (is_ctx_reg(env, insn->dst_reg)) {
11273                                 verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
11274                                         insn->dst_reg,
11275                                         reg_type_str[reg_state(env, insn->dst_reg)->type]);
11276                                 return -EACCES;
11277                         }
11278
11279                         /* check that memory (dst_reg + off) is writeable */
11280                         err = check_mem_access(env, env->insn_idx, insn->dst_reg,
11281                                                insn->off, BPF_SIZE(insn->code),
11282                                                BPF_WRITE, -1, false);
11283                         if (err)
11284                                 return err;
11285
11286                 } else if (class == BPF_JMP || class == BPF_JMP32) {
11287                         u8 opcode = BPF_OP(insn->code);
11288
11289                         env->jmps_processed++;
11290                         if (opcode == BPF_CALL) {
11291                                 if (BPF_SRC(insn->code) != BPF_K ||
11292                                     (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
11293                                      && insn->off != 0) ||
11294                                     (insn->src_reg != BPF_REG_0 &&
11295                                      insn->src_reg != BPF_PSEUDO_CALL &&
11296                                      insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
11297                                     insn->dst_reg != BPF_REG_0 ||
11298                                     class == BPF_JMP32) {
11299                                         verbose(env, "BPF_CALL uses reserved fields\n");
11300                                         return -EINVAL;
11301                                 }
11302
11303                                 if (env->cur_state->active_spin_lock &&
11304                                     (insn->src_reg == BPF_PSEUDO_CALL ||
11305                                      insn->imm != BPF_FUNC_spin_unlock)) {
11306                                         verbose(env, "function calls are not allowed while holding a lock\n");
11307                                         return -EINVAL;
11308                                 }
11309                                 if (insn->src_reg == BPF_PSEUDO_CALL)
11310                                         err = check_func_call(env, insn, &env->insn_idx);
11311                                 else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
11312                                         err = check_kfunc_call(env, insn);
11313                                 else
11314                                         err = check_helper_call(env, insn, &env->insn_idx);
11315                                 if (err)
11316                                         return err;
11317                         } else if (opcode == BPF_JA) {
11318                                 if (BPF_SRC(insn->code) != BPF_K ||
11319                                     insn->imm != 0 ||
11320                                     insn->src_reg != BPF_REG_0 ||
11321                                     insn->dst_reg != BPF_REG_0 ||
11322                                     class == BPF_JMP32) {
11323                                         verbose(env, "BPF_JA uses reserved fields\n");
11324                                         return -EINVAL;
11325                                 }
11326
11327                                 env->insn_idx += insn->off + 1;
11328                                 continue;
11329
11330                         } else if (opcode == BPF_EXIT) {
11331                                 if (BPF_SRC(insn->code) != BPF_K ||
11332                                     insn->imm != 0 ||
11333                                     insn->src_reg != BPF_REG_0 ||
11334                                     insn->dst_reg != BPF_REG_0 ||
11335                                     class == BPF_JMP32) {
11336                                         verbose(env, "BPF_EXIT uses reserved fields\n");
11337                                         return -EINVAL;
11338                                 }
11339
11340                                 if (env->cur_state->active_spin_lock) {
11341                                         verbose(env, "bpf_spin_unlock is missing\n");
11342                                         return -EINVAL;
11343                                 }
11344
11345                                 if (state->curframe) {
11346                                         /* exit from nested function */
11347                                         err = prepare_func_exit(env, &env->insn_idx);
11348                                         if (err)
11349                                                 return err;
11350                                         do_print_state = true;
11351                                         continue;
11352                                 }
11353
11354                                 err = check_reference_leak(env);
11355                                 if (err)
11356                                         return err;
11357
11358                                 err = check_return_code(env);
11359                                 if (err)
11360                                         return err;
11361 process_bpf_exit:
11362                                 update_branch_counts(env, env->cur_state);
11363                                 err = pop_stack(env, &prev_insn_idx,
11364                                                 &env->insn_idx, pop_log);
11365                                 if (err < 0) {
11366                                         if (err != -ENOENT)
11367                                                 return err;
11368                                         break;
11369                                 } else {
11370                                         do_print_state = true;
11371                                         continue;
11372                                 }
11373                         } else {
11374                                 err = check_cond_jmp_op(env, insn, &env->insn_idx);
11375                                 if (err)
11376                                         return err;
11377                         }
11378                 } else if (class == BPF_LD) {
11379                         u8 mode = BPF_MODE(insn->code);
11380
11381                         if (mode == BPF_ABS || mode == BPF_IND) {
11382                                 err = check_ld_abs(env, insn);
11383                                 if (err)
11384                                         return err;
11385
11386                         } else if (mode == BPF_IMM) {
11387                                 err = check_ld_imm(env, insn);
11388                                 if (err)
11389                                         return err;
11390
11391                                 env->insn_idx++;
11392                                 sanitize_mark_insn_seen(env);
11393                         } else {
11394                                 verbose(env, "invalid BPF_LD mode\n");
11395                                 return -EINVAL;
11396                         }
11397                 } else {
11398                         verbose(env, "unknown insn class %d\n", class);
11399                         return -EINVAL;
11400                 }
11401
11402                 env->insn_idx++;
11403         }
11404
11405         return 0;
11406 }
11407
11408 static int find_btf_percpu_datasec(struct btf *btf)
11409 {
11410         const struct btf_type *t;
11411         const char *tname;
11412         int i, n;
11413
11414         /*
11415          * Both vmlinux and module each have their own ".data..percpu"
11416          * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF
11417          * types to look at only module's own BTF types.
11418          */
11419         n = btf_nr_types(btf);
11420         if (btf_is_module(btf))
11421                 i = btf_nr_types(btf_vmlinux);
11422         else
11423                 i = 1;
11424
11425         for(; i < n; i++) {
11426                 t = btf_type_by_id(btf, i);
11427                 if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
11428                         continue;
11429
11430                 tname = btf_name_by_offset(btf, t->name_off);
11431                 if (!strcmp(tname, ".data..percpu"))
11432                         return i;
11433         }
11434
11435         return -ENOENT;
11436 }
11437
11438 /* replace pseudo btf_id with kernel symbol address */
11439 static int check_pseudo_btf_id(struct bpf_verifier_env *env,
11440                                struct bpf_insn *insn,
11441                                struct bpf_insn_aux_data *aux)
11442 {
11443         const struct btf_var_secinfo *vsi;
11444         const struct btf_type *datasec;
11445         struct btf_mod_pair *btf_mod;
11446         const struct btf_type *t;
11447         const char *sym_name;
11448         bool percpu = false;
11449         u32 type, id = insn->imm;
11450         struct btf *btf;
11451         s32 datasec_id;
11452         u64 addr;
11453         int i, btf_fd, err;
11454
11455         btf_fd = insn[1].imm;
11456         if (btf_fd) {
11457                 btf = btf_get_by_fd(btf_fd);
11458                 if (IS_ERR(btf)) {
11459                         verbose(env, "invalid module BTF object FD specified.\n");
11460                         return -EINVAL;
11461                 }
11462         } else {
11463                 if (!btf_vmlinux) {
11464                         verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
11465                         return -EINVAL;
11466                 }
11467                 btf = btf_vmlinux;
11468                 btf_get(btf);
11469         }
11470
11471         t = btf_type_by_id(btf, id);
11472         if (!t) {
11473                 verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
11474                 err = -ENOENT;
11475                 goto err_put;
11476         }
11477
11478         if (!btf_type_is_var(t)) {
11479                 verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id);
11480                 err = -EINVAL;
11481                 goto err_put;
11482         }
11483
11484         sym_name = btf_name_by_offset(btf, t->name_off);
11485         addr = kallsyms_lookup_name(sym_name);
11486         if (!addr) {
11487                 verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
11488                         sym_name);
11489                 err = -ENOENT;
11490                 goto err_put;
11491         }
11492
11493         datasec_id = find_btf_percpu_datasec(btf);
11494         if (datasec_id > 0) {
11495                 datasec = btf_type_by_id(btf, datasec_id);
11496                 for_each_vsi(i, datasec, vsi) {
11497                         if (vsi->type == id) {
11498                                 percpu = true;
11499                                 break;
11500                         }
11501                 }
11502         }
11503
11504         insn[0].imm = (u32)addr;
11505         insn[1].imm = addr >> 32;
11506
11507         type = t->type;
11508         t = btf_type_skip_modifiers(btf, type, NULL);
11509         if (percpu) {
11510                 aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
11511                 aux->btf_var.btf = btf;
11512                 aux->btf_var.btf_id = type;
11513         } else if (!btf_type_is_struct(t)) {
11514                 const struct btf_type *ret;
11515                 const char *tname;
11516                 u32 tsize;
11517
11518                 /* resolve the type size of ksym. */
11519                 ret = btf_resolve_size(btf, t, &tsize);
11520                 if (IS_ERR(ret)) {
11521                         tname = btf_name_by_offset(btf, t->name_off);
11522                         verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
11523                                 tname, PTR_ERR(ret));
11524                         err = -EINVAL;
11525                         goto err_put;
11526                 }
11527                 aux->btf_var.reg_type = PTR_TO_MEM;
11528                 aux->btf_var.mem_size = tsize;
11529         } else {
11530                 aux->btf_var.reg_type = PTR_TO_BTF_ID;
11531                 aux->btf_var.btf = btf;
11532                 aux->btf_var.btf_id = type;
11533         }
11534
11535         /* check whether we recorded this BTF (and maybe module) already */
11536         for (i = 0; i < env->used_btf_cnt; i++) {
11537                 if (env->used_btfs[i].btf == btf) {
11538                         btf_put(btf);
11539                         return 0;
11540                 }
11541         }
11542
11543         if (env->used_btf_cnt >= MAX_USED_BTFS) {
11544                 err = -E2BIG;
11545                 goto err_put;
11546         }
11547
11548         btf_mod = &env->used_btfs[env->used_btf_cnt];
11549         btf_mod->btf = btf;
11550         btf_mod->module = NULL;
11551
11552         /* if we reference variables from kernel module, bump its refcount */
11553         if (btf_is_module(btf)) {
11554                 btf_mod->module = btf_try_get_module(btf);
11555                 if (!btf_mod->module) {
11556                         err = -ENXIO;
11557                         goto err_put;
11558                 }
11559         }
11560
11561         env->used_btf_cnt++;
11562
11563         return 0;
11564 err_put:
11565         btf_put(btf);
11566         return err;
11567 }
11568
11569 static int check_map_prealloc(struct bpf_map *map)
11570 {
11571         return (map->map_type != BPF_MAP_TYPE_HASH &&
11572                 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
11573                 map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
11574                 !(map->map_flags & BPF_F_NO_PREALLOC);
11575 }
11576
11577 static bool is_tracing_prog_type(enum bpf_prog_type type)
11578 {
11579         switch (type) {
11580         case BPF_PROG_TYPE_KPROBE:
11581         case BPF_PROG_TYPE_TRACEPOINT:
11582         case BPF_PROG_TYPE_PERF_EVENT:
11583         case BPF_PROG_TYPE_RAW_TRACEPOINT:
11584                 return true;
11585         default:
11586                 return false;
11587         }
11588 }
11589
11590 static bool is_preallocated_map(struct bpf_map *map)
11591 {
11592         if (!check_map_prealloc(map))
11593                 return false;
11594         if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta))
11595                 return false;
11596         return true;
11597 }
11598
11599 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
11600                                         struct bpf_map *map,
11601                                         struct bpf_prog *prog)
11602
11603 {
11604         enum bpf_prog_type prog_type = resolve_prog_type(prog);
11605         /*
11606          * Validate that trace type programs use preallocated hash maps.
11607          *
11608          * For programs attached to PERF events this is mandatory as the
11609          * perf NMI can hit any arbitrary code sequence.
11610          *
11611          * All other trace types using preallocated hash maps are unsafe as
11612          * well because tracepoint or kprobes can be inside locked regions
11613          * of the memory allocator or at a place where a recursion into the
11614          * memory allocator would see inconsistent state.
11615          *
11616          * On RT enabled kernels run-time allocation of all trace type
11617          * programs is strictly prohibited due to lock type constraints. On
11618          * !RT kernels it is allowed for backwards compatibility reasons for
11619          * now, but warnings are emitted so developers are made aware of
11620          * the unsafety and can fix their programs before this is enforced.
11621          */
11622         if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) {
11623                 if (prog_type == BPF_PROG_TYPE_PERF_EVENT) {
11624                         verbose(env, "perf_event programs can only use preallocated hash map\n");
11625                         return -EINVAL;
11626                 }
11627                 if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
11628                         verbose(env, "trace type programs can only use preallocated hash map\n");
11629                         return -EINVAL;
11630                 }
11631                 WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
11632                 verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
11633         }
11634
11635         if (map_value_has_spin_lock(map)) {
11636                 if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
11637                         verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
11638                         return -EINVAL;
11639                 }
11640
11641                 if (is_tracing_prog_type(prog_type)) {
11642                         verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
11643                         return -EINVAL;
11644                 }
11645
11646                 if (prog->aux->sleepable) {
11647                         verbose(env, "sleepable progs cannot use bpf_spin_lock yet\n");
11648                         return -EINVAL;
11649                 }
11650         }
11651
11652         if (map_value_has_timer(map)) {
11653                 if (is_tracing_prog_type(prog_type)) {
11654                         verbose(env, "tracing progs cannot use bpf_timer yet\n");
11655                         return -EINVAL;
11656                 }
11657         }
11658
11659         if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
11660             !bpf_offload_prog_map_match(prog, map)) {
11661                 verbose(env, "offload device mismatch between prog and map\n");
11662                 return -EINVAL;
11663         }
11664
11665         if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
11666                 verbose(env, "bpf_struct_ops map cannot be used in prog\n");
11667                 return -EINVAL;
11668         }
11669
11670         if (prog->aux->sleepable)
11671                 switch (map->map_type) {
11672                 case BPF_MAP_TYPE_HASH:
11673                 case BPF_MAP_TYPE_LRU_HASH:
11674                 case BPF_MAP_TYPE_ARRAY:
11675                 case BPF_MAP_TYPE_PERCPU_HASH:
11676                 case BPF_MAP_TYPE_PERCPU_ARRAY:
11677                 case BPF_MAP_TYPE_LRU_PERCPU_HASH:
11678                 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
11679                 case BPF_MAP_TYPE_HASH_OF_MAPS:
11680                         if (!is_preallocated_map(map)) {
11681                                 verbose(env,
11682                                         "Sleepable programs can only use preallocated maps\n");
11683                                 return -EINVAL;
11684                         }
11685                         break;
11686                 case BPF_MAP_TYPE_RINGBUF:
11687                         break;
11688                 default:
11689                         verbose(env,
11690                                 "Sleepable programs can only use array, hash, and ringbuf maps\n");
11691                         return -EINVAL;
11692                 }
11693
11694         return 0;
11695 }
11696
11697 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
11698 {
11699         return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
11700                 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
11701 }
11702
11703 /* find and rewrite pseudo imm in ld_imm64 instructions:
11704  *
11705  * 1. if it accesses map FD, replace it with actual map pointer.
11706  * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
11707  *
11708  * NOTE: btf_vmlinux is required for converting pseudo btf_id.
11709  */
11710 static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
11711 {
11712         struct bpf_insn *insn = env->prog->insnsi;
11713         int insn_cnt = env->prog->len;
11714         int i, j, err;
11715
11716         err = bpf_prog_calc_tag(env->prog);
11717         if (err)
11718                 return err;
11719
11720         for (i = 0; i < insn_cnt; i++, insn++) {
11721                 if (BPF_CLASS(insn->code) == BPF_LDX &&
11722                     (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
11723                         verbose(env, "BPF_LDX uses reserved fields\n");
11724                         return -EINVAL;
11725                 }
11726
11727                 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
11728                         struct bpf_insn_aux_data *aux;
11729                         struct bpf_map *map;
11730                         struct fd f;
11731                         u64 addr;
11732                         u32 fd;
11733
11734                         if (i == insn_cnt - 1 || insn[1].code != 0 ||
11735                             insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
11736                             insn[1].off != 0) {
11737                                 verbose(env, "invalid bpf_ld_imm64 insn\n");
11738                                 return -EINVAL;
11739                         }
11740
11741                         if (insn[0].src_reg == 0)
11742                                 /* valid generic load 64-bit imm */
11743                                 goto next_insn;
11744
11745                         if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
11746                                 aux = &env->insn_aux_data[i];
11747                                 err = check_pseudo_btf_id(env, insn, aux);
11748                                 if (err)
11749                                         return err;
11750                                 goto next_insn;
11751                         }
11752
11753                         if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
11754                                 aux = &env->insn_aux_data[i];
11755                                 aux->ptr_type = PTR_TO_FUNC;
11756                                 goto next_insn;
11757                         }
11758
11759                         /* In final convert_pseudo_ld_imm64() step, this is
11760                          * converted into regular 64-bit imm load insn.
11761                          */
11762                         switch (insn[0].src_reg) {
11763                         case BPF_PSEUDO_MAP_VALUE:
11764                         case BPF_PSEUDO_MAP_IDX_VALUE:
11765                                 break;
11766                         case BPF_PSEUDO_MAP_FD:
11767                         case BPF_PSEUDO_MAP_IDX:
11768                                 if (insn[1].imm == 0)
11769                                         break;
11770                                 fallthrough;
11771                         default:
11772                                 verbose(env, "unrecognized bpf_ld_imm64 insn\n");
11773                                 return -EINVAL;
11774                         }
11775
11776                         switch (insn[0].src_reg) {
11777                         case BPF_PSEUDO_MAP_IDX_VALUE:
11778                         case BPF_PSEUDO_MAP_IDX:
11779                                 if (bpfptr_is_null(env->fd_array)) {
11780                                         verbose(env, "fd_idx without fd_array is invalid\n");
11781                                         return -EPROTO;
11782                                 }
11783                                 if (copy_from_bpfptr_offset(&fd, env->fd_array,
11784                                                             insn[0].imm * sizeof(fd),
11785                                                             sizeof(fd)))
11786                                         return -EFAULT;
11787                                 break;
11788                         default:
11789                                 fd = insn[0].imm;
11790                                 break;
11791                         }
11792
11793                         f = fdget(fd);
11794                         map = __bpf_map_get(f);
11795                         if (IS_ERR(map)) {
11796                                 verbose(env, "fd %d is not pointing to valid bpf_map\n",
11797                                         insn[0].imm);
11798                                 return PTR_ERR(map);
11799                         }
11800
11801                         err = check_map_prog_compatibility(env, map, env->prog);
11802                         if (err) {
11803                                 fdput(f);
11804                                 return err;
11805                         }
11806
11807                         aux = &env->insn_aux_data[i];
11808                         if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
11809                             insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
11810                                 addr = (unsigned long)map;
11811                         } else {
11812                                 u32 off = insn[1].imm;
11813
11814                                 if (off >= BPF_MAX_VAR_OFF) {
11815                                         verbose(env, "direct value offset of %u is not allowed\n", off);
11816                                         fdput(f);
11817                                         return -EINVAL;
11818                                 }
11819
11820                                 if (!map->ops->map_direct_value_addr) {
11821                                         verbose(env, "no direct value access support for this map type\n");
11822                                         fdput(f);
11823                                         return -EINVAL;
11824                                 }
11825
11826                                 err = map->ops->map_direct_value_addr(map, &addr, off);
11827                                 if (err) {
11828                                         verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
11829                                                 map->value_size, off);
11830                                         fdput(f);
11831                                         return err;
11832                                 }
11833
11834                                 aux->map_off = off;
11835                                 addr += off;
11836                         }
11837
11838                         insn[0].imm = (u32)addr;
11839                         insn[1].imm = addr >> 32;
11840
11841                         /* check whether we recorded this map already */
11842                         for (j = 0; j < env->used_map_cnt; j++) {
11843                                 if (env->used_maps[j] == map) {
11844                                         aux->map_index = j;
11845                                         fdput(f);
11846                                         goto next_insn;
11847                                 }
11848                         }
11849
11850                         if (env->used_map_cnt >= MAX_USED_MAPS) {
11851                                 fdput(f);
11852                                 return -E2BIG;
11853                         }
11854
11855                         /* hold the map. If the program is rejected by verifier,
11856                          * the map will be released by release_maps() or it
11857                          * will be used by the valid program until it's unloaded
11858                          * and all maps are released in free_used_maps()
11859                          */
11860                         bpf_map_inc(map);
11861
11862                         aux->map_index = env->used_map_cnt;
11863                         env->used_maps[env->used_map_cnt++] = map;
11864
11865                         if (bpf_map_is_cgroup_storage(map) &&
11866                             bpf_cgroup_storage_assign(env->prog->aux, map)) {
11867                                 verbose(env, "only one cgroup storage of each type is allowed\n");
11868                                 fdput(f);
11869                                 return -EBUSY;
11870                         }
11871
11872                         fdput(f);
11873 next_insn:
11874                         insn++;
11875                         i++;
11876                         continue;
11877                 }
11878
11879                 /* Basic sanity check before we invest more work here. */
11880                 if (!bpf_opcode_in_insntable(insn->code)) {
11881                         verbose(env, "unknown opcode %02x\n", insn->code);
11882                         return -EINVAL;
11883                 }
11884         }
11885
11886         /* now all pseudo BPF_LD_IMM64 instructions load valid
11887          * 'struct bpf_map *' into a register instead of user map_fd.
11888          * These pointers will be used later by verifier to validate map access.
11889          */
11890         return 0;
11891 }
11892
11893 /* drop refcnt of maps used by the rejected program */
11894 static void release_maps(struct bpf_verifier_env *env)
11895 {
11896         __bpf_free_used_maps(env->prog->aux, env->used_maps,
11897                              env->used_map_cnt);
11898 }
11899
11900 /* drop refcnt of maps used by the rejected program */
11901 static void release_btfs(struct bpf_verifier_env *env)
11902 {
11903         __bpf_free_used_btfs(env->prog->aux, env->used_btfs,
11904                              env->used_btf_cnt);
11905 }
11906
11907 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
11908 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
11909 {
11910         struct bpf_insn *insn = env->prog->insnsi;
11911         int insn_cnt = env->prog->len;
11912         int i;
11913
11914         for (i = 0; i < insn_cnt; i++, insn++) {
11915                 if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
11916                         continue;
11917                 if (insn->src_reg == BPF_PSEUDO_FUNC)
11918                         continue;
11919                 insn->src_reg = 0;
11920         }
11921 }
11922
11923 /* single env->prog->insni[off] instruction was replaced with the range
11924  * insni[off, off + cnt).  Adjust corresponding insn_aux_data by copying
11925  * [0, off) and [off, end) to new locations, so the patched range stays zero
11926  */
11927 static void adjust_insn_aux_data(struct bpf_verifier_env *env,
11928                                  struct bpf_insn_aux_data *new_data,
11929                                  struct bpf_prog *new_prog, u32 off, u32 cnt)
11930 {
11931         struct bpf_insn_aux_data *old_data = env->insn_aux_data;
11932         struct bpf_insn *insn = new_prog->insnsi;
11933         u32 old_seen = old_data[off].seen;
11934         u32 prog_len;
11935         int i;
11936
11937         /* aux info at OFF always needs adjustment, no matter fast path
11938          * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
11939          * original insn at old prog.
11940          */
11941         old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
11942
11943         if (cnt == 1)
11944                 return;
11945         prog_len = new_prog->len;
11946
11947         memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
11948         memcpy(new_data + off + cnt - 1, old_data + off,
11949                sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
11950         for (i = off; i < off + cnt - 1; i++) {
11951                 /* Expand insni[off]'s seen count to the patched range. */
11952                 new_data[i].seen = old_seen;
11953                 new_data[i].zext_dst = insn_has_def32(env, insn + i);
11954         }
11955         env->insn_aux_data = new_data;
11956         vfree(old_data);
11957 }
11958
11959 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
11960 {
11961         int i;
11962
11963         if (len == 1)
11964                 return;
11965         /* NOTE: fake 'exit' subprog should be updated as well. */
11966         for (i = 0; i <= env->subprog_cnt; i++) {
11967                 if (env->subprog_info[i].start <= off)
11968                         continue;
11969                 env->subprog_info[i].start += len - 1;
11970         }
11971 }
11972
11973 static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
11974 {
11975         struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
11976         int i, sz = prog->aux->size_poke_tab;
11977         struct bpf_jit_poke_descriptor *desc;
11978
11979         for (i = 0; i < sz; i++) {
11980                 desc = &tab[i];
11981                 if (desc->insn_idx <= off)
11982                         continue;
11983                 desc->insn_idx += len - 1;
11984         }
11985 }
11986
11987 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
11988                                             const struct bpf_insn *patch, u32 len)
11989 {
11990         struct bpf_prog *new_prog;
11991         struct bpf_insn_aux_data *new_data = NULL;
11992
11993         if (len > 1) {
11994                 new_data = vzalloc(array_size(env->prog->len + len - 1,
11995                                               sizeof(struct bpf_insn_aux_data)));
11996                 if (!new_data)
11997                         return NULL;
11998         }
11999
12000         new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
12001         if (IS_ERR(new_prog)) {
12002                 if (PTR_ERR(new_prog) == -ERANGE)
12003                         verbose(env,
12004                                 "insn %d cannot be patched due to 16-bit range\n",
12005                                 env->insn_aux_data[off].orig_idx);
12006                 vfree(new_data);
12007                 return NULL;
12008         }
12009         adjust_insn_aux_data(env, new_data, new_prog, off, len);
12010         adjust_subprog_starts(env, off, len);
12011         adjust_poke_descs(new_prog, off, len);
12012         return new_prog;
12013 }
12014
12015 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
12016                                               u32 off, u32 cnt)
12017 {
12018         int i, j;
12019
12020         /* find first prog starting at or after off (first to remove) */
12021         for (i = 0; i < env->subprog_cnt; i++)
12022                 if (env->subprog_info[i].start >= off)
12023                         break;
12024         /* find first prog starting at or after off + cnt (first to stay) */
12025         for (j = i; j < env->subprog_cnt; j++)
12026                 if (env->subprog_info[j].start >= off + cnt)
12027                         break;
12028         /* if j doesn't start exactly at off + cnt, we are just removing
12029          * the front of previous prog
12030          */
12031         if (env->subprog_info[j].start != off + cnt)
12032                 j--;
12033
12034         if (j > i) {
12035                 struct bpf_prog_aux *aux = env->prog->aux;
12036                 int move;
12037
12038                 /* move fake 'exit' subprog as well */
12039                 move = env->subprog_cnt + 1 - j;
12040
12041                 memmove(env->subprog_info + i,
12042                         env->subprog_info + j,
12043                         sizeof(*env->subprog_info) * move);
12044                 env->subprog_cnt -= j - i;
12045
12046                 /* remove func_info */
12047                 if (aux->func_info) {
12048                         move = aux->func_info_cnt - j;
12049
12050                         memmove(aux->func_info + i,
12051                                 aux->func_info + j,
12052                                 sizeof(*aux->func_info) * move);
12053                         aux->func_info_cnt -= j - i;
12054                         /* func_info->insn_off is set after all code rewrites,
12055                          * in adjust_btf_func() - no need to adjust
12056                          */
12057                 }
12058         } else {
12059                 /* convert i from "first prog to remove" to "first to adjust" */
12060                 if (env->subprog_info[i].start == off)
12061                         i++;
12062         }
12063
12064         /* update fake 'exit' subprog as well */
12065         for (; i <= env->subprog_cnt; i++)
12066                 env->subprog_info[i].start -= cnt;
12067
12068         return 0;
12069 }
12070
12071 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
12072                                       u32 cnt)
12073 {
12074         struct bpf_prog *prog = env->prog;
12075         u32 i, l_off, l_cnt, nr_linfo;
12076         struct bpf_line_info *linfo;
12077
12078         nr_linfo = prog->aux->nr_linfo;
12079         if (!nr_linfo)
12080                 return 0;
12081
12082         linfo = prog->aux->linfo;
12083
12084         /* find first line info to remove, count lines to be removed */
12085         for (i = 0; i < nr_linfo; i++)
12086                 if (linfo[i].insn_off >= off)
12087                         break;
12088
12089         l_off = i;
12090         l_cnt = 0;
12091         for (; i < nr_linfo; i++)
12092                 if (linfo[i].insn_off < off + cnt)
12093                         l_cnt++;
12094                 else
12095                         break;
12096
12097         /* First live insn doesn't match first live linfo, it needs to "inherit"
12098          * last removed linfo.  prog is already modified, so prog->len == off
12099          * means no live instructions after (tail of the program was removed).
12100          */
12101         if (prog->len != off && l_cnt &&
12102             (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
12103                 l_cnt--;
12104                 linfo[--i].insn_off = off + cnt;
12105         }
12106
12107         /* remove the line info which refer to the removed instructions */
12108         if (l_cnt) {
12109                 memmove(linfo + l_off, linfo + i,
12110                         sizeof(*linfo) * (nr_linfo - i));
12111
12112                 prog->aux->nr_linfo -= l_cnt;
12113                 nr_linfo = prog->aux->nr_linfo;
12114         }
12115
12116         /* pull all linfo[i].insn_off >= off + cnt in by cnt */
12117         for (i = l_off; i < nr_linfo; i++)
12118                 linfo[i].insn_off -= cnt;
12119
12120         /* fix up all subprogs (incl. 'exit') which start >= off */
12121         for (i = 0; i <= env->subprog_cnt; i++)
12122                 if (env->subprog_info[i].linfo_idx > l_off) {
12123                         /* program may have started in the removed region but
12124                          * may not be fully removed
12125                          */
12126                         if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
12127                                 env->subprog_info[i].linfo_idx -= l_cnt;
12128                         else
12129                                 env->subprog_info[i].linfo_idx = l_off;
12130                 }
12131
12132         return 0;
12133 }
12134
12135 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
12136 {
12137         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12138         unsigned int orig_prog_len = env->prog->len;
12139         int err;
12140
12141         if (bpf_prog_is_dev_bound(env->prog->aux))
12142                 bpf_prog_offload_remove_insns(env, off, cnt);
12143
12144         err = bpf_remove_insns(env->prog, off, cnt);
12145         if (err)
12146                 return err;
12147
12148         err = adjust_subprog_starts_after_remove(env, off, cnt);
12149         if (err)
12150                 return err;
12151
12152         err = bpf_adj_linfo_after_remove(env, off, cnt);
12153         if (err)
12154                 return err;
12155
12156         memmove(aux_data + off, aux_data + off + cnt,
12157                 sizeof(*aux_data) * (orig_prog_len - off - cnt));
12158
12159         return 0;
12160 }
12161
12162 /* The verifier does more data flow analysis than llvm and will not
12163  * explore branches that are dead at run time. Malicious programs can
12164  * have dead code too. Therefore replace all dead at-run-time code
12165  * with 'ja -1'.
12166  *
12167  * Just nops are not optimal, e.g. if they would sit at the end of the
12168  * program and through another bug we would manage to jump there, then
12169  * we'd execute beyond program memory otherwise. Returning exception
12170  * code also wouldn't work since we can have subprogs where the dead
12171  * code could be located.
12172  */
12173 static void sanitize_dead_code(struct bpf_verifier_env *env)
12174 {
12175         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12176         struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
12177         struct bpf_insn *insn = env->prog->insnsi;
12178         const int insn_cnt = env->prog->len;
12179         int i;
12180
12181         for (i = 0; i < insn_cnt; i++) {
12182                 if (aux_data[i].seen)
12183                         continue;
12184                 memcpy(insn + i, &trap, sizeof(trap));
12185                 aux_data[i].zext_dst = false;
12186         }
12187 }
12188
12189 static bool insn_is_cond_jump(u8 code)
12190 {
12191         u8 op;
12192
12193         if (BPF_CLASS(code) == BPF_JMP32)
12194                 return true;
12195
12196         if (BPF_CLASS(code) != BPF_JMP)
12197                 return false;
12198
12199         op = BPF_OP(code);
12200         return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
12201 }
12202
12203 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
12204 {
12205         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12206         struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
12207         struct bpf_insn *insn = env->prog->insnsi;
12208         const int insn_cnt = env->prog->len;
12209         int i;
12210
12211         for (i = 0; i < insn_cnt; i++, insn++) {
12212                 if (!insn_is_cond_jump(insn->code))
12213                         continue;
12214
12215                 if (!aux_data[i + 1].seen)
12216                         ja.off = insn->off;
12217                 else if (!aux_data[i + 1 + insn->off].seen)
12218                         ja.off = 0;
12219                 else
12220                         continue;
12221
12222                 if (bpf_prog_is_dev_bound(env->prog->aux))
12223                         bpf_prog_offload_replace_insn(env, i, &ja);
12224
12225                 memcpy(insn, &ja, sizeof(ja));
12226         }
12227 }
12228
12229 static int opt_remove_dead_code(struct bpf_verifier_env *env)
12230 {
12231         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
12232         int insn_cnt = env->prog->len;
12233         int i, err;
12234
12235         for (i = 0; i < insn_cnt; i++) {
12236                 int j;
12237
12238                 j = 0;
12239                 while (i + j < insn_cnt && !aux_data[i + j].seen)
12240                         j++;
12241                 if (!j)
12242                         continue;
12243
12244                 err = verifier_remove_insns(env, i, j);
12245                 if (err)
12246                         return err;
12247                 insn_cnt = env->prog->len;
12248         }
12249
12250         return 0;
12251 }
12252
12253 static int opt_remove_nops(struct bpf_verifier_env *env)
12254 {
12255         const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
12256         struct bpf_insn *insn = env->prog->insnsi;
12257         int insn_cnt = env->prog->len;
12258         int i, err;
12259
12260         for (i = 0; i < insn_cnt; i++) {
12261                 if (memcmp(&insn[i], &ja, sizeof(ja)))
12262                         continue;
12263
12264                 err = verifier_remove_insns(env, i, 1);
12265                 if (err)
12266                         return err;
12267                 insn_cnt--;
12268                 i--;
12269         }
12270
12271         return 0;
12272 }
12273
12274 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
12275                                          const union bpf_attr *attr)
12276 {
12277         struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
12278         struct bpf_insn_aux_data *aux = env->insn_aux_data;
12279         int i, patch_len, delta = 0, len = env->prog->len;
12280         struct bpf_insn *insns = env->prog->insnsi;
12281         struct bpf_prog *new_prog;
12282         bool rnd_hi32;
12283
12284         rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
12285         zext_patch[1] = BPF_ZEXT_REG(0);
12286         rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
12287         rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
12288         rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
12289         for (i = 0; i < len; i++) {
12290                 int adj_idx = i + delta;
12291                 struct bpf_insn insn;
12292                 int load_reg;
12293
12294                 insn = insns[adj_idx];
12295                 load_reg = insn_def_regno(&insn);
12296                 if (!aux[adj_idx].zext_dst) {
12297                         u8 code, class;
12298                         u32 imm_rnd;
12299
12300                         if (!rnd_hi32)
12301                                 continue;
12302
12303                         code = insn.code;
12304                         class = BPF_CLASS(code);
12305                         if (load_reg == -1)
12306                                 continue;
12307
12308                         /* NOTE: arg "reg" (the fourth one) is only used for
12309                          *       BPF_STX + SRC_OP, so it is safe to pass NULL
12310                          *       here.
12311                          */
12312                         if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
12313                                 if (class == BPF_LD &&
12314                                     BPF_MODE(code) == BPF_IMM)
12315                                         i++;
12316                                 continue;
12317                         }
12318
12319                         /* ctx load could be transformed into wider load. */
12320                         if (class == BPF_LDX &&
12321                             aux[adj_idx].ptr_type == PTR_TO_CTX)
12322                                 continue;
12323
12324                         imm_rnd = get_random_int();
12325                         rnd_hi32_patch[0] = insn;
12326                         rnd_hi32_patch[1].imm = imm_rnd;
12327                         rnd_hi32_patch[3].dst_reg = load_reg;
12328                         patch = rnd_hi32_patch;
12329                         patch_len = 4;
12330                         goto apply_patch_buffer;
12331                 }
12332
12333                 /* Add in an zero-extend instruction if a) the JIT has requested
12334                  * it or b) it's a CMPXCHG.
12335                  *
12336                  * The latter is because: BPF_CMPXCHG always loads a value into
12337                  * R0, therefore always zero-extends. However some archs'
12338                  * equivalent instruction only does this load when the
12339                  * comparison is successful. This detail of CMPXCHG is
12340                  * orthogonal to the general zero-extension behaviour of the
12341                  * CPU, so it's treated independently of bpf_jit_needs_zext.
12342                  */
12343                 if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
12344                         continue;
12345
12346                 if (WARN_ON(load_reg == -1)) {
12347                         verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
12348                         return -EFAULT;
12349                 }
12350
12351                 zext_patch[0] = insn;
12352                 zext_patch[1].dst_reg = load_reg;
12353                 zext_patch[1].src_reg = load_reg;
12354                 patch = zext_patch;
12355                 patch_len = 2;
12356 apply_patch_buffer:
12357                 new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
12358                 if (!new_prog)
12359                         return -ENOMEM;
12360                 env->prog = new_prog;
12361                 insns = new_prog->insnsi;
12362                 aux = env->insn_aux_data;
12363                 delta += patch_len - 1;
12364         }
12365
12366         return 0;
12367 }
12368
12369 /* convert load instructions that access fields of a context type into a
12370  * sequence of instructions that access fields of the underlying structure:
12371  *     struct __sk_buff    -> struct sk_buff
12372  *     struct bpf_sock_ops -> struct sock
12373  */
12374 static int convert_ctx_accesses(struct bpf_verifier_env *env)
12375 {
12376         const struct bpf_verifier_ops *ops = env->ops;
12377         int i, cnt, size, ctx_field_size, delta = 0;
12378         const int insn_cnt = env->prog->len;
12379         struct bpf_insn insn_buf[16], *insn;
12380         u32 target_size, size_default, off;
12381         struct bpf_prog *new_prog;
12382         enum bpf_access_type type;
12383         bool is_narrower_load;
12384
12385         if (ops->gen_prologue || env->seen_direct_write) {
12386                 if (!ops->gen_prologue) {
12387                         verbose(env, "bpf verifier is misconfigured\n");
12388                         return -EINVAL;
12389                 }
12390                 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
12391                                         env->prog);
12392                 if (cnt >= ARRAY_SIZE(insn_buf)) {
12393                         verbose(env, "bpf verifier is misconfigured\n");
12394                         return -EINVAL;
12395                 } else if (cnt) {
12396                         new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
12397                         if (!new_prog)
12398                                 return -ENOMEM;
12399
12400                         env->prog = new_prog;
12401                         delta += cnt - 1;
12402                 }
12403         }
12404
12405         if (bpf_prog_is_dev_bound(env->prog->aux))
12406                 return 0;
12407
12408         insn = env->prog->insnsi + delta;
12409
12410         for (i = 0; i < insn_cnt; i++, insn++) {
12411                 bpf_convert_ctx_access_t convert_ctx_access;
12412                 bool ctx_access;
12413
12414                 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
12415                     insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
12416                     insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
12417                     insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
12418                         type = BPF_READ;
12419                         ctx_access = true;
12420                 } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
12421                            insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
12422                            insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
12423                            insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
12424                            insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
12425                            insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
12426                            insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
12427                            insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
12428                         type = BPF_WRITE;
12429                         ctx_access = BPF_CLASS(insn->code) == BPF_STX;
12430                 } else {
12431                         continue;
12432                 }
12433
12434                 if (type == BPF_WRITE &&
12435                     env->insn_aux_data[i + delta].sanitize_stack_spill) {
12436                         struct bpf_insn patch[] = {
12437                                 *insn,
12438                                 BPF_ST_NOSPEC(),
12439                         };
12440
12441                         cnt = ARRAY_SIZE(patch);
12442                         new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
12443                         if (!new_prog)
12444                                 return -ENOMEM;
12445
12446                         delta    += cnt - 1;
12447                         env->prog = new_prog;
12448                         insn      = new_prog->insnsi + i + delta;
12449                         continue;
12450                 }
12451
12452                 if (!ctx_access)
12453                         continue;
12454
12455                 switch (env->insn_aux_data[i + delta].ptr_type) {
12456                 case PTR_TO_CTX:
12457                         if (!ops->convert_ctx_access)
12458                                 continue;
12459                         convert_ctx_access = ops->convert_ctx_access;
12460                         break;
12461                 case PTR_TO_SOCKET:
12462                 case PTR_TO_SOCK_COMMON:
12463                         convert_ctx_access = bpf_sock_convert_ctx_access;
12464                         break;
12465                 case PTR_TO_TCP_SOCK:
12466                         convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
12467                         break;
12468                 case PTR_TO_XDP_SOCK:
12469                         convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
12470                         break;
12471                 case PTR_TO_BTF_ID:
12472                         if (type == BPF_READ) {
12473                                 insn->code = BPF_LDX | BPF_PROBE_MEM |
12474                                         BPF_SIZE((insn)->code);
12475                                 env->prog->aux->num_exentries++;
12476                         } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
12477                                 verbose(env, "Writes through BTF pointers are not allowed\n");
12478                                 return -EINVAL;
12479                         }
12480                         continue;
12481                 default:
12482                         continue;
12483                 }
12484
12485                 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
12486                 size = BPF_LDST_BYTES(insn);
12487
12488                 /* If the read access is a narrower load of the field,
12489                  * convert to a 4/8-byte load, to minimum program type specific
12490                  * convert_ctx_access changes. If conversion is successful,
12491                  * we will apply proper mask to the result.
12492                  */
12493                 is_narrower_load = size < ctx_field_size;
12494                 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
12495                 off = insn->off;
12496                 if (is_narrower_load) {
12497                         u8 size_code;
12498
12499                         if (type == BPF_WRITE) {
12500                                 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
12501                                 return -EINVAL;
12502                         }
12503
12504                         size_code = BPF_H;
12505                         if (ctx_field_size == 4)
12506                                 size_code = BPF_W;
12507                         else if (ctx_field_size == 8)
12508                                 size_code = BPF_DW;
12509
12510                         insn->off = off & ~(size_default - 1);
12511                         insn->code = BPF_LDX | BPF_MEM | size_code;
12512                 }
12513
12514                 target_size = 0;
12515                 cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
12516                                          &target_size);
12517                 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
12518                     (ctx_field_size && !target_size)) {
12519                         verbose(env, "bpf verifier is misconfigured\n");
12520                         return -EINVAL;
12521                 }
12522
12523                 if (is_narrower_load && size < target_size) {
12524                         u8 shift = bpf_ctx_narrow_access_offset(
12525                                 off, size, size_default) * 8;
12526                         if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
12527                                 verbose(env, "bpf verifier narrow ctx load misconfigured\n");
12528                                 return -EINVAL;
12529                         }
12530                         if (ctx_field_size <= 4) {
12531                                 if (shift)
12532                                         insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
12533                                                                         insn->dst_reg,
12534                                                                         shift);
12535                                 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
12536                                                                 (1 << size * 8) - 1);
12537                         } else {
12538                                 if (shift)
12539                                         insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
12540                                                                         insn->dst_reg,
12541                                                                         shift);
12542                                 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
12543                                                                 (1ULL << size * 8) - 1);
12544                         }
12545                 }
12546
12547                 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
12548                 if (!new_prog)
12549                         return -ENOMEM;
12550
12551                 delta += cnt - 1;
12552
12553                 /* keep walking new program and skip insns we just inserted */
12554                 env->prog = new_prog;
12555                 insn      = new_prog->insnsi + i + delta;
12556         }
12557
12558         return 0;
12559 }
12560
12561 static int jit_subprogs(struct bpf_verifier_env *env)
12562 {
12563         struct bpf_prog *prog = env->prog, **func, *tmp;
12564         int i, j, subprog_start, subprog_end = 0, len, subprog;
12565         struct bpf_map *map_ptr;
12566         struct bpf_insn *insn;
12567         void *old_bpf_func;
12568         int err, num_exentries;
12569
12570         if (env->subprog_cnt <= 1)
12571                 return 0;
12572
12573         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
12574                 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
12575                         continue;
12576
12577                 /* Upon error here we cannot fall back to interpreter but
12578                  * need a hard reject of the program. Thus -EFAULT is
12579                  * propagated in any case.
12580                  */
12581                 subprog = find_subprog(env, i + insn->imm + 1);
12582                 if (subprog < 0) {
12583                         WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
12584                                   i + insn->imm + 1);
12585                         return -EFAULT;
12586                 }
12587                 /* temporarily remember subprog id inside insn instead of
12588                  * aux_data, since next loop will split up all insns into funcs
12589                  */
12590                 insn->off = subprog;
12591                 /* remember original imm in case JIT fails and fallback
12592                  * to interpreter will be needed
12593                  */
12594                 env->insn_aux_data[i].call_imm = insn->imm;
12595                 /* point imm to __bpf_call_base+1 from JITs point of view */
12596                 insn->imm = 1;
12597                 if (bpf_pseudo_func(insn))
12598                         /* jit (e.g. x86_64) may emit fewer instructions
12599                          * if it learns a u32 imm is the same as a u64 imm.
12600                          * Force a non zero here.
12601                          */
12602                         insn[1].imm = 1;
12603         }
12604
12605         err = bpf_prog_alloc_jited_linfo(prog);
12606         if (err)
12607                 goto out_undo_insn;
12608
12609         err = -ENOMEM;
12610         func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
12611         if (!func)
12612                 goto out_undo_insn;
12613
12614         for (i = 0; i < env->subprog_cnt; i++) {
12615                 subprog_start = subprog_end;
12616                 subprog_end = env->subprog_info[i + 1].start;
12617
12618                 len = subprog_end - subprog_start;
12619                 /* bpf_prog_run() doesn't call subprogs directly,
12620                  * hence main prog stats include the runtime of subprogs.
12621                  * subprogs don't have IDs and not reachable via prog_get_next_id
12622                  * func[i]->stats will never be accessed and stays NULL
12623                  */
12624                 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
12625                 if (!func[i])
12626                         goto out_free;
12627                 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
12628                        len * sizeof(struct bpf_insn));
12629                 func[i]->type = prog->type;
12630                 func[i]->len = len;
12631                 if (bpf_prog_calc_tag(func[i]))
12632                         goto out_free;
12633                 func[i]->is_func = 1;
12634                 func[i]->aux->func_idx = i;
12635                 /* Below members will be freed only at prog->aux */
12636                 func[i]->aux->btf = prog->aux->btf;
12637                 func[i]->aux->func_info = prog->aux->func_info;
12638                 func[i]->aux->poke_tab = prog->aux->poke_tab;
12639                 func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
12640
12641                 for (j = 0; j < prog->aux->size_poke_tab; j++) {
12642                         struct bpf_jit_poke_descriptor *poke;
12643
12644                         poke = &prog->aux->poke_tab[j];
12645                         if (poke->insn_idx < subprog_end &&
12646                             poke->insn_idx >= subprog_start)
12647                                 poke->aux = func[i]->aux;
12648                 }
12649
12650                 /* Use bpf_prog_F_tag to indicate functions in stack traces.
12651                  * Long term would need debug info to populate names
12652                  */
12653                 func[i]->aux->name[0] = 'F';
12654                 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
12655                 func[i]->jit_requested = 1;
12656                 func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
12657                 func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
12658                 func[i]->aux->linfo = prog->aux->linfo;
12659                 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
12660                 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
12661                 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
12662                 num_exentries = 0;
12663                 insn = func[i]->insnsi;
12664                 for (j = 0; j < func[i]->len; j++, insn++) {
12665                         if (BPF_CLASS(insn->code) == BPF_LDX &&
12666                             BPF_MODE(insn->code) == BPF_PROBE_MEM)
12667                                 num_exentries++;
12668                 }
12669                 func[i]->aux->num_exentries = num_exentries;
12670                 func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
12671                 func[i] = bpf_int_jit_compile(func[i]);
12672                 if (!func[i]->jited) {
12673                         err = -ENOTSUPP;
12674                         goto out_free;
12675                 }
12676                 cond_resched();
12677         }
12678
12679         /* at this point all bpf functions were successfully JITed
12680          * now populate all bpf_calls with correct addresses and
12681          * run last pass of JIT
12682          */
12683         for (i = 0; i < env->subprog_cnt; i++) {
12684                 insn = func[i]->insnsi;
12685                 for (j = 0; j < func[i]->len; j++, insn++) {
12686                         if (bpf_pseudo_func(insn)) {
12687                                 subprog = insn->off;
12688                                 insn[0].imm = (u32)(long)func[subprog]->bpf_func;
12689                                 insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
12690                                 continue;
12691                         }
12692                         if (!bpf_pseudo_call(insn))
12693                                 continue;
12694                         subprog = insn->off;
12695                         insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
12696                 }
12697
12698                 /* we use the aux data to keep a list of the start addresses
12699                  * of the JITed images for each function in the program
12700                  *
12701                  * for some architectures, such as powerpc64, the imm field
12702                  * might not be large enough to hold the offset of the start
12703                  * address of the callee's JITed image from __bpf_call_base
12704                  *
12705                  * in such cases, we can lookup the start address of a callee
12706                  * by using its subprog id, available from the off field of
12707                  * the call instruction, as an index for this list
12708                  */
12709                 func[i]->aux->func = func;
12710                 func[i]->aux->func_cnt = env->subprog_cnt;
12711         }
12712         for (i = 0; i < env->subprog_cnt; i++) {
12713                 old_bpf_func = func[i]->bpf_func;
12714                 tmp = bpf_int_jit_compile(func[i]);
12715                 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
12716                         verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
12717                         err = -ENOTSUPP;
12718                         goto out_free;
12719                 }
12720                 cond_resched();
12721         }
12722
12723         /* finally lock prog and jit images for all functions and
12724          * populate kallsysm
12725          */
12726         for (i = 0; i < env->subprog_cnt; i++) {
12727                 bpf_prog_lock_ro(func[i]);
12728                 bpf_prog_kallsyms_add(func[i]);
12729         }
12730
12731         /* Last step: make now unused interpreter insns from main
12732          * prog consistent for later dump requests, so they can
12733          * later look the same as if they were interpreted only.
12734          */
12735         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
12736                 if (bpf_pseudo_func(insn)) {
12737                         insn[0].imm = env->insn_aux_data[i].call_imm;
12738                         insn[1].imm = insn->off;
12739                         insn->off = 0;
12740                         continue;
12741                 }
12742                 if (!bpf_pseudo_call(insn))
12743                         continue;
12744                 insn->off = env->insn_aux_data[i].call_imm;
12745                 subprog = find_subprog(env, i + insn->off + 1);
12746                 insn->imm = subprog;
12747         }
12748
12749         prog->jited = 1;
12750         prog->bpf_func = func[0]->bpf_func;
12751         prog->aux->func = func;
12752         prog->aux->func_cnt = env->subprog_cnt;
12753         bpf_prog_jit_attempt_done(prog);
12754         return 0;
12755 out_free:
12756         /* We failed JIT'ing, so at this point we need to unregister poke
12757          * descriptors from subprogs, so that kernel is not attempting to
12758          * patch it anymore as we're freeing the subprog JIT memory.
12759          */
12760         for (i = 0; i < prog->aux->size_poke_tab; i++) {
12761                 map_ptr = prog->aux->poke_tab[i].tail_call.map;
12762                 map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
12763         }
12764         /* At this point we're guaranteed that poke descriptors are not
12765          * live anymore. We can just unlink its descriptor table as it's
12766          * released with the main prog.
12767          */
12768         for (i = 0; i < env->subprog_cnt; i++) {
12769                 if (!func[i])
12770                         continue;
12771                 func[i]->aux->poke_tab = NULL;
12772                 bpf_jit_free(func[i]);
12773         }
12774         kfree(func);
12775 out_undo_insn:
12776         /* cleanup main prog to be interpreted */
12777         prog->jit_requested = 0;
12778         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
12779                 if (!bpf_pseudo_call(insn))
12780                         continue;
12781                 insn->off = 0;
12782                 insn->imm = env->insn_aux_data[i].call_imm;
12783         }
12784         bpf_prog_jit_attempt_done(prog);
12785         return err;
12786 }
12787
12788 static int fixup_call_args(struct bpf_verifier_env *env)
12789 {
12790 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
12791         struct bpf_prog *prog = env->prog;
12792         struct bpf_insn *insn = prog->insnsi;
12793         bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
12794         int i, depth;
12795 #endif
12796         int err = 0;
12797
12798         if (env->prog->jit_requested &&
12799             !bpf_prog_is_dev_bound(env->prog->aux)) {
12800                 err = jit_subprogs(env);
12801                 if (err == 0)
12802                         return 0;
12803                 if (err == -EFAULT)
12804                         return err;
12805         }
12806 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
12807         if (has_kfunc_call) {
12808                 verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
12809                 return -EINVAL;
12810         }
12811         if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
12812                 /* When JIT fails the progs with bpf2bpf calls and tail_calls
12813                  * have to be rejected, since interpreter doesn't support them yet.
12814                  */
12815                 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
12816                 return -EINVAL;
12817         }
12818         for (i = 0; i < prog->len; i++, insn++) {
12819                 if (bpf_pseudo_func(insn)) {
12820                         /* When JIT fails the progs with callback calls
12821                          * have to be rejected, since interpreter doesn't support them yet.
12822                          */
12823                         verbose(env, "callbacks are not allowed in non-JITed programs\n");
12824                         return -EINVAL;
12825                 }
12826
12827                 if (!bpf_pseudo_call(insn))
12828                         continue;
12829                 depth = get_callee_stack_depth(env, insn, i);
12830                 if (depth < 0)
12831                         return depth;
12832                 bpf_patch_call_args(insn, depth);
12833         }
12834         err = 0;
12835 #endif
12836         return err;
12837 }
12838
12839 static int fixup_kfunc_call(struct bpf_verifier_env *env,
12840                             struct bpf_insn *insn)
12841 {
12842         const struct bpf_kfunc_desc *desc;
12843
12844         if (!insn->imm) {
12845                 verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
12846                 return -EINVAL;
12847         }
12848
12849         /* insn->imm has the btf func_id. Replace it with
12850          * an address (relative to __bpf_base_call).
12851          */
12852         desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
12853         if (!desc) {
12854                 verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
12855                         insn->imm);
12856                 return -EFAULT;
12857         }
12858
12859         insn->imm = desc->imm;
12860
12861         return 0;
12862 }
12863
12864 /* Do various post-verification rewrites in a single program pass.
12865  * These rewrites simplify JIT and interpreter implementations.
12866  */
12867 static int do_misc_fixups(struct bpf_verifier_env *env)
12868 {
12869         struct bpf_prog *prog = env->prog;
12870         bool expect_blinding = bpf_jit_blinding_enabled(prog);
12871         enum bpf_prog_type prog_type = resolve_prog_type(prog);
12872         struct bpf_insn *insn = prog->insnsi;
12873         const struct bpf_func_proto *fn;
12874         const int insn_cnt = prog->len;
12875         const struct bpf_map_ops *ops;
12876         struct bpf_insn_aux_data *aux;
12877         struct bpf_insn insn_buf[16];
12878         struct bpf_prog *new_prog;
12879         struct bpf_map *map_ptr;
12880         int i, ret, cnt, delta = 0;
12881
12882         for (i = 0; i < insn_cnt; i++, insn++) {
12883                 /* Make divide-by-zero exceptions impossible. */
12884                 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
12885                     insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
12886                     insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
12887                     insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
12888                         bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
12889                         bool isdiv = BPF_OP(insn->code) == BPF_DIV;
12890                         struct bpf_insn *patchlet;
12891                         struct bpf_insn chk_and_div[] = {
12892                                 /* [R,W]x div 0 -> 0 */
12893                                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
12894                                              BPF_JNE | BPF_K, insn->src_reg,
12895                                              0, 2, 0),
12896                                 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
12897                                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
12898                                 *insn,
12899                         };
12900                         struct bpf_insn chk_and_mod[] = {
12901                                 /* [R,W]x mod 0 -> [R,W]x */
12902                                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
12903                                              BPF_JEQ | BPF_K, insn->src_reg,
12904                                              0, 1 + (is64 ? 0 : 1), 0),
12905                                 *insn,
12906                                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
12907                                 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
12908                         };
12909
12910                         patchlet = isdiv ? chk_and_div : chk_and_mod;
12911                         cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
12912                                       ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
12913
12914                         new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
12915                         if (!new_prog)
12916                                 return -ENOMEM;
12917
12918                         delta    += cnt - 1;
12919                         env->prog = prog = new_prog;
12920                         insn      = new_prog->insnsi + i + delta;
12921                         continue;
12922                 }
12923
12924                 /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
12925                 if (BPF_CLASS(insn->code) == BPF_LD &&
12926                     (BPF_MODE(insn->code) == BPF_ABS ||
12927                      BPF_MODE(insn->code) == BPF_IND)) {
12928                         cnt = env->ops->gen_ld_abs(insn, insn_buf);
12929                         if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
12930                                 verbose(env, "bpf verifier is misconfigured\n");
12931                                 return -EINVAL;
12932                         }
12933
12934                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
12935                         if (!new_prog)
12936                                 return -ENOMEM;
12937
12938                         delta    += cnt - 1;
12939                         env->prog = prog = new_prog;
12940                         insn      = new_prog->insnsi + i + delta;
12941                         continue;
12942                 }
12943
12944                 /* Rewrite pointer arithmetic to mitigate speculation attacks. */
12945                 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
12946                     insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
12947                         const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
12948                         const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
12949                         struct bpf_insn *patch = &insn_buf[0];
12950                         bool issrc, isneg, isimm;
12951                         u32 off_reg;
12952
12953                         aux = &env->insn_aux_data[i + delta];
12954                         if (!aux->alu_state ||
12955                             aux->alu_state == BPF_ALU_NON_POINTER)
12956                                 continue;
12957
12958                         isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
12959                         issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
12960                                 BPF_ALU_SANITIZE_SRC;
12961                         isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
12962
12963                         off_reg = issrc ? insn->src_reg : insn->dst_reg;
12964                         if (isimm) {
12965                                 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
12966                         } else {
12967                                 if (isneg)
12968                                         *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
12969                                 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
12970                                 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
12971                                 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
12972                                 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
12973                                 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
12974                                 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
12975                         }
12976                         if (!issrc)
12977                                 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
12978                         insn->src_reg = BPF_REG_AX;
12979                         if (isneg)
12980                                 insn->code = insn->code == code_add ?
12981                                              code_sub : code_add;
12982                         *patch++ = *insn;
12983                         if (issrc && isneg && !isimm)
12984                                 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
12985                         cnt = patch - insn_buf;
12986
12987                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
12988                         if (!new_prog)
12989                                 return -ENOMEM;
12990
12991                         delta    += cnt - 1;
12992                         env->prog = prog = new_prog;
12993                         insn      = new_prog->insnsi + i + delta;
12994                         continue;
12995                 }
12996
12997                 if (insn->code != (BPF_JMP | BPF_CALL))
12998                         continue;
12999                 if (insn->src_reg == BPF_PSEUDO_CALL)
13000                         continue;
13001                 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
13002                         ret = fixup_kfunc_call(env, insn);
13003                         if (ret)
13004                                 return ret;
13005                         continue;
13006                 }
13007
13008                 if (insn->imm == BPF_FUNC_get_route_realm)
13009                         prog->dst_needed = 1;
13010                 if (insn->imm == BPF_FUNC_get_prandom_u32)
13011                         bpf_user_rnd_init_once();
13012                 if (insn->imm == BPF_FUNC_override_return)
13013                         prog->kprobe_override = 1;
13014                 if (insn->imm == BPF_FUNC_tail_call) {
13015                         /* If we tail call into other programs, we
13016                          * cannot make any assumptions since they can
13017                          * be replaced dynamically during runtime in
13018                          * the program array.
13019                          */
13020                         prog->cb_access = 1;
13021                         if (!allow_tail_call_in_subprogs(env))
13022                                 prog->aux->stack_depth = MAX_BPF_STACK;
13023                         prog->aux->max_pkt_offset = MAX_PACKET_OFF;
13024
13025                         /* mark bpf_tail_call as different opcode to avoid
13026                          * conditional branch in the interpreter for every normal
13027                          * call and to prevent accidental JITing by JIT compiler
13028                          * that doesn't support bpf_tail_call yet
13029                          */
13030                         insn->imm = 0;
13031                         insn->code = BPF_JMP | BPF_TAIL_CALL;
13032
13033                         aux = &env->insn_aux_data[i + delta];
13034                         if (env->bpf_capable && !expect_blinding &&
13035                             prog->jit_requested &&
13036                             !bpf_map_key_poisoned(aux) &&
13037                             !bpf_map_ptr_poisoned(aux) &&
13038                             !bpf_map_ptr_unpriv(aux)) {
13039                                 struct bpf_jit_poke_descriptor desc = {
13040                                         .reason = BPF_POKE_REASON_TAIL_CALL,
13041                                         .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
13042                                         .tail_call.key = bpf_map_key_immediate(aux),
13043                                         .insn_idx = i + delta,
13044                                 };
13045
13046                                 ret = bpf_jit_add_poke_descriptor(prog, &desc);
13047                                 if (ret < 0) {
13048                                         verbose(env, "adding tail call poke descriptor failed\n");
13049                                         return ret;
13050                                 }
13051
13052                                 insn->imm = ret + 1;
13053                                 continue;
13054                         }
13055
13056                         if (!bpf_map_ptr_unpriv(aux))
13057                                 continue;
13058
13059                         /* instead of changing every JIT dealing with tail_call
13060                          * emit two extra insns:
13061                          * if (index >= max_entries) goto out;
13062                          * index &= array->index_mask;
13063                          * to avoid out-of-bounds cpu speculation
13064                          */
13065                         if (bpf_map_ptr_poisoned(aux)) {
13066                                 verbose(env, "tail_call abusing map_ptr\n");
13067                                 return -EINVAL;
13068                         }
13069
13070                         map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
13071                         insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
13072                                                   map_ptr->max_entries, 2);
13073                         insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
13074                                                     container_of(map_ptr,
13075                                                                  struct bpf_array,
13076                                                                  map)->index_mask);
13077                         insn_buf[2] = *insn;
13078                         cnt = 3;
13079                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13080                         if (!new_prog)
13081                                 return -ENOMEM;
13082
13083                         delta    += cnt - 1;
13084                         env->prog = prog = new_prog;
13085                         insn      = new_prog->insnsi + i + delta;
13086                         continue;
13087                 }
13088
13089                 if (insn->imm == BPF_FUNC_timer_set_callback) {
13090                         /* The verifier will process callback_fn as many times as necessary
13091                          * with different maps and the register states prepared by
13092                          * set_timer_callback_state will be accurate.
13093                          *
13094                          * The following use case is valid:
13095                          *   map1 is shared by prog1, prog2, prog3.
13096                          *   prog1 calls bpf_timer_init for some map1 elements
13097                          *   prog2 calls bpf_timer_set_callback for some map1 elements.
13098                          *     Those that were not bpf_timer_init-ed will return -EINVAL.
13099                          *   prog3 calls bpf_timer_start for some map1 elements.
13100                          *     Those that were not both bpf_timer_init-ed and
13101                          *     bpf_timer_set_callback-ed will return -EINVAL.
13102                          */
13103                         struct bpf_insn ld_addrs[2] = {
13104                                 BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
13105                         };
13106
13107                         insn_buf[0] = ld_addrs[0];
13108                         insn_buf[1] = ld_addrs[1];
13109                         insn_buf[2] = *insn;
13110                         cnt = 3;
13111
13112                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
13113                         if (!new_prog)
13114                                 return -ENOMEM;
13115
13116                         delta    += cnt - 1;
13117                         env->prog = prog = new_prog;
13118                         insn      = new_prog->insnsi + i + delta;
13119                         goto patch_call_imm;
13120                 }
13121
13122                 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
13123                  * and other inlining handlers are currently limited to 64 bit
13124                  * only.
13125                  */
13126                 if (prog->jit_requested && BITS_PER_LONG == 64 &&
13127                     (insn->imm == BPF_FUNC_map_lookup_elem ||
13128                      insn->imm == BPF_FUNC_map_update_elem ||
13129                      insn->imm == BPF_FUNC_map_delete_elem ||
13130                      insn->imm == BPF_FUNC_map_push_elem   ||
13131                      insn->imm == BPF_FUNC_map_pop_elem    ||
13132                      insn->imm == BPF_FUNC_map_peek_elem   ||
13133                      insn->imm == BPF_FUNC_redirect_map    ||
13134                      insn->imm == BPF_FUNC_for_each_map_elem)) {
13135                         aux = &env->insn_aux_data[i + delta];
13136                         if (bpf_map_ptr_poisoned(aux))
13137                                 goto patch_call_imm;
13138
13139                         map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
13140                         ops = map_ptr->ops;
13141                         if (insn->imm == BPF_FUNC_map_lookup_elem &&
13142                             ops->map_gen_lookup) {
13143                                 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
13144                                 if (cnt == -EOPNOTSUPP)
13145                                         goto patch_map_ops_generic;
13146                                 if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
13147                                         verbose(env, "bpf verifier is misconfigured\n");
13148                                         return -EINVAL;
13149                                 }
13150
13151                                 new_prog = bpf_patch_insn_data(env, i + delta,
13152                                                                insn_buf, cnt);
13153                                 if (!new_prog)
13154                                         return -ENOMEM;
13155
13156                                 delta    += cnt - 1;
13157                                 env->prog = prog = new_prog;
13158                                 insn      = new_prog->insnsi + i + delta;
13159                                 continue;
13160                         }
13161
13162                         BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
13163                                      (void *(*)(struct bpf_map *map, void *key))NULL));
13164                         BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
13165                                      (int (*)(struct bpf_map *map, void *key))NULL));
13166                         BUILD_BUG_ON(!__same_type(ops->map_update_elem,
13167                                      (int (*)(struct bpf_map *map, void *key, void *value,
13168                                               u64 flags))NULL));
13169                         BUILD_BUG_ON(!__same_type(ops->map_push_elem,
13170                                      (int (*)(struct bpf_map *map, void *value,
13171                                               u64 flags))NULL));
13172                         BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
13173                                      (int (*)(struct bpf_map *map, void *value))NULL));
13174                         BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
13175                                      (int (*)(struct bpf_map *map, void *value))NULL));
13176                         BUILD_BUG_ON(!__same_type(ops->map_redirect,
13177                                      (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL));
13178                         BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
13179                                      (int (*)(struct bpf_map *map,
13180                                               bpf_callback_t callback_fn,
13181                                               void *callback_ctx,
13182                                               u64 flags))NULL));
13183
13184 patch_map_ops_generic:
13185                         switch (insn->imm) {
13186                         case BPF_FUNC_map_lookup_elem:
13187                                 insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
13188                                 continue;
13189                         case BPF_FUNC_map_update_elem:
13190                                 insn->imm = BPF_CALL_IMM(ops->map_update_elem);
13191                                 continue;
13192                         case BPF_FUNC_map_delete_elem:
13193                                 insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
13194                                 continue;
13195                         case BPF_FUNC_map_push_elem:
13196                                 insn->imm = BPF_CALL_IMM(ops->map_push_elem);
13197                                 continue;
13198                         case BPF_FUNC_map_pop_elem:
13199                                 insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
13200                                 continue;
13201                         case BPF_FUNC_map_peek_elem:
13202                                 insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
13203                                 continue;
13204                         case BPF_FUNC_redirect_map:
13205                                 insn->imm = BPF_CALL_IMM(ops->map_redirect);
13206                                 continue;
13207                         case BPF_FUNC_for_each_map_elem:
13208                                 insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
13209                                 continue;
13210                         }
13211
13212                         goto patch_call_imm;
13213                 }
13214
13215                 /* Implement bpf_jiffies64 inline. */
13216                 if (prog->jit_requested && BITS_PER_LONG == 64 &&
13217                     insn->imm == BPF_FUNC_jiffies64) {
13218                         struct bpf_insn ld_jiffies_addr[2] = {
13219                                 BPF_LD_IMM64(BPF_REG_0,
13220                                              (unsigned long)&jiffies),
13221                         };
13222
13223                         insn_buf[0] = ld_jiffies_addr[0];
13224                         insn_buf[1] = ld_jiffies_addr[1];
13225                         insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
13226                                                   BPF_REG_0, 0);
13227                         cnt = 3;
13228
13229                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
13230                                                        cnt);
13231                         if (!new_prog)
13232                                 return -ENOMEM;
13233
13234                         delta    += cnt - 1;
13235                         env->prog = prog = new_prog;
13236                         insn      = new_prog->insnsi + i + delta;
13237                         continue;
13238                 }
13239
13240                 /* Implement bpf_get_func_ip inline. */
13241                 if (prog_type == BPF_PROG_TYPE_TRACING &&
13242                     insn->imm == BPF_FUNC_get_func_ip) {
13243                         /* Load IP address from ctx - 8 */
13244                         insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
13245
13246                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
13247                         if (!new_prog)
13248                                 return -ENOMEM;
13249
13250                         env->prog = prog = new_prog;
13251                         insn      = new_prog->insnsi + i + delta;
13252                         continue;
13253                 }
13254
13255 patch_call_imm:
13256                 fn = env->ops->get_func_proto(insn->imm, env->prog);
13257                 /* all functions that have prototype and verifier allowed
13258                  * programs to call them, must be real in-kernel functions
13259                  */
13260                 if (!fn->func) {
13261                         verbose(env,
13262                                 "kernel subsystem misconfigured func %s#%d\n",
13263                                 func_id_name(insn->imm), insn->imm);
13264                         return -EFAULT;
13265                 }
13266                 insn->imm = fn->func - __bpf_call_base;
13267         }
13268
13269         /* Since poke tab is now finalized, publish aux to tracker. */
13270         for (i = 0; i < prog->aux->size_poke_tab; i++) {
13271                 map_ptr = prog->aux->poke_tab[i].tail_call.map;
13272                 if (!map_ptr->ops->map_poke_track ||
13273                     !map_ptr->ops->map_poke_untrack ||
13274                     !map_ptr->ops->map_poke_run) {
13275                         verbose(env, "bpf verifier is misconfigured\n");
13276                         return -EINVAL;
13277                 }
13278
13279                 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
13280                 if (ret < 0) {
13281                         verbose(env, "tracking tail call prog failed\n");
13282                         return ret;
13283                 }
13284         }
13285
13286         sort_kfunc_descs_by_imm(env->prog);
13287
13288         return 0;
13289 }
13290
13291 static void free_states(struct bpf_verifier_env *env)
13292 {
13293         struct bpf_verifier_state_list *sl, *sln;
13294         int i;
13295
13296         sl = env->free_list;
13297         while (sl) {
13298                 sln = sl->next;
13299                 free_verifier_state(&sl->state, false);
13300                 kfree(sl);
13301                 sl = sln;
13302         }
13303         env->free_list = NULL;
13304
13305         if (!env->explored_states)
13306                 return;
13307
13308         for (i = 0; i < state_htab_size(env); i++) {
13309                 sl = env->explored_states[i];
13310
13311                 while (sl) {
13312                         sln = sl->next;
13313                         free_verifier_state(&sl->state, false);
13314                         kfree(sl);
13315                         sl = sln;
13316                 }
13317                 env->explored_states[i] = NULL;
13318         }
13319 }
13320
13321 static int do_check_common(struct bpf_verifier_env *env, int subprog)
13322 {
13323         bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
13324         struct bpf_verifier_state *state;
13325         struct bpf_reg_state *regs;
13326         int ret, i;
13327
13328         env->prev_linfo = NULL;
13329         env->pass_cnt++;
13330
13331         state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
13332         if (!state)
13333                 return -ENOMEM;
13334         state->curframe = 0;
13335         state->speculative = false;
13336         state->branches = 1;
13337         state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
13338         if (!state->frame[0]) {
13339                 kfree(state);
13340                 return -ENOMEM;
13341         }
13342         env->cur_state = state;
13343         init_func_state(env, state->frame[0],
13344                         BPF_MAIN_FUNC /* callsite */,
13345                         0 /* frameno */,
13346                         subprog);
13347
13348         regs = state->frame[state->curframe]->regs;
13349         if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
13350                 ret = btf_prepare_func_args(env, subprog, regs);
13351                 if (ret)
13352                         goto out;
13353                 for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
13354                         if (regs[i].type == PTR_TO_CTX)
13355                                 mark_reg_known_zero(env, regs, i);
13356                         else if (regs[i].type == SCALAR_VALUE)
13357                                 mark_reg_unknown(env, regs, i);
13358                         else if (regs[i].type == PTR_TO_MEM_OR_NULL) {
13359                                 const u32 mem_size = regs[i].mem_size;
13360
13361                                 mark_reg_known_zero(env, regs, i);
13362                                 regs[i].mem_size = mem_size;
13363                                 regs[i].id = ++env->id_gen;
13364                         }
13365                 }
13366         } else {
13367                 /* 1st arg to a function */
13368                 regs[BPF_REG_1].type = PTR_TO_CTX;
13369                 mark_reg_known_zero(env, regs, BPF_REG_1);
13370                 ret = btf_check_subprog_arg_match(env, subprog, regs);
13371                 if (ret == -EFAULT)
13372                         /* unlikely verifier bug. abort.
13373                          * ret == 0 and ret < 0 are sadly acceptable for
13374                          * main() function due to backward compatibility.
13375                          * Like socket filter program may be written as:
13376                          * int bpf_prog(struct pt_regs *ctx)
13377                          * and never dereference that ctx in the program.
13378                          * 'struct pt_regs' is a type mismatch for socket
13379                          * filter that should be using 'struct __sk_buff'.
13380                          */
13381                         goto out;
13382         }
13383
13384         ret = do_check(env);
13385 out:
13386         /* check for NULL is necessary, since cur_state can be freed inside
13387          * do_check() under memory pressure.
13388          */
13389         if (env->cur_state) {
13390                 free_verifier_state(env->cur_state, true);
13391                 env->cur_state = NULL;
13392         }
13393         while (!pop_stack(env, NULL, NULL, false));
13394         if (!ret && pop_log)
13395                 bpf_vlog_reset(&env->log, 0);
13396         free_states(env);
13397         return ret;
13398 }
13399
13400 /* Verify all global functions in a BPF program one by one based on their BTF.
13401  * All global functions must pass verification. Otherwise the whole program is rejected.
13402  * Consider:
13403  * int bar(int);
13404  * int foo(int f)
13405  * {
13406  *    return bar(f);
13407  * }
13408  * int bar(int b)
13409  * {
13410  *    ...
13411  * }
13412  * foo() will be verified first for R1=any_scalar_value. During verification it
13413  * will be assumed that bar() already verified successfully and call to bar()
13414  * from foo() will be checked for type match only. Later bar() will be verified
13415  * independently to check that it's safe for R1=any_scalar_value.
13416  */
13417 static int do_check_subprogs(struct bpf_verifier_env *env)
13418 {
13419         struct bpf_prog_aux *aux = env->prog->aux;
13420         int i, ret;
13421
13422         if (!aux->func_info)
13423                 return 0;
13424
13425         for (i = 1; i < env->subprog_cnt; i++) {
13426                 if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
13427                         continue;
13428                 env->insn_idx = env->subprog_info[i].start;
13429                 WARN_ON_ONCE(env->insn_idx == 0);
13430                 ret = do_check_common(env, i);
13431                 if (ret) {
13432                         return ret;
13433                 } else if (env->log.level & BPF_LOG_LEVEL) {
13434                         verbose(env,
13435                                 "Func#%d is safe for any args that match its prototype\n",
13436                                 i);
13437                 }
13438         }
13439         return 0;
13440 }
13441
13442 static int do_check_main(struct bpf_verifier_env *env)
13443 {
13444         int ret;
13445
13446         env->insn_idx = 0;
13447         ret = do_check_common(env, 0);
13448         if (!ret)
13449                 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
13450         return ret;
13451 }
13452
13453
13454 static void print_verification_stats(struct bpf_verifier_env *env)
13455 {
13456         int i;
13457
13458         if (env->log.level & BPF_LOG_STATS) {
13459                 verbose(env, "verification time %lld usec\n",
13460                         div_u64(env->verification_time, 1000));
13461                 verbose(env, "stack depth ");
13462                 for (i = 0; i < env->subprog_cnt; i++) {
13463                         u32 depth = env->subprog_info[i].stack_depth;
13464
13465                         verbose(env, "%d", depth);
13466                         if (i + 1 < env->subprog_cnt)
13467                                 verbose(env, "+");
13468                 }
13469                 verbose(env, "\n");
13470         }
13471         verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
13472                 "total_states %d peak_states %d mark_read %d\n",
13473                 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
13474                 env->max_states_per_insn, env->total_states,
13475                 env->peak_states, env->longest_mark_read_walk);
13476 }
13477
13478 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
13479 {
13480         const struct btf_type *t, *func_proto;
13481         const struct bpf_struct_ops *st_ops;
13482         const struct btf_member *member;
13483         struct bpf_prog *prog = env->prog;
13484         u32 btf_id, member_idx;
13485         const char *mname;
13486
13487         if (!prog->gpl_compatible) {
13488                 verbose(env, "struct ops programs must have a GPL compatible license\n");
13489                 return -EINVAL;
13490         }
13491
13492         btf_id = prog->aux->attach_btf_id;
13493         st_ops = bpf_struct_ops_find(btf_id);
13494         if (!st_ops) {
13495                 verbose(env, "attach_btf_id %u is not a supported struct\n",
13496                         btf_id);
13497                 return -ENOTSUPP;
13498         }
13499
13500         t = st_ops->type;
13501         member_idx = prog->expected_attach_type;
13502         if (member_idx >= btf_type_vlen(t)) {
13503                 verbose(env, "attach to invalid member idx %u of struct %s\n",
13504                         member_idx, st_ops->name);
13505                 return -EINVAL;
13506         }
13507
13508         member = &btf_type_member(t)[member_idx];
13509         mname = btf_name_by_offset(btf_vmlinux, member->name_off);
13510         func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
13511                                                NULL);
13512         if (!func_proto) {
13513                 verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
13514                         mname, member_idx, st_ops->name);
13515                 return -EINVAL;
13516         }
13517
13518         if (st_ops->check_member) {
13519                 int err = st_ops->check_member(t, member);
13520
13521                 if (err) {
13522                         verbose(env, "attach to unsupported member %s of struct %s\n",
13523                                 mname, st_ops->name);
13524                         return err;
13525                 }
13526         }
13527
13528         prog->aux->attach_func_proto = func_proto;
13529         prog->aux->attach_func_name = mname;
13530         env->ops = st_ops->verifier_ops;
13531
13532         return 0;
13533 }
13534 #define SECURITY_PREFIX "security_"
13535
13536 static int check_attach_modify_return(unsigned long addr, const char *func_name)
13537 {
13538         if (within_error_injection_list(addr) ||
13539             !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
13540                 return 0;
13541
13542         return -EINVAL;
13543 }
13544
13545 /* list of non-sleepable functions that are otherwise on
13546  * ALLOW_ERROR_INJECTION list
13547  */
13548 BTF_SET_START(btf_non_sleepable_error_inject)
13549 /* Three functions below can be called from sleepable and non-sleepable context.
13550  * Assume non-sleepable from bpf safety point of view.
13551  */
13552 BTF_ID(func, __filemap_add_folio)
13553 BTF_ID(func, should_fail_alloc_page)
13554 BTF_ID(func, should_failslab)
13555 BTF_SET_END(btf_non_sleepable_error_inject)
13556
13557 static int check_non_sleepable_error_inject(u32 btf_id)
13558 {
13559         return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
13560 }
13561
13562 int bpf_check_attach_target(struct bpf_verifier_log *log,
13563                             const struct bpf_prog *prog,
13564                             const struct bpf_prog *tgt_prog,
13565                             u32 btf_id,
13566                             struct bpf_attach_target_info *tgt_info)
13567 {
13568         bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
13569         const char prefix[] = "btf_trace_";
13570         int ret = 0, subprog = -1, i;
13571         const struct btf_type *t;
13572         bool conservative = true;
13573         const char *tname;
13574         struct btf *btf;
13575         long addr = 0;
13576
13577         if (!btf_id) {
13578                 bpf_log(log, "Tracing programs must provide btf_id\n");
13579                 return -EINVAL;
13580         }
13581         btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
13582         if (!btf) {
13583                 bpf_log(log,
13584                         "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
13585                 return -EINVAL;
13586         }
13587         t = btf_type_by_id(btf, btf_id);
13588         if (!t) {
13589                 bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
13590                 return -EINVAL;
13591         }
13592         tname = btf_name_by_offset(btf, t->name_off);
13593         if (!tname) {
13594                 bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
13595                 return -EINVAL;
13596         }
13597         if (tgt_prog) {
13598                 struct bpf_prog_aux *aux = tgt_prog->aux;
13599
13600                 for (i = 0; i < aux->func_info_cnt; i++)
13601                         if (aux->func_info[i].type_id == btf_id) {
13602                                 subprog = i;
13603                                 break;
13604                         }
13605                 if (subprog == -1) {
13606                         bpf_log(log, "Subprog %s doesn't exist\n", tname);
13607                         return -EINVAL;
13608                 }
13609                 conservative = aux->func_info_aux[subprog].unreliable;
13610                 if (prog_extension) {
13611                         if (conservative) {
13612                                 bpf_log(log,
13613                                         "Cannot replace static functions\n");
13614                                 return -EINVAL;
13615                         }
13616                         if (!prog->jit_requested) {
13617                                 bpf_log(log,
13618                                         "Extension programs should be JITed\n");
13619                                 return -EINVAL;
13620                         }
13621                 }
13622                 if (!tgt_prog->jited) {
13623                         bpf_log(log, "Can attach to only JITed progs\n");
13624                         return -EINVAL;
13625                 }
13626                 if (tgt_prog->type == prog->type) {
13627                         /* Cannot fentry/fexit another fentry/fexit program.
13628                          * Cannot attach program extension to another extension.
13629                          * It's ok to attach fentry/fexit to extension program.
13630                          */
13631                         bpf_log(log, "Cannot recursively attach\n");
13632                         return -EINVAL;
13633                 }
13634                 if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
13635                     prog_extension &&
13636                     (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
13637                      tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
13638                         /* Program extensions can extend all program types
13639                          * except fentry/fexit. The reason is the following.
13640                          * The fentry/fexit programs are used for performance
13641                          * analysis, stats and can be attached to any program
13642                          * type except themselves. When extension program is
13643                          * replacing XDP function it is necessary to allow
13644                          * performance analysis of all functions. Both original
13645                          * XDP program and its program extension. Hence
13646                          * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
13647                          * allowed. If extending of fentry/fexit was allowed it
13648                          * would be possible to create long call chain
13649                          * fentry->extension->fentry->extension beyond
13650                          * reasonable stack size. Hence extending fentry is not
13651                          * allowed.
13652                          */
13653                         bpf_log(log, "Cannot extend fentry/fexit\n");
13654                         return -EINVAL;
13655                 }
13656         } else {
13657                 if (prog_extension) {
13658                         bpf_log(log, "Cannot replace kernel functions\n");
13659                         return -EINVAL;
13660                 }
13661         }
13662
13663         switch (prog->expected_attach_type) {
13664         case BPF_TRACE_RAW_TP:
13665                 if (tgt_prog) {
13666                         bpf_log(log,
13667                                 "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
13668                         return -EINVAL;
13669                 }
13670                 if (!btf_type_is_typedef(t)) {
13671                         bpf_log(log, "attach_btf_id %u is not a typedef\n",
13672                                 btf_id);
13673                         return -EINVAL;
13674                 }
13675                 if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
13676                         bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
13677                                 btf_id, tname);
13678                         return -EINVAL;
13679                 }
13680                 tname += sizeof(prefix) - 1;
13681                 t = btf_type_by_id(btf, t->type);
13682                 if (!btf_type_is_ptr(t))
13683                         /* should never happen in valid vmlinux build */
13684                         return -EINVAL;
13685                 t = btf_type_by_id(btf, t->type);
13686                 if (!btf_type_is_func_proto(t))
13687                         /* should never happen in valid vmlinux build */
13688                         return -EINVAL;
13689
13690                 break;
13691         case BPF_TRACE_ITER:
13692                 if (!btf_type_is_func(t)) {
13693                         bpf_log(log, "attach_btf_id %u is not a function\n",
13694                                 btf_id);
13695                         return -EINVAL;
13696                 }
13697                 t = btf_type_by_id(btf, t->type);
13698                 if (!btf_type_is_func_proto(t))
13699                         return -EINVAL;
13700                 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
13701                 if (ret)
13702                         return ret;
13703                 break;
13704         default:
13705                 if (!prog_extension)
13706                         return -EINVAL;
13707                 fallthrough;
13708         case BPF_MODIFY_RETURN:
13709         case BPF_LSM_MAC:
13710         case BPF_TRACE_FENTRY:
13711         case BPF_TRACE_FEXIT:
13712                 if (!btf_type_is_func(t)) {
13713                         bpf_log(log, "attach_btf_id %u is not a function\n",
13714                                 btf_id);
13715                         return -EINVAL;
13716                 }
13717                 if (prog_extension &&
13718                     btf_check_type_match(log, prog, btf, t))
13719                         return -EINVAL;
13720                 t = btf_type_by_id(btf, t->type);
13721                 if (!btf_type_is_func_proto(t))
13722                         return -EINVAL;
13723
13724                 if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
13725                     (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
13726                      prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
13727                         return -EINVAL;
13728
13729                 if (tgt_prog && conservative)
13730                         t = NULL;
13731
13732                 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
13733                 if (ret < 0)
13734                         return ret;
13735
13736                 if (tgt_prog) {
13737                         if (subprog == 0)
13738                                 addr = (long) tgt_prog->bpf_func;
13739                         else
13740                                 addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
13741                 } else {
13742                         addr = kallsyms_lookup_name(tname);
13743                         if (!addr) {
13744                                 bpf_log(log,
13745                                         "The address of function %s cannot be found\n",
13746                                         tname);
13747                                 return -ENOENT;
13748                         }
13749                 }
13750
13751                 if (prog->aux->sleepable) {
13752                         ret = -EINVAL;
13753                         switch (prog->type) {
13754                         case BPF_PROG_TYPE_TRACING:
13755                                 /* fentry/fexit/fmod_ret progs can be sleepable only if they are
13756                                  * attached to ALLOW_ERROR_INJECTION and are not in denylist.
13757                                  */
13758                                 if (!check_non_sleepable_error_inject(btf_id) &&
13759                                     within_error_injection_list(addr))
13760                                         ret = 0;
13761                                 break;
13762                         case BPF_PROG_TYPE_LSM:
13763                                 /* LSM progs check that they are attached to bpf_lsm_*() funcs.
13764                                  * Only some of them are sleepable.
13765                                  */
13766                                 if (bpf_lsm_is_sleepable_hook(btf_id))
13767                                         ret = 0;
13768                                 break;
13769                         default:
13770                                 break;
13771                         }
13772                         if (ret) {
13773                                 bpf_log(log, "%s is not sleepable\n", tname);
13774                                 return ret;
13775                         }
13776                 } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
13777                         if (tgt_prog) {
13778                                 bpf_log(log, "can't modify return codes of BPF programs\n");
13779                                 return -EINVAL;
13780                         }
13781                         ret = check_attach_modify_return(addr, tname);
13782                         if (ret) {
13783                                 bpf_log(log, "%s() is not modifiable\n", tname);
13784                                 return ret;
13785                         }
13786                 }
13787
13788                 break;
13789         }
13790         tgt_info->tgt_addr = addr;
13791         tgt_info->tgt_name = tname;
13792         tgt_info->tgt_type = t;
13793         return 0;
13794 }
13795
13796 BTF_SET_START(btf_id_deny)
13797 BTF_ID_UNUSED
13798 #ifdef CONFIG_SMP
13799 BTF_ID(func, migrate_disable)
13800 BTF_ID(func, migrate_enable)
13801 #endif
13802 #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
13803 BTF_ID(func, rcu_read_unlock_strict)
13804 #endif
13805 BTF_SET_END(btf_id_deny)
13806
13807 static int check_attach_btf_id(struct bpf_verifier_env *env)
13808 {
13809         struct bpf_prog *prog = env->prog;
13810         struct bpf_prog *tgt_prog = prog->aux->dst_prog;
13811         struct bpf_attach_target_info tgt_info = {};
13812         u32 btf_id = prog->aux->attach_btf_id;
13813         struct bpf_trampoline *tr;
13814         int ret;
13815         u64 key;
13816
13817         if (prog->type == BPF_PROG_TYPE_SYSCALL) {
13818                 if (prog->aux->sleepable)
13819                         /* attach_btf_id checked to be zero already */
13820                         return 0;
13821                 verbose(env, "Syscall programs can only be sleepable\n");
13822                 return -EINVAL;
13823         }
13824
13825         if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
13826             prog->type != BPF_PROG_TYPE_LSM) {
13827                 verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
13828                 return -EINVAL;
13829         }
13830
13831         if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
13832                 return check_struct_ops_btf_id(env);
13833
13834         if (prog->type != BPF_PROG_TYPE_TRACING &&
13835             prog->type != BPF_PROG_TYPE_LSM &&
13836             prog->type != BPF_PROG_TYPE_EXT)
13837                 return 0;
13838
13839         ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
13840         if (ret)
13841                 return ret;
13842
13843         if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
13844                 /* to make freplace equivalent to their targets, they need to
13845                  * inherit env->ops and expected_attach_type for the rest of the
13846                  * verification
13847                  */
13848                 env->ops = bpf_verifier_ops[tgt_prog->type];
13849                 prog->expected_attach_type = tgt_prog->expected_attach_type;
13850         }
13851
13852         /* store info about the attachment target that will be used later */
13853         prog->aux->attach_func_proto = tgt_info.tgt_type;
13854         prog->aux->attach_func_name = tgt_info.tgt_name;
13855
13856         if (tgt_prog) {
13857                 prog->aux->saved_dst_prog_type = tgt_prog->type;
13858                 prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
13859         }
13860
13861         if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
13862                 prog->aux->attach_btf_trace = true;
13863                 return 0;
13864         } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
13865                 if (!bpf_iter_prog_supported(prog))
13866                         return -EINVAL;
13867                 return 0;
13868         }
13869
13870         if (prog->type == BPF_PROG_TYPE_LSM) {
13871                 ret = bpf_lsm_verify_prog(&env->log, prog);
13872                 if (ret < 0)
13873                         return ret;
13874         } else if (prog->type == BPF_PROG_TYPE_TRACING &&
13875                    btf_id_set_contains(&btf_id_deny, btf_id)) {
13876                 return -EINVAL;
13877         }
13878
13879         key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
13880         tr = bpf_trampoline_get(key, &tgt_info);
13881         if (!tr)
13882                 return -ENOMEM;
13883
13884         prog->aux->dst_trampoline = tr;
13885         return 0;
13886 }
13887
13888 struct btf *bpf_get_btf_vmlinux(void)
13889 {
13890         if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
13891                 mutex_lock(&bpf_verifier_lock);
13892                 if (!btf_vmlinux)
13893                         btf_vmlinux = btf_parse_vmlinux();
13894                 mutex_unlock(&bpf_verifier_lock);
13895         }
13896         return btf_vmlinux;
13897 }
13898
13899 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
13900 {
13901         u64 start_time = ktime_get_ns();
13902         struct bpf_verifier_env *env;
13903         struct bpf_verifier_log *log;
13904         int i, len, ret = -EINVAL;
13905         bool is_priv;
13906
13907         /* no program is valid */
13908         if (ARRAY_SIZE(bpf_verifier_ops) == 0)
13909                 return -EINVAL;
13910
13911         /* 'struct bpf_verifier_env' can be global, but since it's not small,
13912          * allocate/free it every time bpf_check() is called
13913          */
13914         env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
13915         if (!env)
13916                 return -ENOMEM;
13917         log = &env->log;
13918
13919         len = (*prog)->len;
13920         env->insn_aux_data =
13921                 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
13922         ret = -ENOMEM;
13923         if (!env->insn_aux_data)
13924                 goto err_free_env;
13925         for (i = 0; i < len; i++)
13926                 env->insn_aux_data[i].orig_idx = i;
13927         env->prog = *prog;
13928         env->ops = bpf_verifier_ops[env->prog->type];
13929         env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
13930         is_priv = bpf_capable();
13931
13932         bpf_get_btf_vmlinux();
13933
13934         /* grab the mutex to protect few globals used by verifier */
13935         if (!is_priv)
13936                 mutex_lock(&bpf_verifier_lock);
13937
13938         if (attr->log_level || attr->log_buf || attr->log_size) {
13939                 /* user requested verbose verifier output
13940                  * and supplied buffer to store the verification trace
13941                  */
13942                 log->level = attr->log_level;
13943                 log->ubuf = (char __user *) (unsigned long) attr->log_buf;
13944                 log->len_total = attr->log_size;
13945
13946                 ret = -EINVAL;
13947                 /* log attributes have to be sane */
13948                 if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 ||
13949                     !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK)
13950                         goto err_unlock;
13951         }
13952
13953         if (IS_ERR(btf_vmlinux)) {
13954                 /* Either gcc or pahole or kernel are broken. */
13955                 verbose(env, "in-kernel BTF is malformed\n");
13956                 ret = PTR_ERR(btf_vmlinux);
13957                 goto skip_full_check;
13958         }
13959
13960         env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
13961         if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
13962                 env->strict_alignment = true;
13963         if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
13964                 env->strict_alignment = false;
13965
13966         env->allow_ptr_leaks = bpf_allow_ptr_leaks();
13967         env->allow_uninit_stack = bpf_allow_uninit_stack();
13968         env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
13969         env->bypass_spec_v1 = bpf_bypass_spec_v1();
13970         env->bypass_spec_v4 = bpf_bypass_spec_v4();
13971         env->bpf_capable = bpf_capable();
13972
13973         if (is_priv)
13974                 env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
13975
13976         env->explored_states = kvcalloc(state_htab_size(env),
13977                                        sizeof(struct bpf_verifier_state_list *),
13978                                        GFP_USER);
13979         ret = -ENOMEM;
13980         if (!env->explored_states)
13981                 goto skip_full_check;
13982
13983         ret = add_subprog_and_kfunc(env);
13984         if (ret < 0)
13985                 goto skip_full_check;
13986
13987         ret = check_subprogs(env);
13988         if (ret < 0)
13989                 goto skip_full_check;
13990
13991         ret = check_btf_info(env, attr, uattr);
13992         if (ret < 0)
13993                 goto skip_full_check;
13994
13995         ret = check_attach_btf_id(env);
13996         if (ret)
13997                 goto skip_full_check;
13998
13999         ret = resolve_pseudo_ldimm64(env);
14000         if (ret < 0)
14001                 goto skip_full_check;
14002
14003         if (bpf_prog_is_dev_bound(env->prog->aux)) {
14004                 ret = bpf_prog_offload_verifier_prep(env->prog);
14005                 if (ret)
14006                         goto skip_full_check;
14007         }
14008
14009         ret = check_cfg(env);
14010         if (ret < 0)
14011                 goto skip_full_check;
14012
14013         ret = do_check_subprogs(env);
14014         ret = ret ?: do_check_main(env);
14015
14016         if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
14017                 ret = bpf_prog_offload_finalize(env);
14018
14019 skip_full_check:
14020         kvfree(env->explored_states);
14021
14022         if (ret == 0)
14023                 ret = check_max_stack_depth(env);
14024
14025         /* instruction rewrites happen after this point */
14026         if (is_priv) {
14027                 if (ret == 0)
14028                         opt_hard_wire_dead_code_branches(env);
14029                 if (ret == 0)
14030                         ret = opt_remove_dead_code(env);
14031                 if (ret == 0)
14032                         ret = opt_remove_nops(env);
14033         } else {
14034                 if (ret == 0)
14035                         sanitize_dead_code(env);
14036         }
14037
14038         if (ret == 0)
14039                 /* program is valid, convert *(u32*)(ctx + off) accesses */
14040                 ret = convert_ctx_accesses(env);
14041
14042         if (ret == 0)
14043                 ret = do_misc_fixups(env);
14044
14045         /* do 32-bit optimization after insn patching has done so those patched
14046          * insns could be handled correctly.
14047          */
14048         if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
14049                 ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
14050                 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
14051                                                                      : false;
14052         }
14053
14054         if (ret == 0)
14055                 ret = fixup_call_args(env);
14056
14057         env->verification_time = ktime_get_ns() - start_time;
14058         print_verification_stats(env);
14059         env->prog->aux->verified_insns = env->insn_processed;
14060
14061         if (log->level && bpf_verifier_log_full(log))
14062                 ret = -ENOSPC;
14063         if (log->level && !log->ubuf) {
14064                 ret = -EFAULT;
14065                 goto err_release_maps;
14066         }
14067
14068         if (ret)
14069                 goto err_release_maps;
14070
14071         if (env->used_map_cnt) {
14072                 /* if program passed verifier, update used_maps in bpf_prog_info */
14073                 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
14074                                                           sizeof(env->used_maps[0]),
14075                                                           GFP_KERNEL);
14076
14077                 if (!env->prog->aux->used_maps) {
14078                         ret = -ENOMEM;
14079                         goto err_release_maps;
14080                 }
14081
14082                 memcpy(env->prog->aux->used_maps, env->used_maps,
14083                        sizeof(env->used_maps[0]) * env->used_map_cnt);
14084                 env->prog->aux->used_map_cnt = env->used_map_cnt;
14085         }
14086         if (env->used_btf_cnt) {
14087                 /* if program passed verifier, update used_btfs in bpf_prog_aux */
14088                 env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
14089                                                           sizeof(env->used_btfs[0]),
14090                                                           GFP_KERNEL);
14091                 if (!env->prog->aux->used_btfs) {
14092                         ret = -ENOMEM;
14093                         goto err_release_maps;
14094                 }
14095
14096                 memcpy(env->prog->aux->used_btfs, env->used_btfs,
14097                        sizeof(env->used_btfs[0]) * env->used_btf_cnt);
14098                 env->prog->aux->used_btf_cnt = env->used_btf_cnt;
14099         }
14100         if (env->used_map_cnt || env->used_btf_cnt) {
14101                 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
14102                  * bpf_ld_imm64 instructions
14103                  */
14104                 convert_pseudo_ld_imm64(env);
14105         }
14106
14107         adjust_btf_func(env);
14108
14109 err_release_maps:
14110         if (!env->prog->aux->used_maps)
14111                 /* if we didn't copy map pointers into bpf_prog_info, release
14112                  * them now. Otherwise free_used_maps() will release them.
14113                  */
14114                 release_maps(env);
14115         if (!env->prog->aux->used_btfs)
14116                 release_btfs(env);
14117
14118         /* extension progs temporarily inherit the attach_type of their targets
14119            for verification purposes, so set it back to zero before returning
14120          */
14121         if (env->prog->type == BPF_PROG_TYPE_EXT)
14122                 env->prog->expected_attach_type = 0;
14123
14124         *prog = env->prog;
14125 err_unlock:
14126         if (!is_priv)
14127                 mutex_unlock(&bpf_verifier_lock);
14128         vfree(env->insn_aux_data);
14129 err_free_env:
14130         kfree(env);
14131         return ret;
14132 }