kernel/bpf/syscall.c

   1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
   2  *
   3  * This program is free software; you can redistribute it and/or
   4  * modify it under the terms of version 2 of the GNU General Public
   5  * License as published by the Free Software Foundation.
   6  *
   7  * This program is distributed in the hope that it will be useful, but
   8  * WITHOUT ANY WARRANTY; without even the implied warranty of
   9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  10  * General Public License for more details.
  11  */
  12 #include <linux/bpf.h>
  13 #include <linux/syscalls.h>
  14 #include <linux/slab.h>
  15 #include <linux/anon_inodes.h>
  16 #include <linux/file.h>
  17 #include <linux/license.h>
  18 #include <linux/filter.h>
  19 #include <linux/version.h>
  20 #include <linux/kernel.h>
  21
/* Per-CPU counter.  The map update and delete syscall paths in this file
 * increment it (with preemption disabled) around the map operation and
 * decrement it afterwards.
 */
DEFINE_PER_CPU(int, bpf_prog_active);

/* When non-zero, the bpf() syscall returns -EPERM to any caller that
 * lacks CAP_SYS_ADMIN.
 */
int sysctl_unprivileged_bpf_disabled __read_mostly;

/* Registered map implementations; searched by find_and_alloc_map(). */
static LIST_HEAD(bpf_map_types);
  27
  28 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
  29 {
  30         struct bpf_map_type_list *tl;
  31         struct bpf_map *map;
  32
  33         list_for_each_entry(tl, &bpf_map_types, list_node) {
  34                 if (tl->type == attr->map_type) {
  35                         map = tl->ops->map_alloc(attr);
  36                         if (IS_ERR(map))
  37                                 return map;
  38                         map->ops = tl->ops;
  39                         map->map_type = attr->map_type;
  40                         return map;
  41                 }
  42         }
  43         return ERR_PTR(-EINVAL);
  44 }
  45
  46 /* boot time registration of different map implementations */
  47 void bpf_register_map_type(struct bpf_map_type_list *tl)
  48 {
  49         list_add(&tl->list_node, &bpf_map_types);
  50 }
  51
  52 int bpf_map_precharge_memlock(u32 pages)
  53 {
  54         struct user_struct *user = get_current_user();
  55         unsigned long memlock_limit, cur;
  56
  57         memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
  58         cur = atomic_long_read(&user->locked_vm);
  59         free_uid(user);
  60         if (cur + pages > memlock_limit)
  61                 return -EPERM;
  62         return 0;
  63 }
  64
  65 static int bpf_map_charge_memlock(struct bpf_map *map)
  66 {
  67         struct user_struct *user = get_current_user();
  68         unsigned long memlock_limit;
  69
  70         memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
  71
  72         atomic_long_add(map->pages, &user->locked_vm);
  73
  74         if (atomic_long_read(&user->locked_vm) > memlock_limit) {
  75                 atomic_long_sub(map->pages, &user->locked_vm);
  76                 free_uid(user);
  77                 return -EPERM;
  78         }
  79         map->user = user;
  80         return 0;
  81 }
  82
  83 static void bpf_map_uncharge_memlock(struct bpf_map *map)
  84 {
  85         struct user_struct *user = map->user;
  86
  87         atomic_long_sub(map->pages, &user->locked_vm);
  88         free_uid(user);
  89 }
  90
  91 /* called from workqueue */
  92 static void bpf_map_free_deferred(struct work_struct *work)
  93 {
  94         struct bpf_map *map = container_of(work, struct bpf_map, work);
  95
  96         bpf_map_uncharge_memlock(map);
  97         /* implementation dependent freeing */
  98         map->ops->map_free(map);
  99 }
 100
 101 static void bpf_map_put_uref(struct bpf_map *map)
 102 {
 103         if (atomic_dec_and_test(&map->usercnt)) {
 104                 if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
 105                         bpf_fd_array_map_clear(map);
 106         }
 107 }
 108
 109 /* decrement map refcnt and schedule it for freeing via workqueue
 110  * (unrelying map implementation ops->map_free() might sleep)
 111  */
 112 void bpf_map_put(struct bpf_map *map)
 113 {
 114         if (atomic_dec_and_test(&map->refcnt)) {
 115                 INIT_WORK(&map->work, bpf_map_free_deferred);
 116                 schedule_work(&map->work);
 117         }
 118 }
 119
/* Drop both a usercnt count and a refcnt count on @map, in that order. */
void bpf_map_put_with_uref(struct bpf_map *map)
{
        bpf_map_put_uref(map);
        bpf_map_put(map);
}
 125
 126 static int bpf_map_release(struct inode *inode, struct file *filp)
 127 {
 128         struct bpf_map *map = filp->private_data;
 129
 130         if (map->ops->map_release)
 131                 map->ops->map_release(map, filp);
 132
 133         bpf_map_put_with_uref(map);
 134         return 0;
 135 }
 136
 137 #ifdef CONFIG_PROC_FS
 138 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 139 {
 140         const struct bpf_map *map = filp->private_data;
 141         const struct bpf_array *array;
 142         u32 owner_prog_type = 0;
 143
 144         if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
 145                 array = container_of(map, struct bpf_array, map);
 146                 owner_prog_type = array->owner_prog_type;
 147         }
 148
 149         seq_printf(m,
 150                    "map_type:\t%u\n"
 151                    "key_size:\t%u\n"
 152                    "value_size:\t%u\n"
 153                    "max_entries:\t%u\n"
 154                    "map_flags:\t%#x\n"
 155                    "memlock:\t%llu\n",
 156                    map->map_type,
 157                    map->key_size,
 158                    map->value_size,
 159                    map->max_entries,
 160                    map->map_flags,
 161                    map->pages * 1ULL << PAGE_SHIFT);
 162
 163         if (owner_prog_type)
 164                 seq_printf(m, "owner_prog_type:\t%u\n",
 165                            owner_prog_type);
 166 }
 167 #endif
 168
/* File operations for the anonymous-inode files created by
 * bpf_map_new_fd().  __bpf_map_get() compares a file's f_op against this
 * table to decide whether an fd refers to a map.
 */
static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
        .show_fdinfo    = bpf_map_show_fdinfo,
#endif
        .release        = bpf_map_release,
};
 175
 176 int bpf_map_new_fd(struct bpf_map *map)
 177 {
 178         return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
 179                                 O_RDWR | O_CLOEXEC);
 180 }
 181
 182 /* helper macro to check that unused fields 'union bpf_attr' are zero */
 183 #define CHECK_ATTR(CMD) \
 184         memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
 185                    sizeof(attr->CMD##_LAST_FIELD), 0, \
 186                    sizeof(*attr) - \
 187                    offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
 188                    sizeof(attr->CMD##_LAST_FIELD)) != NULL
 189
 190 #define BPF_MAP_CREATE_LAST_FIELD map_flags
 191 /* called via syscall */
 192 static int map_create(union bpf_attr *attr)
 193 {
 194         struct bpf_map *map;
 195         int err;
 196
 197         err = CHECK_ATTR(BPF_MAP_CREATE);
 198         if (err)
 199                 return -EINVAL;
 200
 201         /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
 202         map = find_and_alloc_map(attr);
 203         if (IS_ERR(map))
 204                 return PTR_ERR(map);
 205
 206         atomic_set(&map->refcnt, 1);
 207         atomic_set(&map->usercnt, 1);
 208
 209         err = bpf_map_charge_memlock(map);
 210         if (err)
 211                 goto free_map_nouncharge;
 212
 213         err = bpf_map_new_fd(map);
 214         if (err < 0)
 215                 /* failed to allocate fd */
 216                 goto free_map;
 217
 218         return err;
 219
 220 free_map:
 221         bpf_map_uncharge_memlock(map);
 222 free_map_nouncharge:
 223         map->ops->map_free(map);
 224         return err;
 225 }
 226
 227 /* if error is returned, fd is released.
 228  * On success caller should complete fd access with matching fdput()
 229  */
 230 struct bpf_map *__bpf_map_get(struct fd f)
 231 {
 232         if (!f.file)
 233                 return ERR_PTR(-EBADF);
 234         if (f.file->f_op != &bpf_map_fops) {
 235                 fdput(f);
 236                 return ERR_PTR(-EINVAL);
 237         }
 238
 239         return f.file->private_data;
 240 }
 241
 242 /* prog's and map's refcnt limit */
 243 #define BPF_MAX_REFCNT 32768
 244
 245 struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
 246 {
 247         if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
 248                 atomic_dec(&map->refcnt);
 249                 return ERR_PTR(-EBUSY);
 250         }
 251         if (uref)
 252                 atomic_inc(&map->usercnt);
 253         return map;
 254 }
 255
 256 struct bpf_map *bpf_map_get_with_uref(u32 ufd)
 257 {
 258         struct fd f = fdget(ufd);
 259         struct bpf_map *map;
 260
 261         map = __bpf_map_get(f);
 262         if (IS_ERR(map))
 263                 return map;
 264
 265         map = bpf_map_inc(map, true);
 266         fdput(f);
 267
 268         return map;
 269 }
 270
/* Weak default used by map_lookup_elem() for BPF_MAP_TYPE_STACK_TRACE:
 * reports -ENOTSUPP.  NOTE(review): presumably overridden by the
 * stack-trace map implementation when that is built in - confirm.
 */
int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
        return -ENOTSUPP;
}
 275
 276 /* last field in 'union bpf_attr' used by this command */
 277 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
 278
 279 static int map_lookup_elem(union bpf_attr *attr)
 280 {
 281         void __user *ukey = u64_to_user_ptr(attr->key);
 282         void __user *uvalue = u64_to_user_ptr(attr->value);
 283         int ufd = attr->map_fd;
 284         struct bpf_map *map;
 285         void *key, *value, *ptr;
 286         u32 value_size;
 287         struct fd f;
 288         int err;
 289
 290         if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
 291                 return -EINVAL;
 292
 293         f = fdget(ufd);
 294         map = __bpf_map_get(f);
 295         if (IS_ERR(map))
 296                 return PTR_ERR(map);
 297
 298         err = -ENOMEM;
 299         key = kmalloc(map->key_size, GFP_USER);
 300         if (!key)
 301                 goto err_put;
 302
 303         err = -EFAULT;
 304         if (copy_from_user(key, ukey, map->key_size) != 0)
 305                 goto free_key;
 306
 307         if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 308             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
 309             map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
 310                 value_size = round_up(map->value_size, 8) * num_possible_cpus();
 311         else
 312                 value_size = map->value_size;
 313
 314         err = -ENOMEM;
 315         value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
 316         if (!value)
 317                 goto free_key;
 318
 319         if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 320             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
 321                 err = bpf_percpu_hash_copy(map, key, value);
 322         } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 323                 err = bpf_percpu_array_copy(map, key, value);
 324         } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
 325                 err = bpf_stackmap_copy(map, key, value);
 326         } else {
 327                 rcu_read_lock();
 328                 ptr = map->ops->map_lookup_elem(map, key);
 329                 if (ptr)
 330                         memcpy(value, ptr, value_size);
 331                 rcu_read_unlock();
 332                 err = ptr ? 0 : -ENOENT;
 333         }
 334
 335         if (err)
 336                 goto free_value;
 337
 338         err = -EFAULT;
 339         if (copy_to_user(uvalue, value, value_size) != 0)
 340                 goto free_value;
 341
 342         err = 0;
 343
 344 free_value:
 345         kfree(value);
 346 free_key:
 347         kfree(key);
 348 err_put:
 349         fdput(f);
 350         return err;
 351 }
 352
 353 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
 354
 355 static int map_update_elem(union bpf_attr *attr)
 356 {
 357         void __user *ukey = u64_to_user_ptr(attr->key);
 358         void __user *uvalue = u64_to_user_ptr(attr->value);
 359         int ufd = attr->map_fd;
 360         struct bpf_map *map;
 361         void *key, *value;
 362         u32 value_size;
 363         struct fd f;
 364         int err;
 365
 366         if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
 367                 return -EINVAL;
 368
 369         f = fdget(ufd);
 370         map = __bpf_map_get(f);
 371         if (IS_ERR(map))
 372                 return PTR_ERR(map);
 373
 374         err = -ENOMEM;
 375         key = kmalloc(map->key_size, GFP_USER);
 376         if (!key)
 377                 goto err_put;
 378
 379         err = -EFAULT;
 380         if (copy_from_user(key, ukey, map->key_size) != 0)
 381                 goto free_key;
 382
 383         if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 384             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
 385             map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
 386                 value_size = round_up(map->value_size, 8) * num_possible_cpus();
 387         else
 388                 value_size = map->value_size;
 389
 390         err = -ENOMEM;
 391         value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
 392         if (!value)
 393                 goto free_key;
 394
 395         err = -EFAULT;
 396         if (copy_from_user(value, uvalue, value_size) != 0)
 397                 goto free_value;
 398
 399         /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
 400          * inside bpf map update or delete otherwise deadlocks are possible
 401          */
 402         preempt_disable();
 403         __this_cpu_inc(bpf_prog_active);
 404         if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 405             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
 406                 err = bpf_percpu_hash_update(map, key, value, attr->flags);
 407         } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 408                 err = bpf_percpu_array_update(map, key, value, attr->flags);
 409         } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
 410                    map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
 411                    map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) {
 412                 rcu_read_lock();
 413                 err = bpf_fd_array_map_update_elem(map, f.file, key, value,
 414                                                    attr->flags);
 415                 rcu_read_unlock();
 416         } else {
 417                 rcu_read_lock();
 418                 err = map->ops->map_update_elem(map, key, value, attr->flags);
 419                 rcu_read_unlock();
 420         }
 421         __this_cpu_dec(bpf_prog_active);
 422         preempt_enable();
 423
 424 free_value:
 425         kfree(value);
 426 free_key:
 427         kfree(key);
 428 err_put:
 429         fdput(f);
 430         return err;
 431 }
 432
 433 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key
 434
 435 static int map_delete_elem(union bpf_attr *attr)
 436 {
 437         void __user *ukey = u64_to_user_ptr(attr->key);
 438         int ufd = attr->map_fd;
 439         struct bpf_map *map;
 440         struct fd f;
 441         void *key;
 442         int err;
 443
 444         if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
 445                 return -EINVAL;
 446
 447         f = fdget(ufd);
 448         map = __bpf_map_get(f);
 449         if (IS_ERR(map))
 450                 return PTR_ERR(map);
 451
 452         err = -ENOMEM;
 453         key = kmalloc(map->key_size, GFP_USER);
 454         if (!key)
 455                 goto err_put;
 456
 457         err = -EFAULT;
 458         if (copy_from_user(key, ukey, map->key_size) != 0)
 459                 goto free_key;
 460
 461         preempt_disable();
 462         __this_cpu_inc(bpf_prog_active);
 463         rcu_read_lock();
 464         err = map->ops->map_delete_elem(map, key);
 465         rcu_read_unlock();
 466         __this_cpu_dec(bpf_prog_active);
 467         preempt_enable();
 468
 469 free_key:
 470         kfree(key);
 471 err_put:
 472         fdput(f);
 473         return err;
 474 }
 475
 476 /* last field in 'union bpf_attr' used by this command */
 477 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
 478
 479 static int map_get_next_key(union bpf_attr *attr)
 480 {
 481         void __user *ukey = u64_to_user_ptr(attr->key);
 482         void __user *unext_key = u64_to_user_ptr(attr->next_key);
 483         int ufd = attr->map_fd;
 484         struct bpf_map *map;
 485         void *key, *next_key;
 486         struct fd f;
 487         int err;
 488
 489         if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
 490                 return -EINVAL;
 491
 492         f = fdget(ufd);
 493         map = __bpf_map_get(f);
 494         if (IS_ERR(map))
 495                 return PTR_ERR(map);
 496
 497         err = -ENOMEM;
 498         key = kmalloc(map->key_size, GFP_USER);
 499         if (!key)
 500                 goto err_put;
 501
 502         err = -EFAULT;
 503         if (copy_from_user(key, ukey, map->key_size) != 0)
 504                 goto free_key;
 505
 506         err = -ENOMEM;
 507         next_key = kmalloc(map->key_size, GFP_USER);
 508         if (!next_key)
 509                 goto free_key;
 510
 511         rcu_read_lock();
 512         err = map->ops->map_get_next_key(map, key, next_key);
 513         rcu_read_unlock();
 514         if (err)
 515                 goto free_next_key;
 516
 517         err = -EFAULT;
 518         if (copy_to_user(unext_key, next_key, map->key_size) != 0)
 519                 goto free_next_key;
 520
 521         err = 0;
 522
 523 free_next_key:
 524         kfree(next_key);
 525 free_key:
 526         kfree(key);
 527 err_put:
 528         fdput(f);
 529         return err;
 530 }
 531
 532 static LIST_HEAD(bpf_prog_types);
 533
 534 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
 535 {
 536         struct bpf_prog_type_list *tl;
 537
 538         list_for_each_entry(tl, &bpf_prog_types, list_node) {
 539                 if (tl->type == type) {
 540                         prog->aux->ops = tl->ops;
 541                         prog->type = type;
 542                         return 0;
 543                 }
 544         }
 545
 546         return -EINVAL;
 547 }
 548
 549 void bpf_register_prog_type(struct bpf_prog_type_list *tl)
 550 {
 551         list_add(&tl->list_node, &bpf_prog_types);
 552 }
 553
 554 /* fixup insn->imm field of bpf_call instructions:
 555  * if (insn->imm == BPF_FUNC_map_lookup_elem)
 556  *      insn->imm = bpf_map_lookup_elem - __bpf_call_base;
 557  * else if (insn->imm == BPF_FUNC_map_update_elem)
 558  *      insn->imm = bpf_map_update_elem - __bpf_call_base;
 559  * else ...
 560  *
 561  * this function is called after eBPF program passed verification
 562  */
 563 static void fixup_bpf_calls(struct bpf_prog *prog)
 564 {
 565         const struct bpf_func_proto *fn;
 566         int i;
 567
 568         for (i = 0; i < prog->len; i++) {
 569                 struct bpf_insn *insn = &prog->insnsi[i];
 570
 571                 if (insn->code == (BPF_JMP | BPF_CALL)) {
 572                         /* we reach here when program has bpf_call instructions
 573                          * and it passed bpf_check(), means that
 574                          * ops->get_func_proto must have been supplied, check it
 575                          */
 576                         BUG_ON(!prog->aux->ops->get_func_proto);
 577
 578                         if (insn->imm == BPF_FUNC_get_route_realm)
 579                                 prog->dst_needed = 1;
 580                         if (insn->imm == BPF_FUNC_get_prandom_u32)
 581                                 bpf_user_rnd_init_once();
 582                         if (insn->imm == BPF_FUNC_xdp_adjust_head)
 583                                 prog->xdp_adjust_head = 1;
 584                         if (insn->imm == BPF_FUNC_tail_call) {
 585                                 /* mark bpf_tail_call as different opcode
 586                                  * to avoid conditional branch in
 587                                  * interpeter for every normal call
 588                                  * and to prevent accidental JITing by
 589                                  * JIT compiler that doesn't support
 590                                  * bpf_tail_call yet
 591                                  */
 592                                 insn->imm = 0;
 593                                 insn->code |= BPF_X;
 594                                 continue;
 595                         }
 596
 597                         fn = prog->aux->ops->get_func_proto(insn->imm);
 598                         /* all functions that have prototype and verifier allowed
 599                          * programs to call them, must be real in-kernel functions
 600                          */
 601                         BUG_ON(!fn->func);
 602                         insn->imm = fn->func - __bpf_call_base;
 603                 }
 604         }
 605 }
 606
 607 /* drop refcnt on maps used by eBPF program and free auxilary data */
 608 static void free_used_maps(struct bpf_prog_aux *aux)
 609 {
 610         int i;
 611
 612         for (i = 0; i < aux->used_map_cnt; i++)
 613                 bpf_map_put(aux->used_maps[i]);
 614
 615         kfree(aux->used_maps);
 616 }
 617
 618 int __bpf_prog_charge(struct user_struct *user, u32 pages)
 619 {
 620         unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 621         unsigned long user_bufs;
 622
 623         if (user) {
 624                 user_bufs = atomic_long_add_return(pages, &user->locked_vm);
 625                 if (user_bufs > memlock_limit) {
 626                         atomic_long_sub(pages, &user->locked_vm);
 627                         return -EPERM;
 628                 }
 629         }
 630
 631         return 0;
 632 }
 633
 634 void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
 635 {
 636         if (user)
 637                 atomic_long_sub(pages, &user->locked_vm);
 638 }
 639
 640 static int bpf_prog_charge_memlock(struct bpf_prog *prog)
 641 {
 642         struct user_struct *user = get_current_user();
 643         int ret;
 644
 645         ret = __bpf_prog_charge(user, prog->pages);
 646         if (ret) {
 647                 free_uid(user);
 648                 return ret;
 649         }
 650
 651         prog->aux->user = user;
 652         return 0;
 653 }
 654
 655 static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
 656 {
 657         struct user_struct *user = prog->aux->user;
 658
 659         __bpf_prog_uncharge(user, prog->pages);
 660         free_uid(user);
 661 }
 662
 663 static void __bpf_prog_put_rcu(struct rcu_head *rcu)
 664 {
 665         struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
 666
 667         free_used_maps(aux);
 668         bpf_prog_uncharge_memlock(aux->prog);
 669         bpf_prog_free(aux->prog);
 670 }
 671
 672 void bpf_prog_put(struct bpf_prog *prog)
 673 {
 674         if (atomic_dec_and_test(&prog->aux->refcnt))
 675                 call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
 676 }
 677 EXPORT_SYMBOL_GPL(bpf_prog_put);
 678
 679 static int bpf_prog_release(struct inode *inode, struct file *filp)
 680 {
 681         struct bpf_prog *prog = filp->private_data;
 682
 683         bpf_prog_put(prog);
 684         return 0;
 685 }
 686
 687 #ifdef CONFIG_PROC_FS
 688 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
 689 {
 690         const struct bpf_prog *prog = filp->private_data;
 691         char prog_digest[sizeof(prog->digest) * 2 + 1] = { };
 692
 693         bin2hex(prog_digest, prog->digest, sizeof(prog->digest));
 694         seq_printf(m,
 695                    "prog_type:\t%u\n"
 696                    "prog_jited:\t%u\n"
 697                    "prog_digest:\t%s\n"
 698                    "memlock:\t%llu\n",
 699                    prog->type,
 700                    prog->jited,
 701                    prog_digest,
 702                    prog->pages * 1ULL << PAGE_SHIFT);
 703 }
 704 #endif
 705
/* File operations for the anonymous-inode files created by
 * bpf_prog_new_fd().  ____bpf_prog_get() compares a file's f_op against
 * this table to decide whether an fd refers to a program.
 */
static const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
        .show_fdinfo    = bpf_prog_show_fdinfo,
#endif
        .release        = bpf_prog_release,
};
 712
 713 int bpf_prog_new_fd(struct bpf_prog *prog)
 714 {
 715         return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
 716                                 O_RDWR | O_CLOEXEC);
 717 }
 718
 719 static struct bpf_prog *____bpf_prog_get(struct fd f)
 720 {
 721         if (!f.file)
 722                 return ERR_PTR(-EBADF);
 723         if (f.file->f_op != &bpf_prog_fops) {
 724                 fdput(f);
 725                 return ERR_PTR(-EINVAL);
 726         }
 727
 728         return f.file->private_data;
 729 }
 730
 731 struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
 732 {
 733         if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
 734                 atomic_sub(i, &prog->aux->refcnt);
 735                 return ERR_PTR(-EBUSY);
 736         }
 737         return prog;
 738 }
 739 EXPORT_SYMBOL_GPL(bpf_prog_add);
 740
 741 void bpf_prog_sub(struct bpf_prog *prog, int i)
 742 {
 743         /* Only to be used for undoing previous bpf_prog_add() in some
 744          * error path. We still know that another entity in our call
 745          * path holds a reference to the program, thus atomic_sub() can
 746          * be safely used in such cases!
 747          */
 748         WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
 749 }
 750 EXPORT_SYMBOL_GPL(bpf_prog_sub);
 751
/* Take a single reference on @prog; shorthand for bpf_prog_add(prog, 1)
 * and returns what that call returns.
 */
struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
        return bpf_prog_add(prog, 1);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);
 757
 758 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
 759 {
 760         struct fd f = fdget(ufd);
 761         struct bpf_prog *prog;
 762
 763         prog = ____bpf_prog_get(f);
 764         if (IS_ERR(prog))
 765                 return prog;
 766         if (type && prog->type != *type) {
 767                 prog = ERR_PTR(-EINVAL);
 768                 goto out;
 769         }
 770
 771         prog = bpf_prog_inc(prog);
 772 out:
 773         fdput(f);
 774         return prog;
 775 }
 776
/* Get a referenced program from user fd @ufd, accepting any program
 * type (no type filter is passed to __bpf_prog_get()).
 */
struct bpf_prog *bpf_prog_get(u32 ufd)
{
        return __bpf_prog_get(ufd, NULL);
}
 781
/* Get a referenced program from user fd @ufd, requiring it to be of
 * program type @type; a mismatch yields ERR_PTR(-EINVAL) from
 * __bpf_prog_get().
 */
struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
        return __bpf_prog_get(ufd, &type);
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);
 787
 788 /* last field in 'union bpf_attr' used by this command */
 789 #define BPF_PROG_LOAD_LAST_FIELD kern_version
 790
 791 static int bpf_prog_load(union bpf_attr *attr)
 792 {
 793         enum bpf_prog_type type = attr->prog_type;
 794         struct bpf_prog *prog;
 795         int err;
 796         char license[128];
 797         bool is_gpl;
 798
 799         if (CHECK_ATTR(BPF_PROG_LOAD))
 800                 return -EINVAL;
 801
 802         /* copy eBPF program license from user space */
 803         if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
 804                               sizeof(license) - 1) < 0)
 805                 return -EFAULT;
 806         license[sizeof(license) - 1] = 0;
 807
 808         /* eBPF programs must be GPL compatible to use GPL-ed functions */
 809         is_gpl = license_is_gpl_compatible(license);
 810
 811         if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
 812                 return -E2BIG;
 813
 814         if (type == BPF_PROG_TYPE_KPROBE &&
 815             attr->kern_version != LINUX_VERSION_CODE)
 816                 return -EINVAL;
 817
 818         if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
 819                 return -EPERM;
 820
 821         /* plain bpf_prog allocation */
 822         prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
 823         if (!prog)
 824                 return -ENOMEM;
 825
 826         err = bpf_prog_charge_memlock(prog);
 827         if (err)
 828                 goto free_prog_nouncharge;
 829
 830         prog->len = attr->insn_cnt;
 831
 832         err = -EFAULT;
 833         if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
 834                            bpf_prog_insn_size(prog)) != 0)
 835                 goto free_prog;
 836
 837         prog->orig_prog = NULL;
 838         prog->jited = 0;
 839
 840         atomic_set(&prog->aux->refcnt, 1);
 841         prog->gpl_compatible = is_gpl ? 1 : 0;
 842
 843         /* find program type: socket_filter vs tracing_filter */
 844         err = find_prog_type(type, prog);
 845         if (err < 0)
 846                 goto free_prog;
 847
 848         /* run eBPF verifier */
 849         err = bpf_check(&prog, attr);
 850         if (err < 0)
 851                 goto free_used_maps;
 852
 853         /* fixup BPF_CALL->imm field */
 854         fixup_bpf_calls(prog);
 855
 856         /* eBPF program is ready to be JITed */
 857         prog = bpf_prog_select_runtime(prog, &err);
 858         if (err < 0)
 859                 goto free_used_maps;
 860
 861         err = bpf_prog_new_fd(prog);
 862         if (err < 0)
 863                 /* failed to allocate fd */
 864                 goto free_used_maps;
 865
 866         return err;
 867
 868 free_used_maps:
 869         free_used_maps(prog->aux);
 870 free_prog:
 871         bpf_prog_uncharge_memlock(prog);
 872 free_prog_nouncharge:
 873         bpf_prog_free(prog);
 874         return err;
 875 }
 876
 877 #define BPF_OBJ_LAST_FIELD bpf_fd
 878
 879 static int bpf_obj_pin(const union bpf_attr *attr)
 880 {
 881         if (CHECK_ATTR(BPF_OBJ))
 882                 return -EINVAL;
 883
 884         return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
 885 }
 886
 887 static int bpf_obj_get(const union bpf_attr *attr)
 888 {
 889         if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
 890                 return -EINVAL;
 891
 892         return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
 893 }
 894
 895 #ifdef CONFIG_CGROUP_BPF
 896
 897 #define BPF_PROG_ATTACH_LAST_FIELD attach_type
 898
 899 static int bpf_prog_attach(const union bpf_attr *attr)
 900 {
 901         struct bpf_prog *prog;
 902         struct cgroup *cgrp;
 903         enum bpf_prog_type ptype;
 904
 905         if (!capable(CAP_NET_ADMIN))
 906                 return -EPERM;
 907
 908         if (CHECK_ATTR(BPF_PROG_ATTACH))
 909                 return -EINVAL;
 910
 911         switch (attr->attach_type) {
 912         case BPF_CGROUP_INET_INGRESS:
 913         case BPF_CGROUP_INET_EGRESS:
 914                 ptype = BPF_PROG_TYPE_CGROUP_SKB;
 915                 break;
 916         case BPF_CGROUP_INET_SOCK_CREATE:
 917                 ptype = BPF_PROG_TYPE_CGROUP_SOCK;
 918                 break;
 919         default:
 920                 return -EINVAL;
 921         }
 922
 923         prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
 924         if (IS_ERR(prog))
 925                 return PTR_ERR(prog);
 926
 927         cgrp = cgroup_get_from_fd(attr->target_fd);
 928         if (IS_ERR(cgrp)) {
 929                 bpf_prog_put(prog);
 930                 return PTR_ERR(cgrp);
 931         }
 932
 933         cgroup_bpf_update(cgrp, prog, attr->attach_type);
 934         cgroup_put(cgrp);
 935
 936         return 0;
 937 }
 938
 939 #define BPF_PROG_DETACH_LAST_FIELD attach_type
 940
 941 static int bpf_prog_detach(const union bpf_attr *attr)
 942 {
 943         struct cgroup *cgrp;
 944
 945         if (!capable(CAP_NET_ADMIN))
 946                 return -EPERM;
 947
 948         if (CHECK_ATTR(BPF_PROG_DETACH))
 949                 return -EINVAL;
 950
 951         switch (attr->attach_type) {
 952         case BPF_CGROUP_INET_INGRESS:
 953         case BPF_CGROUP_INET_EGRESS:
 954         case BPF_CGROUP_INET_SOCK_CREATE:
 955                 cgrp = cgroup_get_from_fd(attr->target_fd);
 956                 if (IS_ERR(cgrp))
 957                         return PTR_ERR(cgrp);
 958
 959                 cgroup_bpf_update(cgrp, NULL, attr->attach_type);
 960                 cgroup_put(cgrp);
 961                 break;
 962
 963         default:
 964                 return -EINVAL;
 965         }
 966
 967         return 0;
 968 }
 969 #endif /* CONFIG_CGROUP_BPF */
 970
 971 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 972 {
 973         union bpf_attr attr = {};
 974         int err;
 975
 976         if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
 977                 return -EPERM;
 978
 979         if (!access_ok(VERIFY_READ, uattr, 1))
 980                 return -EFAULT;
 981
 982         if (size > PAGE_SIZE)   /* silly large */
 983                 return -E2BIG;
 984
 985         /* If we're handed a bigger struct than we know of,
 986          * ensure all the unknown bits are 0 - i.e. new
 987          * user-space does not rely on any kernel feature
 988          * extensions we dont know about yet.
 989          */
 990         if (size > sizeof(attr)) {
 991                 unsigned char __user *addr;
 992                 unsigned char __user *end;
 993                 unsigned char val;
 994
 995                 addr = (void __user *)uattr + sizeof(attr);
 996                 end  = (void __user *)uattr + size;
 997
 998                 for (; addr < end; addr++) {
 999                         err = get_user(val, addr);
1000                         if (err)
1001                                 return err;
1002                         if (val)
1003                                 return -E2BIG;
1004                 }
1005                 size = sizeof(attr);
1006         }
1007
1008         /* copy attributes from user space, may be less than sizeof(bpf_attr) */
1009         if (copy_from_user(&attr, uattr, size) != 0)
1010                 return -EFAULT;
1011
1012         switch (cmd) {
1013         case BPF_MAP_CREATE:
1014                 err = map_create(&attr);
1015                 break;
1016         case BPF_MAP_LOOKUP_ELEM:
1017                 err = map_lookup_elem(&attr);
1018                 break;
1019         case BPF_MAP_UPDATE_ELEM:
1020                 err = map_update_elem(&attr);
1021                 break;
1022         case BPF_MAP_DELETE_ELEM:
1023                 err = map_delete_elem(&attr);
1024                 break;
1025         case BPF_MAP_GET_NEXT_KEY:
1026                 err = map_get_next_key(&attr);
1027                 break;
1028         case BPF_PROG_LOAD:
1029                 err = bpf_prog_load(&attr);
1030                 break;
1031         case BPF_OBJ_PIN:
1032                 err = bpf_obj_pin(&attr);
1033                 break;
1034         case BPF_OBJ_GET:
1035                 err = bpf_obj_get(&attr);
1036                 break;
1037
1038 #ifdef CONFIG_CGROUP_BPF
1039         case BPF_PROG_ATTACH:
1040                 err = bpf_prog_attach(&attr);
1041                 break;
1042         case BPF_PROG_DETACH:
1043                 err = bpf_prog_detach(&attr);
1044                 break;
1045 #endif
1046
1047         default:
1048                 err = -EINVAL;
1049                 break;
1050         }
1051
1052         return err;
1053 }