samples/bpf: add a test for bpf_override_return

[linux-block.git] / tools / include / uapi / linux / bpf.h
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h

index c174971afbe657d2523503b94a3a2c960a6cf9e1..cf446c25c0ec73326710ee49df6a1749d1c65d0c 100644 (file)
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
  /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
   *
   * This program is free software; you can redistribute it and/or
@@ -92,6 +93,7 @@ enum bpf_cmd {
         BPF_PROG_GET_FD_BY_ID,
         BPF_MAP_GET_FD_BY_ID,
         BPF_OBJ_GET_INFO_BY_FD,
+       BPF_PROG_QUERY,
  };
  
  enum bpf_map_type {
@@ -111,6 +113,7 @@ enum bpf_map_type {
         BPF_MAP_TYPE_HASH_OF_MAPS,
         BPF_MAP_TYPE_DEVMAP,
         BPF_MAP_TYPE_SOCKMAP,
+       BPF_MAP_TYPE_CPUMAP,
  };
  
  enum bpf_prog_type {
@@ -129,6 +132,7 @@ enum bpf_prog_type {
         BPF_PROG_TYPE_LWT_XMIT,
         BPF_PROG_TYPE_SOCK_OPS,
         BPF_PROG_TYPE_SK_SKB,
+       BPF_PROG_TYPE_CGROUP_DEVICE,
  };
  
  enum bpf_attach_type {
@@ -138,16 +142,53 @@ enum bpf_attach_type {
         BPF_CGROUP_SOCK_OPS,
         BPF_SK_SKB_STREAM_PARSER,
         BPF_SK_SKB_STREAM_VERDICT,
+       BPF_CGROUP_DEVICE,
         __MAX_BPF_ATTACH_TYPE
  };
  
  #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
  
-/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
- * to the given target_fd cgroup the descendent cgroup will be able to
- * override effective bpf program that was inherited from this cgroup
+/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
+ *
+ * NONE(default): No further bpf programs allowed in the subtree.
+ *
+ * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program,
+ * the program in this cgroup yields to sub-cgroup program.
+ *
+ * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program,
+ * that cgroup program gets run in addition to the program in this cgroup.
+ *
+ * Only one program is allowed to be attached to a cgroup with
+ * NONE or BPF_F_ALLOW_OVERRIDE flag.
+ * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will
+ * release old program and attach the new one. Attach flags have to match.
+ *
+ * Multiple programs are allowed to be attached to a cgroup with
+ * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order
+ * (those that were attached first, run first).
+ * The programs of sub-cgroup are executed first, then programs of
+ * this cgroup and then programs of parent cgroup.
+ * When a child program makes a decision (like picking TCP CA or sock bind),
+ * the parent program has a chance to override it.
+ *
+ * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
+ * A cgroup with NONE doesn't allow any programs in sub-cgroups.
+ * Ex1:
+ * cgrp1 (MULTI progs A, B) ->
+ *    cgrp2 (OVERRIDE prog C) ->
+ *      cgrp3 (MULTI prog D) ->
+ *        cgrp4 (OVERRIDE prog E) ->
+ *          cgrp5 (NONE prog F)
+ * the event in cgrp5 triggers execution of F,D,A,B in that order.
+ * if prog F is detached, the execution is E,D,A,B
+ * if prog F and D are detached, the execution is E,A,B
+ * if prog F, E and D are detached, the execution is C,A,B
+ *
+ * All eligible programs are executed regardless of return code from
+ * earlier programs.
   */
  #define BPF_F_ALLOW_OVERRIDE   (1U << 0)
+#define BPF_F_ALLOW_MULTI      (1U << 1)
  
  /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
   * verifier will perform strict alignment checking as if the kernel
@@ -175,6 +216,15 @@ enum bpf_attach_type {
  /* Specify numa node during map creation */
  #define BPF_F_NUMA_NODE                (1U << 2)
  
+/* flags for BPF_PROG_QUERY */
+#define BPF_F_QUERY_EFFECTIVE  (1U << 0)
+
+#define BPF_OBJ_NAME_LEN 16U
+
+/* Flags for accessing BPF object */
+#define BPF_F_RDONLY           (1U << 3)
+#define BPF_F_WRONLY           (1U << 4)
+
  union bpf_attr {
         struct { /* anonymous struct used by BPF_MAP_CREATE command */
                 __u32   map_type;       /* one of enum bpf_map_type */
@@ -188,6 +238,7 @@ union bpf_attr {
                 __u32   numa_node;      /* numa node (effective only if
                                          * BPF_F_NUMA_NODE is set).
                                          */
+               char    map_name[BPF_OBJ_NAME_LEN];
         };
  
         struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -210,11 +261,14 @@ union bpf_attr {
                 __aligned_u64   log_buf;        /* user supplied buffer */
                 __u32           kern_version;   /* checked when prog_type=kprobe */
                 __u32           prog_flags;
+               char            prog_name[BPF_OBJ_NAME_LEN];
+               __u32           prog_ifindex;   /* ifindex of netdev to prep for */
         };
  
         struct { /* anonymous struct used by BPF_OBJ_* commands */
                 __aligned_u64   pathname;
                 __u32           bpf_fd;
+               __u32           file_flags;
         };
  
         struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
@@ -242,6 +296,7 @@ union bpf_attr {
                         __u32           map_id;
                 };
                 __u32           next_id;
+               __u32           open_flags;
         };
  
         struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
@@ -249,6 +304,15 @@ union bpf_attr {
                 __u32           info_len;
                 __aligned_u64   info;
         } info;
+
+       struct { /* anonymous struct used by BPF_PROG_QUERY command */
+               __u32           target_fd;      /* container object to query */
+               __u32           attach_type;
+               __u32           query_flags;
+               __u32           attach_flags;
+               __aligned_u64   prog_ids;
+               __u32           prog_cnt;
+       } query;
  } __attribute__((aligned(8)));
  
  /* BPF helper function descriptions:
@@ -553,12 +617,22 @@ union bpf_attr {
   * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen)
   *     Calls setsockopt. Not all opts are available, only those with
   *     integer optvals plus TCP_CONGESTION.
- *     Supported levels: SOL_SOCKET and IPROTO_TCP
+ *     Supported levels: SOL_SOCKET and IPPROTO_TCP
   *     @bpf_socket: pointer to bpf_socket
- *     @level: SOL_SOCKET or IPROTO_TCP
+ *     @level: SOL_SOCKET or IPPROTO_TCP
   *     @optname: option name
   *     @optval: pointer to option value
- *     @optlen: length of optval in byes
+ *     @optlen: length of optval in bytes
+ *     Return: 0 or negative error
+ *
+ * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen)
+ *     Calls getsockopt. Not all opts are available.
+ *     Supported levels: IPPROTO_TCP
+ *     @bpf_socket: pointer to bpf_socket
+ *     @level: IPPROTO_TCP
+ *     @optname: option name
+ *     @optval: pointer to option value
+ *     @optlen: length of optval in bytes
   *     Return: 0 or negative error
   *
   * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
@@ -569,20 +643,44 @@ union bpf_attr {
   *     @flags: reserved for future use
   *     Return: 0 on success or negative error code
   *
- * int bpf_sk_redirect_map(skb, map, key, flags)
+ * int bpf_sk_redirect_map(map, key, flags)
   *     Redirect skb to a sock in map using key as a lookup key for the
   *     sock in map.
- *     @skb: pointer to skb
   *     @map: pointer to sockmap
   *     @key: key to lookup sock in map
   *     @flags: reserved for future use
- *     Return: SK_REDIRECT
+ *     Return: SK_PASS
   *
   * int bpf_sock_map_update(skops, map, key, flags)
   *     @skops: pointer to bpf_sock_ops
   *     @map: pointer to sockmap to update
   *     @key: key to insert/update sock in map
   *     @flags: same flags as map update elem
+ *
+ * int bpf_xdp_adjust_meta(xdp_md, delta)
+ *     Adjust the xdp_md.data_meta by delta
+ *     @xdp_md: pointer to xdp_md
+ *     @delta: A positive/negative integer to be added to xdp_md.data_meta
+ *     Return: 0 on success or negative on error
+ *
+ * int bpf_perf_event_read_value(map, flags, buf, buf_size)
+ *     read perf event counter value and perf event enabled/running time
+ *     @map: pointer to perf_event_array map
+ *     @flags: index of event in the map or bitmask flags
+ *     @buf: buf to fill
+ *     @buf_size: size of the buf
+ *     Return: 0 on success or negative error code
+ *
+ * int bpf_perf_prog_read_value(ctx, buf, buf_size)
+ *     read perf prog attached perf event counter and enabled/running time
+ *     @ctx: pointer to ctx
+ *     @buf: buf to fill
+ *     @buf_size: size of the buf
+ *     Return: 0 on success or negative error code
+ *
+ * int bpf_override_return(pt_regs, rc)
+ *     @pt_regs: pointer to struct pt_regs
+ *     @rc: the return value to set
   */
  #define __BPF_FUNC_MAPPER(FN)          \
         FN(unspec),                     \
@@ -639,6 +737,11 @@ union bpf_attr {
         FN(redirect_map),               \
         FN(sk_redirect_map),            \
         FN(sock_map_update),            \
+       FN(xdp_adjust_meta),            \
+       FN(perf_event_read_value),      \
+       FN(perf_prog_read_value),       \
+       FN(getsockopt),                 \
+       FN(override_return),
  
  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
   * function eBPF program intends to call
@@ -682,7 +785,9 @@ enum bpf_func_id {
  #define BPF_F_ZERO_CSUM_TX             (1ULL << 1)
  #define BPF_F_DONT_FRAGMENT            (1ULL << 2)
  
-/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
+/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
+ * BPF_FUNC_perf_event_read_value flags.
+ */
  #define BPF_F_INDEX_MASK               0xffffffffULL
  #define BPF_F_CURRENT_CPU              BPF_F_INDEX_MASK
  /* BPF_FUNC_perf_event_output for sk_buff input context. */
@@ -716,7 +821,7 @@ struct __sk_buff {
         __u32 data_end;
         __u32 napi_id;
  
-       /* accessed by BPF_PROG_TYPE_sk_skb types */
+       /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */
         __u32 family;
         __u32 remote_ip4;       /* Stored in network byte order */
         __u32 local_ip4;        /* Stored in network byte order */
@@ -724,6 +829,9 @@ struct __sk_buff {
         __u32 local_ip6[4];     /* Stored in network byte order */
         __u32 remote_port;      /* Stored in network byte order */
         __u32 local_port;       /* stored in host byte order */
+       /* ... here. */
+
+       __u32 data_meta;
  };
  
  struct bpf_tunnel_key {
@@ -784,12 +892,12 @@ enum xdp_action {
  struct xdp_md {
         __u32 data;
         __u32 data_end;
+       __u32 data_meta;
  };
  
  enum sk_action {
         SK_DROP = 0,
         SK_PASS,
-       SK_REDIRECT,
  };
  
  #define BPF_TAG_SIZE   8
@@ -802,6 +910,11 @@ struct bpf_prog_info {
         __u32 xlated_prog_len;
         __aligned_u64 jited_prog_insns;
         __aligned_u64 xlated_prog_insns;
+       __u64 load_time;        /* ns since boottime */
+       __u32 created_by_uid;
+       __u32 nr_map_ids;
+       __aligned_u64 map_ids;
+       char name[BPF_OBJ_NAME_LEN];
  } __attribute__((aligned(8)));
  
  struct bpf_map_info {
@@ -811,6 +924,7 @@ struct bpf_map_info {
         __u32 value_size;
         __u32 max_entries;
         __u32 map_flags;
+       char  name[BPF_OBJ_NAME_LEN];
  } __attribute__((aligned(8)));
  
  /* User bpf_sock_ops struct to access socket values and specify request ops
@@ -860,9 +974,35 @@ enum {
         BPF_SOCK_OPS_NEEDS_ECN,         /* If connection's congestion control
                                          * needs ECN
                                          */
+       BPF_SOCK_OPS_BASE_RTT,          /* Get base RTT. The correct value is
+                                        * based on the path and may be
+                                        * dependent on the congestion control
+                                        * algorithm. In general it indicates
+                                        * a congestion threshold. RTTs above
+                                        * this indicate congestion
+                                        */
  };
  
  #define TCP_BPF_IW             1001    /* Set TCP initial congestion window */
  #define TCP_BPF_SNDCWND_CLAMP  1002    /* Set sndcwnd_clamp */
  
+struct bpf_perf_event_value { /* NOTE(review): presumably the buf layout of the *_read_value helpers - confirm */
+       __u64 counter; /* perf event counter value */
+       __u64 enabled; /* perf event enabled time */
+       __u64 running; /* perf event running time */
+};
+
+#define BPF_DEVCG_ACC_MKNOD    (1ULL << 0)
+#define BPF_DEVCG_ACC_READ     (1ULL << 1)
+#define BPF_DEVCG_ACC_WRITE    (1ULL << 2)
+
+#define BPF_DEVCG_DEV_BLOCK    (1ULL << 0)
+#define BPF_DEVCG_DEV_CHAR     (1ULL << 1)
+
+struct bpf_cgroup_dev_ctx { /* NOTE(review): presumably ctx of BPF_PROG_TYPE_CGROUP_DEVICE progs - confirm */
+       __u32 access_type; /* (access << 16) | type; presumably BPF_DEVCG_ACC_* and BPF_DEVCG_DEV_* bits - confirm */
+       __u32 major; /* presumably the device major number - confirm */
+       __u32 minor; /* presumably the device minor number - confirm */
+};
+
  #endif /* _UAPI__LINUX_BPF_H__ */