bpf: add skb->tstamp r/w access from tc clsact and cg skb progs
author Vlad Dumitrescu <vladum@google.com>
Thu, 22 Nov 2018 19:39:16 +0000 (14:39 -0500)
committer Alexei Starovoitov <ast@kernel.org>
Thu, 22 Nov 2018 23:47:28 +0000 (15:47 -0800)
This could be used to rate limit egress traffic in concert with a qdisc
which supports Earliest Departure Time, such as FQ.

Write access from cg skb progs is allowed only with CAP_SYS_ADMIN, since the
value will be used by downstream qdiscs. It might make sense to relax this.

Changes v1 -> v2:
  - allow access from cg skb, write only with CAP_SYS_ADMIN

Signed-off-by: Vlad Dumitrescu <vladum@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
include/uapi/linux/bpf.h
net/core/filter.c
tools/include/uapi/linux/bpf.h
tools/testing/selftests/bpf/test_verifier.c

index c1554aa0746594b73f8cbed93e3204f1b0c354de..23e2031a43d439db7cc37d7a43b153366562853e 100644 (file)
@@ -2468,6 +2468,7 @@ struct __sk_buff {
 
        __u32 data_meta;
        struct bpf_flow_keys *flow_keys;
+       __u64 tstamp;
 };
 
 struct bpf_tunnel_key {
index f6ca38a7d4332ddcdb6d421eb2b07fff53efb564..65dc13aeca7c406973f01a4d2a178f858209d56e 100644 (file)
@@ -5573,6 +5573,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
                if (size != sizeof(struct bpf_flow_keys *))
                        return false;
                break;
+       case bpf_ctx_range(struct __sk_buff, tstamp):
+               if (size != sizeof(__u64))
+                       return false;
+               break;
        default:
                /* Only narrow read access allowed for now. */
                if (type == BPF_WRITE) {
@@ -5600,6 +5604,7 @@ static bool sk_filter_is_valid_access(int off, int size,
        case bpf_ctx_range(struct __sk_buff, data_end):
        case bpf_ctx_range(struct __sk_buff, flow_keys):
        case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+       case bpf_ctx_range(struct __sk_buff, tstamp):
                return false;
        }
 
@@ -5638,6 +5643,10 @@ static bool cg_skb_is_valid_access(int off, int size,
                case bpf_ctx_range(struct __sk_buff, priority):
                case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
                        break;
+               case bpf_ctx_range(struct __sk_buff, tstamp):
+                       if (!capable(CAP_SYS_ADMIN))
+                               return false;
+                       break;
                default:
                        return false;
                }
@@ -5665,6 +5674,7 @@ static bool lwt_is_valid_access(int off, int size,
        case bpf_ctx_range_till(struct __sk_buff, family, local_port):
        case bpf_ctx_range(struct __sk_buff, data_meta):
        case bpf_ctx_range(struct __sk_buff, flow_keys):
+       case bpf_ctx_range(struct __sk_buff, tstamp):
                return false;
        }
 
@@ -5874,6 +5884,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
                case bpf_ctx_range(struct __sk_buff, priority):
                case bpf_ctx_range(struct __sk_buff, tc_classid):
                case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+               case bpf_ctx_range(struct __sk_buff, tstamp):
                        break;
                default:
                        return false;
@@ -6093,6 +6104,7 @@ static bool sk_skb_is_valid_access(int off, int size,
        case bpf_ctx_range(struct __sk_buff, tc_classid):
        case bpf_ctx_range(struct __sk_buff, data_meta):
        case bpf_ctx_range(struct __sk_buff, flow_keys):
+       case bpf_ctx_range(struct __sk_buff, tstamp):
                return false;
        }
 
@@ -6179,6 +6191,7 @@ static bool flow_dissector_is_valid_access(int off, int size,
        case bpf_ctx_range(struct __sk_buff, tc_classid):
        case bpf_ctx_range(struct __sk_buff, data_meta):
        case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+       case bpf_ctx_range(struct __sk_buff, tstamp):
                return false;
        }
 
@@ -6488,6 +6501,22 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
                *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
                                      si->src_reg, off);
                break;
+
+       case offsetof(struct __sk_buff, tstamp):
+               BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8);
+
+               if (type == BPF_WRITE)
+                       *insn++ = BPF_STX_MEM(BPF_DW,
+                                             si->dst_reg, si->src_reg,
+                                             bpf_target_off(struct sk_buff,
+                                                            tstamp, 8,
+                                                            target_size));
+               else
+                       *insn++ = BPF_LDX_MEM(BPF_DW,
+                                             si->dst_reg, si->src_reg,
+                                             bpf_target_off(struct sk_buff,
+                                                            tstamp, 8,
+                                                            target_size));
        }
 
        return insn - insn_buf;
index c1554aa0746594b73f8cbed93e3204f1b0c354de..23e2031a43d439db7cc37d7a43b153366562853e 100644 (file)
@@ -2468,6 +2468,7 @@ struct __sk_buff {
 
        __u32 data_meta;
        struct bpf_flow_keys *flow_keys;
+       __u64 tstamp;
 };
 
 struct bpf_tunnel_key {
index 54d16fbdef8b931037928e89890710f70333f45e..537a8f91af02da0d920d4f34ff1043248c7d5b0b 100644 (file)
@@ -2446,6 +2446,10 @@ static struct bpf_test tests[] = {
                                    offsetof(struct __sk_buff, tc_index)),
                        BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
                                    offsetof(struct __sk_buff, cb[3])),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+                                   offsetof(struct __sk_buff, tstamp)),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+                                   offsetof(struct __sk_buff, tstamp)),
                        BPF_EXIT_INSN(),
                },
                .errstr_unpriv = "",
@@ -5297,6 +5301,31 @@ static struct bpf_test tests[] = {
                .errstr_unpriv = "R2 leaks addr into helper function",
                .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
        },
+       {
+               "write tstamp from CGROUP_SKB",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+                                   offsetof(struct __sk_buff, tstamp)),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .result_unpriv = REJECT,
+               .errstr_unpriv = "invalid bpf_context access off=152 size=8",
+               .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       },
+       {
+               "read tstamp from CGROUP_SKB",
+               .insns = {
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+                                   offsetof(struct __sk_buff, tstamp)),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+       },
        {
                "multiple registers share map_lookup_elem result",
                .insns = {