net: Add additional bit to support clockid_t timestamp type
author: Abhishek Chauhan <quic_abchauha@quicinc.com>
Thu, 9 May 2024 21:18:33 +0000 (14:18 -0700)
committer: Martin KaFai Lau <martin.lau@kernel.org>
Thu, 23 May 2024 21:14:36 +0000 (14:14 -0700)
tstamp_type is now set based on the actual clockid_t, compressed
into 2 bits.

To make the design scalable for future needs, this commit extends
tstamp_type:1 to tstamp_type:2 so that it can support other
clockid_t timestamps.

With this commit, tstamp_type supports CLOCK_TAI in addition to
the existing CLOCK_MONOTONIC and CLOCK_REALTIME.

Signed-off-by: Abhishek Chauhan <quic_abchauha@quicinc.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20240509211834.3235191-3-quic_abchauha@quicinc.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
include/linux/skbuff.h
include/uapi/linux/bpf.h
net/core/filter.c
net/ipv4/ip_output.c
net/ipv4/raw.c
net/ipv4/tcp_ipv4.c
net/ipv6/ip6_output.c
net/ipv6/raw.c
net/ipv6/tcp_ipv6.c
net/packet/af_packet.c
tools/include/uapi/linux/bpf.h

index 3a721cc3b6441f53047755aadc7267e88dbe2196..1e5c97daaa374d5f2e821f2b62389e034cbece48 100644 (file)
@@ -709,6 +709,8 @@ typedef unsigned char *sk_buff_data_t;
 enum skb_tstamp_type {
        SKB_CLOCK_REALTIME,
        SKB_CLOCK_MONOTONIC,
+       SKB_CLOCK_TAI,
+       __SKB_CLOCK_MAX = SKB_CLOCK_TAI,
 };
 
 /**
@@ -957,7 +959,7 @@ struct sk_buff {
        /* private: */
        __u8                    __mono_tc_offset[0];
        /* public: */
-       __u8                    tstamp_type:1;  /* See skb_tstamp_type */
+       __u8                    tstamp_type:2;  /* See skb_tstamp_type */
 #ifdef CONFIG_NET_XGRESS
        __u8                    tc_at_ingress:1;        /* See TC_AT_INGRESS_MASK */
        __u8                    tc_skip_classify:1;
@@ -1087,15 +1089,16 @@ struct sk_buff {
 #endif
 #define PKT_TYPE_OFFSET                offsetof(struct sk_buff, __pkt_type_offset)
 
-/* if you move tc_at_ingress or mono_delivery_time
+/* if you move tc_at_ingress or tstamp_type
  * around, you also must adapt these constants.
  */
 #ifdef __BIG_ENDIAN_BITFIELD
-#define SKB_MONO_DELIVERY_TIME_MASK    (1 << 7)
-#define TC_AT_INGRESS_MASK             (1 << 6)
+#define SKB_TSTAMP_TYPE_MASK           (3 << 6)
+#define SKB_TSTAMP_TYPE_RSHIFT         (6)
+#define TC_AT_INGRESS_MASK             (1 << 5)
 #else
-#define SKB_MONO_DELIVERY_TIME_MASK    (1 << 0)
-#define TC_AT_INGRESS_MASK             (1 << 1)
+#define SKB_TSTAMP_TYPE_MASK           (3)
+#define TC_AT_INGRESS_MASK             (1 << 2)
 #endif
 #define SKB_BF_MONO_TC_OFFSET          offsetof(struct sk_buff, __mono_tc_offset)
 
@@ -4216,6 +4219,9 @@ static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb,
        case CLOCK_MONOTONIC:
                tstamp_type = SKB_CLOCK_MONOTONIC;
                break;
+       case CLOCK_TAI:
+               tstamp_type = SKB_CLOCK_TAI;
+               break;
        default:
                WARN_ON_ONCE(1);
                kt = 0;
index 90706a47f6ffe974d346502c5511d9ac63a4f86e..25ea393cf084b953f31c7dbf5e710bcde068c116 100644 (file)
@@ -6207,12 +6207,17 @@ union {                                 \
        __u64 :64;                      \
 } __attribute__((aligned(8)))
 
+/* The enum used in skb->tstamp_type. It specifies the clock type
+ * of the time stored in the skb->tstamp.
+ */
 enum {
-       BPF_SKB_TSTAMP_UNSPEC,
-       BPF_SKB_TSTAMP_DELIVERY_MONO,   /* tstamp has mono delivery time */
-       /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle,
-        * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC
-        * and try to deduce it by ingress, egress or skb->sk->sk_clockid.
+       BPF_SKB_TSTAMP_UNSPEC = 0,              /* DEPRECATED */
+       BPF_SKB_TSTAMP_DELIVERY_MONO = 1,       /* DEPRECATED */
+       BPF_SKB_CLOCK_REALTIME = 0,
+       BPF_SKB_CLOCK_MONOTONIC = 1,
+       BPF_SKB_CLOCK_TAI = 2,
+       /* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle,
+        * the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid.
         */
 };
 
index a3781a796da4dd2169154581d2c54b31b169121d..c6edfe9f41bcc308e94fae09ce9a725d334b2758 100644 (file)
@@ -7726,17 +7726,21 @@ BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb,
                return -EOPNOTSUPP;
 
        switch (tstamp_type) {
-       case BPF_SKB_TSTAMP_DELIVERY_MONO:
+       case BPF_SKB_CLOCK_REALTIME:
+               skb->tstamp = tstamp;
+               skb->tstamp_type = SKB_CLOCK_REALTIME;
+               break;
+       case BPF_SKB_CLOCK_MONOTONIC:
                if (!tstamp)
                        return -EINVAL;
                skb->tstamp = tstamp;
                skb->tstamp_type = SKB_CLOCK_MONOTONIC;
                break;
-       case BPF_SKB_TSTAMP_UNSPEC:
-               if (tstamp)
+       case BPF_SKB_CLOCK_TAI:
+               if (!tstamp)
                        return -EINVAL;
-               skb->tstamp = 0;
-               skb->tstamp_type = SKB_CLOCK_REALTIME;
+               skb->tstamp = tstamp;
+               skb->tstamp_type = SKB_CLOCK_TAI;
                break;
        default:
                return -EINVAL;
@@ -9387,16 +9391,17 @@ static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si,
 {
        __u8 value_reg = si->dst_reg;
        __u8 skb_reg = si->src_reg;
-       /* AX is needed because src_reg and dst_reg could be the same */
-       __u8 tmp_reg = BPF_REG_AX;
-
-       *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
-                             SKB_BF_MONO_TC_OFFSET);
-       *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
-                               SKB_MONO_DELIVERY_TIME_MASK, 2);
-       *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC);
-       *insn++ = BPF_JMP_A(1);
-       *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO);
+       BUILD_BUG_ON(__SKB_CLOCK_MAX != (int)BPF_SKB_CLOCK_TAI);
+       BUILD_BUG_ON(SKB_CLOCK_REALTIME != (int)BPF_SKB_CLOCK_REALTIME);
+       BUILD_BUG_ON(SKB_CLOCK_MONOTONIC != (int)BPF_SKB_CLOCK_MONOTONIC);
+       BUILD_BUG_ON(SKB_CLOCK_TAI != (int)BPF_SKB_CLOCK_TAI);
+       *insn++ = BPF_LDX_MEM(BPF_B, value_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
+       *insn++ = BPF_ALU32_IMM(BPF_AND, value_reg, SKB_TSTAMP_TYPE_MASK);
+#ifdef __BIG_ENDIAN_BITFIELD
+       *insn++ = BPF_ALU32_IMM(BPF_RSH, value_reg, SKB_TSTAMP_TYPE_RSHIFT);
+#else
+       BUILD_BUG_ON(!(SKB_TSTAMP_TYPE_MASK & 0x1));
+#endif
 
        return insn;
 }
@@ -9439,10 +9444,11 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
                __u8 tmp_reg = BPF_REG_AX;
 
                *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
-               *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
-                                       TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK);
-               *insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg,
-                                       TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2);
+               /* check if ingress mask bits is set */
+               *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1);
+               *insn++ = BPF_JMP_A(4);
+               *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, SKB_TSTAMP_TYPE_MASK, 1);
+               *insn++ = BPF_JMP_A(2);
                /* skb->tc_at_ingress && skb->tstamp_type,
                 * read 0 as the (rcv) timestamp.
                 */
@@ -9479,7 +9485,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
                /* goto <store> */
                *insn++ = BPF_JMP_A(2);
                /* <clear>: skb->tstamp_type */
-               *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK);
+               *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_TSTAMP_TYPE_MASK);
                *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET);
        }
 #endif
index fe86cadfa85b6b4d46caa78038abeb29a56fbd81..b90d0f78ac8080f6e5aba2382b11075253f85d61 100644 (file)
@@ -1457,7 +1457,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
 
        skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority);
        skb->mark = cork->mark;
-       skb->tstamp = cork->transmit_time;
+       if (sk_is_tcp(sk))
+               skb_set_delivery_time(skb, cork->transmit_time, SKB_CLOCK_MONOTONIC);
+       else
+               skb_set_delivery_type_by_clockid(skb, cork->transmit_time, sk->sk_clockid);
        /*
         * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
         * on dst refcount
index 4cb43401e0e06c5003c268c28bf0882ac4e8b4e0..1a095365035645e9370f3eb94219f88f552c9e60 100644 (file)
@@ -360,7 +360,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
        skb->protocol = htons(ETH_P_IP);
        skb->priority = READ_ONCE(sk->sk_priority);
        skb->mark = sockc->mark;
-       skb->tstamp = sockc->transmit_time;
+       skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid);
        skb_dst_set(skb, &rt->dst);
        *rtp = NULL;
 
index 30ef0c8f5e92d301c31ea1a05f662c1fc4cf37af..8f70b8d1d1e5631c5a738c21e6eec3b41f5bd031 100644 (file)
@@ -3625,6 +3625,8 @@ void __init tcp_v4_init(void)
                 */
                inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO;
 
+               sk->sk_clockid = CLOCK_MONOTONIC;
+
                per_cpu(ipv4_tcp_sk, cpu) = sk;
        }
        if (register_pernet_subsys(&tcp_sk_ops))
index 1ab0f23d37bfbd700e5ed7cf923031386111ec9e..e7a19df3125e4390a401d59a3e354eee58c8da93 100644 (file)
@@ -1924,7 +1924,10 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
 
        skb->priority = READ_ONCE(sk->sk_priority);
        skb->mark = cork->base.mark;
-       skb->tstamp = cork->base.transmit_time;
+       if (sk_is_tcp(sk))
+               skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC);
+       else
+               skb_set_delivery_type_by_clockid(skb, cork->base.transmit_time, sk->sk_clockid);
 
        ip6_cork_steal_dst(skb, cork);
        IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
index 2eedf255600b9ec93cdb1f381d8e1fbf41524bfc..f838366e825672a6e95e67c116fb4c33146fbd0f 100644 (file)
@@ -621,7 +621,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
        skb->protocol = htons(ETH_P_IPV6);
        skb->priority = READ_ONCE(sk->sk_priority);
        skb->mark = sockc->mark;
-       skb->tstamp = sockc->transmit_time;
+       skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid);
 
        skb_put(skb, length);
        skb_reset_network_header(skb);
index 8333005c5c2ea6fdf178feca109293a4043b63e7..750aa681779ca315d37bdd8a7cdfbebb9f4da63a 100644 (file)
@@ -2387,8 +2387,14 @@ static struct inet_protosw tcpv6_protosw = {
 
 static int __net_init tcpv6_net_init(struct net *net)
 {
-       return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
-                                   SOCK_RAW, IPPROTO_TCP, net);
+       int res;
+
+       res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
+                                  SOCK_RAW, IPPROTO_TCP, net);
+       if (!res)
+               net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC;
+
+       return res;
 }
 
 static void __net_exit tcpv6_net_exit(struct net *net)
index ea3ebc160e25cc661901717a755f47db927c304d..fce3908875912f67551cf090a524f96427c09ab3 100644 (file)
@@ -2056,8 +2056,7 @@ retry:
        skb->dev = dev;
        skb->priority = READ_ONCE(sk->sk_priority);
        skb->mark = READ_ONCE(sk->sk_mark);
-       skb->tstamp = sockc.transmit_time;
-
+       skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid);
        skb_setup_tx_timestamp(skb, sockc.tsflags);
 
        if (unlikely(extra_len == 4))
@@ -2584,7 +2583,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
        skb->dev = dev;
        skb->priority = READ_ONCE(po->sk.sk_priority);
        skb->mark = READ_ONCE(po->sk.sk_mark);
-       skb->tstamp = sockc->transmit_time;
+       skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, po->sk.sk_clockid);
        skb_setup_tx_timestamp(skb, sockc->tsflags);
        skb_zcopy_set_nouarg(skb, ph.raw);
 
@@ -3062,7 +3061,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
        skb->dev = dev;
        skb->priority = READ_ONCE(sk->sk_priority);
        skb->mark = sockc.mark;
-       skb->tstamp = sockc.transmit_time;
+       skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid);
 
        if (unlikely(extra_len == 4))
                skb->no_fcs = 1;
index 90706a47f6ffe974d346502c5511d9ac63a4f86e..25ea393cf084b953f31c7dbf5e710bcde068c116 100644 (file)
@@ -6207,12 +6207,17 @@ union {                                 \
        __u64 :64;                      \
 } __attribute__((aligned(8)))
 
+/* The enum used in skb->tstamp_type. It specifies the clock type
+ * of the time stored in the skb->tstamp.
+ */
 enum {
-       BPF_SKB_TSTAMP_UNSPEC,
-       BPF_SKB_TSTAMP_DELIVERY_MONO,   /* tstamp has mono delivery time */
-       /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle,
-        * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC
-        * and try to deduce it by ingress, egress or skb->sk->sk_clockid.
+       BPF_SKB_TSTAMP_UNSPEC = 0,              /* DEPRECATED */
+       BPF_SKB_TSTAMP_DELIVERY_MONO = 1,       /* DEPRECATED */
+       BPF_SKB_CLOCK_REALTIME = 0,
+       BPF_SKB_CLOCK_MONOTONIC = 1,
+       BPF_SKB_CLOCK_TAI = 2,
+       /* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle,
+        * the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid.
         */
 };