Re: [RFC PATCH bpf-next v2] net: Add additional bit to support userspace timestamp type

From: Abhishek Chauhan (ABC)
Date: Thu Apr 11 2024 - 19:46:43 EST



I see one problem, which I will fix as part of the next patch (allowing for the 24h wait before uploading the next version):
the subject does not show the patch number, i.e. [RFC PATCH bpf-next v2 (2/2) <== this part is missing].

On 4/11/2024 4:05 PM, Abhishek Chauhan wrote:
> tstamp_type can be real, mono or userspace timestamp.
>
> This commit adds a userspace timestamp type and sets it if a valid
> transmit_time coming from userspace is available in the socket.
>
> To make the design scalable for future needs, this commit brings in
> the change to extend tstamp_type:1 to tstamp_type:2 to support the
> userspace timestamp.
>
> Link: https://lore.kernel.org/netdev/bc037db4-58bb-4861-ac31-a361a93841d3@xxxxxxxxx/
> Signed-off-by: Abhishek Chauhan <quic_abchauha@xxxxxxxxxxx>
> ---
> Changes since v1
> - identified additional changes in BPF framework.
> - Bit shift in SKB_MONO_DELIVERY_TIME_MASK and TC_AT_INGRESS_MASK.
> - Changed skb_set_delivery_time to keep the behaviour similar to the
> previous code for mono_delivery_time, and to just set tstamp_type
> bit 1 for the userspace timestamp.
>
> include/linux/skbuff.h | 19 +++++++++++++++----
> net/ipv4/ip_output.c | 2 +-
> net/ipv4/raw.c | 2 +-
> net/ipv6/ip6_output.c | 2 +-
> net/ipv6/raw.c | 2 +-
> net/packet/af_packet.c | 7 +++----
> .../selftests/bpf/prog_tests/ctx_rewrite.c | 8 ++++----
> 7 files changed, 26 insertions(+), 16 deletions(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index a83a2120b57f..b6346c21c3d4 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -827,7 +827,8 @@ enum skb_tstamp_type {
> * @tstamp_type: When set, skb->tstamp has the
> * delivery_time in mono clock base (i.e. EDT). Otherwise, the
> * skb->tstamp has the (rcv) timestamp at ingress and
> - * delivery_time at egress.
> + * delivery_time at egress or skb->tstamp defined by skb->sk->sk_clockid
> + * coming from userspace
> * @napi_id: id of the NAPI struct this skb came from
> * @sender_cpu: (aka @napi_id) source CPU in XPS
> * @alloc_cpu: CPU which did the skb allocation.
> @@ -955,7 +956,7 @@ struct sk_buff {
> /* private: */
> __u8 __mono_tc_offset[0];
> /* public: */
> - __u8 tstamp_type:1; /* See SKB_MONO_DELIVERY_TIME_MASK */
> + __u8 tstamp_type:2; /* See SKB_MONO_DELIVERY_TIME_MASK */
> #ifdef CONFIG_NET_XGRESS
> __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
> __u8 tc_skip_classify:1;
> @@ -1090,10 +1091,10 @@ struct sk_buff {
> */
> #ifdef __BIG_ENDIAN_BITFIELD
> #define SKB_MONO_DELIVERY_TIME_MASK (1 << 7)
> -#define TC_AT_INGRESS_MASK (1 << 6)
> +#define TC_AT_INGRESS_MASK (1 << 5)
> #else
> #define SKB_MONO_DELIVERY_TIME_MASK (1 << 0)
> -#define TC_AT_INGRESS_MASK (1 << 1)
> +#define TC_AT_INGRESS_MASK (1 << 2)
> #endif
> #define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset)
>
> @@ -4262,6 +4263,16 @@ static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt,
> case CLOCK_MONO:
> skb->tstamp_type = kt && tstamp_type;
> break;
> + /* if any other time base, must be from userspace
> + * so set userspace tstamp_type bit
> + * See skbuff tstamp_type:2
> + * 0x0 => real timestamp_type
> + * 0x1 => mono timestamp_type
> + * 0x2 => timestamp_type set from userspace
> + */
> + default:
> + if (kt && tstamp_type)
> + skb->tstamp_type = 0x2;
> }
> }
>
> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index 62e457f7c02c..c9317d4addce 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c
> @@ -1457,7 +1457,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
>
> skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority);
> skb->mark = cork->mark;
> - skb->tstamp = cork->transmit_time;
> + skb_set_delivery_time(skb, cork->transmit_time, sk->sk_clockid);
> /*
> * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
> * on dst refcount
> diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
> index dcb11f22cbf2..a7d84fc0e530 100644
> --- a/net/ipv4/raw.c
> +++ b/net/ipv4/raw.c
> @@ -360,7 +360,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
> skb->protocol = htons(ETH_P_IP);
> skb->priority = READ_ONCE(sk->sk_priority);
> skb->mark = sockc->mark;
> - skb->tstamp = sockc->transmit_time;
> + skb_set_delivery_time(skb, sockc->transmit_time, sk->sk_clockid);
> skb_dst_set(skb, &rt->dst);
> *rtp = NULL;
>
> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
> index a9e819115622..0b8193bdd98f 100644
> --- a/net/ipv6/ip6_output.c
> +++ b/net/ipv6/ip6_output.c
> @@ -1924,7 +1924,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
>
> skb->priority = READ_ONCE(sk->sk_priority);
> skb->mark = cork->base.mark;
> - skb->tstamp = cork->base.transmit_time;
> + skb_set_delivery_time(skb, cork->base.transmit_time, sk->sk_clockid);
>
> ip6_cork_steal_dst(skb, cork);
> IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
> diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
> index 0d896ca7b589..625f3a917e50 100644
> --- a/net/ipv6/raw.c
> +++ b/net/ipv6/raw.c
> @@ -621,7 +621,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
> skb->protocol = htons(ETH_P_IPV6);
> skb->priority = READ_ONCE(sk->sk_priority);
> skb->mark = sockc->mark;
> - skb->tstamp = sockc->transmit_time;
> + skb_set_delivery_time(skb, sockc->transmit_time, sk->sk_clockid);
>
> skb_put(skb, length);
> skb_reset_network_header(skb);
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index 8c6d3fbb4ed8..356c96f23370 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -2056,8 +2056,7 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
> skb->dev = dev;
> skb->priority = READ_ONCE(sk->sk_priority);
> skb->mark = READ_ONCE(sk->sk_mark);
> - skb->tstamp = sockc.transmit_time;
> -
> + skb_set_delivery_time(skb, sockc.transmit_time, sk->sk_clockid);
> skb_setup_tx_timestamp(skb, sockc.tsflags);
>
> if (unlikely(extra_len == 4))
> @@ -2585,7 +2584,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
> skb->dev = dev;
> skb->priority = READ_ONCE(po->sk.sk_priority);
> skb->mark = READ_ONCE(po->sk.sk_mark);
> - skb->tstamp = sockc->transmit_time;
> + skb_set_delivery_time(skb, sockc->transmit_time, po->sk.sk_clockid);
> skb_setup_tx_timestamp(skb, sockc->tsflags);
> skb_zcopy_set_nouarg(skb, ph.raw);
>
> @@ -3063,7 +3062,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
> skb->dev = dev;
> skb->priority = READ_ONCE(sk->sk_priority);
> skb->mark = sockc.mark;
> - skb->tstamp = sockc.transmit_time;
> + skb_set_delivery_time(skb, sockc.transmit_time, sk->sk_clockid);
>
> if (unlikely(extra_len == 4))
> skb->no_fcs = 1;
> diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
> index 3b7c57fe55a5..d7f58d9671f7 100644
> --- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
> +++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
> @@ -69,15 +69,15 @@ static struct test_case test_cases[] = {
> {
> N(SCHED_CLS, struct __sk_buff, tstamp),
> .read = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);"
> - "w11 &= 3;"
> - "if w11 != 0x3 goto pc+2;"
> + "w11 &= 5;"
> + "if w11 != 0x5 goto pc+2;"
> "$dst = 0;"
> "goto pc+1;"
> "$dst = *(u64 *)($ctx + sk_buff::tstamp);",
> .write = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);"
> - "if w11 & 0x2 goto pc+1;"
> + "if w11 & 0x4 goto pc+1;"
> "goto pc+2;"
> - "w11 &= -2;"
> + "w11 &= -4;"
> "*(u8 *)($ctx + sk_buff::__mono_tc_offset) = r11;"
> "*(u64 *)($ctx + sk_buff::tstamp) = $src;",
> },