Re: [RFC PATCH bpf-next v2] net: Add additional bit to support userspace timestamp type

From: Andrew Halaney
Date: Fri Apr 12 2024 - 09:37:55 EST


On Thu, Apr 11, 2024 at 04:45:57PM -0700, Abhishek Chauhan (ABC) wrote:
>
> I see one problem, which I will fix as part of the next patch (considering the 24h wait before uploading the next patch):
> the subject does not show [RFC PATCH bpf-next v2 (2/2)<== this is missing]

Just a tip, but I've been using b4 for patches lately, and it really is
quite nice at handling these sorts of process bits (cover letters,
versioning, any prefixes like RFC bpf-next, etc):

https://b4.docs.kernel.org/en/latest/contributor/prep.html


>
> On 4/11/2024 4:05 PM, Abhishek Chauhan wrote:
> > tstamp_type can be real, mono or userspace timestamp.
> >
> > This commit adds a userspace timestamp type and sets it if there is
> > a valid transmit_time available in the socket coming from userspace.
> >
> > To make the design scalable for future needs, this commit extends
> > tstamp_type:1 to tstamp_type:2 to support the userspace
> > timestamp.
> >
> > Link: https://lore.kernel.org/netdev/bc037db4-58bb-4861-ac31-a361a93841d3@xxxxxxxxx/
> > Signed-off-by: Abhishek Chauhan <quic_abchauha@xxxxxxxxxxx>
> > ---
> > Changes since v1
> > - identified additional changes in BPF framework.
> > - Bit shift in SKB_MONO_DELIVERY_TIME_MASK and TC_AT_INGRESS_MASK.
> > - Made changes in skb_set_delivery_time to keep changes similar to
> > previous code for mono_delivery_time and just setting tstamp_type
> > bit 1 for userspace timestamp.
> >
> > include/linux/skbuff.h | 19 +++++++++++++++----
> > net/ipv4/ip_output.c | 2 +-
> > net/ipv4/raw.c | 2 +-
> > net/ipv6/ip6_output.c | 2 +-
> > net/ipv6/raw.c | 2 +-
> > net/packet/af_packet.c | 7 +++----
> > .../selftests/bpf/prog_tests/ctx_rewrite.c | 8 ++++----
> > 7 files changed, 26 insertions(+), 16 deletions(-)
> >
> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > index a83a2120b57f..b6346c21c3d4 100644
> > --- a/include/linux/skbuff.h
> > +++ b/include/linux/skbuff.h
> > @@ -827,7 +827,8 @@ enum skb_tstamp_type {
> > * @tstamp_type: When set, skb->tstamp has the
> > * delivery_time in mono clock base (i.e. EDT). Otherwise, the
> > * skb->tstamp has the (rcv) timestamp at ingress and
> > - * delivery_time at egress.
> > + * delivery_time at egress or skb->tstamp defined by skb->sk->sk_clockid
> > + * coming from userspace
> > * @napi_id: id of the NAPI struct this skb came from
> > * @sender_cpu: (aka @napi_id) source CPU in XPS
> > * @alloc_cpu: CPU which did the skb allocation.
> > @@ -955,7 +956,7 @@ struct sk_buff {
> > /* private: */
> > __u8 __mono_tc_offset[0];
> > /* public: */
> > - __u8 tstamp_type:1; /* See SKB_MONO_DELIVERY_TIME_MASK */
> > + __u8 tstamp_type:2; /* See SKB_MONO_DELIVERY_TIME_MASK */
> > #ifdef CONFIG_NET_XGRESS
> > __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
> > __u8 tc_skip_classify:1;
> > @@ -1090,10 +1091,10 @@ struct sk_buff {
> > */
> > #ifdef __BIG_ENDIAN_BITFIELD
> > #define SKB_MONO_DELIVERY_TIME_MASK (1 << 7)
> > -#define TC_AT_INGRESS_MASK (1 << 6)
> > +#define TC_AT_INGRESS_MASK (1 << 5)
> > #else
> > #define SKB_MONO_DELIVERY_TIME_MASK (1 << 0)
> > -#define TC_AT_INGRESS_MASK (1 << 1)
> > +#define TC_AT_INGRESS_MASK (1 << 2)
> > #endif
> > #define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset)
> >
> > @@ -4262,6 +4263,16 @@ static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt,
> > case CLOCK_MONO:
> > skb->tstamp_type = kt && tstamp_type;
> > break;
> > + /* if any other time base, must be from userspace
> > + * so set userspace tstamp_type bit
> > + * See skbuff tstamp_type:2
> > + * 0x0 => real timestamp_type
> > + * 0x1 => mono timestamp_type
> > + * 0x2 => timestamp_type set from userspace
> > + */
> > + default:
> > + if (kt && tstamp_type)
> > + skb->tstamp_type = 0x2;
> > }
> > }
> >
> > diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> > index 62e457f7c02c..c9317d4addce 100644
> > --- a/net/ipv4/ip_output.c
> > +++ b/net/ipv4/ip_output.c
> > @@ -1457,7 +1457,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
> >
> > skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority);
> > skb->mark = cork->mark;
> > - skb->tstamp = cork->transmit_time;
> > + skb_set_delivery_time(skb, cork->transmit_time, sk->sk_clockid);
> > /*
> > * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
> > * on dst refcount
> > diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
> > index dcb11f22cbf2..a7d84fc0e530 100644
> > --- a/net/ipv4/raw.c
> > +++ b/net/ipv4/raw.c
> > @@ -360,7 +360,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
> > skb->protocol = htons(ETH_P_IP);
> > skb->priority = READ_ONCE(sk->sk_priority);
> > skb->mark = sockc->mark;
> > - skb->tstamp = sockc->transmit_time;
> > + skb_set_delivery_time(skb, sockc->transmit_time, sk->sk_clockid);
> > skb_dst_set(skb, &rt->dst);
> > *rtp = NULL;
> >
> > diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
> > index a9e819115622..0b8193bdd98f 100644
> > --- a/net/ipv6/ip6_output.c
> > +++ b/net/ipv6/ip6_output.c
> > @@ -1924,7 +1924,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
> >
> > skb->priority = READ_ONCE(sk->sk_priority);
> > skb->mark = cork->base.mark;
> > - skb->tstamp = cork->base.transmit_time;
> > + skb_set_delivery_time(skb, cork->base.transmit_time, sk->sk_clockid);
> >
> > ip6_cork_steal_dst(skb, cork);
> > IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
> > diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
> > index 0d896ca7b589..625f3a917e50 100644
> > --- a/net/ipv6/raw.c
> > +++ b/net/ipv6/raw.c
> > @@ -621,7 +621,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
> > skb->protocol = htons(ETH_P_IPV6);
> > skb->priority = READ_ONCE(sk->sk_priority);
> > skb->mark = sockc->mark;
> > - skb->tstamp = sockc->transmit_time;
> > + skb_set_delivery_time(skb, sockc->transmit_time, sk->sk_clockid);
> >
> > skb_put(skb, length);
> > skb_reset_network_header(skb);
> > diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> > index 8c6d3fbb4ed8..356c96f23370 100644
> > --- a/net/packet/af_packet.c
> > +++ b/net/packet/af_packet.c
> > @@ -2056,8 +2056,7 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
> > skb->dev = dev;
> > skb->priority = READ_ONCE(sk->sk_priority);
> > skb->mark = READ_ONCE(sk->sk_mark);
> > - skb->tstamp = sockc.transmit_time;
> > -
> > + skb_set_delivery_time(skb, sockc.transmit_time, sk->sk_clockid);
> > skb_setup_tx_timestamp(skb, sockc.tsflags);
> >
> > if (unlikely(extra_len == 4))
> > @@ -2585,7 +2584,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
> > skb->dev = dev;
> > skb->priority = READ_ONCE(po->sk.sk_priority);
> > skb->mark = READ_ONCE(po->sk.sk_mark);
> > - skb->tstamp = sockc->transmit_time;
> > + skb_set_delivery_time(skb, sockc->transmit_time, po->sk.sk_clockid);
> > skb_setup_tx_timestamp(skb, sockc->tsflags);
> > skb_zcopy_set_nouarg(skb, ph.raw);
> >
> > @@ -3063,7 +3062,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
> > skb->dev = dev;
> > skb->priority = READ_ONCE(sk->sk_priority);
> > skb->mark = sockc.mark;
> > - skb->tstamp = sockc.transmit_time;
> > + skb_set_delivery_time(skb, sockc.transmit_time, sk->sk_clockid);
> >
> > if (unlikely(extra_len == 4))
> > skb->no_fcs = 1;
> > diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
> > index 3b7c57fe55a5..d7f58d9671f7 100644
> > --- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
> > +++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
> > @@ -69,15 +69,15 @@ static struct test_case test_cases[] = {
> > {
> > N(SCHED_CLS, struct __sk_buff, tstamp),
> > .read = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);"
> > - "w11 &= 3;"
> > - "if w11 != 0x3 goto pc+2;"
> > + "w11 &= 5;"
> > + "if w11 != 0x5 goto pc+2;"
> > "$dst = 0;"
> > "goto pc+1;"
> > "$dst = *(u64 *)($ctx + sk_buff::tstamp);",
> > .write = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);"
> > - "if w11 & 0x2 goto pc+1;"
> > + "if w11 & 0x4 goto pc+1;"
> > "goto pc+2;"
> > - "w11 &= -2;"
> > + "w11 &= -4;"
> > "*(u8 *)($ctx + sk_buff::__mono_tc_offset) = r11;"
> > "*(u64 *)($ctx + sk_buff::tstamp) = $src;",
> > },
>