Re: [RFC PATCH bpf-next v4 2/2] net: Add additional bit to support clockid_t timestamp type

From: Willem de Bruijn
Date: Thu Apr 18 2024 - 15:06:53 EST


Abhishek Chauhan wrote:
> tstamp_type is now set based on actual clockid_t compressed
> into 2 bits.
>
> To make the design scalable for future needs, this commit extends
> tstamp_type:1 to tstamp_type:2 to support other clockid_t
> timestamps.
>
> This commit adds support for CLOCK_TAI in tstamp_type, alongside the
> existing support for CLOCK_MONOTONIC and CLOCK_REALTIME.
>
> Link: https://lore.kernel.org/netdev/bc037db4-58bb-4861-ac31-a361a93841d3@xxxxxxxxx/
> Signed-off-by: Abhishek Chauhan <quic_abchauha@xxxxxxxxxxx>
>
> /**
> - * tstamp_type:1 can take 2 values each
> + * tstamp_type:2 can take 4 values each
> * represented by time base in skb
> * 0x0 => real timestamp_type
> * 0x1 => mono timestamp_type
> + * 0x2 => tai timestamp_type
> + * 0x3 => undefined timestamp_type

Same point as previous patch about comment that repeats name.

> @@ -833,7 +836,8 @@ enum skb_tstamp_type {
> * @tstamp_type: When set, skb->tstamp has the
> * delivery_time in mono clock base (i.e. EDT). Otherwise, the
> * skb->tstamp has the (rcv) timestamp at ingress and
> - * delivery_time at egress.
> + * delivery_time at egress or skb->tstamp defined by skb->sk->sk_clockid
> + * coming from userspace

I would simplify the comment: clock base of skb->tstamp.
Already in the first patch.

> * @napi_id: id of the NAPI struct this skb came from
> * @sender_cpu: (aka @napi_id) source CPU in XPS
> * @alloc_cpu: CPU which did the skb allocation.
> @@ -961,7 +965,7 @@ struct sk_buff {
> /* private: */
> __u8 __mono_tc_offset[0];
> /* public: */
> - __u8 tstamp_type:1; /* See SKB_CLOCK_*_MASK */
> + __u8 tstamp_type:2; /* See skb_tstamp_type enum */

Probably good to call out that according to pahole this fills a hole.

> #ifdef CONFIG_NET_XGRESS
> __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
> __u8 tc_skip_classify:1;
> @@ -1096,10 +1100,12 @@ struct sk_buff {
> */
> #ifdef __BIG_ENDIAN_BITFIELD
> #define SKB_MONO_DELIVERY_TIME_MASK (1 << 7)
> -#define TC_AT_INGRESS_MASK (1 << 6)
> +#define SKB_TAI_DELIVERY_TIME_MASK (1 << 6)

SKB_TSTAMP_TYPE_BIT2_MASK?

> +#define TC_AT_INGRESS_MASK (1 << 5)
> #else
> #define SKB_MONO_DELIVERY_TIME_MASK (1 << 0)
> -#define TC_AT_INGRESS_MASK (1 << 1)
> +#define SKB_TAI_DELIVERY_TIME_MASK (1 << 1)
> +#define TC_AT_INGRESS_MASK (1 << 2)
> #endif
> #define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset)
>
> @@ -4206,6 +4212,11 @@ static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt,
> case CLOCK_MONOTONIC:
> skb->tstamp_type = SKB_CLOCK_MONO;
> break;
> + case CLOCK_TAI:
> + skb->tstamp_type = SKB_CLOCK_TAI;
> + break;
> + default:
> + WARN_ONCE(true, "clockid %d not supported", tstamp_type);

and set to 0 and default tstamp_type?

> }
> }

>
> @@ -9372,10 +9378,16 @@ static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si,
> *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
> SKB_BF_MONO_TC_OFFSET);
> *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
> - SKB_MONO_DELIVERY_TIME_MASK, 2);
> + SKB_MONO_DELIVERY_TIME_MASK | SKB_TAI_DELIVERY_TIME_MASK, 2);
> + *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
> + SKB_MONO_DELIVERY_TIME_MASK, 3);
> + *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
> + SKB_TAI_DELIVERY_TIME_MASK, 4);
> *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC);
> *insn++ = BPF_JMP_A(1);
> *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO);
> + *insn++ = BPF_JMP_A(1);
> + *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_TAI);
>
> return insn;
> }
> @@ -9418,10 +9430,26 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
> __u8 tmp_reg = BPF_REG_AX;
>
> *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
> + /*check if all three bits are set*/
> *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
> - TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK);
> - *insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg,
> - TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2);
> + TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK |
> + SKB_TAI_DELIVERY_TIME_MASK);
> + /*if all 3 bits are set jump 3 instructions and clear the register */
> + *insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
> + TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK |
> + SKB_TAI_DELIVERY_TIME_MASK, 4);
> + /*Now check Mono is set with ingress mask if so clear */
> + *insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
> + TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 3);
> + /*Now Check tai is set with ingress mask if so clear */
> + *insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
> + TC_AT_INGRESS_MASK | SKB_TAI_DELIVERY_TIME_MASK, 2);
> + /*Now Check tai and mono are set if so clear */
> + *insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
> + SKB_MONO_DELIVERY_TIME_MASK |
> + SKB_TAI_DELIVERY_TIME_MASK, 1);

This looks as if every JEQ results in "if so clear"?

If the goal is to only do something different when the two bits are 0x1,
can we have a single test with a two-bit mask, rather than four tests?