Re: [RFC PATCH bpf-next v4 2/2] net: Add additional bit to support clockid_t timestamp type

From: Martin KaFai Lau
Date: Thu Apr 18 2024 - 17:57:34 EST


On 4/18/24 1:10 PM, Abhishek Chauhan (ABC) wrote:
#ifdef CONFIG_NET_XGRESS
__u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
__u8 tc_skip_classify:1;
@@ -1096,10 +1100,12 @@ struct sk_buff {
*/
#ifdef __BIG_ENDIAN_BITFIELD
#define SKB_MONO_DELIVERY_TIME_MASK (1 << 7)
-#define TC_AT_INGRESS_MASK (1 << 6)
+#define SKB_TAI_DELIVERY_TIME_MASK (1 << 6)

SKB_TSTAMP_TYPE_BIT2_MASK?

nit. Shorten it to just SKB_TSTAMP_TYPE_MASK?

#ifdef __BIG_ENDIAN_BITFIELD
#define SKB_TSTAMP_TYPE_MASK (3 << 6)
#define SKB_TSTAMP_TYPE_RSH (6) /* more on this later */
#else
#define SKB_TSTAMP_TYPE_MASK (3)
#endif


I was thinking of keeping it as TAI, because otherwise it will confuse developers. I hope that's okay.

I think it is not very useful to distinguish each bit since it is an enum value now. It becomes more like the "pkt_type:3" and its PKT_TYPE_MAX.

+#define TC_AT_INGRESS_MASK (1 << 5)
#else
#define SKB_MONO_DELIVERY_TIME_MASK (1 << 0)
-#define TC_AT_INGRESS_MASK (1 << 1)
+#define SKB_TAI_DELIVERY_TIME_MASK (1 << 1)
+#define TC_AT_INGRESS_MASK (1 << 2)
#endif
#define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset)
@@ -4206,6 +4212,11 @@ static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt,
case CLOCK_MONOTONIC:
skb->tstamp_type = SKB_CLOCK_MONO;
break;
+ case CLOCK_TAI:
+ skb->tstamp_type = SKB_CLOCK_TAI;
+ break;
+ default:
+ WARN_ONCE(true, "clockid %d not supported", tstamp_type);

and set to 0 and default tstamp_type?
Actually, thinking about it: if it's unsupported, just falling back to the default is the correct thing to do. I will take care of this.
}
}

>
@@ -9372,10 +9378,16 @@ static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si,
*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
SKB_BF_MONO_TC_OFFSET);
*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
- SKB_MONO_DELIVERY_TIME_MASK, 2);
+ SKB_MONO_DELIVERY_TIME_MASK | SKB_TAI_DELIVERY_TIME_MASK, 2);
+ *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
+ SKB_MONO_DELIVERY_TIME_MASK, 3);
+ *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
+ SKB_TAI_DELIVERY_TIME_MASK, 4);
*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC);
*insn++ = BPF_JMP_A(1);
*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO);
+ *insn++ = BPF_JMP_A(1);
+ *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_TAI);

With SKB_TSTAMP_TYPE_MASK defined like above, this could be simplified like this (untested):

/*
 * Emit the instructions that read the skb timestamp type into the
 * destination register of @si.
 *
 * @si:   the instruction being rewritten; its src_reg is used as the base
 *        register for the load and its dst_reg receives the result.
 * @insn: where to write the generated instructions.
 *
 * The byte at SKB_BF_MONO_TC_OFFSET is loaded and masked with
 * SKB_TSTAMP_TYPE_MASK.  With __BIG_ENDIAN_BITFIELD the masked bits are
 * additionally shifted right by SKB_TSTAMP_TYPE_RSH.
 *
 * Returns the position just past the last instruction written.
 */
static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si,
						     struct bpf_insn *insn)
{
	const __u8 dst = si->dst_reg;
	const __u8 skb = si->src_reg;

	/*
	 * Compile-time checks only: the highest skb clock type must equal
	 * BPF_SKB_TSTAMP_DELIVERY_TAI, and without __BIG_ENDIAN_BITFIELD the
	 * mask must include bit 0 because no shift is emitted in that case.
	 */
	BUILD_BUG_ON(__SKB_CLOCK_MAX != BPF_SKB_TSTAMP_DELIVERY_TAI);
#ifndef __BIG_ENDIAN_BITFIELD
	BUILD_BUG_ON(!(SKB_TSTAMP_TYPE_MASK & 0x1));
#endif

	*insn++ = BPF_LDX_MEM(BPF_B, dst, skb, SKB_BF_MONO_TC_OFFSET);
	*insn++ = BPF_ALU32_IMM(BPF_AND, dst, SKB_TSTAMP_TYPE_MASK);
#ifdef __BIG_ENDIAN_BITFIELD
	*insn++ = BPF_ALU32_IMM(BPF_RSH, dst, SKB_TSTAMP_TYPE_RSH);
#endif

	return insn;
}

return insn;
}
@@ -9418,10 +9430,26 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
__u8 tmp_reg = BPF_REG_AX;
*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
+ /*check if all three bits are set*/
*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
- TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK);
- *insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg,
- TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2);
+ TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK |
+ SKB_TAI_DELIVERY_TIME_MASK);
+ /*if all 3 bits are set jump 3 instructions and clear the register */
+ *insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
+ TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK |
+ SKB_TAI_DELIVERY_TIME_MASK, 4);
+ /*Now check Mono is set with ingress mask if so clear */
+ *insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
+ TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 3);
+ /*Now Check tai is set with ingress mask if so clear */
+ *insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
+ TC_AT_INGRESS_MASK | SKB_TAI_DELIVERY_TIME_MASK, 2);
+ /*Now Check tai and mono are set if so clear */
+ *insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
+ SKB_MONO_DELIVERY_TIME_MASK |
+ SKB_TAI_DELIVERY_TIME_MASK, 1);

Same as the bpf_convert_tstamp_type_read, this could be simplified with SKB_TSTAMP_TYPE_MASK.


This looks as if all JEQ result in "if so clear"?

Is the goal to only do something different for the two bits being 0x1,
can we have a single test with a two-bit mask, rather than four tests?

I think Martin wanted to take care of TAI as well. I will wait for his comment here

My goal was to take care of the invalid combinations:
1. If all 3 bits are set => invalid combo (test case written is insane)
2. If 2 bits are set (tai + mono) (test case written is insane) => this cannot happen (because an skb can only have one clock base)
3. If 2 bits are set (ingress + tai/mono) => this is the existing logic plus tai being added (clear tstamp in ingress)
4. For all other cases, go ahead and fill in the tstamp in the dest register.

If it is to ensure no new type is added without adding BPF_SKB_TSTAMP_DELIVERY_XYZ, I would simplify this runtime bpf insns here and use a BUILD_BUG_ON to catch it at compile time. Something like,

/* Time base of the timestamp carried in an skb. */
enum skb_tstamp_type {
	SKB_CLOCK_REAL = 0,			/* Time base in skb is REALTIME */
	SKB_CLOCK_MONO = 1,			/* Time base in skb is MONOTONIC */
	SKB_CLOCK_TAI = 2,			/* Time base in skb is TAI */
	__SKB_CLOCK_MAX = SKB_CLOCK_TAI,	/* Alias of the last enumerator */
};

/* Same one used in the bpf_convert_tstamp_type_read() above */
BUILD_BUG_ON(__SKB_CLOCK_MAX != BPF_SKB_TSTAMP_DELIVERY_TAI);

Another thing: the UDP test in test_tc_dtime.c probably needs to be adjusted, since userspace is using CLOCK_TAI in SO_TXTIME and it is getting forwarded now.