Re: [PATCH v2] skbuff: skb_segment, Call zero copy functions before using skbuff frags

From: Eric Dumazet
Date: Thu Aug 31 2023 - 02:59:56 EST


On Thu, Aug 31, 2023 at 1:28 AM Mohamed Khalfella
<mkhalfella@xxxxxxxxxxxxxxx> wrote:
>
> Commit bf5c25d60861 ("skbuff: in skb_segment, call zerocopy functions
> once per nskb") added the call to zero copy functions in skb_segment().
> The change introduced a bug in skb_segment() because skb_orphan_frags()
> may possibly change the number of fragments or allocate new fragments
> altogether leaving nrfrags and frag to point to the old values. This can
> cause a panic with stacktrace like the one below.
>
> [ 193.894380] BUG: kernel NULL pointer dereference, address: 00000000000000bc
> [ 193.895273] CPU: 13 PID: 18164 Comm: vh-net-17428 Kdump: loaded Tainted: G O 5.15.123+ #26
> [ 193.903919] RIP: 0010:skb_segment+0xb0e/0x12f0
> [ 194.021892] Call Trace:
> [ 194.027422] <TASK>
> [ 194.072861] tcp_gso_segment+0x107/0x540
> [ 194.082031] inet_gso_segment+0x15c/0x3d0
> [ 194.090783] skb_mac_gso_segment+0x9f/0x110
> [ 194.095016] __skb_gso_segment+0xc1/0x190
> [ 194.103131] netem_enqueue+0x290/0xb10 [sch_netem]
> [ 194.107071] dev_qdisc_enqueue+0x16/0x70
> [ 194.110884] __dev_queue_xmit+0x63b/0xb30
> [ 194.121670] bond_start_xmit+0x159/0x380 [bonding]
> [ 194.128506] dev_hard_start_xmit+0xc3/0x1e0
> [ 194.131787] __dev_queue_xmit+0x8a0/0xb30
> [ 194.138225] macvlan_start_xmit+0x4f/0x100 [macvlan]
> [ 194.141477] dev_hard_start_xmit+0xc3/0x1e0
> [ 194.144622] sch_direct_xmit+0xe3/0x280
> [ 194.147748] __dev_queue_xmit+0x54a/0xb30
> [ 194.154131] tap_get_user+0x2a8/0x9c0 [tap]
> [ 194.157358] tap_sendmsg+0x52/0x8e0 [tap]
> [ 194.167049] handle_tx_zerocopy+0x14e/0x4c0 [vhost_net]
> [ 194.173631] handle_tx+0xcd/0xe0 [vhost_net]
> [ 194.176959] vhost_worker+0x76/0xb0 [vhost]
> [ 194.183667] kthread+0x118/0x140
> [ 194.190358] ret_from_fork+0x1f/0x30
> [ 194.193670] </TASK>
>
> In this case calling skb_orphan_frags() updated nr_frags leaving nrfrags
> local variable in skb_segment() stale. This resulted in the code hitting
> i >= nrfrags prematurely and trying to move to next frag_skb using
> list_skb pointer, which was NULL, and caused kernel panic. Move the call
> to zero copy functions before using frags and nr_frags.
>
> Fixes: bf5c25d60861 ("skbuff: in skb_segment, call zerocopy functions once per nskb")
> Signed-off-by: Mohamed Khalfella <mkhalfella@xxxxxxxxxxxxxxx>
> Reported-by: Amit Goyal <agoyal@xxxxxxxxxxxxxxx>
> Cc: stable@xxxxxxxxxxxxxxx
> ---
> net/core/skbuff.c | 34 ++++++++++++++++++++--------------
> 1 file changed, 20 insertions(+), 14 deletions(-)
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index a298992060e6..18a33dc2d6af 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -4354,21 +4354,20 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> struct sk_buff *segs = NULL;
> struct sk_buff *tail = NULL;
> struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
> - skb_frag_t *frag = skb_shinfo(head_skb)->frags;
> unsigned int mss = skb_shinfo(head_skb)->gso_size;
> unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
> - struct sk_buff *frag_skb = head_skb;
> unsigned int offset = doffset;
> unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
> unsigned int partial_segs = 0;
> unsigned int headroom;
> unsigned int len = head_skb->len;
> + struct sk_buff *frag_skb;
> + skb_frag_t *frag;
> __be16 proto;
> bool csum, sg;
> - int nfrags = skb_shinfo(head_skb)->nr_frags;
> int err = -ENOMEM;
> int i = 0;
> - int pos;
> + int nfrags, pos;
>
> if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
> mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
> @@ -4445,6 +4444,13 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> headroom = skb_headroom(head_skb);
> pos = skb_headlen(head_skb);
>
> + if (skb_orphan_frags(head_skb, GFP_ATOMIC))
> + return ERR_PTR(-ENOMEM);
> +
> + nfrags = skb_shinfo(head_skb)->nr_frags;
> + frag = skb_shinfo(head_skb)->frags;
> + frag_skb = head_skb;
> +
> do {
> struct sk_buff *nskb;
> skb_frag_t *nskb_frag;
> @@ -4465,6 +4471,10 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> (skb_headlen(list_skb) == len || sg)) {
> BUG_ON(skb_headlen(list_skb) > len);
>
> + nskb = skb_clone(list_skb, GFP_ATOMIC);
> + if (unlikely(!nskb))
> + goto err;
> +

This patch is quite complex to review, so I am asking if this part was
really needed ?
<1> : You moved here <2> and <3>

If this is not strictly needed, please keep the code as is to ease
code review...

> i = 0;
> nfrags = skb_shinfo(list_skb)->nr_frags;
> frag = skb_shinfo(list_skb)->frags;
> @@ -4483,12 +4493,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> frag++;
> }
>
> - nskb = skb_clone(list_skb, GFP_ATOMIC);

<2>

> list_skb = list_skb->next;
>
> - if (unlikely(!nskb))
> - goto err;
> -

<3>

> if (unlikely(pskb_trim(nskb, len))) {
> kfree_skb(nskb);
> goto err;
> @@ -4564,12 +4570,16 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
> SKBFL_SHARED_FRAG;
>
> - if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
> - skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
> + if (skb_zerocopy_clone(nskb, list_skb, GFP_ATOMIC))

Why using list_skb here instead of frag_skb ?
Again, I have to look at the whole thing to understand why you did this.

> goto err;
>
> while (pos < offset + len) {
> if (i >= nfrags) {
> + if (skb_orphan_frags(list_skb, GFP_ATOMIC) ||
> + skb_zerocopy_clone(nskb, list_skb,
> + GFP_ATOMIC))
> + goto err;
> +

This part is fine.

> i = 0;
> nfrags = skb_shinfo(list_skb)->nr_frags;
> frag = skb_shinfo(list_skb)->frags;
> @@ -4583,10 +4593,6 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> i--;
> frag--;
> }
> - if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
> - skb_zerocopy_clone(nskb, frag_skb,
> - GFP_ATOMIC))
> - goto err;
>
> list_skb = list_skb->next;
> }
> --
> 2.17.1
>

Thanks.