Re: [PATCH v6] af_packet: Handle outgoing VLAN packets without hardware offloading

From: Chengen Du
Date: Fri Jun 07 2024 - 23:07:36 EST


Hi,

I would like to provide some additional explanation of the patch.


On Sat, Jun 8, 2024 at 10:54 AM Chengen Du <chengen.du@xxxxxxxxxxxxx> wrote:
>
> The issue initially stems from libpcap. The ethertype will be overwritten
> as the VLAN TPID if the network interface lacks hardware VLAN offloading.
> In the outbound packet path, if hardware VLAN offloading is unavailable,
> the VLAN tag is inserted into the payload but then cleared from the sk_buff
> struct. Consequently, this can lead to a false negative when checking for
> the presence of a VLAN tag, causing the packet sniffing outcome to lack
> VLAN tag information (i.e., TCI-TPID). As a result, the packet capturing
> tool may be unable to parse packets as expected.
>
> The TCI-TPID is missing because the prb_fill_vlan_info() function does not
> modify the tp_vlan_tci/tp_vlan_tpid values, as the information is in the
> payload and not in the sk_buff struct. The skb_vlan_tag_present() function
> only checks vlan_all in the sk_buff struct. In cooked mode, the L2 header
> is stripped, preventing the packet capturing tool from determining the
> correct TCI-TPID value. Additionally, the protocol in SLL is incorrect,
> which means the packet capturing tool cannot parse the L3 header correctly.
>
> Link: https://github.com/the-tcpdump-group/libpcap/issues/1105
> Link: https://lore.kernel.org/netdev/20240520070348.26725-1-chengen.du@xxxxxxxxxxxxx/T/#u
> Fixes: 393e52e33c6c ("packet: deliver VLAN TCI to userspace")
> Cc: stable@xxxxxxxxxxxxxxx
> Signed-off-by: Chengen Du <chengen.du@xxxxxxxxxxxxx>
> ---
> net/packet/af_packet.c | 57 ++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 55 insertions(+), 2 deletions(-)
>
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index ea3ebc160e25..8cffbe1f912d 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -538,6 +538,43 @@ static void *packet_current_frame(struct packet_sock *po,
> return packet_lookup_frame(po, rb, rb->head, status);
> }
>
> +static u16 vlan_get_tci(struct sk_buff *skb)
> +{
> + struct vlan_hdr vhdr, *vh;
> + u8 *skb_orig_data = skb->data;
> + int skb_orig_len = skb->len;
> +
> + skb_push(skb, skb->data - skb_mac_header(skb));
> + vh = skb_header_pointer(skb, ETH_HLEN, sizeof(vhdr), &vhdr);
> + if (skb_orig_data != skb->data) {
> + skb->data = skb_orig_data;
> + skb->len = skb_orig_len;
> + }


The reason for not directly using skb_header_pointer(skb,
skb_mac_header(skb) + ETH_HLEN, ...) to get the VLAN header is the
check logic in skb_header_pointer(). In the SOCK_DGRAM and
PACKET_OUTGOING scenarios, the offset (which is relative to skb->data)
can be negative, which causes the check logic (i.e.,
likely(hlen - offset >= len)) in __skb_header_pointer() to not work as
expected.

While it is possible to modify __skb_header_pointer() to handle cases
where the offset is negative, this change could affect a wider range
of code.

Please kindly share your thoughts on this approach.


> + if (unlikely(!vh))
> + return 0;
> +
> + return ntohs(vh->h_vlan_TCI);
> +}
> +
> +static __be16 vlan_get_protocol_dgram(struct sk_buff *skb)
> +{
> + __be16 proto = skb->protocol;
> +
> + if (unlikely(eth_type_vlan(proto))) {
> + u8 *skb_orig_data = skb->data;
> + int skb_orig_len = skb->len;
> +
> + skb_push(skb, skb->data - skb_mac_header(skb));
> + proto = __vlan_get_protocol(skb, proto, NULL);
> + if (skb_orig_data != skb->data) {
> + skb->data = skb_orig_data;
> + skb->len = skb_orig_len;
> + }
> + }
> +
> + return proto;
> +}
> +
> static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
> {
> del_timer_sync(&pkc->retire_blk_timer);
> @@ -1007,10 +1044,16 @@ static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
> static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
> struct tpacket3_hdr *ppd)
> {
> + struct packet_sock *po = container_of(pkc, struct packet_sock, rx_ring.prb_bdqc);
> +
> if (skb_vlan_tag_present(pkc->skb)) {
> ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb);
> ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto);
> ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
> + } else if (unlikely(po->sk.sk_type == SOCK_DGRAM && eth_type_vlan(pkc->skb->protocol))) {
> + ppd->hv1.tp_vlan_tci = vlan_get_tci(pkc->skb);
> + ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->protocol);
> + ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
> } else {
> ppd->hv1.tp_vlan_tci = 0;
> ppd->hv1.tp_vlan_tpid = 0;
> @@ -2428,6 +2471,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
> h.h2->tp_vlan_tci = skb_vlan_tag_get(skb);
> h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto);
> status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
> + } else if (unlikely(sk->sk_type == SOCK_DGRAM && eth_type_vlan(skb->protocol))) {
> + h.h2->tp_vlan_tci = vlan_get_tci(skb);
> + h.h2->tp_vlan_tpid = ntohs(skb->protocol);
> + status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
> } else {
> h.h2->tp_vlan_tci = 0;
> h.h2->tp_vlan_tpid = 0;
> @@ -2457,7 +2504,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
> sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
> sll->sll_family = AF_PACKET;
> sll->sll_hatype = dev->type;
> - sll->sll_protocol = skb->protocol;
> + sll->sll_protocol = (sk->sk_type == SOCK_DGRAM) ?
> + vlan_get_protocol_dgram(skb) : skb->protocol;
> sll->sll_pkttype = skb->pkt_type;
> if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
> sll->sll_ifindex = orig_dev->ifindex;
> @@ -3482,7 +3530,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
> /* Original length was stored in sockaddr_ll fields */
> origlen = PACKET_SKB_CB(skb)->sa.origlen;
> sll->sll_family = AF_PACKET;
> - sll->sll_protocol = skb->protocol;
> + sll->sll_protocol = (sock->type == SOCK_DGRAM) ?
> + vlan_get_protocol_dgram(skb) : skb->protocol;
> }
>
> sock_recv_cmsgs(msg, sk, skb);
> @@ -3539,6 +3588,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
> aux.tp_vlan_tci = skb_vlan_tag_get(skb);
> aux.tp_vlan_tpid = ntohs(skb->vlan_proto);
> aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
> + } else if (unlikely(sock->type == SOCK_DGRAM && eth_type_vlan(skb->protocol))) {
> + aux.tp_vlan_tci = vlan_get_tci(skb);
> + aux.tp_vlan_tpid = ntohs(skb->protocol);
> + aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
> } else {
> aux.tp_vlan_tci = 0;
> aux.tp_vlan_tpid = 0;
> --
> 2.43.0
>