[PATCH v4] af_packet: Handle outgoing VLAN packets without hardware offloading

From: Chengen Du
Date: Sun Jun 02 2024 - 23:48:27 EST


The issue initially stems from libpcap. The ethertype will be overwritten
with the VLAN TPID if the network interface lacks hardware VLAN offloading.
In the outbound packet path, if hardware VLAN offloading is unavailable,
the VLAN tag is inserted into the payload but then cleared from the sk_buff
struct. Consequently, this can lead to a false negative when checking for
the presence of a VLAN tag, causing the packet sniffing outcome to lack
VLAN tag information (i.e., TCI-TPID). As a result, the packet capturing
tool may be unable to parse packets as expected.

The TCI-TPID is missing because the prb_fill_vlan_info() function does not
modify the tp_vlan_tci/tp_vlan_tpid values, as the information is in the
payload and not in the sk_buff struct. The skb_vlan_tag_present() function
only checks vlan_all in the sk_buff struct. In cooked mode, the L2 header
is stripped, preventing the packet capturing tool from determining the
correct TCI-TPID value. Additionally, the protocol in SLL (sll_protocol) is
incorrect: it holds the VLAN ethertype rather than the encapsulated protocol,
which means the packet capturing tool cannot parse the L3 header correctly.

Link: https://github.com/the-tcpdump-group/libpcap/issues/1105
Link: https://lore.kernel.org/netdev/20240520070348.26725-1-chengen.du@xxxxxxxxxxxxx/T/#u
Fixes: 393e52e33c6c ("packet: deliver VLAN TCI to userspace")
Cc: stable@xxxxxxxxxxxxxxx
Signed-off-by: Chengen Du <chengen.du@xxxxxxxxxxxxx>
---
net/packet/af_packet.c | 85 ++++++++++++++++++++++++++++++++++++------
1 file changed, 74 insertions(+), 11 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ea3ebc160e25..21d34a12c11c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -538,6 +538,62 @@ static void *packet_current_frame(struct packet_sock *po,
return packet_lookup_frame(po, rb, rb->head, status);
}

+/*
+ * Fetch the VLAN TCI and TPID that belong to @skb.
+ *
+ * Two sources are tried in order:
+ *  1. the tag kept in the skb itself (skb_vlan_tag_present());
+ *  2. if skb->protocol is a VLAN ethertype, a struct vlan_hdr read from
+ *     the packet data, with the TPID taken from skb->protocol.
+ *
+ * Returns 1 when *tci and *tpid have been written, 0 otherwise. On a 0
+ * return neither output is touched, so callers must not read them.
+ */
+static int vlan_get_info(struct sk_buff *skb, u16 *tci, u16 *tpid)
+{
+ if (skb_vlan_tag_present(skb)) {
+ *tci = skb_vlan_tag_get(skb);
+ *tpid = ntohs(skb->vlan_proto);
+ } else if (unlikely(eth_type_vlan(skb->protocol))) {
+ unsigned int vlan_depth = skb->mac_len;
+ struct vlan_hdr vhdr, *vh;
+ /* Saved so skb->data/skb->len can be put back after the read. */
+ u8 *skb_head = skb->data;
+ int skb_len = skb->len;
+
+ /*
+ * Compute the offset of the VLAN header from the MAC header:
+ * mac_len - VLAN_HLEN when mac_len is non-zero, ETH_HLEN when
+ * it is zero.
+ *
+ * NOTE(review): the two branches only agree if a non-zero
+ * mac_len already includes the VLAN tag. If mac_len can be
+ * exactly ETH_HLEN here, the offset becomes
+ * ETH_HLEN - VLAN_HLEN and h_vlan_TCI would be read from in
+ * front of the ethertype -- confirm what mac_len holds on the
+ * paths that reach this function.
+ */
+ if (vlan_depth) {
+ if (WARN_ON(vlan_depth < VLAN_HLEN))
+ return 0;
+ vlan_depth -= VLAN_HLEN;
+ } else {
+ vlan_depth = ETH_HLEN;
+ }
+
+ /*
+ * Temporarily rewind skb->data to the MAC header so that
+ * vlan_depth is an offset from the start of the L2 header,
+ * then undo the rewind before looking at the result.
+ */
+ skb_push(skb, skb->data - skb_mac_header(skb));
+ vh = skb_header_pointer(skb, vlan_depth, sizeof(vhdr), &vhdr);
+ if (skb_head != skb->data) {
+ skb->data = skb_head;
+ skb->len = skb_len;
+ }
+ /* No readable header at that offset: report "no VLAN info". */
+ if (unlikely(!vh))
+ return 0;
+
+ *tci = ntohs(vh->h_vlan_TCI);
+ *tpid = ntohs(skb->protocol);
+ } else {
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Pick the protocol value for @skb.
+ *
+ * Returns skb->protocol unchanged unless it is a VLAN ethertype; in that
+ * case the value produced by __vlan_get_protocol(), evaluated with
+ * skb->data rewound to the MAC header, is returned instead.
+ * NOTE(review): __vlan_get_protocol() is defined elsewhere; presumably it
+ * yields the protocol encapsulated behind the VLAN tag(s) -- confirm.
+ */
+static __be16 sll_get_protocol(struct sk_buff *skb)
+{
+ __be16 proto = skb->protocol;
+
+ if (unlikely(eth_type_vlan(proto))) {
+ /* Saved so skb->data/skb->len can be put back afterwards. */
+ u8 *skb_head = skb->data;
+ int skb_len = skb->len;
+
+ /*
+ * Rewind skb->data to the MAC header for the lookup, then
+ * restore the original data pointer and length.
+ */
+ skb_push(skb, skb->data - skb_mac_header(skb));
+ proto = __vlan_get_protocol(skb, proto, NULL);
+ if (skb_head != skb->data) {
+ skb->data = skb_head;
+ skb->len = skb_len;
+ }
+ }
+
+ return proto;
+}
+
static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
{
del_timer_sync(&pkc->retire_blk_timer);
@@ -1007,9 +1063,11 @@ static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
struct tpacket3_hdr *ppd)
{
- if (skb_vlan_tag_present(pkc->skb)) {
- ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb);
- ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto);
+ u16 tci, tpid;
+
+ if (vlan_get_info(pkc->skb, &tci, &tpid)) {
+ ppd->hv1.tp_vlan_tci = tci;
+ ppd->hv1.tp_vlan_tpid = tpid;
ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
} else {
ppd->hv1.tp_vlan_tci = 0;
@@ -2418,15 +2476,17 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
hdrlen = sizeof(*h.h1);
break;
case TPACKET_V2:
+ u16 tci, tpid;
+
h.h2->tp_len = skb->len;
h.h2->tp_snaplen = snaplen;
h.h2->tp_mac = macoff;
h.h2->tp_net = netoff;
h.h2->tp_sec = ts.tv_sec;
h.h2->tp_nsec = ts.tv_nsec;
- if (skb_vlan_tag_present(skb)) {
- h.h2->tp_vlan_tci = skb_vlan_tag_get(skb);
- h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto);
+ if (vlan_get_info(skb, &tci, &tpid)) {
+ h.h2->tp_vlan_tci = tci;
+ h.h2->tp_vlan_tpid = tpid;
status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
} else {
h.h2->tp_vlan_tci = 0;
@@ -2457,7 +2517,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
sll->sll_family = AF_PACKET;
sll->sll_hatype = dev->type;
- sll->sll_protocol = skb->protocol;
+ sll->sll_protocol = (sk->sk_type == SOCK_DGRAM) ?
+ sll_get_protocol(skb) : skb->protocol;
sll->sll_pkttype = skb->pkt_type;
if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
sll->sll_ifindex = orig_dev->ifindex;
@@ -3482,7 +3543,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
/* Original length was stored in sockaddr_ll fields */
origlen = PACKET_SKB_CB(skb)->sa.origlen;
sll->sll_family = AF_PACKET;
- sll->sll_protocol = skb->protocol;
+ sll->sll_protocol = (sock->type == SOCK_DGRAM) ?
+ sll_get_protocol(skb) : skb->protocol;
}

sock_recv_cmsgs(msg, sk, skb);
@@ -3521,6 +3583,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,

if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_AUXDATA)) {
struct tpacket_auxdata aux;
+ u16 tci, tpid;

aux.tp_status = TP_STATUS_USER;
if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -3535,9 +3598,9 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
aux.tp_snaplen = skb->len;
aux.tp_mac = 0;
aux.tp_net = skb_network_offset(skb);
- if (skb_vlan_tag_present(skb)) {
- aux.tp_vlan_tci = skb_vlan_tag_get(skb);
- aux.tp_vlan_tpid = ntohs(skb->vlan_proto);
+ if (vlan_get_info(skb, &tci, &tpid)) {
+ aux.tp_vlan_tci = tci;
+ aux.tp_vlan_tpid = tpid;
aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
} else {
aux.tp_vlan_tci = 0;
--
2.43.0