[PATCH 4.14 22/80] packet: fix bitfield update race

From: Greg Kroah-Hartman
Date: Fri Apr 27 2018 - 10:06:46 EST


4.14-stable review patch. If anyone has any objections, please let me know.

------------------

From: Willem de Bruijn <willemb@xxxxxxxxxx>


[ Upstream commit a6361f0ca4b25460f2cdf3235ebe8115f622901e ]

Updates to the bitfields in struct packet_sock are not atomic.
Serialize these read-modify-write cycles.

Move po->running into a separate variable. Its writes are protected by
po->bind_lock (except for one startup case at packet_create). Also
replace a textual precondition warning with lockdep annotation.

All others are set only in packet_setsockopt. Serialize these
updates by holding the socket lock. Analogous to other field updates,
also hold the lock when testing whether a ring is active (pg_vec).

Fixes: 8dc419447415 ("[PACKET]: Add optional checksum computation for recvmsg")
Reported-by: DaeRyong Jeong <threeearcat@xxxxxxxxx>
Reported-by: Byoungyoung Lee <byoungyoung@xxxxxxxxxx>
Signed-off-by: Willem de Bruijn <willemb@xxxxxxxxxx>
Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
net/packet/af_packet.c | 60 +++++++++++++++++++++++++++++++++++--------------
net/packet/internal.h | 10 ++++----
2 files changed, 49 insertions(+), 21 deletions(-)

--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -331,11 +331,11 @@ static void packet_pick_tx_queue(struct
skb_set_queue_mapping(skb, queue_index);
}

-/* register_prot_hook must be invoked with the po->bind_lock held,
+/* __register_prot_hook must be invoked through register_prot_hook
* or from a context in which asynchronous accesses to the packet
* socket is not possible (packet_create()).
*/
-static void register_prot_hook(struct sock *sk)
+static void __register_prot_hook(struct sock *sk)
{
struct packet_sock *po = pkt_sk(sk);

@@ -350,8 +350,13 @@ static void register_prot_hook(struct so
}
}

-/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock
- * held. If the sync parameter is true, we will temporarily drop
+static void register_prot_hook(struct sock *sk)
+{
+ lockdep_assert_held_once(&pkt_sk(sk)->bind_lock);
+ __register_prot_hook(sk);
+}
+
+/* If the sync parameter is true, we will temporarily drop
* the po->bind_lock and do a synchronize_net to make sure no
* asynchronous packet processing paths still refer to the elements
* of po->prot_hook. If the sync parameter is false, it is the
@@ -361,6 +366,8 @@ static void __unregister_prot_hook(struc
{
struct packet_sock *po = pkt_sk(sk);

+ lockdep_assert_held_once(&po->bind_lock);
+
po->running = 0;

if (po->fanout)
@@ -3261,7 +3268,7 @@ static int packet_create(struct net *net

if (proto) {
po->prot_hook.type = proto;
- register_prot_hook(sk);
+ __register_prot_hook(sk);
}

mutex_lock(&net->packet.sklist_lock);
@@ -3743,12 +3750,18 @@ packet_setsockopt(struct socket *sock, i

if (optlen != sizeof(val))
return -EINVAL;
- if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
- return -EBUSY;
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
- po->tp_loss = !!val;
- return 0;
+
+ lock_sock(sk);
+ if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+ ret = -EBUSY;
+ } else {
+ po->tp_loss = !!val;
+ ret = 0;
+ }
+ release_sock(sk);
+ return ret;
}
case PACKET_AUXDATA:
{
@@ -3759,7 +3772,9 @@ packet_setsockopt(struct socket *sock, i
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;

+ lock_sock(sk);
po->auxdata = !!val;
+ release_sock(sk);
return 0;
}
case PACKET_ORIGDEV:
@@ -3771,7 +3786,9 @@ packet_setsockopt(struct socket *sock, i
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;

+ lock_sock(sk);
po->origdev = !!val;
+ release_sock(sk);
return 0;
}
case PACKET_VNET_HDR:
@@ -3780,15 +3797,20 @@ packet_setsockopt(struct socket *sock, i

if (sock->type != SOCK_RAW)
return -EINVAL;
- if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
- return -EBUSY;
if (optlen < sizeof(val))
return -EINVAL;
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;

- po->has_vnet_hdr = !!val;
- return 0;
+ lock_sock(sk);
+ if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+ ret = -EBUSY;
+ } else {
+ po->has_vnet_hdr = !!val;
+ ret = 0;
+ }
+ release_sock(sk);
+ return ret;
}
case PACKET_TIMESTAMP:
{
@@ -3826,11 +3848,17 @@ packet_setsockopt(struct socket *sock, i

if (optlen != sizeof(val))
return -EINVAL;
- if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
- return -EBUSY;
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
- po->tp_tx_has_off = !!val;
+
+ lock_sock(sk);
+ if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+ ret = -EBUSY;
+ } else {
+ po->tp_tx_has_off = !!val;
+ ret = 0;
+ }
+ release_sock(sk);
return 0;
}
case PACKET_QDISC_BYPASS:
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -112,10 +112,12 @@ struct packet_sock {
int copy_thresh;
spinlock_t bind_lock;
struct mutex pg_vec_lock;
- unsigned int running:1, /* prot_hook is attached*/
- auxdata:1,
+ unsigned int running; /* bind_lock must be held */
+ unsigned int auxdata:1, /* writer must hold sock lock */
origdev:1,
- has_vnet_hdr:1;
+ has_vnet_hdr:1,
+ tp_loss:1,
+ tp_tx_has_off:1;
int pressure;
int ifindex; /* bound device */
__be16 num;
@@ -125,8 +127,6 @@ struct packet_sock {
enum tpacket_versions tp_version;
unsigned int tp_hdrlen;
unsigned int tp_reserve;
- unsigned int tp_loss:1;
- unsigned int tp_tx_has_off:1;
unsigned int tp_tstamp;
struct net_device __rcu *cached_dev;
int (*xmit)(struct sk_buff *skb);