[PATCH net-next v4] vsock: add G2H fallback for CIDs not owned by H2G transport
From: Alexander Graf
Date: Wed Mar 04 2026 - 18:01:16 EST
When no H2G transport is loaded, vsock currently routes all CIDs to the
G2H transport (commit 65b422d9b61b ("vsock: forward all packets to the
host when no H2G is registered"). Extend that existing behavior: when
an H2G transport is loaded but does not claim a given CID, the
connection falls back to G2H in the same way.
This matters in environments like Nitro Enclaves, where an instance may
run nested VMs via vhost-vsock (H2G) while also needing to reach sibling
enclaves at higher CIDs through virtio-vsock-pci (G2H). With the old
code, any CID > 2 was unconditionally routed to H2G when vhost was
loaded, making those enclaves unreachable without setting
VMADDR_FLAG_TO_HOST explicitly on every connect.
Requiring every application to set VMADDR_FLAG_TO_HOST creates friction:
tools like socat, iperf, and others would all need to learn about it.
The flag was introduced 6 years ago and I am still not aware of any tool
that supports it. Even if there was support, it would be cumbersome to
use. The most natural experience is a single CID address space where H2G
only wins for CIDs it actually owns, and everything else falls through to
G2H, extending the behavior that already exists when H2G is absent.
To give user space at least a hint that the kernel applied this logic,
automatically set the VMADDR_FLAG_TO_HOST on the remote address so it
can determine the path taken via getpeername().
Add a per-network namespace sysctl net.vsock.g2h_fallback (default 1).
At 0 it forces strict routing: H2G always wins for CID > VMADDR_CID_HOST,
or ENODEV if H2G is not loaded.
Signed-off-by: Alexander Graf <graf@xxxxxxxxxx>
Tested-by: syzbot@xxxxxxxxxxxxxxxxxxxxxxxxx
---
v1 -> v2:
- Rebase on 7.0, include namespace support
- Add net.vsock.g2h_fallback sysctl
- Rework description
- Set VMADDR_FLAG_TO_HOST automatically
- Add VMCI support
- Update vsock_assign_transport() comment
v2 -> v3:
- Use has_remote_cid() on G2H transport to gate the fallback. This is
used by VMCI to indicate that it never takes G2H CIDs > 2.
- Move g2h_fallback into struct netns_vsock to enable namespaces
and fix syzbot warning
- Gate the !transport_h2g case on g2h_fallback as well, folding the
pre-existing no-H2G fallback into the new logic
- Remove has_remote_cid() from VMCI again. Instead implement it in
virtio.
v3 -> v4:
- Fix commit reference format (checkpatch)
- vhost: use !!vhost_vsock_get() instead of != NULL (checkpatch)
- Add braces around final else branch (checkpatch)
- Replace 'vhost' with 'H2G transport' (Stefano)
---
Documentation/admin-guide/sysctl/net.rst | 28 +++++++++++++++++++
drivers/vhost/vsock.c | 13 +++++++++
include/net/af_vsock.h | 9 ++++++
include/net/netns/vsock.h | 2 ++
net/vmw_vsock/af_vsock.c | 35 ++++++++++++++++++++----
net/vmw_vsock/virtio_transport.c | 7 +++++
6 files changed, 89 insertions(+), 5 deletions(-)
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 3b2ad61995d4..0724a793798f 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -602,3 +602,31 @@ it does not modify the current namespace or any existing children.
A namespace with ``ns_mode`` set to ``local`` cannot change
``child_ns_mode`` to ``global`` (returns ``-EPERM``).
+
+g2h_fallback
+------------
+
+Controls whether connections to CIDs not owned by the host-to-guest (H2G)
+transport automatically fall back to the guest-to-host (G2H) transport.
+
+When enabled, if a connect targets a CID that the H2G transport (e.g.
+vhost-vsock) does not serve, or if no H2G transport is loaded at all, the
+connection is routed via the G2H transport (e.g. virtio-vsock) instead. This
+allows a host running both nested VMs (via vhost-vsock) and sibling VMs
+reachable through the hypervisor (e.g. Nitro Enclaves) to address both using
+a single CID space, without requiring applications to set
+``VMADDR_FLAG_TO_HOST``.
+
+When the fallback is taken, ``VMADDR_FLAG_TO_HOST`` is automatically set on
+the remote address so that userspace can determine the path via
+``getpeername()``.
+
+Note: With this sysctl enabled, user space that attempts to talk to a guest
+CID which is not implemented by the H2G transport will create host vsock
+traffic. Environments that rely on H2G-only isolation should set it to 0.
+
+Values:
+
+ - 0 - Connections to CIDs <= 2 or with VMADDR_FLAG_TO_HOST use G2H;
+ all others use H2G (or fail with ENODEV if H2G is not loaded).
+ - 1 - Connections to CIDs not owned by H2G fall back to G2H. (default)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 054f7a718f50..1d8ec6bed53e 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -91,6 +91,18 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid, struct net *net)
return NULL;
}
+static bool vhost_transport_has_remote_cid(struct vsock_sock *vsk, u32 cid)
+{
+ struct sock *sk = sk_vsock(vsk);
+ struct net *net = sock_net(sk);
+ bool found;
+
+ rcu_read_lock();
+ found = !!vhost_vsock_get(cid, net);
+ rcu_read_unlock();
+ return found;
+}
+
static void
vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
struct vhost_virtqueue *vq)
@@ -424,6 +436,7 @@ static struct virtio_transport vhost_transport = {
.module = THIS_MODULE,
.get_local_cid = vhost_transport_get_local_cid,
+ .has_remote_cid = vhost_transport_has_remote_cid,
.init = virtio_transport_do_socket_init,
.destruct = virtio_transport_destruct,
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 533d8e75f7bb..4e40063adab4 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -179,6 +179,15 @@ struct vsock_transport {
/* Addressing. */
u32 (*get_local_cid)(void);
+ /* Check if this transport serves a specific remote CID.
+ * For H2G transports: return true if the CID belongs to a registered
+ * guest. If not implemented, all CIDs > VMADDR_CID_HOST go to H2G.
+ * For G2H transports: return true if the transport can reach arbitrary
+ * CIDs via the hypervisor (i.e. supports the fallback overlay). VMCI
+ * does not implement this as it only serves CIDs 0 and 2.
+ */
+ bool (*has_remote_cid)(struct vsock_sock *vsk, u32 remote_cid);
+
/* Read a single skb */
int (*read_skb)(struct vsock_sock *, skb_read_actor_t);
diff --git a/include/net/netns/vsock.h b/include/net/netns/vsock.h
index dc8cbe45f406..7f84aad92f57 100644
--- a/include/net/netns/vsock.h
+++ b/include/net/netns/vsock.h
@@ -20,5 +20,7 @@ struct netns_vsock {
/* 0 = unlocked, 1 = locked to global, 2 = locked to local */
int child_ns_mode_locked;
+
+ int g2h_fallback;
};
#endif /* __NET_NET_NAMESPACE_VSOCK_H */
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 2f7d94d682cb..50843a977878 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -545,9 +545,13 @@ static void vsock_deassign_transport(struct vsock_sock *vsk)
* The vsk->remote_addr is used to decide which transport to use:
* - remote CID == VMADDR_CID_LOCAL or g2h->local_cid or VMADDR_CID_HOST if
* g2h is not loaded, will use local transport;
- * - remote CID <= VMADDR_CID_HOST or h2g is not loaded or remote flags field
- * includes VMADDR_FLAG_TO_HOST flag value, will use guest->host transport;
- * - remote CID > VMADDR_CID_HOST will use host->guest transport;
+ * - remote CID <= VMADDR_CID_HOST or remote flags field includes
+ * VMADDR_FLAG_TO_HOST, will use guest->host transport;
+ * - remote CID > VMADDR_CID_HOST and h2g is loaded and h2g claims that CID,
+ * will use host->guest transport;
+ * - h2g not loaded or h2g does not claim that CID and g2h claims the CID via
+ * has_remote_cid, will use guest->host transport (when g2h_fallback=1)
+ * - anything else goes to h2g or returns -ENODEV if no h2g is available
*/
int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
{
@@ -581,11 +585,21 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
case SOCK_SEQPACKET:
if (vsock_use_local_transport(remote_cid))
new_transport = transport_local;
- else if (remote_cid <= VMADDR_CID_HOST || !transport_h2g ||
+ else if (remote_cid <= VMADDR_CID_HOST ||
(remote_flags & VMADDR_FLAG_TO_HOST))
new_transport = transport_g2h;
- else
+ else if (transport_h2g &&
+ (!transport_h2g->has_remote_cid ||
+ transport_h2g->has_remote_cid(vsk, remote_cid)))
+ new_transport = transport_h2g;
+ else if (sock_net(sk)->vsock.g2h_fallback &&
+ transport_g2h && transport_g2h->has_remote_cid &&
+ transport_g2h->has_remote_cid(vsk, remote_cid)) {
+ vsk->remote_addr.svm_flags |= VMADDR_FLAG_TO_HOST;
+ new_transport = transport_g2h;
+ } else {
new_transport = transport_h2g;
+ }
break;
default:
ret = -ESOCKTNOSUPPORT;
@@ -2879,6 +2893,15 @@ static struct ctl_table vsock_table[] = {
.mode = 0644,
.proc_handler = vsock_net_child_mode_string
},
+ {
+ .procname = "g2h_fallback",
+ .data = &init_net.vsock.g2h_fallback,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
};
static int __net_init vsock_sysctl_register(struct net *net)
@@ -2894,6 +2917,7 @@ static int __net_init vsock_sysctl_register(struct net *net)
table[0].data = &net->vsock.mode;
table[1].data = &net->vsock.child_ns_mode;
+ table[2].data = &net->vsock.g2h_fallback;
}
net->vsock.sysctl_hdr = register_net_sysctl_sz(net, "net/vsock", table,
@@ -2928,6 +2952,7 @@ static void vsock_net_init(struct net *net)
net->vsock.mode = vsock_net_child_mode(current->nsproxy->net_ns);
net->vsock.child_ns_mode = net->vsock.mode;
+ net->vsock.g2h_fallback = 1;
}
static __net_init int vsock_sysctl_init_net(struct net *net)
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 77fe5b7b066c..57f2d6ec3ffc 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -547,11 +547,18 @@ bool virtio_transport_stream_allow(struct vsock_sock *vsk, u32 cid, u32 port)
static bool virtio_transport_seqpacket_allow(struct vsock_sock *vsk,
u32 remote_cid);
+static bool virtio_transport_has_remote_cid(struct vsock_sock *vsk, u32 cid)
+{
+ /* The CID could be implemented by the host. Always assume it is. */
+ return true;
+}
+
static struct virtio_transport virtio_transport = {
.transport = {
.module = THIS_MODULE,
.get_local_cid = virtio_transport_get_local_cid,
+ .has_remote_cid = virtio_transport_has_remote_cid,
.init = virtio_transport_do_socket_init,
.destruct = virtio_transport_destruct,
--
2.47.1
Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christof Hellmis, Andreas Stieger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597