[Devel] [PATCH 1/1] vhost/vsock: suppress EHOSTUNREACH fast-fail during CPR pause
Denis V. Lunev
den at openvz.org
Fri May 22 12:22:26 MSK 2026
The companion commit preserves ring state across VHOST_RESET_OWNER,
but QEMU still issues VHOST_VSOCK_SET_RUNNING(0) before RESET_OWNER,
which NULLs vq->private_data via vhost_vsock_drop_backends(). The
fast-fail added to vhost_transport_send_pkt() by commit 4ff28534c799
then rejects every host send with -EHOSTUNREACH until the destination
calls SET_RUNNING(1), so the entire CPR (QEMU CheckPoint and Restart)
window becomes a hard outage for host AF_VSOCK clients (VSTOR-131956).
Add a cpr_paused flag that vhost_vsock_stop() sets when the backend
was live at the time of the stop and that vhost_vsock_start() clears.
While it is set, vhost_transport_send_pkt() queues the skb instead of
fast-failing; the existing kick of send_pkt_work in vhost_vsock_start()
drains the queued skbs on resume. A device that has never run keeps
cpr_paused == false, so the boot-time fast-fail behaviour is preserved.
Set the flag before dropping the backends so that a concurrent sender
never observes the combination (backend == NULL, cpr_paused == false).
https://virtuozzo.atlassian.net/browse/VSTOR-131956
Signed-off-by: Denis V. Lunev <den at openvz.org>
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply at anthropic.com>
---
drivers/vhost/vsock.c | 18 ++++++++++++++----
1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 0a518c3d1596..82671fcc6769 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -57,6 +57,7 @@ struct vhost_vsock {
u32 guest_cid;
bool seqpacket_allow;
+ bool cpr_paused; /* between stop and next start; queues sends */
};
static u32 vhost_transport_get_local_cid(void)
@@ -295,7 +296,9 @@ vhost_transport_send_pkt(struct sk_buff *skb)
* all the outcomes covered: if the backend becomes NULL right after the check,
* vhost_transport_do_send_pkt() will check it under the mutex anyway.
*/
- if (unlikely(!data_race(vhost_vq_get_backend(&vsock->vqs[VSOCK_VQ_RX])))) {
+ /* cpr_paused: queue across CPR; else NULL backend means not ready. */
+ if (unlikely(!data_race(vhost_vq_get_backend(&vsock->vqs[VSOCK_VQ_RX])) &&
+ !READ_ONCE(vsock->cpr_paused))) {
rcu_read_unlock();
kfree_skb(skb);
return -EHOSTUNREACH;
@@ -610,9 +613,9 @@ static int vhost_vsock_start(struct vhost_vsock *vsock)
mutex_unlock(&vq->mutex);
}
- /* Some packets may have been queued before the device was started,
- * let's kick the send worker to send them.
- */
+ WRITE_ONCE(vsock->cpr_paused, false);
+
+ /* Drain anything queued while paused or before first start. */
vhost_vq_work_queue(&vsock->vqs[VSOCK_VQ_RX], &vsock->send_pkt_work);
mutex_unlock(&vsock->dev.mutex);
@@ -653,6 +656,7 @@ static void vhost_vsock_drop_backends(struct vhost_vsock *vsock)
static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner)
{
int ret = 0;
+ bool was_running;
mutex_lock(&vsock->dev.mutex);
@@ -662,6 +666,12 @@ static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner)
goto err;
}
+ /* Set cpr_paused before dropping backends so concurrent senders
+ * see (backend, !paused) or (NULL, paused), never (NULL, !paused).
+ */
+ was_running = !!vhost_vq_get_backend(&vsock->vqs[VSOCK_VQ_RX]);
+ if (was_running)
+ WRITE_ONCE(vsock->cpr_paused, true);
vhost_vsock_drop_backends(vsock);
err:
mutex_unlock(&vsock->dev.mutex);
--
2.51.0
More information about the Devel
mailing list