[CRIU] [PATCH 12/14] soccr/tcp: Restore queues using library

Pavel Emelyanov xemul at virtuozzo.com
Mon Apr 18 06:06:39 PDT 2016


TODO: teach the new library call, libsoccr_set_queue_bytes(), to free
the buffer, as its peer libsoccr_get_queue_bytes() does.

Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
---
 criu/sk-tcp.c |  93 ++++++++--------------------------------------------
 soccr/soccr.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 soccr/soccr.h |   2 ++
 3 files changed, 118 insertions(+), 79 deletions(-)

diff --git a/criu/sk-tcp.c b/criu/sk-tcp.c
index 510c327..0f0788c 100644
--- a/criu/sk-tcp.c
+++ b/criu/sk-tcp.c
@@ -253,10 +253,9 @@ int dump_one_tcp(int fd, struct inet_sk_desc *sk)
 	return 0;
 }
 
-static int __send_tcp_queue(int sk, int queue, u32 len, struct cr_img *img)
+static int send_tcp_queue(struct libsoccr_sk *sk, struct libsoccr_sk_data *data,
+		int queue, u32 len, struct cr_img *img)
 {
-	int ret, err = -1, max_chunk;
-	int off;
 	char *buf;
 
 	buf = xmalloc(len);
@@ -266,91 +265,27 @@ static int __send_tcp_queue(int sk, int queue, u32 len, struct cr_img *img)
 	if (read_img_buf(img, buf, len) < 0)
 		goto err;
 
-	max_chunk = len;
-	off = 0;
-
-	do {
-		int chunk = len;
-
-		if (chunk > max_chunk)
-			chunk = max_chunk;
-
-		ret = send(sk, buf + off, chunk, 0);
-		if (ret <= 0) {
-			if (max_chunk > 1024) {
-				/*
-				 * Kernel not only refuses the whole chunk,
-				 * but refuses to split it into pieces too.
-				 *
-				 * When restoring recv queue in repair mode
-				 * kernel doesn't try hard and just allocates
-				 * a linear skb with the size we pass to the
-				 * system call. Thus, if the size is too big
-				 * for slab allocator, the send just fails
-				 * with ENOMEM.
-				 *
-				 * In any case -- try smaller chunk, hopefully
-				 * there's still enough memory in the system.
-				 */
-				max_chunk >>= 1;
-				continue;
-			}
-
-			pr_perror("Can't restore %d queue data (%d), want (%d:%d:%d)",
-				  queue, ret, chunk, len, max_chunk);
-			goto err;
-		}
-		off += ret;
-		len -= ret;
-	} while (len);
+	if (libsoccr_set_queue_bytes(sk, data, sizeof(*data), queue, buf))
+		goto err;
 
-	err = 0;
-err:
 	xfree(buf);
+	return 0;
 
-	return err;
-}
-
-static int send_tcp_queue(int sk, int queue, u32 len, struct cr_img *img)
-{
-	pr_debug("\tRestoring TCP %d queue data %u bytes\n", queue, len);
-
-	if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)) < 0) {
-		pr_perror("Can't set repair queue");
-		return -1;
-	}
-
-	return __send_tcp_queue(sk, queue, len, img);
+err:
+	xfree(buf);
+	return -1;
 }
 
-static int restore_tcp_queues(int sk, TcpStreamEntry *tse, struct cr_img *img)
+static int restore_tcp_queues(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, struct cr_img *img)
 {
 	u32 len;
 
-	len = tse->inq_len;
-	if (len && send_tcp_queue(sk, TCP_RECV_QUEUE, len, img))
-		return -1;
-
-	/*
-	 * All data in a write buffer can be divided on two parts sent
-	 * but not yet acknowledged data and unsent data.
-	 * The TCP stack must know which data have been sent, because
-	 * acknowledgment can be received for them. These data must be
-	 * restored in repair mode.
-	 */
-	len = tse->outq_len - tse->unsq_len;
-	if (len && send_tcp_queue(sk, TCP_SEND_QUEUE, len, img))
+	len = data->inq_len;
+	if (len && send_tcp_queue(sk, data, TCP_RECV_QUEUE, len, img))
 		return -1;
 
-	/*
-	 * The second part of data have never been sent to outside, so
-	 * they can be restored without any tricks.
-	 */
-	len = tse->unsq_len;
-	tcp_repair_off(sk);
-	if (len && __send_tcp_queue(sk, TCP_SEND_QUEUE, len, img))
-		return -1;
-	if (tcp_repair_on(sk))
+	len = data->outq_len;
+	if (len && send_tcp_queue(sk, data, TCP_SEND_QUEUE, len, img))
 		return -1;
 
 	return 0;
@@ -419,7 +354,7 @@ static int restore_tcp_conn_state(int sk, struct libsoccr_sk *socr, struct inet_
 	if (restore_prepare_socket(sk))
 		goto err_c;
 
-	if (restore_tcp_queues(sk, tse, img))
+	if (restore_tcp_queues(socr, &data, img))
 		goto err_c;
 
 	if (tse->has_nodelay && tse->nodelay) {
diff --git a/soccr/soccr.c b/soccr/soccr.c
index e127342..eb7aace 100644
--- a/soccr/soccr.c
+++ b/soccr/soccr.c
@@ -365,3 +365,105 @@ int libsoccr_set_sk_data(struct libsoccr_sk *sk,
 
 	return 0;
 }
+
+static int __send_queue(struct libsoccr_sk *sk, int queue, char *buf, __u32 len)
+{
+	int ret, err = -1, max_chunk;
+	int off;
+
+	max_chunk = len;
+	off = 0;
+
+	do {
+		int chunk = len;
+
+		if (chunk > max_chunk)
+			chunk = max_chunk;
+
+		ret = send(sk->fd, buf + off, chunk, 0);
+		if (ret <= 0) {
+			if (max_chunk > 1024) {
+				/*
+				 * Kernel not only refuses the whole chunk,
+				 * but refuses to split it into pieces too.
+				 *
+				 * When restoring recv queue in repair mode
+				 * kernel doesn't try hard and just allocates
+				 * a linear skb with the size we pass to the
+				 * system call. Thus, if the size is too big
+				 * for slab allocator, the send just fails
+				 * with ENOMEM.
+				 *
+				 * In any case -- try smaller chunk, hopefully
+				 * there's still enough memory in the system.
+				 */
+				max_chunk >>= 1;
+				continue;
+			}
+
+			loge("Can't restore %d queue data (%d), want (%d:%d:%d)",
+				  queue, ret, chunk, len, max_chunk);
+			goto err;
+		}
+		off += ret;
+		len -= ret;
+	} while (len);
+
+	err = 0;
+err:
+	return err;
+}
+
+static int send_queue(struct libsoccr_sk *sk, int queue, char *buf, __u32 len)
+{
+	logd("\tRestoring TCP %d queue data %u bytes\n", queue, len);
+
+	if (setsockopt(sk->fd, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)) < 0) {
+		loge("Can't set repair queue");
+		return -1;
+	}
+
+	return __send_queue(sk, queue, buf, len);
+}
+
+int libsoccr_set_queue_bytes(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, unsigned data_size,
+		int queue, char *buf)
+{
+	if (!data || data_size < SOCR_DATA_MIN_SIZE)
+		return -1;
+
+	if (queue == TCP_RECV_QUEUE)
+		return send_queue(sk, TCP_RECV_QUEUE, buf, data->inq_len);
+
+	if (queue == TCP_SEND_QUEUE) {
+		__u32 len, ulen;
+
+		/*
+		 * All data in the write buffer falls into two parts: data that
+		 * was sent but not yet acknowledged, and data not yet sent.
+		 * The TCP stack must know which data has been sent, because
+		 * an acknowledgment may arrive for it. That sent part must be
+		 * restored in repair mode.
+		 */
+		ulen = data->unsq_len;
+		len = data->outq_len - ulen;
+		if (len && send_queue(sk, TCP_SEND_QUEUE, buf, len))
+			return -2;
+
+		if (ulen) {
+			/*
+			 * The second part of the data has never been sent out, so
+			 * it can be restored without any tricks.
+			 */
+			tcp_repair_off(sk->fd);
+			if (__send_queue(sk, TCP_SEND_QUEUE, buf + len, ulen))
+				return -3;
+			if (tcp_repair_on(sk->fd))
+				return -4;
+		}
+
+		return 0;
+	}
+
+	return -5;
+}
diff --git a/soccr/soccr.h b/soccr/soccr.h
index 0dd19ee..1d555e0 100644
--- a/soccr/soccr.h
+++ b/soccr/soccr.h
@@ -32,4 +32,6 @@ char *libsoccr_get_queue_bytes(struct libsoccr_sk *sk, int queue_id, int steal);
 
 int libsoccr_set_sk_data_unbound(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, unsigned data_size);
 int libsoccr_set_sk_data(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, unsigned data_size);
+int libsoccr_set_queue_bytes(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, unsigned data_size,
+		int queue, char *buf);
 #endif
-- 
2.5.0



More information about the CRIU mailing list