[CRIU] [PATCH] Do not call listen() when SO_REUSEADDR is off

Fri Feb 13 16:26:44 PST 2015

For an established TCP connection, the send queue is restored in two
steps: in step (1), we retransmit the data that was sent before but not
yet acknowledged, and in step (2), we transmit the data that has never
been sent out.  The TCP_REPAIR option is disabled before step (2) and
re-enabled after it (this is already the case without this patch).

If the amount of data to be sent in step (2) is large, the TCP_REPAIR
flag on the socket can remain off for some time (O(milliseconds)).  If a
listen() is called on another socket bound to the same port during this
time window, it fails, because turning TCP_REPAIR off clears the
SO_REUSEADDR flag on the socket.

This patch adds a mutex (reuseaddr_lock) per port number, so that a
listen() on a port number does not happen while SO_REUSEADDR for another
socket on the same port is off.

Thanks to Amey Deshpande <ameyd at google.com> for debugging.

Signed-off-by: Saied Kazemi <saied at google.com>
---
 include/sk-inet.h |  1 +
 sk-inet.c         | 10 ++++++++++
 sk-tcp.c          | 14 ++++++++++----
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/include/sk-inet.h b/include/sk-inet.h
index aa7db16..22553bc 100644
--- a/include/sk-inet.h
+++ b/include/sk-inet.h
@@ -79,5 +79,6 @@ extern int restore_one_tcp(int sk, struct inet_sk_info *si);
 
 extern int check_tcp(void);
 extern int rst_tcp_socks_add(int fd, bool reuseaddr);
+extern mutex_t *inet_get_reuseaddr_lock(struct inet_sk_info *ii);
 
 #endif /* __CR_SK_INET_H__ */
diff --git a/sk-inet.c b/sk-inet.c
index adf6fc3..539c80c 100644
--- a/sk-inet.c
+++ b/sk-inet.c
@@ -30,6 +30,7 @@ struct inet_port {
 	int port;
 	int type;
 	futex_t users;
+	mutex_t reuseaddr_lock;
 	struct list_head list;
 };
 
@@ -53,6 +54,7 @@ static struct inet_port *port_add(int type, int port)
 	e->type = type;
 	futex_init(&e->users);
 	futex_inc(&e->users);
+	mutex_init(&e->reuseaddr_lock);
 
 	list_add(&e->list, &inet_ports);
 
@@ -537,10 +539,13 @@ static int open_inet_sk(struct file_desc *d)
 			goto err;
 		}
 
+		mutex_lock(&ii->port->reuseaddr_lock);
 		if (listen(sk, ie->backlog) == -1) {
 			pr_perror("Can't listen on a socket");
+			mutex_unlock(&ii->port->reuseaddr_lock);
 			goto err;
 		}
+		mutex_unlock(&ii->port->reuseaddr_lock);
 	}
 
 	if (ie->state == TCP_ESTABLISHED &&
@@ -624,3 +629,8 @@ int inet_connect(int sk, struct inet_sk_info *ii)
 
 	return 0;
 }
+
+mutex_t *inet_get_reuseaddr_lock(struct inet_sk_info *ii)
+{
+	return &ii->port->reuseaddr_lock;
+}
diff --git a/sk-tcp.c b/sk-tcp.c
index 3f1556d..85eaafa 100644
--- a/sk-tcp.c
+++ b/sk-tcp.c
@@ -507,7 +507,7 @@ static int send_tcp_queue(int sk, int queue, u32 len, struct cr_img *img)
 	return __send_tcp_queue(sk, queue, len, img);
 }
 
-static int restore_tcp_queues(int sk, TcpStreamEntry *tse, struct cr_img *img)
+static int restore_tcp_queues(int sk, TcpStreamEntry *tse, struct cr_img *img, mutex_t *reuse_lock)
 {
 	u32 len;
 
@@ -534,11 +534,17 @@ static int restore_tcp_queues(int sk, TcpStreamEntry *tse, struct cr_img *img)
 	 * they can be restored without any tricks.
 	 */
 	len = tse->unsq_len;
+	mutex_lock(reuse_lock);
 	tcp_repair_off(sk);
-	if (len && __send_tcp_queue(sk, TCP_SEND_QUEUE, len, img))
+	if (len && __send_tcp_queue(sk, TCP_SEND_QUEUE, len, img)) {
+		mutex_unlock(reuse_lock);
 		return -1;
-	if (tcp_repair_on(sk))
+	}
+	if (tcp_repair_on(sk)) {
+		mutex_unlock(reuse_lock);
 		return -1;
+	}
+	mutex_unlock(reuse_lock);
 
 	return 0;
 }
@@ -621,7 +627,7 @@ static int restore_tcp_conn_state(int sk, struct inet_sk_info *ii)
 	if (restore_tcp_opts(sk, tse))
 		goto err_c;
 
-	if (restore_tcp_queues(sk, tse, img))
+	if (restore_tcp_queues(sk, tse, img, inet_get_reuseaddr_lock(ii)))
 		goto err_c;
 
 	if (tse->has_nodelay && tse->nodelay) {
-- 
2.2.0.rc0.207.ga3a616c