[CRIU] [PATCH] Do not call listen() when SO_REUSEADDR is off
Saied Kazemi
saied at google.com
Fri Feb 13 16:26:44 PST 2015
For an established TCP connection, the send queue is restored in two
steps: in step (1), we retransmit the data that was sent before but not
yet acknowledged, and in step (2), we transmit the data that was never
sent out before. The TCP_REPAIR option is disabled before step (2)
and re-enabled after step (2) (without this patch).
If the amount of data to be sent in step (2) is large, the TCP_REPAIR
flag on the socket can remain off for some time (O(milliseconds)). If a
listen() is called on another socket bound to the same port during this
time window, it fails. This is because turning TCP_REPAIR off clears
the SO_REUSEADDR flag on the socket.
This patch adds a mutex (reuseaddr_lock) per port number, so that a
listen() on a port number does not happen while SO_REUSEADDR for another
socket on the same port is off.
Thanks to Amey Deshpande <ameyd at google.com> for debugging.
Signed-off-by: Saied Kazemi <saied at google.com>
---
include/sk-inet.h | 1 +
sk-inet.c | 10 ++++++++++
sk-tcp.c | 14 ++++++++++----
3 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/include/sk-inet.h b/include/sk-inet.h
index aa7db16..22553bc 100644
--- a/include/sk-inet.h
+++ b/include/sk-inet.h
@@ -79,5 +79,6 @@ extern int restore_one_tcp(int sk, struct inet_sk_info *si);
extern int check_tcp(void);
extern int rst_tcp_socks_add(int fd, bool reuseaddr);
+extern mutex_t *inet_get_reuseaddr_lock(struct inet_sk_info *ii);
#endif /* __CR_SK_INET_H__ */
diff --git a/sk-inet.c b/sk-inet.c
index adf6fc3..539c80c 100644
--- a/sk-inet.c
+++ b/sk-inet.c
@@ -30,6 +30,7 @@ struct inet_port {
int port;
int type;
futex_t users;
+ mutex_t reuseaddr_lock;
struct list_head list;
};
@@ -53,6 +54,7 @@ static struct inet_port *port_add(int type, int port)
e->type = type;
futex_init(&e->users);
futex_inc(&e->users);
+ mutex_init(&e->reuseaddr_lock);
list_add(&e->list, &inet_ports);
@@ -537,10 +539,13 @@ static int open_inet_sk(struct file_desc *d)
goto err;
}
+ mutex_lock(&ii->port->reuseaddr_lock);
if (listen(sk, ie->backlog) == -1) {
pr_perror("Can't listen on a socket");
+ mutex_unlock(&ii->port->reuseaddr_lock);
goto err;
}
+ mutex_unlock(&ii->port->reuseaddr_lock);
}
if (ie->state == TCP_ESTABLISHED &&
@@ -624,3 +629,8 @@ int inet_connect(int sk, struct inet_sk_info *ii)
return 0;
}
+
+mutex_t *inet_get_reuseaddr_lock(struct inet_sk_info *ii)
+{
+ return &ii->port->reuseaddr_lock;
+}
diff --git a/sk-tcp.c b/sk-tcp.c
index 3f1556d..85eaafa 100644
--- a/sk-tcp.c
+++ b/sk-tcp.c
@@ -507,7 +507,7 @@ static int send_tcp_queue(int sk, int queue, u32 len, struct cr_img *img)
return __send_tcp_queue(sk, queue, len, img);
}
-static int restore_tcp_queues(int sk, TcpStreamEntry *tse, struct cr_img *img)
+static int restore_tcp_queues(int sk, TcpStreamEntry *tse, struct cr_img *img, mutex_t *reuse_lock)
{
u32 len;
@@ -534,11 +534,17 @@ static int restore_tcp_queues(int sk, TcpStreamEntry *tse, struct cr_img *img)
* they can be restored without any tricks.
*/
len = tse->unsq_len;
+ mutex_lock(reuse_lock);
tcp_repair_off(sk);
- if (len && __send_tcp_queue(sk, TCP_SEND_QUEUE, len, img))
+ if (len && __send_tcp_queue(sk, TCP_SEND_QUEUE, len, img)) {
+ mutex_unlock(reuse_lock);
return -1;
- if (tcp_repair_on(sk))
+ }
+ if (tcp_repair_on(sk)) {
+ mutex_unlock(reuse_lock);
return -1;
+ }
+ mutex_unlock(reuse_lock);
return 0;
}
@@ -621,7 +627,7 @@ static int restore_tcp_conn_state(int sk, struct inet_sk_info *ii)
if (restore_tcp_opts(sk, tse))
goto err_c;
- if (restore_tcp_queues(sk, tse, img))
+ if (restore_tcp_queues(sk, tse, img, inet_get_reuseaddr_lock(ii)))
goto err_c;
if (tse->has_nodelay && tse->nodelay) {
--
2.2.0.rc0.207.ga3a616c
More information about the CRIU
mailing list