[CRIU] Re: [PATCH cr 2/2] sk-inet: restore option REUSEADDR

Pavel Emelyanov xemul at parallels.com
Thu Aug 16 12:29:01 EDT 2012


On 08/16/2012 05:45 PM, Andrey Vagin wrote:
> 
> All sockets are created with SO_REUSEADDR because it is needed for restoring.
> E.g.: a listening socket is created after a connected socket, and both of
> them are bound to the same port.
> 
> So SO_REUSEADDR should be restored, when all sockets on a port were created.
> 
> This code creates a structure for each port of one type of sockets
> and accounts a number of sockets, which are not restored yet.
> 
> Sockets have a post_open() hook, which waits until all sockets for
> a given port have been created and then restores SO_REUSEADDR.
> 
> struct port contains a type (udp, tcp, etc) and a port number.
> It doesn't contain the family or address, because that would add a lot of
> extra logic without bringing any significant benefit.
> 
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
>  include/sk-inet.h      |    2 +
>  protobuf/sk-opts.proto |    1 +
>  sk-inet.c              |   81 ++++++++++++++++++++++++++++++++++++++++++++++--
>  sockets.c              |    9 +++++-
>  4 files changed, 89 insertions(+), 4 deletions(-)
> 

> @@ -22,6 +23,46 @@
>  #define PB_ALEN_INET	1
>  #define PB_ALEN_INET6	4
>  
> +LIST_HEAD(inet_ports);

This should be static.

> +
> +#define SHMEM_SIZE 4096
> +static void *shmem = NULL;
> +static int shmem_size;
> +
> +struct inet_port {
> +	int port;
> +	int type;
> +	futex_t users;
> +	struct list_head list;
> +};
> +
> +static struct inet_port *port_add(int type, int port)
> +{
> +	struct inet_port *e;
> +
> +	list_for_each_entry(e, &inet_ports, list)
> +		if (e->type == type && e->port == port) {
> +			futex_inc(&e->users);
> +			return e;
> +		}
> +
> +	e = shmem + shmem_size;
> +	shmem_size += sizeof(*e);
> +	if (shmem_size > SHMEM_SIZE) {
> +		pr_err("Not enough memory\n");
> +		return NULL;
> +	}
> +
> +	e->port = port;
> +	e->type = type;
> +	futex_init(&e->users);
> +	futex_inc(&e->users);
> +
> +	list_add(&e->list, &inet_ports);
> +
> +	return e;
> +}
> +
>  static void show_one_inet(const char *act, const struct inet_sk_desc *sk)
>  {
>  	char src_addr[INET_ADDR_LEN] = "<unknown>";
> @@ -296,10 +337,12 @@ static bool is_bound(struct inet_sk_info *ii)
>  
>  
>  static int open_inet_sk(struct file_desc *d);
> +static int post_open_inet_sk(struct file_desc *d, int sk);
>  
>  static struct file_desc_ops inet_desc_ops = {
>  	.type = FD_TYPES__INETSK,
>  	.open = open_inet_sk,
> +	.post_open = post_open_inet_sk,
>  };
>  
>  static int collect_one_inetsk(void *o, ProtobufCMessage *base)
> @@ -311,11 +354,23 @@ static int collect_one_inetsk(void *o, ProtobufCMessage *base)
>  	if (tcp_connection(ii->ie))
>  		tcp_locked_conn_add(ii);
>  
> +	ii->port = port_add(ii->ie->type, ii->ie->src_port);
> +	if (ii->port == NULL)
> +		return -1;
> +

Add a comment describing why even sockets with this option ON are
added to this list.

>  	return 0;
>  }
>  
>  int collect_inet_sockets(void)
>  {
> +	shmem = mmap(NULL, SHMEM_SIZE, PROT_READ | PROT_WRITE,
> +					     MAP_SHARED | MAP_ANON, 0, 0);
> +	if (shmem == MAP_FAILED) {
> +		pr_perror("Can't map shmem");
> +		return -1;
> +	}
> +	shmem_size = 0;
> +
>  	return collect_image(CR_FD_INETSK, PB_INETSK,
>  			sizeof(struct inet_sk_info), collect_one_inetsk);
>  }
> @@ -340,10 +395,25 @@ static int inet_validate_address(InetSkEntry *ie)
>  	return -1;
>  }
>  
> +static int post_open_inet_sk(struct file_desc *d, int sk)
> +{
> +	struct inet_sk_info *ii;
> +	int no = 0;
> +
> +	ii = container_of(d, struct inet_sk_info, d);
> +
> +	futex_wait_until(&ii->port->users, 0);

This wait can be moved under the if below.

> +
> +	if (!ii->ie->opts->reuseaddr)
> +		if (restore_opt(sk, SOL_SOCKET, SO_REUSEADDR, &no))
> +			return 1;
> +	return 0;
> +}
> +
>  static int open_inet_sk(struct file_desc *d)
>  {
>  	struct inet_sk_info *ii;
> -	int sk;
> +	int sk, yes = 1;
>  
>  	ii = container_of(d, struct inet_sk_info, d);
>  
> @@ -369,12 +439,15 @@ static int open_inet_sk(struct file_desc *d)
>  	}
>  
>  	if (ii->ie->v6only) {
> -		int yes = 1;
> -
>  		if (restore_opt(sk, SOL_IPV6, IPV6_V6ONLY, &yes) == -1)
>  			return -1;
>  	}
>  
> +	/* Set SO_REUSEADDR, because some sockets can be binded to one addr.

s/binded/bound/

> +	 * The origin value of SO_REUSEADDR will be restored in post_open. */

Please write multi-line comments in the

/*
 * something
 */

form

> +	if (restore_opt(sk, SOL_SOCKET, SO_REUSEADDR, &yes))
> +		return -1;
> +
>  	if (tcp_connection(ii->ie)) {
>  		if (!opts.tcp_established_ok) {
>  			pr_err("Connected TCP socket in image\n");
> @@ -413,6 +486,8 @@ static int open_inet_sk(struct file_desc *d)
>  			inet_connect(sk, ii))
>  		goto err;
>  done:
> +	futex_dec(&ii->port->users);
> +
>  	if (rst_file_params(sk, ii->ie->fown, ii->ie->flags))
>  		goto err;
>  
> diff --git a/sockets.c b/sockets.c
> index fb9477a..06f4ece 100644
> --- a/sockets.c
> +++ b/sockets.c
> @@ -80,6 +80,8 @@ int restore_socket_opts(int sk, SkOptsEntry *soe)
>  	tv.tv_usec = soe->so_rcv_tmo_usec;
>  	ret |= restore_opt(sk, SOL_SOCKET, SO_RCVTIMEO, &tv);
>  
> +	/* The restore of SO_REUSEADDR depends on type of socket */
> +
>  	return ret;
>  }
>  
> @@ -103,7 +105,7 @@ int do_dump_opt(int sk, int level, int name, void *val, int len)
>  
>  int dump_socket_opts(int sk, SkOptsEntry *soe)
>  {
> -	int ret = 0;
> +	int ret = 0, val;
>  	struct timeval tv;
>  
>  	ret |= dump_opt(sk, SOL_SOCKET, SO_SNDBUF, &soe->so_sndbuf);
> @@ -117,6 +119,10 @@ int dump_socket_opts(int sk, SkOptsEntry *soe)
>  	soe->so_rcv_tmo_sec = tv.tv_sec;
>  	soe->so_rcv_tmo_usec = tv.tv_usec;
>  
> +	ret |= dump_opt(sk, SOL_SOCKET, SO_REUSEADDR, &val);
> +	soe->reuseaddr = val ? true : false;
> +	soe->has_reuseaddr = true;
> +
>  	return ret;
>  }
>  
> @@ -281,6 +287,7 @@ int collect_sockets(int pid)
>  	}
>  
>  	if (opts.namespaces_flags & CLONE_NEWNET && opts.ext_unix_sk) {
> +		pr_info("Collect unix sockets from init netns\n");

Garbage? This message looks unrelated to the patch.

>  		nl = socket(PF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
>  		if (nl < 0) {
>  			pr_perror("Can't create sock diag socket");


More information about the CRIU mailing list