[CRIU] Re: [PATCH cr 3/8] tcp: unset TCP_REPAIR at the last moment after unlocking network

Pavel Emelyanov xemul at parallels.com
Fri Sep 14 07:20:13 EDT 2012


On 09/14/2012 02:25 PM, Andrey Vagin wrote:
> TCP_REPAIR should be droppet when a network is unlocked.
> A network should be unlocked at the last moment, because
> after this moment restore must not failed, otherwise a state of
> a tcp connection can be changed and a state of one side in our image
> will be invalid.
> 
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
>  cr-restore.c       |    9 ++++++-
>  include/restorer.h |    3 ++
>  include/sk-inet.h  |   17 +++++++++++++
>  restorer.c         |    7 ++++-
>  sk-tcp.c           |   64 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 97 insertions(+), 3 deletions(-)
> 
> diff --git a/cr-restore.c b/cr-restore.c
> index 0e3f948..9fee16f 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -1280,7 +1280,8 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core, struct list_head *tgt_v
>  					      restore_task_vma_len +
>  					      restore_thread_vma_len +
>  					      self_vmas_len +
> -					      SHMEMS_SIZE + TASK_ENTRIES_SIZE);
> +					      SHMEMS_SIZE + TASK_ENTRIES_SIZE +
> +					      rst_tcp_socks_size);
>  	if (exec_mem_hint == -1) {
>  		pr_err("No suitable area for task_restore bootstrap (%ldK)\n",
>  		       restore_task_vma_len + restore_thread_vma_len);
> @@ -1356,6 +1357,12 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core, struct list_head *tgt_v
>  	if (!task_args->tgt_vmas)
>  		goto err;
>  
> +	mem += vmas_len;
> +	if (rst_tcp_socks_remap(mem))
> +		goto err;
> +	task_args->rst_tcp_socks = mem;
> +	task_args->rst_tcp_socks_size = rst_tcp_socks_size;
> +
>  	/*
>  	 * Arguments for task restoration.
>  	 */
> diff --git a/include/restorer.h b/include/restorer.h
> index 274459b..3b9fbcf 100644
> --- a/include/restorer.h
> +++ b/include/restorer.h
> @@ -110,6 +110,9 @@ struct task_restore_core_args {
>  	bool				has_futex;
>  	u64				futex_rla;
>  	u32				futex_rla_len;
> +
> +	int				*rst_tcp_socks;
> +	int				rst_tcp_socks_size;
>  } __aligned(sizeof(long));
>  
>  struct pt_regs {
> diff --git a/include/sk-inet.h b/include/sk-inet.h
> index 0fe550c..18c8cdb 100644
> --- a/include/sk-inet.h
> +++ b/include/sk-inet.h
> @@ -3,6 +3,9 @@
>  
>  #include <netinet/tcp.h>
>  
> +#include "sockets.h"
> +#include "files.h"
> +#include "list.h"
>  #include "protobuf.h"
>  #include "../protobuf/sk-inet.pb-c.h"
>  
> @@ -62,4 +65,18 @@ struct cr_options;
>  void show_tcp_stream(int fd, struct cr_options *);
>  
>  int check_tcp_repair(void);
> +
> +extern int rst_tcp_socks_size;
> +extern int rst_tcp_socks_remap(void *addr);
> +
> +static inline void rst_tcp_socks_all(int *arr, int size)

Why in header?

> +{
> +	int i;
> +
> +	for (i =0; i < size / sizeof(int) && arr[i] >= 0; i++)

Why &&? Let's make a single end-of-array condition
(the arr[i] < 0 one looks nicer).

> +		tcp_repair_off(arr[i]);
> +
> +	sys_munmap(arr, size);
> +}
> +
>  #endif
> diff --git a/restorer.c b/restorer.c
> index 5cee0b1..f6c780d 100644
> --- a/restorer.c
> +++ b/restorer.c
> @@ -18,6 +18,7 @@
>  #include "restorer-log.h"
>  #include "util.h"
>  #include "image.h"
> +#include "sk-inet.h"
>  
>  #include "crtools.h"
>  #include "lock.h"
> @@ -631,10 +632,12 @@ long __export_restore_task(struct task_restore_core_args *args)
>  
>  	futex_dec_and_wake(&args->task_entries->nr_in_progress);
>  
> -	sys_close(args->logfd);
> -
>  	futex_wait_while(&args->task_entries->start, CR_STATE_RESTORE_SIGCHLD);
>  
> +	rst_tcp_socks_all(args->rst_tcp_socks, args->rst_tcp_socks_size);
> +
> +	sys_close(args->logfd);
> +
>  	/*
>  	 * The code that prepared the itimers makes shure the
>  	 * code below doesn't fail due to bad timing values.
> diff --git a/sk-tcp.c b/sk-tcp.c
> index 4a9cf73..65c0e21 100644
> --- a/sk-tcp.c
> +++ b/sk-tcp.c
> @@ -3,6 +3,7 @@
>  #include <linux/sockios.h>
>  #include <unistd.h>
>  #include <stdlib.h>
> +#include <sys/mman.h>
>  
>  #include "crtools.h"
>  #include "util.h"
> @@ -473,6 +474,66 @@ err:
>  	return -1;
>  }
>  
> +/*
> + * rst_tcp_socks contains sockets in repair mode,
> + * which will be off in restorer before resuming.
> + */
> +static int *rst_tcp_socks = NULL;
> +static int rst_tcp_socks_num = 0;
> +int rst_tcp_socks_size = 0;
> +
> +int rst_tcp_socks_remap(void *addr)
> +{
> +	if (!rst_tcp_socks) {
> +		BUG_ON(rst_tcp_socks_size);
> +		return 0;
> +	}
> +
> +	if (rst_tcp_socks_num * sizeof(int) < rst_tcp_socks_size)
> +		rst_tcp_socks[rst_tcp_socks_num] = -1;
> +
> +	rst_tcp_socks = mremap(rst_tcp_socks,
> +					rst_tcp_socks_size,
> +					rst_tcp_socks_size,
> +					MREMAP_MAYMOVE | MREMAP_FIXED,
> +					addr);
> +
> +	if (rst_tcp_socks == MAP_FAILED) {
> +		pr_perror("mremap failed\n");
> +		return -1;
> +	}
> +
> +	BUG_ON(rst_tcp_socks != addr);
> +
> +	return 0;
> +}
> +
> +static int rst_tcp_socks_add(int fd)
> +{
> +	if (rst_tcp_socks == NULL) {
> +		rst_tcp_socks_size += PAGE_SIZE;
> +		rst_tcp_socks = mmap(NULL, rst_tcp_socks_size,
> +					    PROT_WRITE | PROT_READ,
> +					    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> +	}
> +
> +	if ((rst_tcp_socks_num + 1) * sizeof(int) > rst_tcp_socks_size) {
> +		rst_tcp_socks = mremap(rst_tcp_socks,
> +						rst_tcp_socks_size,
> +						rst_tcp_socks_size + PAGE_SIZE,
> +						MREMAP_MAYMOVE);
> +		rst_tcp_socks_size += PAGE_SIZE;
> +	}

I heavily doubt that a task will have more than 1 page of inet socket descriptors :)
Let's not overkill on this and use xrealloc() and mmap+memcpy in rst_tcp_socks_remap.

> +
> +	if (rst_tcp_socks == MAP_FAILED) {
> +		pr_perror("Can't allocate memory\n");
> +		return -1;
> +	}
> +
> +	rst_tcp_socks[rst_tcp_socks_num++] = fd;
> +	return 0;
> +}
> +
>  int restore_one_tcp(int fd, struct inet_sk_info *ii)
>  {
>  	pr_info("Restoring TCP connection\n");
> @@ -480,6 +541,9 @@ int restore_one_tcp(int fd, struct inet_sk_info *ii)
>  	if (tcp_repair_on(fd))
>  		return -1;
>  
> +	if (rst_tcp_socks_add(fd))
> +		return -1;
> +
>  	nf_unlock_connection_info(ii);
>  
>  	if (restore_tcp_conn_state(fd, ii))



More information about the CRIU mailing list