[CRIU] Re: [PATCH cr 3/8] tcp: unset TCP_REPAIR at the last moment
after unlocking network
Pavel Emelyanov
xemul at parallels.com
Fri Sep 14 07:20:13 EDT 2012
On 09/14/2012 02:25 PM, Andrey Vagin wrote:
> TCP_REPAIR should be droppet when a network is unlocked.
> A network should be unlocked at the last moment, because
> after this moment restore must not failed, otherwise a state of
> a tcp connection can be changed and a state of one side in our image
> will be invalid.
>
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
> cr-restore.c | 9 ++++++-
> include/restorer.h | 3 ++
> include/sk-inet.h | 17 +++++++++++++
> restorer.c | 7 ++++-
> sk-tcp.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> 5 files changed, 97 insertions(+), 3 deletions(-)
>
> diff --git a/cr-restore.c b/cr-restore.c
> index 0e3f948..9fee16f 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -1280,7 +1280,8 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core, struct list_head *tgt_v
> restore_task_vma_len +
> restore_thread_vma_len +
> self_vmas_len +
> - SHMEMS_SIZE + TASK_ENTRIES_SIZE);
> + SHMEMS_SIZE + TASK_ENTRIES_SIZE +
> + rst_tcp_socks_size);
> if (exec_mem_hint == -1) {
> pr_err("No suitable area for task_restore bootstrap (%ldK)\n",
> restore_task_vma_len + restore_thread_vma_len);
> @@ -1356,6 +1357,12 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core, struct list_head *tgt_v
> if (!task_args->tgt_vmas)
> goto err;
>
> + mem += vmas_len;
> + if (rst_tcp_socks_remap(mem))
> + goto err;
> + task_args->rst_tcp_socks = mem;
> + task_args->rst_tcp_socks_size = rst_tcp_socks_size;
> +
> /*
> * Arguments for task restoration.
> */
> diff --git a/include/restorer.h b/include/restorer.h
> index 274459b..3b9fbcf 100644
> --- a/include/restorer.h
> +++ b/include/restorer.h
> @@ -110,6 +110,9 @@ struct task_restore_core_args {
> bool has_futex;
> u64 futex_rla;
> u32 futex_rla_len;
> +
> + int *rst_tcp_socks;
> + int rst_tcp_socks_size;
> } __aligned(sizeof(long));
>
> struct pt_regs {
> diff --git a/include/sk-inet.h b/include/sk-inet.h
> index 0fe550c..18c8cdb 100644
> --- a/include/sk-inet.h
> +++ b/include/sk-inet.h
> @@ -3,6 +3,9 @@
>
> #include <netinet/tcp.h>
>
> +#include "sockets.h"
> +#include "files.h"
> +#include "list.h"
> #include "protobuf.h"
> #include "../protobuf/sk-inet.pb-c.h"
>
> @@ -62,4 +65,18 @@ struct cr_options;
> void show_tcp_stream(int fd, struct cr_options *);
>
> int check_tcp_repair(void);
> +
> +extern int rst_tcp_socks_size;
> +extern int rst_tcp_socks_remap(void *addr);
> +
> +static inline void rst_tcp_socks_all(int *arr, int size)
Why in header?
> +{
> + int i;
> +
> + for (i =0; i < size / sizeof(int) && arr[i] >= 0; i++)
Why &&? Let's make a single end-of-array condition
(the arr[i] < 0 one looks nicer).
> + tcp_repair_off(arr[i]);
> +
> + sys_munmap(arr, size);
> +}
> +
> #endif
> diff --git a/restorer.c b/restorer.c
> index 5cee0b1..f6c780d 100644
> --- a/restorer.c
> +++ b/restorer.c
> @@ -18,6 +18,7 @@
> #include "restorer-log.h"
> #include "util.h"
> #include "image.h"
> +#include "sk-inet.h"
>
> #include "crtools.h"
> #include "lock.h"
> @@ -631,10 +632,12 @@ long __export_restore_task(struct task_restore_core_args *args)
>
> futex_dec_and_wake(&args->task_entries->nr_in_progress);
>
> - sys_close(args->logfd);
> -
> futex_wait_while(&args->task_entries->start, CR_STATE_RESTORE_SIGCHLD);
>
> + rst_tcp_socks_all(args->rst_tcp_socks, args->rst_tcp_socks_size);
> +
> + sys_close(args->logfd);
> +
> /*
> * The code that prepared the itimers makes shure the
> * code below doesn't fail due to bad timing values.
> diff --git a/sk-tcp.c b/sk-tcp.c
> index 4a9cf73..65c0e21 100644
> --- a/sk-tcp.c
> +++ b/sk-tcp.c
> @@ -3,6 +3,7 @@
> #include <linux/sockios.h>
> #include <unistd.h>
> #include <stdlib.h>
> +#include <sys/mman.h>
>
> #include "crtools.h"
> #include "util.h"
> @@ -473,6 +474,66 @@ err:
> return -1;
> }
>
> +/*
> + * rst_tcp_socks contains sockets in repair mode,
> + * which will be off in restorer before resuming.
> + */
> +static int *rst_tcp_socks = NULL;
> +static int rst_tcp_socks_num = 0;
> +int rst_tcp_socks_size = 0;
> +
> +int rst_tcp_socks_remap(void *addr)
> +{
> + if (!rst_tcp_socks) {
> + BUG_ON(rst_tcp_socks_size);
> + return 0;
> + }
> +
> + if (rst_tcp_socks_num * sizeof(int) < rst_tcp_socks_size)
> + rst_tcp_socks[rst_tcp_socks_num] = -1;
> +
> + rst_tcp_socks = mremap(rst_tcp_socks,
> + rst_tcp_socks_size,
> + rst_tcp_socks_size,
> + MREMAP_MAYMOVE | MREMAP_FIXED,
> + addr);
> +
> + if (rst_tcp_socks == MAP_FAILED) {
> + pr_perror("mremap failed\n");
> + return -1;
> + }
> +
> + BUG_ON(rst_tcp_socks != addr);
> +
> + return 0;
> +}
> +
> +static int rst_tcp_socks_add(int fd)
> +{
> + if (rst_tcp_socks == NULL) {
> + rst_tcp_socks_size += PAGE_SIZE;
> + rst_tcp_socks = mmap(NULL, rst_tcp_socks_size,
> + PROT_WRITE | PROT_READ,
> + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> + }
> +
> + if ((rst_tcp_socks_num + 1) * sizeof(int) > rst_tcp_socks_size) {
> + rst_tcp_socks = mremap(rst_tcp_socks,
> + rst_tcp_socks_size,
> + rst_tcp_socks_size + PAGE_SIZE,
> + MREMAP_MAYMOVE);
> + rst_tcp_socks_size += PAGE_SIZE;
> + }
I heavily doubt that a task will have more than 1 page of inet socket descriptors :)
Let's not overkill on this and use xrealloc() and mmap+memcpy in rst_tcp_socks_remap.
> +
> + if (rst_tcp_socks == MAP_FAILED) {
> + pr_perror("Can't allocate memory\n");
> + return -1;
> + }
> +
> + rst_tcp_socks[rst_tcp_socks_num++] = fd;
> + return 0;
> +}
> +
> int restore_one_tcp(int fd, struct inet_sk_info *ii)
> {
> pr_info("Restoring TCP connection\n");
> @@ -480,6 +541,9 @@ int restore_one_tcp(int fd, struct inet_sk_info *ii)
> if (tcp_repair_on(fd))
> return -1;
>
> + if (rst_tcp_socks_add(fd))
> + return -1;
> +
> nf_unlock_connection_info(ii);
>
> if (restore_tcp_conn_state(fd, ii))
More information about the CRIU
mailing list