[CRIU] [PATCH 7/8] aio: Restore AIO contexts

Pavel Emelyanov xemul at parallels.com
Fri Oct 10 12:02:51 PDT 2014


Restoring AIO is quite simple. Once all VMAs are put in
their places we can call io_setup() to let the kernel re-create
the context, and then move the ring into its proper place.

Another thing we should "restore" is the context ID. But
the thing is, upon ring creation the kernel reports the ring
start address as this ID. And there's a patch in the -next
tree that changes the ID when we remap the ring. That
said, after AIO context creation and ring remap we need
to check that the new ID is seen by the kernel.

Signed-off-by: Pavel Emelyanov <xemul at parallels.com>
---
 cr-restore.c            | 24 +++++++++++++++++++++
 include/aio.h           |  6 ++++++
 include/restorer.h      |  3 +++
 pie/restorer.c          | 55 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 88 insertions(+)

diff --git a/cr-restore.c b/cr-restore.c
index c8cb428..7d547b7 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -72,6 +72,7 @@
 #include "timerfd.h"
 #include "file-lock.h"
 #include "action-scripts.h"
+#include "aio.h"
 
 #include "parasite-syscall.h"
 
@@ -2564,6 +2565,9 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 	unsigned long vdso_rt_delta = 0;
 #endif
 
+	unsigned long aio_rings;
+	MmEntry *mm = rsti(current)->mm;
+
 	struct vm_area_list self_vmas;
 	struct vm_area_list *vmas = &rsti(current)->vmas;
 	int i;
@@ -2599,6 +2603,23 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 	}
 
 	/*
+	 * Put info about AIO rings, they will get remapped
+	 */
+
+	aio_rings = rst_mem_cpos(RM_PRIVATE);
+	for (i = 0; i < mm->n_aios; i++) {
+		struct rst_aio_ring *raio;
+
+		raio = rst_mem_alloc(sizeof(*raio), RM_PRIVATE);
+		if (!raio)
+			goto err_nv;
+
+		raio->addr = mm->aios[i]->id;
+		raio->nr_req = mm->aios[i]->nr_req;
+		raio->len = mm->aios[i]->ring_len;
+	}
+
+	/*
 	 * Copy tcp sockets fds to rst memory -- restorer will
 	 * turn repair off before going sigreturn
 	 */
@@ -2747,6 +2768,9 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 	task_args->tcp_socks_nr = rst_tcp_socks_nr;
 	task_args->tcp_socks = rst_mem_remap_ptr(tcp_socks, RM_PRIVATE);
 
+	task_args->nr_rings = mm->n_aios;
+	task_args->rings = rst_mem_remap_ptr(aio_rings, RM_PRIVATE);
+
 	task_args->n_helpers = n_helpers;
 	if (n_helpers > 0)
 		task_args->helpers = rst_mem_remap_ptr(helpers_pos, RM_PRIVATE);
diff --git a/include/aio.h b/include/aio.h
index af7a046..e839ec6 100644
--- a/include/aio.h
+++ b/include/aio.h
@@ -6,4 +6,10 @@ void free_aios(MmEntry *mme);
 struct parasite_ctl;
 int parasite_check_aios(struct parasite_ctl *, struct vm_area_list *);
 unsigned long aio_rings_args_size(struct vm_area_list *);
+
+struct rst_aio_ring {
+	unsigned long addr;
+	unsigned long len;
+	unsigned int nr_req;
+};
 #endif /* __CR_AIO_H__ */
diff --git a/include/restorer.h b/include/restorer.h
index 2d9af0d..8eefddc 100644
--- a/include/restorer.h
+++ b/include/restorer.h
@@ -148,6 +148,9 @@ struct task_restore_args {
 	struct rst_tcp_sock		*tcp_socks;
 	int				tcp_socks_nr;
 
+	struct rst_aio_ring		*rings;
+	int				nr_rings;
+
 	int				fd_last_pid; /* sys.ns_last_pid for threads rst */
 
 	pid_t				*helpers /* the TASK_HELPERS to wait on at the end of restore */;
diff --git a/pie/restorer.c b/pie/restorer.c
index 6c9d0a3..527772a 100644
--- a/pie/restorer.c
+++ b/pie/restorer.c
@@ -29,6 +29,7 @@
 #include "crtools.h"
 #include "lock.h"
 #include "restorer.h"
+#include "aio.h"
 
 #include "protobuf/creds.pb-c.h"
 #include "protobuf/mm.pb-c.h"
@@ -926,6 +927,60 @@ long __export_restore_task(struct task_restore_args *args)
 		}
 	}
 
+	/*
+	 * Now when all VMAs are in their places time to set
+	 * up AIO rings.
+	 */
+
+	for (i = 0; i < args->nr_rings; i++) {
+		struct rst_aio_ring *raio = &args->rings[i];
+		unsigned long ctx = 0;
+		int ret;
+
+		ret = sys_io_setup(raio->nr_req, &ctx);
+		if (ret < 0) {
+			pr_err("Ring setup failed with %d\n", ret);
+			goto core_restore_end;
+		}
+
+		if (ctx == raio->addr) /* Lucky bastards we are! */
+			continue;
+
+		/*
+		 * If we failed to get the proper nr_req right and
+		 * created smaller or larger ring, then this remap
+		 * will (should) fail, since AIO rings has immutable
+		 * size.
+		 *
+		 * This is not great, but anyway better than putting
+		 * a ring of wrong size into correct place.
+		 */
+
+		ctx = sys_mremap(ctx, raio->len, raio->len,
+					MREMAP_FIXED | MREMAP_MAYMOVE,
+					raio->addr);
+		if (ctx != raio->addr) {
+			pr_err("Ring remap failed with %ld\n", ctx);
+			goto core_restore_end;
+		}
+
+		/*
+		 * Now check that kernel not just remapped the
+		 * ring into new place, but updated the internal
+		 * context state respectively.
+		 */
+
+		ret = sys_io_getevents(ctx, 0, 1, NULL, NULL);
+		if (ret != 0) {
+			if (ret < 0)
+				pr_err("Kernel doesn't remap AIO rings\n");
+			else
+				pr_err("AIO context screwed up\n");
+
+			goto core_restore_end;
+		}
+	}
+
 	ret = 0;
 
 	/*
-- 
1.8.4.2




More information about the CRIU mailing list