<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Fri, Aug 15, 2014 at 6:00 PM, Pavel Emelyanov <span dir="ltr"><<a href="mailto:xemul@parallels.com" target="_blank">xemul@parallels.com</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="">On 08/15/2014 05:49 PM, Николай Сергеевич Плотник wrote:<br>
> From eb561addda9a216ab8875354c6f14d7741ee898f Mon Sep 17 00:00:00 2001<br>
</div>> From: Nikolay Plotnik <<a href="mailto:nikolay.plotnik@phystech.edu">nikolay.plotnik@phystech.edu</a> <mailto:<a href="mailto:nikolay.plotnik@phystech.edu">nikolay.plotnik@phystech.edu</a>>><br>
<div class="">> Date: Fri, 15 Aug 2014 14:59:54 +0400<br>
> Subject: [PATCH] Add check for empty aio ring in vma mapping<br>
><br>
> Some applications, such as the MySQL database, use native Linux AIO with mappings for the AIO ring.<br>
> This patch checks whether a process with such mappings can be safely migrated (in the case of an empty<br>
> AIO ring).<br>
<br>
</div>How will this thing get mapped back on restore?<br></blockquote><div> </div><div>Good catch. This patch doesn't include the restore part. I will work on it.</div><div><br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
> Signed-off-by: Nikolay S. Plotnik <<a href="mailto:nikolay.plotnik@phystech.edu">nikolay.plotnik@phystech.edu</a> <mailto:<a href="mailto:nikolay.plotnik@phystech.edu">nikolay.plotnik@phystech.edu</a>>><br>
<div class="">><br>
> ---<br>
> cr-dump.c | 9 +++++++++<br>
> include/image.h | 2 ++<br>
> include/parasite-syscall.h | 1 +<br>
> include/parasite.h | 6 ++++++<br>
> parasite-syscall.c | 20 +++++++++++++++++++<br>
> pie/parasite.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++<br>
> proc_parse.c | 4 ++++<br>
> 7 files changed, 91 insertions(+)<br>
><br>
> diff --git a/cr-dump.c b/cr-dump.c<br>
> index 1700d9d..e8eee6b 100644<br>
> --- a/cr-dump.c<br>
> +++ b/cr-dump.c<br>
> @@ -102,6 +102,9 @@ bool privately_dump_vma(struct vma_area *vma)<br>
> if (vma_area_is(vma, VMA_ANON_SHARED))<br>
> return false;<br>
><br>
> +if(vma_area_is(vma, VMA_AREA_AIO))<br>
</div>> +return false;<br>
<div class="">> +<br>
> if (!vma_area_is(vma, VMA_ANON_PRIVATE) &&<br>
> !vma_area_is(vma, VMA_FILE_PRIVATE)) {<br>
> pr_warn("Unexpected VMA area found\n");<br>
> @@ -1410,6 +1413,12 @@ static int pre_dump_one_task(struct pstree_item *item, struct list_head *ctls)<br>
> goto err_cure;<br>
> }<br>
><br>
> +ret = parasite_dump_aio_ring(parasite_ctl, &vmas);<br>
> +if(ret) {<br>
</div>> +pr_err("Can't dump aio ring (pid: %d)\n", pid);<br>
> +goto err_cure;<br>
<div><div class="h5">> +}<br>
> +<br>
> ret = predump_task_files(pid);<br>
> if (ret) {<br>
> pr_err("Pre-dumping files failed (pid: %d)\n", pid);<br>
> diff --git a/include/image.h b/include/image.h<br>
> index 477f339..8399e85 100644<br>
> --- a/include/image.h<br>
> +++ b/include/image.h<br>
> @@ -53,6 +53,8 @@<br>
> #define VMA_AREA_SOCKET(1 << 11)<br>
> #define VMA_AREA_VVAR(1 << 12)<br>
><br>
> +#define VMA_AREA_AIO(1 << 13)<br>
> +<br>
> #define VMA_UNSUPP(1 << 31)/* Unsupported VMA */<br>
><br>
> #define CR_CAP_SIZE2<br>
> diff --git a/include/parasite-syscall.h b/include/parasite-syscall.h<br>
> index 71534da..338dacd 100644<br>
> --- a/include/parasite-syscall.h<br>
> +++ b/include/parasite-syscall.h<br>
> @@ -83,6 +83,7 @@ extern int __parasite_wait_daemon_ack(unsigned int cmd,<br>
> struct parasite_ctl *ctl);<br>
><br>
> extern int parasite_dump_misc_seized(struct parasite_ctl *ctl, struct parasite_dump_misc *misc);<br>
> +extern int parasite_dump_aio_ring(struct parasite_ctl *ctl, struct vm_area_list *vma_area_list);<br>
> extern int parasite_dump_creds(struct parasite_ctl *ctl, struct _CredsEntry *ce);<br>
> extern int parasite_dump_thread_seized(struct parasite_ctl *ctl, int id,<br>
> struct pid *tid, struct _CoreEntry *core);<br>
> diff --git a/include/parasite.h b/include/parasite.h<br>
> index eaabc01..40f7bff 100644<br>
> --- a/include/parasite.h<br>
> +++ b/include/parasite.h<br>
> @@ -42,6 +42,7 @@ enum {<br>
> PARASITE_CMD_DUMP_ITIMERS,<br>
> PARASITE_CMD_DUMP_POSIX_TIMERS,<br>
> PARASITE_CMD_DUMP_MISC,<br>
> +PARASITE_CMD_DUMP_AIO_RING,<br>
> PARASITE_CMD_DUMP_CREDS,<br>
> PARASITE_CMD_DRAIN_FDS,<br>
> PARASITE_CMD_GET_PROC_FD,<br>
> @@ -162,6 +163,11 @@ struct parasite_dump_misc {<br>
> int dumpable;<br>
> };<br>
><br>
> +struct parasite_dump_aio_ring {<br>
> +unsigned long start;<br>
</div></div>> +unsigned long len;<br>
> +};<br>
> +<br>
> #define PARASITE_MAX_GROUPS(PAGE_SIZE / sizeof(unsigned int) - 2 * sizeof(unsigned))<br>
<div class="">><br>
> struct parasite_dump_creds {<br>
> diff --git a/parasite-syscall.c b/parasite-syscall.c<br>
> index f26503a..2d0352e 100644<br>
> --- a/parasite-syscall.c<br>
> +++ b/parasite-syscall.c<br>
> @@ -718,6 +718,26 @@ int parasite_dump_misc_seized(struct parasite_ctl *ctl, struct parasite_dump_mis<br>
> return 0;<br>
> }<br>
><br>
> +int parasite_dump_aio_ring(struct parasite_ctl *ctl, struct vm_area_list *vma_area_list)<br>
> +{<br>
> +struct parasite_dump_aio_ring *args;<br>
> +struct vma_area *vma;<br>
> +<br>
</div>> +args = parasite_args(ctl, struct parasite_dump_aio_ring);<br>
<div class="">> +<br>
> +list_for_each_entry(vma, &vma_area_list->h, list) {<br>
</div>> +if (vma_area_is(vma, VMA_AREA_REGULAR) && vma_area_is(vma, VMA_AREA_AIO)) {<br>
> +args->start = vma->e->start;<br>
<div class="">> + args->len = vma_area_len(vma);<br>
> +if (parasite_execute_daemon(PARASITE_CMD_DUMP_AIO_RING, ctl) < 0) {<br>
</div>> +pr_err("aio: Failed to dump aio ring");<br>
<div class="">> +return -1;<br>
> +}<br>
> +}<br>
> +}<br>
</div>> +return 0;<br>
<div><div class="h5">> +}<br>
> +<br>
> struct parasite_tty_args *parasite_dump_tty(struct parasite_ctl *ctl, int fd)<br>
> {<br>
> struct parasite_tty_args *p;<br>
> diff --git a/pie/parasite.c b/pie/parasite.c<br>
> index 92e7708..11a5c86 100644<br>
> --- a/pie/parasite.c<br>
> +++ b/pie/parasite.c<br>
> @@ -175,6 +175,50 @@ static int dump_misc(struct parasite_dump_misc *args)<br>
> return dump_thread_common(&args->ti);<br>
> }<br>
><br>
> +struct aio_ring {<br>
> + unsigned id; /* kernel internal index number */<br>
> + unsigned nr; /* number of io_events */<br>
> + unsigned head; /* Written to by userland or under ring_lock<br>
> + * mutex by aio_read_events_ring(). */<br>
> + unsigned tail;<br>
> +<br>
> + unsigned magic;<br>
> + unsigned compat_features;<br>
> + unsigned incompat_features;<br>
> + unsigned header_length; /* size of aio_ring */<br>
> +<br>
> +<br>
> + void *io_events;<br>
> +};<br>
> +<br>
> +#define AIO_RING_MAGIC 0xa10a10a1<br>
> +<br>
> +static int dump_aio_ring(struct parasite_dump_aio_ring *args)<br>
> +{<br>
</div></div>> +struct aio_ring *ar = 0;<br>
<div class="">> +<br>
> +if(sizeof(struct aio_ring) > args->len) {<br>
</div>> +pr_err("Size of struct aio_ring is greater than given vma area size\n");<br>
> +return -1;<br>
> +}<br>
> +<br>
> +ar = (struct aio_ring *)args->start;<br>
<div class="">> +<br>
> +if(AIO_RING_MAGIC != ar->magic) {<br>
</div>> +pr_err("Wrong magic number in aio structure, value - %x\n", ar->magic);<br>
<div class="">> +return -1;<br>
> +}<br>
> +<br>
> +if(ar->nr != 0) {<br>
</div>> +pr_err("Non empty aio ring, %x events\n", ar->nr);<br>
> +return -1;<br>
> +}<br>
> +<br>
> +pr_info("aio ring is empty\n");<br>
> +<br>
> +return 0;<br>
<div><div class="h5">> +}<br>
> +<br>
> static int dump_creds(struct parasite_dump_creds *args)<br>
> {<br>
> int ret;<br>
> @@ -470,6 +514,9 @@ static noinline __used int noinline parasite_daemon(void *args)<br>
> case PARASITE_CMD_DUMP_MISC:<br>
> ret = dump_misc(args);<br>
> break;<br>
> +case PARASITE_CMD_DUMP_AIO_RING:<br>
> +ret = dump_aio_ring(args);<br>
> +break;<br>
> case PARASITE_CMD_DUMP_CREDS:<br>
> ret = dump_creds(args);<br>
> break;<br>
> @@ -539,6 +586,8 @@ static noinline __used int parasite_init_daemon(void *data)<br>
> if (ret < 0) {<br>
> pr_err("Can't connect the control socket\n");<br>
> goto err;<br>
> +<br>
> +<br>
> }<br>
><br>
> ret = recv_fd(tsock);<br>
> diff --git a/proc_parse.c b/proc_parse.c<br>
> index 271d46b..9f38916 100644<br>
> --- a/proc_parse.c<br>
> +++ b/proc_parse.c<br>
> @@ -428,6 +428,8 @@ int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list, bool use_map_file<br>
> #endif<br>
> } else if (strstr(buf, "[heap]")) {<br>
> vma_area->e->status |= VMA_AREA_REGULAR | VMA_AREA_HEAP;<br>
> +} else if (strstr(buf, "/[aio]")) {<br>
</div></div>> +vma_area->e->status |= VMA_AREA_REGULAR | VMA_AREA_AIO;<br>
<div class="">> } else {<br>
> vma_area->e->status = VMA_AREA_REGULAR;<br>
> }<br>
> @@ -436,6 +438,8 @@ int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list, bool use_map_file<br>
> * Some mapping hints for restore, we save this on<br>
> * disk and restore might need to analyze it.<br>
> */<br>
> +if(vma_area_is(vma_area, VMA_AREA_AIO))<br>
> +continue;<br>
> if (vma_area->file_borrowed) {<br>
> struct vma_area *prev = prev_vfi.vma;<br>
><br>
> --<br>
> 1.9.3<br>
><br>
><br>
</div>> _______________________________________________<br>
> CRIU mailing list<br>
> <a href="mailto:CRIU@openvz.org">CRIU@openvz.org</a><br>
> <a href="https://lists.openvz.org/mailman/listinfo/criu" target="_blank">https://lists.openvz.org/mailman/listinfo/criu</a><br>
><br>
<br>
</blockquote></div><br></div></div>