[CRIU] [PATCH] Check for empty aio ring in vma mapping

Pavel Emelyanov xemul at parallels.com
Fri Aug 15 07:00:53 PDT 2014


On 08/15/2014 05:49 PM, Николай Сергеевич Плотник wrote:
> From eb561addda9a216ab8875354c6f14d7741ee898f Mon Sep 17 00:00:00 2001
> From: Nikolay Plotnik <nikolay.plotnik at phystech.edu <mailto:nikolay.plotnik at phystech.edu>>
> Date: Fri, 15 Aug 2014 14:59:54 +0400
> Subject: [PATCH] Add check for empty aio ring in vma mapping
> 
> Some applications, like the MySQL database, use native Linux AIO with mappings for the AIO ring.
> This patch checks whether a process with such mappings can be safely migrated (i.e. in the case
> of an empty AIO ring).

How will this thing get mapped back on restore?

> Signed-off-by: Nikolay S. Plotnik <nikolay.plotnik at phystech.edu <mailto:nikolay.plotnik at phystech.edu>>
> 
> ---
>  cr-dump.c                  |  9 +++++++++
>  include/image.h            |  2 ++
>  include/parasite-syscall.h |  1 +
>  include/parasite.h         |  6 ++++++
>  parasite-syscall.c         | 20 +++++++++++++++++++
>  pie/parasite.c             | 49 ++++++++++++++++++++++++++++++++++++++++++++++
>  proc_parse.c               |  4 ++++
>  7 files changed, 91 insertions(+)
> 
> diff --git a/cr-dump.c b/cr-dump.c
> index 1700d9d..e8eee6b 100644
> --- a/cr-dump.c
> +++ b/cr-dump.c
> @@ -102,6 +102,9 @@ bool privately_dump_vma(struct vma_area *vma)
>  if (vma_area_is(vma, VMA_ANON_SHARED))
>  return false;
>  
> +if(vma_area_is(vma, VMA_AREA_AIO))
> +return false;
> +
>  if (!vma_area_is(vma, VMA_ANON_PRIVATE) &&
>  !vma_area_is(vma, VMA_FILE_PRIVATE)) {
>  pr_warn("Unexpected VMA area found\n");
> @@ -1410,6 +1413,12 @@ static int pre_dump_one_task(struct pstree_item *item, struct list_head *ctls)
>  goto err_cure;
>  }
>  
> +ret = parasite_dump_aio_ring(parasite_ctl, &vmas);
> +if(ret) {
> +pr_err("Can't dump aio ring (pid: %d)\n", pid);
> +goto err_cure;
> +}
> +
>  ret = predump_task_files(pid);
>  if (ret) {
>  pr_err("Pre-dumping files failed (pid: %d)\n", pid);
> diff --git a/include/image.h b/include/image.h
> index 477f339..8399e85 100644
> --- a/include/image.h
> +++ b/include/image.h
> @@ -53,6 +53,8 @@
>  #define VMA_AREA_SOCKET(1 <<  11)
>  #define VMA_AREA_VVAR(1 <<  12)
>  
> +#define VMA_AREA_AIO(1 <<  13)
> +
>  #define VMA_UNSUPP(1 <<  31)/* Unsupported VMA */
>  
>  #define CR_CAP_SIZE2
> diff --git a/include/parasite-syscall.h b/include/parasite-syscall.h
> index 71534da..338dacd 100644
> --- a/include/parasite-syscall.h
> +++ b/include/parasite-syscall.h
> @@ -83,6 +83,7 @@ extern int __parasite_wait_daemon_ack(unsigned int cmd,
>       struct parasite_ctl *ctl);
>  
>  extern int parasite_dump_misc_seized(struct parasite_ctl *ctl, struct parasite_dump_misc *misc);
> +extern int parasite_dump_aio_ring(struct parasite_ctl *ctl, struct vm_area_list *vma_area_list);
>  extern int parasite_dump_creds(struct parasite_ctl *ctl, struct _CredsEntry *ce);
>  extern int parasite_dump_thread_seized(struct parasite_ctl *ctl, int id,
>  struct pid *tid, struct _CoreEntry *core);
> diff --git a/include/parasite.h b/include/parasite.h
> index eaabc01..40f7bff 100644
> --- a/include/parasite.h
> +++ b/include/parasite.h
> @@ -42,6 +42,7 @@ enum {
>  PARASITE_CMD_DUMP_ITIMERS,
>  PARASITE_CMD_DUMP_POSIX_TIMERS,
>  PARASITE_CMD_DUMP_MISC,
> +PARASITE_CMD_DUMP_AIO_RING,
>  PARASITE_CMD_DUMP_CREDS,
>  PARASITE_CMD_DRAIN_FDS,
>  PARASITE_CMD_GET_PROC_FD,
> @@ -162,6 +163,11 @@ struct parasite_dump_misc {
>  int dumpable;
>  };
>  
> +struct parasite_dump_aio_ring {
> +unsigned long   start;
> +unsigned long   len;
> +};
> +
>  #define PARASITE_MAX_GROUPS(PAGE_SIZE / sizeof(unsigned int) - 2 * sizeof(unsigned))
>  
>  struct parasite_dump_creds {
> diff --git a/parasite-syscall.c b/parasite-syscall.c
> index f26503a..2d0352e 100644
> --- a/parasite-syscall.c
> +++ b/parasite-syscall.c
> @@ -718,6 +718,26 @@ int parasite_dump_misc_seized(struct parasite_ctl *ctl, struct parasite_dump_mis
>  return 0;
>  }
>  
> +int parasite_dump_aio_ring(struct parasite_ctl *ctl, struct vm_area_list *vma_area_list)
> +{
> +struct parasite_dump_aio_ring *args;
> +struct vma_area *vma;
> +
> +args = parasite_args(ctl, struct parasite_dump_aio_ring);
> +
> +list_for_each_entry(vma, &vma_area_list->h, list) {
> +if (vma_area_is(vma, VMA_AREA_REGULAR) && vma_area_is(vma, VMA_AREA_AIO)) {
> +args->start = vma->e->start;
> +               args->len = vma_area_len(vma);
> +if (parasite_execute_daemon(PARASITE_CMD_DUMP_AIO_RING, ctl) < 0) {
> +pr_err("aio: Failed to dump aio ring");
> +return -1;
> +}
> +}
> +}
> +return 0;
> +}
> +
>  struct parasite_tty_args *parasite_dump_tty(struct parasite_ctl *ctl, int fd)
>  {
>  struct parasite_tty_args *p;
> diff --git a/pie/parasite.c b/pie/parasite.c
> index 92e7708..11a5c86 100644
> --- a/pie/parasite.c
> +++ b/pie/parasite.c
> @@ -175,6 +175,50 @@ static int dump_misc(struct parasite_dump_misc *args)
>  return dump_thread_common(&args->ti);
>  }
>  
> +struct aio_ring {
> +        unsigned        id;     /* kernel internal index number */
> +        unsigned        nr;     /* number of io_events */
> +        unsigned        head;   /* Written to by userland or under ring_lock
> +                                 * mutex by aio_read_events_ring(). */
> +        unsigned        tail;
> +
> +        unsigned        magic;
> +        unsigned        compat_features;
> +        unsigned        incompat_features;
> +        unsigned        header_length;  /* size of aio_ring */
> +
> +
> +        void            *io_events;
> +};
> +
> +#define AIO_RING_MAGIC  0xa10a10a1
> +
> +static int dump_aio_ring(struct parasite_dump_aio_ring *args)
> +{
> +struct aio_ring  *ar = 0;
> +
> +if(sizeof(struct aio_ring) > args->len) {
> +pr_err("Size of struct aio_ring is greater than given vma area size\n");
> +return -1;
> +}
> +
> +ar = (struct aio_ring *)args->start;
> +
> +if(AIO_RING_MAGIC != ar->magic) {
> +pr_err("Wrong magic number in aio structure, value - %x\n", ar->magic);
> +return -1;
> +}
> +
> +if(ar->nr != 0) {
> +pr_err("Non empty aio ring, %x events\n", ar->nr);
> +return -1;
> +}
> +
> +pr_info("aio ring is empty\n");
> +
> +return 0;
> +}
> +
>  static int dump_creds(struct parasite_dump_creds *args)
>  {
>  int ret;
> @@ -470,6 +514,9 @@ static noinline __used int noinline parasite_daemon(void *args)
>  case PARASITE_CMD_DUMP_MISC:
>  ret = dump_misc(args);
>  break;
> +case PARASITE_CMD_DUMP_AIO_RING:
> +ret = dump_aio_ring(args);
> +break;
>  case PARASITE_CMD_DUMP_CREDS:
>  ret = dump_creds(args);
>  break;
> @@ -539,6 +586,8 @@ static noinline __used int parasite_init_daemon(void *data)
>  if (ret < 0) {
>  pr_err("Can't connect the control socket\n");
>  goto err;
> +
> +
>  }
>  
>  ret = recv_fd(tsock);
> diff --git a/proc_parse.c b/proc_parse.c
> index 271d46b..9f38916 100644
> --- a/proc_parse.c
> +++ b/proc_parse.c
> @@ -428,6 +428,8 @@ int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list, bool use_map_file
>  #endif
>  } else if (strstr(buf, "[heap]")) {
>  vma_area->e->status |= VMA_AREA_REGULAR | VMA_AREA_HEAP;
> +} else if (strstr(buf, "/[aio]")) {
> +vma_area->e->status |= VMA_AREA_REGULAR | VMA_AREA_AIO;
>  } else {
>  vma_area->e->status = VMA_AREA_REGULAR;
>  }
> @@ -436,6 +438,8 @@ int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list, bool use_map_file
>  * Some mapping hints for restore, we save this on
>  * disk and restore might need to analyze it.
>  */
> +if(vma_area_is(vma_area, VMA_AREA_AIO))
> +continue;
>  if (vma_area->file_borrowed) {
>  struct vma_area *prev = prev_vfi.vma;
>  
> -- 
> 1.9.3
> 
> 
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
> 



More information about the CRIU mailing list