[CRIU] [PATCH] Check for empty aio ring in vma mapping

Николай Сергеевич Плотник nikolay.plotnik at phystech.edu
Fri Aug 15 07:38:13 PDT 2014


On Fri, Aug 15, 2014 at 6:00 PM, Pavel Emelyanov <xemul at parallels.com>
wrote:

> On 08/15/2014 05:49 PM, Николай Сергеевич Плотник wrote:
> > From eb561addda9a216ab8875354c6f14d7741ee898f Mon Sep 17 00:00:00 2001
> > From: Nikolay Plotnik <nikolay.plotnik at phystech.edu <mailto:
> nikolay.plotnik at phystech.edu>>
> > Date: Fri, 15 Aug 2014 14:59:54 +0400
> > Subject: [PATCH] Add check for empty aio ring in vma mapping
> >
> > Some applications, such as the MySQL database, use native Linux AIO with
> > mappings for the AIO ring.
> > This patch checks whether a process with such mappings can be safely
> > migrated (i.e. when the AIO ring is empty).
>
> How will this thing get mapped back on restore?
>

Good catch. This patch doesn't include the restore part yet; I will work on it.


> > Signed-off-by: Nikolay S. Plotnik <nikolay.plotnik at phystech.edu <mailto:
> nikolay.plotnik at phystech.edu>>
> >
> > ---
> >  cr-dump.c                  |  9 +++++++++
> >  include/image.h            |  2 ++
> >  include/parasite-syscall.h |  1 +
> >  include/parasite.h         |  6 ++++++
> >  parasite-syscall.c         | 20 +++++++++++++++++++
> >  pie/parasite.c             | 49
> ++++++++++++++++++++++++++++++++++++++++++++++
> >  proc_parse.c               |  4 ++++
> >  7 files changed, 91 insertions(+)
> >
> > diff --git a/cr-dump.c b/cr-dump.c
> > index 1700d9d..e8eee6b 100644
> > --- a/cr-dump.c
> > +++ b/cr-dump.c
> > @@ -102,6 +102,9 @@ bool privately_dump_vma(struct vma_area *vma)
> >  if (vma_area_is(vma, VMA_ANON_SHARED))
> >  return false;
> >
> > +if(vma_area_is(vma, VMA_AREA_AIO))
> > +return false;
> > +
> >  if (!vma_area_is(vma, VMA_ANON_PRIVATE) &&
> >  !vma_area_is(vma, VMA_FILE_PRIVATE)) {
> >  pr_warn("Unexpected VMA area found\n");
> > @@ -1410,6 +1413,12 @@ static int pre_dump_one_task(struct pstree_item
> *item, struct list_head *ctls)
> >  goto err_cure;
> >  }
> >
> > +ret = parasite_dump_aio_ring(parasite_ctl, &vmas);
> > +if(ret) {
> > +pr_err("Can't dump aio ring (pid: %d)\n", pid);
> > +goto err_cure;
> > +}
> > +
> >  ret = predump_task_files(pid);
> >  if (ret) {
> >  pr_err("Pre-dumping files failed (pid: %d)\n", pid);
> > diff --git a/include/image.h b/include/image.h
> > index 477f339..8399e85 100644
> > --- a/include/image.h
> > +++ b/include/image.h
> > @@ -53,6 +53,8 @@
> >  #define VMA_AREA_SOCKET(1 <<  11)
> >  #define VMA_AREA_VVAR(1 <<  12)
> >
> > +#define VMA_AREA_AIO(1 <<  13)
> > +
> >  #define VMA_UNSUPP(1 <<  31)/* Unsupported VMA */
> >
> >  #define CR_CAP_SIZE2
> > diff --git a/include/parasite-syscall.h b/include/parasite-syscall.h
> > index 71534da..338dacd 100644
> > --- a/include/parasite-syscall.h
> > +++ b/include/parasite-syscall.h
> > @@ -83,6 +83,7 @@ extern int __parasite_wait_daemon_ack(unsigned int cmd,
> >       struct parasite_ctl *ctl);
> >
> >  extern int parasite_dump_misc_seized(struct parasite_ctl *ctl, struct
> parasite_dump_misc *misc);
> > +extern int parasite_dump_aio_ring(struct parasite_ctl *ctl, struct
> vm_area_list *vma_area_list);
> >  extern int parasite_dump_creds(struct parasite_ctl *ctl, struct
> _CredsEntry *ce);
> >  extern int parasite_dump_thread_seized(struct parasite_ctl *ctl, int id,
> >  struct pid *tid, struct _CoreEntry *core);
> > diff --git a/include/parasite.h b/include/parasite.h
> > index eaabc01..40f7bff 100644
> > --- a/include/parasite.h
> > +++ b/include/parasite.h
> > @@ -42,6 +42,7 @@ enum {
> >  PARASITE_CMD_DUMP_ITIMERS,
> >  PARASITE_CMD_DUMP_POSIX_TIMERS,
> >  PARASITE_CMD_DUMP_MISC,
> > +PARASITE_CMD_DUMP_AIO_RING,
> >  PARASITE_CMD_DUMP_CREDS,
> >  PARASITE_CMD_DRAIN_FDS,
> >  PARASITE_CMD_GET_PROC_FD,
> > @@ -162,6 +163,11 @@ struct parasite_dump_misc {
> >  int dumpable;
> >  };
> >
> > +struct parasite_dump_aio_ring {
> > +unsigned long   start;
> > +unsigned long   len;
> > +};
> > +
> >  #define PARASITE_MAX_GROUPS(PAGE_SIZE / sizeof(unsigned int) - 2 *
> sizeof(unsigned))
> >
> >  struct parasite_dump_creds {
> > diff --git a/parasite-syscall.c b/parasite-syscall.c
> > index f26503a..2d0352e 100644
> > --- a/parasite-syscall.c
> > +++ b/parasite-syscall.c
> > @@ -718,6 +718,26 @@ int parasite_dump_misc_seized(struct parasite_ctl
> *ctl, struct parasite_dump_mis
> >  return 0;
> >  }
> >
> > +int parasite_dump_aio_ring(struct parasite_ctl *ctl, struct
> vm_area_list *vma_area_list)
> > +{
> > +struct parasite_dump_aio_ring *args;
> > +struct vma_area *vma;
> > +
> > +args = parasite_args(ctl, struct parasite_dump_aio_ring);
> > +
> > +list_for_each_entry(vma, &vma_area_list->h, list) {
> > +if (vma_area_is(vma, VMA_AREA_REGULAR) && vma_area_is(vma,
> VMA_AREA_AIO)) {
> > +args->start = vma->e->start;
> > +               args->len = vma_area_len(vma);
> > +if (parasite_execute_daemon(PARASITE_CMD_DUMP_AIO_RING, ctl) < 0) {
> > +pr_err("aio: Failed to dump aio ring");
> > +return -1;
> > +}
> > +}
> > +}
> > +return 0;
> > +}
> > +
> >  struct parasite_tty_args *parasite_dump_tty(struct parasite_ctl *ctl,
> int fd)
> >  {
> >  struct parasite_tty_args *p;
> > diff --git a/pie/parasite.c b/pie/parasite.c
> > index 92e7708..11a5c86 100644
> > --- a/pie/parasite.c
> > +++ b/pie/parasite.c
> > @@ -175,6 +175,50 @@ static int dump_misc(struct parasite_dump_misc
> *args)
> >  return dump_thread_common(&args->ti);
> >  }
> >
> > +struct aio_ring {
> > +        unsigned        id;     /* kernel internal index number */
> > +        unsigned        nr;     /* number of io_events */
> > +        unsigned        head;   /* Written to by userland or under
> ring_lock
> > +                                 * mutex by aio_read_events_ring(). */
> > +        unsigned        tail;
> > +
> > +        unsigned        magic;
> > +        unsigned        compat_features;
> > +        unsigned        incompat_features;
> > +        unsigned        header_length;  /* size of aio_ring */
> > +
> > +
> > +        void            *io_events;
> > +};
> > +
> > +#define AIO_RING_MAGIC  0xa10a10a1
> > +
> > +static int dump_aio_ring(struct parasite_dump_aio_ring *args)
> > +{
> > +struct aio_ring  *ar = 0;
> > +
> > +if(sizeof(struct aio_ring) > args->len) {
> > +pr_err("Size of struct aio_ring is greater than given vma area size\n");
> > +return -1;
> > +}
> > +
> > +ar = (struct aio_ring *)args->start;
> > +
> > +if(AIO_RING_MAGIC != ar->magic) {
> > +pr_err("Wrong magic number in aio structure, value - %x\n", ar->magic);
> > +return -1;
> > +}
> > +
> > +if(ar->nr != 0) {
> > +pr_err("Non empty aio ring, %x events\n", ar->nr);
> > +return -1;
> > +}
> > +
> > +pr_info("aio ring is empty\n");
> > +
> > +return 0;
> > +}
> > +
> >  static int dump_creds(struct parasite_dump_creds *args)
> >  {
> >  int ret;
> > @@ -470,6 +514,9 @@ static noinline __used int noinline
> parasite_daemon(void *args)
> >  case PARASITE_CMD_DUMP_MISC:
> >  ret = dump_misc(args);
> >  break;
> > +case PARASITE_CMD_DUMP_AIO_RING:
> > +ret = dump_aio_ring(args);
> > +break;
> >  case PARASITE_CMD_DUMP_CREDS:
> >  ret = dump_creds(args);
> >  break;
> > @@ -539,6 +586,8 @@ static noinline __used int parasite_init_daemon(void
> *data)
> >  if (ret < 0) {
> >  pr_err("Can't connect the control socket\n");
> >  goto err;
> > +
> > +
> >  }
> >
> >  ret = recv_fd(tsock);
> > diff --git a/proc_parse.c b/proc_parse.c
> > index 271d46b..9f38916 100644
> > --- a/proc_parse.c
> > +++ b/proc_parse.c
> > @@ -428,6 +428,8 @@ int parse_smaps(pid_t pid, struct vm_area_list
> *vma_area_list, bool use_map_file
> >  #endif
> >  } else if (strstr(buf, "[heap]")) {
> >  vma_area->e->status |= VMA_AREA_REGULAR | VMA_AREA_HEAP;
> > +} else if (strstr(buf, "/[aio]")) {
> > +vma_area->e->status |= VMA_AREA_REGULAR | VMA_AREA_AIO;
> >  } else {
> >  vma_area->e->status = VMA_AREA_REGULAR;
> >  }
> > @@ -436,6 +438,8 @@ int parse_smaps(pid_t pid, struct vm_area_list
> *vma_area_list, bool use_map_file
> >  * Some mapping hints for restore, we save this on
> >  * disk and restore might need to analyze it.
> >  */
> > +if(vma_area_is(vma_area, VMA_AREA_AIO))
> > +continue;
> >  if (vma_area->file_borrowed) {
> >  struct vma_area *prev = prev_vfi.vma;
> >
> > --
> > 1.9.3
> >
> >
> > _______________________________________________
> > CRIU mailing list
> > CRIU at openvz.org
> > https://lists.openvz.org/mailman/listinfo/criu
> >
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openvz.org/pipermail/criu/attachments/20140815/8b814f58/attachment-0001.html>


More information about the CRIU mailing list