<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Fri, Aug 15, 2014 at 6:00 PM, Pavel Emelyanov <span dir="ltr">&lt;<a href="mailto:xemul@parallels.com" target="_blank">xemul@parallels.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="">On 08/15/2014 05:49 PM, Николай Сергеевич Плотник wrote:<br>
&gt; From eb561addda9a216ab8875354c6f14d7741ee898f Mon Sep 17 00:00:00 2001<br>
</div>&gt; From: Nikolay Plotnik &lt;<a href="mailto:nikolay.plotnik@phystech.edu">nikolay.plotnik@phystech.edu</a> &lt;mailto:<a href="mailto:nikolay.plotnik@phystech.edu">nikolay.plotnik@phystech.edu</a>&gt;&gt;<br>
<div class="">&gt; Date: Fri, 15 Aug 2014 14:59:54 +0400<br>
&gt; Subject: [PATCH] Add check for empty aio ring in vma mapping<br>
&gt;<br>
&gt; Some applications like mysql database use native linux aio with mappings for aio ring.<br>
&gt; This patch checks if a process with such mappings can be safely migrated (in case of an empty<br>
&gt; aio ring).<br>
<br>
</div>How will this thing get mapped back on restore?<br></blockquote><div> </div><div>Good catch. This patch doesn&#39;t include the restore part. I will work on it.</div><div><br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

<br>
&gt; Signed-off-by: Nikolay S. Plotnik &lt;<a href="mailto:nikolay.plotnik@phystech.edu">nikolay.plotnik@phystech.edu</a> &lt;mailto:<a href="mailto:nikolay.plotnik@phystech.edu">nikolay.plotnik@phystech.edu</a>&gt;&gt;<br>

<div class="">&gt;<br>
&gt; ---<br>
&gt;  cr-dump.c                  |  9 +++++++++<br>
&gt;  include/image.h            |  2 ++<br>
&gt;  include/parasite-syscall.h |  1 +<br>
&gt;  include/parasite.h         |  6 ++++++<br>
&gt;  parasite-syscall.c         | 20 +++++++++++++++++++<br>
&gt;  pie/parasite.c             | 49 ++++++++++++++++++++++++++++++++++++++++++++++<br>
&gt;  proc_parse.c               |  4 ++++<br>
&gt;  7 files changed, 91 insertions(+)<br>
&gt;<br>
&gt; diff --git a/cr-dump.c b/cr-dump.c<br>
&gt; index 1700d9d..e8eee6b 100644<br>
&gt; --- a/cr-dump.c<br>
&gt; +++ b/cr-dump.c<br>
&gt; @@ -102,6 +102,9 @@ bool privately_dump_vma(struct vma_area *vma)<br>
&gt;  if (vma_area_is(vma, VMA_ANON_SHARED))<br>
&gt;  return false;<br>
&gt;<br>
&gt; +if(vma_area_is(vma, VMA_AREA_AIO))<br>
</div>&gt; +return false;<br>
<div class="">&gt; +<br>
&gt;  if (!vma_area_is(vma, VMA_ANON_PRIVATE) &amp;&amp;<br>
&gt;  !vma_area_is(vma, VMA_FILE_PRIVATE)) {<br>
&gt;  pr_warn(&quot;Unexpected VMA area found\n&quot;);<br>
&gt; @@ -1410,6 +1413,12 @@ static int pre_dump_one_task(struct pstree_item *item, struct list_head *ctls)<br>
&gt;  goto err_cure;<br>
&gt;  }<br>
&gt;<br>
&gt; +ret = parasite_dump_aio_ring(parasite_ctl, &amp;vmas);<br>
&gt; +if(ret) {<br>
</div>&gt; +pr_err(&quot;Can&#39;t dump aio ring (pid: %d)\n&quot;, pid);<br>
&gt; +goto err_cure;<br>
<div><div class="h5">&gt; +}<br>
&gt; +<br>
&gt;  ret = predump_task_files(pid);<br>
&gt;  if (ret) {<br>
&gt;  pr_err(&quot;Pre-dumping files failed (pid: %d)\n&quot;, pid);<br>
&gt; diff --git a/include/image.h b/include/image.h<br>
&gt; index 477f339..8399e85 100644<br>
&gt; --- a/include/image.h<br>
&gt; +++ b/include/image.h<br>
&gt; @@ -53,6 +53,8 @@<br>
&gt;  #define VMA_AREA_SOCKET(1 &lt;&lt;  11)<br>
&gt;  #define VMA_AREA_VVAR(1 &lt;&lt;  12)<br>
&gt;<br>
&gt; +#define VMA_AREA_AIO(1 &lt;&lt;  13)<br>
&gt; +<br>
&gt;  #define VMA_UNSUPP(1 &lt;&lt;  31)/* Unsupported VMA */<br>
&gt;<br>
&gt;  #define CR_CAP_SIZE2<br>
&gt; diff --git a/include/parasite-syscall.h b/include/parasite-syscall.h<br>
&gt; index 71534da..338dacd 100644<br>
&gt; --- a/include/parasite-syscall.h<br>
&gt; +++ b/include/parasite-syscall.h<br>
&gt; @@ -83,6 +83,7 @@ extern int __parasite_wait_daemon_ack(unsigned int cmd,<br>
&gt;       struct parasite_ctl *ctl);<br>
&gt;<br>
&gt;  extern int parasite_dump_misc_seized(struct parasite_ctl *ctl, struct parasite_dump_misc *misc);<br>
&gt; +extern int parasite_dump_aio_ring(struct parasite_ctl *ctl, struct vm_area_list *vma_area_list);<br>
&gt;  extern int parasite_dump_creds(struct parasite_ctl *ctl, struct _CredsEntry *ce);<br>
&gt;  extern int parasite_dump_thread_seized(struct parasite_ctl *ctl, int id,<br>
&gt;  struct pid *tid, struct _CoreEntry *core);<br>
&gt; diff --git a/include/parasite.h b/include/parasite.h<br>
&gt; index eaabc01..40f7bff 100644<br>
&gt; --- a/include/parasite.h<br>
&gt; +++ b/include/parasite.h<br>
&gt; @@ -42,6 +42,7 @@ enum {<br>
&gt;  PARASITE_CMD_DUMP_ITIMERS,<br>
&gt;  PARASITE_CMD_DUMP_POSIX_TIMERS,<br>
&gt;  PARASITE_CMD_DUMP_MISC,<br>
&gt; +PARASITE_CMD_DUMP_AIO_RING,<br>
&gt;  PARASITE_CMD_DUMP_CREDS,<br>
&gt;  PARASITE_CMD_DRAIN_FDS,<br>
&gt;  PARASITE_CMD_GET_PROC_FD,<br>
&gt; @@ -162,6 +163,11 @@ struct parasite_dump_misc {<br>
&gt;  int dumpable;<br>
&gt;  };<br>
&gt;<br>
&gt; +struct parasite_dump_aio_ring {<br>
&gt; +unsigned long   start;<br>
</div></div>&gt; +unsigned long   len;<br>
&gt; +};<br>
&gt; +<br>
&gt;  #define PARASITE_MAX_GROUPS(PAGE_SIZE / sizeof(unsigned int) - 2 * sizeof(unsigned))<br>
<div class="">&gt;<br>
&gt;  struct parasite_dump_creds {<br>
&gt; diff --git a/parasite-syscall.c b/parasite-syscall.c<br>
&gt; index f26503a..2d0352e 100644<br>
&gt; --- a/parasite-syscall.c<br>
&gt; +++ b/parasite-syscall.c<br>
&gt; @@ -718,6 +718,26 @@ int parasite_dump_misc_seized(struct parasite_ctl *ctl, struct parasite_dump_mis<br>
&gt;  return 0;<br>
&gt;  }<br>
&gt;<br>
&gt; +int parasite_dump_aio_ring(struct parasite_ctl *ctl, struct vm_area_list *vma_area_list)<br>
&gt; +{<br>
&gt; +struct parasite_dump_aio_ring *args;<br>
&gt; +struct vma_area *vma;<br>
&gt; +<br>
</div>&gt; +args = parasite_args(ctl, struct parasite_dump_aio_ring);<br>
<div class="">&gt; +<br>
&gt; +list_for_each_entry(vma, &amp;vma_area_list-&gt;h, list) {<br>
</div>&gt; +if (vma_area_is(vma, VMA_AREA_REGULAR) &amp;&amp; vma_area_is(vma, VMA_AREA_AIO)) {<br>
&gt; +args-&gt;start = vma-&gt;e-&gt;start;<br>
<div class="">&gt; +               args-&gt;len = vma_area_len(vma);<br>
&gt; +if (parasite_execute_daemon(PARASITE_CMD_DUMP_AIO_RING, ctl) &lt; 0) {<br>
</div>&gt; +pr_err(&quot;aio: Failed to dump aio ring&quot;);<br>
<div class="">&gt; +return -1;<br>
&gt; +}<br>
&gt; +}<br>
&gt; +}<br>
</div>&gt; +return 0;<br>
<div><div class="h5">&gt; +}<br>
&gt; +<br>
&gt;  struct parasite_tty_args *parasite_dump_tty(struct parasite_ctl *ctl, int fd)<br>
&gt;  {<br>
&gt;  struct parasite_tty_args *p;<br>
&gt; diff --git a/pie/parasite.c b/pie/parasite.c<br>
&gt; index 92e7708..11a5c86 100644<br>
&gt; --- a/pie/parasite.c<br>
&gt; +++ b/pie/parasite.c<br>
&gt; @@ -175,6 +175,50 @@ static int dump_misc(struct parasite_dump_misc *args)<br>
&gt;  return dump_thread_common(&amp;args-&gt;ti);<br>
&gt;  }<br>
&gt;<br>
&gt; +struct aio_ring {<br>
&gt; +        unsigned        id;     /* kernel internal index number */<br>
&gt; +        unsigned        nr;     /* number of io_events */<br>
&gt; +        unsigned        head;   /* Written to by userland or under ring_lock<br>
&gt; +                                 * mutex by aio_read_events_ring(). */<br>
&gt; +        unsigned        tail;<br>
&gt; +<br>
&gt; +        unsigned        magic;<br>
&gt; +        unsigned        compat_features;<br>
&gt; +        unsigned        incompat_features;<br>
&gt; +        unsigned        header_length;  /* size of aio_ring */<br>
&gt; +<br>
&gt; +<br>
&gt; +        void            *io_events;<br>
&gt; +};<br>
&gt; +<br>
&gt; +#define AIO_RING_MAGIC  0xa10a10a1<br>
&gt; +<br>
&gt; +static int dump_aio_ring(struct parasite_dump_aio_ring *args)<br>
&gt; +{<br>
</div></div>&gt; +struct aio_ring  *ar = 0;<br>
<div class="">&gt; +<br>
&gt; +if(sizeof(struct aio_ring) &gt; args-&gt;len) {<br>
</div>&gt; +pr_err(&quot;Size of struct aio_ring is greater than given vma area size\n&quot;);<br>
&gt; +return -1;<br>
&gt; +}<br>
&gt; +<br>
&gt; +ar = (struct aio_ring *)args-&gt;start;<br>
<div class="">&gt; +<br>
&gt; +if(AIO_RING_MAGIC != ar-&gt;magic) {<br>
</div>&gt; +pr_err(&quot;Wrong magic number in aio structure, value - %x\n&quot;, ar-&gt;magic);<br>
<div class="">&gt; +return -1;<br>
&gt; +}<br>
&gt; +<br>
&gt; +if(ar-&gt;nr != 0) {<br>
</div>&gt; +pr_err(&quot;Non empty aio ring, %x events\n&quot;, ar-&gt;nr);<br>
&gt; +return -1;<br>
&gt; +}<br>
&gt; +<br>
&gt; +pr_info(&quot;aio ring is empty\n&quot;);<br>
&gt; +<br>
&gt; +return 0;<br>
<div><div class="h5">&gt; +}<br>
&gt; +<br>
&gt;  static int dump_creds(struct parasite_dump_creds *args)<br>
&gt;  {<br>
&gt;  int ret;<br>
&gt; @@ -470,6 +514,9 @@ static noinline __used int noinline parasite_daemon(void *args)<br>
&gt;  case PARASITE_CMD_DUMP_MISC:<br>
&gt;  ret = dump_misc(args);<br>
&gt;  break;<br>
&gt; +case PARASITE_CMD_DUMP_AIO_RING:<br>
&gt; +ret = dump_aio_ring(args);<br>
&gt; +break;<br>
&gt;  case PARASITE_CMD_DUMP_CREDS:<br>
&gt;  ret = dump_creds(args);<br>
&gt;  break;<br>
&gt; @@ -539,6 +586,8 @@ static noinline __used int parasite_init_daemon(void *data)<br>
&gt;  if (ret &lt; 0) {<br>
&gt;  pr_err(&quot;Can&#39;t connect the control socket\n&quot;);<br>
&gt;  goto err;<br>
&gt; +<br>
&gt; +<br>
&gt;  }<br>
&gt;<br>
&gt;  ret = recv_fd(tsock);<br>
&gt; diff --git a/proc_parse.c b/proc_parse.c<br>
&gt; index 271d46b..9f38916 100644<br>
&gt; --- a/proc_parse.c<br>
&gt; +++ b/proc_parse.c<br>
&gt; @@ -428,6 +428,8 @@ int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list, bool use_map_file<br>
&gt;  #endif<br>
&gt;  } else if (strstr(buf, &quot;[heap]&quot;)) {<br>
&gt;  vma_area-&gt;e-&gt;status |= VMA_AREA_REGULAR | VMA_AREA_HEAP;<br>
&gt; +} else if (strstr(buf, &quot;/[aio]&quot;)) {<br>
</div></div>&gt; +vma_area-&gt;e-&gt;status |= VMA_AREA_REGULAR | VMA_AREA_AIO;<br>
<div class="">&gt;  } else {<br>
&gt;  vma_area-&gt;e-&gt;status = VMA_AREA_REGULAR;<br>
&gt;  }<br>
&gt; @@ -436,6 +438,8 @@ int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list, bool use_map_file<br>
&gt;  * Some mapping hints for restore, we save this on<br>
&gt;  * disk and restore might need to analyze it.<br>
&gt;  */<br>
&gt; +if(vma_area_is(vma_area, VMA_AREA_AIO))<br>
&gt; +continue;<br>
&gt;  if (vma_area-&gt;file_borrowed) {<br>
&gt;  struct vma_area *prev = prev_vfi.vma;<br>
&gt;<br>
&gt; --<br>
&gt; 1.9.3<br>
&gt;<br>
&gt;<br>
</div>&gt; _______________________________________________<br>
&gt; CRIU mailing list<br>
&gt; <a href="mailto:CRIU@openvz.org">CRIU@openvz.org</a><br>
&gt; <a href="https://lists.openvz.org/mailman/listinfo/criu" target="_blank">https://lists.openvz.org/mailman/listinfo/criu</a><br>
&gt;<br>
<br>
</blockquote></div><br></div></div>