<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">2013/8/28 Pavel Emelyanov <span dir="ltr">&lt;<a href="mailto:xemul@parallels.com" target="_blank">xemul@parallels.com</a>&gt;</span><br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<div class="HOEnZb"><div class="h5">On 08/26/2013 05:15 PM, Andrey Vagin wrote:<br>
&gt; In /proc/pid/maps grow-down VMA-s are shown without guard pages, but<br>
&gt; sometimes these &quot;guard&quot; pages can contain useful data. For example, if<br>
&gt; a real guard page has been remapped by another VMA. Let&#39;s call such<br>
&gt; pages fake guard pages.<br>
&gt;<br>
&gt; So when a grow-down VMA is mmaped on restore, it should be mapped with<br>
&gt; one more guard page to restore content of the fake guard page.<br>
&gt;<br>
&gt; <a href="https://bugzilla.openvz.org/show_bug.cgi?id=2715" target="_blank">https://bugzilla.openvz.org/show_bug.cgi?id=2715</a><br>
&gt; Signed-off-by: Andrey Vagin &lt;<a href="mailto:avagin@openvz.org">avagin@openvz.org</a>&gt;<br>
&gt; ---<br>
&gt;  cr-restore.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++-------<br>
&gt;  1 file changed, 53 insertions(+), 7 deletions(-)<br>
&gt;<br>
&gt; diff --git a/cr-restore.c b/cr-restore.c<br>
&gt; index 14bf300..8fea9ee 100644<br>
&gt; --- a/cr-restore.c<br>
&gt; +++ b/cr-restore.c<br>
&gt; @@ -207,7 +207,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,<br>
&gt;  {<br>
&gt;       int ret;<br>
&gt;       void *addr, *paddr = NULL;<br>
&gt; -     unsigned long nr_pages;<br>
&gt; +     unsigned long nr_pages, size;<br>
&gt;       struct vma_area *p = *pvma;<br>
&gt;<br>
&gt;       if (vma_entry_is(&amp;vma-&gt;vma, VMA_FILE_PRIVATE)) {<br>
&gt; @@ -242,6 +242,17 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,<br>
&gt;<br>
&gt;       *pvma = p;<br>
&gt;<br>
&gt; +     /*<br>
&gt; +      * A grow-down VMA has a guard page, which protects the VMA below it.<br>
&gt; +      * So one more page is mapped here to restore content of the first page<br>
&gt; +      */<br>
&gt; +     if (vma-&gt;vma.flags &amp; MAP_GROWSDOWN) {<br>
&gt; +             vma-&gt;vma.start -= PAGE_SIZE;<br>
&gt; +             if (paddr)<br>
&gt; +                     paddr -= PAGE_SIZE;<br>
&gt; +     }<br>
&gt; +<br>
&gt; +     size = vma_entry_len(&amp;vma-&gt;vma);<br>
&gt;       if (paddr == NULL) {<br>
&gt;               /*<br>
&gt;                * The respective memory area was NOT found in the parent.<br>
&gt; @@ -250,7 +261,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,<br>
&gt;               pr_info(&quot;Map 0x%016&quot;PRIx64&quot;-0x%016&quot;PRIx64&quot; 0x%016&quot;PRIx64&quot; vma\n&quot;,<br>
&gt;                       vma-&gt;vma.start, vma-&gt;vma.end, vma-&gt;vma.pgoff);<br>
&gt;<br>
&gt; -             addr = mmap(tgt_addr, vma_entry_len(&amp;vma-&gt;vma),<br>
&gt; +             addr = mmap(tgt_addr, size,<br>
&gt;                               vma-&gt;vma.prot | PROT_WRITE,<br>
&gt;                               vma-&gt;vma.flags | MAP_FIXED,<br>
&gt;                               vma-&gt;vma.fd, vma-&gt;vma.pgoff);<br>
&gt; @@ -266,7 +277,7 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,<br>
&gt;                */<br>
&gt;               vma-&gt;ppage_bitmap = p-&gt;page_bitmap;<br>
&gt;<br>
&gt; -             addr = mremap(paddr, vma_area_len(vma), vma_area_len(vma),<br>
&gt; +             addr = mremap(paddr, size, size,<br>
&gt;                               MREMAP_FIXED | MREMAP_MAYMOVE, tgt_addr);<br>
&gt;               if (addr != tgt_addr) {<br>
&gt;                       pr_perror(&quot;Unable to remap a private vma&quot;);<br>
&gt; @@ -279,10 +290,15 @@ static int map_private_vma(pid_t pid, struct vma_area *vma, void *tgt_addr,<br>
&gt;       pr_debug(&quot;\tpremap 0x%016&quot;PRIx64&quot;-0x%016&quot;PRIx64&quot; -&gt; %016lx\n&quot;,<br>
&gt;               vma-&gt;vma.start, vma-&gt;vma.end, (unsigned long)addr);<br>
&gt;<br>
&gt; +     if (vma-&gt;vma.flags &amp; MAP_GROWSDOWN) { /* Skip guard page */<br>
&gt; +             vma-&gt;vma.start += PAGE_SIZE;<br>
&gt; +             vma_premmaped_start(&amp;vma-&gt;vma) += PAGE_SIZE;<br>
&gt; +     }<br>
&gt; +<br>
&gt;       if (vma_entry_is(&amp;vma-&gt;vma, VMA_FILE_PRIVATE))<br>
&gt;               close(vma-&gt;vma.fd);<br>
&gt;<br>
&gt; -     return 0;<br>
&gt; +     return size;<br>
&gt;  }<br>
&gt;<br>
&gt;  static int restore_priv_vma_content(pid_t pid)<br>
&gt; @@ -475,8 +491,11 @@ static int prepare_mappings(int pid)<br>
&gt;                       break;<br>
&gt;               }<br>
&gt;<br>
&gt; -             if (vma_priv(&amp;vma-&gt;vma))<br>
&gt; +             if (vma_priv(&amp;vma-&gt;vma)) {<br>
&gt;                       rst_vmas.priv_size += vma_area_len(vma);<br>
&gt; +                     if (vma-&gt;vma.flags &amp; MAP_GROWSDOWN)<br>
&gt; +                             rst_vmas.priv_size += PAGE_SIZE;<br>
&gt; +             }<br>
&gt;       }<br>
&gt;       close(fd);<br>
&gt;<br>
&gt; @@ -512,10 +531,10 @@ static int prepare_mappings(int pid)<br>
&gt;               if (ret &lt; 0)<br>
&gt;                       break;<br>
&gt;<br>
&gt; -             addr += vma_area_len(vma);<br>
&gt; +             addr += ret;<br>
&gt;       }<br>
&gt;<br>
&gt; -     if (ret == 0)<br>
&gt; +     if (ret &gt;= 0)<br>
&gt;               ret = restore_priv_vma_content(pid);<br>
&gt;<br>
&gt;  out:<br>
&gt; @@ -536,6 +555,31 @@ out:<br>
&gt;       return ret;<br>
&gt;  }<br>
&gt;<br>
&gt; +/*<br>
&gt; + * A guard page must be unmapped after restoring content and<br>
&gt; + * forking children to restore COW memory.<br>
&gt; + */<br>
&gt; +int unmap_guard_pages()<br>
&gt; +{<br>
&gt; +     struct vma_area *vma;<br>
&gt; +<br>
&gt; +     list_for_each_entry(vma, &amp;rst_vmas.h, list) {<br>
<br>
</div></div>Let&#39;s unmap them &quot;in place&quot;, instead of one more vmas list scan.<br></blockquote><div><br></div><div>We can&#39;t do that, because the children have not been forked yet at that point; the guard pages must stay mapped until after forking so that COW memory is restored correctly.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

<div class="HOEnZb"><div class="h5"><br>
&gt; +             if (!vma_priv(&amp;vma-&gt;vma))<br>
&gt; +                     continue;<br>
&gt; +<br>
&gt; +             if (vma-&gt;vma.flags &amp; MAP_GROWSDOWN) {<br>
&gt; +                     void *addr = (void *) vma_premmaped_start(&amp;vma-&gt;vma);<br>
&gt; +<br>
&gt; +                     if (munmap(addr - PAGE_SIZE, PAGE_SIZE)) {<br>
&gt; +                             pr_perror(&quot;Can&#39;t unmap guard page\n&quot;);<br>
&gt; +                             return -1;<br>
&gt; +                     }<br>
&gt; +             }<br>
&gt; +     }<br>
&gt; +<br>
&gt; +     return 0;<br>
&gt; +}<br>
&gt; +<br>
&gt;  static int open_vmas(int pid)<br>
&gt;  {<br>
&gt;       struct vma_area *vma;<br>
&gt; @@ -1184,6 +1228,8 @@ static int restore_task_with_children(void *_arg)<br>
&gt;       if (create_children_and_session())<br>
&gt;               exit(1);<br>
&gt;<br>
&gt; +     if (unmap_guard_pages())<br>
&gt; +             exit(1);<br>
&gt;       /*<br>
&gt;        * Unlike sessions, process groups (a.k.a. pgids) can be joined<br>
&gt;        * by any task, provided the task with pid == pgid (group leader)<br>
&gt;<br>
<br>
<br>
</div></div></blockquote></div><br></div></div>