[CRIU] [PATCH v3 02/19] criu: lazy-pages: replace page list with IOVs list
Mike Rapoport
rppt at linux.vnet.ibm.com
Tue Nov 15 08:57:11 PST 2016
Instead of tracking memory handled by userfaultfd on a per-page basis, we can
use IOVs for contiguous chunks.
Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
---
criu/uffd.c | 262 +++++++++++++++++++++++++++++++++---------------------------
1 file changed, 144 insertions(+), 118 deletions(-)
diff --git a/criu/uffd.c b/criu/uffd.c
index 346057a..750b39e 100644
--- a/criu/uffd.c
+++ b/criu/uffd.c
@@ -47,11 +47,17 @@
static mutex_t *lazy_sock_mutex;
+struct lazy_iovec {
+ struct list_head l;
+ unsigned long base;
+ unsigned long len;
+};
+
struct lazy_pages_info {
int pid;
int uffd;
- struct list_head pages;
+ struct list_head iovs;
struct page_read pr;
@@ -72,7 +78,7 @@ static struct lazy_pages_info *lpi_init(void)
return NULL;
memset(lpi, 0, sizeof(*lpi));
- INIT_LIST_HEAD(&lpi->pages);
+ INIT_LIST_HEAD(&lpi->iovs);
INIT_LIST_HEAD(&lpi->l);
return lpi;
@@ -80,8 +86,12 @@ static struct lazy_pages_info *lpi_init(void)
static void lpi_fini(struct lazy_pages_info *lpi)
{
+ struct lazy_iovec *p, *n;
+
if (!lpi)
return;
+ list_for_each_entry_safe(p, n, &lpi->iovs, l)
+ xfree(p);
if (lpi->uffd > 0)
close(lpi->uffd);
if (lpi->pr.close)
@@ -288,118 +298,135 @@ out:
return -1;
}
-#define UFFD_FLAG_SENT 0x1
+static MmEntry *init_mm_entry(struct lazy_pages_info *lpi)
+{
+ struct cr_img *img;
+ MmEntry *mm;
+ int ret;
-struct uffd_pages_struct {
- struct list_head list;
- unsigned long addr;
- int flags;
-};
+ img = open_image(CR_FD_MM, O_RSTR, lpi->pid);
+ if (!img)
+ return NULL;
+
+ ret = pb_read_one_eof(img, &mm, PB_MM);
+ close_image(img);
+ if (ret == -1)
+ return NULL;
+ pr_debug("Found %zd VMAs in image\n", mm->n_vmas);
+
+ return mm;
+}
-static int collect_uffd_pages(struct lazy_pages_info *lpi, MmEntry *mm)
+static int update_lazy_iovecs(struct lazy_pages_info *lpi, unsigned long addr,
+ int len)
{
- unsigned long base;
- int i, j;
- struct iovec iov;
- unsigned long nr_pages;
- unsigned long ps;
- int rc;
- struct uffd_pages_struct *uffd_pages;
- struct page_read *pr = &lpi->pr;
+ struct lazy_iovec *lazy_iov, *n;
- rc = pr->get_pagemap(pr, &iov);
- if (rc <= 0)
- return 0;
+ list_for_each_entry_safe(lazy_iov, n, &lpi->iovs, l) {
+ unsigned long start = lazy_iov->base;
+ unsigned long end = start + lazy_iov->len;
- ps = page_size();
- nr_pages = iov.iov_len / ps;
- base = (unsigned long) iov.iov_base;
- pr_debug("iov.iov_base 0x%lx (%ld pages)\n", base, nr_pages);
+ if (len <= 0)
+ break;
- for (i = 0; i < nr_pages; i++) {
- bool uffd_page = false;
- base = (unsigned long) iov.iov_base + (i * ps);
- /*
- * Only pages which are MAP_ANONYMOUS and MAP_PRIVATE
- * are relevant for userfaultfd handling.
- * Loop over all VMAs to see if the flags matching.
- */
- for (j = 0; j < mm->n_vmas; j++) {
- VmaEntry *vma = mm->vmas[j];
- /*
- * This loop assumes that base can actually be found
- * in the VMA list.
- */
- if (base >= vma->start && base < vma->end) {
- if (vma_entry_can_be_lazy(vma)) {
- if(!pagemap_in_parent(pr->pe))
- uffd_page = true;
- break;
- }
+ if (addr < start || addr >= end)
+ continue;
+
+ if (addr + len < end) {
+ if (addr == start) {
+ lazy_iov->base += len;
+ lazy_iov->len -= len;
+ } else {
+ struct lazy_iovec *new_iov;
+
+ lazy_iov->len -= (end - addr);
+
+ new_iov = xzalloc(sizeof(*new_iov));
+ if (!new_iov)
+ return -1;
+
+ new_iov->base = addr + len;
+ new_iov->len = end - (addr + len);
+
+ list_add(&new_iov->l, &lazy_iov->l);
}
+ break;
}
- /* This is not a page we are looking for. Move along */
- if (!uffd_page)
- continue;
-
- pr_debug("Adding 0x%lx to our list\n", base);
+ if (addr == start) {
+ list_del(&lazy_iov->l);
+ xfree(lazy_iov);
+ } else {
+ lazy_iov->len -= (end - addr);
+ }
- uffd_pages = xzalloc(sizeof(struct uffd_pages_struct));
- if (!uffd_pages)
- return -1;
- uffd_pages->addr = base;
- list_add(&uffd_pages->list, &lpi->pages);
+ len -= (end - addr);
+ addr = end;
}
- return 1;
+ return 0;
}
/*
- * Setting up criu infrastructure and scan for VMAs.
+ * Create a list of IOVs that can be handled using userfaultfd. The
+ * IOVs generally correspond to lazy pagemap entries, except the cases
+ * when a single pagemap entry covers several VMAs. In those cases
+ * IOVs are split at VMA boundaries because UFFDIO_COPY may be done
+ * only inside a single VMA.
+ * We assume here that pagemaps and VMAs are sorted.
*/
-static int find_vmas(struct lazy_pages_info *lpi)
+static int collect_lazy_iovecs(struct lazy_pages_info *lpi)
{
- struct cr_img *img;
- int ret;
+ struct page_read *pr = &lpi->pr;
+ struct lazy_iovec *lazy_iov, *n;
MmEntry *mm;
- struct uffd_pages_struct *uffd_pages;
+ int nr_pages = 0, n_vma = 0;
+ int ret = -1;
+ unsigned long start, end, len;
- img = open_image(CR_FD_MM, O_RSTR, lpi->pid);
- if (!img)
+ mm = init_mm_entry(lpi);
+ if (!mm)
return -1;
- ret = pb_read_one_eof(img, &mm, PB_MM);
- close_image(img);
- if (ret == -1)
- return -1;
- pr_debug("Found %zd VMAs in image\n", mm->n_vmas);
+ while (pr->advance(pr)) {
+ if (!pagemap_lazy(pr->pe))
+ continue;
- ret = open_page_read(lpi->pid, &lpi->pr, PR_TASK);
- if (ret <= 0) {
- ret = -1;
- goto out;
- }
- /*
- * This puts all pages which should be handled by userfaultfd
- * in the list uffd_list. This list is later used to detect if
- * a page has already been transferred or if it needs to be
- * pushed into the process using userfaultfd.
- */
- do {
- ret = collect_uffd_pages(lpi, mm);
- if (ret == -1) {
- goto out;
+ start = pr->pe->vaddr;
+ end = start + pr->pe->nr_pages * page_size();
+ nr_pages += pr->pe->nr_pages;
+
+ for (; n_vma < mm->n_vmas; n_vma++) {
+ VmaEntry *vma = mm->vmas[n_vma];
+
+ if (start >= vma->end)
+ continue;
+
+ lazy_iov = xzalloc(sizeof(*lazy_iov));
+ if (!lazy_iov)
+ goto free_iovs;
+
+ len = min_t(uint64_t, end, vma->end) - start;
+ lazy_iov->base = start;
+ lazy_iov->len = len;
+ list_add_tail(&lazy_iov->l, &lpi->iovs);
+
+ if (end <= vma->end)
+ break;
+
+ start = vma->end;
}
- } while (ret);
+ }
- /* Count detected pages */
- list_for_each_entry(uffd_pages, &lpi->pages, list)
- ret++;
+ ret = nr_pages;
+ goto free_mm;
- pr_debug("Found %d pages to be handled by UFFD\n", ret);
+free_iovs:
+ list_for_each_entry_safe(lazy_iov, n, &lpi->iovs, l)
+ xfree(lazy_iov);
+free_mm:
+ mm_entry__free_unpacked(mm, NULL);
-out:
return ret;
}
@@ -441,12 +468,22 @@ static int ud_open(int client, struct lazy_pages_info **_lpi)
uffd_flags = fcntl(lpi->uffd, F_GETFD, NULL);
pr_debug("uffd_flags are 0x%x\n", uffd_flags);
+ ret = open_page_read(lpi->pid, &lpi->pr, PR_TASK);
+ if (ret <= 0) {
+ ret = -1;
+ goto out;
+ }
+
/*
* Find the memory pages belonging to the restored process
* so that it is trackable when all pages have been transferred.
*/
- if ((lpi->total_pages = find_vmas(lpi)) == -1)
+ ret = collect_lazy_iovecs(lpi);
+ if (ret < 0)
goto out;
+ lpi->total_pages = ret;
+
+ pr_debug("Found %ld pages to be handled by UFFD\n", lpi->total_pages);
list_add_tail(&lpi->l, &lpis);
*_lpi = lpi;
@@ -556,33 +593,33 @@ static int uffd_handle_page(struct lazy_pages_info *lpi, __u64 address,
static int handle_remaining_pages(struct lazy_pages_info *lpi, void *dest)
{
- struct uffd_pages_struct *uffd_pages;
- int rc;
+ struct lazy_iovec *lazy_iov;
+ int nr_pages, i, err;
+ unsigned long addr;
- list_for_each_entry(uffd_pages, &lpi->pages, list) {
- pr_debug("Checking remaining pages 0x%lx (flags 0x%x)\n",
- uffd_pages->addr, uffd_pages->flags);
- if (uffd_pages->flags & UFFD_FLAG_SENT)
- continue;
+ lpi->pr.reset(&lpi->pr);
- rc = uffd_handle_page(lpi, uffd_pages->addr, dest);
- if (rc < 0) {
- pr_err("Error during UFFD copy\n");
- return -1;
- }
+ list_for_each_entry(lazy_iov, &lpi->iovs, l) {
+ nr_pages = lazy_iov->len / PAGE_SIZE;
+
+ for (i = 0; i < nr_pages; i++) {
+ addr = lazy_iov->base + i * PAGE_SIZE;
- uffd_pages->flags |= UFFD_FLAG_SENT;
+ err = uffd_handle_page(lpi, addr, dest);
+ if (err < 0) {
+ pr_err("Error during UFFD copy\n");
+ return -1;
+ }
+ }
}
return 0;
}
-
static int handle_regular_pages(struct lazy_pages_info *lpi, void *dest,
__u64 address)
{
int rc;
- struct uffd_pages_struct *uffd_pages;
rc = uffd_handle_page(lpi, address, dest);
if (rc < 0) {
@@ -590,14 +627,9 @@ static int handle_regular_pages(struct lazy_pages_info *lpi, void *dest,
return -1;
}
- /*
- * Mark this page as having been already transferred, so
- * that it has not to be copied again later.
- */
- list_for_each_entry(uffd_pages, &lpi->pages, list) {
- if (uffd_pages->addr == address)
- uffd_pages->flags |= UFFD_FLAG_SENT;
- }
+ rc = update_lazy_iovecs(lpi, address, PAGE_SIZE);
+ if (rc < 0)
+ return -1;
return 0;
}
@@ -607,7 +639,6 @@ static int handle_user_fault(struct lazy_pages_info *lpi, void *dest)
struct uffd_msg msg;
__u64 flags;
__u64 address;
- struct uffd_pages_struct *uffd_pages;
int ret;
ret = read(lpi->uffd, &msg, sizeof(msg));
@@ -632,11 +663,6 @@ static int handle_user_fault(struct lazy_pages_info *lpi, void *dest)
address = msg.arg.pagefault.address & ~(page_size() - 1);
pr_debug("msg.arg.pagefault.address 0x%llx\n", address);
- /* Make sure to not transfer a page twice */
- list_for_each_entry(uffd_pages, &lpi->pages, list)
- if ((uffd_pages->addr == address) && (uffd_pages->flags & UFFD_FLAG_SENT))
- return 0;
-
/* Now handle the pages actually requested. */
flags = msg.arg.pagefault.flags;
pr_debug("msg.arg.pagefault.flags 0x%llx\n", flags);
--
1.9.1
More information about the CRIU
mailing list