[CRIU] [PATCH 1/4] criu: mv page-read.[ch] pagemap.[ch]
Mike Rapoport
rppt at linux.vnet.ibm.com
Thu Jun 23 09:46:50 PDT 2016
Signed-off-by: Mike Rapoport <rppt at linux.vnet.ibm.com>
---
criu/Makefile.crtools | 2 +-
criu/cr-dedup.c | 2 +-
criu/cr-restore.c | 1 -
criu/include/page-read.h | 95 ------------
criu/include/page-xfer.h | 2 +-
criu/include/pagemap.h | 95 ++++++++++++
criu/page-read.c | 384 -----------------------------------------------
criu/pagemap.c | 384 +++++++++++++++++++++++++++++++++++++++++++++++
criu/uffd.c | 2 +-
9 files changed, 483 insertions(+), 484 deletions(-)
delete mode 100644 criu/include/page-read.h
create mode 100644 criu/include/pagemap.h
delete mode 100644 criu/page-read.c
create mode 100644 criu/pagemap.c
diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
index 9e4ec7a..b241bf2 100644
--- a/criu/Makefile.crtools
+++ b/criu/Makefile.crtools
@@ -40,7 +40,7 @@ obj-y += netfilter.o
obj-y += net.o
obj-y += pagemap-cache.o
obj-y += page-pipe.o
-obj-y += page-read.o
+obj-y += pagemap.o
obj-y += page-xfer.o
obj-y += parasite-syscall.o
obj-y += pie/pie-relocs.o
diff --git a/criu/cr-dedup.c b/criu/cr-dedup.c
index 39609a7..726cec9 100644
--- a/criu/cr-dedup.c
+++ b/criu/cr-dedup.c
@@ -4,7 +4,7 @@
#include <unistd.h>
#include "crtools.h"
-#include "page-read.h"
+#include "pagemap.h"
#include "restorer.h"
#define MAX_BUNCH_SIZE 256
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index e0b19fe..c8418b1 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -60,7 +60,6 @@
#include "tty.h"
#include "cpu.h"
#include "file-lock.h"
-#include "page-read.h"
#include "vdso.h"
#include "stats.h"
#include "tun.h"
diff --git a/criu/include/page-read.h b/criu/include/page-read.h
deleted file mode 100644
index 4c6d21e..0000000
--- a/criu/include/page-read.h
+++ /dev/null
@@ -1,95 +0,0 @@
-#ifndef __CR_PAGE_READ_H__
-#define __CR_PAGE_READ_H__
-
-#include "images/pagemap.pb-c.h"
-
-/*
- * page_read -- engine, that reads pages from image file(s)
- *
- * Several page-read's can be arranged in a chain to read
- * pages from a series of snapshot.
- *
- * A task's address space vs pagemaps+page image pairs can
- * look like this (taken from comment in page-pipe.h):
- *
- * task:
- *
- * 0 0 0 0 1 1 1
- * 0 3 6 B 2 7 C
- * ---+++-----+++++++-----+++++----
- * pm1: ---+++-----++++++-------++++----
- * pm2: ---==+-----====+++-----++===----
- *
- * Here + is present page, - is non prsent, = is present,
- * but is not modified from last snapshot.
- *
- * Thus pagemap.img and pages.img entries are
- *
- * pm1: 03:3,0B:6,18:4
- * pm2: 03:2:P,05:1,0B:4:P,0F:3,17:2,19:3:P
- *
- * where P means "page is in parent pagemap".
- *
- * pg1: 03,04,05,0B,0C,0D,0E,0F,10,18,19,1A,1B
- * pg2: 05,0F,10,11,17,18
- *
- * When trying to restore from these 4 files we'd have
- * to carefull scan pagemap.img's one by one and read or
- * skip pages from pages.img where appropriate.
- *
- * All this is implemented in read_pagemap_page.
- */
-
-struct page_read {
- /*
- * gets next vaddr:len pair to work on.
- * Pagemap entries should be returned in sorted order.
- */
- int (*get_pagemap)(struct page_read *, struct iovec *iov);
- /* reads page from current pagemap */
- int (*read_pages)(struct page_read *, unsigned long vaddr, int nr, void *);
- /* stop working on current pagemap */
- void (*put_pagemap)(struct page_read *);
- void (*close)(struct page_read *);
- void (*skip_pages)(struct page_read *, unsigned long len);
- int (*seek_page)(struct page_read *pr, unsigned long vaddr, bool warn);
-
- /* Private data of reader */
- struct cr_img *pmi;
- struct cr_img *pi;
-
- PagemapEntry *pe; /* current pagemap we are on */
- struct page_read *parent; /* parent pagemap (if ->in_parent
- pagemap is met in image, then
- go to this guy for page, see
- read_pagemap_page */
- unsigned long cvaddr; /* vaddr we are on */
-
- struct iovec bunch; /* record consequent neighbour
- iovecs to punch together */
- unsigned id; /* for logging */
-
- PagemapEntry **pmes;
- int nr_pmes;
- int curr_pme;
-};
-
-#define PR_SHMEM 0x1
-#define PR_TASK 0x2
-
-#define PR_TYPE_MASK 0x3
-#define PR_MOD 0x4 /* Will need to modify */
-
-/*
- * -1 -- error
- * 0 -- no images
- * 1 -- opened
- */
-extern int open_page_read(int pid, struct page_read *, int pr_flags);
-extern int open_page_read_at(int dfd, int pid, struct page_read *pr, int pr_flags);
-extern void pagemap2iovec(PagemapEntry *pe, struct iovec *iov);
-extern void iovec2pagemap(struct iovec *iov, PagemapEntry *pe);
-
-extern int dedup_one_iovec(struct page_read *pr, struct iovec *iov);
-extern int punch_hole(struct page_read *pr, unsigned long off, unsigned long len, bool cleanup);
-#endif /* __CR_PAGE_READ_H__ */
diff --git a/criu/include/page-xfer.h b/criu/include/page-xfer.h
index 8492daa..d19671b 100644
--- a/criu/include/page-xfer.h
+++ b/criu/include/page-xfer.h
@@ -1,6 +1,6 @@
#ifndef __CR_PAGE_XFER__H__
#define __CR_PAGE_XFER__H__
-#include "page-read.h"
+#include "pagemap.h"
extern int cr_page_server(bool daemon_mode, int cfd);
diff --git a/criu/include/pagemap.h b/criu/include/pagemap.h
new file mode 100644
index 0000000..4c6d21e
--- /dev/null
+++ b/criu/include/pagemap.h
@@ -0,0 +1,95 @@
+#ifndef __CR_PAGE_READ_H__
+#define __CR_PAGE_READ_H__
+
+#include "images/pagemap.pb-c.h"
+
+/*
+ * page_read -- engine, that reads pages from image file(s)
+ *
+ * Several page-read's can be arranged in a chain to read
+ * pages from a series of snapshots.
+ *
+ * A task's address space vs pagemaps+page image pairs can
+ * look like this (taken from comment in page-pipe.h):
+ *
+ * task:
+ *
+ * 0 0 0 0 1 1 1
+ * 0 3 6 B 2 7 C
+ * ---+++-----+++++++-----+++++----
+ * pm1: ---+++-----++++++-------++++----
+ * pm2: ---==+-----====+++-----++===----
+ *
+ * Here + is a present page, - is a non-present page, = is present,
+ * but is not modified since the last snapshot.
+ *
+ * Thus pagemap.img and pages.img entries are
+ *
+ * pm1: 03:3,0B:6,18:4
+ * pm2: 03:2:P,05:1,0B:4:P,0F:3,17:2,19:3:P
+ *
+ * where P means "page is in parent pagemap".
+ *
+ * pg1: 03,04,05,0B,0C,0D,0E,0F,10,18,19,1A,1B
+ * pg2: 05,0F,10,11,17,18
+ *
+ * When trying to restore from these 4 files we'd have
+ * to carefully scan the pagemap.img's one by one and read or
+ * skip pages from pages.img where appropriate.
+ *
+ * All this is implemented in read_pagemap_page.
+ */
+
+struct page_read {
+ /*
+ * gets next vaddr:len pair to work on.
+ * Pagemap entries should be returned in sorted order.
+ */
+ int (*get_pagemap)(struct page_read *, struct iovec *iov);
+ /* reads page from current pagemap */
+ int (*read_pages)(struct page_read *, unsigned long vaddr, int nr, void *);
+ /* stop working on current pagemap */
+ void (*put_pagemap)(struct page_read *);
+ void (*close)(struct page_read *);
+ void (*skip_pages)(struct page_read *, unsigned long len);
+ int (*seek_page)(struct page_read *pr, unsigned long vaddr, bool warn);
+
+ /* Private data of reader */
+ struct cr_img *pmi;
+ struct cr_img *pi;
+
+ PagemapEntry *pe; /* current pagemap we are on */
+ struct page_read *parent; /* parent pagemap (if ->in_parent
+ pagemap is met in image, then
+ go to this guy for page, see
+ read_pagemap_page */
+ unsigned long cvaddr; /* vaddr we are on */
+
+ struct iovec bunch; /* record consequent neighbour
+ iovecs to punch together */
+ unsigned id; /* for logging */
+
+ PagemapEntry **pmes;
+ int nr_pmes;
+ int curr_pme;
+};
+
+#define PR_SHMEM 0x1
+#define PR_TASK 0x2
+
+#define PR_TYPE_MASK 0x3
+#define PR_MOD 0x4 /* Will need to modify */
+
+/*
+ * -1 -- error
+ * 0 -- no images
+ * 1 -- opened
+ */
+extern int open_page_read(int pid, struct page_read *, int pr_flags);
+extern int open_page_read_at(int dfd, int pid, struct page_read *pr, int pr_flags);
+extern void pagemap2iovec(PagemapEntry *pe, struct iovec *iov);
+extern void iovec2pagemap(struct iovec *iov, PagemapEntry *pe);
+
+extern int dedup_one_iovec(struct page_read *pr, struct iovec *iov);
+extern int punch_hole(struct page_read *pr, unsigned long off, unsigned long len, bool cleanup);
+#endif /* __CR_PAGE_READ_H__ */
diff --git a/criu/page-read.c b/criu/page-read.c
deleted file mode 100644
index 9a02a5d..0000000
--- a/criu/page-read.c
+++ /dev/null
@@ -1,384 +0,0 @@
-#include <fcntl.h>
-#include <stdio.h>
-#include <unistd.h>
-
-#include "image.h"
-#include "cr_options.h"
-#include "servicefd.h"
-#include "page-read.h"
-
-#include "protobuf.h"
-#include "images/pagemap.pb-c.h"
-
-#ifndef SEEK_DATA
-#define SEEK_DATA 3
-#define SEEK_HOLE 4
-#endif
-
-void pagemap2iovec(PagemapEntry *pe, struct iovec *iov)
-{
- iov->iov_base = decode_pointer(pe->vaddr);
- iov->iov_len = pe->nr_pages * PAGE_SIZE;
-}
-
-void iovec2pagemap(struct iovec *iov, PagemapEntry *pe)
-{
- pe->vaddr = encode_pointer(iov->iov_base);
- pe->nr_pages = iov->iov_len / PAGE_SIZE;
-}
-
-static int get_pagemap(struct page_read *pr, struct iovec *iov)
-{
- PagemapEntry *pe;
-
- if (pr->curr_pme >= pr->nr_pmes)
- return 0;
-
- pe = pr->pmes[pr->curr_pme];
-
- pagemap2iovec(pe, iov);
-
- pr->pe = pe;
- pr->cvaddr = (unsigned long)iov->iov_base;
-
- if (pe->in_parent && !pr->parent) {
- pr_err("No parent for snapshot pagemap\n");
- return -1;
- }
-
- return 1;
-}
-
-static void put_pagemap(struct page_read *pr)
-{
- pr->curr_pme++;
-}
-
-static void skip_pagemap_pages(struct page_read *pr, unsigned long len)
-{
- if (!len)
- return;
-
- pr_debug("\tpr%u Skip %lu bytes from page-dump\n", pr->id, len);
- if (!pr->pe->in_parent)
- lseek(img_raw_fd(pr->pi), len, SEEK_CUR);
- pr->cvaddr += len;
-}
-
-static int seek_pagemap_page(struct page_read *pr, unsigned long vaddr,
- bool warn)
-{
- int ret;
- struct iovec iov;
-
- if (pr->pe)
- pagemap2iovec(pr->pe, &iov);
- else
- goto new_pagemap;
-
- while (1) {
- unsigned long iov_end;
-
- if (vaddr < pr->cvaddr) {
- if (warn)
- pr_err("Missing %lx in parent pagemap, current iov: base=%lx,len=%zu\n",
- vaddr, (unsigned long)iov.iov_base, iov.iov_len);
- return 0;
- }
- iov_end = (unsigned long)iov.iov_base + iov.iov_len;
-
- if (iov_end <= vaddr) {
- skip_pagemap_pages(pr, iov_end - pr->cvaddr);
- put_pagemap(pr);
-new_pagemap:
- ret = get_pagemap(pr, &iov);
- if (ret <= 0)
- return ret;
-
- continue;
- }
-
- skip_pagemap_pages(pr, vaddr - pr->cvaddr);
- return 1;
- }
-}
-
-static inline void pagemap_bound_check(PagemapEntry *pe, unsigned long vaddr, int nr)
-{
- if (vaddr < pe->vaddr || (vaddr - pe->vaddr) / PAGE_SIZE + nr > pe->nr_pages) {
- pr_err("Page read err %"PRIx64":%u vs %lx:%u\n",
- pe->vaddr, pe->nr_pages, vaddr, nr);
- BUG();
- }
-}
-
-static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr, void *buf)
-{
- int ret;
- unsigned long len = nr * PAGE_SIZE;
-
- pr_info("pr%u Read %lx %u pages\n", pr->id, vaddr, nr);
- pagemap_bound_check(pr->pe, vaddr, nr);
-
- if (pr->pe->in_parent) {
- struct page_read *ppr = pr->parent;
-
- /*
- * Parent pagemap at this point entry may be shorter
- * than the current vaddr:nr needs, so we have to
- * carefully 'split' the vaddr:nr into pieces and go
- * to parent page-read with the longest requests it
- * can handle.
- */
-
- do {
- int p_nr;
-
- pr_debug("\tpr%u Read from parent\n", pr->id);
- ret = seek_pagemap_page(ppr, vaddr, true);
- if (ret <= 0)
- return -1;
-
- /*
- * This is how many pages we have in the parent
- * page_read starting from vaddr. Go ahead and
- * read as much as we can.
- */
- p_nr = ppr->pe->nr_pages - (vaddr - ppr->pe->vaddr) / PAGE_SIZE;
- pr_info("\tparent has %u pages in\n", p_nr);
- if (p_nr > nr)
- p_nr = nr;
-
- ret = read_pagemap_page(ppr, vaddr, p_nr, buf);
- if (ret == -1)
- return ret;
-
- /*
- * OK, let's see how much data we have left and go
- * to parent page-read again for the next pagemap
- * entry.
- */
- nr -= p_nr;
- vaddr += p_nr * PAGE_SIZE;
- buf += p_nr * PAGE_SIZE;
- } while (nr);
- } else {
- int fd = img_raw_fd(pr->pi);
- off_t current_vaddr = lseek(fd, 0, SEEK_CUR);
-
- pr_debug("\tpr%u Read page from self %lx/%"PRIx64"\n", pr->id, pr->cvaddr, current_vaddr);
- ret = read(fd, buf, len);
- if (ret != len) {
- pr_perror("Can't read mapping page %d", ret);
- return -1;
- }
-
- if (opts.auto_dedup) {
- ret = punch_hole(pr, current_vaddr, len, false);
- if (ret == -1) {
- return -1;
- }
- }
- }
-
- pr->cvaddr += len;
-
- return 1;
-}
-
-static void free_pagemaps(struct page_read *pr)
-{
- int i;
-
- for (i = 0; i < pr->nr_pmes; i++)
- pagemap_entry__free_unpacked(pr->pmes[i], NULL);
-
- xfree(pr->pmes);
-}
-
-static void close_page_read(struct page_read *pr)
-{
- int ret;
-
- if (pr->bunch.iov_len > 0) {
- ret = punch_hole(pr, 0, 0, true);
- if (ret == -1)
- return;
-
- pr->bunch.iov_len = 0;
- }
-
- if (pr->parent) {
- close_page_read(pr->parent);
- xfree(pr->parent);
- }
-
- if (pr->pmi)
- close_image(pr->pmi);
- if (pr->pi)
- close_image(pr->pi);
-
- if (pr->pmes)
- free_pagemaps(pr);
-}
-
-static int try_open_parent(int dfd, int pid, struct page_read *pr, int pr_flags)
-{
- int pfd, ret;
- struct page_read *parent = NULL;
-
- pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY);
- if (pfd < 0 && errno == ENOENT)
- goto out;
-
- parent = xmalloc(sizeof(*parent));
- if (!parent)
- goto err_cl;
-
- ret = open_page_read_at(pfd, pid, parent, pr_flags);
- if (ret < 0)
- goto err_free;
-
- if (!ret) {
- xfree(parent);
- parent = NULL;
- }
-
- close(pfd);
-out:
- pr->parent = parent;
- return 0;
-
-err_free:
- xfree(parent);
-err_cl:
- close(pfd);
- return -1;
-}
-
-/*
- * The pagemap entry size is at least 8 bytes for small mappings with
- * low address and may get to 18 bytes or even more for large mappings
- * with high address and in_parent flag set. 16 seems to be nice round
- * number to minimize {over,under}-allocations
- */
-#define PAGEMAP_ENTRY_SIZE_ESTIMATE 16
-
-static int init_pagemaps(struct page_read *pr)
-{
- off_t fsize;
- int nr_pmes, nr_realloc;
-
- fsize = img_raw_size(pr->pmi);
- if (fsize < 0)
- return -1;
-
- nr_pmes = fsize / PAGEMAP_ENTRY_SIZE_ESTIMATE + 1;
- nr_realloc = nr_pmes / 2;
-
- pr->pmes = xzalloc(nr_pmes * sizeof(*pr->pmes));
- if (!pr->pmes)
- return -1;
-
- pr->nr_pmes = pr->curr_pme = 0;
-
- while (1) {
- int ret = pb_read_one_eof(pr->pmi, &pr->pmes[pr->nr_pmes],
- PB_PAGEMAP);
- if (ret < 0)
- goto free_pagemaps;
- if (ret == 0)
- break;
-
- pr->nr_pmes++;
- if (pr->nr_pmes >= nr_pmes) {
- nr_pmes += nr_realloc;
- pr->pmes = xrealloc(pr->pmes,
- nr_pmes * sizeof(*pr->pmes));
- if (!pr->pmes)
- goto free_pagemaps;
- }
- }
-
- return 0;
-
- close_image(pr->pmi);
- pr->pmi = NULL;
-
-free_pagemaps:
- free_pagemaps(pr);
- return -1;
-}
-
-int open_page_read_at(int dfd, int pid, struct page_read *pr, int pr_flags)
-{
- int flags, i_typ;
- static unsigned ids = 1;
-
- if (opts.auto_dedup)
- pr_flags |= PR_MOD;
- if (pr_flags & PR_MOD)
- flags = O_RDWR;
- else
- flags = O_RSTR;
-
- switch (pr_flags & PR_TYPE_MASK) {
- case PR_TASK:
- i_typ = CR_FD_PAGEMAP;
- break;
- case PR_SHMEM:
- i_typ = CR_FD_SHMEM_PAGEMAP;
- break;
- default:
- BUG();
- return -1;
- }
-
- pr->pe = NULL;
- pr->parent = NULL;
- pr->bunch.iov_len = 0;
- pr->bunch.iov_base = NULL;
-
- pr->pmi = open_image_at(dfd, i_typ, O_RSTR, (long)pid);
- if (!pr->pmi)
- return -1;
-
- if (empty_image(pr->pmi)) {
- close_image(pr->pmi);
- return 0;
- }
-
- if ((i_typ != CR_FD_SHMEM_PAGEMAP) && try_open_parent(dfd, pid, pr, pr_flags)) {
- close_image(pr->pmi);
- return -1;
- }
-
- pr->pi = open_pages_image_at(dfd, flags, pr->pmi);
- if (!pr->pi) {
- close_page_read(pr);
- return -1;
- }
-
- if (init_pagemaps(pr)) {
- close_page_read(pr);
- return -1;
- }
-
- pr->get_pagemap = get_pagemap;
- pr->put_pagemap = put_pagemap;
- pr->read_pages = read_pagemap_page;
- pr->close = close_page_read;
- pr->skip_pages = skip_pagemap_pages;
- pr->seek_page = seek_pagemap_page;
- pr->id = ids++;
-
- pr_debug("Opened page read %u (parent %u)\n",
- pr->id, pr->parent ? pr->parent->id : 0);
-
- return 1;
-}
-
-int open_page_read(int pid, struct page_read *pr, int pr_flags)
-{
- return open_page_read_at(get_service_fd(IMG_FD_OFF), pid, pr, pr_flags);
-}
diff --git a/criu/pagemap.c b/criu/pagemap.c
new file mode 100644
index 0000000..278eb77
--- /dev/null
+++ b/criu/pagemap.c
@@ -0,0 +1,384 @@
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "image.h"
+#include "cr_options.h"
+#include "servicefd.h"
+#include "pagemap.h"
+
+#include "protobuf.h"
+#include "images/pagemap.pb-c.h"
+
+#ifndef SEEK_DATA
+#define SEEK_DATA 3
+#define SEEK_HOLE 4
+#endif
+
+void pagemap2iovec(PagemapEntry *pe, struct iovec *iov)
+{
+ iov->iov_base = decode_pointer(pe->vaddr);
+ iov->iov_len = pe->nr_pages * PAGE_SIZE;
+}
+
+void iovec2pagemap(struct iovec *iov, PagemapEntry *pe)
+{
+ pe->vaddr = encode_pointer(iov->iov_base);
+ pe->nr_pages = iov->iov_len / PAGE_SIZE;
+}
+
+static int get_pagemap(struct page_read *pr, struct iovec *iov)
+{
+ PagemapEntry *pe;
+
+ if (pr->curr_pme >= pr->nr_pmes)
+ return 0;
+
+ pe = pr->pmes[pr->curr_pme];
+
+ pagemap2iovec(pe, iov);
+
+ pr->pe = pe;
+ pr->cvaddr = (unsigned long)iov->iov_base;
+
+ if (pe->in_parent && !pr->parent) {
+ pr_err("No parent for snapshot pagemap\n");
+ return -1;
+ }
+
+ return 1;
+}
+
+static void put_pagemap(struct page_read *pr)
+{
+ pr->curr_pme++;
+}
+
+static void skip_pagemap_pages(struct page_read *pr, unsigned long len)
+{
+ if (!len)
+ return;
+
+ pr_debug("\tpr%u Skip %lu bytes from page-dump\n", pr->id, len);
+ if (!pr->pe->in_parent)
+ lseek(img_raw_fd(pr->pi), len, SEEK_CUR);
+ pr->cvaddr += len;
+}
+
+static int seek_pagemap_page(struct page_read *pr, unsigned long vaddr,
+ bool warn)
+{
+ int ret;
+ struct iovec iov;
+
+ if (pr->pe)
+ pagemap2iovec(pr->pe, &iov);
+ else
+ goto new_pagemap;
+
+ while (1) {
+ unsigned long iov_end;
+
+ if (vaddr < pr->cvaddr) {
+ if (warn)
+ pr_err("Missing %lx in parent pagemap, current iov: base=%lx,len=%zu\n",
+ vaddr, (unsigned long)iov.iov_base, iov.iov_len);
+ return 0;
+ }
+ iov_end = (unsigned long)iov.iov_base + iov.iov_len;
+
+ if (iov_end <= vaddr) {
+ skip_pagemap_pages(pr, iov_end - pr->cvaddr);
+ put_pagemap(pr);
+new_pagemap:
+ ret = get_pagemap(pr, &iov);
+ if (ret <= 0)
+ return ret;
+
+ continue;
+ }
+
+ skip_pagemap_pages(pr, vaddr - pr->cvaddr);
+ return 1;
+ }
+}
+
+static inline void pagemap_bound_check(PagemapEntry *pe, unsigned long vaddr, int nr)
+{
+ if (vaddr < pe->vaddr || (vaddr - pe->vaddr) / PAGE_SIZE + nr > pe->nr_pages) {
+ pr_err("Page read err %"PRIx64":%u vs %lx:%u\n",
+ pe->vaddr, pe->nr_pages, vaddr, nr);
+ BUG();
+ }
+}
+
+static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr, void *buf)
+{
+ int ret;
+ unsigned long len = nr * PAGE_SIZE;
+
+ pr_info("pr%u Read %lx %u pages\n", pr->id, vaddr, nr);
+ pagemap_bound_check(pr->pe, vaddr, nr);
+
+ if (pr->pe->in_parent) {
+ struct page_read *ppr = pr->parent;
+
+ /*
+ * Parent pagemap at this point entry may be shorter
+ * than the current vaddr:nr needs, so we have to
+ * carefully 'split' the vaddr:nr into pieces and go
+ * to parent page-read with the longest requests it
+ * can handle.
+ */
+
+ do {
+ int p_nr;
+
+ pr_debug("\tpr%u Read from parent\n", pr->id);
+ ret = seek_pagemap_page(ppr, vaddr, true);
+ if (ret <= 0)
+ return -1;
+
+ /*
+ * This is how many pages we have in the parent
+ * page_read starting from vaddr. Go ahead and
+ * read as much as we can.
+ */
+ p_nr = ppr->pe->nr_pages - (vaddr - ppr->pe->vaddr) / PAGE_SIZE;
+ pr_info("\tparent has %u pages in\n", p_nr);
+ if (p_nr > nr)
+ p_nr = nr;
+
+ ret = read_pagemap_page(ppr, vaddr, p_nr, buf);
+ if (ret == -1)
+ return ret;
+
+ /*
+ * OK, let's see how much data we have left and go
+ * to parent page-read again for the next pagemap
+ * entry.
+ */
+ nr -= p_nr;
+ vaddr += p_nr * PAGE_SIZE;
+ buf += p_nr * PAGE_SIZE;
+ } while (nr);
+ } else {
+ int fd = img_raw_fd(pr->pi);
+ off_t current_vaddr = lseek(fd, 0, SEEK_CUR);
+
+ pr_debug("\tpr%u Read page from self %lx/%"PRIx64"\n", pr->id, pr->cvaddr, current_vaddr);
+ ret = read(fd, buf, len);
+ if (ret != len) {
+ pr_perror("Can't read mapping page %d", ret);
+ return -1;
+ }
+
+ if (opts.auto_dedup) {
+ ret = punch_hole(pr, current_vaddr, len, false);
+ if (ret == -1) {
+ return -1;
+ }
+ }
+ }
+
+ pr->cvaddr += len;
+
+ return 1;
+}
+
+static void free_pagemaps(struct page_read *pr)
+{
+ int i;
+
+ for (i = 0; i < pr->nr_pmes; i++)
+ pagemap_entry__free_unpacked(pr->pmes[i], NULL);
+
+ xfree(pr->pmes);
+}
+
+static void close_page_read(struct page_read *pr)
+{
+ int ret;
+
+ if (pr->bunch.iov_len > 0) {
+ ret = punch_hole(pr, 0, 0, true);
+ if (ret == -1)
+ return;
+
+ pr->bunch.iov_len = 0;
+ }
+
+ if (pr->parent) {
+ close_page_read(pr->parent);
+ xfree(pr->parent);
+ }
+
+ if (pr->pmi)
+ close_image(pr->pmi);
+ if (pr->pi)
+ close_image(pr->pi);
+
+ if (pr->pmes)
+ free_pagemaps(pr);
+}
+
+static int try_open_parent(int dfd, int pid, struct page_read *pr, int pr_flags)
+{
+ int pfd, ret;
+ struct page_read *parent = NULL;
+
+ pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY);
+ if (pfd < 0 && errno == ENOENT)
+ goto out;
+
+ parent = xmalloc(sizeof(*parent));
+ if (!parent)
+ goto err_cl;
+
+ ret = open_page_read_at(pfd, pid, parent, pr_flags);
+ if (ret < 0)
+ goto err_free;
+
+ if (!ret) {
+ xfree(parent);
+ parent = NULL;
+ }
+
+ close(pfd);
+out:
+ pr->parent = parent;
+ return 0;
+
+err_free:
+ xfree(parent);
+err_cl:
+ close(pfd);
+ return -1;
+}
+
+/*
+ * The pagemap entry size is at least 8 bytes for small mappings with
+ * low address and may get to 18 bytes or even more for large mappings
+ * with high address and in_parent flag set. 16 seems to be nice round
+ * number to minimize {over,under}-allocations
+ */
+#define PAGEMAP_ENTRY_SIZE_ESTIMATE 16
+
+static int init_pagemaps(struct page_read *pr)
+{
+ off_t fsize;
+ int nr_pmes, nr_realloc;
+
+ fsize = img_raw_size(pr->pmi);
+ if (fsize < 0)
+ return -1;
+
+ nr_pmes = fsize / PAGEMAP_ENTRY_SIZE_ESTIMATE + 1;
+ nr_realloc = nr_pmes / 2;
+
+ pr->pmes = xzalloc(nr_pmes * sizeof(*pr->pmes));
+ if (!pr->pmes)
+ return -1;
+
+ pr->nr_pmes = pr->curr_pme = 0;
+
+ while (1) {
+ int ret = pb_read_one_eof(pr->pmi, &pr->pmes[pr->nr_pmes],
+ PB_PAGEMAP);
+ if (ret < 0)
+ goto free_pagemaps;
+ if (ret == 0)
+ break;
+
+ pr->nr_pmes++;
+ if (pr->nr_pmes >= nr_pmes) {
+ nr_pmes += nr_realloc;
+ pr->pmes = xrealloc(pr->pmes,
+ nr_pmes * sizeof(*pr->pmes));
+ if (!pr->pmes)
+ goto free_pagemaps;
+ }
+ }
+
+ return 0;
+
+ close_image(pr->pmi);
+ pr->pmi = NULL;
+
+free_pagemaps:
+ free_pagemaps(pr);
+ return -1;
+}
+
+int open_page_read_at(int dfd, int pid, struct page_read *pr, int pr_flags)
+{
+ int flags, i_typ;
+ static unsigned ids = 1;
+
+ if (opts.auto_dedup)
+ pr_flags |= PR_MOD;
+ if (pr_flags & PR_MOD)
+ flags = O_RDWR;
+ else
+ flags = O_RSTR;
+
+ switch (pr_flags & PR_TYPE_MASK) {
+ case PR_TASK:
+ i_typ = CR_FD_PAGEMAP;
+ break;
+ case PR_SHMEM:
+ i_typ = CR_FD_SHMEM_PAGEMAP;
+ break;
+ default:
+ BUG();
+ return -1;
+ }
+
+ pr->pe = NULL;
+ pr->parent = NULL;
+ pr->bunch.iov_len = 0;
+ pr->bunch.iov_base = NULL;
+
+ pr->pmi = open_image_at(dfd, i_typ, O_RSTR, (long)pid);
+ if (!pr->pmi)
+ return -1;
+
+ if (empty_image(pr->pmi)) {
+ close_image(pr->pmi);
+ return 0;
+ }
+
+ if ((i_typ != CR_FD_SHMEM_PAGEMAP) && try_open_parent(dfd, pid, pr, pr_flags)) {
+ close_image(pr->pmi);
+ return -1;
+ }
+
+ pr->pi = open_pages_image_at(dfd, flags, pr->pmi);
+ if (!pr->pi) {
+ close_page_read(pr);
+ return -1;
+ }
+
+ if (init_pagemaps(pr)) {
+ close_page_read(pr);
+ return -1;
+ }
+
+ pr->get_pagemap = get_pagemap;
+ pr->put_pagemap = put_pagemap;
+ pr->read_pages = read_pagemap_page;
+ pr->close = close_page_read;
+ pr->skip_pages = skip_pagemap_pages;
+ pr->seek_page = seek_pagemap_page;
+ pr->id = ids++;
+
+ pr_debug("Opened page read %u (parent %u)\n",
+ pr->id, pr->parent ? pr->parent->id : 0);
+
+ return 1;
+}
+
+int open_page_read(int pid, struct page_read *pr, int pr_flags)
+{
+ return open_page_read_at(get_service_fd(IMG_FD_OFF), pid, pr, pr_flags);
+}
diff --git a/criu/uffd.c b/criu/uffd.c
index 46d9be4..f310ddd 100644
--- a/criu/uffd.c
+++ b/criu/uffd.c
@@ -21,7 +21,7 @@
#include "asm/page.h"
#include "log.h"
#include "criu-plugin.h"
-#include "page-read.h"
+#include "pagemap.h"
#include "files-reg.h"
#include "kerndat.h"
#include "mem.h"
--
1.9.1
More information about the CRIU mailing list