[CRIU] [PATCH] Punch holes in input files when restoring anonymous non-shared memory
Pawel Stradomski
pstradomski at google.com
Tue Jul 17 16:38:57 MSK 2018
Punch holes in input files when restoring anonymous non-shared memory
if --auto-dedup is enabled.
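This reduces memory usage if image files are stored on tmpfs: each range of the pages image is deallocated right after it has been read into the restored task, so the data is not kept in memory twice.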
Signed-off-by: Pawel Stradomski <pstradomski at google.com>
---
compel/arch/arm/plugins/std/syscalls/syscall.def | 1 +
.../ppc64/plugins/std/syscalls/syscall-ppc64.tbl | 1 +
.../s390/plugins/std/syscalls/syscall-s390.tbl | 1 +
.../arch/x86/plugins/std/syscalls/syscall_32.tbl | 1 +
.../arch/x86/plugins/std/syscalls/syscall_64.tbl | 1 +
criu/mem.c | 3 ++-
criu/pie/restorer.c | 16 ++++++++++++++++
7 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def
index b68f9f2f..bcd61d4a 100644
--- a/compel/arch/arm/plugins/std/syscalls/syscall.def
+++ b/compel/arch/arm/plugins/std/syscalls/syscall.def
@@ -109,3 +109,4 @@ seccomp 277 383 (unsigned int op, unsigned int flags, const char *uargs)
gettimeofday 169 78 (struct timeval *tv, struct timezone *tz)
preadv_raw 69 361 (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h)
userfaultfd 282 388 (int flags)
+fallocate 47 352 (int fd, int mode, loff_t offset, loff_t len)
diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
index fa0b034e..62e0bc1a 100644
--- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
+++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
@@ -89,6 +89,7 @@ __NR_set_robust_list 300 sys_set_robust_list (struct robust_list_head *head, si
__NR_get_robust_list 299 sys_get_robust_list (int pid, struct robust_list_head **head_ptr, size_t *len_ptr)
__NR_vmsplice 285 sys_vmsplice (int fd, const struct iovec *iov, unsigned long nr_segs, unsigned int flags)
__NR_openat 286 sys_openat (int dfd, const char *filename, int flags, int mode)
+__NR_fallocate 309 sys_fallocate (int fd, int mode, loff_t offset, loff_t len)
__NR_timerfd_settime 311 sys_timerfd_settime (int ufd, int flags, const struct itimerspec *utmr, struct itimerspec *otmr)
__NR_signalfd4 313 sys_signalfd (int fd, k_rtsigset_t *mask, size_t sizemask, int flags)
__NR_rt_tgsigqueueinfo 322 sys_rt_tgsigqueueinfo (pid_t tgid, pid_t pid, int sig, siginfo_t *info)
diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
index bc77ae97..3521e915 100644
--- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
+++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
@@ -89,6 +89,7 @@ __NR_set_robust_list 304 sys_set_robust_list (struct robust_list_head *head, si
__NR_get_robust_list 305 sys_get_robust_list (int pid, struct robust_list_head **head_ptr, size_t *len_ptr)
__NR_vmsplice 309 sys_vmsplice (int fd, const struct iovec *iov, unsigned long nr_segs, unsigned int flags)
__NR_openat 288 sys_openat (int dfd, const char *filename, int flags, int mode)
+__NR_fallocate 314 sys_fallocate (int fd, int mode, loff_t offset, loff_t len)
__NR_timerfd_settime 320 sys_timerfd_settime (int ufd, int flags, const struct itimerspec *utmr, struct itimerspec *otmr)
__NR_signalfd4 322 sys_signalfd (int fd, k_rtsigset_t *mask, size_t sizemask, int flags)
__NR_rt_tgsigqueueinfo 330 sys_rt_tgsigqueueinfo (pid_t tgid, pid_t pid, int sig, siginfo_t *info)
diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
index 9e1de281..a6c55b83 100644
--- a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
+++ b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
@@ -83,6 +83,7 @@ __NR_set_robust_list 311 sys_set_robust_list (struct robust_list_head *head, si
__NR_get_robust_list 312 sys_get_robust_list (int pid, struct robust_list_head **head_ptr, size_t *len_ptr)
__NR_vmsplice 316 sys_vmsplice (int fd, const struct iovec *iov, unsigned int nr_segs, unsigned int flags)
__NR_signalfd 321 sys_signalfd (int ufd, const k_rtsigset_t *sigmask, size_t sigsetsize)
+__NR_fallocate 324 sys_fallocate (int fd, int mode, loff_t offset, loff_t len)
__NR_timerfd_settime 325 sys_timerfd_settime (int ufd, int flags, const struct itimerspec *utmr, struct itimerspec *otmr)
__NR_preadv 333 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h)
__NR_rt_tgsigqueueinfo 335 sys_rt_tgsigqueueinfo (pid_t tgid, pid_t pid, int sig, siginfo_t *uinfo)
diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
index 726fa797..64271514 100644
--- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
+++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
@@ -94,6 +94,7 @@ __NR_set_robust_list 273 sys_set_robust_list (struct robust_list_head *head, s
__NR_get_robust_list 274 sys_get_robust_list (int pid, struct robust_list_head **head_ptr, size_t *len_ptr)
__NR_seccomp 317 sys_seccomp (unsigned int op, unsigned int flags, const char *uargs)
__NR_vmsplice 278 sys_vmsplice (int fd, const struct iovec *iov, unsigned long nr_segs, unsigned int flags)
+__NR_fallocate 285 sys_fallocate (int fd, int mode, loff_t offset, loff_t len)
__NR_timerfd_settime 286 sys_timerfd_settime (int ufd, int flags, const struct itimerspec *utmr, struct itimerspec *otmr)
__NR_signalfd4 289 sys_signalfd (int fd, k_rtsigset_t *mask, size_t sizemask, int flags)
__NR_preadv 295 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h)
diff --git a/criu/mem.c b/criu/mem.c
index d020b7fd..c3d604a5 100644
--- a/criu/mem.c
+++ b/criu/mem.c
@@ -1271,7 +1271,8 @@ static int prepare_vma_ios(struct pstree_item *t, struct task_restore_args *ta)
{
struct cr_img *pages;
- pages = open_image(CR_FD_PAGES, O_RSTR, rsti(t)->pages_img_id);
+ pages = open_image(CR_FD_PAGES, opts.auto_dedup ? O_RDWR : O_RSTR,
+ rsti(t)->pages_img_id);
if (!pages)
return -1;
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index f990e9b7..3f1a8a6b 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -646,6 +646,15 @@ static unsigned long restore_mapping(VmaEntry *vma_entry)
!(vma_entry->status & VMA_NO_PROT_WRITE))
prot |= PROT_WRITE;
+ /* TODO: if the mapping has the MAP_LOCKED bit set, mmap will page it
+ * in immediately and increase the process's memory usage, thus
+ * defeating the attempt to conserve memory by running fallocate after
+ * each preadv.
+ *
+ * This could be fixed by clearing the MAP_LOCKED bit here and restoring
+ * it after all the contents have been loaded and the tmpfs pages have
+ * been released by fallocate.
+ */
pr_debug("\tmmap(%"PRIx64" -> %"PRIx64", %x %x %d)\n",
vma_entry->start, vma_entry->end,
prot, flags, (int)vma_entry->fd);
@@ -1367,6 +1376,13 @@ long __export_restore_task(struct task_restore_args *args)
}
pr_debug("`- returned %ld\n", (long)r);
+ /* TODO: Check if auto-dedup is enabled instead of trusting fallocate to fail
+ * if the file is not opened for writing. */
+ if (r > 0) {
+ pr_debug(" `fallocate %d %ld %ld\n", args->vma_ios_fd, rio->off, r);
+ sys_fallocate(args->vma_ios_fd, FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE,
+ rio->off, r);
+ }
rio->off += r;
/* Advance the iovecs */
do {
--
2.18.0.203.gfac676dfb9-goog
More information about the CRIU
mailing list