[CRIU] [PATCH 6/7] irmap: Introduce irmap on-disk cache
Pavel Emelyanov
xemul at parallels.com
Thu Jan 30 02:24:37 PST 2014
When dumping fsnotifies we may go to irmap to get inode->path
mapping. The irmap engine scans FS (in hinted locations) to
get one and it is slow even though we scan only part of the FS.
Since the above scanning is done while tasks are frozen the
freeze time goes up :(
Improve the situation by generating irmap cache in working dir
at pre-dump when tasks get unfrozen.
The on-disk irmap cache is a PB (protobuf) file that sits in the -W
directory and can be loaded into memory on dump/pre-dump start. When
resolving the inode->path mapping, irmap may meet these entries,
revalidate them and potentially save time.
After pre-dump the (re-)collected irmap data is written back
to the irmap cache image. Typically the entries written back are
the same ones that were read in on cache load.
Signed-off-by: Pavel Emelyanov <xemul at parallels.com>
---
cr-dump.c | 10 ++++
fsnotify.c | 13 ++++--
include/irmap.h | 7 +++
irmap.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 168 insertions(+), 3 deletions(-)
diff --git a/cr-dump.c b/cr-dump.c
index 2c77810..b407536 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -71,6 +71,7 @@
#include "vma.h"
#include "cr-service.h"
#include "plugin.h"
+#include "irmap.h"
#include "asm/dump.h"
@@ -1614,6 +1615,9 @@ int cr_pre_dump_tasks(pid_t pid)
if (kerndat_init())
goto err;
+ if (irmap_load_cache())
+ goto err;
+
if (cpu_init())
goto err;
@@ -1671,6 +1675,9 @@ err:
parasite_cure_local(ctl);
}
+ if (irmap_predump_run())
+ ret = -1;
+
if (disconnect_from_page_server())
ret = -1;
@@ -1703,6 +1710,9 @@ int cr_dump_tasks(pid_t pid)
if (kerndat_init())
goto err;
+ if (irmap_load_cache())
+ goto err;
+
if (cpu_init())
goto err;
diff --git a/fsnotify.c b/fsnotify.c
index 7464494..a9412a0 100644
--- a/fsnotify.c
+++ b/fsnotify.c
@@ -124,7 +124,7 @@ out:
return fd;
}
-static int check_open_handle(unsigned int s_dev, unsigned long i_ino,
+int check_open_handle(unsigned int s_dev, unsigned long i_ino,
FhEntry *f_handle)
{
int fd;
@@ -183,7 +183,8 @@ static int dump_one_inotify(int lfd, u32 id, const struct fd_parms *p)
static int pre_dump_inotify_entry(union fdinfo_entries *e, void *arg)
{
- return 0;
+ InotifyWdEntry *we = &e->ify;
+ return irmap_queue_cache(we->s_dev, we->i_ino, we->f_handle);
}
static int pre_dump_one_inotify(int pid, int lfd)
@@ -262,7 +263,13 @@ static int dump_one_fanotify(int lfd, u32 id, const struct fd_parms *p)
static int pre_dump_fanotify_entry(union fdinfo_entries *e, void *arg)
{
- return 0;
+ FanotifyMarkEntry *fme = &e->ffy;
+
+ if (fme->type == MARK_TYPE__INODE)
+ return irmap_queue_cache(fme->s_dev, fme->ie->i_ino,
+ fme->ie->f_handle);
+ else
+ return 0;
}
static int pre_dump_one_fanotify(int pid, int lfd)
diff --git a/include/irmap.h b/include/irmap.h
index 25f830b..b5b495d 100644
--- a/include/irmap.h
+++ b/include/irmap.h
@@ -1,4 +1,11 @@
#ifndef __CR_IRMAP__H__
#define __CR_IRMAP__H__
char *irmap_lookup(unsigned int s_dev, unsigned long i_ino);
+struct _FhEntry;
+int irmap_queue_cache(unsigned int dev, unsigned long ino,
+ struct _FhEntry *fh);
+int irmap_predump_run(void);
+int check_open_handle(unsigned int s_dev, unsigned long i_ino,
+ struct _FhEntry *f_handle);
+int irmap_load_cache(void);
#endif
diff --git a/irmap.c b/irmap.c
index 7d326c4..367e082 100644
--- a/irmap.c
+++ b/irmap.c
@@ -21,6 +21,11 @@
#include "mount.h"
#include "log.h"
#include "util.h"
+#include "image.h"
+
+#include "protobuf.h"
+#include "protobuf/fsnotify.pb-c.h"
+#include "protobuf/fh.pb-c.h"
#undef LOG_PREFIX
#define LOG_PREFIX "irmap: "
@@ -235,3 +240,139 @@ char *irmap_lookup(unsigned int s_dev, unsigned long i_ino)
out:
return path;
}
+
+/*
+ * IRMAP pre-cache -- do early irmap scan on pre-dump to reduce
+ * the freeze time on dump
+ *
+ * Objects met on pre-dump are pushed onto the predump_queue list by
+ * irmap_queue_cache(); irmap_predump_run() later walks that list and
+ * writes every entry that ended up with a path into the irmap cache
+ * image.
+ */
+
+struct irmap_predump {
+ unsigned int dev; /* dev:ino pair, logged and handed to check_open_handle() */
+ unsigned long ino;
+ FhEntry fh; /* struct copy of the caller's entry; pointer members still reference the caller's data */
+ struct irmap_predump *next; /* next queued item, NULL terminates the list */
+};
+
+static struct irmap_predump *predump_queue; /* list head; new items are inserted in front */
+
+/*
+ * Remember a dev:ino object (and its file handle) so that
+ * irmap_predump_run() can resolve it later.
+ *
+ * The handle is stored as a plain struct copy of @fh.
+ * NOTE(review): pointer members of @fh are therefore shared with the
+ * caller -- confirm the caller keeps them alive until the queue is run.
+ *
+ * Returns 0 on success, -1 if the queue item cannot be allocated.
+ */
+int irmap_queue_cache(unsigned int dev, unsigned long ino,
+		FhEntry *fh)
+{
+	struct irmap_predump *item = xmalloc(sizeof(*item));
+
+	if (item == NULL)
+		return -1;
+
+	pr_debug("Queue %x:%lx for pre-dump\n", dev, ino);
+
+	item->fh = *fh;
+	item->ino = ino;
+	item->dev = dev;
+
+	/* Push in front of the list */
+	item->next = predump_queue;
+	predump_queue = item;
+
+	return 0;
+}
+
+/*
+ * Walk the pre-dump queue, run check_open_handle() on every queued
+ * object and write each one that has a path into the irmap cache
+ * image.
+ *
+ * The walk stops at the first failure of either the handle check or
+ * the image write.
+ *
+ * Returns 0 on success, -1 if the image cannot be opened, otherwise
+ * the non-zero code of the step that failed.
+ */
+int irmap_predump_run(void)
+{
+	struct irmap_predump *cur;
+	int img, err = 0;
+
+	img = open_image_at(AT_FDCWD, CR_FD_IRMAP_CACHE, O_DUMP);
+	if (img < 0)
+		return -1;
+
+	pr_info("Running irmap pre-dump\n");
+
+	cur = predump_queue;
+	while (cur != NULL) {
+		pr_debug("\tchecking %x:%lx\n", cur->dev, cur->ino);
+
+		err = check_open_handle(cur->dev, cur->ino, &cur->fh);
+		if (err == 0 && cur->fh.path != NULL) {
+			/* Only entries with a path are worth caching */
+			IrmapCacheEntry entry = IRMAP_CACHE_ENTRY__INIT;
+
+			pr_info("Irmap cache %x:%lx -> %s\n", cur->dev, cur->ino, cur->fh.path);
+			entry.path = cur->fh.path;
+			entry.inode = cur->ino;
+			entry.dev = cur->dev;
+
+			err = pb_write_one(img, &entry, PB_IRMAP_CACHE);
+		}
+
+		if (err != 0)
+			break;
+
+		cur = cur->next;
+	}
+
+	close(img);
+	return err;
+}
+
+/*
+ * Insert one entry read from the irmap cache image into the
+ * in-memory irmap hash.
+ *
+ * The path is duplicated, so @ie stays owned by the caller and may
+ * be released right after the call.
+ *
+ * Returns 0 on success, -1 if memory allocation fails.
+ */
+static int irmap_cache_one(IrmapCacheEntry *ie)
+{
+	struct irmap *ic;
+	unsigned hv;
+
+	ic = xmalloc(sizeof(*ic));
+	if (!ic)
+		return -1;
+
+	ic->dev = ie->dev;
+	ic->ino = ie->inode;
+	ic->path = xstrdup(ie->path);
+	/* Test the copy, not the source: it is xstrdup() that may fail */
+	if (!ic->path) {
+		xfree(ic);
+		return -1;
+	}
+
+	ic->nr_kids = 0;
+	/*
+	 * We've loaded entry from cache, thus we'll need to check
+	 * whether it's still valid when find it in cache.
+	 */
+	ic->revalidate = true;
+
+	pr_debug("Pre-cache %x:%lx -> %s\n", ic->dev, ic->ino, ic->path);
+
+	/* Link the new element at the head of its hash chain */
+	hv = irmap_hashfn(ic->dev, ic->ino);
+	ic->next = cache[hv];
+	cache[hv] = ic;
+
+	return 0;
+}
+
+/*
+ * Load the on-disk irmap cache image, if there is one, into the
+ * in-memory irmap hash.
+ *
+ * A missing image (ENOENT) is not an error: the function logs that
+ * there is no cache and reports success.
+ *
+ * Returns 0 on success or when no cache image exists, a negative
+ * value if the image cannot be opened, read or stored.
+ */
+int irmap_load_cache(void)
+{
+	int fd, ret;
+
+	fd = open_image_at(AT_FDCWD, CR_FD_IRMAP_CACHE, O_RSTR);
+	if (fd < 0) {
+		if (errno == ENOENT) {
+			pr_info("No irmap cache\n");
+			return 0;
+		}
+
+		return -1;
+	}
+
+	pr_info("Loading irmap cache\n");
+	while (1) {
+		IrmapCacheEntry *ic;
+
+		/* > 0 means an entry was read; 0 or negative ends the loop */
+		ret = pb_read_one_eof(fd, &ic, PB_IRMAP_CACHE);
+		if (ret <= 0)
+			break;
+
+		ret = irmap_cache_one(ic);
+		/*
+		 * irmap_cache_one() keeps its own copy of the path, so the
+		 * unpacked message is released whether or not it succeeded;
+		 * bailing out before this point would leak it.
+		 */
+		irmap_cache_entry__free_unpacked(ic, NULL);
+		if (ret < 0)
+			break;
+	}
+
+	close(fd);
+	return ret;
+}
--
1.8.4.2
More information about the CRIU
mailing list