[CRIU] [PATCH 1/5] bfd: File-descriptors based buffered read

Pavel Emelyanov xemul at parallels.com
Fri Sep 19 06:30:52 PDT 2014


This sounds strange, but we kinda need one. Here's the
justification for that.

We heavily open /proc/pid/foo files. To speed things up we
do pid_dir = open("/proc/pid") then openat(pid_dir, foo).
This really saves time on big trees, up to 10%.

Sometimes we need line-by-line scan of these files, and for
that we currently use the fdopen() call. It takes a file
descriptor (obtained with openat from above) and wraps one
into a FILE*.

The problem with the latter is that fdopen _always_ mmap()s
a buffer for reads and this buffer always (!) gets unmapped
back on fclose(). This pair of mmap() + munmap() eats time
on big trees, up to 10% in my experiments with p.haul tests.

The situation is made even worse by the fact that each fgets
on the file results in a new page allocated in the kernel
(since the mapping is new). And also this fgets copies data,
which is not big deal, but for e.g. smaps file this results
in ~8K bytes being just copied around.


Having said that, here's a small but fast way of reading a
descriptor line-by-line using big buffer for reducing the
amount of read()s.

After all per-task fopen_proc()-s get reworked on this engine
(next 4 patches) the results on p.haul test would be

        Syscall     Calls      Time (% of time)
Now:
           mmap:      463  0.012033 (3.2%)
         munmap:      447  0.014473 (3.9%)
Patched:
         munmap:       57  0.002106 (0.6%)
           mmap:       74  0.002286 (0.7%)

The amount of read()s and open()s doesn't change since FILE*
also uses page-sized buffer for reading.

Also this eliminates some amount of lseek()s and fstat()s
the fdopen() does every time to catch up with file position
and to determine what sort of buffering it should use (for
terminals it's \n-driven, for files it's not).

Signed-off-by: Pavel Emelyanov <xemul at parallels.com>
---
 Makefile.crtools |   1 +
 bfd.c            | 156 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/bfd.h    |  18 +++++++
 3 files changed, 175 insertions(+)
 create mode 100644 bfd.c
 create mode 100644 include/bfd.h

diff --git a/Makefile.crtools b/Makefile.crtools
index 2a6d4d2..7f21d25 100644
--- a/Makefile.crtools
+++ b/Makefile.crtools
@@ -15,6 +15,7 @@ obj-y	+= cr-show.o
 obj-y	+= cr-check.o
 obj-y	+= cr-dedup.o
 obj-y	+= util.o
+obj-y	+= bfd.o
 obj-y	+= action-scripts.o
 obj-y	+= sysctl.o
 obj-y	+= ptrace.o
diff --git a/bfd.c b/bfd.c
new file mode 100644
index 0000000..2c1faae
--- /dev/null
+++ b/bfd.c
@@ -0,0 +1,156 @@
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+#include "log.h"
+#include "bfd.h"
+
+/*
+ * Kernel doesn't produce more than one page of
+ * date per one read call on proc files.
+ */
+#define BUFSIZE	(4096)
+
+/*
+ * XXX currently CRIU doesn't open several files
+ * at once, so we can have just one buffer.
+ */
+static char *buf;
+static bool buf_busy;
+
+static int buf_get(struct xbuf *xb)
+{
+	if (buf_busy) {
+		pr_err("bfd: Buffer busy\n");
+		return -1;
+	}
+
+	if (!buf) {
+		buf = mmap(NULL, BUFSIZE, PROT_READ | PROT_WRITE,
+				MAP_PRIVATE | MAP_ANON, 0, 0);
+		if (buf == MAP_FAILED) {
+			pr_perror("bfd: No buf");
+			return -1;
+		}
+	}
+
+	buf_busy = true;
+	xb->mem = buf;
+	xb->pos = buf;
+	xb->bleft = 0;
+	return 0;
+}
+
+static void buf_put(struct xbuf *xb)
+{
+	buf_busy = false;
+	/*
+	 * Don't unmap buffer back, it will get reused
+	 * by next bfdopen call
+	 */
+}
+
+int bfdopen(struct bfd *f)
+{
+	if (buf_get(&f->b)) {
+		close(f->fd);
+		return -1;
+	}
+
+	return 0;
+}
+
+void bclose(struct bfd *f)
+{
+	buf_put(&f->b);
+	close(f->fd);
+}
+
+static int brefill(struct bfd *f)
+{
+	int ret;
+	struct xbuf *b = &f->b;
+
+	memmove(b->mem, b->pos, b->bleft);
+	b->pos = b->mem;
+
+	ret = read(f->fd, b->mem + b->bleft, BUFSIZE - b->bleft);
+	if (ret < 0) {
+		pr_perror("bfd: Error reading file");
+		return -1;
+	}
+
+	if (ret == 0)
+		return 0;
+
+	b->bleft += ret;
+	return 0;
+}
+
+static char *strnchr(char *str, unsigned int len, char c)
+{
+	while (len > 0 && *str != c) {
+		str++;
+		len--;
+	}
+
+	return len == 0 ? NULL : str;
+}
+
+char *breadline(struct bfd *f)
+{
+	struct xbuf *b = &f->b;
+	bool refilled = false;
+	char *n;
+	unsigned int ss = 0;
+
+again:
+	n = strnchr(b->pos + ss, b->bleft - ss, '\n');
+	if (n) {
+		char *ret;
+
+		ret = b->pos;
+		b->pos = n + 1; /* skip the \n found */
+		*n = '\0';
+		b->bleft -= (b->pos - ret);
+		return ret;
+	}
+
+	if (refilled) {
+		if (!b->bleft)
+			return NULL;
+
+		/*
+		 * Last bytes may lack the \n at the
+		 * end, need to report this as full
+		 * line anyway
+		 */
+		b->pos[b->bleft] = '\0';
+
+		/*
+		 * The b->pos still points to old data,
+		 * but we say that no bytes left there
+		 * so next call to breadline will not
+		 * "find" these bytes again.
+		 */
+		b->bleft = 0;
+		return b->pos;
+	}
+
+	/* no full line in the buffer -- refill one */
+	if (brefill(f))
+		return BREADERR;
+
+	/*
+	 * small optimization -- we've scanned b->bleft
+	 * symols already, no need to re-scan them after
+	 * the buffer refill.
+	 */
+	ss = b->bleft;
+	refilled = true;
+
+	goto again;
+}
diff --git a/include/bfd.h b/include/bfd.h
new file mode 100644
index 0000000..cea6d3a
--- /dev/null
+++ b/include/bfd.h
@@ -0,0 +1,18 @@
+#ifndef __CR_BFD_H__
+#define __CR_BFD_H__
+struct xbuf {
+	char *mem;		/* buffer */
+	char *pos;		/* position we see bytes at */
+	unsigned int bleft;	/* bytes left after b->pos */
+};
+
+struct bfd {
+	int fd;
+	struct xbuf b;
+};
+
+#define BREADERR	((char *)-1)
+int bfdopen(struct bfd *f);
+void bclose(struct bfd *f);
+char *breadline(struct bfd *f);
+#endif
-- 
1.8.4.2



More information about the CRIU mailing list