[CRIU] [PATCH 1/5] bfd: File-descriptors based buffered read

Ruslan Kuprieiev kupruser at gmail.com
Fri Sep 19 06:57:47 PDT 2014


On 19.09.2014 16:30, Pavel Emelyanov wrote:
> This sounds strange, but we kinda need one. Here's the
> justification for that.
>
> We heavily open /proc/pid/foo files. To speed things up we
> do pid_dir = open("/proc/pid") then openat(pid_dir, foo).
> This really saves time on big trees, up to 10%.
>
> Sometimes we need line-by-line scan of these files, and for
> that we currently use the fdopen() call. It takes a file
> descriptor (obtained with openat from above) and wraps one
> into a FILE*.
>
> The problem with the latter is that fdopen _always_ mmap()s
> a buffer for reads and this buffer always (!) gets unmapped
> back on fclose(). This pair of mmap() + munmap() eats time
> on big trees, up to 10% in my experiments with p.haul tests.
>
> The situation is made even worse by the fact that each fgets
> on the file results in a new page allocated in the kernel
> (since the mapping is new). And also this fgets copies data,
> which is not big deal, but for e.g. smaps file this results
> in ~8K bytes being just copied around.
>
>
> Having said that, here's a small but fast way of reading a
> descriptor line-by-line using big buffer for reducing the
> amount of read()s.
>
> After all per-task fopen_proc()-s get reworked on this engine
> (next 4 patches) the results on p.haul test would be
>
>          Syscall     Calls      Time (% of time)
> Now:
>             mmap:      463  0.012033 (3.2%)
>           munmap:      447  0.014473 (3.9%)
> Patched:
>           munmap:       57  0.002106 (0.6%)
>             mmap:       74  0.002286 (0.7%)
>
> The amount of read()s and open()s doesn't change since FILE*
> also uses page-sized buffer for reading.
>
> Also this eliminates some amount of lseek()s and fstat()s
> the fdopen() does every time to catch up with file position
> and to determine what sort of buffering it should use (for
> terminals it's \n-driven, for files it's not).
>
> Signed-off-by: Pavel Emelyanov <xemul at parallels.com>
> ---
>   Makefile.crtools |   1 +
>   bfd.c            | 156 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>   include/bfd.h    |  18 +++++++
>   3 files changed, 175 insertions(+)
>   create mode 100644 bfd.c
>   create mode 100644 include/bfd.h
>
> diff --git a/Makefile.crtools b/Makefile.crtools
> index 2a6d4d2..7f21d25 100644
> --- a/Makefile.crtools
> +++ b/Makefile.crtools
> @@ -15,6 +15,7 @@ obj-y	+= cr-show.o
>   obj-y	+= cr-check.o
>   obj-y	+= cr-dedup.o
>   obj-y	+= util.o
> +obj-y	+= bfd.o
>   obj-y	+= action-scripts.o
>   obj-y	+= sysctl.o
>   obj-y	+= ptrace.o
> diff --git a/bfd.c b/bfd.c
> new file mode 100644
> index 0000000..2c1faae
> --- /dev/null
> +++ b/bfd.c
> @@ -0,0 +1,156 @@
> +#include <unistd.h>
> +#include <stdbool.h>
> +#include <string.h>
> +#include <stdio.h>
> +#include <sys/mman.h>
> +#include <fcntl.h>
> +
> +#include "log.h"
> +#include "bfd.h"
> +
> +/*
> + * Kernel doesn't produce more than one page of
> + * date per one read call on proc files.
> + */
> +#define BUFSIZE	(4096)
> +
> +/*
> + * XXX currently CRIU doesn't open several files
> + * at once, so we can have just one buffer.
> + */
> +static char *buf;
> +static bool buf_busy;
> +

Why not just store flags(like buf_busy) and buffers inside xbuf structure?

> +static int buf_get(struct xbuf *xb)
> +{
> +	if (buf_busy) {
> +		pr_err("bfd: Buffer busy\n");
> +		return -1;
> +	}
> +
> +	if (!buf) {
> +		buf = mmap(NULL, BUFSIZE, PROT_READ | PROT_WRITE,
> +				MAP_PRIVATE | MAP_ANON, 0, 0);
> +		if (buf == MAP_FAILED) {
> +			pr_perror("bfd: No buf");
> +			return -1;
> +		}
> +	}
> +
> +	buf_busy = true;
> +	xb->mem = buf;
> +	xb->pos = buf;
> +	xb->bleft = 0;
> +	return 0;
> +}
> +
> +static void buf_put(struct xbuf *xb)
> +{
> +	buf_busy = false;
> +	/*
> +	 * Don't unmap buffer back, it will get reused
> +	 * by next bfdopen call
> +	 */
> +}
> +
> +int bfdopen(struct bfd *f)
> +{
> +	if (buf_get(&f->b)) {
> +		close(f->fd);
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +void bclose(struct bfd *f)
> +{
> +	buf_put(&f->b);
> +	close(f->fd);
> +}
> +
> +static int brefill(struct bfd *f)
> +{
> +	int ret;
> +	struct xbuf *b = &f->b;
> +
> +	memmove(b->mem, b->pos, b->bleft);
> +	b->pos = b->mem;
> +
> +	ret = read(f->fd, b->mem + b->bleft, BUFSIZE - b->bleft);
> +	if (ret < 0) {
> +		pr_perror("bfd: Error reading file");
> +		return -1;
> +	}
> +
> +	if (ret == 0)
> +		return 0;
> +
> +	b->bleft += ret;
> +	return 0;
> +}
> +
> +static char *strnchr(char *str, unsigned int len, char c)
> +{
> +	while (len > 0 && *str != c) {
> +		str++;
> +		len--;
> +	}
> +
> +	return len == 0 ? NULL : str;
> +}
> +
> +char *breadline(struct bfd *f)
> +{
> +	struct xbuf *b = &f->b;
> +	bool refilled = false;
> +	char *n;
> +	unsigned int ss = 0;
> +
> +again:
> +	n = strnchr(b->pos + ss, b->bleft - ss, '\n');
> +	if (n) {
> +		char *ret;
> +
> +		ret = b->pos;
> +		b->pos = n + 1; /* skip the \n found */
> +		*n = '\0';
> +		b->bleft -= (b->pos - ret);
> +		return ret;
> +	}
> +
> +	if (refilled) {
> +		if (!b->bleft)
> +			return NULL;
> +
> +		/*
> +		 * Last bytes may lack the \n at the
> +		 * end, need to report this as full
> +		 * line anyway
> +		 */
> +		b->pos[b->bleft] = '\0';
> +
> +		/*
> +		 * The b->pos still points to old data,
> +		 * but we say that no bytes left there
> +		 * so next call to breadline will not
> +		 * "find" these bytes again.
> +		 */
> +		b->bleft = 0;
> +		return b->pos;
> +	}
> +
> +	/* no full line in the buffer -- refill one */
> +	if (brefill(f))
> +		return BREADERR;
> +
> +	/*
> +	 * small optimization -- we've scanned b->bleft
> +	 * symols already, no need to re-scan them after
> +	 * the buffer refill.
> +	 */
> +	ss = b->bleft;
> +	refilled = true;
> +
> +	goto again;
> +}
> diff --git a/include/bfd.h b/include/bfd.h
> new file mode 100644
> index 0000000..cea6d3a
> --- /dev/null
> +++ b/include/bfd.h
> @@ -0,0 +1,18 @@
> +#ifndef __CR_BFD_H__
> +#define __CR_BFD_H__
> +struct xbuf {
> +	char *mem;		/* buffer */
> +	char *pos;		/* position we see bytes at */
> +	unsigned int bleft;	/* bytes left after b->pos */
> +};
> +
> +struct bfd {
> +	int fd;
> +	struct xbuf b;
> +};
> +
> +#define BREADERR	((char *)-1)
> +int bfdopen(struct bfd *f);
> +void bclose(struct bfd *f);
> +char *breadline(struct bfd *f);
> +#endif



More information about the CRIU mailing list