[CRIU] [PATCH 02/10] fdstore: add a storage for file descriptors (v2)
Pavel Emelyanov
xemul at virtuozzo.com
Mon Feb 13 05:01:49 PST 2017
On 02/13/2017 08:49 AM, Andrei Vagin wrote:
> From: Andrei Vagin <avagin at virtuozzo.com>
>
> We need a storage for file descriptors which is shared between processes
> and doesn't use a lot of file descriptors. We are going to use it on
> restore, and if it used file descriptors itself, we would have to find
> descriptors which are not used by any restored process so as not to conflict
> with their descriptors.
>
> There are two solutions. The first one is a service (process) which
> handles the commands push_fd(id, fd) and pop_fd(id, fd).
>
> Another solution is to save descriptors in a unix socket. It requires
> only one extra descriptor which we can register as a service fd. Each
> unix socket has a buffer and can fit a number of file descriptors. We
> can use SO_PEEK_OFF and MSG_PEEK to get file descriptors from a socket
> as many times as we need.
>
> This patch implements the second solution.
>
> v2: call recvmsg with MSG_PEEK
v3: Add synchronization?
Anyway, would you please rebase this on the current criu-dev? It already has
the fdstore patch merged in its v2 incarnation :)
> Signed-off-by: Andrei Vagin <avagin at virtuozzo.com>
> ---
> criu/Makefile.crtools | 1 +
> criu/cr-restore.c | 6 +++
> criu/fdstore.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++
> criu/include/fdstore.h | 17 +++++++
> criu/include/servicefd.h | 1 +
> 5 files changed, 143 insertions(+)
> create mode 100644 criu/fdstore.c
> create mode 100644 criu/include/fdstore.h
>
> diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
> index e095d01..afb22c5 100644
> --- a/criu/Makefile.crtools
> +++ b/criu/Makefile.crtools
> @@ -81,6 +81,7 @@ obj-y += uts_ns.o
> obj-y += path.o
> obj-y += autofs.o
> obj-y += uffd.o
> +obj-y += fdstore.o
>
> ifeq ($(VDSO),y)
> obj-y += pie-util-vdso.o
> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index 7ccb136..07b7ae0 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -78,6 +78,7 @@
> #include "fault-injection.h"
> #include "sk-queue.h"
> #include "sigframe.h"
> +#include "fdstore.h"
>
> #include "parasite-syscall.h"
> #include "files-reg.h"
> @@ -1396,6 +1397,10 @@ static int restore_task_with_children(void *_arg)
>
> /* Restore root task */
> if (current->parent == NULL) {
> +
> + if (fdstore_init())
> + goto err;
> +
> if (join_namespaces()) {
> pr_perror("Join namespaces failed");
> goto err;
> @@ -3194,6 +3199,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
> close_proc();
> close_service_fd(ROOT_FD_OFF);
> close_service_fd(USERNSD_SK);
> + close_service_fd(FDSTORE_SK_OFF);
>
> __gcov_flush();
>
> diff --git a/criu/fdstore.c b/criu/fdstore.c
> new file mode 100644
> index 0000000..d9bed4d
> --- /dev/null
> +++ b/criu/fdstore.c
> @@ -0,0 +1,118 @@
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <sys/stat.h>
> +#include <unistd.h>
> +#include <stdio.h>
> +
> +#include "common/scm.h"
> +#include "common/lock.h"
> +#include "servicefd.h"
> +#include "fdstore.h"
> +#include "xmalloc.h"
> +#include "rst-malloc.h"
> +#include "log.h"
> +
> +static struct fdstore_desc {
> + int next_id;
> + mutex_t lock; /* to protect a peek offset */
> +} *desc;
> +
> +int fdstore_init(void)
> +{
> + struct sockaddr_un addr;
> + unsigned int addrlen;
> + struct stat st;
> + int sk, ret;
> +
> + desc = shmalloc(sizeof(*desc));
> + if (!desc)
> + return -1;
> +
> + desc->next_id = 0;
> + mutex_init(&desc->lock);
> +
> + sk = socket(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0);
> + if (sk < 0) {
> + pr_perror("Unable to create a socket");
> + return -1;
> + }
> +
> + if (fstat(sk, &st)) {
> + pr_perror("Unable to stat a file descriptor");
> + close(sk);
> + return -1;
> + }
> +
> + addr.sun_family = AF_UNIX;
> + addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-fdstore-%"PRIx64, st.st_ino);
> + addrlen += sizeof(addr.sun_family);
> +
> + addr.sun_path[0] = 0;
> +
> + /*
> + * This socket is connected to itself, so all messages are queued to
> + * its receive queue. Here we are going to use this socket to store
> + * file descriptors. For that we need to send a file descriptor in
> + * a queue and remeber its sequence number. Then we can set SO_PEEK_OFF
> + * to get a file descriptor without dequeuing it.
> + */
> + if (bind(sk, (struct sockaddr *) &addr, addrlen)) {
> + pr_perror("Unable to bind a socket");
> + close(sk);
> + return -1;
> + }
> + if (connect(sk, (struct sockaddr *) &addr, addrlen)) {
> + pr_perror("Unable to connect a socket");
> + close(sk);
> + return -1;
> + }
> +
> + ret = install_service_fd(FDSTORE_SK_OFF, sk);
> + close(sk);
> + if (ret < 0)
> + return -1;
> +
> + return 0;
> +}
> +
> +int fdstore_add(int fd)
> +{
> + int sk = get_service_fd(FDSTORE_SK_OFF);
> + int id;
> +
> + mutex_lock(&desc->lock);
> +
> + if (send_fd(sk, NULL, 0, fd)) {
> + mutex_unlock(&desc->lock);
> + return -1;
> + }
> +
> + id = desc->next_id++;
> +
> + mutex_unlock(&desc->lock);
> +
> + return id;
> +}
> +
> +int fdstore_get(int id)
> +{
> + int sk = get_service_fd(FDSTORE_SK_OFF);
> + int fd;
> +
> + mutex_lock(&desc->lock);
> + if (setsockopt(sk, SOL_SOCKET, SO_PEEK_OFF, &id, sizeof(id))) {
> + mutex_unlock(&desc->lock);
> + pr_perror("Unable to a peek offset");
> + return -1;
> + }
> +
> + if (__recv_fds(sk, &fd, 1, NULL, 0, MSG_PEEK) < 0) {
> + mutex_unlock(&desc->lock);
> + pr_perror("Unable to get a file descriptor with the %d id", id);
> + return -1;
> + }
> + mutex_unlock(&desc->lock);
> +
> + return fd;
> +}
> diff --git a/criu/include/fdstore.h b/criu/include/fdstore.h
> new file mode 100644
> index 0000000..bdfb5fe
> --- /dev/null
> +++ b/criu/include/fdstore.h
> @@ -0,0 +1,17 @@
> +#ifndef __CRIU_FDSTORE_H__
> +#define __CRIU_FDSTORE_H__
> +
> +/*
> + * fdstore is a storage for file descriptors which is shared
> + * between processes.
> + */
> +
> +int fdstore_init(void);
> +
> +/* Add a file descriptor to the storage and return its id */
> +int fdstore_add(int fd);
> +
> +/* Get a file descriptor from a storage by id */
> +int fdstore_get(int id);
> +
> +#endif
> diff --git a/criu/include/servicefd.h b/criu/include/servicefd.h
> index 5152fb6..c070480 100644
> --- a/criu/include/servicefd.h
> +++ b/criu/include/servicefd.h
> @@ -21,6 +21,7 @@ enum sfd_type {
> NS_FD_OFF, /* Node's net namespace fd */
> TRANSPORT_FD_OFF, /* to transfer file descriptors */
> LAZY_PAGES_SK_OFF, /* socket for communication with lazy-pages daemon */
> + FDSTORE_SK_OFF, /* socket to store file descriptors */
>
> SERVICE_FD_MAX
> };
>
More information about the CRIU
mailing list