[CRIU] [PATCH 1/3] sfd: Move service fds to separate file
Cyrill Gorcunov
gorcunov at gmail.com
Wed Apr 3 16:22:00 MSK 2019
The service fds code has nothing to do with generic utils; it is
rather a separate service engine, so move it into its own file.
Signed-off-by: Cyrill Gorcunov <gorcunov at gmail.com>
---
criu/Makefile.crtools | 1 +
criu/include/servicefd.h | 27 ++---
criu/include/util.h | 2 +
criu/plugin.c | 6 +
criu/servicefd.c | 251 +++++++++++++++++++++++++++++++++++++++
criu/util.c | 239 -------------------------------------
6 files changed, 273 insertions(+), 253 deletions(-)
create mode 100644 criu/servicefd.c
diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
index c57369514410..b598f0ef58d8 100644
--- a/criu/Makefile.crtools
+++ b/criu/Makefile.crtools
@@ -87,6 +87,7 @@ obj-y += autofs.o
obj-y += fdstore.o
obj-y += uffd.o
obj-y += config.o
+obj-y += servicefd.o
ifeq ($(VDSO),y)
obj-y += pie-util-vdso.o
diff --git a/criu/include/servicefd.h b/criu/include/servicefd.h
index bddf71c09d84..f8c2814092b7 100644
--- a/criu/include/servicefd.h
+++ b/criu/include/servicefd.h
@@ -8,17 +8,17 @@ enum sfd_type {
LOG_FD_OFF,
IMG_FD_OFF,
- PROC_FD_OFF, /* fd with /proc for all proc_ calls */
+ PROC_FD_OFF, /* fd with /proc for all proc_ calls */
PROC_PID_FD_OFF,
- CR_PROC_FD_OFF, /* some other's proc fd.
- * For dump -- target ns' proc
- * For restore -- CRIU ns' proc
- */
- ROOT_FD_OFF, /* Root of the namespace we dump/restore */
+ CR_PROC_FD_OFF, /* some other's proc fd:
+ * - For dump -- target ns' proc
+ * - For restore -- CRIU ns' proc
+ */
+ ROOT_FD_OFF, /* Root of the namespace we dump/restore */
CGROUP_YARD,
- USERNSD_SK, /* Socket for usernsd */
- NS_FD_OFF, /* Node's net namespace fd */
- TRANSPORT_FD_OFF, /* to transfer file descriptors */
+ USERNSD_SK, /* Socket for usernsd */
+ NS_FD_OFF, /* Node's net namespace fd */
+ TRANSPORT_FD_OFF, /* to transfer file descriptors */
RPC_SK_OFF,
FDSTORE_SK_OFF,
@@ -28,14 +28,13 @@ enum sfd_type {
struct pstree_item;
extern bool sfds_protected;
-extern void set_proc_self_fd(int fd);
-extern int clone_service_fd(struct pstree_item *me);
extern int init_service_fd(void);
extern int get_service_fd(enum sfd_type type);
+extern bool is_any_service_fd(int fd);
+extern bool is_service_fd(int fd, enum sfd_type type);
+extern int service_fd_min_fd(struct pstree_item *item);
extern int install_service_fd(enum sfd_type type, int fd);
extern int close_service_fd(enum sfd_type type);
-extern bool is_service_fd(int fd, enum sfd_type type);
-extern bool is_any_service_fd(int fd);
-extern int service_fd_min_fd(struct pstree_item *);
+extern int clone_service_fd(struct pstree_item *me);
#endif /* __CR_SERVICE_FD_H__ */
diff --git a/criu/include/util.h b/criu/include/util.h
index d4cb3568dc1b..aa1cbd9eeee2 100644
--- a/criu/include/util.h
+++ b/criu/include/util.h
@@ -180,6 +180,8 @@ extern int cr_daemon(int nochdir, int noclose, int *keep_fd, int close_fd);
extern int close_status_fd(void);
extern int is_root_user(void);
+extern void set_proc_self_fd(int fd);
+
static inline bool dir_dots(const struct dirent *de)
{
return !strcmp(de->d_name, ".") || !strcmp(de->d_name, "..");
diff --git a/criu/plugin.c b/criu/plugin.c
index 0db862924328..b97d3763a972 100644
--- a/criu/plugin.c
+++ b/criu/plugin.c
@@ -10,6 +10,7 @@
#include "common/compiler.h"
#include "xmalloc.h"
#include "plugin.h"
+#include "servicefd.h"
#include "common/list.h"
#include "log.h"
@@ -90,6 +91,11 @@ static int verify_plugin(cr_plugin_desc_t *d)
return 0;
}
+int criu_get_image_dir(void)
+{
+ return get_service_fd(IMG_FD_OFF);
+}
+
static int cr_lib_load(int stage, char *path)
{
cr_plugin_desc_t *d;
diff --git a/criu/servicefd.c b/criu/servicefd.c
new file mode 100644
index 000000000000..a9909735af44
--- /dev/null
+++ b/criu/servicefd.c
@@ -0,0 +1,251 @@
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sched.h>
+
+#include <sys/time.h>
+#include <sys/syscall.h>
+#include <sys/resource.h>
+
+#include "common/compiler.h"
+#include "common/list.h"
+
+#include "criu-log.h"
+
+#include "util.h"
+#include "bitops.h"
+#include "pstree.h"
+#include "files.h"
+#include "rst_info.h"
+#include "servicefd.h"
+
+#undef LOG_PREFIX
+#define LOG_PREFIX "sfd: "
+
+/* Max potentially possible fd to be open by criu process */
+int service_fd_rlim_cur;
+
+/* Base of current process service fds set */
+static int service_fd_base;
+
+/* Id of current process in shared fdt */
+static int service_fd_id = 0;
+
+static DECLARE_BITMAP(sfd_map, SERVICE_FD_MAX);
+static int sfd_arr[SERVICE_FD_MAX];
+/*
+ * Variable for marking areas of code where service fds modifications
+ * are prohibited. It's used to protect their numbers from being reused
+ * by ordinary files. See install_service_fd() and close_service_fd().
+ */
+bool sfds_protected = false;
+
+int init_service_fd(void)
+{
+ struct rlimit64 rlimit;
+
+ /*
+ * Service FDs are those that most likely won't
+ * conflict with any 'real-life' ones
+ */
+
+ if (syscall(__NR_prlimit64, getpid(), RLIMIT_NOFILE, NULL, &rlimit)) {
+ pr_perror("Can't get rlimit");
+ return -1;
+ }
+
+ service_fd_rlim_cur = (int)rlimit.rlim_cur;
+ return 0;
+}
+
+static int __get_service_fd(enum sfd_type type, int service_fd_id)
+{
+ return service_fd_base - type - SERVICE_FD_MAX * service_fd_id;
+}
+
+int get_service_fd(enum sfd_type type)
+{
+ BUG_ON((int)type <= SERVICE_FD_MIN || (int)type >= SERVICE_FD_MAX);
+
+ if (!test_bit(type, sfd_map))
+ return -1;
+
+ if (service_fd_base == 0)
+ return sfd_arr[type];
+
+ return __get_service_fd(type, service_fd_id);
+}
+
+bool is_any_service_fd(int fd)
+{
+ int sfd_min_fd = __get_service_fd(SERVICE_FD_MAX, service_fd_id);
+ int sfd_max_fd = __get_service_fd(SERVICE_FD_MIN, service_fd_id);
+
+ if (fd > sfd_min_fd && fd < sfd_max_fd) {
+ int type = SERVICE_FD_MAX - (fd - sfd_min_fd);
+ if (type > SERVICE_FD_MIN && type < SERVICE_FD_MAX)
+ return !!test_bit(type, sfd_map);
+ }
+
+ return false;
+}
+
+bool is_service_fd(int fd, enum sfd_type type)
+{
+ return fd == get_service_fd(type);
+}
+
+int service_fd_min_fd(struct pstree_item *item)
+{
+ struct fdt *fdt = rsti(item)->fdt;
+ int id = 0;
+
+ if (fdt)
+ id = fdt->nr - 1;
+ return service_fd_rlim_cur - (SERVICE_FD_MAX - 1) - SERVICE_FD_MAX * id;
+}
+
+static void sfds_protection_bug(enum sfd_type type)
+{
+ pr_err("Service fd %u is being modified in protected context\n", type);
+ print_stack_trace(current ? vpid(current) : 0);
+ BUG();
+}
+
+int install_service_fd(enum sfd_type type, int fd)
+{
+ int sfd = __get_service_fd(type, service_fd_id);
+
+ BUG_ON((int)type <= SERVICE_FD_MIN || (int)type >= SERVICE_FD_MAX);
+ if (sfds_protected && !test_bit(type, sfd_map))
+ sfds_protection_bug(type);
+
+ if (service_fd_base == 0) {
+ if (test_bit(type, sfd_map))
+ close(sfd_arr[type]);
+ sfd_arr[type] = fd;
+ set_bit(type, sfd_map);
+ return fd;
+ }
+
+ if (dup3(fd, sfd, O_CLOEXEC) != sfd) {
+ pr_perror("Dup %d -> %d failed", fd, sfd);
+ close(fd);
+ return -1;
+ }
+
+ set_bit(type, sfd_map);
+ close(fd);
+ return sfd;
+}
+
+int close_service_fd(enum sfd_type type)
+{
+ int fd;
+
+ if (sfds_protected)
+ sfds_protection_bug(type);
+
+ fd = get_service_fd(type);
+ if (fd < 0)
+ return 0;
+
+ if (close_safe(&fd))
+ return -1;
+
+ clear_bit(type, sfd_map);
+ return 0;
+}
+
+static void move_service_fd(struct pstree_item *me, int type, int new_id, int new_base)
+{
+ int old = get_service_fd(type);
+ int new = new_base - type - SERVICE_FD_MAX * new_id;
+ int ret;
+
+ if (old < 0)
+ return;
+
+ ret = dup2(old, new);
+ if (ret == -1) {
+ if (errno != EBADF)
+ pr_perror("Unable to clone %d->%d", old, new);
+ } else if (!(rsti(me)->clone_flags & CLONE_FILES))
+ close(old);
+}
+
+static int choose_service_fd_base(struct pstree_item *me)
+{
+ int nr, real_nr, fdt_nr = 1, id = rsti(me)->service_fd_id;
+
+ if (rsti(me)->fdt) {
+ /* The base is set by owner of fdt (id 0) */
+ if (id != 0)
+ return service_fd_base;
+ fdt_nr = rsti(me)->fdt->nr;
+ }
+ /* Now find process's max used fd number */
+ if (!list_empty(&rsti(me)->fds))
+ nr = list_entry(rsti(me)->fds.prev,
+ struct fdinfo_list_entry, ps_list)->fe->fd;
+ else
+ nr = -1;
+
+ nr = max(nr, inh_fd_max);
+ /*
+ * Service fds go after max fd near right border of alignment:
+ *
+ * ...|max_fd|max_fd+1|...|sfd first|...|sfd last (aligned)|
+ *
+ * So, they take maximum numbers of area allocated by kernel.
+ * See linux alloc_fdtable() for details.
+ */
+ nr += (SERVICE_FD_MAX - SERVICE_FD_MIN) * fdt_nr;
+ nr += 16; /* Safety pad */
+ real_nr = nr;
+
+ nr /= (1024 / sizeof(void *));
+ if (nr)
+ nr = 1 << (32 - __builtin_clz(nr));
+ else
+ nr = 1;
+ nr *= (1024 / sizeof(void *));
+
+ if (nr > service_fd_rlim_cur) {
+ /* Right border is bigger than rlim. OK, then just the aligned value is enough */
+ nr = round_down(service_fd_rlim_cur, (1024 / sizeof(void *)));
+ if (nr < real_nr) {
+ pr_err("Can't chose service_fd_base: %d %d\n", nr, real_nr);
+ return -1;
+ }
+ }
+
+ return nr;
+}
+
+int clone_service_fd(struct pstree_item *me)
+{
+ int id, new_base, i, ret = -1;
+
+ new_base = choose_service_fd_base(me);
+ id = rsti(me)->service_fd_id;
+
+ if (new_base == -1)
+ return -1;
+ if (service_fd_base == new_base && service_fd_id == id)
+ return 0;
+
+ /* Dup sfds in memmove() style: they may overlap */
+ if (get_service_fd(LOG_FD_OFF) < new_base - LOG_FD_OFF - SERVICE_FD_MAX * id)
+ for (i = SERVICE_FD_MIN + 1; i < SERVICE_FD_MAX; i++)
+ move_service_fd(me, i, id, new_base);
+ else
+ for (i = SERVICE_FD_MAX - 1; i > SERVICE_FD_MIN; i--)
+ move_service_fd(me, i, id, new_base);
+
+ service_fd_base = new_base;
+ service_fd_id = id;
+ ret = 0;
+
+ return ret;
+}
diff --git a/criu/util.c b/criu/util.c
index ad1ec7ca631c..75e3d2a0407f 100644
--- a/criu/util.c
+++ b/criu/util.c
@@ -5,7 +5,6 @@
#include <string.h>
#include <errno.h>
#include <stdbool.h>
-#include <limits.h>
#include <signal.h>
#include <unistd.h>
#include <dirent.h>
@@ -17,8 +16,6 @@
#include <sys/param.h>
#include <sys/types.h>
#include <sys/ptrace.h>
-#include <sys/time.h>
-#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/wait.h>
@@ -28,12 +25,8 @@
#include <netinet/tcp.h>
#include <sched.h>
-#include "bitops.h"
#include "page.h"
-#include "common/compiler.h"
-#include "common/list.h"
#include "util.h"
-#include "rst-malloc.h"
#include "image.h"
#include "vma.h"
#include "mem.h"
@@ -42,7 +35,6 @@
#include "clone-noasan.h"
#include "cr_options.h"
-#include "servicefd.h"
#include "cr-service.h"
#include "files.h"
#include "pstree.h"
@@ -421,237 +413,6 @@ int do_open_proc(pid_t pid, int flags, const char *fmt, ...)
return openat(dirfd, path, flags);
}
-/* Max potentially possible fd to be open by criu process */
-int service_fd_rlim_cur;
-/* Base of current process service fds set */
-static int service_fd_base;
-/* Id of current process in shared fdt */
-static int service_fd_id = 0;
-
-int init_service_fd(void)
-{
- struct rlimit64 rlimit;
-
- /*
- * Service FDs are those that most likely won't
- * conflict with any 'real-life' ones
- */
-
- if (syscall(__NR_prlimit64, getpid(), RLIMIT_NOFILE, NULL, &rlimit)) {
- pr_perror("Can't get rlimit");
- return -1;
- }
-
- service_fd_rlim_cur = (int)rlimit.rlim_cur;
-
- return 0;
-}
-
-static int __get_service_fd(enum sfd_type type, int service_fd_id)
-{
- return service_fd_base - type - SERVICE_FD_MAX * service_fd_id;
-}
-
-int service_fd_min_fd(struct pstree_item *item)
-{
- struct fdt *fdt = rsti(item)->fdt;
- int id = 0;
-
- if (fdt)
- id = fdt->nr - 1;
- return service_fd_rlim_cur - (SERVICE_FD_MAX - 1) - SERVICE_FD_MAX * id;
-}
-
-static DECLARE_BITMAP(sfd_map, SERVICE_FD_MAX);
-static int sfd_arr[SERVICE_FD_MAX];
-/*
- * Variable for marking areas of code, where service fds modifications
- * are prohibited. It's used to safe them from reusing their numbers
- * by ordinary files. See install_service_fd() and close_service_fd().
- */
-bool sfds_protected = false;
-
-static void sfds_protection_bug(enum sfd_type type)
-{
- pr_err("Service fd %u is being modified in protected context\n", type);
- print_stack_trace(current ? vpid(current) : 0);
- BUG();
-}
-
-int install_service_fd(enum sfd_type type, int fd)
-{
- int sfd = __get_service_fd(type, service_fd_id);
-
- BUG_ON((int)type <= SERVICE_FD_MIN || (int)type >= SERVICE_FD_MAX);
- if (sfds_protected && !test_bit(type, sfd_map))
- sfds_protection_bug(type);
-
- if (service_fd_base == 0) {
- if (test_bit(type, sfd_map))
- close(sfd_arr[type]);
- sfd_arr[type] = fd;
- set_bit(type, sfd_map);
- return fd;
- }
-
- if (dup3(fd, sfd, O_CLOEXEC) != sfd) {
- pr_perror("Dup %d -> %d failed", fd, sfd);
- close(fd);
- return -1;
- }
-
- set_bit(type, sfd_map);
- close(fd);
- return sfd;
-}
-
-int get_service_fd(enum sfd_type type)
-{
- BUG_ON((int)type <= SERVICE_FD_MIN || (int)type >= SERVICE_FD_MAX);
-
- if (!test_bit(type, sfd_map))
- return -1;
-
- if (service_fd_base == 0)
- return sfd_arr[type];
-
- return __get_service_fd(type, service_fd_id);
-}
-
-int criu_get_image_dir(void)
-{
- return get_service_fd(IMG_FD_OFF);
-}
-
-int close_service_fd(enum sfd_type type)
-{
- int fd;
-
- if (sfds_protected)
- sfds_protection_bug(type);
-
- fd = get_service_fd(type);
- if (fd < 0)
- return 0;
-
- if (close_safe(&fd))
- return -1;
-
- clear_bit(type, sfd_map);
- return 0;
-}
-
-static void move_service_fd(struct pstree_item *me, int type, int new_id, int new_base)
-{
- int old = get_service_fd(type);
- int new = new_base - type - SERVICE_FD_MAX * new_id;
- int ret;
-
- if (old < 0)
- return;
- ret = dup2(old, new);
- if (ret == -1) {
- if (errno != EBADF)
- pr_perror("Unable to clone %d->%d", old, new);
- } else if (!(rsti(me)->clone_flags & CLONE_FILES))
- close(old);
-}
-
-static int choose_service_fd_base(struct pstree_item *me)
-{
- int nr, real_nr, fdt_nr = 1, id = rsti(me)->service_fd_id;
-
- if (rsti(me)->fdt) {
- /* The base is set by owner of fdt (id 0) */
- if (id != 0)
- return service_fd_base;
- fdt_nr = rsti(me)->fdt->nr;
- }
- /* Now find process's max used fd number */
- if (!list_empty(&rsti(me)->fds))
- nr = list_entry(rsti(me)->fds.prev,
- struct fdinfo_list_entry, ps_list)->fe->fd;
- else
- nr = -1;
-
- nr = max(nr, inh_fd_max);
- /*
- * Service fds go after max fd near right border of alignment:
- *
- * ...|max_fd|max_fd+1|...|sfd first|...|sfd last (aligned)|
- *
- * So, they take maximum numbers of area allocated by kernel.
- * See linux alloc_fdtable() for details.
- */
- nr += (SERVICE_FD_MAX - SERVICE_FD_MIN) * fdt_nr;
- nr += 16; /* Safety pad */
- real_nr = nr;
-
- nr /= (1024 / sizeof(void *));
- if (nr)
- nr = 1 << (32 - __builtin_clz(nr));
- else
- nr = 1;
- nr *= (1024 / sizeof(void *));
-
- if (nr > service_fd_rlim_cur) {
- /* Right border is bigger, than rlim. OK, then just aligned value is enough */
- nr = round_down(service_fd_rlim_cur, (1024 / sizeof(void *)));
- if (nr < real_nr) {
- pr_err("Can't chose service_fd_base: %d %d\n", nr, real_nr);
- return -1;
- }
- }
-
- return nr;
-}
-
-int clone_service_fd(struct pstree_item *me)
-{
- int id, new_base, i, ret = -1;
-
- new_base = choose_service_fd_base(me);
- id = rsti(me)->service_fd_id;
-
- if (new_base == -1)
- return -1;
- if (service_fd_base == new_base && service_fd_id == id)
- return 0;
-
- /* Dup sfds in memmove() style: they may overlap */
- if (get_service_fd(LOG_FD_OFF) < new_base - LOG_FD_OFF - SERVICE_FD_MAX * id)
- for (i = SERVICE_FD_MIN + 1; i < SERVICE_FD_MAX; i++)
- move_service_fd(me, i, id, new_base);
- else
- for (i = SERVICE_FD_MAX - 1; i > SERVICE_FD_MIN; i--)
- move_service_fd(me, i, id, new_base);
-
- service_fd_base = new_base;
- service_fd_id = id;
- ret = 0;
-
- return ret;
-}
-
-bool is_any_service_fd(int fd)
-{
- int sfd_min_fd = __get_service_fd(SERVICE_FD_MAX, service_fd_id);
- int sfd_max_fd = __get_service_fd(SERVICE_FD_MIN, service_fd_id);
-
- if (fd > sfd_min_fd && fd < sfd_max_fd) {
- int type = SERVICE_FD_MAX - (fd - sfd_min_fd);
- if (type > SERVICE_FD_MIN && type < SERVICE_FD_MAX)
- return !!test_bit(type, sfd_map);
- }
-
- return false;
-}
-
-bool is_service_fd(int fd, enum sfd_type type)
-{
- return fd == get_service_fd(type);
-}
-
int copy_file(int fd_in, int fd_out, size_t bytes)
{
ssize_t written = 0;
--
2.20.1
More information about the CRIU
mailing list