[CRIU] [PATCH] criu: raise the task file limit for specific commands
Andrei Vagin
avagin at gmail.com
Tue Apr 16 10:03:18 MSK 2019
We don't need to do this from early_init.
Cc: Cyrill Gorcunov <gorcunov at gmail.com>
Signed-off-by: Andrei Vagin <avagin at gmail.com>
---
criu/cr-dump.c | 12 ++++++++
criu/cr-restore.c | 3 ++
criu/crtools.c | 79 -----------------------------------------------------
criu/include/util.h | 2 ++
criu/servicefd.c | 14 ++++++++++
criu/util.c | 20 ++++++++++++++
6 files changed, 51 insertions(+), 79 deletions(-)
diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index d4ed40b91..91b8b383a 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -1557,6 +1557,11 @@ int cr_pre_dump_tasks(pid_t pid)
struct pstree_item *item;
int ret = -1;
+ /*
+ * We might need a lot of pipes to fetch huge number of pages to dump.
+ */
+ rlimit_unlimit_nofile();
+
root_item = alloc_pstree_item();
if (!root_item)
goto err;
@@ -1754,6 +1759,13 @@ int cr_dump_tasks(pid_t pid)
pr_info("Dumping processes (pid: %d)\n", pid);
pr_info("========================================\n");
+ /*
+ * We will fetch all file descriptors for each task, their number can
+ * be bigger than a default file limit, so we need to raise it to the
+ * maximum.
+ */
+ rlimit_unlimit_nofile();
+
root_item = alloc_pstree_item();
if (!root_item)
goto err;
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 6a3d62450..a7b121b8c 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -2330,6 +2330,9 @@ int cr_restore_tasks(void)
{
int ret = -1;
+ if (init_service_fd())
+ return 1;
+
if (cr_plugin_init(CR_PLUGIN_STAGE__RESTORE))
return -1;
diff --git a/criu/crtools.c b/criu/crtools.c
index 6a2955287..9c8064462 100644
--- a/criu/crtools.c
+++ b/criu/crtools.c
@@ -20,9 +20,6 @@
#include <sys/utsname.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-
#include "int.h"
#include "page.h"
#include "common/compiler.h"
@@ -50,79 +47,6 @@
#include "setproctitle.h"
#include "sysctl.h"
-static void rlimit_unlimit_nofile(void)
-{
- struct rlimit new;
-
- new.rlim_cur = kdat.sysctl_nr_open;
- new.rlim_max = kdat.sysctl_nr_open;
-
- if (prlimit(getpid(), RLIMIT_NOFILE, &new, NULL)) {
- pr_perror("rlimit: Can't setup RLIMIT_NOFILE for self");
- return;
- } else
- pr_debug("rlimit: RLIMIT_NOFILE unlimited for self\n");
-
- service_fd_rlim_cur = kdat.sysctl_nr_open;
-}
-
-static int early_init(const char *cmd)
-{
- static const char *nofile_cmds[] = {
- "swrk", "service",
- "dump", "pre-dump",
- "restore",
- };
- size_t i;
-
- /*
- * Service fd engine implies that file descriptors
- * used won't be borrowed by the rest of the code
- * and default 1024 limit is not enough for high
- * loaded test/containers. Thus use kdat engine
- * to fetch current system level limit for numbers
- * of files allowed to open up and lift up own
- * limits.
- *
- * Note we have to do it before the service fd
- * get inited and we dont exit with errors here
- * because in worst scenario where clash of fd
- * happen we simply exit with explicit error
- * during real action stage.
- *
- * Same time raising limits cause kernel fdtable
- * to bloat so we do this only on the @nofile_cmds:
- *
- * - on dump criu needs additional files for sfd,
- * thus if container already has many files opened
- * we need to have at least not less space when
- * fetching fds from a target process;
- *
- * - on pre-dump we might need a lot of pipes to
- * fetch huge number of pages to dump;
- *
- * - on restore we still need to raise limits since
- * there is no guarantee that on dump we've not
- * been hitting fd limit already;
- *
- * - swrk and service obtain requests on the fly,
- * thus we don't know if on of above will be
- * there thus raise limits.
- */
- for (i = 0; i < ARRAY_SIZE(nofile_cmds); i++) {
- if (strcmp(nofile_cmds[i], cmd))
- continue;
- if (!kerndat_files_stat(true))
- rlimit_unlimit_nofile();
- break;
- }
-
- if (init_service_fd())
- return 1;
-
- return 0;
-}
-
int main(int argc, char *argv[], char *envp[])
{
int ret = -1;
@@ -158,9 +82,6 @@ int main(int argc, char *argv[], char *envp[])
log_set_loglevel(opts.log_level);
- if (early_init(argv[optind]))
- return -1;
-
if (!strcmp(argv[1], "swrk")) {
if (argc < 3)
goto usage;
diff --git a/criu/include/util.h b/criu/include/util.h
index 4e66dac60..621abb4a0 100644
--- a/criu/include/util.h
+++ b/criu/include/util.h
@@ -350,6 +350,8 @@ extern int epoll_del_rfd(int epfd, struct epoll_rfd *rfd);
extern int epoll_run_rfds(int epfd, struct epoll_event *evs, int nr_fds, int tmo);
extern int epoll_prepare(int nr_events, struct epoll_event **evs);
+extern void rlimit_unlimit_nofile(void);
+
extern int call_in_child_process(int (*fn)(void *), void *arg);
#ifdef __GLIBC__
extern void print_stack_trace(pid_t pid);
diff --git a/criu/servicefd.c b/criu/servicefd.c
index d7e81836b..82147921c 100644
--- a/criu/servicefd.c
+++ b/criu/servicefd.c
@@ -67,6 +67,20 @@ int init_service_fd(void)
{
struct rlimit64 rlimit;
+ /*
+ * Service fd engine implies that file descriptors used won't be
+ * borrowed by the rest of the code and default 1024 limit is not
+ * enough for high loaded test/containers. Thus use kdat engine to
+ * fetch current system level limit for numbers of files allowed to
+ * open up and lift up own limits.
+ *
+ * Note we have to do it before the service fd get initialized and we
+ * don't exit with errors here because in worst scenario where clash of
+ * fd happen we simply exit with explicit error during real action
+ * stage.
+ */
+ rlimit_unlimit_nofile();
+
/*
* Service FDs are those that most likely won't
* conflict with any 'real-life' ones
diff --git a/criu/util.c b/criu/util.c
index fbf0db3d3..9ba33ceb3 100644
--- a/criu/util.c
+++ b/criu/util.c
@@ -20,12 +20,15 @@
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/resource.h>
#include <netdb.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sched.h>
#include <ctype.h>
+#include "kerndat.h"
#include "page.h"
#include "util.h"
#include "image.h"
@@ -1348,6 +1351,23 @@ out:
return ret;
}
+void rlimit_unlimit_nofile(void)
+{
+ struct rlimit new;
+
+ new.rlim_cur = kdat.sysctl_nr_open;
+ new.rlim_max = kdat.sysctl_nr_open;
+
+ if (prlimit(getpid(), RLIMIT_NOFILE, &new, NULL)) {
+ pr_perror("rlimit: Can't setup RLIMIT_NOFILE for self");
+ return;
+ } else
+ pr_debug("rlimit: RLIMIT_NOFILE unlimited for self\n");
+
+ service_fd_rlim_cur = kdat.sysctl_nr_open;
+}
+
+
#ifdef __GLIBC__
#include <execinfo.h>
void print_stack_trace(pid_t pid)
--
2.14.5
More information about the CRIU
mailing list