[CRIU] [PATCH v3] rework criu check logic
Saied Kazemi
saied at google.com
Wed Mar 16 14:38:28 PDT 2016
The "criu check" command to check if the kernel is properly configured
to run criu is broken.
The "criu check --ms" command used to be the way to tell criu to check
only for features that have been merged upstream. But recent kernels
have a set of features whose presence doesn't necessarily mean that
dump or restore will fail but rather *may* fail depending on whether
the process tree uses those features.
This patch deprecates --ms and introduces --extra, --experimental,
and --all. See "criu --help" or "man criu" for more info.
Typical use cases are:
$ sudo criu check
<zero or more warnings and errors...>
Looks good.
$ echo $?
0
$ sudo criu check --extra
<zero or more warnings and errors...>
Looks good.
$ echo $?
1
$ sudo criu check --extra
<one or more warnings...>
Looks good but some kernel features are missing
which, depending on your process tree, may cause
dump or restore failure.
$ echo $?
1
$ sudo criu check --feature list
mnt_id aio_remap timerfd tun userns fdinfo_lock seccomp_suspend \
seccomp_filters loginuid cgroupns
$ sudo criu check --feature mnt_id
Warn (cr-check.c:283): fdinfo doesn't contain the mnt_id field
$ echo $?
1
$ sudo criu check --feature tun
tun is supported
$ echo $?
0
Signed-off-by: Saied Kazemi <saied at google.com>
---
Documentation/criu.txt | 48 ++++++++--
criu/cr-check.c | 237 ++++++++++++++++++++++++++++++----------------
criu/cr-service.c | 3 -
criu/crtools.c | 40 ++++++--
criu/include/cr_options.h | 3 +-
5 files changed, 232 insertions(+), 99 deletions(-)
diff --git a/Documentation/criu.txt b/Documentation/criu.txt
index e7ba6dd..77eb5be 100644
--- a/Documentation/criu.txt
+++ b/Documentation/criu.txt
@@ -314,15 +314,51 @@ relaxing capability with *--cpu-cap*=*none* parameter.
*check*
~~~~~~~
-Tests whether the kernel support is up to date.
+Checks whether the kernel supports the features that *criu* needs to
+successfully dump and restore a process tree.
-*--ms*::
- Do not check not yet merged features.
+There are three categories of kernel support as described below. *criu
+check* always checks Category 1 features unless *--feature* is specified
+which only checks the specified feature.
+
+- *Category 1*. Absolutely required. These are features like
+ '/proc/<pid>/map_files', 'NETLINK_SOCK_DIAG' socket
+ monitoring, '/proc/sys/kernel/ns_last_pid', etc.
+
+- *Category 2*. Required only for specific cases. These are features
+ like aio remap, '/dev/net/tun', etc. that are
+ required if the process being dumped or restored
+ is using them.
+
+- *Category 3*. Experimental. These are features like task-diag that
+ are used for experimental purposes (mostly
+ during development).
+
+If there are no errors or warnings, *criu* prints "Looks good." and its
+exit code will be 0.
+
+A missing Category 1 feature causes *criu* to print "Does not look good."
+and its exit code will be non-zero.
+
+Missing Category 2 and 3 features cause *criu* to print "Looks good but
+some kernel features are missing which, depending on your process tree,
+may cause dump or restore failure." and its exit code will be non-zero.
+
+Without an argument, *criu check* checks Category 1 features. This
+behavior can change with the following options:
+
+*--extra*::
+ Check kernel support for Category 2 features.
+
+*--experimental*::
+ Check kernel support for Category 3 features.
+
+*--all*::
+ Check kernel support for Category 1, 2, and 3 features.
*--feature* '<name>'::
- Check a particular feature. Instead of checking everything one may specify
- which exactly feature is to be tested. The '<name>' may be: *mnt_id*,
- *aio_remap*, *timerfd*, *tun*, *userns*.
+ Check a specific feature. If '<name>' is 'list', a list of valid
+ kernel feature names that can be checked will be printed.
*page-server*
~~~~~~~~~~~~~
diff --git a/criu/cr-check.c b/criu/cr-check.c
index ea03684..428116a 100644
--- a/criu/cr-check.c
+++ b/criu/cr-check.c
@@ -41,6 +41,8 @@
#include "pstree.h"
#include "cr_options.h"
+static char *feature_name(int (*func)());
+
static int check_tty(void)
{
int master = -1, slave = -1;
@@ -169,7 +171,7 @@ static int check_kcmp(void)
return 0;
}
-static int check_prctl(void)
+static int check_prctl_cat1(void)
{
unsigned long user_auxv = 0;
unsigned int *tid_addr;
@@ -183,23 +185,18 @@ static int check_prctl(void)
}
/*
- * Either new or old interface must be supported in the kernel.
+ * It's OK if the new interface is not supported because it's
+ * a Category 2 feature, but the old interface has to be supported.
*/
ret = prctl(PR_SET_MM, PR_SET_MM_MAP_SIZE, (unsigned long)&size, 0, 0);
if (ret < 0) {
- if (!opts.check_ms_kernel) {
- pr_msg("prctl: PR_SET_MM_MAP is not supported, which "
- "is required for restoring user namespaces: %m\n");
- return -1;
- } else
- pr_warn("Skipping unssuported PR_SET_MM_MAP: %m\n");
-
+ pr_msg("Info prctl: PR_SET_MM_MAP_SIZE is not supported\n");
ret = prctl(PR_SET_MM, PR_SET_MM_BRK, (unsigned long)sbrk(0), 0, 0);
if (ret < 0) {
if (errno == EPERM)
pr_msg("prctl: One needs CAP_SYS_RESOURCE capability to perform testing\n");
else
- pr_msg("prctl: PR_SET_MM is not supported: %m\n");
+ pr_msg("prctl: PR_SET_MM_BRK is not supported: %m\n");
return -1;
}
@@ -219,6 +216,19 @@ static int check_prctl(void)
return 0;
}
+static int check_prctl_cat2(void)
+{
+ unsigned int size = 0;
+ int ret;
+
+ ret = prctl(PR_SET_MM, PR_SET_MM_MAP_SIZE, (unsigned long)&size, 0, 0);
+ if (ret) {
+ pr_warn("prctl: PR_SET_MM_MAP_SIZE is not supported\n");
+ return -1;
+ }
+ return 0;
+}
+
static int check_fcntl(void)
{
u32 v[2];
@@ -766,11 +776,8 @@ static int check_aio_remap(void)
ctx = (aio_context_t)naddr;
r = syscall(SYS_io_getevents, ctx, 0, 1, NULL, NULL);
if (r < 0) {
- if (!opts.check_ms_kernel) {
- pr_err("AIO remap doesn't work properly: %m\n");
- return -1;
- } else
- pr_warn("Skipping unsupported AIO remap: %m\n");
+ pr_err("AIO remap doesn't work properly: %m\n");
+ return -1;
}
return 0;
@@ -782,12 +789,8 @@ static int check_fdinfo_lock(void)
return -1;
if (!kdat.has_fdinfo_lock) {
- if (!opts.check_ms_kernel) {
- pr_err("fdinfo doesn't contain the lock field\n");
- return -1;
- } else {
- pr_warn("fdinfo doesn't contain the lock field\n");
- }
+ pr_err("fdinfo doesn't contain the lock field\n");
+ return -1;
}
return 0;
@@ -823,10 +826,6 @@ static int check_clone_parent_vs_pid()
static int check_cgroupns(void)
{
int ret;
- if (opts.check_ms_kernel) {
- pr_warn("Skipping cgroup namespaces check\n");
- return 0;
- }
ret = access("/proc/self/ns/cgroup", F_OK);
if (ret < 0) {
@@ -839,6 +838,29 @@ static int check_cgroupns(void)
static int (*chk_feature)(void);
+/*
+ * There are three categories of kernel features:
+ *
+ * 1. Absolutely required (/proc/pid/map_files, ptrace PEEKSIGINFO, etc.).
+ * 2. Required only for specific cases (aio remap, tun, etc.).
+ * Checked when --extra or --all is specified.
+ * 3. Experimental (task-diag).
+ * Checked when --experimental or --all is specified.
+ *
+ * We fail if any feature in category 1 is missing but tolerate failures
+ * in the other categories. Currently, there is nothing in category 3.
+ */
+#define CHECK_GOOD "Looks good."
+#define CHECK_BAD "Does not look good."
+#define CHECK_MAYBE "Looks good but some kernel features are missing\n" \
+ "which, depending on your process tree, may cause\n" \
+ "dump or restore failure."
+#define CHECK_CAT1(fn) do { \
+ if ((ret = fn) != 0) { \
+ print_on_level(DEFAULT_LOGLEVEL, "%s\n", CHECK_BAD); \
+ return ret; \
+ } \
+ } while (0)
int cr_check(void)
{
struct ns_id ns = { .type = NS_CRIU, .ns_pid = PROC_SELF, .nd = &mnt_ns_desc };
@@ -863,44 +885,74 @@ int cr_check(void)
return -1;
if (chk_feature) {
- ret = chk_feature();
- goto out;
+ if (chk_feature())
+ return -1;
+ print_on_level(DEFAULT_LOGLEVEL, "%s is supported\n",
+ feature_name(chk_feature));
+ return 0;
}
- ret |= check_map_files();
- ret |= check_sock_diag();
- ret |= check_ns_last_pid();
- ret |= check_sock_peek_off();
- ret |= check_kcmp();
- ret |= check_prctl();
- ret |= check_fcntl();
- ret |= check_proc_stat();
- ret |= check_tcp();
- ret |= check_fdinfo_ext();
- ret |= check_unaligned_vmsplice();
- ret |= check_tty();
- ret |= check_so_gets();
- ret |= check_ipc();
- ret |= check_sigqueuinfo();
- ret |= check_ptrace_peeksiginfo();
- ret |= check_ptrace_suspend_seccomp();
- ret |= check_ptrace_dump_seccomp_filters();
- ret |= check_mem_dirty_track();
- ret |= check_posix_timers();
- ret |= check_tun_cr(0);
- ret |= check_timerfd();
- ret |= check_mnt_id();
- ret |= check_aio_remap();
- ret |= check_fdinfo_lock();
- ret |= check_clone_parent_vs_pid();
- ret |= check_cgroupns();
+ /*
+ * Category 1 - absolutely required.
+ * So that the user can see clearly what's missing, we exit with
+ * non-zero status on the first failure because it gets very
+ * confusing when there are many warnings and error messages.
+ */
+ CHECK_CAT1(check_map_files());
+ CHECK_CAT1(check_sock_diag());
+ CHECK_CAT1(check_ns_last_pid());
+ CHECK_CAT1(check_sock_peek_off());
+ CHECK_CAT1(check_kcmp());
+ CHECK_CAT1(check_prctl_cat1());
+ CHECK_CAT1(check_fcntl());
+ CHECK_CAT1(check_proc_stat());
+ CHECK_CAT1(check_tcp());
+ CHECK_CAT1(check_fdinfo_ext());
+ CHECK_CAT1(check_unaligned_vmsplice());
+ CHECK_CAT1(check_tty());
+ CHECK_CAT1(check_so_gets());
+ CHECK_CAT1(check_ipc());
+ CHECK_CAT1(check_sigqueuinfo());
+ CHECK_CAT1(check_ptrace_peeksiginfo());
+
+ /*
+ * Category 2 - required for specific cases.
+ * Unlike Category 1 features, we don't exit with non-zero status
+ * on a failure because CRIU may still work.
+ */
+ if (opts.check_extra_features) {
+ ret |= check_prctl_cat2();
+ ret |= check_ptrace_suspend_seccomp();
+ ret |= check_ptrace_dump_seccomp_filters();
+ ret |= check_mem_dirty_track();
+ ret |= check_posix_timers();
+ ret |= check_tun_cr(0);
+ ret |= check_timerfd();
+ ret |= check_mnt_id();
+ ret |= check_aio_remap();
+ ret |= check_fdinfo_lock();
+ ret |= check_clone_parent_vs_pid();
+ ret |= check_cgroupns();
+ }
-out:
- if (!ret)
- print_on_level(DEFAULT_LOGLEVEL, "Looks good.\n");
+ /*
+ * Category 3 - experimental.
+ */
+ if (opts.check_experimental_features) {
+ /*
+ * Place holder as currently there are no checks.
+ * Remove this comment when adding checks for
+ * experimental features.
+ */
+ }
+ print_on_level(DEFAULT_LOGLEVEL, "%s\n", ret ? CHECK_MAYBE : CHECK_GOOD);
return ret;
}
+#undef CHECK_GOOD
+#undef CHECK_BAD
+#undef CHECK_MAYBE
+#undef CHECK_CAT1
static int check_tun(void)
{
@@ -926,7 +978,7 @@ static int check_userns(void)
ret = prctl(PR_SET_MM, PR_SET_MM_MAP_SIZE, (unsigned long)&size, 0, 0);
if (ret < 0) {
- pr_perror("No new prctl API");
+ pr_perror("prctl: PR_SET_MM_MAP_SIZE is not supported");
return -1;
}
@@ -946,32 +998,53 @@ static int check_loginuid(void)
return 0;
}
+struct feature_list {
+ char *name;
+ int (*func)();
+};
+
+static struct feature_list feature_list[] = {
+ { "mnt_id", check_mnt_id },
+ { "aio_remap", check_aio_remap },
+ { "timerfd", check_timerfd },
+ { "tun", check_tun },
+ { "userns", check_userns },
+ { "fdinfo_lock", check_fdinfo_lock },
+ { "seccomp_suspend", check_ptrace_suspend_seccomp },
+ { "seccomp_filters", check_ptrace_dump_seccomp_filters },
+ { "loginuid", check_loginuid },
+ { "cgroupns", check_cgroupns },
+ { NULL, NULL },
+};
+
int check_add_feature(char *feat)
{
- if (!strcmp(feat, "mnt_id"))
- chk_feature = check_mnt_id;
- else if (!strcmp(feat, "aio_remap"))
- chk_feature = check_aio_remap;
- else if (!strcmp(feat, "timerfd"))
- chk_feature = check_timerfd;
- else if (!strcmp(feat, "tun"))
- chk_feature = check_tun;
- else if (!strcmp(feat, "userns"))
- chk_feature = check_userns;
- else if (!strcmp(feat, "fdinfo_lock"))
- chk_feature = check_fdinfo_lock;
- else if (!strcmp(feat, "seccomp_suspend"))
- chk_feature = check_ptrace_suspend_seccomp;
- else if (!strcmp(feat, "seccomp_filters"))
- chk_feature = check_ptrace_dump_seccomp_filters;
- else if (!strcmp(feat, "loginuid"))
- chk_feature = check_loginuid;
- else if (!strcmp(feat, "cgroupns"))
- chk_feature = check_cgroupns;
- else {
- pr_err("Unknown feature %s\n", feat);
- return -1;
+ struct feature_list *fl;
+
+ if (!strcmp(feat, "list")) {
+ for (fl = feature_list; fl->name; fl++)
+ pr_msg("%s ", fl->name);
+ pr_msg("\n");
+ return 1;
}
- return 0;
+ for (fl = feature_list; fl->name; fl++) {
+ if (!strcmp(feat, fl->name)) {
+ chk_feature = fl->func;
+ return 0;
+ }
+ }
+ pr_err("Unknown feature %s\n", feat);
+ return -1;
+}
+
+static char *feature_name(int (*func)())
+{
+ struct feature_list *fl;
+
+ for (fl = feature_list; fl->func; fl++) {
+ if (fl->func == func)
+ return fl->name;
+ }
+ return NULL;
}
diff --git a/criu/cr-service.c b/criu/cr-service.c
index 88d4af7..032c763 100644
--- a/criu/cr-service.c
+++ b/criu/cr-service.c
@@ -567,9 +567,6 @@ static int check(int sk)
setproctitle("check --rpc");
- /* Check only minimal kernel support */
- opts.check_ms_kernel = true;
-
if (!cr_check())
resp.success = true;
diff --git a/criu/crtools.c b/criu/crtools.c
index 800fa6d..85c06a3 100644
--- a/criu/crtools.c
+++ b/criu/crtools.c
@@ -319,6 +319,9 @@ int main(int argc, char *argv[], char *envp[])
{ "external", required_argument, 0, 1073 },
{ "empty-ns", required_argument, 0, 1074 },
{ "unshare", required_argument, 0, 1075 },
+ { "extra", no_argument, 0, 1076 },
+ { "experimental", no_argument, 0, 1077 },
+ { "all", no_argument, 0, 1078 },
{ },
};
@@ -499,8 +502,8 @@ int main(int argc, char *argv[], char *envp[])
opts.force_irmap = true;
break;
case 1054:
- opts.check_ms_kernel = true;
- break;
+ pr_err("--ms is deprecated; see \"Check options\" of criu --help\n");
+ return 1;
case 'L':
opts.libdir = optarg;
break;
@@ -534,8 +537,11 @@ int main(int argc, char *argv[], char *envp[])
return 1;
break;
case 1063:
- if (check_add_feature(optarg) < 0)
+ ret = check_add_feature(optarg);
+ if (ret < 0) /* invalid kernel feature name */
return 1;
+ if (ret > 0) /* list kernel features and exit */
+ return 0;
break;
case 1064:
if (!add_skip_mount(optarg))
@@ -602,6 +608,16 @@ int main(int argc, char *argv[], char *envp[])
return 1;
}
break;
+ case 1076:
+ opts.check_extra_features = true;
+ break;
+ case 1077:
+ opts.check_experimental_features = true;
+ break;
+ case 1078:
+ opts.check_extra_features = true;
+ opts.check_experimental_features = true;
+ break;
case 'V':
pr_msg("Version: %s\n", CRIU_VERSION);
if (strcmp(CRIU_GITID, "0"))
@@ -763,7 +779,7 @@ usage:
"Usage:\n"
" criu dump|pre-dump -t PID [<options>]\n"
" criu restore [<options>]\n"
-" criu check [--ms]\n"
+" criu check [--feature FEAT]\n"
" criu exec -p PID <syscall-string>\n"
" criu page-server\n"
" criu service [<options>]\n"
@@ -857,8 +873,19 @@ usage:
" socket[inode]\n"
" file[mnt_id:inode]\n"
" --empty-ns {net}\n"
-" Create a namespace, but don't restore its properies.\n"
-" An user will retore them from action scripts.\n"
+" Create a namespace, but don't restore its properies.\n"
+" An user will retore them from action scripts.\n"
+"Check options:\n"
+" without any arguments, \"criu check\" checks availability of absolutely required\n"
+" kernel features; if any of these features is missing dump and restore will fail\n"
+" --extra also check availability of extra kernel features\n"
+" --experimental also check availability of experimental kernel features\n"
+" --all also check availability of extra and experimental kernel features\n"
+" --feature FEAT only check availability of one of the following kernel features\n"
+" "
+ );
+ check_add_feature("list");
+ pr_msg(
"\n"
"* Logging:\n"
" -o|--log-file FILE log file name\n"
@@ -886,7 +913,6 @@ usage:
"Other options:\n"
" -h|--help show this text\n"
" -V|--version show version\n"
-" --ms don't check not yet merged kernel features\n"
);
return 0;
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
index c1e8fbc..17eb0f0 100644
--- a/criu/include/cr_options.h
+++ b/criu/include/cr_options.h
@@ -56,7 +56,8 @@ struct cr_options {
int final_state;
char *show_dump_file;
char *show_fmt;
- bool check_ms_kernel;
+ bool check_extra_features;
+ bool check_experimental_features;
bool show_pages_content;
union {
bool restore_detach;
--
2.7.0.rc3.207.g0ac5344
More information about the CRIU
mailing list