<div dir="ltr">Pavel and Adrian,<div><br></div><div>I had some time this morning and did a quick rework of criu check, hopefully without breaking anything.  I haven&#39;t had time for thorough testing, but in the interest of time I wanted to send the patch for everyone&#39;s comments.</div><div><br></div><div>With this patch, &quot;criu check --abs&quot; passes on 3.19 but fails on 3.13 due to the PR_SET_MM_MAP check.</div><div><br></div><div>I look forward to your review and feedback.</div><div><br></div><div>--Saied</div><div><br></div></div><div class="gmail_extra"><br><div class="gmail_quote">On Mon, Feb 29, 2016 at 2:52 PM, Saied Kazemi <span dir="ltr">&lt;<a href="mailto:saied@google.com" target="_blank">saied@google.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">The &quot;criu check&quot; command to check if the kernel is properly configured<br>
to run criu is broken.<br>
<br>
The &quot;criu check --ms&quot; command used to be the way to tell criu to check<br>
only for features that have been merged upstream.  But recent kernels<br>
have a set of features whose absence doesn&#39;t necessarily mean dump will<br>
fail but rather that dump or restore *may* fail (e.g., soft dirty tracker,<br>
tun support, seccomp).<br>
<br>
This patch deprecates --ms and introduces --abs for absolutely needed<br>
features.<br>
<br>
Typical use cases are:<br>
<br>
        $ sudo criu check --abs<br>
        Looks good.<br>
        $ sudo criu check<br>
        &lt;zero or more errors...&gt;<br>
        Looks good but some kernel features are missing.<br>
        $ sudo criu check --feature mnt_id<br>
        Looks good.<br>
        $ sudo criu check --feature seccomp_suspend<br>
        Error (cr-check.c:604): Kernel doesn&#39;t support PTRACE_O_SUSPEND_SECCOMP<br>
        $ sudo criu check --feature list<br>
        mnt_id aio_remap timerfd tun userns fdinfo_lock seccomp_suspend \<br>
                seccomp_filters loginuid cgroupns<br>
<br>
Signed-off-by: Saied Kazemi &lt;<a href="mailto:saied@google.com">saied@google.com</a>&gt;<br>
---<br>
 criu/cr-check.c           | 178 +++++++++++++++++++++++-----------------------<br>
 criu/cr-service.c         |   4 +-<br>
 criu/crtools.c            |  28 ++++++--<br>
 criu/include/cr_options.h |   2 +-<br>
 4 files changed, 112 insertions(+), 100 deletions(-)<br>
<br>
diff --git a/criu/cr-check.c b/criu/cr-check.c<br>
index a67ee2d..d08e085 100644<br>
--- a/criu/cr-check.c<br>
+++ b/criu/cr-check.c<br>
@@ -171,7 +171,6 @@ static int check_kcmp(void)<br>
<br>
 static int check_prctl(void)<br>
 {<br>
-       unsigned long user_auxv = 0;<br>
        unsigned int *tid_addr;<br>
        unsigned int size = 0;<br>
        int ret;<br>
@@ -183,37 +182,13 @@ static int check_prctl(void)<br>
        }<br>
<br>
        /*<br>
-        * Either new or old interface must be supported in the kernel.<br>
+        * The new interface must be supported in the kernel.<br>
         */<br>
        ret = prctl(PR_SET_MM, PR_SET_MM_MAP_SIZE, (unsigned long)&amp;size, 0, 0);<br>
        if (ret) {<br>
-               if (!opts.check_ms_kernel) {<br>
-                       pr_msg(&quot;prctl: PR_SET_MM_MAP is not supported, which &quot;<br>
-                              &quot;is required for restoring user namespaces\n&quot;);<br>
-                       return -1;<br>
-               } else<br>
-                       pr_warn(&quot;Skipping unssuported PR_SET_MM_MAP\n&quot;);<br>
-<br>
-               ret = prctl(PR_SET_MM, PR_SET_MM_BRK, brk(0), 0, 0);<br>
-               if (ret) {<br>
-                       if (ret == -EPERM)<br>
-                               pr_msg(&quot;prctl: One needs CAP_SYS_RESOURCE capability to perform testing\n&quot;);<br>
-                       else<br>
-                               pr_msg(&quot;prctl: PR_SET_MM is not supported\n&quot;);<br>
-                       return -1;<br>
-               }<br>
-<br>
-               ret = prctl(PR_SET_MM, PR_SET_MM_EXE_FILE, -1, 0, 0);<br>
-               if (ret != -EBADF) {<br>
-                       pr_msg(&quot;prctl: PR_SET_MM_EXE_FILE is not supported (%d)\n&quot;, ret);<br>
-                       return -1;<br>
-               }<br>
-<br>
-               ret = prctl(PR_SET_MM, PR_SET_MM_AUXV, (long)&amp;user_auxv, sizeof(user_auxv), 0);<br>
-               if (ret) {<br>
-                       pr_msg(&quot;prctl: PR_SET_MM_AUXV is not supported\n&quot;);<br>
-                       return -1;<br>
-               }<br>
+               pr_msg(&quot;prctl: PR_SET_MM_MAP is not supported, which &quot;<br>
+                      &quot;is required for restoring user namespaces\n&quot;);<br>
+               return -1;<br>
        }<br>
<br>
        return 0;<br>
@@ -766,11 +741,8 @@ static int check_aio_remap(void)<br>
        ctx = (aio_context_t)naddr;<br>
        r = syscall(SYS_io_getevents, ctx, 0, 1, NULL, NULL);<br>
        if (r &lt; 0) {<br>
-               if (!opts.check_ms_kernel) {<br>
-                       pr_err(&quot;AIO remap doesn&#39;t work properly\n&quot;);<br>
-                       return -1;<br>
-               } else<br>
-                       pr_warn(&quot;Skipping unsupported AIO remap\n&quot;);<br>
+               pr_err(&quot;AIO remap doesn&#39;t work properly\n&quot;);<br>
+               return -1;<br>
        }<br>
<br>
        return 0;<br>
@@ -782,12 +754,8 @@ static int check_fdinfo_lock(void)<br>
                return -1;<br>
<br>
        if (!kdat.has_fdinfo_lock) {<br>
-               if (!opts.check_ms_kernel) {<br>
-                       pr_err(&quot;fdinfo doesn&#39;t contain the lock field\n&quot;);<br>
-                       return -1;<br>
-               } else {<br>
-                       pr_warn(&quot;fdinfo doesn&#39;t contain the lock field\n&quot;);<br>
-               }<br>
+               pr_err(&quot;fdinfo doesn&#39;t contain the lock field\n&quot;);<br>
+               return -1;<br>
        }<br>
<br>
        return 0;<br>
@@ -823,14 +791,10 @@ static int check_clone_parent_vs_pid()<br>
 static int check_cgroupns(void)<br>
 {<br>
        int ret;<br>
-       if (opts.check_ms_kernel) {<br>
-               pr_warn(&quot;Skipping cgroup namespaces check\n&quot;);<br>
-               return 0;<br>
-       }<br>
<br>
        ret = access(&quot;/proc/self/ns/cgroup&quot;, F_OK);<br>
        if (ret &lt; 0) {<br>
-               pr_err(&quot;cgroupns not supported. This is not fatal.&quot;);<br>
+               pr_err(&quot;cgroupns not supported. This is not fatal.\n&quot;);<br>
                return -1;<br>
        }<br>
<br>
@@ -839,9 +803,22 @@ static int check_cgroupns(void)<br>
<br>
 static int (*chk_feature)(void);<br>
<br>
+/*<br>
+ * There are three categories of kernel features:<br>
+ *<br>
+ *     1. Absolutely required (/proc/pid/map_files, ptrace PEEKSIGINFO, etc.)<br>
+ *     2. Required only for specific cases (aio remap, tun, etc.)<br>
+ *     3. Experimental (task-diag)<br>
+ *<br>
+ * We fail if any feature in category 1 is missing but tolerate failures<br>
+ * in the other categories.  Currently, there is nothing in category 3.<br>
+ */<br>
+#define GOOD           &quot;Looks good.&quot;<br>
+#define GOOD_BUT       &quot;Looks good but some kernel features are missing.&quot;<br>
 int cr_check(void)<br>
 {<br>
        struct ns_id ns = { .type = NS_CRIU, .ns_pid = PROC_SELF, .nd = &amp;mnt_ns_desc };<br>
+       int absret = 0;<br>
        int ret = 0;<br>
<br>
        if (!is_root_user())<br>
@@ -863,26 +840,39 @@ int cr_check(void)<br>
                return -1;<br>
<br>
        if (chk_feature) {<br>
-               ret = chk_feature();<br>
+               if (chk_feature())<br>
+                       return -1;<br>
                goto out;<br>
        }<br>
<br>
-       ret |= check_map_files();<br>
-       ret |= check_sock_diag();<br>
-       ret |= check_ns_last_pid();<br>
-       ret |= check_sock_peek_off();<br>
-       ret |= check_kcmp();<br>
-       ret |= check_prctl();<br>
-       ret |= check_fcntl();<br>
-       ret |= check_proc_stat();<br>
-       ret |= check_tcp();<br>
-       ret |= check_fdinfo_ext();<br>
-       ret |= check_unaligned_vmsplice();<br>
-       ret |= check_tty();<br>
-       ret |= check_so_gets();<br>
-       ret |= check_ipc();<br>
-       ret |= check_sigqueuinfo();<br>
-       ret |= check_ptrace_peeksiginfo();<br>
+       /*<br>
+        * Category 1 - absolutely required.<br>
+        */<br>
+       absret |= check_map_files();<br>
+       absret |= check_sock_diag();<br>
+       absret |= check_ns_last_pid();<br>
+       absret |= check_sock_peek_off();<br>
+       absret |= check_kcmp();<br>
+       absret |= check_prctl();<br>
+       absret |= check_fcntl();<br>
+       absret |= check_proc_stat();<br>
+       absret |= check_tcp();<br>
+       absret |= check_fdinfo_ext();<br>
+       absret |= check_unaligned_vmsplice();<br>
+       absret |= check_tty();<br>
+       absret |= check_so_gets();<br>
+       absret |= check_ipc();<br>
+       absret |= check_sigqueuinfo();<br>
+       absret |= check_ptrace_peeksiginfo();<br>
+       if (absret || opts.check_abs_features) {<br>
+               if (!absret)<br>
+                       print_on_level(DEFAULT_LOGLEVEL, &quot;%s\n&quot;, GOOD);<br>
+               return absret;<br>
+       }<br>
+<br>
+       /*<br>
+        * Category 2 - required for specific cases.<br>
+        */<br>
        ret |= check_ptrace_suspend_seccomp();<br>
        ret |= check_ptrace_dump_seccomp_filters();<br>
        ret |= check_mem_dirty_track();<br>
@@ -896,10 +886,8 @@ int cr_check(void)<br>
        ret |= check_cgroupns();<br>
<br>
 out:<br>
-       if (!ret)<br>
-               print_on_level(DEFAULT_LOGLEVEL, &quot;Looks good.\n&quot;);<br>
-<br>
-       return ret;<br>
+       print_on_level(DEFAULT_LOGLEVEL, &quot;%s\n&quot;, ret ? GOOD_BUT : GOOD);<br>
+       return 0;<br>
 }<br>
<br>
 static int check_tun(void)<br>
@@ -947,32 +935,42 @@ static int check_loginuid(void)<br>
        return 0;<br>
 }<br>
<br>
+struct feature_list {<br>
+       char *name;<br>
+       int (*func)();<br>
+};<br>
+<br>
+static struct feature_list feature_list[] = {<br>
+       { &quot;mnt_id&quot;, check_mnt_id },<br>
+       { &quot;aio_remap&quot;, check_aio_remap },<br>
+       { &quot;timerfd&quot;, check_timerfd },<br>
+       { &quot;tun&quot;, check_tun },<br>
+       { &quot;userns&quot;, check_userns },<br>
+       { &quot;fdinfo_lock&quot;, check_fdinfo_lock },<br>
+       { &quot;seccomp_suspend&quot;, check_ptrace_suspend_seccomp },<br>
+       { &quot;seccomp_filters&quot;, check_ptrace_dump_seccomp_filters },<br>
+       { &quot;loginuid&quot;, check_loginuid },<br>
+       { &quot;cgroupns&quot;, check_cgroupns },<br>
+       { NULL, NULL },<br>
+};<br>
+<br>
 int check_add_feature(char *feat)<br>
 {<br>
-       if (!strcmp(feat, &quot;mnt_id&quot;))<br>
-               chk_feature = check_mnt_id;<br>
-       else if (!strcmp(feat, &quot;aio_remap&quot;))<br>
-               chk_feature = check_aio_remap;<br>
-       else if (!strcmp(feat, &quot;timerfd&quot;))<br>
-               chk_feature = check_timerfd;<br>
-       else if (!strcmp(feat, &quot;tun&quot;))<br>
-               chk_feature = check_tun;<br>
-       else if (!strcmp(feat, &quot;userns&quot;))<br>
-               chk_feature = check_userns;<br>
-       else if (!strcmp(feat, &quot;fdinfo_lock&quot;))<br>
-               chk_feature = check_fdinfo_lock;<br>
-       else if (!strcmp(feat, &quot;seccomp_suspend&quot;))<br>
-               chk_feature = check_ptrace_suspend_seccomp;<br>
-       else if (!strcmp(feat, &quot;seccomp_filters&quot;))<br>
-               chk_feature = check_ptrace_dump_seccomp_filters;<br>
-       else if (!strcmp(feat, &quot;loginuid&quot;))<br>
-               chk_feature = check_loginuid;<br>
-       else if (!strcmp(feat, &quot;cgroupns&quot;))<br>
-               chk_feature = check_cgroupns;<br>
-       else {<br>
-               pr_err(&quot;Unknown feature %s\n&quot;, feat);<br>
-               return -1;<br>
+       struct feature_list *fl;<br>
+<br>
+       if (!strcmp(feat, &quot;list&quot;)) {<br>
+               for (fl = feature_list; fl-&gt;name; fl++)<br>
+                       pr_msg(&quot;%s &quot;, fl-&gt;name);<br>
+               pr_msg(&quot;\n&quot;);<br>
+               return 1;<br>
        }<br>
<br>
-       return 0;<br>
+       for (fl = feature_list; fl-&gt;name; fl++) {<br>
+               if (!strcmp(feat, fl-&gt;name)) {<br>
+                       chk_feature = fl-&gt;func;<br>
+                       return 0;<br>
+               }<br>
+       }<br>
+       pr_err(&quot;Unknown feature %s\n&quot;, feat);<br>
+       return -1;<br>
 }<br>
diff --git a/criu/cr-service.c b/criu/cr-service.c<br>
index 88d4af7..a1d843d 100644<br>
--- a/criu/cr-service.c<br>
+++ b/criu/cr-service.c<br>
@@ -567,8 +567,8 @@ static int check(int sk)<br>
<br>
        setproctitle(&quot;check --rpc&quot;);<br>
<br>
-       /* Check only minimal kernel support */<br>
-       opts.check_ms_kernel = true;<br>
+       /* Check only absolutely needed kernel features */<br>
+       opts.check_abs_features = true;<br>
<br>
        if (!cr_check())<br>
                resp.success = true;<br>
diff --git a/criu/crtools.c b/criu/crtools.c<br>
index a8ddb82..2120082 100644<br>
--- a/criu/crtools.c<br>
+++ b/criu/crtools.c<br>
@@ -275,6 +275,7 @@ int main(int argc, char *argv[], char *envp[])<br>
                { &quot;timeout&quot;,                    required_argument,      0, 1072 },<br>
                { &quot;external&quot;,                   required_argument,      0, 1073 },<br>
                { &quot;empty-ns&quot;,                   required_argument,      0, 1074 },<br>
+               { &quot;abs&quot;,                        no_argument,            0, 1075 },<br>
                { },<br>
        };<br>
<br>
@@ -455,8 +456,8 @@ int main(int argc, char *argv[], char *envp[])<br>
                        opts.force_irmap = true;<br>
                        break;<br>
                case 1054:<br>
-                       opts.check_ms_kernel = true;<br>
-                       break;<br>
+                       pr_err(&quot;--ms is deprecated, use --abs instead\n&quot;);<br>
+                       return 1;<br>
                case &#39;L&#39;:<br>
                        opts.libdir = optarg;<br>
                        break;<br>
@@ -490,8 +491,11 @@ int main(int argc, char *argv[], char *envp[])<br>
                                return 1;<br>
                        break;<br>
                case 1063:<br>
-                       if (check_add_feature(optarg) &lt; 0)<br>
+                       ret = check_add_feature(optarg);<br>
+                       if (ret &lt; 0)    /* invalid kernel feature name */<br>
                                return 1;<br>
+                       if (ret &gt; 0)    /* list kernel features and exit */<br>
+                               return 0;<br>
                        break;<br>
                case 1064:<br>
                        if (!add_skip_mount(optarg))<br>
@@ -554,6 +558,9 @@ int main(int argc, char *argv[], char *envp[])<br>
                                return 1;<br>
                        }<br>
                        break;<br>
+               case 1075:<br>
+                       opts.check_abs_features = true;<br>
+                       break;<br>
                case &#39;V&#39;:<br>
                        pr_msg(&quot;Version: %s\n&quot;, CRIU_VERSION);<br>
                        if (strcmp(CRIU_GITID, &quot;0&quot;))<br>
@@ -714,7 +721,7 @@ usage:<br>
 &quot;Usage:\n&quot;<br>
 &quot;  criu dump|pre-dump -t PID [&lt;options&gt;]\n&quot;<br>
 &quot;  criu restore [&lt;options&gt;]\n&quot;<br>
-&quot;  criu check [--ms]\n&quot;<br>
+&quot;  criu check [--feature FEAT]\n&quot;<br>
 &quot;  criu exec -p PID &lt;syscall-string&gt;\n&quot;<br>
 &quot;  criu page-server\n&quot;<br>
 &quot;  criu service [&lt;options&gt;]\n&quot;<br>
@@ -807,8 +814,16 @@ usage:<br>
 &quot;                            socket[inode]\n&quot;<br>
 &quot;                            file[mnt_id:inode]\n&quot;<br>
 &quot;  --empty-ns {net}\n&quot;<br>
-&quot;                      Create a namespace, but don&#39;t restore its properies.\n&quot;<br>
-&quot;                      An user will retore them from action scripts.\n&quot;<br>
+&quot;                        Create a namespace, but don&#39;t restore its properies.\n&quot;<br>
+&quot;                        An user will retore them from action scripts.\n&quot;<br>
+&quot;Check options:\n&quot;<br>
+&quot;  --abs                 check availability of absolutely needed kernel features\n&quot;<br>
+&quot;  --ms                  deprecated, use --abs (don&#39;t check not yet merged kernel features)\n&quot;<br>
+&quot;  --feature FEAT        check availability of one of the following kernel features\n&quot;<br>
+&quot;                        &quot;<br>
+       );<br>
+       check_add_feature(&quot;list&quot;);<br>
+       pr_msg(<br>
 &quot;\n&quot;<br>
 &quot;* Logging:\n&quot;<br>
 &quot;  -o|--log-file FILE    log file name\n&quot;<br>
@@ -836,7 +851,6 @@ usage:<br>
 &quot;Other options:\n&quot;<br>
 &quot;  -h|--help             show this text\n&quot;<br>
 &quot;  -V|--version          show version\n&quot;<br>
-&quot;     --ms               don&#39;t check not yet merged kernel features\n&quot;<br>
        );<br>
<br>
        return 0;<br>
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h<br>
index a6f0b3e..ba2633a 100644<br>
--- a/criu/include/cr_options.h<br>
+++ b/criu/include/cr_options.h<br>
@@ -56,7 +56,7 @@ struct cr_options {<br>
        int                     final_state;<br>
        char                    *show_dump_file;<br>
        char                    *show_fmt;<br>
-       bool                    check_ms_kernel;<br>
+       bool                    check_abs_features;<br>
        bool                    show_pages_content;<br>
        union {<br>
                bool            restore_detach;<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.7.0.rc3.207.g0ac5344<br>
<br>
</font></span></blockquote></div><br></div>