[CRIU] [PATCH] rework criu check logic

Saied Kazemi saied at google.com
Mon Feb 29 14:58:19 PST 2016


Pavel and Adrian,

Had some time this morning and did a quick rework of criu check hopefully
without having broken anything.  Haven't had time for thorough testing but
in the interest of time wanted to send the patch for everyone's comments.

With this patch, "criu check --abs" passes on 3.19 but fails on 3.13 due
to PR_SET_MM_MAP.

Look forward to your review and feedback.

--Saied


On Mon, Feb 29, 2016 at 2:52 PM, Saied Kazemi <saied at google.com> wrote:

> The "criu check" command to check if the kernel is properly configured
> to run criu is broken.
>
> The "criu check --ms" command used to be the way to tell criu to check
> only for features that have been merged upstream.  But recent kernels
> have a set of features whose presence doesn't necessarily mean dump will
> fail but rather *may* fail to dump or restore (e.g., soft dirty tracker,
> tun support, seccomp).
>
> This patch deprecates --ms and introduces --abs for absolutely needed
> features.
>
> Typical use cases are:
>
>         $ sudo criu check --abs
>         Looks good.
>         $ sudo criu check
>         <zero or more errors...>
>         Looks good but some kernel features are missing.
>         $ sudo criu check --feature mnt_id
>         Looks good.
>         $ sudo criu check --feature seccomp_suspend
>         Error (cr-check.c:604): Kernel doesn't support
> PTRACE_O_SUSPEND_SECCOMP
>         $ sudo criu check --feature list
>         mnt_id aio_remap timerfd tun userns fdinfo_lock seccomp_suspend \
>                 seccomp_filters loginuid cgroupns
>
> Signed-off-by: Saied Kazemi <saied at google.com>
> ---
>  criu/cr-check.c           | 178
> +++++++++++++++++++++++-----------------------
>  criu/cr-service.c         |   4 +-
>  criu/crtools.c            |  28 ++++++--
>  criu/include/cr_options.h |   2 +-
>  4 files changed, 112 insertions(+), 100 deletions(-)
>
> diff --git a/criu/cr-check.c b/criu/cr-check.c
> index a67ee2d..d08e085 100644
> --- a/criu/cr-check.c
> +++ b/criu/cr-check.c
> @@ -171,7 +171,6 @@ static int check_kcmp(void)
>
>  static int check_prctl(void)
>  {
> -       unsigned long user_auxv = 0;
>         unsigned int *tid_addr;
>         unsigned int size = 0;
>         int ret;
> @@ -183,37 +182,13 @@ static int check_prctl(void)
>         }
>
>         /*
> -        * Either new or old interface must be supported in the kernel.
> +        * The new interface must be supported in the kernel.
>          */
>         ret = prctl(PR_SET_MM, PR_SET_MM_MAP_SIZE, (unsigned long)&size,
> 0, 0);
>         if (ret) {
> -               if (!opts.check_ms_kernel) {
> -                       pr_msg("prctl: PR_SET_MM_MAP is not supported,
> which "
> -                              "is required for restoring user
> namespaces\n");
> -                       return -1;
> -               } else
> -                       pr_warn("Skipping unssuported PR_SET_MM_MAP\n");
> -
> -               ret = prctl(PR_SET_MM, PR_SET_MM_BRK, brk(0), 0, 0);
> -               if (ret) {
> -                       if (ret == -EPERM)
> -                               pr_msg("prctl: One needs CAP_SYS_RESOURCE
> capability to perform testing\n");
> -                       else
> -                               pr_msg("prctl: PR_SET_MM is not
> supported\n");
> -                       return -1;
> -               }
> -
> -               ret = prctl(PR_SET_MM, PR_SET_MM_EXE_FILE, -1, 0, 0);
> -               if (ret != -EBADF) {
> -                       pr_msg("prctl: PR_SET_MM_EXE_FILE is not supported
> (%d)\n", ret);
> -                       return -1;
> -               }
> -
> -               ret = prctl(PR_SET_MM, PR_SET_MM_AUXV, (long)&user_auxv,
> sizeof(user_auxv), 0);
> -               if (ret) {
> -                       pr_msg("prctl: PR_SET_MM_AUXV is not supported\n");
> -                       return -1;
> -               }
> +               pr_msg("prctl: PR_SET_MM_MAP is not supported, which "
> +                      "is required for restoring user namespaces\n");
> +               return -1;
>         }
>
>         return 0;
> @@ -766,11 +741,8 @@ static int check_aio_remap(void)
>         ctx = (aio_context_t)naddr;
>         r = syscall(SYS_io_getevents, ctx, 0, 1, NULL, NULL);
>         if (r < 0) {
> -               if (!opts.check_ms_kernel) {
> -                       pr_err("AIO remap doesn't work properly\n");
> -                       return -1;
> -               } else
> -                       pr_warn("Skipping unsupported AIO remap\n");
> +               pr_err("AIO remap doesn't work properly\n");
> +               return -1;
>         }
>
>         return 0;
> @@ -782,12 +754,8 @@ static int check_fdinfo_lock(void)
>                 return -1;
>
>         if (!kdat.has_fdinfo_lock) {
> -               if (!opts.check_ms_kernel) {
> -                       pr_err("fdinfo doesn't contain the lock field\n");
> -                       return -1;
> -               } else {
> -                       pr_warn("fdinfo doesn't contain the lock field\n");
> -               }
> +               pr_err("fdinfo doesn't contain the lock field\n");
> +               return -1;
>         }
>
>         return 0;
> @@ -823,14 +791,10 @@ static int check_clone_parent_vs_pid()
>  static int check_cgroupns(void)
>  {
>         int ret;
> -       if (opts.check_ms_kernel) {
> -               pr_warn("Skipping cgroup namespaces check\n");
> -               return 0;
> -       }
>
>         ret = access("/proc/self/ns/cgroup", F_OK);
>         if (ret < 0) {
> -               pr_err("cgroupns not supported. This is not fatal.");
> +               pr_err("cgroupns not supported. This is not fatal.\n");
>                 return -1;
>         }
>
> @@ -839,9 +803,22 @@ static int check_cgroupns(void)
>
>  static int (*chk_feature)(void);
>
> +/*
> + * There are three categories of kernel features:
> + *
> + *     1. Absolutely required (/proc/pid/map_files, ptrace PEEKSIGINFO,
> etc.)
> + *     2. Required only for specific cases (aio remap, tun, etc.)
> + *     3. Experimental (task-diag)
> + *
> + * We fail if any feature in category 1 is missing but tolerate failures
> + * in the other categories.  Currently, there is nothing in category 3.
> + */
> +#define GOOD           "Looks good."
> +#define GOOD_BUT       "Looks good but some kernel features are missing."
>  int cr_check(void)
>  {
>         struct ns_id ns = { .type = NS_CRIU, .ns_pid = PROC_SELF, .nd =
> &mnt_ns_desc };
> +       int absret = 0;
>         int ret = 0;
>
>         if (!is_root_user())
> @@ -863,26 +840,39 @@ int cr_check(void)
>                 return -1;
>
>         if (chk_feature) {
> -               ret = chk_feature();
> +               if (chk_feature())
> +                       return -1;
>                 goto out;
>         }
>
> -       ret |= check_map_files();
> -       ret |= check_sock_diag();
> -       ret |= check_ns_last_pid();
> -       ret |= check_sock_peek_off();
> -       ret |= check_kcmp();
> -       ret |= check_prctl();
> -       ret |= check_fcntl();
> -       ret |= check_proc_stat();
> -       ret |= check_tcp();
> -       ret |= check_fdinfo_ext();
> -       ret |= check_unaligned_vmsplice();
> -       ret |= check_tty();
> -       ret |= check_so_gets();
> -       ret |= check_ipc();
> -       ret |= check_sigqueuinfo();
> -       ret |= check_ptrace_peeksiginfo();
> +       /*
> +        * Category 1 - absolutely required.
> +        */
> +       absret |= check_map_files();
> +       absret |= check_sock_diag();
> +       absret |= check_ns_last_pid();
> +       absret |= check_sock_peek_off();
> +       absret |= check_kcmp();
> +       absret |= check_prctl();
> +       absret |= check_fcntl();
> +       absret |= check_proc_stat();
> +       absret |= check_tcp();
> +       absret |= check_fdinfo_ext();
> +       absret |= check_unaligned_vmsplice();
> +       absret |= check_tty();
> +       absret |= check_so_gets();
> +       absret |= check_ipc();
> +       absret |= check_sigqueuinfo();
> +       absret |= check_ptrace_peeksiginfo();
> +       if (absret || opts.check_abs_features) {
> +               if (!absret)
> +                       print_on_level(DEFAULT_LOGLEVEL, "%s\n", GOOD);
> +               return absret;
> +       }
> +
> +       /*
> +        * Category 2 - required for specific cases.
> +        */
>         ret |= check_ptrace_suspend_seccomp();
>         ret |= check_ptrace_dump_seccomp_filters();
>         ret |= check_mem_dirty_track();
> @@ -896,10 +886,8 @@ int cr_check(void)
>         ret |= check_cgroupns();
>
>  out:
> -       if (!ret)
> -               print_on_level(DEFAULT_LOGLEVEL, "Looks good.\n");
> -
> -       return ret;
> +       print_on_level(DEFAULT_LOGLEVEL, "%s\n", ret ? GOOD_BUT : GOOD);
> +       return 0;
>  }
>
>  static int check_tun(void)
> @@ -947,32 +935,42 @@ static int check_loginuid(void)
>         return 0;
>  }
>
> +struct feature_list {
> +       char *name;
> +       int (*func)();
> +};
> +
> +static struct feature_list feature_list[] = {
> +       { "mnt_id", check_mnt_id },
> +       { "aio_remap", check_aio_remap },
> +       { "timerfd", check_timerfd },
> +       { "tun", check_tun },
> +       { "userns", check_userns },
> +       { "fdinfo_lock", check_fdinfo_lock },
> +       { "seccomp_suspend", check_ptrace_suspend_seccomp },
> +       { "seccomp_filters", check_ptrace_dump_seccomp_filters },
> +       { "loginuid", check_loginuid },
> +       { "cgroupns", check_cgroupns },
> +       { NULL, NULL },
> +};
> +
>  int check_add_feature(char *feat)
>  {
> -       if (!strcmp(feat, "mnt_id"))
> -               chk_feature = check_mnt_id;
> -       else if (!strcmp(feat, "aio_remap"))
> -               chk_feature = check_aio_remap;
> -       else if (!strcmp(feat, "timerfd"))
> -               chk_feature = check_timerfd;
> -       else if (!strcmp(feat, "tun"))
> -               chk_feature = check_tun;
> -       else if (!strcmp(feat, "userns"))
> -               chk_feature = check_userns;
> -       else if (!strcmp(feat, "fdinfo_lock"))
> -               chk_feature = check_fdinfo_lock;
> -       else if (!strcmp(feat, "seccomp_suspend"))
> -               chk_feature = check_ptrace_suspend_seccomp;
> -       else if (!strcmp(feat, "seccomp_filters"))
> -               chk_feature = check_ptrace_dump_seccomp_filters;
> -       else if (!strcmp(feat, "loginuid"))
> -               chk_feature = check_loginuid;
> -       else if (!strcmp(feat, "cgroupns"))
> -               chk_feature = check_cgroupns;
> -       else {
> -               pr_err("Unknown feature %s\n", feat);
> -               return -1;
> +       struct feature_list *fl;
> +
> +       if (!strcmp(feat, "list")) {
> +               for (fl = feature_list; fl->name; fl++)
> +                       pr_msg("%s ", fl->name);
> +               pr_msg("\n");
> +               return 1;
>         }
>
> -       return 0;
> +       for (fl = feature_list; fl->name; fl++) {
> +               if (!strcmp(feat, fl->name)) {
> +                       chk_feature = fl->func;
> +                       return 0;
> +               }
> +       }
> +       pr_err("Unknown feature %s\n", feat);
> +       return -1;
>  }
> diff --git a/criu/cr-service.c b/criu/cr-service.c
> index 88d4af7..a1d843d 100644
> --- a/criu/cr-service.c
> +++ b/criu/cr-service.c
> @@ -567,8 +567,8 @@ static int check(int sk)
>
>         setproctitle("check --rpc");
>
> -       /* Check only minimal kernel support */
> -       opts.check_ms_kernel = true;
> +       /* Check only abolutely needed kernel features */
> +       opts.check_abs_features = true;
>
>         if (!cr_check())
>                 resp.success = true;
> diff --git a/criu/crtools.c b/criu/crtools.c
> index a8ddb82..2120082 100644
> --- a/criu/crtools.c
> +++ b/criu/crtools.c
> @@ -275,6 +275,7 @@ int main(int argc, char *argv[], char *envp[])
>                 { "timeout",                    required_argument,      0,
> 1072 },
>                 { "external",                   required_argument,      0,
> 1073 },
>                 { "empty-ns",                   required_argument,      0,
> 1074 },
> +               { "abs",                        no_argument,            0,
> 1075 },
>                 { },
>         };
>
> @@ -455,8 +456,8 @@ int main(int argc, char *argv[], char *envp[])
>                         opts.force_irmap = true;
>                         break;
>                 case 1054:
> -                       opts.check_ms_kernel = true;
> -                       break;
> +                       pr_err("--ms is deprecated, use --abs instead\n");
> +                       return 1;
>                 case 'L':
>                         opts.libdir = optarg;
>                         break;
> @@ -490,8 +491,11 @@ int main(int argc, char *argv[], char *envp[])
>                                 return 1;
>                         break;
>                 case 1063:
> -                       if (check_add_feature(optarg) < 0)
> +                       ret = check_add_feature(optarg);
> +                       if (ret < 0)    /* invalid kernel feature name */
>                                 return 1;
> +                       if (ret > 0)    /* list kernel features and exit */
> +                               return 0;
>                         break;
>                 case 1064:
>                         if (!add_skip_mount(optarg))
> @@ -554,6 +558,9 @@ int main(int argc, char *argv[], char *envp[])
>                                 return 1;
>                         }
>                         break;
> +               case 1075:
> +                       opts.check_abs_features = true;
> +                       break;
>                 case 'V':
>                         pr_msg("Version: %s\n", CRIU_VERSION);
>                         if (strcmp(CRIU_GITID, "0"))
> @@ -714,7 +721,7 @@ usage:
>  "Usage:\n"
>  "  criu dump|pre-dump -t PID [<options>]\n"
>  "  criu restore [<options>]\n"
> -"  criu check [--ms]\n"
> +"  criu check [--feature FEAT]\n"
>  "  criu exec -p PID <syscall-string>\n"
>  "  criu page-server\n"
>  "  criu service [<options>]\n"
> @@ -807,8 +814,16 @@ usage:
>  "                            socket[inode]\n"
>  "                            file[mnt_id:inode]\n"
>  "  --empty-ns {net}\n"
> -"                      Create a namespace, but don't restore its
> properies.\n"
> -"                      An user will retore them from action scripts.\n"
> +"                        Create a namespace, but don't restore its
> properies.\n"
> +"                        An user will retore them from action scripts.\n"
> +"Check options:\n"
> +"  --abs                 check availability of absolutely needed kernel
> features\n"
> +"  --ms                  deprecated, use --abs (don't check not yet
> merged kernel features)\n"
> +"  --feature FEAT        check availability of one of the following
> kernel features\n"
> +"                        "
> +       );
> +       check_add_feature("list");
> +       pr_msg(
>  "\n"
>  "* Logging:\n"
>  "  -o|--log-file FILE    log file name\n"
> @@ -836,7 +851,6 @@ usage:
>  "Other options:\n"
>  "  -h|--help             show this text\n"
>  "  -V|--version          show version\n"
> -"     --ms               don't check not yet merged kernel features\n"
>         );
>
>         return 0;
> diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
> index a6f0b3e..ba2633a 100644
> --- a/criu/include/cr_options.h
> +++ b/criu/include/cr_options.h
> @@ -56,7 +56,7 @@ struct cr_options {
>         int                     final_state;
>         char                    *show_dump_file;
>         char                    *show_fmt;
> -       bool                    check_ms_kernel;
> +       bool                    check_abs_features;
>         bool                    show_pages_content;
>         union {
>                 bool            restore_detach;
> --
> 2.7.0.rc3.207.g0ac5344
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openvz.org/pipermail/criu/attachments/20160229/79b279b3/attachment-0001.html>


More information about the CRIU mailing list