[CRIU] [PATCH 1/2] cgroups: Add ability to reuse precreated controllers and cgroups
Pavel Emelyanov
xemul at parallels.com
Fri May 22 04:53:22 PDT 2015
On 05/21/2015 10:33 PM, Cyrill Gorcunov wrote:
> While playing with checkpoint/restore of a container I found
> that we can't reuse existing controllers if they were pre-created.
> For example, currently in PCS7 we bind-mount the cgroups which belong
> to a container in the form of
>
> /sys/fs/cgroup/<controller>/<container> ==> /sys/fs/cgroup/<controller>
>
> CRIU dumps such a configuration fine, but on restore
> it recreates the controllers from scratch, whereas we would
> like to bind-mount them ourselves and ask CRIU to restore only
> the subcgroups and their parameters.
>
> So I extended the --manage-cgroups option to take <mode> arguments,
> which may be
>
> - strict: default mode -- no change in current behaviour,
> we restore everything from scratch, failing with an error
> if some toplevel controller or cgroup already exists
What do you mean by "default"? The mode when --manage-cgroups is
specified, but the "mode" argument is not?
> - opport: opportunistic mode -- if a controller or toplevel
> cgroup doesn't exist we create it, otherwise we simply continue
> execution. In other words, we try to continue as long as we can.
>
> - bind: bindmount mode -- implies that controller and toplevel
> cgroup must already exist.
>
> The last mode allows me to premount cgroups for container externally
> and restore its limits from CRIU.
I see that modes can be OR-ed. What do the combinations mean?
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
> Documentation/criu.txt | 14 ++++++++++++-
> cgroup.c | 53 +++++++++++++++++++++++++++++++++++++++++---------
> crtools.c | 41 ++++++++++++++++++++++++++++++++++----
> include/cr_options.h | 11 ++++++++++-
> 4 files changed, 104 insertions(+), 15 deletions(-)
>
> diff --git a/Documentation/criu.txt b/Documentation/criu.txt
> index 81783635af89..c307abf7605f 100644
> --- a/Documentation/criu.txt
> +++ b/Documentation/criu.txt
> @@ -211,8 +211,20 @@ Restores previously checkpointed processes.
> *-r*, *--root* '<path>'::
> Change the root filesystem to <path> (when run in mount namespace).
>
> -*--manage-cgroups*::
> +*--manage-cgroups* [<mode>,<mode>]::
> Restore cgroups configuration associated with a task from the image.
> + '<mode>' may be a combination of keywords from below.
> +
> + - *strict*. Require all cgroups to be recreated from scratch. If
> + any cgroup or subcgroup already exists, the restore procedure
> + aborts. This is the default mode.
> +
> + - *opport*. Opportunistic mode where *criu* tries to restore all the
> + cgroups but does not fail if some cgroup already exists.
> +
> + - *bind*. Bind mode where all toplevel cgroups should be premounted
> + and *criu* only tries to restore subcgroups and their
> + parameters.
>
> *--cgroup-root* '[<controller>:]/<newroot>'::
> Change the root cgroup the controller will be installed into. No controller
> diff --git a/cgroup.c b/cgroup.c
> index b7fef38dd334..d0d693f50605 100644
> --- a/cgroup.c
> +++ b/cgroup.c
> @@ -1103,6 +1103,49 @@ static int prepare_cgroup_dirs(char **controllers, int n_controllers, char *paux
> return 0;
> }
>
> +static int prepare_toplevel(char *dir, char *mopt)
> +{
> + /*
> + * For bind mode simply check that directory
> + * is here, it's enough.
> + */
> + if (opts.manage_cgroups & CG_MODE_BIND) {
> + if (access(dir, F_OK) == 0) {
> + pr_err("\tHit existing cgyard dir %s\n", dir);
> + return -1;
> + } else if (errno != ENOENT) {
> + pr_perror("\tFiled accessing dir %s\n", dir);
> + return -1;
> + }
> + return 0;
> + }
> +
> + /*
> + * It's either oppotunistic either strict mode so
> + * create and mount.
> + */
> + pr_debug("\tMaking cgyard dir %s (%s)\n", dir, mopt);
> + if (mkdir(dir, 0700)) {
> + if (errno == EEXIST) {
> + /*
> + * For opportunistic mode simply imply
> + * that cgroup is already mounted.
> + */
> + if (opts.manage_cgroups & CG_MODE_OPPORT)
> + return 0;
> + }
> + pr_perror("Can't make cgyard dir %s", dir);
> + return -1;
> + }
> +
> + if (mount("none", dir, "cgroup", 0, mopt) < 0) {
> + pr_perror("Can't mount cgyard dir %s", dir);
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> /*
> * Prepare the CGROUP_YARD service descriptor. This guy is
> * tmpfs mount with the set of ctl->name directories each
> @@ -1179,16 +1222,8 @@ static int prepare_cgroup_sfd(CgroupEntry *ce)
> paux + ctl_off, sizeof(paux) - ctl_off,
> opt, sizeof(opt));
>
> - pr_debug("\tMaking subdir %s (%s)\n", paux, opt);
> - if (mkdir(paux, 0700)) {
> - pr_perror("Can't make cgyard subdir %s", paux);
> - goto err;
> - }
> -
> - if (mount("none", paux, "cgroup", 0, opt) < 0) {
> - pr_perror("Can't mount %s cgyard", paux);
> + if (prepare_toplevel(paux, opt))
> goto err;
> - }
>
> /* We skip over the .criu.cgyard.XXXXXX/, since those will be
> * referred to by the cg yard service fd. */
> diff --git a/crtools.c b/crtools.c
> index 6da7afeed067..d8993be28cd0 100644
> --- a/crtools.c
> +++ b/crtools.c
> @@ -57,7 +57,7 @@ void init_opts(void)
> INIT_LIST_HEAD(&opts.new_cgroup_roots);
>
> opts.cpu_cap = CPU_CAP_DEFAULT;
> - opts.manage_cgroups = false;
> + opts.manage_cgroups = CG_MODE_DEAULT;
> opts.ps_socket = -1;
> }
>
> @@ -147,6 +147,37 @@ Esyntax:
> return -1;
> }
>
> +static int parse_manage_cgroups(struct cr_options *opts, const char *optarg)
> +{
> + if (!optarg) {
> + opts->manage_cgroups = CG_MODE_DEAULT;
> + return 0;
> + }
> +
> + while (*optarg) {
> + if (optarg[0] == ',') {
> + optarg++;
> + continue;
> + } else if (!strncmp(optarg, "opport", 6)) {
> + opts->manage_cgroups |= CG_MODE_OPPORT;
> + optarg += 3;
> + } else if (!strncmp(optarg, "bind", 4)) {
> + opts->manage_cgroups |= CG_MODE_BIND;
> + optarg += 4;
> + } else if (!strncmp(optarg, "strict", 6)) {
> + opts->manage_cgroups |= CG_MODE_STRICT;
> + optarg += 6;
> + } else
> + goto Esyntax;
> + }
> +
> + return 0;
> +
> +Esyntax:
> + pr_err("Unknown cgroups mode `%s' selected\n", optarg);
> + return -1;
> +}
> +
> int main(int argc, char *argv[], char *envp[])
> {
> pid_t pid = 0, tree_id = 0;
> @@ -199,7 +230,7 @@ int main(int argc, char *argv[], char *envp[])
> { "force-irmap", no_argument, 0, 1058 },
> { "ext-mount-map", required_argument, 0, 'M' },
> { "exec-cmd", no_argument, 0, 1059 },
> - { "manage-cgroups", no_argument, 0, 1060 },
> + { "manage-cgroups", optional_argument, 0, 1060 },
> { "cgroup-root", required_argument, 0, 1061 },
> { "inherit-fd", required_argument, 0, 1062 },
> { "feature", required_argument, 0, 1063 },
> @@ -392,7 +423,8 @@ int main(int argc, char *argv[], char *envp[])
> has_exec_cmd = true;
> break;
> case 1060:
> - opts.manage_cgroups = true;
> + if (parse_manage_cgroups(&opts, optarg))
> + goto usage;
> break;
> case 1061:
> {
> @@ -669,7 +701,8 @@ usage:
> " allow autoresolving mounts with external sharing\n"
> " --enable-external-masters\n"
> " allow autoresolving mounts with external masters\n"
> -" --manage-cgroups dump or restore cgroups the process is in\n"
> +" --manage-cgroups [m] dump or restore cgroups the process is in usig mode:\n"
> +" 'opport','strict' (default) or 'bind'.\n"
> " --cgroup-root [controller:]/newroot\n"
> " change the root cgroup the controller will be\n"
> " installed into. No controller means that root is the\n"
> diff --git a/include/cr_options.h b/include/cr_options.h
> index 28bed45b0cd8..0843187d34b2 100644
> --- a/include/cr_options.h
> +++ b/include/cr_options.h
> @@ -21,6 +21,15 @@ struct cg_root_opt {
> char *newroot;
> };
>
> +/*
> + * Cgroup management options.
> + */
> +#define CG_MODE_OPPORT (1u << 0) /* Restore everything ignoring if cgroup exists */
> +#define CG_MODE_STRICT (1u << 1) /* It must be clean cgroups, recreate from scratch */
> +#define CG_MODE_BIND (1u << 2) /* Toplevel are bound, everything else to restore */
> +
> +#define CG_MODE_DEAULT (CG_MODE_STRICT)
> +
> struct cr_options {
> int final_state;
> char *show_dump_file;
> @@ -59,7 +68,7 @@ struct cr_options {
> unsigned int cpu_cap;
> bool force_irmap;
> char **exec_cmd;
> - bool manage_cgroups;
> + unsigned int manage_cgroups;
> char *new_global_cg_root;
> struct list_head new_cgroup_roots;
> bool autodetect_ext_mounts;
>
More information about the CRIU
mailing list