[CRIU] [PATCH 2/2] Make lacking cgroup properties non-fatal

Garrison Bellack gbellack at google.com
Wed Aug 13 14:46:31 PDT 2014


On Wed, Aug 13, 2014 at 1:55 PM, Tycho Andersen <
tycho.andersen at canonical.com> wrote:

> On Wed, Aug 13, 2014 at 01:32:01PM -0700, Garrison Bellack wrote:
> > On Wed, Aug 13, 2014 at 12:43 PM, Andrew Vagin <avagin at parallels.com>
> wrote:
> >
> > > On Wed, Aug 13, 2014 at 11:59:33AM -0700, gbellack at google.com wrote:
> > > > From: Garrison Bellack <gbellack at google.com>
> > > >
> > > > Because different kernel versions have different cgroup properties,
> criu
> > > > shouldn't crash just because the properties statically listed aren't
> > > exact.
> > > > Instead, during dump, ignore properties the kernel doesn't have and
> > > continue.
> > > > In addition, during restore, in the event of migration to a kernel
> that
> > > is
> > > > missing a property that was dumped, print an error but continue.
> > >
> > > I am not sure that we can continue in this case. Why do you think that
> > > it's safe? I think we need to ask a user about this. So can we add a
> new
> > > option to allow continuing in this case?
> >
> >
> > I'm a little confused about your concerns of safety. Are you referring to
> > missing properties on the dump or restore side?
>
> I think restore side. I also wondered the same thing -- it seems that
> missing properties on restore should be an error (perhaps as Andrew
> says with an --allow-missing-cg-props flag on the restore side to
> allow you to override this error).


Here is the reasoning behind this decision. Let say you are migrating
properties A,B,C,D where B doesn't exist.

This is how it currently works:
A will be restored. B prints an error and causes failure because it is
missing. However, because cgroup property restoration is the last thing to
happen during restore, and because of designs further up the call chain,
the restoration of the process still goes through. C,D are not restored
because of previous criu failure.
End result -- Process and property A restored, C and D are not restored
even though they exist on the new machine

With this patch:
A will be restored. B prints an error and continues. C and D are restored.
End result --  Process and property A, C, D all restored.

I think between these two options we clearly prefer the later behavior.


>
> >
> > > >
> > > > Change-Id: I5a8b93d6a8a3a9664914f10cf8e2110340dd8b31
> > > > Signed-off-by: Garrison Bellack <gbellack at google.com>
> > > > ---
> > > >  cgroup.c | 40 ++++++++++++++++++++++++++--------------
> > > >  1 file changed, 26 insertions(+), 14 deletions(-)
> > > >
> > > > diff --git a/cgroup.c b/cgroup.c
> > > > index f8dbbde..b727a50 100644
> > > > --- a/cgroup.c
> > > > +++ b/cgroup.c
> > > > @@ -284,35 +284,30 @@ static int find_dir(const char *path, struct
> > > list_head *dirs, struct cgroup_dir
> > > >   * Currently this function only supports properties that have 1
> value,
> > > under 100
> > > >   * chars
> > > >   */
> > > > -static int read_cgroup_prop(struct cgroup_prop *property, const char
> > > *fpath)
> > > > +static int read_cgroup_prop(struct cgroup_prop *property, const char
> > > *fullpath)
> > > >  {
> > > > -     char pbuf[PATH_MAX], buf[100];
> > > > +     char buf[100];
> > > >       FILE *f;
> > > >       char *endptr;
> > > >
> > > > -     if (snprintf(pbuf, PATH_MAX, "%s/%s", fpath, property->name) >=
> > > PATH_MAX) {
> > > > -             pr_err("snprintf output was truncated");
> > > > -             return -1;
> > > > -     }
> > > > -
> > > > -     f = fopen(pbuf, "r");
> > > > +     f = fopen(fullpath, "r");
> > > >       if (!f) {
> > > > -             pr_err("Failed opening %s\n", pbuf);
> > > >               property->value = NULL;
> > > > +             pr_perror("Failed opening %s\n", fullpath);
> > > >               return -1;
> > > >       }
> > > >
> > > >       memset(buf, 0, sizeof(buf));
> > > >       if (fread(buf, sizeof(buf), 1, f) != 1) {
> > > >               if (!feof(f)) {
> > > > -                     pr_err("Failed scanning %s\n", pbuf);
> > > > +                     pr_err("Failed scanning %s\n", fullpath);
> > > >                       fclose(f);
> > > >                       return -1;
> > > >               }
> > > >       }
> > > >
> > > >       if (fclose(f) != 0) {
> > > > -             pr_err("Failed closing %s\n", pbuf);
> > > > +             pr_err("Failed closing %s\n", fullpath);
> > > >               return -1;
> > > >       }
> > > >
> > > > @@ -390,6 +385,7 @@ static int add_cgroup_properties(const char
> *fpath,
> > > struct cgroup_dir *ncd,
> > > >                                struct cg_controller *controller)
> > > >  {
> > > >       int i, j;
> > > > +     char buf[PATH_MAX];
> > > >       struct cgroup_prop *prop;
> > > >
> > > >       for (i = 0; i < controller->n_controllers; ++i) {
> > > > @@ -397,16 +393,28 @@ static int add_cgroup_properties(const char
> > > *fpath, struct cgroup_dir *ncd,
> > > >               const char **prop_arr =
> > > get_known_properties(controller->controllers[i]);
> > > >
> > > >               for (j = 0; prop_arr != NULL && prop_arr[j] != NULL;
> ++j) {
> > > > +                     if (snprintf(buf, PATH_MAX, "%s/%s", fpath,
> > > prop_arr[j]) >= PATH_MAX) {
> > > > +                             pr_err("snprintf output was
> truncated");
> > > > +                             return -1;
> > > > +                     }
> > > > +
> > > > +                     if (access(buf, F_OK) < 0 && errno == ENOENT) {
> > > > +                             pr_info("Couldn't open %s. This cgroup
> > > property may not exist on this kernel\n", buf);
> > > > +                             continue;
> > > > +                     }
> > > > +
> > > >                       prop = create_cgroup_prop(prop_arr[j]);
> > > >                       if (!prop) {
> > > >                               free_all_cgroup_props(ncd);
> > > >                               return -1;
> > > >                       }
> > > > -                     if (read_cgroup_prop(prop, fpath) < 0) {
> > > > +
> > > > +                     if (read_cgroup_prop(prop, buf) < 0) {
> > > >                               free_cgroup_prop(prop);
> > > >                               free_all_cgroup_props(ncd);
> > > >                               return -1;
> > > >                       }
> > > > +
> > > >                       pr_info("Dumping value %s from %s/%s\n",
> > > prop->value, fpath, prop->name);
> > > >                       list_add_tail(&prop->list, &ncd->properties);
> > > >                       ncd->n_properties++;
> > > > @@ -924,8 +932,12 @@ static int restore_cgroup_prop(const
> > > CgroupPropEntry * cg_prop_entry_p,
> > > >
> > > >       f = fopenat(cg, path, "w+");
> > > >       if (!f) {
> > > > -             pr_perror("Failed opening %s for writing\n", path);
> > > > -             return -1;
> > > > +             if (errno != ENOENT) {
> > > > +                     pr_perror("Failed opening %s\n", path);
> > > > +                     return -1;
> > > > +             }
> > > > +             pr_perror("Couldn't open %s. This cgroup property may
> not
> > > exist on this kernel\n", path);
> > > > +             return 0;
> > > >       }
> > > >
> > > >       if (fprintf(f, "%s", cg_prop_entry_p->value) < 0) {
> > > > --
> > > > 2.1.0.rc2.206.gedb03e5
> > > >
> > > > _______________________________________________
> > > > CRIU mailing list
> > > > CRIU at openvz.org
> > > > https://lists.openvz.org/mailman/listinfo/criu
> > >
>
> > _______________________________________________
> > CRIU mailing list
> > CRIU at openvz.org
> > https://lists.openvz.org/mailman/listinfo/criu
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openvz.org/pipermail/criu/attachments/20140813/0cbb4a23/attachment.html>


More information about the CRIU mailing list