[Devel] [PATCH v2 8/8] allow for distro-specific fix ups at creation time.
Kir Kolyshkin
kir at openvz.org
Tue Apr 16 11:41:26 PDT 2013
Kostya,
We have a few problems with containers running under recent 3.x kernels
(non-openvz).
The problems are:
* Because of user namespace, we have to mount devpts with a special flag
"newinstance", and we do it from vzctl
* We are maybe going to have our own view of /proc, mounted on host via
fuse, and we can't allow container to remount it
So container is doing mounts of /proc and /dev/pts, and what Glauber
proposes below is to add a script to container that just do umount of
these things. This is only needed for "old" distros like RHEL 4-5-6,
because newer ones (like latest fedora releases) are CT-aware and don't
over-mount things.
*My question to you* -- can you think of a better approach to this
problem? I'm asking because you deal with this stuff daily :)
If you have more questions, feel free to ask me or Glauber (Glauber will
be in MSK office as of next Monday).
On 03/22/20Kirill Kolyshkin <kir at openvz.org>13 03:48 AM, Glauber Costa
wrote:
> We will need that infrastucture when running with Linux upstream, since some
> support is very unlikely to ever land in the Kernel. We need to do things like
> account for the fact that udev may kick in and destroy all the setup we have
> done for /dev. Since preemptively preventing those actions to happen is very
> hard ( as an example, centos6 init binary will issue mount syscalls itself),
> the most robust approach is to insert a script that will be run shortly after
> rc.sysinit. Although this can't guarantee that it will run *immediately* after,
> it should be early enough to undo every harmful operation done by /sbin/init
> and rc.sysinit, therefore allowing operation to continue freely.
>
> Signed-off-by: Glauber Costa <glommer at parallels.com>
> ---
> etc/dists/redhat.conf | 1 +
> etc/dists/scripts/fixups.sh | 43 +++++++++++++++++++++++++++++++++++++++++++
> include/dist.h | 2 ++
> include/env.h | 3 ++-
> src/lib/dist.c | 10 +++++++++-
> src/lib/env.c | 10 ++++++----
> src/lib/exec.c | 2 +-
> src/lib/hooks_ct.c | 38 ++++++++++++++++++++++++++++++++++++++
> 8 files changed, 102 insertions(+), 7 deletions(-)
> create mode 100755 etc/dists/scripts/fixups.sh
>
> diff --git a/etc/dists/redhat.conf b/etc/dists/redhat.conf
> index 727461d..49226c5 100644
> --- a/etc/dists/redhat.conf
> +++ b/etc/dists/redhat.conf
> @@ -25,3 +25,4 @@ SET_DNS=set_dns.sh
> SET_USERPASS=set_userpass.sh
> SET_UGID_QUOTA=set_ugid_quota.sh
> POST_CREATE=postcreate.sh
> +CT_FIXUP=fixups.sh
> diff --git a/etc/dists/scripts/fixups.sh b/etc/dists/scripts/fixups.sh
> new file mode 100755
> index 0000000..da3fd24
> --- /dev/null
> +++ b/etc/dists/scripts/fixups.sh
> @@ -0,0 +1,43 @@
> +#!/bin/bash
> +# Copyright (C) 2000-2009, Parallels, Inc. All rights reserved.
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write to the Free Software
> +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
> +#
> +#
> +# Run-time fixup for legacy distributions that will rely on udevd to populate
> +# their /dev. Those distributions will effectively mount an empty tmpfs on top
> +# of /dev, defeating any setup we may have done. This includes RHEL-based
> +# distributions up to RHEL6. We are better off not allowing that to run at all
> +
> +function fixup_udev()
> +{
> + if [ -f /etc/fedora-release ]; then
> + return;
> + fi
> +
> + # All the first two may fail if the distribution didn't actually mount
> + # then.
> + umount /dev/pts
> + umount /dev/shm
> + # still may hold references to files open by rc.sysinit. Even if this is the
> + # case those will be simple things like /dev/null and should go away shortly.
> + # let us not fail because of that.
> + umount /dev -l
> +}
> +
> +fixup_udev
> +
> +exit 0
> +
> diff --git a/include/dist.h b/include/dist.h
> index 1f6a5a6..4faa452 100644
> --- a/include/dist.h
> +++ b/include/dist.h
> @@ -29,6 +29,7 @@
> #define SET_USERPASS 5
> #define SET_UGID_QUOTA 6
> #define POST_CREATE 7
> +#define CT_FIXUP 8
>
> typedef struct {
> char *def_ostmpl;
> @@ -46,6 +47,7 @@ typedef struct dist_actions {
> char *set_userpass; /**< setup user password. */
> char *set_ugid_quota; /**< setup 2level quota. */
> char *post_create; /**< sostcreate actions. */
> + char *ct_fixup; /**< run-time fixups. */
> } dist_actions;
>
> /* Read distribution specific actions configuration file.
> diff --git a/include/env.h b/include/env.h
> index d41df2e..acea123 100644
> --- a/include/env.h
> +++ b/include/env.h
> @@ -104,7 +104,7 @@ int vps_stop(vps_handler *h, envid_t veid, vps_param *param, int stop_mode,
> int vz_chroot(const char *root);
> int vz_env_create(vps_handler *h, envid_t veid, vps_res *res,
> int wait_p[2], int old_wait_p[2], int err_p[2],
> - env_create_FN fn, void *data);
> + env_create_FN fn, dist_actions *actions, void *data);
>
> struct vps_res;
> struct arg_start {
> @@ -116,6 +116,7 @@ struct arg_start {
> vps_handler *h;
> void *data;
> env_create_FN fn;
> + dist_actions *actions;
> int userns_p; /* while running in userns, there's extra sync needed */
> };
>
> diff --git a/src/lib/dist.c b/src/lib/dist.c
> index bde94ab..8047cf7 100644
> --- a/src/lib/dist.c
> +++ b/src/lib/dist.c
> @@ -36,7 +36,8 @@ static struct distr_conf {
> {"SET_DNS", SET_DNS},
> {"SET_USERPASS", SET_USERPASS},
> {"SET_UGID_QUOTA", SET_UGID_QUOTA},
> - {"POST_CREATE", POST_CREATE}
> + {"POST_CREATE", POST_CREATE},
> + {"CT_FIXUP", CT_FIXUP }
> };
>
> static int get_action_id(char *name)
> @@ -100,6 +101,12 @@ static int add_dist_action(dist_actions *d_actions, char *name, char *action,
> break;
> d_actions->post_create = strdup(file);
> break;
> + case CT_FIXUP:
> + if (d_actions->ct_fixup != NULL)
> + break;
> + d_actions->ct_fixup = strdup(file);
> + break;
> +
> }
> return 0;
> }
> @@ -115,6 +122,7 @@ void free_dist_actions(dist_actions *d_actions)
> free(d_actions->set_userpass);
> free(d_actions->set_ugid_quota);
> free(d_actions->post_create);
> + free(d_actions->ct_fixup);
> }
>
> static int get_dist_conf_name(char *dist_name, char *dir, char *file, int len)
> diff --git a/src/lib/env.c b/src/lib/env.c
> index ff4dad2..e3a3706 100644
> --- a/src/lib/env.c
> +++ b/src/lib/env.c
> @@ -297,7 +297,8 @@ int exec_container_init(struct arg_start *arg,
> }
>
> static int vz_real_env_create(vps_handler *h, envid_t veid, vps_res *res,
> - int wait_p, int old_wait_p, int err_p, env_create_FN fn, void *data)
> + int wait_p, int old_wait_p, int err_p, env_create_FN fn, dist_actions *actions,
> + void *data)
>
> {
> struct arg_start arg;
> @@ -310,6 +311,7 @@ static int vz_real_env_create(vps_handler *h, envid_t veid, vps_res *res,
> arg.h = h;
> arg.data = data;
> arg.fn = fn;
> + arg.actions = actions;
>
> return h->env_create(&arg);
> }
> @@ -416,7 +418,7 @@ static void get_osrelease(vps_res *res)
>
> int vz_env_create(vps_handler *h, envid_t veid, vps_res *res,
> int wait_p[2], int old_wait_p[2], int err_p[2],
> - env_create_FN fn, void *data)
> + env_create_FN fn, dist_actions *actions, void *data)
> {
> int ret, pid, errcode;
> int old_wait_fd;
> @@ -458,7 +460,7 @@ int vz_env_create(vps_handler *h, envid_t veid, vps_res *res,
> old_wait_fd = -1;
>
> ret = vz_real_env_create(h, veid, res, wait_p[0],
> - old_wait_fd, err_p[1], fn, data);
> + old_wait_fd, err_p[1], fn, actions, data);
> if (ret)
> write(STDIN_FILENO, &ret, sizeof(ret));
> exit(ret);
> @@ -629,7 +631,7 @@ int vps_start_custom(vps_handler *h, envid_t veid, vps_param *param,
> fix_cpu(&res->cpu);
>
> ret = vz_env_create(h, veid, res, wait_p,
> - old_wait_p, err_p, fn, data);
> + old_wait_p, err_p, fn, &actions, data);
> if (ret)
> goto err;
>
> diff --git a/src/lib/exec.c b/src/lib/exec.c
> index 9cf811c..74375dd 100644
> --- a/src/lib/exec.c
> +++ b/src/lib/exec.c
> @@ -454,7 +454,7 @@ int vps_run_script(vps_handler *h, envid_t veid, char *script, vps_param *vps_p)
> }
> }
> if ((ret = vz_env_create(h, veid, &vps_p->res, rd_p, NULL,
> - wr_p, NULL, NULL)))
> + wr_p, NULL, NULL, NULL)))
> {
> return ret;
> }
> diff --git a/src/lib/hooks_ct.c b/src/lib/hooks_ct.c
> index d5b15dc..02e4cc2 100644
> --- a/src/lib/hooks_ct.c
> +++ b/src/lib/hooks_ct.c
> @@ -258,6 +258,43 @@ static void create_devices(vps_handler *h, envid_t veid, const char *root)
>
> }
>
> +static int apply_fixups(vps_handler *h, envid_t veid, const char *root, dist_actions *actions)
> +{
> + char buf[STR_SIZE];
> +
> + /* Distributions that don't need the fixup will can stop right here */
> + if (!actions || !actions->ct_fixup)
> + return 0;
> +
> + if (snprintf(buf, sizeof(buf), "%s/%s", root, "/etc/rc3.d/S00vz-fixups.sh") < 0)
> + goto out;
> +
> + if (open(buf, O_RDWR|O_CREAT, 0) < 0)
> + goto out;
> +
> + if (mount(actions->ct_fixup, buf, "", MS_BIND, 0) < 0)
> + goto out;
> +
> + /*
> + * Being safe never killed anyone...
> + * We bind mount instead of symlinking the original rc3 so it won't appear in the original
> + * private mount.
> + */
> + if (snprintf(buf, sizeof(buf), "%s/%s", root, "/etc/rc5.d/S00vz-fixups.sh") < 0)
> + goto out;
> +
> + if (open(buf, O_RDWR|O_CREAT, 0) < 0)
> + goto out;
> +
> + if (mount(actions->ct_fixup, buf, "", MS_BIND, 0) < 0)
> + goto out;
> + return 0;
> +
> +out:
> + logger(-1, errno, "Could not install fixup script\n");
> + return VZ_RESOURCE_ERROR;
> +}
> +
> static int ct_env_create(struct arg_start *arg)
> {
>
> @@ -313,6 +350,7 @@ static int ct_env_create(struct arg_start *arg)
> arg->userns_p = userns_p[0];
>
> create_devices(arg->h, arg->veid, arg->res->fs.root);
> + apply_fixups(arg->h, arg->veid, arg->res->fs.root, arg->actions);
>
> ret = clone(_env_create, child_stack, clone_flags, arg);
> if (ret < 0) {
More information about the Devel
mailing list