[Devel] [PATCH v5 4/6] modify tar extraction to account for user namespace

Glauber Costa glommer at openvz.org
Fri May 17 09:26:44 PDT 2013


From: Glauber Costa <glommer at parallels.com>

If we are running on an upstream kernel with user namespaces, we need to
create the container filesystem not with the ownership preserved, but
reflecting the mapping we need to apply. Note that according to our
documentation, we should skip this if the user explicitly requested a uid
mapping of 0 (the gid mapping is ignored in this case).

Our tooling doesn't allow any easy way to unpack a whole distribution with
offsets mechanically applied like this. We could do the whole unpacking in a
user namespace itself, but that does not come without problems of its own (for
instance, we won't be able to create any device files, we have to carefully
adjust permissions in the root directory, etc.).

To work around that, we can employ a trick to allow container creation right
now, as well as to avoid compatibility problems: we will resort to LD_PRELOAD
to load a shim that captures calls to the chown family of system calls and
applies the offset manually.

Signed-off-by: Glauber Costa <glommer at parallels.com>
---
 include/res.h           |  6 ++++
 scripts/vps-create.in   | 14 ++++++++
 src/lib/Makefile.am     |  3 ++
 src/lib/chown_preload.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/lib/create.c        | 38 +++++++++++++++++---
 vzctl.spec              |  2 +-
 6 files changed, 150 insertions(+), 6 deletions(-)
 create mode 100644 src/lib/chown_preload.c

diff --git a/include/res.h b/include/res.h
index 0dfacf7..047593a 100644
--- a/include/res.h
+++ b/include/res.h
@@ -47,6 +47,12 @@ struct env_param {
 };
 typedef struct env_param env_param_t;
 
+/*
+ * When running upstream kernels, we will need host-side UID and GID. Both
+ * are configurable, but if none is specified, these defaults are used.
+ */
+#define VZ_DEFAULT_UID		100000
+#define VZ_DEFAULT_GID		100000
 typedef struct {
 	list_head_t userpw;
 	list_head_t nameserver;
diff --git a/scripts/vps-create.in b/scripts/vps-create.in
index 126f048..a1b3382 100755
--- a/scripts/vps-create.in
+++ b/scripts/vps-create.in
@@ -22,11 +22,22 @@
 # Required parameters:
 #   VE_PRVT		- path to root of CT private areas
 #   PRIVATE_TEMPLATE	- path to private template used as a source for copying
+#
+# Optional parameters:
+#   UID_OFFSET		- offset to be added to all tar UIDs
+#   GID_OFFSET		- offset to be added to all tar GIDs
 
 . @SCRIPTDIR@/vps-functions
 
 vzcheckvar VE_PRVT PRIVATE_TEMPLATE
 
+chown_preload_if_needed()
+{
+	[ -z "$UID_OFFSET" -o -z "$GID_OFFSET" ] && return 
+
+	export LD_PRELOAD=libvzchown.so
+}
+
 create_prvt()
 {
 	local TMP AVAIL NEEDED HEADER OPT
@@ -75,6 +86,9 @@ create_prvt()
 	[ "$AVAIL" -ge "$NEEDED" ] ||
 		vzerror "Insufficient disk space in $VE_PRVT; available: $AVAIL, needed: $NEEDED" ${VZ_FS_NO_DISK_SPACE}
 	CAT=cat
+
+	chown_preload_if_needed
+
 	# Use pv to show nice progress bar if we can
 	pv -V >/dev/null 2>&1 && CAT=pv
 	chmod 700 "$VE_PRVT"
diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am
index b009d15..34c463b 100644
--- a/src/lib/Makefile.am
+++ b/src/lib/Makefile.am
@@ -72,6 +72,9 @@ libvzctl_la_LIBADD = $(XML_LIBS) $(CGROUP_LIBS) $(DL_LIBS)
 
 if HAVE_CGROUP
 libvzctl_la_SOURCES += cgroup.c hooks_ct.c
+
+lib_LTLIBRARIES += libvzchown.la
+libvzchown_la_SOURCES = chown_preload.c
 endif
 
 if HAVE_VZ_KERNEL
diff --git a/src/lib/chown_preload.c b/src/lib/chown_preload.c
new file mode 100644
index 0000000..4e2be4a
--- /dev/null
+++ b/src/lib/chown_preload.c
@@ -0,0 +1,93 @@
+/*
+ *  Copyright (C) 2013, Parallels, Inc. All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ *  Authors: Kir Kolyshkin and Glauber Costa
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <dlfcn.h>
+
+static int (*real_chown)(const char *path, uid_t owner, gid_t group) = NULL;
+static int (*real_fchown)(int fd, uid_t owner, gid_t group) = NULL;
+static int (*real_lchown)(const char *path, uid_t owner, gid_t group) = NULL;
+static int (*real_fchownat)(int dirfd, const char *pathname,
+		uid_t owner, gid_t group, int flags) = NULL;
+
+uid_t uid_offset = 0;
+gid_t gid_offset = 0;
+
+static void __init(void)
+{
+	char *uidstr, *gidstr;
+
+	uidstr = getenv("UID_OFFSET");
+	gidstr = getenv("GID_OFFSET");
+	if (!uidstr || !gidstr) {
+		fprintf(stderr, "Environment variables UID_OFFSET "
+				"and GID_OFFSET are required -- aborting\n");
+		exit(33);
+	}
+	uid_offset = strtol(uidstr, NULL, 10);
+	gid_offset = strtol(gidstr, NULL, 10);
+
+	real_chown = dlsym(RTLD_NEXT, "chown");
+	real_fchown = dlsym(RTLD_NEXT, "fchown");
+	real_lchown = dlsym(RTLD_NEXT, "lchown");
+	real_fchownat = dlsym(RTLD_NEXT, "fchownat");
+
+	if (!real_chown || !real_fchown || !real_lchown || !real_fchownat) {
+		fprintf(stderr, "dlsym failed: %s\n", dlerror());
+		exit(34);
+	}
+}
+
+int chown(const char *path, uid_t owner, gid_t group)
+{
+	if (!real_chown)
+		__init();
+
+	return real_chown(path, owner + uid_offset, group + gid_offset);
+}
+
+int fchown(int fd, uid_t owner, gid_t group)
+{
+	if (!real_fchown)
+		__init();
+
+	return real_fchown(fd, owner + uid_offset, group + gid_offset);
+}
+
+int lchown(const char *path, uid_t owner, gid_t group)
+{
+	if (!real_lchown)
+		__init();
+
+	return real_lchown(path, owner + uid_offset, group + gid_offset);
+}
+
+int fchownat(int dirfd, const char *pathname,
+		uid_t owner, gid_t group, int flags)
+{
+	if (!real_fchownat)
+		__init();
+
+	return real_fchownat(dirfd, pathname,
+			owner + uid_offset, group + gid_offset, flags);
+}
diff --git a/src/lib/create.c b/src/lib/create.c
index 2fd9314..a07def5 100644
--- a/src/lib/create.c
+++ b/src/lib/create.c
@@ -110,7 +110,7 @@ static int fs_create(envid_t veid, vps_handler *h, vps_param *vps_p)
 	char buf[PATH_LEN];
 	int ret;
 	char *arg[2];
-	char *env[4];
+	char *env[6];
 	int quota = 0;
 	int i;
 	char *dst;
@@ -120,10 +120,31 @@ static int fs_create(envid_t veid, vps_handler *h, vps_param *vps_p)
 	int layout = vps_p->opt.layout;
 	fs_param *fs = &vps_p->res.fs;
 	tmpl_param *tmpl = &vps_p->res.tmpl;
+	unsigned long uid_offset = 0;
+	unsigned long gid_offset = 0;
 	int ploop = (layout == VE_LAYOUT_PLOOP);
 	struct destroy_ve ddata;
 	struct vzctl_cleanup_handler *ch;
 
+	/*
+	 * All other users will test directly for h->can_join_userns.  Create
+	 * is special, because we don't have the container config file yet,
+	 * and the user may be requesting it to be disabled on the command
+	 * line. So that value may be outdated.
+	 *
+	 * By now cmd_p is already merged into vps_p. So what we need to do is
+	 * just to test it again. We will force it to false if it is disabled
+	 * here, or keep the old value otherwise.
+	 */
+	if (!(vps_p->res.misc.local_uid) || (!(*vps_p->res.misc.local_uid)))
+		h->can_join_userns = 0;
+
+	if (h->can_join_userns && vps_p->res.misc.local_uid) {
+		uid_offset = *vps_p->res.misc.local_uid;
+		if (uid_offset && vps_p->res.misc.local_gid)
+			gid_offset = *vps_p->res.misc.local_gid;
+	}
+
 	if (ploop && (!dq->diskspace || dq->diskspace[1] <= 0)) {
 		logger(-1, 0, "Error: diskspace not set (required for ploop)");
 		return VZ_DISKSPACE_NOT_SET;
@@ -215,11 +236,18 @@ find:
 	arg[0] = VPS_CREATE;
 	arg[1] = NULL;
 	snprintf(buf, sizeof(buf), "PRIVATE_TEMPLATE=%s", tarball);
-	env[0] = strdup(buf);
+	i = 0;
+	env[i++] = strdup(buf);
 	snprintf(buf, sizeof(buf), "VE_PRVT=%s", dst);
-	env[1] = strdup(buf);
-	env[2] = strdup(ENV_PATH);
-	env[3] = NULL;
+	env[i++] = strdup(buf);
+	if (!is_vz_kernel(h) && h->can_join_userns) {
+		snprintf(buf, sizeof(buf), "UID_OFFSET=%lu", uid_offset);
+		env[i++] = strdup(buf);
+		snprintf(buf, sizeof(buf), "GID_OFFSET=%lu", gid_offset);
+		env[i++] = strdup(buf);
+	}
+	env[i++] = strdup(ENV_PATH);
+	env[i] = NULL;
 	logger(0, 0, "Creating container private area (%s)", tmpl->ostmpl);
 	ret = run_script(VPS_CREATE, arg, env, 0);
 	free_arg(env);
diff --git a/vzctl.spec b/vzctl.spec
index 168a23a..bd778e4 100644
--- a/vzctl.spec
+++ b/vzctl.spec
@@ -183,7 +183,7 @@ Requires: wget
 OpenVZ containers control utility core package
 
 %files core
-%{_libdir}/libvzctl-*.so
+%{_libdir}/libvz*.so
 %dir %{_lockdir}
 %dir %{_dumpdir}
 %dir %{_privdir}
-- 
1.7.11.7




More information about the Devel mailing list