[CRIU] [PATCH 3/3] sysctl: move sysctl calls to usernsd

Tycho Andersen tycho.andersen at canonical.com
Thu Aug 13 15:58:51 PDT 2015


Mostly for my own education in what is required to port something to be userns
restorable, I ported the sysctl stuff. Two potential concerns with this patch:

1. It does a lot more open("/proc/sys") calls, since we can't (easily) cache
   the directory with the call structure. There aren't too many of these so I
   thought it might be ok, but if not I can add a service FD for /proc/sys so
   that we only open it once.

2. Copying structures with pointers around is kind of gory. I did it ad-hoc
   here, but it may be worth inventing some mechanisms to make it easier,
   although I'm not sure what exactly that would look like.

2a. There is not a great way to return non-fd stuff in memory right now from
    userns_call; one of the little hacks in this code would be "simplified" if
    we invented a way to do this.

Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
---
 sysctl.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 85 insertions(+), 12 deletions(-)

diff --git a/sysctl.c b/sysctl.c
index b059140..8a9d979 100644
--- a/sysctl.c
+++ b/sysctl.c
@@ -5,9 +5,18 @@
 #include <stdlib.h>
 
 #include "asm/types.h"
+#include "namespaces.h"
 #include "sysctl.h"
 #include "util.h"
 
+struct sysctl_userns_req {
+	char	*name;
+	void	*arg;
+	int	type;
+	int	flags;
+	int	op;
+};
+
 #define __SYSCTL_OP(__ret, __fd, __req, __type, __nr, __op)		\
 do {									\
 	if (__op == CTL_READ)						\
@@ -24,7 +33,7 @@ do {									\
 
 #define GEN_SYSCTL_READ_FUNC(__type, __conv)				\
 static int sysctl_read_##__type(int fd,					\
-				struct sysctl_req *req,			\
+				struct sysctl_userns_req *req,		\
 				__type *arg,				\
 				int nr)					\
 {									\
@@ -56,7 +65,7 @@ err:									\
 
 #define GEN_SYSCTL_WRITE_FUNC(__type, __fmt)				\
 static int sysctl_write_##__type(int fd,				\
-				 struct sysctl_req *req,		\
+				 struct sysctl_userns_req *req,		\
 				 __type *arg,				\
 				 int nr)				\
 {									\
@@ -100,7 +109,7 @@ GEN_SYSCTL_WRITE_FUNC(u64, "%"PRIu64" ");
 GEN_SYSCTL_WRITE_FUNC(s32, "%d ");
 
 static int
-sysctl_write_char(int fd, struct sysctl_req *req, char *arg, int nr)
+sysctl_write_char(int fd, struct sysctl_userns_req *req, char *arg, int nr)
 {
 	pr_debug("%s nr %d\n", req->name, nr);
 	if (dprintf(fd, "%s\n", arg) < 0)
@@ -110,7 +119,7 @@ sysctl_write_char(int fd, struct sysctl_req *req, char *arg, int nr)
 }
 
 static int
-sysctl_read_char(int fd, struct sysctl_req *req, char *arg, int nr)
+sysctl_read_char(int fd, struct sysctl_userns_req *req, char *arg, int nr)
 {
 	int ret = -1;
 
@@ -126,9 +135,44 @@ err:
 	return ret;
 }
 
-static int __sysctl_op(int dir, struct sysctl_req *req, int op)
+static int sysctl_userns_arg_size(int type)
 {
-	int fd, ret = -1, nr = 1, flags;
+	switch(CTL_TYPE(type)) {
+	case __CTL_U32A:
+		return sizeof(u32) * CTL_LEN(type);
+	case CTL_U32:
+		return sizeof(u32);
+	case CTL_32:
+		return sizeof(s32);
+	case __CTL_U64A:
+		return sizeof(u64) * CTL_LEN(type);
+	case CTL_U64:
+		return sizeof(u64);
+	case __CTL_STR:
+		return sizeof(char) * CTL_LEN(type) + 1;
+	default:
+		pr_err("unknown arg type %d\n", type);
+
+		/* Ensure overflow to cause an error */
+		return MAX_MSG_SIZE;
+	}
+}
+
+static int __sysctl_op(void *arg, int unused)
+{
+	int fd, ret = -1, nr = 1, flags, dir;
+	struct sysctl_userns_req *req = arg;
+	int op = req->op;
+
+	// fix up the pointers
+	req->name = (char *) &req[1];
+	req->arg = req->name + strlen(req->name) + 2;
+
+	dir = open("/proc/sys", O_RDONLY, O_DIRECTORY);
+	if (dir < 0) {
+		pr_perror("Can't open sysctl dir");
+		return -1;
+	}
 
 	if (op == CTL_READ)
 		flags = O_RDONLY;
@@ -137,6 +181,7 @@ static int __sysctl_op(int dir, struct sysctl_req *req, int op)
 
 	fd = openat(dir, req->name, flags);
 	if (fd < 0) {
+		close_safe(&dir);
 		if (errno == ENOENT && (req->flags & CTL_FLAGS_OPTIONAL))
 			return 0;
 		pr_perror("Can't open sysctl %s", req->name);
@@ -163,6 +208,7 @@ static int __sysctl_op(int dir, struct sysctl_req *req, int op)
 		break;
 	}
 
+	close_safe(&dir);
 	close_safe(&fd);
 
 	return ret;
@@ -172,17 +218,44 @@ int sysctl_op(struct sysctl_req *req, size_t nr_req, int op)
 {
 	int ret = 0;
 	int dir = -1;
+	struct sysctl_userns_req *userns_req;
 
-	dir = open("/proc/sys", O_RDONLY);
-	if (dir < 0) {
-		pr_perror("Can't open sysctl dir");
-		return -1;
-	}
+	userns_req = alloca(MAX_MSG_SIZE);
+	userns_req->name = (char *) (&userns_req[1]);
 
 	while (nr_req--) {
-		ret = __sysctl_op(dir, req, op);
+		int arg_len = sysctl_userns_arg_size(req->type);
+		int name_len = strlen(req->name) + 1;
+		int total_len = sizeof(*userns_req) + arg_len + name_len;
+
+		if (total_len > MAX_MSG_SIZE) {
+			pr_err("sysctl msg too big: %s\n", req->name);
+			return -1;
+		}
+
+		strcpy(userns_req->name, req->name);
+
+		userns_req->arg = userns_req->name + name_len + 1;
+		if (op == CTL_WRITE)
+			memcpy(userns_req->arg, req->arg, arg_len);
+
+		userns_req->type = req->type;
+		userns_req->flags = req->flags;
+		userns_req->op = op;
+
+		ret = userns_call(__sysctl_op, UNS_ASYNC, userns_req, total_len, 0);
 		if (ret < 0)
 			break;
+
+		/*
+		 * Here, we use a little hack: since we only read in dump mode
+		 * when usernsd is not active, we know the above call happened
+		 * in this address space, so we can just copy the value read
+		 * back out. If ther was an API to return stuff via
+		 * userns_call(), that would be preferable.
+		 */
+		if (op == CTL_READ)
+			memcpy(req->arg, userns_req->arg, arg_len);
 		req++;
 	}
 
-- 
2.1.0



More information about the CRIU mailing list