[CRIU] [PATCH 3/3] sysctl: move sysctl calls to usernsd
Tycho Andersen
tycho.andersen at canonical.com
Thu Aug 13 15:58:51 PDT 2015
Mostly for my own education in what is required to port something to be userns
restorable, I ported the sysctl stuff. Two potential concerns with this patch:
1. It does a lot more open("/proc/sys") calls, since we can't (easily) cache
the directory with the call structure. There aren't too many of these so I
thought it might be ok, but if not I can add a service FD for /proc/sys so
that we only open it once.
2. Copying structures with pointers around is kind of gory. I did it ad-hoc
here, but it may be worth inventing some mechanisms to make it easier,
although I'm not sure what exactly that would look like.
2a. There is not a great way to return non-fd stuff in memory right now from
userns_call; one of the little hacks in this code would be "simplified" if
we invented a way to do this.
Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
---
sysctl.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 85 insertions(+), 12 deletions(-)
diff --git a/sysctl.c b/sysctl.c
index b059140..8a9d979 100644
--- a/sysctl.c
+++ b/sysctl.c
@@ -5,9 +5,18 @@
#include <stdlib.h>
#include "asm/types.h"
+#include "namespaces.h"
#include "sysctl.h"
#include "util.h"
+struct sysctl_userns_req {
+ char *name;
+ void *arg;
+ int type;
+ int flags;
+ int op;
+};
+
#define __SYSCTL_OP(__ret, __fd, __req, __type, __nr, __op) \
do { \
if (__op == CTL_READ) \
@@ -24,7 +33,7 @@ do { \
#define GEN_SYSCTL_READ_FUNC(__type, __conv) \
static int sysctl_read_##__type(int fd, \
- struct sysctl_req *req, \
+ struct sysctl_userns_req *req, \
__type *arg, \
int nr) \
{ \
@@ -56,7 +65,7 @@ err: \
#define GEN_SYSCTL_WRITE_FUNC(__type, __fmt) \
static int sysctl_write_##__type(int fd, \
- struct sysctl_req *req, \
+ struct sysctl_userns_req *req, \
__type *arg, \
int nr) \
{ \
@@ -100,7 +109,7 @@ GEN_SYSCTL_WRITE_FUNC(u64, "%"PRIu64" ");
GEN_SYSCTL_WRITE_FUNC(s32, "%d ");
static int
-sysctl_write_char(int fd, struct sysctl_req *req, char *arg, int nr)
+sysctl_write_char(int fd, struct sysctl_userns_req *req, char *arg, int nr)
{
pr_debug("%s nr %d\n", req->name, nr);
if (dprintf(fd, "%s\n", arg) < 0)
@@ -110,7 +119,7 @@ sysctl_write_char(int fd, struct sysctl_req *req, char *arg, int nr)
}
static int
-sysctl_read_char(int fd, struct sysctl_req *req, char *arg, int nr)
+sysctl_read_char(int fd, struct sysctl_userns_req *req, char *arg, int nr)
{
int ret = -1;
@@ -126,9 +135,44 @@ err:
return ret;
}
-static int __sysctl_op(int dir, struct sysctl_req *req, int op)
+static int sysctl_userns_arg_size(int type)
{
- int fd, ret = -1, nr = 1, flags;
+ switch(CTL_TYPE(type)) {
+ case __CTL_U32A:
+ return sizeof(u32) * CTL_LEN(type);
+ case CTL_U32:
+ return sizeof(u32);
+ case CTL_32:
+ return sizeof(s32);
+ case __CTL_U64A:
+ return sizeof(u64) * CTL_LEN(type);
+ case CTL_U64:
+ return sizeof(u64);
+ case __CTL_STR:
+ return sizeof(char) * CTL_LEN(type) + 1;
+ default:
+ pr_err("unknown arg type %d\n", type);
+
+ /* Ensure overflow to cause an error */
+ return MAX_MSG_SIZE;
+ }
+}
+
+static int __sysctl_op(void *arg, int unused)
+{
+ int fd, ret = -1, nr = 1, flags, dir;
+ struct sysctl_userns_req *req = arg;
+ int op = req->op;
+
+ // fix up the pointers
+ req->name = (char *) &req[1];
+ req->arg = req->name + strlen(req->name) + 2;
+
+ dir = open("/proc/sys", O_RDONLY, O_DIRECTORY);
+ if (dir < 0) {
+ pr_perror("Can't open sysctl dir");
+ return -1;
+ }
if (op == CTL_READ)
flags = O_RDONLY;
@@ -137,6 +181,7 @@ static int __sysctl_op(int dir, struct sysctl_req *req, int op)
fd = openat(dir, req->name, flags);
if (fd < 0) {
+ close_safe(&dir);
if (errno == ENOENT && (req->flags & CTL_FLAGS_OPTIONAL))
return 0;
pr_perror("Can't open sysctl %s", req->name);
@@ -163,6 +208,7 @@ static int __sysctl_op(int dir, struct sysctl_req *req, int op)
break;
}
+ close_safe(&dir);
close_safe(&fd);
return ret;
@@ -172,17 +218,44 @@ int sysctl_op(struct sysctl_req *req, size_t nr_req, int op)
{
int ret = 0;
int dir = -1;
+ struct sysctl_userns_req *userns_req;
- dir = open("/proc/sys", O_RDONLY);
- if (dir < 0) {
- pr_perror("Can't open sysctl dir");
- return -1;
- }
+ userns_req = alloca(MAX_MSG_SIZE);
+ userns_req->name = (char *) (&userns_req[1]);
while (nr_req--) {
- ret = __sysctl_op(dir, req, op);
+ int arg_len = sysctl_userns_arg_size(req->type);
+ int name_len = strlen(req->name) + 1;
+ int total_len = sizeof(*userns_req) + arg_len + name_len;
+
+ if (total_len > MAX_MSG_SIZE) {
+ pr_err("sysctl msg too big: %s\n", req->name);
+ return -1;
+ }
+
+ strcpy(userns_req->name, req->name);
+
+ userns_req->arg = userns_req->name + name_len + 1;
+ if (op == CTL_WRITE)
+ memcpy(userns_req->arg, req->arg, arg_len);
+
+ userns_req->type = req->type;
+ userns_req->flags = req->flags;
+ userns_req->op = op;
+
+ ret = userns_call(__sysctl_op, UNS_ASYNC, userns_req, total_len, 0);
if (ret < 0)
break;
+
+ /*
+ * Here, we use a little hack: since we only read in dump mode
+ * when usernsd is not active, we know the above call happened
+ * in this address space, so we can just copy the value read
+ * back out. If ther was an API to return stuff via
+ * userns_call(), that would be preferable.
+ */
+ if (op == CTL_READ)
+ memcpy(req->arg, userns_req->arg, arg_len);
req++;
}
--
2.1.0
More information about the CRIU
mailing list