[CRIU] [PATCH 4/5] sk-unix: Add trivial name resolver for sockets with relative names

Cyrill Gorcunov gorcunov at openvz.org
Thu Jul 23 12:22:39 PDT 2015


Unix sockets may be created with a non-absolute (relative) path
(when the kernel creates one it always uses AT_FDCWD for name resolving),
so when we collect sockets we see them as having names without a leading
slash.

In common cases the application doesn't change its working directory
after binding such a socket, but this is not always true.
So we need some kind of name resolver. A good candidate is the
IRMAP cache, but after a number of tests I found that it might
slow things down dramatically. Thus we need a more
intelligent approach here.

For now, for common applications such as postfix, fetching the
dumpee's working directory and root is enough. So here is what we do:

 - when a socket gets collected from the diag interface we remember
   its relative name parameters (device and inode) but postpone
   name resolving so as not to incur a perf penalty until really needed

 - when we meet a socket to dump with a relative name assigned we
   try $cwd/name and $root/name for this socket to check
   if it has been created in one of those directories. On success we
   simply remember the directory in the image, and on restore of such a
   socket we call a chdir helper to change the working dir and use the
   relative name

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 protobuf/sk-unix.proto |   5 ++
 sk-unix.c              | 122 +++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 118 insertions(+), 9 deletions(-)

diff --git a/protobuf/sk-unix.proto b/protobuf/sk-unix.proto
index 7dd0765c93e1..e2b5c6806138 100644
--- a/protobuf/sk-unix.proto
+++ b/protobuf/sk-unix.proto
@@ -40,4 +40,9 @@ message unix_sk_entry {
 	optional sk_shutdown		shutdown	= 12;
 
 	optional file_perms_entry	file_perms	= 13;
+
+	/*
+	 * Relative socket name may have prefix.
+	 */
+	optional bytes			name_dir	= 14;
 }
diff --git a/sk-unix.c b/sk-unix.c
index 16f04a8e0f64..a46d0bc55ec9 100644
--- a/sk-unix.c
+++ b/sk-unix.c
@@ -27,6 +27,7 @@
 #include "plugin.h"
 #include "namespaces.h"
 #include "pstree.h"
+#include "irmap.h"
 
 #include "protobuf.h"
 #include "protobuf/sk-unix.pb-c.h"
@@ -34,6 +35,12 @@
 #undef	LOG_PREFIX
 #define LOG_PREFIX "sk unix: "
 
+typedef struct {
+	char			*dir;		/* resolved location, set by resolve_rel_name(); NULL until then */
+	unsigned int		udiag_vfs_dev;	/* device of the bound inode, copied from UNIX_DIAG_VFS */
+	unsigned int		udiag_vfs_ino;	/* inode number, copied from UNIX_DIAG_VFS */
+} rel_name_desc_t;
+
 struct unix_sk_desc {
 	struct socket_desc	sd;
 	unsigned int		type;
@@ -43,6 +50,7 @@ struct unix_sk_desc {
 	unsigned int		wqlen;
 	unsigned int		namelen;
 	char			*name;
+	rel_name_desc_t		*rel_name;
 	unsigned int		nr_icons;
 	unsigned int		*icons;
 	unsigned char		shutdown;
@@ -148,6 +156,62 @@ static int write_unix_entry(struct unix_sk_desc *sk)
 	return ret;
 }
 
+/*
+ * Resolve the directory of a relatively named socket: probe the dumpee's
+ * cwd and root for <dir>/<name> matching the kernel-reported dev/inode.
+ * Fills rel_name->dir and sk's mode/uid/gid; returns 0 or negative.
+ */
+static int resolve_rel_name(struct unix_sk_desc *sk, const struct fd_parms *p)
+{
+	rel_name_desc_t *rel_name = sk->rel_name;
+	const char *dirs[] = { "cwd", "root" };
+	int mntns_root, i;
+	struct ns_id *ns;
+
+	ns = lookup_ns_by_id(root_item->ids->mnt_ns_id, &mnt_ns_desc);
+	mntns_root = ns ? mntns_get_root_fd(ns) : -1;
+	if (mntns_root < 0)
+		return -ENOENT;
+	pr_debug("Resolving relative name %s for socket %x\n", sk->name, sk->sd.ino);
+
+	for (i = 0; i < ARRAY_SIZE(dirs); i++) {
+		char dir[PATH_MAX], path[PATH_MAX];
+		struct stat st;
+		int ret;
+
+		snprintf(path, sizeof(path), "/proc/%d/%s", p->pid, dirs[i]);
+		ret = readlink(path, dir, sizeof(dir));
+		if (ret < 0 || (size_t)ret == sizeof(dir)) {
+			pr_err("Can't readlink for %s\n", dirs[i]);
+			return -1;
+		}
+		dir[ret] = 0;
+
+		/* Leading dot: the lookup is relative to the mntns root fd */
+		snprintf(path, sizeof(path), ".%s/%s", dir, sk->name);
+		if (fstatat(mntns_root, path, &st, 0)) {
+			if (errno == ENOENT)
+				continue;
+			goto err;
+		}
+		if (st.st_ino != rel_name->udiag_vfs_ino ||
+		    !phys_stat_dev_match(st.st_dev, rel_name->udiag_vfs_dev, ns, path))
+			continue;
+		/* Store the directory, not the socket path: name_dir is a chdir() target */
+		rel_name->dir = xstrdup(dir);
+		if (!rel_name->dir)
+			return -ENOMEM;
+		sk->mode = st.st_mode;
+		sk->uid	= st.st_uid;
+		sk->gid	= st.st_gid;
+		return 0;
+	}
+
+err:
+	pr_err("Can't resolve name for socket %#x\n", rel_name->udiag_vfs_ino);
+	return -ENOENT;
+}
+
 static int dump_one_unix_fd(int lfd, u32 id, const struct fd_parms *p)
 {
 	struct unix_sk_desc *sk, *peer;
@@ -198,6 +262,14 @@ static int dump_one_unix_fd(int lfd, u32 id, const struct fd_parms *p)
 	ue->opts	= skopts;
 	ue->uflags	= 0;
 
+	if (sk->rel_name) {
+		if (resolve_rel_name(sk, p))
+			goto err;
+		ue->has_name_dir = true;
+		ue->name_dir.len  = (size_t)strlen(sk->rel_name->dir) + 1;
+		ue->name_dir.data = (void *)sk->rel_name->dir;
+	}
+
 	/*
 	 * Check if this socket is connected to criu service.
 	 * Dump it like closed one and mark it for restore.
@@ -397,17 +469,27 @@ static int unix_process_name(struct unix_sk_desc *d, const struct unix_diag_msg
 		char rpath[PATH_MAX];
 		struct stat st;
 
-		if (name[0] != '/') {
-			pr_warn("Relative bind path '%s' unsupported\n", name);
-			goto skip;
-		}
-
 		if (!tb[UNIX_DIAG_VFS]) {
 			pr_err("Bound socket w/o inode %#x\n", m->udiag_ino);
 			goto skip;
 		}
 
 		uv = RTA_DATA(tb[UNIX_DIAG_VFS]);
+		if (name[0] != '/') {
+			/*
+			 * Relative names are resolved later, at the first
+			 * dump attempt.
+			 */
+			rel_name_desc_t *rel_name = xzalloc(sizeof(*rel_name));
+			if (!rel_name)
+				return -ENOMEM;
+			rel_name->udiag_vfs_dev = uv->udiag_vfs_dev;
+			rel_name->udiag_vfs_ino = uv->udiag_vfs_ino;
+
+			d->rel_name = rel_name;
+			goto postprone;
+		}
+
 		snprintf(rpath, sizeof(rpath), ".%s", name);
 		if (fstatat(mntns_root, rpath, &st, 0)) {
 			if (errno != ENOENT) {
@@ -444,6 +526,7 @@ static int unix_process_name(struct unix_sk_desc *d, const struct unix_diag_msg
 		d->gid	= st.st_gid;
 	}
 
+postprone:
 	d->namelen = len;
 	d->name = name;
 	return 0;
@@ -641,6 +724,7 @@ struct unix_sk_info {
 	UnixSkEntry *ue;
 	struct list_head list;
 	char *name;
+	char *name_dir;
 	unsigned flags;
 	struct unix_sk_info *peer;
 	struct file_desc d;
@@ -686,6 +770,16 @@ static int shutdown_unix_sk(int sk, struct unix_sk_info *ui)
 	return 0;
 }
 
+/* chdir() into ui->name_dir, if set, before a relative sun_path is used; the old cwd is not restored. */
+static int prep_unix_sk_cwd(struct unix_sk_info *ui)
+{
+	if (!ui->name_dir || !chdir(ui->name_dir))
+		return 0;
+
+	pr_perror("Can't change working dir %s", ui->name_dir);
+	return -1;
+}
+
 static int post_open_unix_sk(struct file_desc *d, int fd)
 {
 	struct unix_sk_info *ui;
@@ -714,6 +808,9 @@ static int post_open_unix_sk(struct file_desc *d, int fd)
 	addr.sun_family = AF_UNIX;
 	memcpy(&addr.sun_path, peer->name, peer->ue->name.len);
 
+	if (prep_unix_sk_cwd(ui))
+		return -1;
+
 	if (connect(fd, (struct sockaddr *)&addr,
 				sizeof(addr.sun_family) +
 				peer->ue->name.len) < 0) {
@@ -754,6 +851,9 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
 	addr.sun_family = AF_UNIX;
 	memcpy(&addr.sun_path, ui->name, ui->ue->name.len);
 
+	if (prep_unix_sk_cwd(ui))
+		return -1;
+
 	if (bind(sk, (struct sockaddr *)&addr,
 				sizeof(addr.sun_family) + ui->ue->name.len)) {
 		pr_perror("Can't bind socket");
@@ -769,8 +869,11 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
 			return -1;
 		}
 
-		memcpy(fname, ui->name, ui->ue->name.len);
-		fname[ui->ue->name.len] = '\0';
+		if (!ui->name_dir) {
+			memcpy(fname, ui->name, ui->ue->name.len);
+			fname[ui->ue->name.len] = '\0';
+		} else
+			snprintf(fname, PATH_MAX, "%s/%s", ui->name_dir, ui->name);
 
 		if (chown(fname, perms->uid, perms->gid) == -1) {
 			pr_perror("Unable to change file owner and group");
@@ -1036,9 +1139,10 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base)
 	futex_init(&ui->prepared);
 	ui->peer = NULL;
 	ui->flags = 0;
-	pr_info(" `- Got %#x peer %#x (name %s)\n",
+	pr_info(" `- Got %#x peer %#x (name %s dir %s)\n",
 		ui->ue->ino, ui->ue->peer,
-		ui->name ? (ui->name[0] ? ui->name : &ui->name[1]) : "-");
+		ui->name ? (ui->name[0] ? ui->name : &ui->name[1]) : "-",
+		ui->name_dir ? ui->name_dir : "-");
 	list_add_tail(&ui->list, &unix_sockets);
 	return file_desc_add(&ui->d, ui->ue->id, &unix_desc_ops);
 }
-- 
2.4.3



More information about the CRIU mailing list