[CRIU] [PATCH 17/18] sk-unix: Add ability to restore sockets with deleted vfs addresses

Kirill Tkhai ktkhai at virtuozzo.com
Mon Apr 24 05:51:40 PDT 2017


I split the patch into 4 separate patches. Could you please do the same? Otherwise,
it's difficult to understand what is happening there.

On 12.04.2017 16:58, Cyrill Gorcunov wrote:
> If dgram sockets are bound with vfs name and the name removed
> from the file system we can't bind/connect to such name. To
> resolve it we do the following
> 
>  - all ghost names previously gathered into chains are
>    changed to have more or less unique names by adding postfixes
> 
>  - upon socket opening, its binding/connection is delayed
>    until the previous copy is removed
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
>  criu/sk-unix.c | 261 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 238 insertions(+), 23 deletions(-)
> 
> diff --git a/criu/sk-unix.c b/criu/sk-unix.c
> index cdad971e3dea..b0f616e7b299 100644
> --- a/criu/sk-unix.c
> +++ b/criu/sk-unix.c
> @@ -9,6 +9,7 @@
>  #include <sys/un.h>
>  #include <stdlib.h>
>  #include <dlfcn.h>
> +#include <libgen.h>
>  
>  #include "libnetlink.h"
>  #include "cr_options.h"
> @@ -29,6 +30,7 @@
>  #include "external.h"
>  #include "crtools.h"
>  #include "rst-malloc.h"
> +#include "atomic.h"
>  
>  #include "protobuf.h"
>  #include "images/sk-unix.pb-c.h"
> @@ -102,6 +104,7 @@ struct unix_sk_listen_icon {
>  typedef struct {
>  	struct list_head	list;
>  	struct list_head	children;
> +	char			*name_dir;
>  	char			*name;
>  	size_t			namelen;
>  } ghost_addr_t;
> @@ -119,13 +122,15 @@ static struct unix_sk_listen_icon *lookup_unix_listen_icons(int peer_ino)
>  	return NULL;
>  }
>  
> -static ghost_addr_t *lookup_ghost_addr(void *name, size_t namelen)
> +static ghost_addr_t *lookup_ghost_addr(void *name_dir, void *name, size_t namelen)
>  {
>  	ghost_addr_t *ga;
>  
>  	list_for_each_entry(ga, &unix_ghost_addr, list) {
>  		if (ga->namelen != namelen ||
> -		    memcmp(ga->name, name, namelen))
> +		    memcmp(ga->name, name, namelen) ||
> +		    ((unsigned long)name_dir ^
> +		     (unsigned long)(void *)ga->name_dir))
>  			continue;
>  		return ga;
>  	}
> @@ -812,6 +817,10 @@ struct unix_sk_info {
>  	struct list_head	node;		/* To link in peer's connected list  */
>  
>  	struct list_head	ghost_addr_node;
> +	struct list_head	ghost_wait_head;
> +	struct list_head	ghost_waiters;
> +	struct unix_sk_info	*ghost_master;
> +	atomic_t		ghost_counter;
>  
>  	/*
>  	 * For DGRAM sockets with queues, we should only restore the queue
> @@ -823,6 +832,8 @@ struct unix_sk_info {
>  	bool			listen;
>  };
>  
> +static int bind_unix_sk(int sk, struct unix_sk_info *ui);
> +
>  #define USK_PAIR_MASTER		(1 << 0)
>  #define USK_PAIR_SLAVE		(1 << 1)
>  #define USK_GHOST_NAME		(1 << 2)
> @@ -853,6 +864,18 @@ static int wake_connected_sockets(struct unix_sk_info *ui)
>  	return 0;
>  }
>  
> +static int wake_ghost_waiters(struct unix_sk_info *ui)
> +{
> +	struct fdinfo_list_entry *fle;
> +	struct unix_sk_info *tmp;
> +
> +	list_for_each_entry(tmp, &ui->ghost_wait_head, ghost_waiters) {
> +		fle = file_master(&tmp->d);
> +		set_fds_event(fle->pid);
> +	}
> +	return 0;
> +}
> +
>  static bool peer_is_not_prepared(struct unix_sk_info *peer)
>  {
>  	if (peer->ue->state != TCP_LISTEN)
> @@ -961,10 +984,27 @@ static int post_open_unix_sk(struct file_desc *d, int fd)
>  {
>  	struct unix_sk_info *ui;
>  	struct unix_sk_info *peer;
> +	struct unix_sk_info *gm;
>  	struct sockaddr_un addr;
>  	int cwd_fd = -1, root_fd = -1;
>  
>  	ui = container_of(d, struct unix_sk_info, d);
> +	gm = ui->ghost_master;
> +
> +	if (ui->flags & USK_GHOST_WAIT) {
> +		if (!(gm->flags & USK_ADDR_RDY))
> +			return 1;
> +		if (ui->flags & (USK_PAIR_MASTER | USK_PAIR_SLAVE)) {

And where are standalone sockets bound if they have the USK_GHOST_WAIT flag set?

> +			if (bind_unix_sk(fd, ui))
> +				return -1;
> +			return 0;
> +		}
> +	} else if (ui->flags & USK_GHOST_NAME) {
> +		if (bind_unix_sk(fd, ui))
> +			return -1;
> +		return 0;
> +	}
> +
>  	BUG_ON((ui->flags & (USK_PAIR_MASTER | USK_PAIR_SLAVE)) ||
>  			(ui->ue->uflags & (USK_CALLBACK | USK_INHERIT)));
>  
> @@ -993,6 +1033,15 @@ static int post_open_unix_sk(struct file_desc *d, int fd)
>  		return -1;
>  	}
>  
> +	if (gm) {
> +		if (atomic_dec_and_test(&gm->ghost_counter)) {
> +			pr_debug("ghost: Unlinking %s\n", gm->ue->name.data);
> +			if (unlink((char *)gm->ue->name.data))
> +				pr_perror("ghost: Failed to unlink master %s",
> +					  gm->ue->name.data);
> +		}
> +	}
> +
>  	revert_unix_sk_cwd(peer, &cwd_fd, &root_fd);
>  
>  	if (peer->queuer == ui->ue->ino && restore_sk_queue(fd, peer->ue->id))
> @@ -1001,6 +1050,63 @@ static int post_open_unix_sk(struct file_desc *d, int fd)
>  	return restore_sk_common(fd, ui);
>  }
>  
> +/*
> + * When path where socket lives is deleted, we need to reconstruct
> + * it back up but allow caller to remove it after.
> + */
> +static int bind_on_deleted(int sk, struct unix_sk_info *ui)
> +{
> +	char path[PATH_MAX], *pos;
> +	struct sockaddr_un addr;
> +	int ret;
> +
> +	if (ui->ue->name.len >= sizeof(path)) {
> +		pr_err("Too long name for socket\n");
> +		return -ENOSPC;
> +	}
> +
> +	memcpy(path, ui->name, ui->ue->name.len);
> +	path[ui->ue->name.len] = '\0';
> +
> +	for (pos = strrchr(path, '/'); pos;
> +	     pos = strrchr(path, '/')) {
> +		*pos = '\0';
> +
> +		ret = access(path, R_OK | W_OK | X_OK);
> +		if (ret == 0)
> +			break;
> +
> +		if (errno != ENOENT) {
> +			ret = -errno;
> +			pr_perror("Can't access %s\n", path);
> +			return ret;
> +		}
> +	}
> +
> +	memcpy(path, ui->name, ui->ue->name.len);
> +	path[ui->ue->name.len] = '\0';
> +
> +	pos = dirname(path);
> +	ret = mkdirpat(AT_FDCWD, pos, 0755);
> +	if (ret) {
> +		pr_err("Can't create %s\n", pos);
> +		return ret;
> +	}
> +
> +	memset(&addr, 0, sizeof(addr));
> +	addr.sun_family = AF_UNIX;
> +	memcpy(&addr.sun_path, ui->name, ui->ue->name.len);
> +
> +	ret = bind(sk, (struct sockaddr *)&addr,
> +		   sizeof(addr.sun_family) + ui->ue->name.len);
> +	if (ret < 0) {
> +		pr_perror("Can't bind on socket %s", (char *)ui->ue->name.data);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
>  static int bind_unix_sk(int sk, struct unix_sk_info *ui)
>  {
>  	struct sockaddr_un addr;
> @@ -1071,10 +1177,12 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
>  				ui->ue->deleted = false;
>  
>  			} else {
> -				pr_perror("Can't bind socket");
> -				goto done;
> +				if (bind_on_deleted(sk, ui))
> +					goto done;
> +				ui->flags |= USK_ADDR_RDY;
>  			}
>  		}
> +		ui->flags |= USK_ADDR_RDY;
>  
>  		if (*ui->name && ui->ue->file_perms) {
>  			FilePermsEntry *perms = ui->ue->file_perms;
> @@ -1099,9 +1207,22 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
>  			}
>  		}
>  
> -		if (ui->ue->deleted && unlink((char *)ui->ue->name.data) < 0) {
> -			pr_perror("failed to unlink %s", ui->ue->name.data);
> -			goto done;
> +		if (ui->ue->deleted || ui->ghost_master) {
> +			struct unix_sk_info *gm = ui->ghost_master;
> +			bool do_unlink = true;
> +
> +			if (gm && !atomic_dec_and_test(&gm->ghost_counter))
> +				do_unlink = false;
> +
> +			if (!atomic_dec_and_test(&ui->ghost_counter))
> +				do_unlink = false;
> +
> +			if (do_unlink) {
> +				pr_debug("ghost: Unlinking %s\n", ui->ue->name.data);
> +				if (unlink((char *)ui->ue->name.data))
> +					pr_perror("ghost: Failed to unlink %s",
> +						  ui->ue->name.data);
> +			}
>  		}
>  	}
>  
> @@ -1110,6 +1231,8 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
>  		wake_connected_sockets(ui);
>  	}
>  
> +	wake_ghost_waiters(ui);
> +
>  	ret = 0;
>  done:
>  	revert_unix_sk_cwd(ui, &cwd_fd, &root_fd);
> @@ -1118,7 +1241,7 @@ done:
>  
>  static int open_unixsk_pair_master(struct unix_sk_info *ui, int *new_fd)
>  {
> -	int sk[2];
> +	int sk[2], ret = 0;
>  	struct unix_sk_info *peer = ui->peer;
>  
>  	pr_info("Opening pair master (id %#x ino %#x peer %#x)\n",
> @@ -1137,8 +1260,11 @@ static int open_unixsk_pair_master(struct unix_sk_info *ui, int *new_fd)
>  	if (restore_sk_queue(sk[1], ui->ue->id))
>  		return -1;
>  
> -	if (bind_unix_sk(sk[0], ui))
> -		return -1;
> +	if (!(ui->flags & USK_GHOST_WAIT)) {
> +		if (bind_unix_sk(sk[0], ui))
> +			return -1;

With USK_GHOST_WAIT you mark sockets which are connected to deleted peers (according to resolve_unix_ghosts()).
That is a limitation at connect() time, not at bind() time. So why can't they be bound right here?
USK_GHOST_NAME should be used here, shouldn't it?

> +	} else
> +		ret = 1;
>  
>  	if (restore_sk_common(sk[0], ui))
>  		return -1;
> @@ -1151,12 +1277,12 @@ static int open_unixsk_pair_master(struct unix_sk_info *ui, int *new_fd)
>  	close(sk[1]);
>  
>  	*new_fd = sk[0];
> -	return 0;
> +	return ret;
>  }
>  
>  static int open_unixsk_pair_slave(struct unix_sk_info *ui, int *new_fd)
>  {
> -	int sk, ret;
> +	int sk, ret = 0;
>  
>  	ret = recv_desc_from_peer(&ui->d, &sk);
>  	if (ret != 0) {
> @@ -1165,19 +1291,22 @@ static int open_unixsk_pair_slave(struct unix_sk_info *ui, int *new_fd)
>  		return ret;
>  	}
>  
> -	if (bind_unix_sk(sk, ui))
> -		return -1;
> +	if (!(ui->flags & USK_GHOST_WAIT)) {
> +		if (bind_unix_sk(sk, ui))
> +			return -1;

The same as above.

> +	} else
> +		ret = 1;
>  
>  	if (restore_sk_common(sk, ui))
>  		return -1;
>  
>  	*new_fd = sk;
> -	return 0;
> +	return ret;
>  }
>  
>  static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
>  {
> -	int sk;
> +	int sk, ret = 0;
>  
>  	pr_info("Opening standalone socket (id %#x ino %#x peer %#x)\n",
>  			ui->ue->id, ui->ue->ino, ui->ue->peer);
> @@ -1295,8 +1424,11 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
>  		}
>  	}
>  
> -	if (bind_unix_sk(sk, ui))
> -		return -1;
> +	if (!(ui->flags & USK_GHOST_WAIT)) {
> +		if (bind_unix_sk(sk, ui))
> +			return -1;
> +	} else
> +		ret = 1;

Here ret = 1 and the socket is not bound, but below you still wake connected sockets
in the (ui->ue->state == TCP_LISTEN) branch.

>  
>  	if (ui->ue->state == TCP_LISTEN) {
>  		pr_info("\tPutting %#x into listen state\n", ui->ue->ino);
> @@ -1323,7 +1455,7 @@ out:
>  		return -1;
>  
>  	*new_fd = sk;
> -	return 0;
> +	return ret;
>  }
>  
>  static int open_unix_sk(struct file_desc *d, int *new_fd)
> @@ -1332,12 +1464,12 @@ static int open_unix_sk(struct file_desc *d, int *new_fd)
>  	struct unix_sk_info *ui;
>  	int ret;
>  
> +	ui = container_of(d, struct unix_sk_info, d);
> +
>  	fle = file_master(d);
>  	if (fle->stage >= FLE_OPEN)
>  		return post_open_unix_sk(d, fle->fe->fd);
>  
> -	ui = container_of(d, struct unix_sk_info, d);
> -

Why do we need this HUNK?

>  	if (inherited_fd(d, new_fd)) {
>  		ui->ue->uflags |= USK_INHERIT;
>  		ret = *new_fd >= 0 ? 0 : -1;
> @@ -1408,6 +1540,10 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
>  	ui->name_dir = (void *)ui->ue->name_dir;
>  
>  	INIT_LIST_HEAD(&ui->ghost_addr_node);
> +	INIT_LIST_HEAD(&ui->ghost_wait_head);
> +	INIT_LIST_HEAD(&ui->ghost_waiters);
> +	ui->ghost_master = NULL;
> +	atomic_set(&ui->ghost_counter, 1);
>  
>  	if (add_post_prepare_cb_once(resolve_unix_peers, NULL))
>  		return -1;
> @@ -1425,7 +1561,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
>  		    ui->ue->type == SOCK_DGRAM) {
>  			ghost_addr_t *ga;
>  
> -			ga = lookup_ghost_addr(ui->name, ui->ue->name.len);
> +			ga = lookup_ghost_addr(ui->name_dir, ui->name, ui->ue->name.len);
>  			if (!ga) {
>  				ga = shmalloc(sizeof(*ga));
>  				if (!ga)
> @@ -1433,6 +1569,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
>  
>  				INIT_LIST_HEAD(&ga->children);
>  
> +				ga->name_dir	= (void *)ui->name_dir;
>  				ga->name	= (void *)ui->name;
>  				ga->namelen	= ui->ue->name.len;
>  
> @@ -1515,6 +1652,84 @@ static void interconnected_pair(struct unix_sk_info *ui, struct unix_sk_info *pe
>  	}
>  }
>  
> +static int ghost_new_name(char *name, size_t namelen,
> +			  char **name_new, size_t *namelen_new)
> +{
> +	static unsigned int cnt = 0;
> +	char sname[64];
> +	size_t k;
> +
> +	k = snprintf(sname, sizeof(sname), "criu-%u", cnt++);
> +	*namelen_new = namelen + k + 1;
> +	if (*namelen_new > UNIX_PATH_MAX) {
> +		pr_err("ghost: New name for socket is too long\n");
> +		return -1;
> +	}
> +
> +	*name_new = shmalloc(*namelen_new);
> +	if (!*name_new) {
> +		pr_err("ghost: Can't allocate new name for socket\n");
> +		return -ENOMEM;
> +	}
> +
> +	k = snprintf(*name_new, *namelen_new, "%s-%s", name, sname) + 1;

There was:
	*namelen_new = namelen + 1 + k;

Q: Is the trailing '\0' byte included in namelen? That is, does namelen in the image always
include the trailing '\0'? (We printf newname later, so it must be there. Just a question.)

> +	if (k != *namelen_new) {
> +		pr_err("ghost: Name stripped\n");
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +static int resolve_unix_ghosts(void)
> +{
> +	struct unix_sk_info *ui, *t;
> +	ghost_addr_t *ga;
> +
> +	pr_debug("ghost: Resolving addresses\n");
> +
> +	list_for_each_entry(ga, &unix_ghost_addr, list) {
> +		pr_debug("ghost: address %s\n", ga->name);
> +
> +		list_for_each_entry(ui, &ga->children, ghost_addr_node) {
> +			size_t newnamelen;
> +			char *newname;
> +
> +			pr_debug("\tghost: ino %#x peer %#x\n", ui->ue->ino,
> +				 ui->peer ? ui->peer->ue->ino : 0);
> +
> +			if (ghost_new_name(ga->name, ga->namelen,
> +					   &newname, &newnamelen))
> +				return -1;

Why can't we generate the new name in collect_one_unixsk()? In that case we would
not need to iterate over unix_sockets once again in the loop just below, and could do all
the work in the resolve_unix_peers() loop.

> +
> +			pr_debug("\tghost: name transition %s -> %s\n",
> +				 ui->name, newname);
> +			ui->name = newname;
> +			ui->ue->name.len = newnamelen;
> +			ui->ue->name.data = (void *)newname;
> +			ui->flags |= USK_GHOST_NAME;
> +
> +			unlink_stale(ui);

What if there is a file with the same name on disk?

> +
> +			list_for_each_entry(t, &unix_sockets, list) {
> +				if (t->flags & (USK_GHOST_NAME | USK_GHOST_WAIT))
> +					continue;

Why do we need this check when there is one more check (t->peer != ui)? A socket t
which has t->peer == ui can't have USK_GHOST_WAIT (at least) set at this moment, so
this check is redundant.

> +				if (t->peer != ui)
> +					continue;
> +				pr_debug("\t\tghost: connected to us %#x -> %#x\n",
> +					 t->ue->ino, ui->ue->ino);
> +
> +				t->flags |= USK_GHOST_NAME | USK_GHOST_WAIT;
> +				t->ghost_master = ui;

We set ghost_master in only one place and never change it. Do we really need
a dedicated field unix_sk_info::ghost_master when it duplicates unix_sk_info::peer?
We could make ghost_master() a helper instead.

> +				atomic_inc(&ui->ghost_counter);
> +				list_add(&t->ghost_waiters, &ui->ghost_wait_head);
> +			}
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>  static int resolve_unix_peers(void *unused)
>  {
>  	struct unix_sk_info *ui, *peer;
> @@ -1559,7 +1774,7 @@ static int resolve_unix_peers(void *unused)
>  
>  	}
>  
> -	return 0;
> +	return resolve_unix_ghosts();
>  }
>  
>  int unix_sk_id_add(unsigned int ino)

I'm not sure about circular dependencies; I will write if I find something.
-------------- next part --------------


From: Kirill Tkhai <ktkhai at virtuozzo.com>


---
 criu/sk-unix.c |   18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index cdad971e..1f14e95d 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -9,6 +9,7 @@
 #include <sys/un.h>
 #include <stdlib.h>
 #include <dlfcn.h>
+#include <libgen.h>
 
 #include "libnetlink.h"
 #include "cr_options.h"
@@ -29,6 +30,7 @@
 #include "external.h"
 #include "crtools.h"
 #include "rst-malloc.h"
+#include "atomic.h"
 
 #include "protobuf.h"
 #include "images/sk-unix.pb-c.h"
@@ -102,6 +104,7 @@ struct unix_sk_listen_icon {
 typedef struct {
 	struct list_head	list;
 	struct list_head	children;
+	char			*name_dir;
 	char			*name;
 	size_t			namelen;
 } ghost_addr_t;
@@ -812,6 +815,10 @@ struct unix_sk_info {
 	struct list_head	node;		/* To link in peer's connected list  */
 
 	struct list_head	ghost_addr_node;
+	struct list_head	ghost_wait_head;
+	struct list_head	ghost_waiters;
+	struct unix_sk_info	*ghost_master;
+	atomic_t		ghost_counter;
 
 	/*
 	 * For DGRAM sockets with queues, we should only restore the queue
@@ -823,6 +830,8 @@ struct unix_sk_info {
 	bool			listen;
 };
 
+static int bind_unix_sk(int sk, struct unix_sk_info *ui);
+
 #define USK_PAIR_MASTER		(1 << 0)
 #define USK_PAIR_SLAVE		(1 << 1)
 #define USK_GHOST_NAME		(1 << 2)
@@ -1332,12 +1341,12 @@ static int open_unix_sk(struct file_desc *d, int *new_fd)
 	struct unix_sk_info *ui;
 	int ret;
 
+	ui = container_of(d, struct unix_sk_info, d);
+
 	fle = file_master(d);
 	if (fle->stage >= FLE_OPEN)
 		return post_open_unix_sk(d, fle->fe->fd);
 
-	ui = container_of(d, struct unix_sk_info, d);
-
 	if (inherited_fd(d, new_fd)) {
 		ui->ue->uflags |= USK_INHERIT;
 		ret = *new_fd >= 0 ? 0 : -1;
@@ -1408,6 +1417,10 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
 	ui->name_dir = (void *)ui->ue->name_dir;
 
 	INIT_LIST_HEAD(&ui->ghost_addr_node);
+	INIT_LIST_HEAD(&ui->ghost_wait_head);
+	INIT_LIST_HEAD(&ui->ghost_waiters);
+	ui->ghost_master = NULL;
+	atomic_set(&ui->ghost_counter, 1);
 
 	if (add_post_prepare_cb_once(resolve_unix_peers, NULL))
 		return -1;
@@ -1433,6 +1446,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
 
 				INIT_LIST_HEAD(&ga->children);
 
+				ga->name_dir	= (void *)ui->name_dir;
 				ga->name	= (void *)ui->name;
 				ga->namelen	= ui->ue->name.len;
 
-------------- next part --------------


From: Kirill Tkhai <ktkhai at virtuozzo.com>


---
 criu/sk-unix.c |    8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index 1f14e95d..9e6adb78 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -122,13 +122,15 @@ static struct unix_sk_listen_icon *lookup_unix_listen_icons(int peer_ino)
 	return NULL;
 }
 
-static ghost_addr_t *lookup_ghost_addr(void *name, size_t namelen)
+static ghost_addr_t *lookup_ghost_addr(void *name_dir, void *name, size_t namelen)
 {
 	ghost_addr_t *ga;
 
 	list_for_each_entry(ga, &unix_ghost_addr, list) {
 		if (ga->namelen != namelen ||
-		    memcmp(ga->name, name, namelen))
+		    memcmp(ga->name, name, namelen) ||
+		    ((unsigned long)name_dir ^
+		     (unsigned long)(void *)ga->name_dir))
 			continue;
 		return ga;
 	}
@@ -1438,7 +1440,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
 		    ui->ue->type == SOCK_DGRAM) {
 			ghost_addr_t *ga;
 
-			ga = lookup_ghost_addr(ui->name, ui->ue->name.len);
+			ga = lookup_ghost_addr(ui->name_dir, ui->name, ui->ue->name.len);
 			if (!ga) {
 				ga = shmalloc(sizeof(*ga));
 				if (!ga)
-------------- next part --------------


From: Kirill Tkhai <ktkhai at virtuozzo.com>


---
 criu/sk-unix.c |   80 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 79 insertions(+), 1 deletion(-)

diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index 9e6adb78..ee5803d9 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -1531,6 +1531,84 @@ static void interconnected_pair(struct unix_sk_info *ui, struct unix_sk_info *pe
 	}
 }
 
+static int ghost_new_name(char *name, size_t namelen,
+			  char **name_new, size_t *namelen_new)
+{
+	static unsigned int cnt = 0;
+	char sname[64];
+	size_t k;
+
+	k = snprintf(sname, sizeof(sname), "criu-%u", cnt++);
+	*namelen_new = namelen + k + 1;
+	if (*namelen_new > UNIX_PATH_MAX) {
+		pr_err("ghost: New name for socket is too long\n");
+		return -1;
+	}
+
+	*name_new = shmalloc(*namelen_new);
+	if (!*name_new) {
+		pr_err("ghost: Can't allocate new name for socket\n");
+		return -ENOMEM;
+	}
+
+	k = snprintf(*name_new, *namelen_new, "%s-%s", name, sname) + 1;
+	if (k != *namelen_new) {
+		pr_err("ghost: Name stripped\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int resolve_unix_ghosts(void)
+{
+	struct unix_sk_info *ui, *t;
+	ghost_addr_t *ga;
+
+	pr_debug("ghost: Resolving addresses\n");
+
+	list_for_each_entry(ga, &unix_ghost_addr, list) {
+		pr_debug("ghost: address %s\n", ga->name);
+
+		list_for_each_entry(ui, &ga->children, ghost_addr_node) {
+			size_t newnamelen;
+			char *newname;
+
+			pr_debug("\tghost: ino %#x peer %#x\n", ui->ue->ino,
+				 ui->peer ? ui->peer->ue->ino : 0);
+
+			if (ghost_new_name(ga->name, ga->namelen,
+					   &newname, &newnamelen))
+				return -1;
+
+			pr_debug("\tghost: name transition %s -> %s\n",
+				 ui->name, newname);
+			ui->name = newname;
+			ui->ue->name.len = newnamelen;
+			ui->ue->name.data = (void *)newname;
+			ui->flags |= USK_GHOST_NAME;
+
+			unlink_stale(ui);
+
+			list_for_each_entry(t, &unix_sockets, list) {
+				if (t->flags & (USK_GHOST_NAME | USK_GHOST_WAIT))
+					continue;
+				if (t->peer != ui)
+					continue;
+				pr_debug("\t\tghost: connected to us %#x -> %#x\n",
+					 t->ue->ino, ui->ue->ino);
+
+				t->flags |= USK_GHOST_NAME | USK_GHOST_WAIT;
+				t->ghost_master = ui;
+				atomic_inc(&ui->ghost_counter);
+				list_add(&t->ghost_waiters, &ui->ghost_wait_head);
+			}
+		}
+	}
+
+	return 0;
+}
+
 static int resolve_unix_peers(void *unused)
 {
 	struct unix_sk_info *ui, *peer;
@@ -1575,7 +1653,7 @@ static int resolve_unix_peers(void *unused)
 
 	}
 
-	return 0;
+	return resolve_unix_ghosts();
 }
 
 int unix_sk_id_add(unsigned int ino)
-------------- next part --------------
sk-unix: Add ability to restore sockets with deleted vfs addresses

From: Cyrill Gorcunov <gorcunov at openvz.org>

If dgram sockets are bound with vfs name and the name removed
from the file system we can't bind/connect to such name. To
resolve it we do the following

 - all ghost names previously gathered into chains are
   changed to have more or less unique names by adding postfixes

 - upon socket opening, its binding/connection is delayed
   until the previous copy is removed

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 criu/sk-unix.c |  155 ++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 138 insertions(+), 17 deletions(-)

diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index ee5803d9..b0f616e7 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -864,6 +864,18 @@ static int wake_connected_sockets(struct unix_sk_info *ui)
 	return 0;
 }
 
+static int wake_ghost_waiters(struct unix_sk_info *ui)
+{
+	struct fdinfo_list_entry *fle;
+	struct unix_sk_info *tmp;
+
+	list_for_each_entry(tmp, &ui->ghost_wait_head, ghost_waiters) {
+		fle = file_master(&tmp->d);
+		set_fds_event(fle->pid);
+	}
+	return 0;
+}
+
 static bool peer_is_not_prepared(struct unix_sk_info *peer)
 {
 	if (peer->ue->state != TCP_LISTEN)
@@ -972,10 +984,27 @@ static int post_open_unix_sk(struct file_desc *d, int fd)
 {
 	struct unix_sk_info *ui;
 	struct unix_sk_info *peer;
+	struct unix_sk_info *gm;
 	struct sockaddr_un addr;
 	int cwd_fd = -1, root_fd = -1;
 
 	ui = container_of(d, struct unix_sk_info, d);
+	gm = ui->ghost_master;
+
+	if (ui->flags & USK_GHOST_WAIT) {
+		if (!(gm->flags & USK_ADDR_RDY))
+			return 1;
+		if (ui->flags & (USK_PAIR_MASTER | USK_PAIR_SLAVE)) {
+			if (bind_unix_sk(fd, ui))
+				return -1;
+			return 0;
+		}
+	} else if (ui->flags & USK_GHOST_NAME) {
+		if (bind_unix_sk(fd, ui))
+			return -1;
+		return 0;
+	}
+
 	BUG_ON((ui->flags & (USK_PAIR_MASTER | USK_PAIR_SLAVE)) ||
 			(ui->ue->uflags & (USK_CALLBACK | USK_INHERIT)));
 
@@ -1004,6 +1033,15 @@ static int post_open_unix_sk(struct file_desc *d, int fd)
 		return -1;
 	}
 
+	if (gm) {
+		if (atomic_dec_and_test(&gm->ghost_counter)) {
+			pr_debug("ghost: Unlinking %s\n", gm->ue->name.data);
+			if (unlink((char *)gm->ue->name.data))
+				pr_perror("ghost: Failed to unlink master %s",
+					  gm->ue->name.data);
+		}
+	}
+
 	revert_unix_sk_cwd(peer, &cwd_fd, &root_fd);
 
 	if (peer->queuer == ui->ue->ino && restore_sk_queue(fd, peer->ue->id))
@@ -1012,6 +1050,63 @@ static int post_open_unix_sk(struct file_desc *d, int fd)
 	return restore_sk_common(fd, ui);
 }
 
+/*
+ * When path where socket lives is deleted, we need to reconstruct
+ * it back up but allow caller to remove it after.
+ */
+static int bind_on_deleted(int sk, struct unix_sk_info *ui)
+{
+	char path[PATH_MAX], *pos;
+	struct sockaddr_un addr;
+	int ret;
+
+	if (ui->ue->name.len >= sizeof(path)) {
+		pr_err("Too long name for socket\n");
+		return -ENOSPC;
+	}
+
+	memcpy(path, ui->name, ui->ue->name.len);
+	path[ui->ue->name.len] = '\0';
+
+	for (pos = strrchr(path, '/'); pos;
+	     pos = strrchr(path, '/')) {
+		*pos = '\0';
+
+		ret = access(path, R_OK | W_OK | X_OK);
+		if (ret == 0)
+			break;
+
+		if (errno != ENOENT) {
+			ret = -errno;
+			pr_perror("Can't access %s\n", path);
+			return ret;
+		}
+	}
+
+	memcpy(path, ui->name, ui->ue->name.len);
+	path[ui->ue->name.len] = '\0';
+
+	pos = dirname(path);
+	ret = mkdirpat(AT_FDCWD, pos, 0755);
+	if (ret) {
+		pr_err("Can't create %s\n", pos);
+		return ret;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	memcpy(&addr.sun_path, ui->name, ui->ue->name.len);
+
+	ret = bind(sk, (struct sockaddr *)&addr,
+		   sizeof(addr.sun_family) + ui->ue->name.len);
+	if (ret < 0) {
+		pr_perror("Can't bind on socket %s", (char *)ui->ue->name.data);
+		return ret;
+	}
+
+	return 0;
+}
+
 static int bind_unix_sk(int sk, struct unix_sk_info *ui)
 {
 	struct sockaddr_un addr;
@@ -1082,10 +1177,12 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
 				ui->ue->deleted = false;
 
 			} else {
-				pr_perror("Can't bind socket");
-				goto done;
+				if (bind_on_deleted(sk, ui))
+					goto done;
+				ui->flags |= USK_ADDR_RDY;
 			}
 		}
+		ui->flags |= USK_ADDR_RDY;
 
 		if (*ui->name && ui->ue->file_perms) {
 			FilePermsEntry *perms = ui->ue->file_perms;
@@ -1110,9 +1207,22 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
 			}
 		}
 
-		if (ui->ue->deleted && unlink((char *)ui->ue->name.data) < 0) {
-			pr_perror("failed to unlink %s", ui->ue->name.data);
-			goto done;
+		if (ui->ue->deleted || ui->ghost_master) {
+			struct unix_sk_info *gm = ui->ghost_master;
+			bool do_unlink = true;
+
+			if (gm && !atomic_dec_and_test(&gm->ghost_counter))
+				do_unlink = false;
+
+			if (!atomic_dec_and_test(&ui->ghost_counter))
+				do_unlink = false;
+
+			if (do_unlink) {
+				pr_debug("ghost: Unlinking %s\n", ui->ue->name.data);
+				if (unlink((char *)ui->ue->name.data))
+					pr_perror("ghost: Failed to unlink %s",
+						  ui->ue->name.data);
+			}
 		}
 	}
 
@@ -1121,6 +1231,8 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
 		wake_connected_sockets(ui);
 	}
 
+	wake_ghost_waiters(ui);
+
 	ret = 0;
 done:
 	revert_unix_sk_cwd(ui, &cwd_fd, &root_fd);
@@ -1129,7 +1241,7 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
 
 static int open_unixsk_pair_master(struct unix_sk_info *ui, int *new_fd)
 {
-	int sk[2];
+	int sk[2], ret = 0;
 	struct unix_sk_info *peer = ui->peer;
 
 	pr_info("Opening pair master (id %#x ino %#x peer %#x)\n",
@@ -1148,8 +1260,11 @@ static int open_unixsk_pair_master(struct unix_sk_info *ui, int *new_fd)
 	if (restore_sk_queue(sk[1], ui->ue->id))
 		return -1;
 
-	if (bind_unix_sk(sk[0], ui))
-		return -1;
+	if (!(ui->flags & USK_GHOST_WAIT)) {
+		if (bind_unix_sk(sk[0], ui))
+			return -1;
+	} else
+		ret = 1;
 
 	if (restore_sk_common(sk[0], ui))
 		return -1;
@@ -1162,12 +1277,12 @@ static int open_unixsk_pair_master(struct unix_sk_info *ui, int *new_fd)
 	close(sk[1]);
 
 	*new_fd = sk[0];
-	return 0;
+	return ret;
 }
 
 static int open_unixsk_pair_slave(struct unix_sk_info *ui, int *new_fd)
 {
-	int sk, ret;
+	int sk, ret = 0;
 
 	ret = recv_desc_from_peer(&ui->d, &sk);
 	if (ret != 0) {
@@ -1176,19 +1291,22 @@ static int open_unixsk_pair_slave(struct unix_sk_info *ui, int *new_fd)
 		return ret;
 	}
 
-	if (bind_unix_sk(sk, ui))
-		return -1;
+	if (!(ui->flags & USK_GHOST_WAIT)) {
+		if (bind_unix_sk(sk, ui))
+			return -1;
+	} else
+		ret = 1;
 
 	if (restore_sk_common(sk, ui))
 		return -1;
 
 	*new_fd = sk;
-	return 0;
+	return ret;
 }
 
 static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
 {
-	int sk;
+	int sk, ret = 0;
 
 	pr_info("Opening standalone socket (id %#x ino %#x peer %#x)\n",
 			ui->ue->id, ui->ue->ino, ui->ue->peer);
@@ -1306,8 +1424,11 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
 		}
 	}
 
-	if (bind_unix_sk(sk, ui))
-		return -1;
+	if (!(ui->flags & USK_GHOST_WAIT)) {
+		if (bind_unix_sk(sk, ui))
+			return -1;
+	} else
+		ret = 1;
 
 	if (ui->ue->state == TCP_LISTEN) {
 		pr_info("\tPutting %#x into listen state\n", ui->ue->ino);
@@ -1334,7 +1455,7 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
 		return -1;
 
 	*new_fd = sk;
-	return 0;
+	return ret;
 }
 
 static int open_unix_sk(struct file_desc *d, int *new_fd)


More information about the CRIU mailing list