[CRIU] [PATCH 17/18] sk-unix: Add ability to restore sockets with deleted vfs addresses
Kirill Tkhai
ktkhai at virtuozzo.com
Mon Apr 24 05:51:40 PDT 2017
I split the patch into 4 separate patches. Could you please do the same? Otherwise,
it's difficult to understand what happens there.
On 12.04.2017 16:58, Cyrill Gorcunov wrote:
> If dgram sockets are bound with vfs name and the name removed
> from the file system we can't bind/connect to such name. To
> resolve it we do the following
>
> - all ghost names previously gathered into chains are
> changed to have more-less unique names adding postfixes
>
> - opon socket opening it's binding/connection is delayed
> until previous copy is removed
>
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
> criu/sk-unix.c | 261 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 238 insertions(+), 23 deletions(-)
>
> diff --git a/criu/sk-unix.c b/criu/sk-unix.c
> index cdad971e3dea..b0f616e7b299 100644
> --- a/criu/sk-unix.c
> +++ b/criu/sk-unix.c
> @@ -9,6 +9,7 @@
> #include <sys/un.h>
> #include <stdlib.h>
> #include <dlfcn.h>
> +#include <libgen.h>
>
> #include "libnetlink.h"
> #include "cr_options.h"
> @@ -29,6 +30,7 @@
> #include "external.h"
> #include "crtools.h"
> #include "rst-malloc.h"
> +#include "atomic.h"
>
> #include "protobuf.h"
> #include "images/sk-unix.pb-c.h"
> @@ -102,6 +104,7 @@ struct unix_sk_listen_icon {
> typedef struct {
> struct list_head list;
> struct list_head children;
> + char *name_dir;
> char *name;
> size_t namelen;
> } ghost_addr_t;
> @@ -119,13 +122,15 @@ static struct unix_sk_listen_icon *lookup_unix_listen_icons(int peer_ino)
> return NULL;
> }
>
> -static ghost_addr_t *lookup_ghost_addr(void *name, size_t namelen)
> +static ghost_addr_t *lookup_ghost_addr(void *name_dir, void *name, size_t namelen)
> {
> ghost_addr_t *ga;
>
> list_for_each_entry(ga, &unix_ghost_addr, list) {
> if (ga->namelen != namelen ||
> - memcmp(ga->name, name, namelen))
> + memcmp(ga->name, name, namelen) ||
> + ((unsigned long)name_dir ^
> + (unsigned long)(void *)ga->name_dir))
> continue;
> return ga;
> }
> @@ -812,6 +817,10 @@ struct unix_sk_info {
> struct list_head node; /* To link in peer's connected list */
>
> struct list_head ghost_addr_node;
> + struct list_head ghost_wait_head;
> + struct list_head ghost_waiters;
> + struct unix_sk_info *ghost_master;
> + atomic_t ghost_counter;
>
> /*
> * For DGRAM sockets with queues, we should only restore the queue
> @@ -823,6 +832,8 @@ struct unix_sk_info {
> bool listen;
> };
>
> +static int bind_unix_sk(int sk, struct unix_sk_info *ui);
> +
> #define USK_PAIR_MASTER (1 << 0)
> #define USK_PAIR_SLAVE (1 << 1)
> #define USK_GHOST_NAME (1 << 2)
> @@ -853,6 +864,18 @@ static int wake_connected_sockets(struct unix_sk_info *ui)
> return 0;
> }
>
> +static int wake_ghost_waiters(struct unix_sk_info *ui)
> +{
> + struct fdinfo_list_entry *fle;
> + struct unix_sk_info *tmp;
> +
> + list_for_each_entry(tmp, &ui->ghost_wait_head, ghost_waiters) {
> + fle = file_master(&tmp->d);
> + set_fds_event(fle->pid);
> + }
> + return 0;
> +}
> +
> static bool peer_is_not_prepared(struct unix_sk_info *peer)
> {
> if (peer->ue->state != TCP_LISTEN)
> @@ -961,10 +984,27 @@ static int post_open_unix_sk(struct file_desc *d, int fd)
> {
> struct unix_sk_info *ui;
> struct unix_sk_info *peer;
> + struct unix_sk_info *gm;
> struct sockaddr_un addr;
> int cwd_fd = -1, root_fd = -1;
>
> ui = container_of(d, struct unix_sk_info, d);
> + gm = ui->ghost_master;
> +
> + if (ui->flags & USK_GHOST_WAIT) {
> + if (!(gm->flags & USK_ADDR_RDY))
> + return 1;
> + if (ui->flags & (USK_PAIR_MASTER | USK_PAIR_SLAVE)) {
And where are standalone sockets bound if they have the USK_GHOST_WAIT flag?
> + if (bind_unix_sk(fd, ui))
> + return -1;
> + return 0;
> + }
> + } else if (ui->flags & USK_GHOST_NAME) {
> + if (bind_unix_sk(fd, ui))
> + return -1;
> + return 0;
> + }
> +
> BUG_ON((ui->flags & (USK_PAIR_MASTER | USK_PAIR_SLAVE)) ||
> (ui->ue->uflags & (USK_CALLBACK | USK_INHERIT)));
>
> @@ -993,6 +1033,15 @@ static int post_open_unix_sk(struct file_desc *d, int fd)
> return -1;
> }
>
> + if (gm) {
> + if (atomic_dec_and_test(&gm->ghost_counter)) {
> + pr_debug("ghost: Unlinking %s\n", gm->ue->name.data);
> + if (unlink((char *)gm->ue->name.data))
> + pr_perror("ghost: Failed to unlink master %s",
> + gm->ue->name.data);
> + }
> + }
> +
> revert_unix_sk_cwd(peer, &cwd_fd, &root_fd);
>
> if (peer->queuer == ui->ue->ino && restore_sk_queue(fd, peer->ue->id))
> @@ -1001,6 +1050,63 @@ static int post_open_unix_sk(struct file_desc *d, int fd)
> return restore_sk_common(fd, ui);
> }
>
> +/*
> + * When path where socket lives is deleted, we need to reconstruct
> + * it back up but allow caller to remove it after.
> + */
> +static int bind_on_deleted(int sk, struct unix_sk_info *ui)
> +{
> + char path[PATH_MAX], *pos;
> + struct sockaddr_un addr;
> + int ret;
> +
> + if (ui->ue->name.len >= sizeof(path)) {
> + pr_err("Too long name for socket\n");
> + return -ENOSPC;
> + }
> +
> + memcpy(path, ui->name, ui->ue->name.len);
> + path[ui->ue->name.len] = '\0';
> +
> + for (pos = strrchr(path, '/'); pos;
> + pos = strrchr(path, '/')) {
> + *pos = '\0';
> +
> + ret = access(path, R_OK | W_OK | X_OK);
> + if (ret == 0)
> + break;
> +
> + if (errno != ENOENT) {
> + ret = -errno;
> + pr_perror("Can't access %s\n", path);
> + return ret;
> + }
> + }
> +
> + memcpy(path, ui->name, ui->ue->name.len);
> + path[ui->ue->name.len] = '\0';
> +
> + pos = dirname(path);
> + ret = mkdirpat(AT_FDCWD, pos, 0755);
> + if (ret) {
> + pr_err("Can't create %s\n", pos);
> + return ret;
> + }
> +
> + memset(&addr, 0, sizeof(addr));
> + addr.sun_family = AF_UNIX;
> + memcpy(&addr.sun_path, ui->name, ui->ue->name.len);
> +
> + ret = bind(sk, (struct sockaddr *)&addr,
> + sizeof(addr.sun_family) + ui->ue->name.len);
> + if (ret < 0) {
> + pr_perror("Can't bind on socket %s", (char *)ui->ue->name.data);
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> static int bind_unix_sk(int sk, struct unix_sk_info *ui)
> {
> struct sockaddr_un addr;
> @@ -1071,10 +1177,12 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
> ui->ue->deleted = false;
>
> } else {
> - pr_perror("Can't bind socket");
> - goto done;
> + if (bind_on_deleted(sk, ui))
> + goto done;
> + ui->flags |= USK_ADDR_RDY;
> }
> }
> + ui->flags |= USK_ADDR_RDY;
>
> if (*ui->name && ui->ue->file_perms) {
> FilePermsEntry *perms = ui->ue->file_perms;
> @@ -1099,9 +1207,22 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
> }
> }
>
> - if (ui->ue->deleted && unlink((char *)ui->ue->name.data) < 0) {
> - pr_perror("failed to unlink %s", ui->ue->name.data);
> - goto done;
> + if (ui->ue->deleted || ui->ghost_master) {
> + struct unix_sk_info *gm = ui->ghost_master;
> + bool do_unlink = true;
> +
> + if (gm && !atomic_dec_and_test(&gm->ghost_counter))
> + do_unlink = false;
> +
> + if (!atomic_dec_and_test(&ui->ghost_counter))
> + do_unlink = false;
> +
> + if (do_unlink) {
> + pr_debug("ghost: Unlinking %s\n", ui->ue->name.data);
> + if (unlink((char *)ui->ue->name.data))
> + pr_perror("ghost: Failed to unlink %s",
> + ui->ue->name.data);
> + }
> }
> }
>
> @@ -1110,6 +1231,8 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
> wake_connected_sockets(ui);
> }
>
> + wake_ghost_waiters(ui);
> +
> ret = 0;
> done:
> revert_unix_sk_cwd(ui, &cwd_fd, &root_fd);
> @@ -1118,7 +1241,7 @@ done:
>
> static int open_unixsk_pair_master(struct unix_sk_info *ui, int *new_fd)
> {
> - int sk[2];
> + int sk[2], ret = 0;
> struct unix_sk_info *peer = ui->peer;
>
> pr_info("Opening pair master (id %#x ino %#x peer %#x)\n",
> @@ -1137,8 +1260,11 @@ static int open_unixsk_pair_master(struct unix_sk_info *ui, int *new_fd)
> if (restore_sk_queue(sk[1], ui->ue->id))
> return -1;
>
> - if (bind_unix_sk(sk[0], ui))
> - return -1;
> + if (!(ui->flags & USK_GHOST_WAIT)) {
> + if (bind_unix_sk(sk[0], ui))
> + return -1;
With USK_GHOST_WAIT you mask sockets that are connected to deleted peers (according to resolve_unix_ghosts()).
That is a limitation on connect() time, not on bind(). So why can't they be bound right here?
USK_GHOST_NAME should be used here, shouldn't it?
> + } else
> + ret = 1;
>
> if (restore_sk_common(sk[0], ui))
> return -1;
> @@ -1151,12 +1277,12 @@ static int open_unixsk_pair_master(struct unix_sk_info *ui, int *new_fd)
> close(sk[1]);
>
> *new_fd = sk[0];
> - return 0;
> + return ret;
> }
>
> static int open_unixsk_pair_slave(struct unix_sk_info *ui, int *new_fd)
> {
> - int sk, ret;
> + int sk, ret = 0;
>
> ret = recv_desc_from_peer(&ui->d, &sk);
> if (ret != 0) {
> @@ -1165,19 +1291,22 @@ static int open_unixsk_pair_slave(struct unix_sk_info *ui, int *new_fd)
> return ret;
> }
>
> - if (bind_unix_sk(sk, ui))
> - return -1;
> + if (!(ui->flags & USK_GHOST_WAIT)) {
> + if (bind_unix_sk(sk, ui))
> + return -1;
The same as above.
> + } else
> + ret = 1;
>
> if (restore_sk_common(sk, ui))
> return -1;
>
> *new_fd = sk;
> - return 0;
> + return ret;
> }
>
> static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
> {
> - int sk;
> + int sk, ret = 0;
>
> pr_info("Opening standalone socket (id %#x ino %#x peer %#x)\n",
> ui->ue->id, ui->ue->ino, ui->ue->peer);
> @@ -1295,8 +1424,11 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
> }
> }
>
> - if (bind_unix_sk(sk, ui))
> - return -1;
> + if (!(ui->flags & USK_GHOST_WAIT)) {
> + if (bind_unix_sk(sk, ui))
> + return -1;
> + } else
> + ret = 1;
Here ret = 1 and the socket is not bound, but below you wake connected sockets
in the (ui->ue->state == TCP_LISTEN) branch.
>
> if (ui->ue->state == TCP_LISTEN) {
> pr_info("\tPutting %#x into listen state\n", ui->ue->ino);
> @@ -1323,7 +1455,7 @@ out:
> return -1;
>
> *new_fd = sk;
> - return 0;
> + return ret;
> }
>
> static int open_unix_sk(struct file_desc *d, int *new_fd)
> @@ -1332,12 +1464,12 @@ static int open_unix_sk(struct file_desc *d, int *new_fd)
> struct unix_sk_info *ui;
> int ret;
>
> + ui = container_of(d, struct unix_sk_info, d);
> +
> fle = file_master(d);
> if (fle->stage >= FLE_OPEN)
> return post_open_unix_sk(d, fle->fe->fd);
>
> - ui = container_of(d, struct unix_sk_info, d);
> -
Why do we need this HUNK?
> if (inherited_fd(d, new_fd)) {
> ui->ue->uflags |= USK_INHERIT;
> ret = *new_fd >= 0 ? 0 : -1;
> @@ -1408,6 +1540,10 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
> ui->name_dir = (void *)ui->ue->name_dir;
>
> INIT_LIST_HEAD(&ui->ghost_addr_node);
> + INIT_LIST_HEAD(&ui->ghost_wait_head);
> + INIT_LIST_HEAD(&ui->ghost_waiters);
> + ui->ghost_master = NULL;
> + atomic_set(&ui->ghost_counter, 1);
>
> if (add_post_prepare_cb_once(resolve_unix_peers, NULL))
> return -1;
> @@ -1425,7 +1561,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
> ui->ue->type == SOCK_DGRAM) {
> ghost_addr_t *ga;
>
> - ga = lookup_ghost_addr(ui->name, ui->ue->name.len);
> + ga = lookup_ghost_addr(ui->name_dir, ui->name, ui->ue->name.len);
> if (!ga) {
> ga = shmalloc(sizeof(*ga));
> if (!ga)
> @@ -1433,6 +1569,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
>
> INIT_LIST_HEAD(&ga->children);
>
> + ga->name_dir = (void *)ui->name_dir;
> ga->name = (void *)ui->name;
> ga->namelen = ui->ue->name.len;
>
> @@ -1515,6 +1652,84 @@ static void interconnected_pair(struct unix_sk_info *ui, struct unix_sk_info *pe
> }
> }
>
> +static int ghost_new_name(char *name, size_t namelen,
> + char **name_new, size_t *namelen_new)
> +{
> + static unsigned int cnt = 0;
> + char sname[64];
> + size_t k;
> +
> + k = snprintf(sname, sizeof(sname), "criu-%u", cnt++);
> + *namelen_new = namelen + k + 1;
> + if (*namelen_new > UNIX_PATH_MAX) {
> + pr_err("ghost: New name for socket is too long\n");
> + return -1;
> + }
> +
> + *name_new = shmalloc(*namelen_new);
> + if (!*name_new) {
> + pr_err("ghost: Can't allocate new name for socket\n");
> + return -ENOMEM;
> + }
> +
> + k = snprintf(*name_new, *namelen_new, "%s-%s", name, sname) + 1;
There was:
*namelen_new = namelen + 1 + k;
Q: Is the trailing '\0' byte included in namelen? So namelen in the image always includes the trailing '\0',
doesn't it? (We printf newname later, so it must. Just a question.)
> + if (k != *namelen_new) {
> + pr_err("ghost: Name stripped\n");
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +static int resolve_unix_ghosts(void)
> +{
> + struct unix_sk_info *ui, *t;
> + ghost_addr_t *ga;
> +
> + pr_debug("ghost: Resolving addresses\n");
> +
> + list_for_each_entry(ga, &unix_ghost_addr, list) {
> + pr_debug("ghost: address %s\n", ga->name);
> +
> + list_for_each_entry(ui, &ga->children, ghost_addr_node) {
> + size_t newnamelen;
> + char *newname;
> +
> + pr_debug("\tghost: ino %#x peer %#x\n", ui->ue->ino,
> + ui->peer ? ui->peer->ue->ino : 0);
> +
> + if (ghost_new_name(ga->name, ga->namelen,
> + &newname, &newnamelen))
> + return -1;
Why can't we pick the new name in collect_one_unixsk()? In that case we would
not need to iterate over unix_sockets once again one paragraph below, and could do all
the work in the resolve_unix_peers() cycle.
> +
> + pr_debug("\tghost: name transition %s -> %s\n",
> + ui->name, newname);
> + ui->name = newname;
> + ui->ue->name.len = newnamelen;
> + ui->ue->name.data = (void *)newname;
> + ui->flags |= USK_GHOST_NAME;
> +
> + unlink_stale(ui);
What if there is a file with the same name on disk?
> +
> + list_for_each_entry(t, &unix_sockets, list) {
> + if (t->flags & (USK_GHOST_NAME | USK_GHOST_WAIT))
> + continue;
Why do we need this check when there is one more check (t->peer != ui)? A socket t
which has t->peer == ui can't have at least USK_GHOST_WAIT set at this moment, so
this check is redundant.
> + if (t->peer != ui)
> + continue;
> + pr_debug("\t\tghost: connected to us %#x -> %#x\n",
> + t->ue->ino, ui->ue->ino);
> +
> + t->flags |= USK_GHOST_NAME | USK_GHOST_WAIT;
> + t->ghost_master = ui;
We set ghost_master in only one place and never change it. Do we really need
a special field unix_sk_info::ghost_master when it duplicates unix_sk_info::peer?
We could make ghost_master() a helper instead.
> + atomic_inc(&ui->ghost_counter);
> + list_add(&t->ghost_waiters, &ui->ghost_wait_head);
> + }
> + }
> + }
> +
> + return 0;
> +}
> +
> static int resolve_unix_peers(void *unused)
> {
> struct unix_sk_info *ui, *peer;
> @@ -1559,7 +1774,7 @@ static int resolve_unix_peers(void *unused)
>
> }
>
> - return 0;
> + return resolve_unix_ghosts();
> }
>
> int unix_sk_id_add(unsigned int ino)
I'm not sure about circular dependencies; I will write if I find something.
-------------- next part --------------
From: Kirill Tkhai <ktkhai at virtuozzo.com>
---
criu/sk-unix.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index cdad971e..1f14e95d 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -9,6 +9,7 @@
#include <sys/un.h>
#include <stdlib.h>
#include <dlfcn.h>
+#include <libgen.h>
#include "libnetlink.h"
#include "cr_options.h"
@@ -29,6 +30,7 @@
#include "external.h"
#include "crtools.h"
#include "rst-malloc.h"
+#include "atomic.h"
#include "protobuf.h"
#include "images/sk-unix.pb-c.h"
@@ -102,6 +104,7 @@ struct unix_sk_listen_icon {
typedef struct {
struct list_head list;
struct list_head children;
+ char *name_dir;
char *name;
size_t namelen;
} ghost_addr_t;
@@ -812,6 +815,10 @@ struct unix_sk_info {
struct list_head node; /* To link in peer's connected list */
struct list_head ghost_addr_node;
+ struct list_head ghost_wait_head;
+ struct list_head ghost_waiters;
+ struct unix_sk_info *ghost_master;
+ atomic_t ghost_counter;
/*
* For DGRAM sockets with queues, we should only restore the queue
@@ -823,6 +830,8 @@ struct unix_sk_info {
bool listen;
};
+static int bind_unix_sk(int sk, struct unix_sk_info *ui);
+
#define USK_PAIR_MASTER (1 << 0)
#define USK_PAIR_SLAVE (1 << 1)
#define USK_GHOST_NAME (1 << 2)
@@ -1332,12 +1341,12 @@ static int open_unix_sk(struct file_desc *d, int *new_fd)
struct unix_sk_info *ui;
int ret;
+ ui = container_of(d, struct unix_sk_info, d);
+
fle = file_master(d);
if (fle->stage >= FLE_OPEN)
return post_open_unix_sk(d, fle->fe->fd);
- ui = container_of(d, struct unix_sk_info, d);
-
if (inherited_fd(d, new_fd)) {
ui->ue->uflags |= USK_INHERIT;
ret = *new_fd >= 0 ? 0 : -1;
@@ -1408,6 +1417,10 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
ui->name_dir = (void *)ui->ue->name_dir;
INIT_LIST_HEAD(&ui->ghost_addr_node);
+ INIT_LIST_HEAD(&ui->ghost_wait_head);
+ INIT_LIST_HEAD(&ui->ghost_waiters);
+ ui->ghost_master = NULL;
+ atomic_set(&ui->ghost_counter, 1);
if (add_post_prepare_cb_once(resolve_unix_peers, NULL))
return -1;
@@ -1433,6 +1446,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
INIT_LIST_HEAD(&ga->children);
+ ga->name_dir = (void *)ui->name_dir;
ga->name = (void *)ui->name;
ga->namelen = ui->ue->name.len;
-------------- next part --------------
From: Kirill Tkhai <ktkhai at virtuozzo.com>
---
criu/sk-unix.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index 1f14e95d..9e6adb78 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -122,13 +122,15 @@ static struct unix_sk_listen_icon *lookup_unix_listen_icons(int peer_ino)
return NULL;
}
-static ghost_addr_t *lookup_ghost_addr(void *name, size_t namelen)
+static ghost_addr_t *lookup_ghost_addr(void *name_dir, void *name, size_t namelen)
{
ghost_addr_t *ga;
list_for_each_entry(ga, &unix_ghost_addr, list) {
if (ga->namelen != namelen ||
- memcmp(ga->name, name, namelen))
+ memcmp(ga->name, name, namelen) ||
+ ((unsigned long)name_dir ^
+ (unsigned long)(void *)ga->name_dir))
continue;
return ga;
}
@@ -1438,7 +1440,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
ui->ue->type == SOCK_DGRAM) {
ghost_addr_t *ga;
- ga = lookup_ghost_addr(ui->name, ui->ue->name.len);
+ ga = lookup_ghost_addr(ui->name_dir, ui->name, ui->ue->name.len);
if (!ga) {
ga = shmalloc(sizeof(*ga));
if (!ga)
-------------- next part --------------
From: Kirill Tkhai <ktkhai at virtuozzo.com>
---
criu/sk-unix.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 79 insertions(+), 1 deletion(-)
diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index 9e6adb78..ee5803d9 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -1531,6 +1531,84 @@ static void interconnected_pair(struct unix_sk_info *ui, struct unix_sk_info *pe
}
}
+static int ghost_new_name(char *name, size_t namelen,
+ char **name_new, size_t *namelen_new)
+{
+ static unsigned int cnt = 0;
+ char sname[64];
+ size_t k;
+
+ k = snprintf(sname, sizeof(sname), "criu-%u", cnt++);
+ *namelen_new = namelen + k + 1;
+ if (*namelen_new > UNIX_PATH_MAX) {
+ pr_err("ghost: New name for socket is too long\n");
+ return -1;
+ }
+
+ *name_new = shmalloc(*namelen_new);
+ if (!*name_new) {
+ pr_err("ghost: Can't allocate new name for socket\n");
+ return -ENOMEM;
+ }
+
+ k = snprintf(*name_new, *namelen_new, "%s-%s", name, sname) + 1;
+ if (k != *namelen_new) {
+ pr_err("ghost: Name stripped\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int resolve_unix_ghosts(void)
+{
+ struct unix_sk_info *ui, *t;
+ ghost_addr_t *ga;
+
+ pr_debug("ghost: Resolving addresses\n");
+
+ list_for_each_entry(ga, &unix_ghost_addr, list) {
+ pr_debug("ghost: address %s\n", ga->name);
+
+ list_for_each_entry(ui, &ga->children, ghost_addr_node) {
+ size_t newnamelen;
+ char *newname;
+
+ pr_debug("\tghost: ino %#x peer %#x\n", ui->ue->ino,
+ ui->peer ? ui->peer->ue->ino : 0);
+
+ if (ghost_new_name(ga->name, ga->namelen,
+ &newname, &newnamelen))
+ return -1;
+
+ pr_debug("\tghost: name transition %s -> %s\n",
+ ui->name, newname);
+ ui->name = newname;
+ ui->ue->name.len = newnamelen;
+ ui->ue->name.data = (void *)newname;
+ ui->flags |= USK_GHOST_NAME;
+
+ unlink_stale(ui);
+
+ list_for_each_entry(t, &unix_sockets, list) {
+ if (t->flags & (USK_GHOST_NAME | USK_GHOST_WAIT))
+ continue;
+ if (t->peer != ui)
+ continue;
+ pr_debug("\t\tghost: connected to us %#x -> %#x\n",
+ t->ue->ino, ui->ue->ino);
+
+ t->flags |= USK_GHOST_NAME | USK_GHOST_WAIT;
+ t->ghost_master = ui;
+ atomic_inc(&ui->ghost_counter);
+ list_add(&t->ghost_waiters, &ui->ghost_wait_head);
+ }
+ }
+ }
+
+ return 0;
+}
+
static int resolve_unix_peers(void *unused)
{
struct unix_sk_info *ui, *peer;
@@ -1575,7 +1653,7 @@ static int resolve_unix_peers(void *unused)
}
- return 0;
+ return resolve_unix_ghosts();
}
int unix_sk_id_add(unsigned int ino)
-------------- next part --------------
sk-unix: Add ability to restore sockets with deleted vfs addresses
From: Cyrill Gorcunov <gorcunov at openvz.org>
If dgram sockets are bound with vfs name and the name removed
from the file system we can't bind/connect to such name. To
resolve it we do the following
- all ghost names previously gathered into chains are
changed to have more-less unique names adding postfixes
- opon socket opening it's binding/connection is delayed
until previous copy is removed
Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
criu/sk-unix.c | 155 ++++++++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 138 insertions(+), 17 deletions(-)
diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index ee5803d9..b0f616e7 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -864,6 +864,18 @@ static int wake_connected_sockets(struct unix_sk_info *ui)
return 0;
}
+static int wake_ghost_waiters(struct unix_sk_info *ui)
+{
+ struct fdinfo_list_entry *fle;
+ struct unix_sk_info *tmp;
+
+ list_for_each_entry(tmp, &ui->ghost_wait_head, ghost_waiters) {
+ fle = file_master(&tmp->d);
+ set_fds_event(fle->pid);
+ }
+ return 0;
+}
+
static bool peer_is_not_prepared(struct unix_sk_info *peer)
{
if (peer->ue->state != TCP_LISTEN)
@@ -972,10 +984,27 @@ static int post_open_unix_sk(struct file_desc *d, int fd)
{
struct unix_sk_info *ui;
struct unix_sk_info *peer;
+ struct unix_sk_info *gm;
struct sockaddr_un addr;
int cwd_fd = -1, root_fd = -1;
ui = container_of(d, struct unix_sk_info, d);
+ gm = ui->ghost_master;
+
+ if (ui->flags & USK_GHOST_WAIT) {
+ if (!(gm->flags & USK_ADDR_RDY))
+ return 1;
+ if (ui->flags & (USK_PAIR_MASTER | USK_PAIR_SLAVE)) {
+ if (bind_unix_sk(fd, ui))
+ return -1;
+ return 0;
+ }
+ } else if (ui->flags & USK_GHOST_NAME) {
+ if (bind_unix_sk(fd, ui))
+ return -1;
+ return 0;
+ }
+
BUG_ON((ui->flags & (USK_PAIR_MASTER | USK_PAIR_SLAVE)) ||
(ui->ue->uflags & (USK_CALLBACK | USK_INHERIT)));
@@ -1004,6 +1033,15 @@ static int post_open_unix_sk(struct file_desc *d, int fd)
return -1;
}
+ if (gm) {
+ if (atomic_dec_and_test(&gm->ghost_counter)) {
+ pr_debug("ghost: Unlinking %s\n", gm->ue->name.data);
+ if (unlink((char *)gm->ue->name.data))
+ pr_perror("ghost: Failed to unlink master %s",
+ gm->ue->name.data);
+ }
+ }
+
revert_unix_sk_cwd(peer, &cwd_fd, &root_fd);
if (peer->queuer == ui->ue->ino && restore_sk_queue(fd, peer->ue->id))
@@ -1012,6 +1050,63 @@ static int post_open_unix_sk(struct file_desc *d, int fd)
return restore_sk_common(fd, ui);
}
+/*
+ * When path where socket lives is deleted, we need to reconstruct
+ * it back up but allow caller to remove it after.
+ */
+static int bind_on_deleted(int sk, struct unix_sk_info *ui)
+{
+ char path[PATH_MAX], *pos;
+ struct sockaddr_un addr;
+ int ret;
+
+ if (ui->ue->name.len >= sizeof(path)) {
+ pr_err("Too long name for socket\n");
+ return -ENOSPC;
+ }
+
+ memcpy(path, ui->name, ui->ue->name.len);
+ path[ui->ue->name.len] = '\0';
+
+ for (pos = strrchr(path, '/'); pos;
+ pos = strrchr(path, '/')) {
+ *pos = '\0';
+
+ ret = access(path, R_OK | W_OK | X_OK);
+ if (ret == 0)
+ break;
+
+ if (errno != ENOENT) {
+ ret = -errno;
+ pr_perror("Can't access %s\n", path);
+ return ret;
+ }
+ }
+
+ memcpy(path, ui->name, ui->ue->name.len);
+ path[ui->ue->name.len] = '\0';
+
+ pos = dirname(path);
+ ret = mkdirpat(AT_FDCWD, pos, 0755);
+ if (ret) {
+ pr_err("Can't create %s\n", pos);
+ return ret;
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sun_family = AF_UNIX;
+ memcpy(&addr.sun_path, ui->name, ui->ue->name.len);
+
+ ret = bind(sk, (struct sockaddr *)&addr,
+ sizeof(addr.sun_family) + ui->ue->name.len);
+ if (ret < 0) {
+ pr_perror("Can't bind on socket %s", (char *)ui->ue->name.data);
+ return ret;
+ }
+
+ return 0;
+}
+
static int bind_unix_sk(int sk, struct unix_sk_info *ui)
{
struct sockaddr_un addr;
@@ -1082,10 +1177,12 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
ui->ue->deleted = false;
} else {
- pr_perror("Can't bind socket");
- goto done;
+ if (bind_on_deleted(sk, ui))
+ goto done;
+ ui->flags |= USK_ADDR_RDY;
}
}
+ ui->flags |= USK_ADDR_RDY;
if (*ui->name && ui->ue->file_perms) {
FilePermsEntry *perms = ui->ue->file_perms;
@@ -1110,9 +1207,22 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
}
}
- if (ui->ue->deleted && unlink((char *)ui->ue->name.data) < 0) {
- pr_perror("failed to unlink %s", ui->ue->name.data);
- goto done;
+ if (ui->ue->deleted || ui->ghost_master) {
+ struct unix_sk_info *gm = ui->ghost_master;
+ bool do_unlink = true;
+
+ if (gm && !atomic_dec_and_test(&gm->ghost_counter))
+ do_unlink = false;
+
+ if (!atomic_dec_and_test(&ui->ghost_counter))
+ do_unlink = false;
+
+ if (do_unlink) {
+ pr_debug("ghost: Unlinking %s\n", ui->ue->name.data);
+ if (unlink((char *)ui->ue->name.data))
+ pr_perror("ghost: Failed to unlink %s",
+ ui->ue->name.data);
+ }
}
}
@@ -1121,6 +1231,8 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
wake_connected_sockets(ui);
}
+ wake_ghost_waiters(ui);
+
ret = 0;
done:
revert_unix_sk_cwd(ui, &cwd_fd, &root_fd);
@@ -1129,7 +1241,7 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
static int open_unixsk_pair_master(struct unix_sk_info *ui, int *new_fd)
{
- int sk[2];
+ int sk[2], ret = 0;
struct unix_sk_info *peer = ui->peer;
pr_info("Opening pair master (id %#x ino %#x peer %#x)\n",
@@ -1148,8 +1260,11 @@ static int open_unixsk_pair_master(struct unix_sk_info *ui, int *new_fd)
if (restore_sk_queue(sk[1], ui->ue->id))
return -1;
- if (bind_unix_sk(sk[0], ui))
- return -1;
+ if (!(ui->flags & USK_GHOST_WAIT)) {
+ if (bind_unix_sk(sk[0], ui))
+ return -1;
+ } else
+ ret = 1;
if (restore_sk_common(sk[0], ui))
return -1;
@@ -1162,12 +1277,12 @@ static int open_unixsk_pair_master(struct unix_sk_info *ui, int *new_fd)
close(sk[1]);
*new_fd = sk[0];
- return 0;
+ return ret;
}
static int open_unixsk_pair_slave(struct unix_sk_info *ui, int *new_fd)
{
- int sk, ret;
+ int sk, ret = 0;
ret = recv_desc_from_peer(&ui->d, &sk);
if (ret != 0) {
@@ -1176,19 +1291,22 @@ static int open_unixsk_pair_slave(struct unix_sk_info *ui, int *new_fd)
return ret;
}
- if (bind_unix_sk(sk, ui))
- return -1;
+ if (!(ui->flags & USK_GHOST_WAIT)) {
+ if (bind_unix_sk(sk, ui))
+ return -1;
+ } else
+ ret = 1;
if (restore_sk_common(sk, ui))
return -1;
*new_fd = sk;
- return 0;
+ return ret;
}
static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
{
- int sk;
+ int sk, ret = 0;
pr_info("Opening standalone socket (id %#x ino %#x peer %#x)\n",
ui->ue->id, ui->ue->ino, ui->ue->peer);
@@ -1306,8 +1424,11 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
}
}
- if (bind_unix_sk(sk, ui))
- return -1;
+ if (!(ui->flags & USK_GHOST_WAIT)) {
+ if (bind_unix_sk(sk, ui))
+ return -1;
+ } else
+ ret = 1;
if (ui->ue->state == TCP_LISTEN) {
pr_info("\tPutting %#x into listen state\n", ui->ue->ino);
@@ -1334,7 +1455,7 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
return -1;
*new_fd = sk;
- return 0;
+ return ret;
}
static int open_unix_sk(struct file_desc *d, int *new_fd)
More information about the CRIU
mailing list