[CRIU] [PATCH 16/17] unix: Add support of ghost sockets
Cyrill Gorcunov
gorcunov at gmail.com
Sun Apr 1 23:07:42 MSK 2018
Unix sockets may be connected via a deleted socket name;
moreover, the name may be reused (i.e. the same sun_addr but
different inodes).
To be able to handle them we do a few tricks:
- when collecting sockets we check whether the "deleted"
mark is present on the socket and, if so, rename
it to a new unique name
- then we wait until all users are connected and
remove the socket from the FS
Signed-off-by: Cyrill Gorcunov <gorcunov at gmail.com>
---
criu/cr-restore.c | 4 +
criu/include/sockets.h | 1 +
criu/sk-unix.c | 290 ++++++++++++++++++++++++++++++++++++++++++-------
3 files changed, 255 insertions(+), 40 deletions(-)
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index db913b2dae2e..ff1e4dcc34df 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -388,6 +388,10 @@ static int root_prepare_shared(void)
if (ret)
goto err;
+ ret = unix_resolve_ghost_addr();
+ if (ret)
+ goto err;
+
show_saved_files();
err:
return ret;
diff --git a/criu/include/sockets.h b/criu/include/sockets.h
index db330428850c..23f5b11c1b58 100644
--- a/criu/include/sockets.h
+++ b/criu/include/sockets.h
@@ -60,6 +60,7 @@ extern int netlink_receive_one(struct nlmsghdr *hdr, struct ns_id *ns, void *arg
extern int unix_sk_id_add(unsigned int ino);
extern int unix_sk_ids_parse(char *optarg);
+extern int unix_resolve_ghost_addr(void);
extern int do_dump_opt(int sk, int level, int name, void *val, int len);
#define dump_opt(s, l, n, f) do_dump_opt(s, l, n, f, sizeof(*f))
diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index 4feaa7722a50..c0861d17c8b1 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -9,6 +9,7 @@
#include <sys/un.h>
#include <stdlib.h>
#include <dlfcn.h>
+#include <libgen.h>
#include "libnetlink.h"
#include "cr_options.h"
@@ -31,6 +32,7 @@
#include "fdstore.h"
#include "fdinfo.h"
#include "kerndat.h"
+#include "rst-malloc.h"
#include "protobuf.h"
#include "images/sk-unix.pb-c.h"
@@ -90,10 +92,14 @@ struct unix_sk_desc {
};
static LIST_HEAD(unix_sockets);
+static LIST_HEAD(unix_ghost_addr);
static int unix_resolve_name(int lfd, uint32_t id, struct unix_sk_desc *d,
UnixSkEntry *ue, const struct fd_parms *p);
+struct unix_sk_info;
+static void unlink_sk(struct unix_sk_info *ui);
+
struct unix_sk_listen_icon {
unsigned int peer_ino;
struct unix_sk_desc *sk_desc;
@@ -892,6 +898,12 @@ struct unix_sk_info {
struct list_head connected; /* List of sockets, connected to me */
struct list_head node; /* To link in peer's connected list */
struct list_head scm_fles;
+ struct list_head ghost_node;
+ struct list_head ghost_wait_head;
+ struct list_head ghost_waiter;
+ atomic_t name_ref;
+ atomic_t name_rdy;
+ struct unix_sk_info *ghost_master;
/*
* For DGRAM sockets with queues, we should only restore the queue
@@ -916,6 +928,7 @@ struct scm_fle {
#define USK_PAIR_MASTER 0x1
#define USK_PAIR_SLAVE 0x2
+#define USK_GHOST_NAME 0x4
static struct unix_sk_info *find_unix_sk_by_ino(int ino)
{
@@ -1077,6 +1090,17 @@ static int wake_connected_sockets(struct unix_sk_info *ui)
return 0;
}
+/*
+ * Signal every socket queued on @ui's ghost_wait_head: for each waiter
+ * the master fdinfo entry of its file descriptor is looked up and
+ * set_fds_event() is called for the pid recorded there.
+ */
+static void wake_ghost_waiters(struct unix_sk_info *ui)
+{
+	struct unix_sk_info *waiter;
+
+	list_for_each_entry(waiter, &ui->ghost_wait_head, ghost_waiter)
+		set_fds_event(file_master(&waiter->d)->pid);
+}
+
static bool peer_is_not_prepared(struct unix_sk_info *peer)
{
if (peer->ue->state != TCP_LISTEN)
@@ -1239,6 +1263,38 @@ static int prep_unix_sk_cwd(struct unix_sk_info *ui, int *prev_cwd_fd,
return -1;
}
+/*
+ * Release @ui's reference on the name of its ghost master, if it has
+ * one. When atomic_dec_and_test() reports that this was the last
+ * reference, the master's name is removed via unlink_sk().
+ */
+static void drop_ghost_master(struct unix_sk_info *ui)
+{
+	struct unix_sk_info *master = ui->ghost_master;
+
+	if (!master)
+		return;
+
+	if (!atomic_dec_and_test(&master->name_ref))
+		return;
+
+	pr_debug("ghost: Unlinking ghost master %s\n", master->ue->name.data);
+	unlink_sk(master);
+}
+
+/*
+ * For a socket whose image entry carries the "deleted" mark, release
+ * the socket's own reference on its name. When atomic_dec_and_test()
+ * reports that this was the last reference, the name is removed via
+ * unlink_sk(). Sockets without the mark are left untouched.
+ */
+static void drop_deleted(struct unix_sk_info *ui)
+{
+	if (!ui->ue->has_deleted || !ui->ue->deleted)
+		return;
+
+	if (!atomic_dec_and_test(&ui->name_ref))
+		return;
+
+	pr_debug("ghost: Unlinking regular %s\n", ui->ue->name.data);
+	unlink_sk(ui);
+}
+
+/*
+ * Tell whether @ui still has to wait for its ghost master.
+ *
+ * Returns true only when @ui has a ghost master whose name_rdy counter
+ * still reads zero; a socket without a ghost master never waits.
+ */
+static bool wait_ghost_master(struct unix_sk_info *ui)
+{
+	struct unix_sk_info *master = ui->ghost_master;
+
+	/*
+	 * If we are going to use the name of a deleted socket,
+	 * wait until the master has created it.
+	 */
+	return master && !atomic_read(&master->name_rdy);
+}
+
static int post_open_standalone(struct file_desc *d, int fd)
{
struct unix_sk_info *ui;
@@ -1286,6 +1342,7 @@ static int post_open_standalone(struct file_desc *d, int fd)
ui->is_connected = true;
revert_unix_sk_cwd(peer, &cwd_fd, &root_fd, &ns_fd);
+ drop_ghost_master(ui);
restore_queue:
if (peer->queuer == ui &&
@@ -1298,46 +1355,60 @@ static int post_open_standalone(struct file_desc *d, int fd)
return restore_sk_common(fd, ui);
}
-static int bind_deleted_unix_sk(int sk, struct unix_sk_info *ui,
- struct sockaddr_un *addr)
+/*
+ * When the path where the socket lives has been deleted, we need to
+ * reconstruct it (the directory part is re-created with mkdirpat())
+ * and bind the socket there, leaving it to the caller to remove the
+ * name afterwards.
+ *
+ * @sk: socket descriptor to bind
+ * @ui: socket info carrying the name (ui->name, ui->ue->name.len)
+ *
+ * Returns 0 on success and non-zero on failure: -ENOSPC when the name
+ * does not fit into the local buffers, -errno when probing a parent
+ * directory fails for a reason other than ENOENT, or whatever
+ * mkdirpat() / bind() returned.
+ */
+static int bind_on_deleted(int sk, struct unix_sk_info *ui)
{
- char temp[PATH_MAX];
+	char path[PATH_MAX], *pos;
+	struct sockaddr_un addr;
int ret;
- pr_info("found duplicate unix socket bound at %s\n", addr->sun_path);
-
- ret = snprintf(temp, sizeof(temp),
- "%s-%s-%d", addr->sun_path, "criu-temp", getpid());
- /* this shouldn't happen, since sun_addr is only 108 chars long */
- if (ret < 0 || ret >= sizeof(temp)) {
- pr_err("snprintf of %s failed?\n", addr->sun_path);
- return -1;;
+	/*
+	 * The name is copied both into path[] (plus a terminator) and
+	 * into addr.sun_path below, so it has to fit into each of them.
+	 */
+	if (ui->ue->name.len >= sizeof(path) ||
+	    ui->ue->name.len > sizeof(addr.sun_path)) {
+		pr_err("Too long name for socket\n");
+		return -ENOSPC;
}
- ret = rename(addr->sun_path, temp);
- if (ret < 0) {
- pr_perror("couldn't move socket for binding");
- return -1;
+	memcpy(path, ui->name, ui->ue->name.len);
+	path[ui->ue->name.len] = '\0';
+
+	/*
+	 * Strip one path component at a time until an accessible
+	 * ancestor is found. A missing component (ENOENT) is expected
+	 * here; any other access() failure is fatal.
+	 */
+	for (pos = strrchr(path, '/'); pos;
+	     pos = strrchr(path, '/')) {
+		*pos = '\0';
+
+		ret = access(path, R_OK | W_OK | X_OK);
+		if (ret == 0)
+			break;
+
+		if (errno != ENOENT) {
+			/* Save errno before logging may clobber it */
+			ret = -errno;
+			pr_perror("Can't access %s", path);
+			return ret;
+		}
}
- ret = bind(sk, (struct sockaddr *)addr,
- sizeof(addr->sun_family) + ui->ue->name.len);
- if (ret < 0) {
- pr_perror("Can't bind socket after move");
- return -1;;
+	/* The probe above truncated path[], so start from the full name again */
+	memcpy(path, ui->name, ui->ue->name.len);
+	path[ui->ue->name.len] = '\0';
+
+	pos = dirname(path);
+	ret = mkdirpat(AT_FDCWD, pos, 0755);
+	if (ret) {
+		pr_err("Can't create %s\n", pos);
+		return ret;
}
- ret = rename(temp, addr->sun_path);
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	memcpy(&addr.sun_path, ui->name, ui->ue->name.len);
+
+	ret = bind(sk, (struct sockaddr *)&addr,
+		   sizeof(addr.sun_family) + ui->ue->name.len);
if (ret < 0) {
- pr_perror("couldn't move socket back");
- return -1;
+		pr_perror("Can't bind on socket %s", (char *)ui->ue->name.data);
+		return ret;
}
- /* we've handled the deleted-ness of this
- * socket and we don't want to delete it later
- * since it's not /this/ socket.
- */
- ui->ue->deleted = false;
return 0;
}
@@ -1347,7 +1418,7 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
int cwd_fd = -1, root_fd = -1, ns_fd = -1;
int ret, exit_code = -1;
- if (ui->ue->name.len == 0)
+ if (ui->ue->name.len == 0 || atomic_read(&ui->name_rdy))
return 0;
if ((ui->ue->type == SOCK_STREAM) && (ui->ue->state == TCP_ESTABLISHED)) {
@@ -1371,16 +1442,13 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
ret = bind(sk, (struct sockaddr *)&addr,
sizeof(addr.sun_family) + ui->ue->name.len);
if (ret < 0) {
- if (ui->ue->has_deleted && ui->ue->deleted && errno == EADDRINUSE) {
- if (bind_deleted_unix_sk(sk, ui, &addr))
- goto done;
- } else {
- pr_perror("Can't bind socket");
+ if (ui->ue->has_deleted && ui->ue->deleted)
+ ret = bind_on_deleted(sk, ui);
+ if (ret)
goto done;
- }
}
- if (*ui->name && ui->ue->file_perms) {
+ if (ui->ue->file_perms) {
FilePermsEntry *perms = ui->ue->file_perms;
char fname[PATH_MAX];
@@ -1403,19 +1471,20 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui)
}
}
- if (ui->ue->deleted && unlink((char *)ui->ue->name.data) < 0) {
- pr_perror("failed to unlink %s", ui->ue->name.data);
- goto done;
- }
+ atomic_inc(&ui->name_rdy);
+ pr_debug("name_rdy %#x\n", ui->ue->ino);
if (ui->ue->state != TCP_LISTEN) {
ui->bound = 1;
wake_connected_sockets(ui);
}
+ wake_ghost_waiters(ui);
exit_code = 0;
done:
revert_unix_sk_cwd(ui, &cwd_fd, &root_fd, &ns_fd);
+ if (exit_code == 0)
+ drop_deleted(ui);
return exit_code;
}
@@ -1501,6 +1570,9 @@ static int open_unixsk_pair_master(struct unix_sk_info *ui, int *new_fd)
if (bind_unix_sk(sk[1], peer))
return -1;
+ drop_ghost_master(ui);
+ drop_ghost_master(peer);
+
*new_fd = sk[0];
return 1;
}
@@ -1556,6 +1628,10 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
fle = file_master(&ui->d);
pr_info_opening("standalone", ui, fle);
+
+ if (wait_ghost_master(ui))
+ return 1;
+
if (fle->stage == FLE_OPEN)
return post_open_standalone(&ui->d, fle->fe->fd);
@@ -1814,11 +1890,15 @@ static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue)
ui->flags = 0;
ui->peer = NULL;
ui->queuer = NULL;
+ ui->ghost_master = NULL;
ui->bound = 0;
ui->listen = 0;
ui->is_connected = 0;
ui->peer_queue_restored = 0;
+ atomic_set(&ui->name_ref, 1);
+ atomic_set(&ui->name_rdy, 0);
+
memzero(&ui->peer_resolve, sizeof(ui->peer_resolve));
memzero(&ui->d, sizeof(ui->d));
@@ -1826,6 +1906,127 @@ static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue)
INIT_LIST_HEAD(&ui->connected);
INIT_LIST_HEAD(&ui->node);
INIT_LIST_HEAD(&ui->scm_fles);
+ INIT_LIST_HEAD(&ui->ghost_node);
+ INIT_LIST_HEAD(&ui->ghost_wait_head);
+ INIT_LIST_HEAD(&ui->ghost_waiter);
+
+ return 0;
+}
+
+#define GHOST_NAME_FMT "~criu-%u"
+#define GHOST_NAME_FMT_PREFIX 6 /* num of chars before counter */
+
+/*
+ * Generate a unique replacement name for a ghost socket.
+ *
+ * The new name is the old one with a GHOST_NAME_FMT suffix appended.
+ * If the old name already ends with such a suffix, the suffix is cut
+ * off first and the internal counter is advanced past the number it
+ * carried.
+ *
+ * @name:        current socket name
+ * @namelen:     length of @name; expected to count the trailing NUL,
+ *               the final length check below relies on that
+ * @name_new:    receives the new name, allocated with shmalloc()
+ * @namelen_new: receives the length of the new name (NUL included)
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails and -1 on any
+ * other failure.
+ */
+static int ghost_new_name(char *name, size_t namelen,
+			  char **name_new, size_t *namelen_new)
+{
+	char sname[64], *pos, *oldname = name;
+	static unsigned int cnt = 0;
+	int n;
+
+	/* &name[namelen - 1] below must not underflow */
+	if (!name || !namelen) {
+		pr_err("\tghost: Empty name for socket\n");
+		return -1;
+	}
+
+	pr_debug("\tghost: handling name %s namelen %zu\n", name, namelen);
+
+	/* Find the last occurrence of the suffix's leading character */
+	for (pos = &name[namelen - 1]; pos > name; pos--) {
+		if (*pos == GHOST_NAME_FMT[0])
+			break;
+	}
+
+	if (strncmp(pos, GHOST_NAME_FMT, GHOST_NAME_FMT_PREFIX) == 0) {
+		unsigned int seen;
+		char *stripped;
+
+		if (sscanf(pos, GHOST_NAME_FMT, &seen) == 1) {
+			pr_debug("\tghost: cnt %u detected\n", seen);
+			cnt = seen + 1;
+		}
+
+		/* Keep only the part before the suffix, NUL terminated */
+		namelen = (pos - name);
+		stripped = alloca(namelen + 1);
+		memcpy(stripped, name, namelen);
+		stripped[namelen++] = '\0';
+		name = stripped;
+		pr_debug("\tghost: Name stripped to %s (namelen %zu)\n",
+			 name, namelen);
+	}
+
+	memzero(sname, sizeof(sname));
+	n = snprintf(sname, sizeof(sname), GHOST_NAME_FMT, cnt++);
+	if (n < 0 || (size_t)n >= sizeof(sname)) {
+		pr_err("\tghost: Can't format suffix for socket name\n");
+		return -1;
+	}
+
+	*namelen_new = namelen + n;
+	if (*namelen_new > UNIX_PATH_MAX) {
+		pr_err("\tghost: New name for socket is too long\n");
+		return -1;
+	}
+
+	*name_new = shmalloc(*namelen_new);
+	if (!*name_new) {
+		pr_err("\tghost: Can't allocate new name for socket\n");
+		return -ENOMEM;
+	}
+
+	n = snprintf(*name_new, *namelen_new, "%s%s", name, sname);
+	if (n < 0 || (size_t)n != (*namelen_new - 1)) {
+		pr_err("\tghost: Name generation failed (%s %d %d)\n",
+		       *name_new, n, (int)*namelen_new);
+		return -1;
+	}
+
+	pr_debug("\tghost: name transition %s -> %s\n", oldname, *name_new);
+	return 0;
+}
+
+/*
+ * Give every socket on the unix_ghost_addr list a fresh unique name
+ * and attach to it the sockets whose peer it is.
+ *
+ * Returns 0 on success, -1 if a new name cannot be generated.
+ */
+int unix_resolve_ghost_addr(void)
+{
+	struct unix_sk_info *master, *sk;
+
+	pr_debug("ghost: Resolving addresses\n");
+
+	/*
+	 * Each ghost entry becomes a master: it owns the name and the
+	 * name is not unlinked until all peers have connected to it.
+	 */
+	list_for_each_entry(master, &unix_ghost_addr, ghost_node) {
+		char *fresh;
+		size_t fresh_len;
+
+		pr_debug("ghost: ino %#x peer %#x address %s\n",
+			 master->ue->ino,
+			 master->peer ? master->peer->ue->ino : 0,
+			 master->name);
+
+		/* unlink_sk() is called for the old name ... */
+		unlink_sk(master);
+
+		if (ghost_new_name(master->name, master->ue->name.len,
+				   &fresh, &fresh_len) != 0)
+			return -1;
+
+		master->name = fresh;
+		master->ue->name.data = (void *)fresh;
+		master->ue->name.len = fresh_len;
+		master->flags |= USK_GHOST_NAME;
+
+		/* ... and once more for the newly generated one */
+		unlink_sk(master);
+
+		/*
+		 * Collect the sockets connected to this master: each one
+		 * takes a reference on the name, so it is removed from
+		 * the FS only when the last of them has connected.
+		 */
+		list_for_each_entry(sk, &unix_sockets, list) {
+			if ((sk->flags & USK_GHOST_NAME) ||
+			    sk == master || sk->peer != master)
+				continue;
+
+			pr_debug("\t\tghost: connected to us %#x -> %#x\n",
+				 sk->ue->ino, master->ue->ino);
+
+			sk->ghost_master = master;
+			sk->flags |= USK_GHOST_NAME;
+			atomic_inc(&master->name_ref);
+			list_add(&sk->ghost_waiter, &master->ghost_wait_head);
+		}
+	}
return 0;
}
@@ -1873,6 +2074,15 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
add_post_prepare_cb(&ui->peer_resolve);
}
+ if (ui->ue->deleted) {
+ if (!ui->name || !ui->ue->name.len || !ui->name[0]) {
+ pr_err("No name present, ino %#x\n", ui->ue->ino);
+ return -1;
+ }
+
+ list_add_tail(&ui->ghost_node, &unix_ghost_addr);
+ }
+
list_add_tail(&ui->list, &unix_sockets);
return file_desc_add(&ui->d, ui->ue->id, &unix_desc_ops);
}
--
2.14.3
More information about the CRIU
mailing list