[CRIU] [PATCH 8/9] unix: Add support for bindmounted sockets
Cyrill Gorcunov
gorcunov at gmail.com
Sat Jun 9 16:26:10 MSK 2018
Some unix sockets might be bind-mounted (say /dev/log
bound to another place). To handle this we need to
change the way we open such sockets, especially
because we create the mount tree earlier than we start
restoring files.
Thus here is what we do:
- on dump, mark such sockets with the UNIX_UFLAGS__BINDMOUNT
flag so we can distinguish them on restore;
- collect unix sockets before creating the mount tree;
note that at this moment we are only able to gather
these sockets into their own @unix_mnt_sockets list and
nothing more, because setting up the peers and such
happens later, in the so-called post-action procedures;
- when we need to create a bind-mount point we enter
the unix engine and figure out whether there is a socket
to bind-mount over; if one is found we pre-allocate a
socketpair, bind it and save it inside the fdstore engine;
using a socketpair is important because later we need
both peers to restore queued data;
- finally, when we start restoring files, we simply
fetch the socket from the fdstore and use it
directly.
This whole scheme works simply because we support
standalone dgram sockets only; adding support for
stream sockets would require far more engine rework,
and hopefully we won't need it in the near future.
Signed-off-by: Cyrill Gorcunov <gorcunov at gmail.com>
---
criu/include/sockets.h | 2 +
criu/mount.c | 7 ++
criu/sk-unix.c | 270 +++++++++++++++++++++++++++++++++++++++----------
3 files changed, 223 insertions(+), 56 deletions(-)
diff --git a/criu/include/sockets.h b/criu/include/sockets.h
index 8df7b70da9a7..01033ac9c933 100644
--- a/criu/include/sockets.h
+++ b/criu/include/sockets.h
@@ -9,6 +9,7 @@
struct fdinfo_list_entry;
struct sk_opts_entry;
+struct mount_info;
struct file_desc;
struct fd_parms;
struct cr_imgset;
@@ -42,6 +43,7 @@ extern int fix_external_unix_sockets(void);
extern int prepare_scms(void);
extern int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids);
extern int collect_unix_bindmounts(void);
+extern int unix_prepare_bindmount(struct mount_info *mi);
extern struct collect_image_info netlink_sk_cinfo;
diff --git a/criu/mount.c b/criu/mount.c
index 47c65d510993..98e87225c771 100644
--- a/criu/mount.c
+++ b/criu/mount.c
@@ -28,6 +28,7 @@
#include "external.h"
#include "fdstore.h"
#include "clone-noasan.h"
+#include "sockets.h"
#include "images/mnt.pb-c.h"
@@ -2263,6 +2264,12 @@ static int do_bind_mount(struct mount_info *mi)
}
}
+ if (unix_prepare_bindmount(mi)) {
+ pr_err("Failed to prepare bindmount on unix at %s\n",
+ mi->mountpoint);
+ goto err;
+ }
+
if (mount(root, mi->mountpoint, NULL, MS_BIND | (mi->flags & MS_REC), NULL) < 0) {
pr_perror("Can't mount at %s", mi->mountpoint);
goto err;
diff --git a/criu/sk-unix.c b/criu/sk-unix.c
index 5fd6fc73f841..b2a28540ee03 100644
--- a/criu/sk-unix.c
+++ b/criu/sk-unix.c
@@ -976,7 +976,10 @@ struct unix_sk_info {
char *name;
char *name_dir;
unsigned flags;
- int fdstore_id;
+ union {
+ int fdstore_id;
+ int fdstore_mnt_id[2];
+ };
struct unix_sk_info *peer;
struct pprep_head peer_resolve; /* XXX : union with the above? */
struct file_desc d;
@@ -1010,6 +1013,8 @@ struct scm_fle {
#define USK_PAIR_MASTER 0x1
#define USK_PAIR_SLAVE 0x2
#define USK_GHOST_FDSTORE 0x4 /* bound but removed address */
+#define USK_BINDMOUNT 0x8 /* socket is pre-opened for bindmount reason */
+#define USK_NOCWD 0x10 /* no cwd switch */
static struct unix_sk_info *find_unix_sk_by_ino(int ino)
{
@@ -1229,6 +1234,9 @@ static int revert_unix_sk_cwd(struct unix_sk_info *ui, int *prev_cwd_fd, int *ro
{
int ret = 0;
+ if (ui->flags & USK_NOCWD)
+ return 0;
+
if (*ns_fd >= 0 && restore_ns(*ns_fd, &mnt_ns_desc))
ret = -1;
if (*root_fd >= 0) {
@@ -1256,6 +1264,9 @@ static int prep_unix_sk_cwd(struct unix_sk_info *ui, int *prev_cwd_fd,
static struct ns_id *root = NULL, *ns;
int fd;
+ if (ui->flags & USK_NOCWD)
+ return 0;
+
if (prev_mntns_fd && ui->name[0] && ui->ue->mnt_id >= 0) {
struct ns_id *mntns = lookup_nsid_by_mnt_id(ui->ue->mnt_id);
int ns_fd;
@@ -1832,12 +1843,68 @@ static int setup_second_end(int *sks, struct fdinfo_list_entry *second_end)
return 0;
}
+/*
+ * Drop the peer association of a datagram socket @sk which belongs
+ * to @ui. Sockets of any other type are left untouched.
+ *
+ * Returns 0 on success (or when there is nothing to do), -1 if the
+ * peer could not be cleared.
+ */
+static int break_connected(struct unix_sk_info *ui, int sk)
+{
+	struct sockaddr_un unspec = { .sun_family = AF_UNSPEC };
+
+	if (ui->ue->type != SOCK_DGRAM)
+		return 0;
+
+	/*
+	 * socketpair() makes each end the peer of the other one,
+	 * and while a peer is set no other socket is allowed to
+	 * connect to this end (see unix_dgram_connect()->
+	 * unix_may_send()). Connecting with AF_UNSPEC is a hack
+	 * which clears the peer.
+	 *
+	 * Note the hack flushes the receive queue as well, so
+	 * restore_unix_queue() must be called after it.
+	 */
+	if (connect(sk, (struct sockaddr *)&unspec, sizeof(unspec.sun_family)) == 0)
+		return 0;
+
+	pr_perror("Can't clear socket id %#x peer", ui->ue->id);
+	return -1;
+}
+
+/*
+ * Obtain the descriptor(s) the unix socket @ui is going to be restored on.
+ *
+ * @sks:            receives the descriptors; sks[0] is the socket itself,
+ *                  sks[1] is the second end (-1 when a plain socket was
+ *                  created).
+ * @pair:           create a socketpair instead of a plain socket. Ignored
+ *                  for USK_BINDMOUNT sockets, which always come as the
+ *                  pair stored by unix_prepare_bindmount().
+ * @disjoin_master: when @pair is set, clear the peer of sks[0] via
+ *                  break_connected() so other sockets may connect to it.
+ *
+ * Returns 0 on success, -1 on failure.
+ *
+ * NOTE(review): a USK_BINDMOUNT socket requested with @pair == false is
+ * handed out still connected to sks[1]; verify that callers which close
+ * sks[1] do not need break_connected() for it too.
+ */
+static int make_socket(struct unix_sk_info *ui, int sks[2], bool pair, bool disjoin_master)
+{
+	if (unlikely(ui->flags & USK_BINDMOUNT)) {
+		sks[0] = fdstore_get(ui->fdstore_mnt_id[0]);
+		sks[1] = fdstore_get(ui->fdstore_mnt_id[1]);
+		if (sks[0] < 0 || sks[1] < 0) {
+			pr_err("bindmount: Can't fetch id %#x socketpair from the store\n",
+			       ui->ue->id);
+			/* Don't leak the end which has been fetched fine */
+			close_safe(&sks[0]);
+			close_safe(&sks[1]);
+			return -1;
+		}
+	} else {
+		int ret;
+
+		sks[0] = sks[1] = -1;
+		if (!pair) {
+			ret = socket(PF_UNIX, ui->ue->type, 0);
+			sks[0] = ret;
+		} else
+			ret = socketpair(PF_UNIX, ui->ue->type, 0, sks);
+
+		if (ret < 0) {
+			/* No trailing newline, same as the other pr_perror() calls */
+			pr_perror("Can't create %s id %#x",
+				  pair ? "socketpair" : "socket",
+				  ui->ue->id);
+			return -1;
+		}
+	}
+
+	if (disjoin_master && pair)
+		return break_connected(ui, sks[0]);
+
+	return 0;
+}
+
static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
{
struct unix_sk_info *queuer = ui->queuer;
struct unix_sk_info *peer = ui->peer;
struct fdinfo_list_entry *fle, *fle_peer;
- int sk;
+ int sks[2];
fle = file_master(&ui->d);
pr_info_opening("standalone", ui, fle);
@@ -1872,21 +1939,14 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
* was successful.
*/
if (ui->ue->uflags & UNIX_UFLAGS__SERVICE) {
- int sks[2];
-
- if (socketpair(PF_UNIX, ui->ue->type, 0, sks)) {
- pr_perror("Can't create socketpair");
+ if (make_socket(ui, sks, true, false))
return -1;
- }
if (send_criu_dump_resp(sks[1], true, true) == -1)
return -1;
close(sks[1]);
- sk = sks[0];
} else if (ui->ue->state == TCP_ESTABLISHED && queuer && queuer->ue->ino == FAKE_INO) {
- int ret, sks[2];
-
if (ui->ue->type != SOCK_STREAM) {
pr_err("Non-stream socket %d in established state\n",
ui->ue->ino);
@@ -1899,51 +1959,21 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
return -1;
}
- ret = socketpair(PF_UNIX, ui->ue->type, 0, sks);
- if (ret < 0) {
- pr_perror("Can't create socketpair");
+ if (make_socket(ui, sks, true, false))
return -1;
- }
if (setup_second_end(sks, file_master(&queuer->d)))
return -1;
-
- sk = sks[0];
} else if (ui->ue->type == SOCK_DGRAM && queuer && queuer->ue->ino == FAKE_INO) {
- struct sockaddr_un addr;
- int sks[2];
-
- if (socketpair(PF_UNIX, ui->ue->type, 0, sks) < 0) {
- pr_perror("Can't create socketpair");
+ if (make_socket(ui, sks, true, true))
return -1;
- }
-
- sk = sks[0];
- addr.sun_family = AF_UNSPEC;
-
- /*
- * socketpair() assigns sks[1] as a peer of sks[0]
- * (and vice versa). But in this case (not zero peer)
- * it's impossible for other sockets to connect
- * to sks[0] (see unix_dgram_connect()->unix_may_send()).
- * The below is hack: we use that connect with AF_UNSPEC
- * clears socket's peer.
- * Note, that connect hack flushes receive queue,
- * so restore_unix_queue() must be after it.
- */
- if (connect(sk, (struct sockaddr *)&addr, sizeof(addr.sun_family))) {
- pr_perror("Can't clear socket's peer");
- return -1;
- }
if (setup_second_end(sks, file_master(&queuer->d)))
return -1;
-
- sk = sks[0];
} else {
if (ui->ue->uflags & UNIX_UFLAGS__CALLBACK) {
- sk = run_plugins(RESTORE_UNIX_SK, ui->ue->ino);
- if (sk >= 0)
+ sks[0] = run_plugins(RESTORE_UNIX_SK, ui->ue->ino);
+ if (sks[0] >= 0)
goto out;
}
@@ -1959,19 +1989,20 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
return -1;
}
- sk = socket(PF_UNIX, ui->ue->type, 0);
- if (sk < 0) {
- pr_perror("Can't make unix socket");
+ pr_debug("socketpair instead of plain socket\n");
+ if (make_socket(ui, sks, false, true))
return -1;
- }
+ close(sks[1]);
}
- if (bind_unix_sk(sk, ui))
- return -1;
+ if (!(ui->ue->uflags & UNIX_UFLAGS__BINDMOUNT)) {
+ if (bind_unix_sk(sks[0], ui))
+ return -1;
+ }
if (ui->ue->state == TCP_LISTEN) {
pr_info("\tPutting %d into listen state\n", ui->ue->ino);
- if (listen(sk, ui->ue->backlog) < 0) {
+ if (listen(sks[0], ui->ue->backlog) < 0) {
pr_perror("Can't make usk listen");
return -1;
}
@@ -1987,15 +2018,15 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd)
* 2)Queuer won't be able to connect, if we do
* shutdown, so postpone it.
*/
- *new_fd = sk;
+ *new_fd = sks[0];
return 1;
}
out:
- if (restore_sk_common(sk, ui))
+ if (restore_sk_common(sks[0], ui))
return -1;
- *new_fd = sk;
+ *new_fd = sks[0];
return 0;
}
@@ -2116,7 +2147,8 @@ static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue)
ui->name_dir = (void *)ue->name_dir;
ui->flags = 0;
- ui->fdstore_id = -1;
+ ui->fdstore_mnt_id[0] = -1; /* fdstore_id in union */
+ ui->fdstore_mnt_id[1] = -1;
ui->ghost_dir_pos = 0;
ui->peer = NULL;
ui->queuer = NULL;
@@ -2227,8 +2259,24 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i)
list_add_tail(&ui->ghost_node, &unix_ghost_addr);
}
- if (ui->ue->uflags & UNIX_UFLAGS__BINDMOUNT)
+ if (ui->ue->uflags & UNIX_UFLAGS__BINDMOUNT) {
+ /*
+ * Make sure it is supported socket!
+ */
+ if ((ui->ue->uflags & ~UNIX_UFLAGS__BINDMOUNT) ||
+ (ui->ue->type != SOCK_DGRAM) ||
+ (ui->ue->state != TCP_CLOSE)) {
+ pr_err("bindmount: Unsupported socket id %#x "
+ "(expect %x:%s:%s got %x:%s:%s)\n",
+ ui->ue->id, UNIX_UFLAGS__BINDMOUNT,
+ socket_type_name(SOCK_DGRAM),
+ tcp_state_name(TCP_CLOSE),
+ ui->ue->uflags, socket_type_name(ui->ue->type),
+ tcp_state_name(ui->ue->state));
+ return -1;
+ }
list_add_tail(&ui->mnt_list, &unix_mnt_sockets);
+ }
list_add_tail(&ui->list, &unix_sockets);
return file_desc_add(&ui->d, ui->ue->id, &unix_desc_ops);
@@ -2242,6 +2290,116 @@ struct collect_image_info unix_sk_cinfo = {
.flags = COLLECT_SHARED,
};
+/*
+ * Go back to the root and the working directory which were saved in
+ * @root_fd and @cwd_fd before unix_prepare_bindmount() switched them.
+ *
+ * Returns 0 on success, -1 if any step failed.
+ */
+static int unix_bindmount_restore_dirs(int root_fd, int cwd_fd)
+{
+	if (fchdir(root_fd)) {
+		pr_perror("bindmount: Can't revert root directory");
+		return -1;
+	}
+
+	if (chroot(".")) {
+		pr_perror("bindmount: Can't revert chroot ");
+		return -1;
+	}
+
+	if (fchdir(cwd_fd)) {
+		pr_perror("bindmount: Can't revert working dir");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Pre-create the unix socket which lives on the mount point @mi.
+ *
+ * Looks up the first socket on @unix_mnt_sockets whose mnt_id matches
+ * @mi. If there is one, a socketpair is created, its first end is bound
+ * from inside the mount's root and both ends are put into the fdstore
+ * (ids are kept in ui->fdstore_mnt_id[]), so that make_socket() fetches
+ * them later instead of creating a new socket.
+ *
+ * Returns 0 on success or when no socket belongs to @mi, -1 on error.
+ * The original root and working directory are reverted on error paths
+ * too, and the saved directory descriptors are always closed.
+ */
+int unix_prepare_bindmount(struct mount_info *mi)
+{
+	int prev_cwd_fd = -1, prev_root_fd = -1;
+	int ret = -1, sks[2] = { -1, -1 };
+	bool dirs_switched = false;
+	struct unix_sk_info *ui;
+	char path[PATH_MAX];
+
+	list_for_each_entry(ui, &unix_mnt_sockets, mnt_list) {
+		if (ui->ue->mnt_id == mi->mnt_id) {
+			pr_info("bindmount: id %#x ino %d type %s state %s queuer %p peer %d (name %.*s dir %s)\n",
+				ui->ue->id, ui->ue->ino, socket_type_name(ui->ue->type),
+				tcp_state_name(ui->ue->state), ui->queuer, ui->ue->peer,
+				(int)ui->ue->name.len, ui->ue->name.data,
+				ui->name_dir ? ui->name_dir : "-");
+			break;
+		}
+	}
+
+	/* The iterator wrapped back to the list head: nothing matched */
+	if (&ui->mnt_list == &unix_mnt_sockets)
+		return 0;
+
+	/*
+	 * Mark it as bindmount so when need to use we
+	 * would fetch it from the fdstore, and point
+	 * out that no need to cwd change since we
+	 * already opened it in proper place.
+	 */
+	ui->flags |= USK_BINDMOUNT | USK_NOCWD;
+
+	if (rst_get_mnt_root(mi->mnt_id, path, sizeof(path)) < 0) {
+		pr_err("bindmount: Can't setup mnt_root for %s\n", mi->ns_mountpoint);
+		return -1;
+	}
+
+	prev_cwd_fd = open(".", O_RDONLY);
+	if (prev_cwd_fd < 0) {
+		pr_perror("bindmount: Can't save current cwd");
+		goto out;
+	}
+
+	prev_root_fd = open("/", O_RDONLY);
+	if (prev_root_fd < 0) {
+		pr_perror("bindmount: Can't save current root");
+		goto out;
+	}
+
+	if (chdir(path)) {
+		pr_perror("bindmount: Can't chdir to %s", path);
+		goto out;
+	}
+
+	/* From now on every exit path has to go back to the saved dirs */
+	dirs_switched = true;
+
+	if (chroot(".")) {
+		pr_perror("bindmount: Can't chroot");
+		goto out;
+	}
+
+	if (ui->name_dir && chdir(ui->name_dir)) {
+		pr_perror("bindmount: Can't chdir to %s", ui->name_dir);
+		goto out;
+	}
+
+	/* Must not return directly: fds are open and the root is switched */
+	if (set_netns(ui->ue->ns_id))
+		goto out;
+
+	/*
+	 * We support only DGRAM sockets for now so it is safe
+	 * to preallocate socket pair here and later the
+	 * open_unixsk_standalone helper will simply fetch the
+	 * peers, closing the ends it doesn't need.
+	 */
+	if (socketpair(PF_UNIX, ui->ue->type, 0, sks)) {
+		pr_perror("bindmount: Can't create socketpair id %#x",
+			  ui->ue->id);
+		goto out;
+	}
+
+	if (bind_unix_sk(sks[0], ui))
+		goto out;
+
+	ui->fdstore_mnt_id[0] = fdstore_add(sks[0]);
+	ui->fdstore_mnt_id[1] = fdstore_add(sks[1]);
+	if (ui->fdstore_mnt_id[0] < 0 || ui->fdstore_mnt_id[1] < 0) {
+		pr_err("bindmount: Can't add socketpair id %#x into fdstore\n",
+		       ui->ue->id);
+		goto out;
+	}
+
+	ret = 0;
+out:
+	/*
+	 * Leave the mount's root on success and on failure alike,
+	 * a failed revert turns success into an error.
+	 */
+	if (dirs_switched && unix_bindmount_restore_dirs(prev_root_fd, prev_cwd_fd))
+		ret = -1;
+
+	close_safe(&prev_cwd_fd);
+	close_safe(&prev_root_fd);
+	close_safe(&sks[0]);
+	close_safe(&sks[1]);
+
+	if (ret == 0)
+		pr_debug("bindmount: Standalone socket moved into fdstore (id %#x ino %d peer %d)\n",
+			 ui->ue->id, ui->ue->ino, ui->ue->peer);
+
+	return ret;
+}
+
static void set_peer(struct unix_sk_info *ui, struct unix_sk_info *peer)
{
ui->peer = peer;
--
2.14.4
More information about the CRIU
mailing list