[Devel] [PATCH 1/3] Make sockets proper objhash objects and use checkpoint_obj() on them (v2)
Dan Smith
danms at us.ibm.com
Wed Sep 2 11:22:38 PDT 2009
This changes the checkpoint/restart procedure for sockets a bit. The
socket file header is now checkpointed separately from the socket itself,
which allows us to checkpoint a socket without arriving at it from a
file descriptor. Thus, most sockets will be checkpointed as a result
of processing the file table: sock_file_checkpoint() is called on the
socket's file, which in turn calls checkpoint_obj() on its struct sock.
However, we may arrive at some sockets while checkpointing other objects,
such as the other end of an AF_UNIX socket with buffers in flight. This
patch just opens that door, which is utilized by the next patch.
Changes in v2:
- If we attempt to checkpoint an orphan socket, create a temporary
struct socket to adopt it for the duration of the checkpoint.
Signed-off-by: Dan Smith <danms at us.ibm.com>
---
checkpoint/objhash.c | 2 +
include/linux/checkpoint_hdr.h | 6 +-
include/net/sock.h | 2 +
net/checkpoint.c | 140 +++++++++++++++++++++++++++++++--------
net/unix/checkpoint.c | 3 +-
5 files changed, 120 insertions(+), 33 deletions(-)
diff --git a/checkpoint/objhash.c b/checkpoint/objhash.c
index a9a10d1..a410346 100644
--- a/checkpoint/objhash.c
+++ b/checkpoint/objhash.c
@@ -381,6 +381,8 @@ static struct ckpt_obj_ops ckpt_obj_ops[] = {
.obj_type = CKPT_OBJ_SOCK,
.ref_drop = obj_sock_drop,
.ref_grab = obj_sock_grab,
+ .checkpoint = checkpoint_sock,
+ .restore = restore_sock,
},
};
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 06bc6e2..b75562c 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -70,6 +70,7 @@ enum {
CKPT_HDR_USER,
CKPT_HDR_GROUPINFO,
CKPT_HDR_TASK_CREDS,
+ CKPT_HDR_SOCKET,
/* 201-299: reserved for arch-dependent */
@@ -368,7 +369,8 @@ struct ckpt_hdr_file_pipe {
} __attribute__((aligned(8)));
/* socket */
-struct ckpt_socket {
+struct ckpt_hdr_socket {
+ struct ckpt_hdr h;
struct { /* struct socket */
__u64 flags;
__u8 state;
@@ -428,7 +430,7 @@ struct ckpt_hdr_socket_unix {
struct ckpt_hdr_file_socket {
struct ckpt_hdr_file common;
- struct ckpt_socket socket;
+ __s32 sock_objref;
} __attribute__((aligned(8)));
struct ckpt_hdr_utsns {
diff --git a/include/net/sock.h b/include/net/sock.h
index 8e3b050..0db1ca3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1644,6 +1644,8 @@ extern __u32 sysctl_rmem_default;
/* Checkpoint/Restart Functions */
struct ckpt_ctx;
struct ckpt_hdr_file;
+extern int checkpoint_sock(struct ckpt_ctx *ctx, void *ptr);
+extern void *restore_sock(struct ckpt_ctx *ctx);
extern int sock_file_checkpoint(struct ckpt_ctx *ctx, struct file *file);
extern struct file *sock_file_restore(struct ckpt_ctx *ctx,
struct ckpt_hdr_file *h);
diff --git a/net/checkpoint.c b/net/checkpoint.c
index 2541e81..42a8853 100644
--- a/net/checkpoint.c
+++ b/net/checkpoint.c
@@ -428,31 +428,26 @@ static int sock_cptrst(struct ckpt_ctx *ctx, struct sock *sk,
return 0;
}
-int sock_file_checkpoint(struct ckpt_ctx *ctx, struct file *file)
+static int __do_sock_checkpoint(struct ckpt_ctx *ctx, struct sock *sk)
{
- struct ckpt_hdr_file_socket *h;
- struct socket *sock = file->private_data;
- struct sock *sk = sock->sk;
int ret;
+ struct socket *sock = sk->sk_socket;
+ struct ckpt_hdr_socket *h;
if (!sock->ops->checkpoint) {
ckpt_write_err(ctx, "socket (proto_ops: %pS)", sock->ops);
return -ENOSYS;
}
- h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_FILE);
+ h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_SOCKET);
if (!h)
return -ENOMEM;
- h->common.f_type = CKPT_FILE_SOCKET;
-
/* part I: common to all sockets */
- ret = sock_cptrst(ctx, sk, &h->socket, CKPT_CPT);
- if (ret < 0)
- goto out;
- ret = checkpoint_file_common(ctx, file, &h->common);
+ ret = sock_cptrst(ctx, sk, h, CKPT_CPT);
if (ret < 0)
goto out;
+
ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) h);
if (ret < 0)
goto out;
@@ -463,12 +458,71 @@ int sock_file_checkpoint(struct ckpt_ctx *ctx, struct file *file)
goto out;
/* part III: socket buffers */
- if (sk->sk_state != TCP_LISTEN) {
+ if ((sk->sk_state != TCP_LISTEN) && (!sock_flag(sk, SOCK_DEAD))) {
ret = sock_write_buffers(ctx, &sk->sk_receive_queue);
if (ret)
goto out;
ret = sock_write_buffers(ctx, &sk->sk_write_queue);
}
+
+ out:
+ ckpt_hdr_put(ctx, h);
+
+ return ret;
+}
+
+static int do_sock_checkpoint(struct ckpt_ctx *ctx, struct sock *sk)
+{
+ struct socket *sock;
+ int ret;
+
+ if (sk->sk_socket)
+ return __do_sock_checkpoint(ctx, sk);
+
+ /* Temporarily adopt this orphan socket */
+ ret = sock_create(sk->sk_family, sk->sk_type, 0, &sock);
+ if (ret < 0)
+ return ret;
+ sock_graft(sk, sock);
+
+ ret = __do_sock_checkpoint(ctx, sk);
+
+ sock_orphan(sk);
+ sock->sk = NULL;
+ sock_release(sock);
+
+ return ret;
+}
+
+int checkpoint_sock(struct ckpt_ctx *ctx, void *ptr)
+{
+ return do_sock_checkpoint(ctx, (struct sock *)ptr);
+}
+
+int sock_file_checkpoint(struct ckpt_ctx *ctx, struct file *file)
+{
+ struct ckpt_hdr_file_socket *h;
+ struct socket *sock = file->private_data;
+ struct sock *sk = sock->sk;
+ int ret;
+
+ h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_FILE);
+ if (!h)
+ return -ENOMEM;
+
+ h->common.f_type = CKPT_FILE_SOCKET;
+
+ h->sock_objref = checkpoint_obj(ctx, sk, CKPT_OBJ_SOCK);
+ if (h->sock_objref < 0) {
+ ret = h->sock_objref;
+ goto out;
+ }
+
+ ret = checkpoint_file_common(ctx, file, &h->common);
+ if (ret < 0)
+ goto out;
+
+ ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) h);
out:
ckpt_hdr_put(ctx, h);
return ret;
@@ -525,27 +579,31 @@ static struct file *sock_alloc_attach_fd(struct socket *sock)
file = ERR_PTR(err);
}
+ /* Since objhash assumes the initial reference for a socket,
+ * we bump it here for this descriptor, unlike other places in the
+ * socket code which assume the descriptor is the owner.
+ */
+ sock_hold(sock->sk);
+
return file;
}
struct file *sock_file_restore(struct ckpt_ctx *ctx, struct ckpt_hdr_file *ptr)
{
- struct ckpt_hdr_file_socket *hh = (struct ckpt_hdr_file_socket *) ptr;
- struct ckpt_socket *h = &hh->socket;
+ struct ckpt_hdr_socket *h;
struct socket *sock;
- struct file *file;
int ret;
- if (ptr->h.type != CKPT_HDR_FILE ||
- ptr->h.len != sizeof(*hh) || ptr->f_type != CKPT_FILE_SOCKET)
- return ERR_PTR(-EINVAL);
+ h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_SOCKET);
+ if (IS_ERR(h))
+ return ERR_PTR(PTR_ERR(h));
/* silently clear flags, e.g. SOCK_NONBLOCK or SOCK_CLOEXEC */
h->sock.type &= SOCK_TYPE_MASK;
ret = sock_create(h->sock_common.family, h->sock.type, 0, &sock);
if (ret < 0)
- return ERR_PTR(ret);
+ goto err;
if (!sock->ops->restore) {
ckpt_debug("proto_ops lacks checkpoint: %pS\n", sock->ops);
@@ -566,21 +624,45 @@ struct file *sock_file_restore(struct ckpt_ctx *ctx, struct ckpt_hdr_file *ptr)
if (ret < 0)
goto err;
- file = sock_alloc_attach_fd(sock);
- if (IS_ERR(file)) {
- ret = PTR_ERR(file);
- goto err;
- }
+ ckpt_hdr_put(ctx, h);
+
+ return sock->sk;
+ err:
+ ckpt_hdr_put(ctx, h);
+ sock_release(sock);
+
+ return ERR_PTR(ret);
+}
+
+void *restore_sock(struct ckpt_ctx *ctx)
+{
+ return do_sock_restore(ctx);
+}
+
+struct file *sock_file_restore(struct ckpt_ctx *ctx, struct ckpt_hdr_file *ptr)
+{
+ struct ckpt_hdr_file_socket *h = (struct ckpt_hdr_file_socket *)ptr;
+ struct sock *sk;
+ struct file *file;
+ int ret;
+
+ if (ptr->h.type != CKPT_HDR_FILE || ptr->f_type != CKPT_FILE_SOCKET)
+ return ERR_PTR(-EINVAL);
+
+ sk = ckpt_obj_fetch(ctx, h->sock_objref, CKPT_OBJ_SOCK);
+ if (IS_ERR(sk))
+ return ERR_PTR(PTR_ERR(sk));
+
+ file = sock_alloc_attach_fd(sk->sk_socket);
+ if (IS_ERR(file))
+ return file;
ret = restore_file_common(ctx, file, ptr);
if (ret < 0) {
fput(file);
- file = ERR_PTR(ret);
+ return ERR_PTR(ret);
}
- return file;
- err:
- sock_release(sock);
- return ERR_PTR(ret);
+ return file;
}
diff --git a/net/unix/checkpoint.c b/net/unix/checkpoint.c
index 08e664b..f4905db 100644
--- a/net/unix/checkpoint.c
+++ b/net/unix/checkpoint.c
@@ -57,7 +57,6 @@ static int unix_write_cwd(struct ckpt_ctx *ctx,
int unix_checkpoint(struct ckpt_ctx *ctx, struct socket *sock)
{
struct unix_sock *sk = unix_sk(sock->sk);
- struct unix_sock *pr = unix_sk(sk->peer);
struct ckpt_hdr_socket_unix *un;
int new;
int ret = -ENOMEM;
@@ -86,7 +85,7 @@ int unix_checkpoint(struct ckpt_ctx *ctx, struct socket *sock)
goto out;
if (sk->peer)
- un->peer = ckpt_obj_lookup_add(ctx, pr, CKPT_OBJ_SOCK, &new);
+ un->peer = checkpoint_obj(ctx, sk->peer, CKPT_OBJ_SOCK);
else
un->peer = 0;
--
1.6.2.5
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel mailing list