[CRIU] [PATCH 1/2] net: Move sockets.c into net/ directory

Cyrill Gorcunov gorcunov at openvz.org
Fri Apr 20 05:20:45 EDT 2012


Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 Makefile              |    4 +-
 cr-check.c            |    2 +-
 cr-dump.c             |    2 +-
 cr-restore.c          |    2 +-
 cr-show.c             |    2 +-
 crtools.c             |    2 +-
 files.c               |    2 +-
 include/net/sockets.h |   27 +
 include/parasite.h    |    2 +-
 include/sockets.h     |   27 -
 net/sockets.c         | 1612 +++++++++++++++++++++++++++++++++++++++++++++++++
 parasite-syscall.c    |    2 +-
 sockets.c             | 1612 -------------------------------------------------
 13 files changed, 1650 insertions(+), 1648 deletions(-)
 create mode 100644 include/net/sockets.h
 delete mode 100644 include/sockets.h
 create mode 100644 net/sockets.c
 delete mode 100644 sockets.c

diff --git a/Makefile b/Makefile
index b16ee4f..7c6d2e3 100644
--- a/Makefile
+++ b/Makefile
@@ -38,7 +38,6 @@ OBJS		+= kcmp-ids.o
 OBJS		+= rbtree.o
 OBJS		+= log.o
 OBJS		+= libnetlink.o
-OBJS		+= sockets.o
 OBJS		+= files.o
 OBJS		+= pipes.o
 OBJS		+= file-ids.o
@@ -48,6 +47,8 @@ OBJS		+= ipc_ns.o
 OBJS		+= mount.o
 OBJS		+= inotify.o
 
+OBJS		+= net/sockets.o
+
 DEPS		:= $(patsubst %.o,%.d,$(OBJS))
 
 include Makefile.syscall
@@ -97,6 +98,7 @@ clean: cleanpie cleansyscall
 	$(E) "  CLEAN"
 	$(Q) $(RM) -f ./*.o
 	$(Q) $(RM) -f ./*.d
+	$(Q) $(RM) -f ./net/*.o ./net/*.d
 	$(Q) $(RM) -f ./*.i
 	$(Q) $(RM) -f ./*.img
 	$(Q) $(RM) -f ./*.out
diff --git a/cr-check.c b/cr-check.c
index 82032a5..eb505d4 100644
--- a/cr-check.c
+++ b/cr-check.c
@@ -3,7 +3,7 @@
 #include <sys/types.h>
 #include <fcntl.h>
 #include "proc_parse.h"
-#include "sockets.h"
+#include "net/sockets.h"
 #include "crtools.h"
 #include "log.h"
 #include "util-net.h"
diff --git a/cr-dump.c b/cr-dump.c
index 9e8ecc2..b24ef2c 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -27,7 +27,7 @@
 #include "syscall.h"
 #include "ptrace.h"
 #include "util.h"
-#include "sockets.h"
+#include "net/sockets.h"
 #include "namespaces.h"
 #include "image.h"
 #include "proc_parse.h"
diff --git a/cr-restore.c b/cr-restore.c
index 2bac361..794b9c2 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -30,7 +30,7 @@
 #include "log.h"
 #include "syscall.h"
 #include "restorer.h"
-#include "sockets.h"
+#include "net/sockets.h"
 #include "lock.h"
 #include "files.h"
 #include "proc_parse.h"
diff --git a/cr-show.c b/cr-show.c
index d612523..e39dbfb 100644
--- a/cr-show.c
+++ b/cr-show.c
@@ -18,7 +18,7 @@
 #include "compiler.h"
 #include "crtools.h"
 #include "util.h"
-#include "sockets.h"
+#include "net/sockets.h"
 #include "image.h"
 #include "uts_ns.h"
 #include "ipc_ns.h"
diff --git a/crtools.c b/crtools.c
index 0cddcaf..bf12a0c 100644
--- a/crtools.c
+++ b/crtools.c
@@ -18,7 +18,7 @@
 #include "crtools.h"
 #include "util.h"
 #include "log.h"
-#include "sockets.h"
+#include "net/sockets.h"
 #include "syscall.h"
 #include "uts_ns.h"
 #include "ipc_ns.h"
diff --git a/files.c b/files.c
index 34be21f..87b3ace 100644
--- a/files.c
+++ b/files.c
@@ -19,7 +19,7 @@
 #include "util.h"
 #include "util-net.h"
 #include "lock.h"
-#include "sockets.h"
+#include "net/sockets.h"
 
 static struct fdinfo_list_entry *fdinfo_list;
 static int nr_fdinfo_list;
diff --git a/include/net/sockets.h b/include/net/sockets.h
new file mode 100644
index 0000000..9d78370
--- /dev/null
+++ b/include/net/sockets.h
@@ -0,0 +1,27 @@
+#ifndef CR_SOCKETS_H__
+#define CR_SOCKETS_H__
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdbool.h>
+
+struct cr_fdset;
+struct fd_parms;
+extern int dump_socket(struct fd_parms *p, int lfd,
+		const struct cr_fdset *cr_fdset);
+
+struct fdinfo_list_entry;
+struct file_desc;
+struct fdinfo_entry;
+extern int collect_sockets(void);
+extern int dump_external_sockets(void);
+extern int collect_inet_sockets(void);
+extern int collect_unix_sockets(void);
+extern int resolve_unix_peers(void);
+extern int run_unix_connections(void);
+struct cr_options;
+extern void show_unixsk(int fd, struct cr_options *);
+extern void show_inetsk(int fd, struct cr_options *);
+extern void show_sk_queues(int fd, struct cr_options *);
+
+#endif /* CR_SOCKETS_H__ */
diff --git a/include/parasite.h b/include/parasite.h
index 75eed92..1b5a867 100644
--- a/include/parasite.h
+++ b/include/parasite.h
@@ -14,7 +14,7 @@
 
 #include "compiler.h"
 #include "image.h"
-#include "sockets.h"
+#include "net/sockets.h"
 
 #include "util-net.h"
 
diff --git a/include/sockets.h b/include/sockets.h
deleted file mode 100644
index 9d78370..0000000
--- a/include/sockets.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef CR_SOCKETS_H__
-#define CR_SOCKETS_H__
-
-#include <sys/types.h>
-#include <unistd.h>
-#include <stdbool.h>
-
-struct cr_fdset;
-struct fd_parms;
-extern int dump_socket(struct fd_parms *p, int lfd,
-		const struct cr_fdset *cr_fdset);
-
-struct fdinfo_list_entry;
-struct file_desc;
-struct fdinfo_entry;
-extern int collect_sockets(void);
-extern int dump_external_sockets(void);
-extern int collect_inet_sockets(void);
-extern int collect_unix_sockets(void);
-extern int resolve_unix_peers(void);
-extern int run_unix_connections(void);
-struct cr_options;
-extern void show_unixsk(int fd, struct cr_options *);
-extern void show_inetsk(int fd, struct cr_options *);
-extern void show_sk_queues(int fd, struct cr_options *);
-
-#endif /* CR_SOCKETS_H__ */
diff --git a/net/sockets.c b/net/sockets.c
new file mode 100644
index 0000000..d87b66f
--- /dev/null
+++ b/net/sockets.c
@@ -0,0 +1,1612 @@
+#include <sys/socket.h>
+#include <linux/netlink.h>
+#include <linux/types.h>
+#include <linux/net.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <netinet/tcp.h>
+#include <errno.h>
+#include <unistd.h>
+#include <limits.h>
+#include <arpa/inet.h>
+#include <sys/sendfile.h>
+
+#include "types.h"
+#include "libnetlink.h"
+#include "net/sockets.h"
+#include "unix_diag.h"
+#include "image.h"
+#include "crtools.h"
+#include "util.h"
+#include "inet_diag.h"
+#include "files.h"
+#include "util-net.h"
+
+static char buf[4096];
+
+#ifndef NETLINK_SOCK_DIAG
+#define NETLINK_SOCK_DIAG NETLINK_INET_DIAG
+#endif
+
+#ifndef SOCK_DIAG_BY_FAMILY
+#define SOCK_DIAG_BY_FAMILY 20
+#endif
+
+#ifndef SOCKFS_MAGIC
+#define SOCKFS_MAGIC	0x534F434B
+#endif
+
+struct socket_desc {
+	unsigned int		family;
+	unsigned int		ino;
+	struct socket_desc	*next;
+	int			already_dumped;
+	bool			external;
+};
+
+struct unix_sk_desc {
+	struct socket_desc	sd;
+	unsigned int		type;
+	unsigned int		state;
+	unsigned int		peer_ino;
+	unsigned int		rqlen;
+	unsigned int		wqlen;
+	unsigned int		namelen;
+	char			*name;
+	unsigned int		nr_icons;
+	unsigned int		*icons;
+};
+
+struct unix_sk_listen_icon {
+	unsigned int			peer_ino;
+	struct unix_sk_desc		*sk_desc;
+	struct unix_sk_listen_icon	*next;
+};
+
+#define INET_ADDR_LEN		40
+
+struct inet_sk_desc {
+	struct socket_desc	sd;
+	unsigned int		type;
+	unsigned int		proto;
+	unsigned int		src_port;
+	unsigned int		dst_port;
+	unsigned int		state;
+	unsigned int		rqlen;
+	unsigned int		wqlen;
+	unsigned int		src_addr[4];
+	unsigned int		dst_addr[4];
+};
+
+/*
+ * Peek (MSG_PEEK) every packet queued on @sock_fd and append it, tagged with
+ * @sock_id, to the CR_FD_SK_QUEUES image. The original SO_PEEK_OFF value is
+ * put back before returning. Returns 0 on success, negative value on error.
+ */
+static int dump_socket_queue(int sock_fd, int sock_id)
+{
+	struct sk_packet_entry *pe;
+	int size;	/* SO_SNDBUF is reported as an int */
+	socklen_t tmp;
+	int ret, err, orig_peek_off, peek_off = 0;
+
+	/* Save original peek offset. */
+	tmp = sizeof(orig_peek_off);
+	ret = getsockopt(sock_fd, SOL_SOCKET, SO_PEEK_OFF, &orig_peek_off, &tmp);
+	if (ret < 0) {
+		pr_perror("getsockopt failed\n");
+		return ret;
+	}
+	/* Discover max DGRAM size. */
+	tmp = sizeof(size);
+	ret = getsockopt(sock_fd, SOL_SOCKET, SO_SNDBUF, &size, &tmp);
+	if (ret < 0) {
+		pr_perror("getsockopt failed\n");
+		return ret;
+	}
+
+	/* Note: 32 bytes will be used by kernel for protocol header. */
+	size -= 32;
+	/*
+	 * Try to alloc buffer for max supported DGRAM + our header.
+	 * Note: STREAM queue will be written by chunks of this size.
+	 */
+	pe = xmalloc(size + sizeof(struct sk_packet_entry));
+	if (!pe)
+		return -ENOMEM;
+
+	/* Enable peek offset incrementation, starting from offset 0. */
+	ret = setsockopt(sock_fd, SOL_SOCKET, SO_PEEK_OFF, &peek_off, sizeof(peek_off));
+	if (ret < 0) {
+		pr_perror("setsockopt fail\n");
+		goto err_brk;
+	}
+
+	pe->id_for = sock_id;
+
+	while (1) {
+		struct iovec iov = {
+			.iov_base	= pe->data,
+			.iov_len	= size,
+		};
+		struct msghdr msg = {
+			.msg_iov	= &iov,
+			.msg_iovlen	= 1,
+		};
+
+		ret = pe->length = recvmsg(sock_fd, &msg, MSG_DONTWAIT | MSG_PEEK);
+		if (ret < 0) {
+			if (errno == EAGAIN) /* libc reports the cause in errno */
+				break; /* we're done */
+			pr_perror("sys_recvmsg fail: error\n");
+			goto err_set_sock;
+		}
+		if (msg.msg_flags & MSG_TRUNC) {
+			/*
+			 * DGRAM truncated. This should not happen. But we have
+			 * to check...
+			 */
+			pr_err("sys_recvmsg failed: truncated\n");
+			ret = -E2BIG;
+			goto err_set_sock;
+		}
+		ret = write_img_buf(fdset_fd(glob_fdset, CR_FD_SK_QUEUES),
+				pe, sizeof(*pe) + pe->length);
+		if (ret < 0) {
+			ret = -EIO;
+			goto err_set_sock;
+		}
+	}
+	ret = 0;
+
+err_set_sock:
+	/* Restore original peek offset without clobbering the dump result. */
+	err = setsockopt(sock_fd, SOL_SOCKET, SO_PEEK_OFF, &orig_peek_off, sizeof(int));
+	if (err < 0) {
+		pr_perror("setsockopt failed on restore\n");
+		if (!ret)
+			ret = err;
+	}
+err_brk:
+	xfree(pe);
+	return ret;
+}
+
+/* Keys may be signed ints (e.g. inodes above INT_MAX): index as unsigned. */
+#define SK_HASH_SIZE		32
+#define SK_HASH_IDX(key)	((unsigned int)(key) % SK_HASH_SIZE)
+#define SK_HASH_LINK(head, key, elem)					\
+	do {								\
+		(elem)->next = (head)[SK_HASH_IDX(key)];		\
+		(head)[SK_HASH_IDX(key)] = (elem);			\
+	} while (0)
+#define __gen_static_lookup_func(ret, name, head, _member, _type, _name)\
+	static ret *name(_type _name) {					\
+		ret *d;							\
+		for (d = head[SK_HASH_IDX(_name)]; d; d = d->next)	\
+			if (d->_member == _name)			\
+				break;					\
+		return d;						\
+	}
+
+static struct socket_desc *sockets[SK_HASH_SIZE];
+__gen_static_lookup_func(struct socket_desc, lookup_socket, sockets,
+			ino, int, ino);
+
+static struct unix_sk_listen_icon *unix_listen_icons[SK_HASH_SIZE];
+__gen_static_lookup_func(struct unix_sk_listen_icon,
+			 lookup_unix_listen_icons,
+			 unix_listen_icons,
+			 peer_ino, unsigned int, ino);
+
+/* Fill in @d's inode/family and link it into the sockets hash; returns 0. */
+static int sk_collect_one(int ino, int family, struct socket_desc *d)
+{
+	d->family = family;
+	d->ino = ino;
+	SK_HASH_LINK(sockets, ino, d);
+
+	return 0;
+}
+
+static void show_one_inet(const char *act, const struct inet_sk_desc *sk)
+{
+	char src_addr[INET_ADDR_LEN] = "<unknown>";
+
+	if (inet_ntop(AF_INET, (void *)sk->src_addr, src_addr,
+		      INET_ADDR_LEN) == NULL) {
+		pr_perror("Failed to translate address");
+	}
+
+	pr_debug("\t%s: ino 0x%x family %d type %d port %d "
+		"state %d src_addr %s\n",
+		act, sk->sd.ino, sk->sd.family, sk->type, sk->src_port,
+		sk->state, src_addr);
+}
+
+static void show_one_inet_img(const char *act, const struct inet_sk_entry *e)
+{
+	char src_addr[INET_ADDR_LEN] = "<unknown>";
+
+	if (inet_ntop(AF_INET, (void *)e->src_addr, src_addr,
+		      INET_ADDR_LEN) == NULL) {
+		pr_perror("Failed to translate address");
+	}
+
+	pr_debug("\t%s: family %d type %d proto %d port %d "
+		"state %d src_addr %s\n",
+		act, e->family, e->type, e->proto, e->src_port,
+		e->state, src_addr);
+}
+
+/* Log the collected unix socket @sk, prefixed with @act, and its icons. */
+static void show_one_unix(const char *act, const struct unix_sk_desc *sk)
+{
+	unsigned int i;
+
+	pr_debug("\t%s: ino 0x%x type %d state %d name %s\n",
+		act, sk->sd.ino, sk->type, sk->state, sk->name);
+
+	/* nr_icons is 0 when no UNIX_DIAG_ICONS attribute was collected */
+	for (i = 0; i < sk->nr_icons; i++)
+		pr_debug("\t\ticon: %4d\n", sk->icons[i]);
+}
+
+static void show_one_unix_img(const char *act, const struct unix_sk_entry *e)
+{
+	pr_info("\t%s: id %u type %d state %d name %d bytes\n",
+		act, e->id, e->type, e->state, e->namelen);
+}
+
+static int can_dump_inet_sk(const struct inet_sk_desc *sk)
+{
+	if (sk->sd.family != AF_INET) {
+		pr_err("Only IPv4 sockets for now\n");
+		return 0;
+	}
+
+	if (sk->type == SOCK_DGRAM)
+		return 1;
+
+	if (sk->type != SOCK_STREAM) {
+		pr_err("Only stream and dgram inet sockets for now\n");
+		return 0;
+	}
+
+	switch (sk->state) {
+	case TCP_LISTEN:
+		if (sk->rqlen != 0) {
+			/*
+			 * Currently the ICONS nla reports the conn
+			 * requests for listen sockets. Need to pick
+			 * those up and fix the connect job respectively
+			 */
+			pr_err("In-flight connection (l)\n");
+			return 0;
+		}
+		break;
+	default:
+		pr_err("Unknown state %d\n", sk->state);
+		return 0;
+	}
+
+	return 1;
+}
+
+static int dump_one_inet(struct socket_desc *_sk, struct fd_parms *p,
+			 const struct cr_fdset *cr_fdset)
+{
+	struct inet_sk_desc *sk = (struct inet_sk_desc *)_sk;
+	struct inet_sk_entry ie;
+	struct fdinfo_entry fe;
+
+	if (!can_dump_inet_sk(sk))
+		goto err;
+
+	fe.fd = p->fd;
+	fe.type = FDINFO_INETSK;
+	fe.id = sk->sd.ino;
+	fe.flags = p->fd_flags;
+
+	if (write_img(fdset_fd(cr_fdset, CR_FD_FDINFO), &fe))
+		goto err;
+
+	if (sk->sd.already_dumped)
+		return 0;
+
+	memset(&ie, 0, sizeof(ie));
+
+	ie.id		= sk->sd.ino;
+	ie.family	= sk->sd.family;
+	ie.type		= sk->type;
+	ie.proto	= sk->proto;
+	ie.state	= sk->state;
+	ie.src_port	= sk->src_port;
+	ie.dst_port	= sk->dst_port;
+	ie.backlog	= sk->wqlen;
+	ie.flags	= p->flags;
+	ie.fown		= p->fown;
+	memcpy(ie.src_addr, sk->src_addr, sizeof(u32) * 4);
+	memcpy(ie.dst_addr, sk->dst_addr, sizeof(u32) * 4);
+
+	if (write_img(fdset_fd(glob_fdset, CR_FD_INETSK), &ie))
+		goto err;
+
+	pr_info("Dumping inet socket at %d\n", p->fd);
+	show_one_inet("Dumping", sk);
+	show_one_inet_img("Dumped", &ie);
+	sk->sd.already_dumped = 1;
+	return 0;
+
+err:
+	return -1;
+}
+
+static int can_dump_unix_sk(const struct unix_sk_desc *sk)
+{
+	if (sk->type != SOCK_STREAM &&
+	    sk->type != SOCK_DGRAM) {
+		pr_err("Only stream/dgram sockets for now\n");
+		return 0;
+	}
+
+	switch (sk->state) {
+	case TCP_LISTEN:
+		break;
+	case TCP_ESTABLISHED:
+		break;
+	case TCP_CLOSE:
+		if (sk->type != SOCK_DGRAM)
+			return 0;
+		break;
+	default:
+		pr_err("Unknown state %d\n", sk->state);
+		return 0;
+	}
+
+	return 1;
+}
+
+static int dump_one_unix(const struct socket_desc *_sk, struct fd_parms *p,
+		int lfd, const struct cr_fdset *cr_fdset)
+{
+	struct unix_sk_desc *sk = (struct unix_sk_desc *)_sk;
+	struct fdinfo_entry fe;
+	struct unix_sk_entry ue;
+
+	if (!can_dump_unix_sk(sk))
+		goto err;
+
+	fe.fd = p->fd;
+	fe.type = FDINFO_UNIXSK;
+	fe.id = sk->sd.ino;
+	fe.flags = p->fd_flags;
+
+	if (write_img(fdset_fd(cr_fdset, CR_FD_FDINFO), &fe))
+		goto err;
+
+	if (sk->sd.already_dumped)
+		return 0;
+
+	ue.id		= sk->sd.ino;
+	ue.type		= sk->type;
+	ue.state	= sk->state;
+	ue.namelen	= sk->namelen;
+	ue.flags	= p->flags;
+	ue.backlog	= sk->wqlen;
+	ue.peer		= sk->peer_ino;
+	ue.fown		= p->fown;
+	ue.uflags	= 0;
+
+	if (ue.peer) {
+		struct unix_sk_desc *peer;
+
+		peer = (struct unix_sk_desc *)lookup_socket(ue.peer);
+		if (!peer) {
+			pr_err("Unix socket 0x%x without peer 0x%x\n",
+					ue.id, ue.peer);
+			goto err;
+		}
+
+		/*
+		 * Peer should have us as peer or have a name by which
+		 * we can access one.
+		 */
+		if (peer->peer_ino != ue.id) {
+			if (!peer->name) {
+				pr_err("Unix socket 0x%x with unreachable peer 0x%x (0x%x/%s)\n",
+				       ue.id, ue.peer, peer->peer_ino, peer->name);
+				goto err;
+			}
+
+			/*
+			 * It can be external socket, so we defer dumping
+			 * until all sockets the program owns are processed.
+			 */
+			peer->sd.external = true;
+		}
+	} else if (ue.state == TCP_ESTABLISHED) {
+		const struct unix_sk_listen_icon *e;
+
+		/*
+		 * If this is in-flight connection we need to figure
+		 * out where to connect it on restore. Thus, tune up peer
+		 * id by searching an existing listening socket.
+		 *
+		 * Note the socket name will be found at restore stage,
+		 * not now, just to reduce size of dump files.
+		 */
+
+		e = lookup_unix_listen_icons(ue.id);
+		if (!e) {
+			pr_err("Dangling in-flight connection %d\n", ue.id);
+			goto err;
+		}
+
+		/* e->sk_desc is _never_ NULL */
+		if (e->sk_desc->state != TCP_LISTEN) {
+			pr_err("In-flight connection on "
+				"non-listening socket %d\n", ue.id);
+			goto err;
+		}
+
+		ue.peer = e->sk_desc->sd.ino;
+
+		pr_debug("\t\tFixed inflight socket 0x%x peer 0x%x)\n",
+				ue.id, ue.peer);
+	}
+
+	if (write_img(fdset_fd(glob_fdset, CR_FD_UNIXSK), &ue))
+		goto err;
+	if (write_img_buf(fdset_fd(glob_fdset, CR_FD_UNIXSK), sk->name, ue.namelen))
+		goto err;
+
+	if (sk->rqlen != 0 && !(sk->type == SOCK_STREAM &&
+				sk->state == TCP_LISTEN))
+		if (dump_socket_queue(lfd, ue.id))
+			goto err;
+
+	pr_info("Dumping unix socket at %d\n", p->fd);
+	show_one_unix("Dumping", sk);
+	show_one_unix_img("Dumped", &ue);
+
+	sk->sd.already_dumped = 1;
+	return 0;
+
+err:
+	return -1;
+}
+
+/* Dump the collected socket matching @p's inode; -1 if unknown/uncollected. */
+int dump_socket(struct fd_parms *p, int lfd, const struct cr_fdset *cr_fdset)
+{
+	struct socket_desc *sk;
+
+	sk = lookup_socket(p->stat.st_ino);
+	if (!sk) {
+		pr_err("Uncollected socket %lu\n", (unsigned long)p->stat.st_ino);
+		return -1;
+	}
+
+	switch (sk->family) {
+	case AF_UNIX:
+		return dump_one_unix(sk, p, lfd, cr_fdset);
+	case AF_INET:
+		return dump_one_inet(sk, p, cr_fdset);
+	default:
+		pr_err("BUG! Unknown socket collected\n");
+		break;
+	}
+	return -1;
+}
+
+static int inet_collect_one(struct nlmsghdr *h, int type, int proto)
+{
+	struct inet_sk_desc *d;
+	struct inet_diag_msg *m = NLMSG_DATA(h);
+	struct rtattr *tb[INET_DIAG_MAX+1];
+
+	parse_rtattr(tb, INET_DIAG_MAX, (struct rtattr *)(m + 1),
+		     h->nlmsg_len - NLMSG_LENGTH(sizeof(*m)));
+
+	d = xzalloc(sizeof(*d));
+	if (!d)
+		return -1;
+
+	d->type = type;
+	d->proto = proto;
+	d->src_port = ntohs(m->id.idiag_sport);
+	d->dst_port = ntohs(m->id.idiag_dport);
+	d->state = m->idiag_state;
+	d->rqlen = m->idiag_rqueue;
+	d->wqlen = m->idiag_wqueue;
+	memcpy(d->src_addr, m->id.idiag_src, sizeof(u32) * 4);
+	memcpy(d->dst_addr, m->id.idiag_dst, sizeof(u32) * 4);
+
+	return sk_collect_one(m->idiag_inode, AF_INET, &d->sd);
+}
+
+static int inet_tcp_receive_one(struct nlmsghdr *h)
+{
+	return inet_collect_one(h, SOCK_STREAM, IPPROTO_TCP);
+}
+
+static int inet_udp_receive_one(struct nlmsghdr *h)
+{
+	return inet_collect_one(h, SOCK_DGRAM, IPPROTO_UDP);
+}
+
+static int inet_udplite_receive_one(struct nlmsghdr *h)
+{
+	return inet_collect_one(h, SOCK_DGRAM, IPPROTO_UDPLITE);
+}
+
+/*
+ * Build a unix_sk_desc from one UNIX diag reply @m (attributes already parsed
+ * into @tb) and link it into the sockets hash. A socket bound to a relative
+ * path is skipped with a warning. Returns 0 on success or skip, -1 on error.
+ */
+static int unix_collect_one(const struct unix_diag_msg *m,
+		struct rtattr **tb)
+{
+	struct unix_sk_desc *d;
+
+	d = xzalloc(sizeof(*d));
+	if (!d)
+		return -1;
+
+	d->type	= m->udiag_type;
+	d->state= m->udiag_state;
+
+	if (tb[UNIX_DIAG_PEER])
+		d->peer_ino = *(int *)RTA_DATA(tb[UNIX_DIAG_PEER]);
+
+	if (tb[UNIX_DIAG_NAME]) {
+		int len		= RTA_PAYLOAD(tb[UNIX_DIAG_NAME]);
+		char *name	= xmalloc(len + 1);
+
+		if (!name)
+			goto err;
+
+		/* Owned by d from now on, so the err path releases it. */
+		d->name = name;
+		memcpy(name, RTA_DATA(tb[UNIX_DIAG_NAME]), len);
+		name[len] = '\0';
+
+		if (name[0] != '\0') {
+			struct unix_diag_vfs *uv;
+			struct stat st;
+
+			if (name[0] != '/') {
+				pr_warn("Relative bind path '%s' unsupported\n", name);
+				xfree(name);
+				xfree(d);
+				return 0;
+			}
+
+			if (!tb[UNIX_DIAG_VFS]) {
+				pr_err("Bound socket w/o inode %d\n",
+						m->udiag_ino);
+				goto err;
+			}
+
+			uv = RTA_DATA(tb[UNIX_DIAG_VFS]);
+			if (stat(name, &st)) {
+				pr_perror("Can't stat socket %d(%s)",
+						m->udiag_ino, name);
+				goto err;
+			}
+
+			if ((st.st_ino != uv->udiag_vfs_ino) ||
+			    (st.st_dev != kdev_to_odev(uv->udiag_vfs_dev))) {
+				pr_info("unix: Dropping path for unlinked bound "
+						"sk 0x%x.0x%x real 0x%x.0x%x\n",
+						(int)st.st_dev,
+						(int)st.st_ino,
+						(int)uv->udiag_vfs_dev,
+						(int)uv->udiag_vfs_ino);
+				/*
+				 * When a socket is bound to an unlinked file, we
+				 * just drop its name, since no one will access
+				 * it via one.
+				 */
+				xfree(name);
+				len = 0;
+				name = NULL;
+			}
+		}
+
+		d->namelen = len;
+		d->name = name;
+	}
+
+	if (tb[UNIX_DIAG_ICONS]) {
+		int len = RTA_PAYLOAD(tb[UNIX_DIAG_ICONS]);
+		unsigned int i;
+
+		d->icons = xmalloc(len);
+		if (!d->icons)
+			goto err;
+
+		memcpy(d->icons, RTA_DATA(tb[UNIX_DIAG_ICONS]), len);
+		d->nr_icons = len / sizeof(u32);
+
+		/*
+		 * Remember these sockets, we will need them
+		 * to fix up in-flight sockets peers.
+		 */
+		for (i = 0; i < d->nr_icons; i++) {
+			struct unix_sk_listen_icon *e;
+
+			e = xzalloc(sizeof(*e));
+			if (!e)
+				goto err;
+
+			SK_HASH_LINK(unix_listen_icons, d->icons[i], e);
+			pr_debug("\t\tCollected icon %d\n", d->icons[i]);
+			e->peer_ino	= d->icons[i];
+			e->sk_desc	= d;
+		}
+	}
+
+	if (tb[UNIX_DIAG_RQLEN]) {
+		struct unix_diag_rqlen *rq;
+
+		rq = (struct unix_diag_rqlen *)RTA_DATA(tb[UNIX_DIAG_RQLEN]);
+		d->rqlen = rq->udiag_rqueue;
+		d->wqlen = rq->udiag_wqueue;
+	}
+
+	sk_collect_one(m->udiag_ino, AF_UNIX, &d->sd);
+	show_one_unix("Collected", d);
+
+	return 0;
+
+err:
+	xfree(d->icons);
+	xfree(d->name);
+	xfree(d);
+	return -1;
+}
+
+static int unix_receive_one(struct nlmsghdr *h)
+{
+	struct unix_diag_msg *m = NLMSG_DATA(h);
+	struct rtattr *tb[UNIX_DIAG_MAX+1];
+
+	parse_rtattr(tb, UNIX_DIAG_MAX, (struct rtattr *)(m + 1),
+		     h->nlmsg_len - NLMSG_LENGTH(sizeof(*m)));
+
+	return unix_collect_one(m, tb);
+}
+
+static int collect_sockets_nl(int nl, void *req, int size,
+			      int (*receive_callback)(struct nlmsghdr *h))
+{
+	struct msghdr msg;
+	struct sockaddr_nl nladdr;
+	struct iovec iov;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_name	= &nladdr;
+	msg.msg_namelen	= sizeof(nladdr);
+	msg.msg_iov	= &iov;
+	msg.msg_iovlen	= 1;
+
+	memset(&nladdr, 0, sizeof(nladdr));
+	nladdr.nl_family= AF_NETLINK;
+
+	iov.iov_base	= req;
+	iov.iov_len	= size;
+
+	if (sendmsg(nl, &msg, 0) < 0) {
+		pr_perror("Can't send request message");
+		goto err;
+	}
+
+	iov.iov_base	= buf;
+	iov.iov_len	= sizeof(buf);
+
+	while (1) {
+		int err;
+
+		memset(&msg, 0, sizeof(msg));
+		msg.msg_name	= &nladdr;
+		msg.msg_namelen	= sizeof(nladdr);
+		msg.msg_iov	= &iov;
+		msg.msg_iovlen	= 1;
+
+		err = recvmsg(nl, &msg, 0);
+		if (err < 0) {
+			if (errno == EINTR)
+				continue;
+			else {
+				pr_perror("Error receiving nl report");
+				goto err;
+			}
+		}
+		if (err == 0)
+			break;
+
+		err = nlmsg_receive(buf, err, receive_callback);
+		if (err < 0)
+			goto err;
+		if (err == 0)
+			break;
+	}
+
+	return 0;
+
+err:
+	return -1;
+}
+
+int dump_external_sockets(void)
+{
+	struct socket_desc *head, *sd;
+	int i, ret = -1;
+
+	if (!opts.ext_unix_sk)
+		return 0;
+
+	pr_debug("Dumping external sockets\n");
+
+	for (i = 0; i < SK_HASH_SIZE; i++) {
+		head = sockets[i];
+		if (!head)
+			continue;
+
+		for (sd = head; sd; sd = sd->next) {
+			struct unix_sk_entry e = { };
+			struct unix_sk_desc *sk;
+
+			if (sd->already_dumped		||
+			    sd->external == false	||
+			    sd->family != AF_UNIX)
+				continue;
+
+			sk = container_of(sd, struct unix_sk_desc, sd);
+
+			if (sk->type != SOCK_DGRAM)
+				continue;
+
+			e.id		= sd->ino;
+			e.type		= SOCK_DGRAM;
+			e.state		= TCP_LISTEN;
+			e.namelen	= sk->namelen;
+			e.uflags	= USK_EXTERN;
+			e.peer		= 0;
+
+			show_one_unix("Dumping extern", sk);
+
+			if (write_img(fdset_fd(glob_fdset, CR_FD_UNIXSK), &e))
+				goto err;
+			if (write_img_buf(fdset_fd(glob_fdset, CR_FD_UNIXSK),
+					  sk->name, e.namelen))
+				goto err;
+
+			show_one_unix_img("Dumped extern", &e);
+
+			sd->already_dumped = 1;
+		}
+	}
+
+	return 0;
+err:
+	return -1;
+}
+
+/*
+ * Collect UNIX and IPv4 sockets via sock_diag; returns 0 or the last error.
+ */
+int collect_sockets(void)
+{
+	int err = 0, tmp, nl;
+	struct {
+		struct nlmsghdr hdr;
+		union {
+			struct unix_diag_req	u;
+			struct inet_diag_req_v2	i;
+		} r;
+	} req;
+
+	nl = socket(PF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+	if (nl < 0) {
+		pr_perror("Can't create sock diag socket");
+		return -1;
+	}
+
+	memset(&req, 0, sizeof(req));
+	req.hdr.nlmsg_len	= sizeof(req);
+	req.hdr.nlmsg_type	= SOCK_DIAG_BY_FAMILY;
+	req.hdr.nlmsg_flags	= NLM_F_DUMP | NLM_F_REQUEST;
+	req.hdr.nlmsg_seq	= CR_NLMSG_SEQ;
+
+	/* Collect UNIX sockets */
+	req.r.u.sdiag_family	= AF_UNIX;
+	req.r.u.udiag_states	= -1; /* All */
+	req.r.u.udiag_show	= UDIAG_SHOW_NAME | UDIAG_SHOW_VFS |
+				  UDIAG_SHOW_PEER | UDIAG_SHOW_ICONS |
+				  UDIAG_SHOW_RQLEN;
+	tmp = collect_sockets_nl(nl, &req, sizeof(req), unix_receive_one);
+	if (tmp)
+		err = tmp;
+
+	/* Collect IPv4 TCP sockets */
+	req.r.i.sdiag_family	= AF_INET;
+	req.r.i.sdiag_protocol	= IPPROTO_TCP;
+	req.r.i.idiag_ext	= 0;
+	/* Only listening sockets supported yet */
+	req.r.i.idiag_states	= 1 << TCP_LISTEN;
+	tmp = collect_sockets_nl(nl, &req, sizeof(req), inet_tcp_receive_one);
+	if (tmp)
+		err = tmp;
+
+	/* Collect IPv4 UDP sockets */
+	req.r.i.sdiag_family	= AF_INET;
+	req.r.i.sdiag_protocol	= IPPROTO_UDP;
+	req.r.i.idiag_ext	= 0;
+	req.r.i.idiag_states	= -1; /* All */
+	tmp = collect_sockets_nl(nl, &req, sizeof(req), inet_udp_receive_one);
+	if (tmp)
+		err = tmp;
+
+	/* Collect IPv4 UDP-lite sockets */
+	req.r.i.sdiag_family	= AF_INET;
+	req.r.i.sdiag_protocol	= IPPROTO_UDPLITE;
+	req.r.i.idiag_ext	= 0;
+	req.r.i.idiag_states	= -1; /* All */
+	tmp = collect_sockets_nl(nl, &req, sizeof(req), inet_udplite_receive_one);
+	if (tmp)
+		err = tmp;
+
+	close(nl);
+	return err;
+}
+
+struct unix_sk_info {
+	struct unix_sk_entry ue;
+	struct list_head list;
+	char *name;
+	unsigned flags;
+	struct unix_sk_info *peer;
+	struct file_desc d;
+};
+
+#define USK_PAIR_MASTER		0x1
+#define USK_PAIR_SLAVE		0x2
+
+static LIST_HEAD(unix_sockets);
+
+/* Map image id @id to its unix_sk_info, or NULL if none is registered. */
+static struct unix_sk_info *find_unix_sk(int id)
+{
+	struct file_desc *desc = find_file_desc_raw(FDINFO_UNIXSK, id);
+
+	if (!desc)
+		return NULL;
+	return container_of(desc, struct unix_sk_info, d);
+}
+
+struct sk_packet {
+	struct list_head list;
+	struct sk_packet_entry entry;
+	off_t img_off;
+};
+
+static LIST_HEAD(packets_list);
+
+/*
+ * Index the socket queues image: link each packet header, with the image
+ * offset of its payload, to packets_list. Returns the non-positive value
+ * read_img_eof() stopped the loop with, or -ENOMEM on allocation failure.
+ */
+static int read_sockets_queues(void)
+{
+	struct sk_packet *pkt;
+	int ret, fd;
+
+	pr_info("Trying to read socket queues image\n");
+
+	fd = open_image_ro(CR_FD_SK_QUEUES);
+	if (fd < 0)
+		return -1;
+
+	while (1) {
+		pkt = xmalloc(sizeof(*pkt));
+		if (!pkt) {
+			pr_err("Failed to allocate packet header\n");
+			ret = -ENOMEM;	/* fall out through close(fd) */
+			break;
+		}
+		ret = read_img_eof(fd, &pkt->entry);
+		if (ret <= 0)
+			break;
+
+		pkt->img_off = lseek(fd, 0, SEEK_CUR);
+		/* NOTE: add to the tail, otherwise the sequence will be broken. */
+		list_add_tail(&pkt->list, &packets_list);
+		lseek(fd, pkt->entry.length, SEEK_CUR);
+	}
+	close(fd);
+	xfree(pkt);
+
+	return ret;
+}
+
+/*
+ * sendfile() every image packet recorded for @peer_id into @fd; 0 or -1.
+ */
+static int restore_socket_queue(int fd, unsigned int peer_id)
+{
+	struct sk_packet *pkt, *tmp;
+	int ret, img_fd, exit_code = -1;
+
+	pr_info("Trying to restore recv queue for %u\n", peer_id);
+	img_fd = open_image_ro(CR_FD_SK_QUEUES);
+	if (img_fd < 0)
+		return -1;
+
+	list_for_each_entry_safe(pkt, tmp, &packets_list, list) {
+		struct sk_packet_entry *entry = &pkt->entry;
+
+		if (entry->id_for != peer_id)
+			continue;
+
+		pr_info("\tRestoring %d-bytes skb for %u\n", entry->length, peer_id);
+		ret = sendfile(fd, img_fd, &pkt->img_off, entry->length);
+		if (ret < 0) {
+			pr_perror("Failed to sendfile packet");
+			goto out;
+		}
+		if (ret != entry->length) {
+			pr_err("Restored skb trimmed to %d/%d\n", ret, entry->length);
+			goto out;
+		}
+		list_del(&pkt->list);
+		xfree(pkt);
+	}
+	exit_code = 0;
+out:
+	close(img_fd);	/* released on the error paths as well */
+	return exit_code;
+}
+
+struct inet_sk_info {
+	struct inet_sk_entry ie;
+	struct file_desc d;
+};
+
+static int open_inet_sk(struct file_desc *d);
+
+static struct file_desc_ops inet_desc_ops = {
+	.open = open_inet_sk,
+};
+
+/*
+ * Register every entry of the CR_FD_INETSK image via file_desc_add().
+ * Returns the non-positive read_img_eof() result that ended the loop,
+ * or -1 when the image can't be opened or an allocation fails.
+ */
+int collect_inet_sockets(void)
+{
+	struct inet_sk_info *ii = NULL;
+	int fd, ret = -1;
+
+	fd = open_image_ro(CR_FD_INETSK);
+	if (fd < 0)
+		return -1;
+
+	while (1) {
+		ii = xmalloc(sizeof(*ii));
+		ret = -1;
+		if (!ii)
+			break;
+		ret = read_img_eof(fd, &ii->ie);
+		if (ret <= 0)
+			break;
+
+		file_desc_add(&ii->d, FDINFO_INETSK, ii->ie.id,
+				&inet_desc_ops);
+	}
+	xfree(ii);	/* the entry that hit EOF/error was never registered */
+	close(fd);
+	return ret;
+}
+
+/*
+ * Re-create the inet socket behind @d: bind() to the saved source address,
+ * then listen() or connect() as its saved state requires. Returns fd or -1.
+ */
+static int open_inet_sk(struct file_desc *d)
+{
+	int sk;
+	struct sockaddr_in addr;
+	struct inet_sk_info *ii;
+
+	ii = container_of(d, struct inet_sk_info, d);
+	show_one_inet_img("Restore", &ii->ie);
+
+	if (ii->ie.family != AF_INET) {
+		pr_err("Unsupported socket family: %d\n", ii->ie.family);
+		return -1;
+	}
+
+	if ((ii->ie.type != SOCK_STREAM) && (ii->ie.type != SOCK_DGRAM)) {
+		pr_err("Unsupported socket type: %d\n", ii->ie.type);
+		return -1;
+	}
+
+	sk = socket(ii->ie.family, ii->ie.type, ii->ie.proto);
+	if (sk < 0) {
+		pr_perror("Can't create inet socket");
+		return -1;
+	}
+
+	if (restore_fown(sk, &ii->ie.fown))
+		goto err;
+
+	/*
+	 * Listen sockets are easiest ones -- simply bind() and listen(),
+	 * and that's all. The image keeps 4 words per address while
+	 * sockaddr_in has room for one, so copy only sizeof(s_addr).
+	 */
+	memset(&addr, 0, sizeof(addr));
+	addr.sin_family = ii->ie.family;
+	addr.sin_port = htons(ii->ie.src_port);
+	memcpy(&addr.sin_addr.s_addr, ii->ie.src_addr, sizeof(addr.sin_addr.s_addr));
+
+	if (bind(sk, (struct sockaddr *) &addr, sizeof(addr)) == -1) {
+		pr_perror("Can't bind to a socket");
+		goto err;
+	}
+
+	if (ii->ie.state == TCP_LISTEN) {
+		if (ii->ie.proto != IPPROTO_TCP) {
+			pr_err("Wrong socket in listen state %d\n", ii->ie.proto);
+			goto err;
+		}
+		if (listen(sk, ii->ie.backlog) == -1) {
+			pr_perror("Can't listen on a socket");
+			goto err;
+		}
+	}
+
+	if (ii->ie.state == TCP_ESTABLISHED) {
+		if (ii->ie.proto == IPPROTO_TCP) {
+			pr_err("Connected TCP socket in image\n");
+			goto err;
+		}
+		memset(&addr, 0, sizeof(addr));
+		addr.sin_family = ii->ie.family;
+		addr.sin_port = htons(ii->ie.dst_port);
+		memcpy(&addr.sin_addr.s_addr, ii->ie.dst_addr, sizeof(addr.sin_addr.s_addr));
+
+		if (connect(sk, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
+			pr_perror("Can't connect UDP socket back");
+			goto err;
+		}
+	}
+
+	if (set_fd_flags(sk, ii->ie.flags))
+		goto err;
+	return sk;
+
+err:
+	close(sk);
+	return -1;
+}
+
+static inline char *unknown(u32 val)
+{
+	static char unk[12];
+	snprintf(unk, sizeof(unk), "x%d", val);
+	return unk;
+}
+
+/*
+ * Name of socket family @f for image listings; only AF_INET has one.
+ */
+static inline char *skfamily2s(u32 f)
+{
+	return f == AF_INET ? " inet" : unknown(f);
+}
+
+/* Name of socket type @t for image listings; others go through unknown(). */
+static inline char *sktype2s(u32 t)
+{
+	switch (t) {
+	case SOCK_STREAM:	return "stream";
+	case SOCK_DGRAM:	return " dgram";
+	default:		return unknown(t);
+	}
+}
+
+/* Name of inet protocol @p for image listings; values without a name
+ * go through unknown(). */
+static inline char *skproto2s(u32 p)
+{
+	switch (p) {
+	case IPPROTO_UDP:	return "udp";
+	case IPPROTO_UDPLITE:	return "udpl";
+	case IPPROTO_TCP:	return "tcp";
+	default:		return unknown(p);
+	}
+}
+
+/* Name of socket state @state for image listings; values without a name
+ * go through unknown(). */
+static inline char *skstate2s(u32 state)
+{
+	switch (state) {
+	case TCP_ESTABLISHED:	return " estab";
+	case TCP_CLOSE:		return "closed";
+	case TCP_LISTEN:	return "listen";
+	default:		return unknown(state);
+	}
+}
+
+void show_inetsk(int fd, struct cr_options *o)
+{
+	struct inet_sk_entry ie;
+	int ret = 0;
+
+	pr_img_head(CR_FD_INETSK);
+
+	while (1) {
+		char src_addr[INET_ADDR_LEN] = "<unknown>";
+		char dst_addr[INET_ADDR_LEN] = "<unknown>";
+
+		ret = read_img_eof(fd, &ie);
+		if (ret <= 0)
+			goto out;
+
+		if (inet_ntop(AF_INET, (void *)ie.src_addr, src_addr,
+			      INET_ADDR_LEN) == NULL) {
+			pr_perror("Failed to translate src address");
+		}
+
+		if (ie.state == TCP_ESTABLISHED) {
+			if (inet_ntop(AF_INET, (void *)ie.dst_addr, dst_addr,
+				      INET_ADDR_LEN) == NULL) {
+				pr_perror("Failed to translate dst address");
+			}
+		}
+
+		pr_msg("id 0x%x family %s type %s proto %s state %s %s:%d <-> %s:%d flags 0x%2x\n",
+			ie.id, skfamily2s(ie.family), sktype2s(ie.type), skproto2s(ie.proto),
+			skstate2s(ie.state), src_addr, ie.src_port, dst_addr, ie.dst_port, ie.flags);
+		pr_msg("\t"), show_fown_cont(&ie.fown), pr_msg("\n");
+	}
+
+out:
+	if (ret)
+		pr_info("\n");
+	pr_img_tail(CR_FD_INETSK);
+}
+
+/* Print every unix socket entry (and its name, if any) read from image @fd. */
+void show_unixsk(int fd, struct cr_options *o)
+{
+	struct unix_sk_entry ue;
+	int ret = 0;
+
+	pr_img_head(CR_FD_UNIXSK);
+	while (1) {
+		ret = read_img_eof(fd, &ue);
+		if (ret <= 0)
+			goto out;
+
+		pr_msg("id 0x%8x type %s state %s namelen %4d backlog %4d peer 0x%8x flags 0x%2x uflags 0x%2x",
+			ue.id, sktype2s(ue.type), skstate2s(ue.state),
+			ue.namelen, ue.backlog, ue.peer, ue.flags, ue.uflags);
+		if (ue.namelen) {
+			BUG_ON(ue.namelen >= sizeof(buf)); /* room for '\0' */
+			ret = read_img_buf(fd, buf, ue.namelen);
+			if (ret < 0) {
+				pr_info("\n");
+				goto out;
+			}
+			buf[ue.namelen] = '\0';
+			if (!buf[0])
+				buf[0] = '@';
+			pr_msg(" --> %s\n", buf);
+		} else
+			pr_msg("\n");
+		pr_msg("\t"), show_fown_cont(&ue.fown), pr_msg("\n");
+	}
+out:
+	pr_img_tail(CR_FD_UNIXSK);
+}
+
+/*
+ * Print every queued packet found in the socket queues image opened
+ * at @fd: a one-line header per packet followed by a dump of its
+ * payload.
+ *
+ * The payload length comes from the image and may exceed the size of
+ * the file-wide scratch buffer, so the payload is read and dumped in
+ * pieces that always fit into it.
+ *
+ * Reading stops at the end of the image or on the first read error.
+ *
+ * @o is not used by this routine.
+ */
+void show_sk_queues(int fd, struct cr_options *o)
+{
+	struct sk_packet_entry pe;
+	int ret;
+
+	pr_img_head(CR_FD_SK_QUEUES);
+	while (1) {
+		size_t done = 0;
+
+		ret = read_img_eof(fd, &pe);
+		if (ret <= 0)
+			break;
+
+		pr_info("pkt for %u length %u bytes\n",
+				pe.id_for, pe.length);
+
+		/*
+		 * do/while keeps the original behaviour for empty packets:
+		 * one zero-sized read and one zero-sized dump.
+		 */
+		do {
+			size_t chunk = pe.length - done;
+
+			if (chunk > sizeof(buf))
+				chunk = sizeof(buf);
+
+			ret = read_img_buf(fd, (unsigned char *)buf, chunk);
+			if (ret < 0)
+				break;
+
+			/*
+			 * NOTE(review): the first argument of print_data() is
+			 * assumed to be the base offset shown next to the
+			 * dump, so the running position is passed for
+			 * follow-up chunks (it is 0 for the first one, as
+			 * before) -- confirm against print_data().
+			 */
+			print_data(done, (unsigned char *)buf, chunk);
+			done += chunk;
+		} while (done < pe.length);
+
+		if (ret < 0)
+			break;
+	}
+	pr_img_tail(CR_FD_SK_QUEUES);
+}
+
+/*
+ * One deferred connect() request: a node of a singly-linked list
+ * remembering a unix socket that still has to be connected to its
+ * peer once run_unix_connections() is called.
+ */
+struct unix_conn_job {
+	struct unix_sk_info	*sk;	/* socket to connect, target is sk->peer */
+	struct unix_conn_job	*next;	/* next pending job or NULL */
+};
+
+/* Head of the pending jobs list; schedule_conn_job() pushes new jobs here */
+static struct unix_conn_job *conn_jobs;
+
+/*
+ * Remember that @ui has to be connected to its peer later on.
+ *
+ * The new job is pushed onto the head of the conn_jobs list.
+ *
+ * Returns 0 on success, -1 if the job could not be allocated.
+ */
+static int schedule_conn_job(struct unix_sk_info *ui)
+{
+	struct unix_conn_job *job = xmalloc(sizeof(*job));
+
+	if (job == NULL)
+		return -1;
+
+	job->sk = ui;
+	job->next = conn_jobs;
+	conn_jobs = job;
+
+	return 0;
+}
+
+/*
+ * Perform all connect() calls postponed by schedule_conn_job().
+ *
+ * For every pending job the socket's master descriptor is connected
+ * to the name of its peer. A failed connect() is retried after a
+ * 1ms pause, up to 8 more times, before giving up. After a
+ * successful connect the queued data is restored and the saved
+ * file flags are applied.
+ *
+ * Returns 0 when every job succeeded, -1 on the first failure.
+ */
+int run_unix_connections(void)
+{
+	struct unix_conn_job *job;
+
+	pr_info("Running delayed unix connections\n");
+
+	for (job = conn_jobs; job != NULL; job = job->next) {
+		struct unix_sk_info *ui = job->sk;
+		struct unix_sk_info *peer = ui->peer;
+		struct fdinfo_list_entry *fle;
+		struct sockaddr_un addr;
+		int retries_left = 8;
+
+		pr_info("\tConnect 0x%x to 0x%x\n", ui->ue.id, peer->ue.id);
+
+		fle = file_master(&ui->d);
+
+		memset(&addr, 0, sizeof(addr));
+		addr.sun_family = AF_UNIX;
+		memcpy(&addr.sun_path, peer->name, peer->ue.namelen);
+
+		/* FIXME use futex waiters */
+		while (connect(fle->fe.fd, (struct sockaddr *)&addr,
+					sizeof(addr.sun_family) +
+					peer->ue.namelen) < 0) {
+			if (!retries_left) {
+				pr_perror("Can't connect 0x%x socket", ui->ue.id);
+				return -1;
+			}
+
+			usleep(1000);
+			retries_left--;
+		}
+
+		if (restore_socket_queue(fle->fe.fd, peer->ue.id))
+			return -1;
+
+		if (set_fd_flags(fle->fe.fd, ui->ue.flags))
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Bind the freshly created socket @sk to the name recorded for @ui.
+ *
+ * Nothing is done (and 0 is returned) when:
+ *  - the socket had no name at dump time. Calling bind() with only
+ *    the family in the address would make the kernel autobind the
+ *    socket to an abstract name it never had;
+ *  - the socket is a stream socket that is not listening, see the
+ *    FIXME below.
+ *
+ * Returns 0 on success, -1 if bind() failed.
+ */
+static int bind_unix_sk(int sk, struct unix_sk_info *ui)
+{
+	struct sockaddr_un addr;
+
+	if (!ui->name || !ui->ue.namelen)
+		goto done;
+
+	if ((ui->ue.type == SOCK_STREAM) && (ui->ue.state != TCP_LISTEN))
+		/*
+		 * FIXME this can be done, but for doing this properly we
+		 * need to bind socket to its name, then rename one to
+		 * some temporary unique one and after all the sockets are
+		 * restored we should walk those temp names and rename
+		 * some of them back to real ones.
+		 */
+		goto done;
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	memcpy(&addr.sun_path, ui->name, ui->ue.namelen);
+
+	if (bind(sk, (struct sockaddr *)&addr,
+				sizeof(addr.sun_family) + ui->ue.namelen)) {
+		pr_perror("Can't bind socket");
+		return -1;
+	}
+done:
+	return 0;
+}
+
+/*
+ * ->want_transport callback for unix sockets: reports non-zero when
+ * the socket behind @d is the slave end of a pair (USK_PAIR_SLAVE
+ * set in its flags), zero otherwise. @fe is not looked at.
+ */
+static int unixsk_should_open_transport(struct fdinfo_entry *fe,
+				struct file_desc *d)
+{
+	struct unix_sk_info *ui = container_of(d, struct unix_sk_info, d);
+
+	return ui->flags & USK_PAIR_SLAVE;
+}
+
+/*
+ * Restore a pair of interconnected unix sockets on behalf of the
+ * master end @ui.
+ *
+ * Both ends are created with socketpair(); queued data, file flags
+ * and owner settings are restored on each of them, the master end is
+ * bound to its name and the slave end is sent to the peer's master
+ * descriptor through a temporary datagram socket.
+ *
+ * Returns the master descriptor on success. On failure -1 is
+ * returned and every descriptor opened here is closed again.
+ */
+static int open_unixsk_pair_master(struct unix_sk_info *ui)
+{
+	int sk[2], tsk;
+	struct unix_sk_info *peer = ui->peer;
+	struct fdinfo_list_entry *fle;
+
+	pr_info("Opening pair master (id 0x%x peer 0x%x)\n",
+			ui->ue.id, ui->ue.peer);
+
+	if (socketpair(PF_UNIX, ui->ue.type, 0, sk) < 0) {
+		pr_perror("Can't make socketpair");
+		return -1;
+	}
+
+	if (restore_socket_queue(sk[0], peer->ue.id))
+		goto err;
+	if (restore_socket_queue(sk[1], ui->ue.id))
+		goto err;
+
+	if (set_fd_flags(sk[0], ui->ue.flags))
+		goto err;
+	if (set_fd_flags(sk[1], peer->ue.flags))
+		goto err;
+
+	if (restore_fown(sk[0], &ui->ue.fown))
+		goto err;
+	if (restore_fown(sk[1], &peer->ue.fown))
+		goto err;
+
+	if (bind_unix_sk(sk[0], ui))
+		goto err;
+
+	tsk = socket(PF_UNIX, SOCK_DGRAM, 0);
+	if (tsk < 0) {
+		pr_perror("Can't make transport socket");
+		goto err;
+	}
+
+	fle = file_master(&peer->d);
+	if (send_fd_to_peer(sk[1], fle, tsk)) {
+		pr_err("Can't send pair slave\n");
+		close(tsk);
+		goto err;
+	}
+
+	/* The slave end now lives in the peer, drop the local copies */
+	close(tsk);
+	close(sk[1]);
+
+	return sk[0];
+
+err:
+	close(sk[0]);
+	close(sk[1]);
+	return -1;
+}
+
+/*
+ * Restore the slave end @ui of a socket pair.
+ *
+ * The socket itself is received with recv_fd() from the descriptor
+ * of the master file entry, which is closed afterwards; the received
+ * socket is then bound to its name.
+ *
+ * Returns the received descriptor on success, -1 on failure (the
+ * received descriptor is closed if binding fails).
+ */
+static int open_unixsk_pair_slave(struct unix_sk_info *ui)
+{
+	struct fdinfo_list_entry *fle;
+	int sk;
+
+	fle = file_master(&ui->d);
+
+	pr_info("Opening pair slave (id 0x%x peer 0x%x) on %d\n",
+			ui->ue.id, ui->ue.peer, fle->fe.fd);
+
+	sk = recv_fd(fle->fe.fd);
+	if (sk < 0) {
+		pr_err("Can't recv pair slave\n");
+		return -1;
+	}
+	close(fle->fe.fd);
+
+	if (bind_unix_sk(sk, ui)) {
+		close(sk);
+		return -1;
+	}
+
+	return sk;
+}
+
+/*
+ * Restore a unix socket @ui that is not part of a socket pair.
+ *
+ * The socket is created, gets its owner settings restored and is
+ * bound to its name. A listening socket is put back into listen
+ * state with the saved backlog; any other socket with a resolved
+ * peer is queued for a delayed connect.
+ *
+ * Returns the new descriptor on success. On failure -1 is returned
+ * and the descriptor is closed.
+ */
+static int open_unixsk_standalone(struct unix_sk_info *ui)
+{
+	int sk;
+
+	pr_info("Opening standalone socket (id 0x%x peer 0x%x)\n",
+			ui->ue.id, ui->ue.peer);
+
+	sk = socket(PF_UNIX, ui->ue.type, 0);
+	if (sk < 0) {
+		pr_perror("Can't make unix socket");
+		return -1;
+	}
+
+	if (restore_fown(sk, &ui->ue.fown))
+		goto err;
+
+	if (bind_unix_sk(sk, ui))
+		goto err;
+
+	if (ui->ue.state == TCP_LISTEN) {
+		pr_info("\tPutting 0x%x into listen state\n", ui->ue.id);
+		if (listen(sk, ui->ue.backlog) < 0) {
+			pr_perror("Can't make usk listen");
+			goto err;
+		}
+	} else if (ui->peer) {
+		pr_info("\tWill connect 0x%x to 0x%x later\n", ui->ue.id, ui->ue.peer);
+		if (schedule_conn_job(ui))
+			goto err;
+	}
+
+	return sk;
+
+err:
+	close(sk);
+	return -1;
+}
+
+/*
+ * ->open callback for unix sockets: picks the restore routine that
+ * matches the role recorded in the socket's flags. The master role
+ * is checked first, then the slave role; a socket with neither flag
+ * is restored as a standalone one.
+ *
+ * Returns whatever the selected routine returns.
+ */
+static int open_unix_sk(struct file_desc *d)
+{
+	struct unix_sk_info *ui = container_of(d, struct unix_sk_info, d);
+
+	if (ui->flags & USK_PAIR_MASTER)
+		return open_unixsk_pair_master(ui);
+
+	if (ui->flags & USK_PAIR_SLAVE)
+		return open_unixsk_pair_slave(ui);
+
+	return open_unixsk_standalone(ui);
+}
+
+/*
+ * Callbacks attached to every unix socket descriptor registered by
+ * collect_unix_sockets() via file_desc_add().
+ */
+static struct file_desc_ops unix_desc_ops = {
+	.open = open_unix_sk,
+	.want_transport = unixsk_should_open_transport,
+};
+
+/*
+ * Read all unix socket entries from the CR_FD_UNIXSK image, register
+ * each of them as a file descriptor of type FDINFO_UNIXSK and add it
+ * to the unix_sockets list. Afterwards the socket queues image is
+ * read via read_sockets_queues().
+ *
+ * For sockets with a name the name is read as well. Names not
+ * starting with a zero byte are unlinked from the filesystem unless
+ * the socket is marked USK_EXTERN.
+ *
+ * Returns 0 if the image does not exist, -1 if it cannot be opened
+ * or if reading an entry fails (the partially read entry is freed),
+ * otherwise the result of read_sockets_queues().
+ */
+int collect_unix_sockets(void)
+{
+	struct unix_sk_info *ui = NULL;
+	int fd, ret;
+
+	pr_info("Reading unix sockets in\n");
+
+	fd = open_image_ro(CR_FD_UNIXSK);
+	if (fd < 0) {
+		if (errno == ENOENT)
+			return 0;
+		else
+			return -1;
+	}
+
+	while (1) {
+		ret = -1;
+		ui = xmalloc(sizeof(*ui));
+		if (ui == NULL)
+			break;
+		ui->name = NULL;
+
+		ret = read_img_eof(fd, &ui->ue);
+		if (ret <= 0)
+			goto drop;
+
+		if (ui->ue.namelen) {
+			ret = -1;
+
+			if (ui->ue.namelen >= UNIX_PATH_MAX) {
+				pr_err("Bad unix name len %d\n", ui->ue.namelen);
+				goto drop;
+			}
+
+			/*
+			 * One extra byte for a terminator: the image holds
+			 * exactly ->namelen bytes, and unlink() below needs a
+			 * proper C string no matter what those bytes are.
+			 */
+			ui->name = xmalloc(ui->ue.namelen + 1);
+			if (ui->name == NULL)
+				goto drop;
+
+			ret = read_img_buf(fd, ui->name, ui->ue.namelen);
+			if (ret < 0)
+				goto drop;
+			ui->name[ui->ue.namelen] = '\0';
+
+			/*
+			 * Make FS clean from sockets we're about to
+			 * restore. See bind_unix_sk() for how they get
+			 * bound to their names again.
+			 */
+			if (ui->name[0] != '\0' &&
+			    !(ui->ue.uflags & USK_EXTERN))
+				unlink(ui->name);
+		}
+
+		ui->peer = NULL;
+		ui->flags = 0;
+		pr_info(" `- Got %u peer %u\n", ui->ue.id, ui->ue.peer);
+		file_desc_add(&ui->d, FDINFO_UNIXSK, ui->ue.id,
+				&unix_desc_ops);
+		list_add_tail(&ui->list, &unix_sockets);
+	}
+
+out:
+	close(fd);
+
+	/* Do not hide a broken image behind the result of the queues read */
+	if (ret < 0)
+		return -1;
+
+	return read_sockets_queues();
+
+drop:
+	/* The entry was never registered, so it is still ours to free */
+	xfree(ui->name);
+	xfree(ui);
+	goto out;
+}
+
+/*
+ * Turn the peer ids recorded in the unix socket entries into
+ * pointers and decide who restores interconnected sockets.
+ *
+ * Every socket on the unix_sockets list that names a peer gets its
+ * ->peer pointer set. A peer marked USK_EXTERN is accepted only when
+ * opts.ext_unix_sk is set. When two distinct sockets name each other
+ * they are treated as a pair: one end is flagged USK_PAIR_MASTER and
+ * the other USK_PAIR_SLAVE.
+ *
+ * The result is logged at the end.
+ *
+ * Returns 0 on success, -1 if some peer cannot be resolved.
+ */
+int resolve_unix_peers(void)
+{
+	struct unix_sk_info *ui, *peer;
+	struct fdinfo_list_entry *fle, *fle_peer;
+
+	list_for_each_entry(ui, &unix_sockets, list) {
+		struct unix_sk_info *master, *slave;
+		int ui_goes_first;
+
+		/* Already resolved from the other side, or no peer at all */
+		if (ui->peer || !ui->ue.peer)
+			continue;
+
+		peer = find_unix_sk(ui->ue.peer);
+
+		/*
+		 * Connect to external sockets requires
+		 * special option to be passed.
+		 */
+		if (!peer ||
+		    ((peer->ue.uflags & USK_EXTERN) && !opts.ext_unix_sk)) {
+			pr_err("FATAL: Peer 0x%x unresolved for 0x%x\n",
+					ui->ue.peer, ui->ue.id);
+			return -1;
+		}
+
+		ui->peer = peer;
+
+		/* socket connected to self %) */
+		if (peer == ui)
+			continue;
+
+		/* one-way link: the peer does not point back at us */
+		if (peer->ue.peer != ui->ue.id)
+			continue;
+
+		/* socketpair or interconnected sockets */
+		peer->peer = ui;
+
+		/*
+		 * Select who will restore the pair. Check is identical to
+		 * the one in pipes.c and makes sure tasks wait for each other
+		 * in pids sorting order (ascending): the lower pid wins, the
+		 * lower fd breaks a tie.
+		 */
+		fle = file_master(&ui->d);
+		fle_peer = file_master(&peer->d);
+
+		if (fle->pid != fle_peer->pid)
+			ui_goes_first = fle->pid < fle_peer->pid;
+		else
+			ui_goes_first = fle->fe.fd < fle_peer->fe.fd;
+
+		master = ui_goes_first ? ui : peer;
+		slave = ui_goes_first ? peer : ui;
+
+		master->flags |= USK_PAIR_MASTER;
+		slave->flags |= USK_PAIR_SLAVE;
+	}
+
+	pr_info("Unix sockets:\n");
+	list_for_each_entry(ui, &unix_sockets, list) {
+		pr_info("\t0x%x -> 0x%x (0x%x) flags 0x%x\n", ui->ue.id, ui->ue.peer,
+				ui->peer ? ui->peer->ue.id : 0, ui->flags);
+
+		list_for_each_entry(fle, &ui->d.fd_info_head, desc_list)
+			pr_info("\t\tfd %d in pid %d\n",
+					fle->fe.fd, fle->pid);
+	}
+
+	return 0;
+}
diff --git a/parasite-syscall.c b/parasite-syscall.c
index e81fd68..712d653 100644
--- a/parasite-syscall.c
+++ b/parasite-syscall.c
@@ -22,7 +22,7 @@
 #include "util.h"
 #include "util-net.h"
 #include "log.h"
-#include "sockets.h"
+#include "net/sockets.h"
 #include "processor-flags.h"
 #include "parasite-syscall.h"
 #include "parasite-blob.h"
diff --git a/sockets.c b/sockets.c
deleted file mode 100644
index 06ab9ae..0000000
--- a/sockets.c
+++ /dev/null
@@ -1,1612 +0,0 @@
-#include <sys/socket.h>
-#include <linux/netlink.h>
-#include <linux/types.h>
-#include <linux/net.h>
-#include <sys/types.h>
-#include <sys/vfs.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <netinet/tcp.h>
-#include <errno.h>
-#include <unistd.h>
-#include <limits.h>
-#include <arpa/inet.h>
-#include <sys/sendfile.h>
-
-#include "types.h"
-#include "libnetlink.h"
-#include "sockets.h"
-#include "unix_diag.h"
-#include "image.h"
-#include "crtools.h"
-#include "util.h"
-#include "inet_diag.h"
-#include "files.h"
-#include "util-net.h"
-
-static char buf[4096];
-
-#ifndef NETLINK_SOCK_DIAG
-#define NETLINK_SOCK_DIAG NETLINK_INET_DIAG
-#endif
-
-#ifndef SOCK_DIAG_BY_FAMILY
-#define SOCK_DIAG_BY_FAMILY 20
-#endif
-
-#ifndef SOCKFS_MAGIC
-#define SOCKFS_MAGIC	0x534F434B
-#endif
-
-struct socket_desc {
-	unsigned int		family;
-	unsigned int		ino;
-	struct socket_desc	*next;
-	int			already_dumped;
-	bool			external;
-};
-
-struct unix_sk_desc {
-	struct socket_desc	sd;
-	unsigned int		type;
-	unsigned int		state;
-	unsigned int		peer_ino;
-	unsigned int		rqlen;
-	unsigned int		wqlen;
-	unsigned int		namelen;
-	char			*name;
-	unsigned int		nr_icons;
-	unsigned int		*icons;
-};
-
-struct unix_sk_listen_icon {
-	unsigned int			peer_ino;
-	struct unix_sk_desc		*sk_desc;
-	struct unix_sk_listen_icon	*next;
-};
-
-#define INET_ADDR_LEN		40
-
-struct inet_sk_desc {
-	struct socket_desc	sd;
-	unsigned int		type;
-	unsigned int		proto;
-	unsigned int		src_port;
-	unsigned int		dst_port;
-	unsigned int		state;
-	unsigned int		rqlen;
-	unsigned int		wqlen;
-	unsigned int		src_addr[4];
-	unsigned int		dst_addr[4];
-};
-
-static int dump_socket_queue(int sock_fd, int sock_id)
-{
-	struct sk_packet_entry *pe;
-	unsigned long size;
-	socklen_t tmp;
-	int ret, orig_peek_off;
-
-	/*
-	 * Save original peek offset. 
-	 */
-	tmp = sizeof(orig_peek_off);
-	ret = getsockopt(sock_fd, SOL_SOCKET, SO_PEEK_OFF, &orig_peek_off, &tmp);
-	if (ret < 0) {
-		pr_perror("getsockopt failed\n");
-		return ret;
-	}
-	/*
-	 * Discover max DGRAM size
-	 */
-	tmp = sizeof(size);
-	ret = getsockopt(sock_fd, SOL_SOCKET, SO_SNDBUF, &size, &tmp);
-	if (ret < 0) {
-		pr_perror("getsockopt failed\n");
-		return ret;
-	}
-
-	/* Note: 32 bytes will be used by kernel for protocol header. */
-	size -= 32;
-	/*
-	 * Try to alloc buffer for max supported DGRAM + our header.
-	 * Note: STREAM queue will be written by chunks of this size.
-	 */
-	pe = xmalloc(size + sizeof(struct sk_packet_entry));
-	if (!pe)
-		return -ENOMEM;
-
-	/*
-	 * Enable peek offset incrementation.
-	 */
-	ret = setsockopt(sock_fd, SOL_SOCKET, SO_PEEK_OFF, &ret, sizeof(int));
-	if (ret < 0) {
-		pr_perror("setsockopt fail\n");
-		goto err_brk;
-	}
-
-	pe->id_for = sock_id;
-
-	while (1) {
-		struct iovec iov = {
-			.iov_base	= pe->data,
-			.iov_len	= size,
-		};
-		struct msghdr msg = {
-			.msg_iov	= &iov,
-			.msg_iovlen	= 1,
-		};
-
-		ret = pe->length = recvmsg(sock_fd, &msg, MSG_DONTWAIT | MSG_PEEK);
-		if (ret < 0) {
-			if (ret == -EAGAIN)
-				break; /* we're done */
-			pr_perror("sys_recvmsg fail: error\n");
-			goto err_set_sock;
-		}
-		if (msg.msg_flags & MSG_TRUNC) {
-			/*
-			 * DGRAM thuncated. This should not happen. But we have
-			 * to check...
-			 */
-			pr_err("sys_recvmsg failed: truncated\n");
-			ret = -E2BIG;
-			goto err_set_sock;
-		}
-		ret = write_img_buf(fdset_fd(glob_fdset, CR_FD_SK_QUEUES),
-				pe, sizeof(pe) + pe->length);
-		if (ret < 0) {
-			ret = -EIO;
-			goto err_set_sock;
-		}
-	}
-	ret = 0;
-
-err_set_sock:
-	/*
-	 * Restore original peek offset. 
-	 */
-	ret = setsockopt(sock_fd, SOL_SOCKET, SO_PEEK_OFF, &orig_peek_off, sizeof(int));
-	if (ret < 0)
-		pr_perror("setsockopt failed on restore\n");
-err_brk:
-	xfree(pe);
-	return ret;
-}
-
-#define SK_HASH_SIZE		32
-#define SK_HASH_LINK(head, key, elem)					\
-	do {								\
-		(elem)->next = (head)[(key) % SK_HASH_SIZE];		\
-		(head)[(key) % SK_HASH_SIZE] = (elem);			\
-	} while (0)
-
-#define __gen_static_lookup_func(ret, name, head, _member, _type, _name)\
-	static ret *name(_type _name) {					\
-		ret *d;							\
-		for (d = head[_name % SK_HASH_SIZE]; d; d = d->next) {	\
-			if (d->_member == _name)			\
-				break;					\
-		}							\
-		return d;						\
-	}
-
-static struct socket_desc *sockets[SK_HASH_SIZE];
-__gen_static_lookup_func(struct socket_desc, lookup_socket, sockets,
-			ino, int, ino);
-
-static struct unix_sk_listen_icon *unix_listen_icons[SK_HASH_SIZE];
-__gen_static_lookup_func(struct unix_sk_listen_icon,
-			 lookup_unix_listen_icons,
-			 unix_listen_icons,
-			 peer_ino, unsigned int, ino);
-
-static int sk_collect_one(int ino, int family, struct socket_desc *d)
-{
-	d->ino		= ino;
-	d->family	= family;
-
-	SK_HASH_LINK(sockets, ino, d);
-
-	return 0;
-}
-
-static void show_one_inet(const char *act, const struct inet_sk_desc *sk)
-{
-	char src_addr[INET_ADDR_LEN] = "<unknown>";
-
-	if (inet_ntop(AF_INET, (void *)sk->src_addr, src_addr,
-		      INET_ADDR_LEN) == NULL) {
-		pr_perror("Failed to translate address");
-	}
-
-	pr_debug("\t%s: ino 0x%x family %d type %d port %d "
-		"state %d src_addr %s\n",
-		act, sk->sd.ino, sk->sd.family, sk->type, sk->src_port,
-		sk->state, src_addr);
-}
-
-static void show_one_inet_img(const char *act, const struct inet_sk_entry *e)
-{
-	char src_addr[INET_ADDR_LEN] = "<unknown>";
-
-	if (inet_ntop(AF_INET, (void *)e->src_addr, src_addr,
-		      INET_ADDR_LEN) == NULL) {
-		pr_perror("Failed to translate address");
-	}
-
-	pr_debug("\t%s: family %d type %d proto %d port %d "
-		"state %d src_addr %s\n",
-		act, e->family, e->type, e->proto, e->src_port,
-		e->state, src_addr);
-}
-
-static void show_one_unix(char *act, const struct unix_sk_desc *sk)
-{
-	pr_debug("\t%s: ino 0x%x type %d state %d name %s\n",
-		act, sk->sd.ino, sk->type, sk->state, sk->name);
-
-	if (sk->nr_icons) {
-		int i;
-
-		for (i = 0; i < sk->nr_icons; i++)
-			pr_debug("\t\ticon: %4d\n", sk->icons[i]);
-	}
-}
-
-static void show_one_unix_img(const char *act, const struct unix_sk_entry *e)
-{
-	pr_info("\t%s: id %u type %d state %d name %d bytes\n",
-		act, e->id, e->type, e->state, e->namelen);
-}
-
-static int can_dump_inet_sk(const struct inet_sk_desc *sk)
-{
-	if (sk->sd.family != AF_INET) {
-		pr_err("Only IPv4 sockets for now\n");
-		return 0;
-	}
-
-	if (sk->type == SOCK_DGRAM)
-		return 1;
-
-	if (sk->type != SOCK_STREAM) {
-		pr_err("Only stream and dgram inet sockets for now\n");
-		return 0;
-	}
-
-	switch (sk->state) {
-	case TCP_LISTEN:
-		if (sk->rqlen != 0) {
-			/*
-			 * Currently the ICONS nla reports the conn
-			 * requests for listen sockets. Need to pick
-			 * those up and fix the connect job respectively
-			 */
-			pr_err("In-flight connection (l)\n");
-			return 0;
-		}
-		break;
-	default:
-		pr_err("Unknown state %d\n", sk->state);
-		return 0;
-	}
-
-	return 1;
-}
-
-static int dump_one_inet(struct socket_desc *_sk, struct fd_parms *p,
-			 const struct cr_fdset *cr_fdset)
-{
-	struct inet_sk_desc *sk = (struct inet_sk_desc *)_sk;
-	struct inet_sk_entry ie;
-	struct fdinfo_entry fe;
-
-	if (!can_dump_inet_sk(sk))
-		goto err;
-
-	fe.fd = p->fd;
-	fe.type = FDINFO_INETSK;
-	fe.id = sk->sd.ino;
-	fe.flags = p->fd_flags;
-
-	if (write_img(fdset_fd(cr_fdset, CR_FD_FDINFO), &fe))
-		goto err;
-
-	if (sk->sd.already_dumped)
-		return 0;
-
-	memset(&ie, 0, sizeof(ie));
-
-	ie.id		= sk->sd.ino;
-	ie.family	= sk->sd.family;
-	ie.type		= sk->type;
-	ie.proto	= sk->proto;
-	ie.state	= sk->state;
-	ie.src_port	= sk->src_port;
-	ie.dst_port	= sk->dst_port;
-	ie.backlog	= sk->wqlen;
-	ie.flags	= p->flags;
-	ie.fown		= p->fown;
-	memcpy(ie.src_addr, sk->src_addr, sizeof(u32) * 4);
-	memcpy(ie.dst_addr, sk->dst_addr, sizeof(u32) * 4);
-
-	if (write_img(fdset_fd(glob_fdset, CR_FD_INETSK), &ie))
-		goto err;
-
-	pr_info("Dumping inet socket at %d\n", p->fd);
-	show_one_inet("Dumping", sk);
-	show_one_inet_img("Dumped", &ie);
-	sk->sd.already_dumped = 1;
-	return 0;
-
-err:
-	return -1;
-}
-
-static int can_dump_unix_sk(const struct unix_sk_desc *sk)
-{
-	if (sk->type != SOCK_STREAM &&
-	    sk->type != SOCK_DGRAM) {
-		pr_err("Only stream/dgram sockets for now\n");
-		return 0;
-	}
-
-	switch (sk->state) {
-	case TCP_LISTEN:
-		break;
-	case TCP_ESTABLISHED:
-		break;
-	case TCP_CLOSE:
-		if (sk->type != SOCK_DGRAM)
-			return 0;
-		break;
-	default:
-		pr_err("Unknown state %d\n", sk->state);
-		return 0;
-	}
-
-	return 1;
-}
-
-static int dump_one_unix(const struct socket_desc *_sk, struct fd_parms *p,
-		int lfd, const struct cr_fdset *cr_fdset)
-{
-	struct unix_sk_desc *sk = (struct unix_sk_desc *)_sk;
-	struct fdinfo_entry fe;
-	struct unix_sk_entry ue;
-
-	if (!can_dump_unix_sk(sk))
-		goto err;
-
-	fe.fd = p->fd;
-	fe.type = FDINFO_UNIXSK;
-	fe.id = sk->sd.ino;
-	fe.flags = p->fd_flags;
-
-	if (write_img(fdset_fd(cr_fdset, CR_FD_FDINFO), &fe))
-		goto err;
-
-	if (sk->sd.already_dumped)
-		return 0;
-
-	ue.id		= sk->sd.ino;
-	ue.type		= sk->type;
-	ue.state	= sk->state;
-	ue.namelen	= sk->namelen;
-	ue.flags	= p->flags;
-	ue.backlog	= sk->wqlen;
-	ue.peer		= sk->peer_ino;
-	ue.fown		= p->fown;
-	ue.uflags	= 0;
-
-	if (ue.peer) {
-		struct unix_sk_desc *peer;
-
-		peer = (struct unix_sk_desc *)lookup_socket(ue.peer);
-		if (!peer) {
-			pr_err("Unix socket 0x%x without peer 0x%x\n",
-					ue.id, ue.peer);
-			goto err;
-		}
-
-		/*
-		 * Peer should have us as peer or have a name by which
-		 * we can access one.
-		 */
-		if (peer->peer_ino != ue.id) {
-			if (!peer->name) {
-				pr_err("Unix socket 0x%x with unreachable peer 0x%x (0x%x/%s)\n",
-				       ue.id, ue.peer, peer->peer_ino, peer->name);
-				goto err;
-			}
-
-			/*
-			 * It can be external socket, so we defer dumping
-			 * until all sockets the program owns are processed.
-			 */
-			peer->sd.external = true;
-		}
-	} else if (ue.state == TCP_ESTABLISHED) {
-		const struct unix_sk_listen_icon *e;
-
-		/*
-		 * If this is in-flight connection we need to figure
-		 * out where to connect it on restore. Thus, tune up peer
-		 * id by searching an existing listening socket.
-		 *
-		 * Note the socket name will be found at restore stage,
-		 * not now, just to reduce size of dump files.
-		 */
-
-		e = lookup_unix_listen_icons(ue.id);
-		if (!e) {
-			pr_err("Dangling in-flight connection %d\n", ue.id);
-			goto err;
-		}
-
-		/* e->sk_desc is _never_ NULL */
-		if (e->sk_desc->state != TCP_LISTEN) {
-			pr_err("In-flight connection on "
-				"non-listening socket %d\n", ue.id);
-			goto err;
-		}
-
-		ue.peer = e->sk_desc->sd.ino;
-
-		pr_debug("\t\tFixed inflight socket 0x%x peer 0x%x)\n",
-				ue.id, ue.peer);
-	}
-
-	if (write_img(fdset_fd(glob_fdset, CR_FD_UNIXSK), &ue))
-		goto err;
-	if (write_img_buf(fdset_fd(glob_fdset, CR_FD_UNIXSK), sk->name, ue.namelen))
-		goto err;
-
-	if (sk->rqlen != 0 && !(sk->type == SOCK_STREAM &&
-				sk->state == TCP_LISTEN))
-		if (dump_socket_queue(lfd, ue.id))
-			goto err;
-
-	pr_info("Dumping unix socket at %d\n", p->fd);
-	show_one_unix("Dumping", sk);
-	show_one_unix_img("Dumped", &ue);
-
-	sk->sd.already_dumped = 1;
-	return 0;
-
-err:
-	return -1;
-}
-
-int dump_socket(struct fd_parms *p, int lfd, const struct cr_fdset *cr_fdset)
-{
-	struct socket_desc *sk;
-
-	sk = lookup_socket(p->stat.st_ino);
-	if (!sk) {
-		pr_err("Uncollected socket %ld\n", p->stat.st_ino);
-		return -1;
-	}
-
-	switch (sk->family) {
-	case AF_UNIX:
-		return dump_one_unix(sk, p, lfd, cr_fdset);
-	case AF_INET:
-		return dump_one_inet(sk, p, cr_fdset);
-	default:
-		pr_err("BUG! Unknown socket collected\n");
-		break;
-	}
-
-	return -1;
-}
-
-static int inet_collect_one(struct nlmsghdr *h, int type, int proto)
-{
-	struct inet_sk_desc *d;
-	struct inet_diag_msg *m = NLMSG_DATA(h);
-	struct rtattr *tb[INET_DIAG_MAX+1];
-
-	parse_rtattr(tb, INET_DIAG_MAX, (struct rtattr *)(m + 1),
-		     h->nlmsg_len - NLMSG_LENGTH(sizeof(*m)));
-
-	d = xzalloc(sizeof(*d));
-	if (!d)
-		return -1;
-
-	d->type = type;
-	d->proto = proto;
-	d->src_port = ntohs(m->id.idiag_sport);
-	d->dst_port = ntohs(m->id.idiag_dport);
-	d->state = m->idiag_state;
-	d->rqlen = m->idiag_rqueue;
-	d->wqlen = m->idiag_wqueue;
-	memcpy(d->src_addr, m->id.idiag_src, sizeof(u32) * 4);
-	memcpy(d->dst_addr, m->id.idiag_dst, sizeof(u32) * 4);
-
-	return sk_collect_one(m->idiag_inode, AF_INET, &d->sd);
-}
-
-static int inet_tcp_receive_one(struct nlmsghdr *h)
-{
-	return inet_collect_one(h, SOCK_STREAM, IPPROTO_TCP);
-}
-
-static int inet_udp_receive_one(struct nlmsghdr *h)
-{
-	return inet_collect_one(h, SOCK_DGRAM, IPPROTO_UDP);
-}
-
-static int inet_udplite_receive_one(struct nlmsghdr *h)
-{
-	return inet_collect_one(h, SOCK_DGRAM, IPPROTO_UDPLITE);
-}
-
-static int unix_collect_one(const struct unix_diag_msg *m,
-		struct rtattr **tb)
-{
-	struct unix_sk_desc *d, **h;
-
-	d = xzalloc(sizeof(*d));
-	if (!d)
-		return -1;
-
-	d->type	= m->udiag_type;
-	d->state= m->udiag_state;
-
-	if (tb[UNIX_DIAG_PEER])
-		d->peer_ino = *(int *)RTA_DATA(tb[UNIX_DIAG_PEER]);
-
-	if (tb[UNIX_DIAG_NAME]) {
-		int len		= RTA_PAYLOAD(tb[UNIX_DIAG_NAME]);
-		char *name	= xmalloc(len + 1);
-
-		if (!name)
-			goto err;
-
-		memcpy(name, RTA_DATA(tb[UNIX_DIAG_NAME]), len);
-		name[len] = '\0';
-
-		if (name[0] != '\0') {
-			struct unix_diag_vfs *uv;
-			struct stat st;
-
-			if (name[0] != '/') {
-				pr_warn("Relative bind path '%s' "
-					"unsupported\n", name);
-				xfree(name);
-				xfree(d);
-				return 0;
-			}
-
-			if (!tb[UNIX_DIAG_VFS]) {
-				pr_err("Bound socket w/o inode %d\n",
-						m->udiag_ino);
-				goto err;
-			}
-
-			uv = RTA_DATA(tb[UNIX_DIAG_VFS]);
-			if (stat(name, &st)) {
-				pr_perror("Can't stat socket %d(%s)",
-						m->udiag_ino, name);
-				goto err;
-			}
-
-			if ((st.st_ino != uv->udiag_vfs_ino) ||
-			    (st.st_dev != kdev_to_odev(uv->udiag_vfs_dev))) {
-				pr_info("unix: Dropping path for "
-						"unlinked bound "
-						"sk 0x%x.0x%x real 0x%x.0x%x\n",
-						(int)st.st_dev,
-						(int)st.st_ino,
-						(int)uv->udiag_vfs_dev,
-						(int)uv->udiag_vfs_ino);
-				/*
-				 * When a socket is bound to unlinked file, we
-				 * just drop his name, since noone will access
-				 * it via one.
-				 */
-				xfree(name);
-				len = 0;
-				name = NULL;
-			}
-		}
-
-		d->namelen = len;
-		d->name = name;
-	}
-
-	if (tb[UNIX_DIAG_ICONS]) {
-		int len = RTA_PAYLOAD(tb[UNIX_DIAG_ICONS]);
-		int i;
-
-		d->icons = xmalloc(len);
-		if (!d->icons)
-			goto err;
-
-		memcpy(d->icons, RTA_DATA(tb[UNIX_DIAG_ICONS]), len);
-		d->nr_icons = len / sizeof(u32);
-
-		/*
-		 * Remember these sockets, we will need them
-		 * to fix up in-flight sockets peers.
-		 */
-		for (i = 0; i < d->nr_icons; i++) {
-			struct unix_sk_listen_icon *e;
-			int n;
-
-			e = xzalloc(sizeof(*e));
-			if (!e)
-				goto err;
-
-			SK_HASH_LINK(unix_listen_icons, d->icons[i], e);
-
-			pr_debug("\t\tCollected icon %d\n", d->icons[i]);
-
-			e->peer_ino	= d->icons[i];
-			e->sk_desc	= d;
-		}
-
-
-	}
-
-	if (tb[UNIX_DIAG_RQLEN]) {
-		struct unix_diag_rqlen *rq;
-
-		rq = (struct unix_diag_rqlen *)RTA_DATA(tb[UNIX_DIAG_RQLEN]);
-		d->rqlen = rq->udiag_rqueue;
-		d->wqlen = rq->udiag_wqueue;
-	}
-
-	sk_collect_one(m->udiag_ino, AF_UNIX, &d->sd);
-	show_one_unix("Collected", d);
-
-	return 0;
-
-err:
-	xfree(d->icons);
-	xfree(d->name);
-	xfree(d);
-	return -1;
-}
-
-static int unix_receive_one(struct nlmsghdr *h)
-{
-	struct unix_diag_msg *m = NLMSG_DATA(h);
-	struct rtattr *tb[UNIX_DIAG_MAX+1];
-
-	parse_rtattr(tb, UNIX_DIAG_MAX, (struct rtattr *)(m + 1),
-		     h->nlmsg_len - NLMSG_LENGTH(sizeof(*m)));
-
-	return unix_collect_one(m, tb);
-}
-
-static int collect_sockets_nl(int nl, void *req, int size,
-			      int (*receive_callback)(struct nlmsghdr *h))
-{
-	struct msghdr msg;
-	struct sockaddr_nl nladdr;
-	struct iovec iov;
-
-	memset(&msg, 0, sizeof(msg));
-	msg.msg_name	= &nladdr;
-	msg.msg_namelen	= sizeof(nladdr);
-	msg.msg_iov	= &iov;
-	msg.msg_iovlen	= 1;
-
-	memset(&nladdr, 0, sizeof(nladdr));
-	nladdr.nl_family= AF_NETLINK;
-
-	iov.iov_base	= req;
-	iov.iov_len	= size;
-
-	if (sendmsg(nl, &msg, 0) < 0) {
-		pr_perror("Can't send request message");
-		goto err;
-	}
-
-	iov.iov_base	= buf;
-	iov.iov_len	= sizeof(buf);
-
-	while (1) {
-		int err;
-
-		memset(&msg, 0, sizeof(msg));
-		msg.msg_name	= &nladdr;
-		msg.msg_namelen	= sizeof(nladdr);
-		msg.msg_iov	= &iov;
-		msg.msg_iovlen	= 1;
-
-		err = recvmsg(nl, &msg, 0);
-		if (err < 0) {
-			if (errno == EINTR)
-				continue;
-			else {
-				pr_perror("Error receiving nl report");
-				goto err;
-			}
-		}
-		if (err == 0)
-			break;
-
-		err = nlmsg_receive(buf, err, receive_callback);
-		if (err < 0)
-			goto err;
-		if (err == 0)
-			break;
-	}
-
-	return 0;
-
-err:
-	return -1;
-}
-
-int dump_external_sockets(void)
-{
-	struct socket_desc *head, *sd;
-	int i, ret = -1;
-
-	if (!opts.ext_unix_sk)
-		return 0;
-
-	pr_debug("Dumping external sockets\n");
-
-	for (i = 0; i < SK_HASH_SIZE; i++) {
-		head = sockets[i];
-		if (!head)
-			continue;
-
-		for (sd = head; sd; sd = sd->next) {
-			struct unix_sk_entry e = { };
-			struct unix_sk_desc *sk;
-
-			if (sd->already_dumped		||
-			    sd->external == false	||
-			    sd->family != AF_UNIX)
-				continue;
-
-			sk = container_of(sd, struct unix_sk_desc, sd);
-
-			if (sk->type != SOCK_DGRAM)
-				continue;
-
-			e.id		= sd->ino;
-			e.type		= SOCK_DGRAM;
-			e.state		= TCP_LISTEN;
-			e.namelen	= sk->namelen;
-			e.uflags	= USK_EXTERN;
-			e.peer		= 0;
-
-			show_one_unix("Dumping extern", sk);
-
-			if (write_img(fdset_fd(glob_fdset, CR_FD_UNIXSK), &e))
-				goto err;
-			if (write_img_buf(fdset_fd(glob_fdset, CR_FD_UNIXSK),
-					  sk->name, e.namelen))
-				goto err;
-
-			show_one_unix_img("Dumped extern", &e);
-
-			sd->already_dumped = 1;
-		}
-	}
-
-	return 0;
-err:
-	return -1;
-}
-
-int collect_sockets(void)
-{
-	int err = 0, tmp;
-	int nl;
-	int supp_type = 0;
-	struct {
-		struct nlmsghdr hdr;
-		union {
-			struct unix_diag_req	u;
-			struct inet_diag_req_v2	i;
-		} r;
-	} req;
-
-	nl = socket(PF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
-	if (nl < 0) {
-		pr_perror("Can't create sock diag socket");
-		return -1;
-	}
-
-	memset(&req, 0, sizeof(req));
-	req.hdr.nlmsg_len	= sizeof(req);
-	req.hdr.nlmsg_type	= SOCK_DIAG_BY_FAMILY;
-	req.hdr.nlmsg_flags	= NLM_F_DUMP | NLM_F_REQUEST;
-	req.hdr.nlmsg_seq	= CR_NLMSG_SEQ;
-
-	/* Collect UNIX sockets */
-	req.r.u.sdiag_family	= AF_UNIX;
-	req.r.u.udiag_states	= -1; /* All */
-	req.r.u.udiag_show	= UDIAG_SHOW_NAME | UDIAG_SHOW_VFS |
-				  UDIAG_SHOW_PEER | UDIAG_SHOW_ICONS |
-				  UDIAG_SHOW_RQLEN;
-	tmp = collect_sockets_nl(nl, &req, sizeof(req), unix_receive_one);
-	if (tmp)
-		err = tmp;
-
-	/* Collect IPv4 TCP sockets */
-	req.r.i.sdiag_family	= AF_INET;
-	req.r.i.sdiag_protocol	= IPPROTO_TCP;
-	req.r.i.idiag_ext	= 0;
-	/* Only listening sockets supported yet */
-	req.r.i.idiag_states	= 1 << TCP_LISTEN;
-	tmp = collect_sockets_nl(nl, &req, sizeof(req), inet_tcp_receive_one);
-	if (tmp)
-		err = tmp;
-
-	/* Collect IPv4 UDP sockets */
-	req.r.i.sdiag_family	= AF_INET;
-	req.r.i.sdiag_protocol	= IPPROTO_UDP;
-	req.r.i.idiag_ext	= 0;
-	req.r.i.idiag_states	= -1; /* All */
-	tmp = collect_sockets_nl(nl, &req, sizeof(req), inet_udp_receive_one);
-	if (tmp)
-		err = tmp;
-
-	/* Collect IPv4 UDP-lite sockets */
-	req.r.i.sdiag_family	= AF_INET;
-	req.r.i.sdiag_protocol	= IPPROTO_UDPLITE;
-	req.r.i.idiag_ext	= 0;
-	req.r.i.idiag_states	= -1; /* All */
-	tmp = collect_sockets_nl(nl, &req, sizeof(req), inet_udplite_receive_one);
-	if (tmp)
-		err = tmp;
-
-out:
-	close(nl);
-	return err;
-}
-
-struct unix_sk_info {
-	struct unix_sk_entry ue;
-	struct list_head list;
-	char *name;
-	unsigned flags;
-	struct unix_sk_info *peer;
-	struct file_desc d;
-};
-
-#define USK_PAIR_MASTER		0x1
-#define USK_PAIR_SLAVE		0x2
-
-static LIST_HEAD(unix_sockets);
-
-static struct unix_sk_info *find_unix_sk(int id)
-{
-	struct file_desc *d;
-
-	d = find_file_desc_raw(FDINFO_UNIXSK, id);
-	if (d)
-		return container_of(d, struct unix_sk_info, d);
-	return NULL;
-}
-
-struct sk_packet {
-	struct list_head list;
-	struct sk_packet_entry entry;
-	off_t img_off;
-};
-
-static LIST_HEAD(packets_list);
-
-static int read_sockets_queues(void)
-{
-	struct sk_packet *pkt;
-	int ret, fd;
-
-	pr_info("Trying to read socket queues image\n");
-
-	fd = open_image_ro(CR_FD_SK_QUEUES);
-	if (fd < 0)
-		return -1;
-
-	while (1) {
-		struct sk_packet_entry tmp;
-
-		pkt = xmalloc(sizeof(*pkt));
-		if (!pkt) {
-			pr_err("Failed to allocate packet header\n");
-			return -ENOMEM;
-		}
-		ret = read_img_eof(fd, &pkt->entry);
-		if (ret <= 0)
-			break;
-
-		pkt->img_off = lseek(fd, 0, SEEK_CUR);
-		/*
-		 * NOTE: packet must be added to the tail. Otherwise sequence
-		 * will be broken.
-		 */
-		list_add_tail(&pkt->list, &packets_list);
-		lseek(fd, pkt->entry.length, SEEK_CUR);
-	}
-	close(fd);
-	xfree(pkt);
-
-	return ret;
-}
-
-static int restore_socket_queue(int fd, unsigned int peer_id)
-{
-	struct sk_packet *pkt, *tmp;
-	int ret, img_fd;
-
-	pr_info("Trying to restore recv queue for %u\n", peer_id);
-
-	img_fd = open_image_ro(CR_FD_SK_QUEUES);
-	if (img_fd < 0)
-		return -1;
-
-	list_for_each_entry_safe(pkt, tmp, &packets_list, list) {
-		struct sk_packet_entry *entry = &pkt->entry;
-
-		if (entry->id_for != peer_id)
-			continue;
-
-		pr_info("\tRestoring %d-bytes skb for %u\n",
-				entry->length, peer_id);
-
-		ret = sendfile(fd, img_fd, &pkt->img_off, entry->length);
-		if (ret < 0) {
-			pr_perror("Failed to sendfile packet");
-			return -1;
-		}
-		if (ret != entry->length) {
-			pr_err("Restored skb trimmed to %d/%d\n",
-					ret, entry->length);
-			return -1;
-		}
-		list_del(&pkt->list);
-		xfree(pkt);
-	}
-
-	close(img_fd);
-	return 0;
-}
-
-struct inet_sk_info {
-	struct inet_sk_entry ie;
-	struct file_desc d;
-};
-
-static int open_inet_sk(struct file_desc *d);
-
-static struct file_desc_ops inet_desc_ops = {
-	.open = open_inet_sk,
-};
-
-int collect_inet_sockets(void)
-{
-	struct inet_sk_info *ii = NULL;
-	int fd, ret = -1;
-
-	fd = open_image_ro(CR_FD_INETSK);
-	if (fd < 0)
-		return -1;
-
-	while (1) {
-		ii = xmalloc(sizeof(*ii));
-		ret = -1;
-		if (!ii)
-			break;
-
-		ret = read_img_eof(fd, &ii->ie);
-		if (ret <= 0)
-			break;
-
-		file_desc_add(&ii->d, FDINFO_INETSK, ii->ie.id,
-				&inet_desc_ops);
-	}
-
-	if (ii)
-		xfree(ii);
-
-	close(fd);
-	return 0;
-}
-
-static int open_inet_sk(struct file_desc *d)
-{
-	int sk;
-	struct sockaddr_in addr;
-	struct inet_sk_info *ii;
-
-	ii = container_of(d, struct inet_sk_info, d);
-
-	show_one_inet_img("Restore", &ii->ie);
-
-	if (ii->ie.family != AF_INET) {
-		pr_err("Unsupported socket family: %d\n", ii->ie.family);
-		return -1;
-	}
-
-	if ((ii->ie.type != SOCK_STREAM) && (ii->ie.type != SOCK_DGRAM)) {
-		pr_err("Unsupported socket type: %d\n", ii->ie.type);
-		return -1;
-	}
-
-	sk = socket(ii->ie.family, ii->ie.type, ii->ie.proto);
-	if (sk < 0) {
-		pr_perror("Can't create unix socket");
-		return -1;
-	}
-
-	if (restore_fown(sk, &ii->ie.fown))
-		goto err;
-
-	/*
-	 * Listen sockets are easiest ones -- simply
-	 * bind() and listen(), and that's all.
-	 */
-	memset(&addr, 0, sizeof(addr));
-	addr.sin_family = ii->ie.family;
-	addr.sin_port = htons(ii->ie.src_port);
-	memcpy(&addr.sin_addr.s_addr, ii->ie.src_addr, sizeof(unsigned int) * 4);
-
-	if (bind(sk, (struct sockaddr *) &addr, sizeof(addr)) == -1) {
-		pr_perror("Can't bind to a socket");
-		goto err;
-	}
-
-	if (ii->ie.state == TCP_LISTEN) {
-		if (ii->ie.proto != IPPROTO_TCP) {
-			pr_err("Wrong socket in listen state %d\n", ii->ie.proto);
-			goto err;
-		}
-
-		if (listen(sk, ii->ie.backlog) == -1) {
-			pr_perror("Can't listen on a socket");
-			goto err;
-		}
-	}
-
-	if (ii->ie.state == TCP_ESTABLISHED) {
-		if (ii->ie.proto == IPPROTO_TCP) {
-			pr_err("Connected TCP socket in image\n");
-			goto err;
-		}
-
-		memset(&addr, 0, sizeof(addr));
-		addr.sin_family = ii->ie.family;
-		addr.sin_port = htons(ii->ie.dst_port);
-		memcpy(&addr.sin_addr.s_addr, ii->ie.dst_addr, sizeof(ii->ie.dst_addr));
-
-		if (connect(sk, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
-			pr_perror("Can't connect UDP socket back");
-			goto err;
-		}
-	}
-
-	if (set_fd_flags(sk, ii->ie.flags))
-		return -1;
-
-	return sk;
-
-err:
-	close(sk);
-	return -1;
-}
-
-static inline char *unknown(u32 val)
-{
-	static char unk[12];
-	snprintf(unk, sizeof(unk), "x%d", val);
-	return unk;
-}
-
-static inline char *skfamily2s(u32 f)
-{
-	if (f == AF_INET)
-		return " inet";
-	else
-		return unknown(f);
-}
-
-static inline char *sktype2s(u32 t)
-{
-	if (t == SOCK_STREAM)
-		return "stream";
-	else if (t == SOCK_DGRAM)
-		return " dgram";
-	else
-		return unknown(t);
-}
-
-static inline char *skproto2s(u32 p)
-{
-	if (p == IPPROTO_UDP)
-		return "udp";
-	else if (p == IPPROTO_UDPLITE)
-		return "udpl";
-	else if (p == IPPROTO_TCP)
-		return "tcp";
-	else
-		return unknown(p);
-}
-
-static inline char *skstate2s(u32 state)
-{
-	if (state == TCP_ESTABLISHED)
-		return " estab";
-	else if (state == TCP_CLOSE)
-		return "closed";
-	else if (state == TCP_LISTEN)
-		return "listen";
-	else
-		return unknown(state);
-}
-
-void show_inetsk(int fd, struct cr_options *o)
-{
-	struct inet_sk_entry ie;
-	int ret = 0;
-
-	pr_img_head(CR_FD_INETSK);
-
-	while (1) {
-		char src_addr[INET_ADDR_LEN] = "<unknown>";
-		char dst_addr[INET_ADDR_LEN] = "<unknown>";
-
-		ret = read_img_eof(fd, &ie);
-		if (ret <= 0)
-			goto out;
-
-		if (inet_ntop(AF_INET, (void *)ie.src_addr, src_addr,
-			      INET_ADDR_LEN) == NULL) {
-			pr_perror("Failed to translate src address");
-		}
-
-		if (ie.state == TCP_ESTABLISHED) {
-			if (inet_ntop(AF_INET, (void *)ie.dst_addr, dst_addr,
-				      INET_ADDR_LEN) == NULL) {
-				pr_perror("Failed to translate dst address");
-			}
-		}
-
-		pr_msg("id 0x%x family %s type %s proto %s state %s %s:%d <-> %s:%d flags 0x%2x\n",
-			ie.id, skfamily2s(ie.family), sktype2s(ie.type), skproto2s(ie.proto),
-			skstate2s(ie.state), src_addr, ie.src_port, dst_addr, ie.dst_port, ie.flags);
-		pr_msg("\t"), show_fown_cont(&ie.fown), pr_msg("\n");
-	}
-
-out:
-	if (ret)
-		pr_info("\n");
-	pr_img_tail(CR_FD_INETSK);
-}
-
-void show_unixsk(int fd, struct cr_options *o)
-{
-	struct unix_sk_entry ue;
-	int ret = 0;
-
-	pr_img_head(CR_FD_UNIXSK);
-
-	while (1) {
-		ret = read_img_eof(fd, &ue);
-		if (ret <= 0)
-			goto out;
-
-		pr_msg("id 0x%8x type %s state %s namelen %4d backlog %4d peer 0x%8x flags 0x%2x uflags 0x%2x",
-			ue.id, sktype2s(ue.type), skstate2s(ue.state),
-			ue.namelen, ue.backlog, ue.peer, ue.flags, ue.uflags);
-
-		if (ue.namelen) {
-			BUG_ON(ue.namelen > sizeof(buf));
-			ret = read_img_buf(fd, buf, ue.namelen);
-			if (ret < 0) {
-				pr_info("\n");
-				goto out;
-			}
-			if (!buf[0])
-				buf[0] = '@';
-			pr_msg(" --> %s\n", buf);
-		} else
-			pr_msg("\n");
-		pr_msg("\t"), show_fown_cont(&ue.fown), pr_msg("\n");
-	}
-out:
-	pr_img_tail(CR_FD_UNIXSK);
-}
-
-void show_sk_queues(int fd, struct cr_options *o)
-{
-	struct sk_packet_entry pe;
-	int ret;
-
-	pr_img_head(CR_FD_SK_QUEUES);
-	while (1) {
-		ret = read_img_eof(fd, &pe);
-		if (ret <= 0)
-			break;
-
-		pr_info("pkt for %u length %u bytes\n",
-				pe.id_for, pe.length);
-
-		ret = read_img_buf(fd, (unsigned char *)buf, pe.length);
-		if (ret < 0)
-			break;
-
-		print_data(0, (unsigned char *)buf, pe.length);
-	}
-	pr_img_tail(CR_FD_SK_QUEUES);
-}
-
-struct unix_conn_job {
-	struct unix_sk_info	*sk;
-	struct unix_conn_job	*next;
-};
-
-static struct unix_conn_job *conn_jobs;
-
-static int schedule_conn_job(struct unix_sk_info *ui)
-{
-	struct unix_conn_job *cj;
-
-	cj = xmalloc(sizeof(*cj));
-	if (!cj)
-		return -1;
-
-	cj->sk = ui;
-	cj->next = conn_jobs;
-	conn_jobs = cj;
-
-	return 0;
-}
-
-int run_unix_connections(void)
-{
-	struct unix_conn_job *cj;
-
-	pr_info("Running delayed unix connections\n");
-
-	cj = conn_jobs;
-	while (cj) {
-		int attempts = 8;
-		struct unix_sk_info *ui = cj->sk;
-		struct unix_sk_info *peer = ui->peer;
-		struct fdinfo_list_entry *fle;
-		struct sockaddr_un addr;
-
-		pr_info("\tConnect 0x%x to 0x%x\n", ui->ue.id, peer->ue.id);
-
-		fle = file_master(&ui->d);
-
-		memset(&addr, 0, sizeof(addr));
-		addr.sun_family = AF_UNIX;
-		memcpy(&addr.sun_path, peer->name, peer->ue.namelen);
-try_again:
-		if (connect(fle->fe.fd, (struct sockaddr *)&addr,
-					sizeof(addr.sun_family) +
-					peer->ue.namelen) < 0) {
-			if (attempts) {
-				usleep(1000);
-				attempts--;
-				goto try_again; /* FIXME use futex waiters */
-			}
-
-			pr_perror("Can't connect 0x%x socket", ui->ue.id);
-			return -1;
-		}
-
-		if (restore_socket_queue(fle->fe.fd, peer->ue.id))
-			return -1;
-
-		if (set_fd_flags(fle->fe.fd, ui->ue.flags))
-			return -1;
-
-		cj = cj->next;
-	}
-
-	return 0;
-}
-
-static int bind_unix_sk(int sk, struct unix_sk_info *ui)
-{
-	struct sockaddr_un addr;
-
-	if ((ui->ue.type == SOCK_STREAM) && (ui->ue.state != TCP_LISTEN))
-		/*
-		 * FIXME this can be done, but for doing this properly we
-		 * need to bind socket to its name, then rename one to
-		 * some temporary unique one and after all the sockets are
-		 * restored we should walk those temp names and rename
-		 * some of them back to real ones.
-		 */
-		goto done;
-
-	memset(&addr, 0, sizeof(addr));
-	addr.sun_family = AF_UNIX;
-	memcpy(&addr.sun_path, ui->name, ui->ue.namelen);
-
-	if (bind(sk, (struct sockaddr *)&addr,
-				sizeof(addr.sun_family) + ui->ue.namelen)) {
-		pr_perror("Can't bind socket");
-		return -1;
-	}
-done:
-	return 0;
-}
-
-static int unixsk_should_open_transport(struct fdinfo_entry *fe,
-				struct file_desc *d)
-{
-	struct unix_sk_info *ui;
-
-	ui = container_of(d, struct unix_sk_info, d);
-	return ui->flags & USK_PAIR_SLAVE;
-}
-
-static int open_unixsk_pair_master(struct unix_sk_info *ui)
-{
-	int sk[2], tsk;
-	struct unix_sk_info *peer = ui->peer;
-	struct fdinfo_list_entry *fle;
-
-	pr_info("Opening pair master (id 0x%x peer 0x%x)\n",
-			ui->ue.id, ui->ue.peer);
-
-	if (socketpair(PF_UNIX, ui->ue.type, 0, sk) < 0) {
-		pr_perror("Can't make socketpair");
-		return -1;
-	}
-
-	if (restore_socket_queue(sk[0], peer->ue.id))
-		return -1;
-	if (restore_socket_queue(sk[1], ui->ue.id))
-		return -1;
-
-	if (set_fd_flags(sk[0], ui->ue.flags))
-		return -1;
-	if (set_fd_flags(sk[1], peer->ue.flags))
-		return -1;
-
-	if (restore_fown(sk[0], &ui->ue.fown))
-		return -1;
-	if (restore_fown(sk[1], &peer->ue.fown))
-		return -1;
-
-	if (bind_unix_sk(sk[0], ui))
-		return -1;
-
-	tsk = socket(PF_UNIX, SOCK_DGRAM, 0);
-	if (tsk < 0) {
-		pr_perror("Can't make transport socket");
-		return -1;
-	}
-
-	fle = file_master(&peer->d);
-	if (send_fd_to_peer(sk[1], fle, tsk)) {
-		pr_err("Can't send pair slave\n");
-		return -1;
-	}
-
-	close(tsk);
-	close(sk[1]);
-
-	return sk[0];
-}
-
-static int open_unixsk_pair_slave(struct unix_sk_info *ui)
-{
-	struct fdinfo_list_entry *fle;
-	int sk;
-
-	fle = file_master(&ui->d);
-
-	pr_info("Opening pair slave (id 0x%x peer 0x%x) on %d\n",
-			ui->ue.id, ui->ue.peer, fle->fe.fd);
-
-	sk = recv_fd(fle->fe.fd);
-	if (sk < 0) {
-		pr_err("Can't recv pair slave");
-		return -1;
-	}
-	close(fle->fe.fd);
-
-	if (bind_unix_sk(sk, ui))
-		return -1;
-
-	return sk;
-}
-
-static int open_unixsk_standalone(struct unix_sk_info *ui)
-{
-	int sk;
-
-	pr_info("Opening standalone socket (id 0x%x peer 0x%x)\n",
-			ui->ue.id, ui->ue.peer);
-
-	sk = socket(PF_UNIX, ui->ue.type, 0);
-	if (sk < 0) {
-		pr_perror("Can't make unix socket");
-		return -1;
-	}
-
-	if (restore_fown(sk, &ui->ue.fown))
-		return -1;
-
-	if (bind_unix_sk(sk, ui))
-		return -1;
-
-	if (ui->ue.state == TCP_LISTEN) {
-		pr_info("\tPutting 0x%x into listen state\n", ui->ue.id);
-		if (listen(sk, ui->ue.backlog) < 0) {
-			pr_perror("Can't make usk listen");
-			return -1;
-		}
-	} else if (ui->peer) {
-		pr_info("\tWill connect 0x%x to 0x%x later\n", ui->ue.id, ui->ue.peer);
-		if (schedule_conn_job(ui))
-			return -1;
-	}
-
-	return sk;
-}
-
-static int open_unix_sk(struct file_desc *d)
-{
-	struct unix_sk_info *ui;
-
-	ui = container_of(d, struct unix_sk_info, d);
-	if (ui->flags & USK_PAIR_MASTER)
-		return open_unixsk_pair_master(ui);
-	else if (ui->flags & USK_PAIR_SLAVE)
-		return open_unixsk_pair_slave(ui);
-	else
-		return open_unixsk_standalone(ui);
-}
-
-static struct file_desc_ops unix_desc_ops = {
-	.open = open_unix_sk,
-	.want_transport = unixsk_should_open_transport,
-};
-
-int collect_unix_sockets(void)
-{
-	int fd, ret;
-
-	pr_info("Reading unix sockets in\n");
-
-	fd = open_image_ro(CR_FD_UNIXSK);
-	if (fd < 0) {
-		if (errno == ENOENT)
-			return 0;
-		else
-			return -1;
-	}
-
-	while (1) {
-		struct unix_sk_info *ui;
-
-		ui = xmalloc(sizeof(*ui));
-		ret = -1;
-		if (ui == NULL)
-			break;
-
-		ret = read_img_eof(fd, &ui->ue);
-		if (ret <= 0) {
-			xfree(ui);
-			break;
-		}
-
-		if (ui->ue.namelen) {
-			ret = -1;
-
-			if (!ui->ue.namelen || ui->ue.namelen >= UNIX_PATH_MAX) {
-				pr_err("Bad unix name len %d\n", ui->ue.namelen);
-				break;
-			}
-
-			ui->name = xmalloc(ui->ue.namelen);
-			if (ui->name == NULL)
-				break;
-
-			ret = read_img_buf(fd, ui->name, ui->ue.namelen);
-			if (ret < 0)
-				break;
-
-			/*
-			 * Make FS clean from sockets we're about to
-			 * restore. See for how we bind them for details
-			 */
-			if (ui->name[0] != '\0' &&
-			    !(ui->ue.uflags & USK_EXTERN))
-				unlink(ui->name);
-		} else
-			ui->name = NULL;
-
-		ui->peer = NULL;
-		ui->flags = 0;
-		pr_info(" `- Got %u peer %u\n", ui->ue.id, ui->ue.peer);
-		file_desc_add(&ui->d, FDINFO_UNIXSK, ui->ue.id,
-				&unix_desc_ops);
-		list_add_tail(&ui->list, &unix_sockets);
-	}
-
-	close(fd);
-
-	return read_sockets_queues();
-}
-
-int resolve_unix_peers(void)
-{
-	struct unix_sk_info *ui, *peer;
-	struct fdinfo_list_entry *fle, *fle_peer;
-
-	list_for_each_entry(ui, &unix_sockets, list) {
-		if (ui->peer)
-			continue;
-		if (!ui->ue.peer)
-			continue;
-
-		peer = find_unix_sk(ui->ue.peer);
-
-		/*
-		 * Connect to external sockets requires
-		 * special option to be passed.
-		 */
-		if (peer &&
-		    (peer->ue.uflags & USK_EXTERN) &&
-		    !(opts.ext_unix_sk))
-			peer = NULL;
-
-		if (!peer) {
-			pr_err("FATAL: Peer 0x%x unresolved for 0x%x\n",
-					ui->ue.peer, ui->ue.id);
-			return -1;
-		}
-
-		ui->peer = peer;
-		if (ui == peer)
-			/* socket connected to self %) */
-			continue;
-		if (peer->ue.peer != ui->ue.id)
-			continue;
-
-		/* socketpair or interconnected sockets */
-		peer->peer = ui;
-
-		/*
-		 * Select who will restore the pair. Check is identical to
-		 * the one in pipes.c and makes sure tasks wait for each other
-		 * in pids sorting order (ascending).
-		 */
-
-		fle = file_master(&ui->d);
-		fle_peer = file_master(&peer->d);
-
-		if ((fle->pid < fle_peer->pid) ||
-				(fle->pid == fle_peer->pid &&
-				 fle->fe.fd < fle_peer->fe.fd)) {
-			ui->flags |= USK_PAIR_MASTER;
-			peer->flags |= USK_PAIR_SLAVE;
-		} else {
-			peer->flags |= USK_PAIR_MASTER;
-			ui->flags |= USK_PAIR_SLAVE;
-		}
-	}
-
-	pr_info("Unix sockets:\n");
-	list_for_each_entry(ui, &unix_sockets, list) {
-		struct fdinfo_list_entry *fle;
-
-		pr_info("\t0x%x -> 0x%x (0x%x) flags 0x%x\n", ui->ue.id, ui->ue.peer,
-				ui->peer ? ui->peer->ue.id : 0, ui->flags);
-		list_for_each_entry(fle, &ui->d.fd_info_head, desc_list)
-			pr_info("\t\tfd %d in pid %d\n",
-					fle->fe.fd, fle->pid);
-
-	}
-
-	return 0;
-}
-- 
1.7.7.6



More information about the CRIU mailing list