[CRIU] [PATCH 1/2] sk-inet: Add initial support for raw sockets

Cyrill Gorcunov gorcunov at openvz.org
Wed Feb 22 05:53:27 PST 2017


For raw sockets we need the DIAG module extension, so if
collecting such sockets fails, don't exit with an error but
warn the user instead; if we really meet a raw socket we
will exit later, at the socket lookup stage.

Strictly speaking we could use procfs parsing instead, but
that would be way more complex than the well-known diag
approach, and taking into account that raw sockets are
not that widely used, let's support them only when the diag
module is present in the system.

This patch adds initial raw socket support, complete
enough to handle the SO_IP_SET request from the ipset
tool (needed by modern containers). But the code might
need extensions/fixes in the future.

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 criu/cr-check.c        | 11 ++++++++++
 criu/include/sk-inet.h | 12 +++++++++++
 criu/sk-inet.c         | 58 ++++++++++++++++++++++++++++++++++++--------------
 criu/sockets.c         | 35 ++++++++++++++++++++++++++++++
 images/sk-inet.proto   |  2 ++
 5 files changed, 102 insertions(+), 16 deletions(-)

diff --git a/criu/cr-check.c b/criu/cr-check.c
index c8261255fdeb..3e487a774403 100644
--- a/criu/cr-check.c
+++ b/criu/cr-check.c
@@ -49,6 +49,7 @@
 #include "cr_options.h"
 #include "libnetlink.h"
 #include "net.h"
+#include "inet_diag.h"
 #include "linux/userfaultfd.h"
 #include "restorer.h"
 
@@ -1090,6 +1091,14 @@ static int check_sk_netns(void)
 	return 0;
 }
 
+static int check_net_diag_raw(void)
+{
+	check_sock_diag();
+	return !socket_test_collect_bit(AF_INET, IPPROTO_RAW) &&
+		!socket_test_collect_bit(AF_INET6, IPPROTO_RAW);
+}
+
+
 static int check_compat_cr(void)
 {
 	if (kdat_compat_sigreturn_test())
@@ -1202,6 +1211,7 @@ int cr_check(void)
 		ret |= check_userns();
 		ret |= check_loginuid();
 		ret |= check_sk_netns();
+		ret |= check_net_diag_raw();
 	}
 
 	/*
@@ -1254,6 +1264,7 @@ static struct feature_list feature_list[] = {
 	{ "lazy_pages", check_uffd },
 	{ "compat_cr", check_compat_cr },
 	{ "sk_ns", check_sk_netns },
+	{ "net_diag_raw", check_net_diag_raw },
 	{ NULL, NULL },
 };
 
diff --git a/criu/include/sk-inet.h b/criu/include/sk-inet.h
index bf6fb1d77ddf..5d996581c18d 100644
--- a/criu/include/sk-inet.h
+++ b/criu/include/sk-inet.h
@@ -16,6 +16,18 @@
 #define TCP_REPAIR_OPTIONS	22
 #endif
 
+#ifndef IP_HDRINCL
+# define IP_HDRINCL		3
+#endif
+
+#ifndef IP_NODEFRAG
+# define IP_NODEFRAG		22
+#endif
+
+#ifndef IPV6_HDRINCL
+# define IPV6_HDRINCL		36
+#endif
+
 struct inet_sk_desc {
 	struct socket_desc	sd;
 	unsigned int		type;
diff --git a/criu/sk-inet.c b/criu/sk-inet.c
index ee6ce60e2b12..4efe6eb4cc11 100644
--- a/criu/sk-inet.c
+++ b/criu/sk-inet.c
@@ -101,7 +101,7 @@ static void show_one_inet_img(const char *act, const InetSkEntry *e)
 		e->state, src_addr);
 }
 
-static int can_dump_ipproto(int ino, int proto)
+static int can_dump_ipproto(int ino, int proto, int type)
 {
 	/* Make sure it's a proto we support */
 	switch (proto) {
@@ -111,8 +111,12 @@ static int can_dump_ipproto(int ino, int proto)
 	case IPPROTO_UDPLITE:
 		break;
 	default:
-		pr_err("Unsupported proto %d for socket %x\n", proto, ino);
-		return 0;
+		/* Raw sockets may have any protocol inside */
+		if (type != SOCK_RAW) {
+			pr_err("Unsupported proto %d (type %d) for socket %x\n",
+			       proto, type, ino);
+			return 0;
+		}
 	}
 
 	return 1;
@@ -142,9 +146,9 @@ static int can_dump_inet_sk(const struct inet_sk_desc *sk)
 		return 1;
 	}
 
-	if (sk->type != SOCK_STREAM) {
+	if (sk->type != SOCK_STREAM && sk->type != SOCK_RAW) {
 		pr_err("Can't dump %d inet socket %x. "
-				"Only can stream and dgram.\n",
+				"Only can stream, dgram and raw.\n",
 				sk->type, sk->sd.ino);
 		return 0;
 	}
@@ -288,12 +292,24 @@ err:
 	return NULL;
 }
 
-static int dump_ip_opts(int sk, IpOptsEntry *ioe)
+
+static int dump_ip_opts(int family, int type, int sk, IpOptsEntry *ioe)
 {
 	int ret = 0;
 
-	ret |= dump_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
-	ioe->has_freebind = ioe->freebind;
+	if (type == SOCK_RAW) {
+		if (family == AF_INET6) {
+			ret |= dump_opt(sk, SOL_IPV6, IPV6_HDRINCL, &ioe->hdrincl);
+		} else {
+			ret |= dump_opt(sk, SOL_IP, IP_HDRINCL, &ioe->hdrincl);
+			ret |= dump_opt(sk, SOL_IP, IP_NODEFRAG, &ioe->nodefrag);
+			ioe->has_nodefrag = ioe->nodefrag;
+		}
+		ioe->has_hdrincl = ioe->hdrincl;
+	} else {
+		ret |= dump_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
+		ioe->has_freebind = ioe->freebind;
+	}
 
 	return ret;
 }
@@ -323,14 +339,18 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
 	InetSkEntry ie = INET_SK_ENTRY__INIT;
 	IpOptsEntry ipopts = IP_OPTS_ENTRY__INIT;
 	SkOptsEntry skopts = SK_OPTS_ENTRY__INIT;
-	int ret = -1, err = -1, proto;
+	int ret = -1, err = -1, proto, type;
 
 	ret = do_dump_opt(lfd, SOL_SOCKET, SO_PROTOCOL,
 					&proto, sizeof(proto));
 	if (ret)
 		goto err;
+	ret = do_dump_opt(lfd, SOL_SOCKET, SO_TYPE,
+			  &type, sizeof(type));
+	if (ret)
+		goto err;
 
-	if (!can_dump_ipproto(p->stat.st_ino, proto))
+	if (!can_dump_ipproto(p->stat.st_ino, proto, type))
 		goto err;
 
 	sk = (struct inet_sk_desc *)lookup_socket(p->stat.st_ino, family, proto);
@@ -410,7 +430,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
 	memcpy(ie.src_addr, sk->src_addr, pb_repeated_size(&ie, src_addr));
 	memcpy(ie.dst_addr, sk->dst_addr, pb_repeated_size(&ie, dst_addr));
 
-	if (dump_ip_opts(lfd, &ipopts))
+	if (dump_ip_opts(family, sk->type, lfd, &ipopts))
 		goto err;
 
 	if (dump_socket_opts(lfd, &skopts))
@@ -424,7 +444,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
 
 	switch (proto) {
 	case IPPROTO_TCP:
-		err = dump_one_tcp(lfd, sk);
+		err = (sk->type != SOCK_RAW) ? dump_one_tcp(lfd, sk) : 0;
 		break;
 	default:
 		err = 0;
@@ -606,12 +626,18 @@ static int post_open_inet_sk(struct file_desc *d, int sk)
 	return 0;
 }
 
-int restore_ip_opts(int sk, IpOptsEntry *ioe)
+int restore_ip_opts(int family, int sk, IpOptsEntry *ioe)
 {
 	int ret = 0;
 
 	if (ioe->has_freebind)
 		ret |= restore_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
+	if (ioe->has_nodefrag)
+		ret |= restore_opt(sk, SOL_IP, IP_NODEFRAG, &ioe->nodefrag);
+	if (ioe->has_hdrincl)
+		ret |= restore_opt(sk, family == AF_INET6 ? SOL_IPV6 : SOL_IP,
+				   family == AF_INET6 ? IPV6_HDRINCL : IP_HDRINCL,
+				   &ioe->hdrincl);
 
 	return ret;
 }
@@ -635,7 +661,7 @@ static int open_inet_sk(struct file_desc *d, int *new_fd)
 		return -1;
 	}
 
-	if ((ie->type != SOCK_STREAM) && (ie->type != SOCK_DGRAM)) {
+	if ((ie->type != SOCK_STREAM) && (ie->type != SOCK_DGRAM) && (ie->type != SOCK_RAW)) {
 		pr_err("Unsupported socket type: %d\n", ie->type);
 		return -1;
 	}
@@ -713,7 +739,7 @@ done:
 	if (rst_file_params(sk, ie->fown, ie->flags))
 		goto err;
 
-	if (ie->ip_opts && restore_ip_opts(sk, ie->ip_opts))
+	if (ie->ip_opts && restore_ip_opts(ie->family, sk, ie->ip_opts))
 		goto err;
 
 	if (restore_socket_opts(sk, ie->opts))
@@ -780,7 +806,7 @@ int inet_bind(int sk, struct inet_sk_info *ii)
 	 * sockets could not be bound to them in this moment
 	 * without setting IP_FREEBIND.
 	 */
-	if (ii->ie->family == AF_INET6) {
+	if (ii->ie->family == AF_INET6 && ii->ie->proto != IPPROTO_RAW) {
 		int yes = 1;
 
 		if (restore_opt(sk, SOL_IP, IP_FREEBIND, &yes))
diff --git a/criu/sockets.c b/criu/sockets.c
index 420eee8252e2..06938f27ce05 100644
--- a/criu/sockets.c
+++ b/criu/sockets.c
@@ -61,9 +61,11 @@ enum socket_cl_bits
 	INET_TCP_CL_BIT,
 	INET_UDP_CL_BIT,
 	INET_UDPLITE_CL_BIT,
+	INET_RAW_CL_BIT,
 	INET6_TCP_CL_BIT,
 	INET6_UDP_CL_BIT,
 	INET6_UDPLITE_CL_BIT,
+	INET6_RAW_CL_BIT,
 	UNIX_CL_BIT,
 	PACKET_CL_BIT,
 	_MAX_CL_BIT,
@@ -89,6 +91,8 @@ enum socket_cl_bits get_collect_bit_nr(unsigned int family, unsigned int proto)
 			return INET_UDP_CL_BIT;
 		if (proto == IPPROTO_UDPLITE)
 			return INET_UDPLITE_CL_BIT;
+		if (proto == IPPROTO_RAW)
+			return INET_RAW_CL_BIT;
 	}
 	if (family == AF_INET6) {
 		if (proto == IPPROTO_TCP)
@@ -97,6 +101,8 @@ enum socket_cl_bits get_collect_bit_nr(unsigned int family, unsigned int proto)
 			return INET6_UDP_CL_BIT;
 		if (proto == IPPROTO_UDPLITE)
 			return INET6_UDPLITE_CL_BIT;
+		if (proto == IPPROTO_RAW)
+			return INET6_RAW_CL_BIT;
 	}
 
 	pr_err("Unknown pair family %d proto %d\n", family, proto);
@@ -598,6 +604,9 @@ static int inet_receive_one(struct nlmsghdr *h, struct ns_id *ns, void *arg)
 	case IPPROTO_TCP:
 		type = SOCK_STREAM;
 		break;
+	case IPPROTO_RAW:
+		type = SOCK_RAW;
+		break;
 	case IPPROTO_UDP:
 	case IPPROTO_UDPLITE:
 		type = SOCK_DGRAM;
@@ -620,6 +629,14 @@ static int do_collect_req(int nl, struct sock_diag_req *req, int size,
 
 	if (tmp == 0)
 		set_collect_bit(req->r.n.sdiag_family, req->r.n.sdiag_protocol);
+	else if (tmp == -ENOENT &&
+		 ((req->r.n.sdiag_family == AF_INET ||
+		   req->r.n.sdiag_family == AF_INET6) &&
+		  req->r.n.sdiag_protocol == IPPROTO_RAW)) {
+		pr_warn("No support for DIAG module on family %s with protocol IPPROTO_RAW, may fail later\n",
+			req->r.n.sdiag_family == AF_INET ? "IPv4" : "IPv6");
+		tmp = 0;
+	}
 
 	return tmp;
 }
@@ -677,6 +694,15 @@ int collect_sockets(struct ns_id *ns)
 	if (tmp)
 		err = tmp;
 
+	/* Collect IPv4 RAW sockets */
+	req.r.i.sdiag_family	= AF_INET;
+	req.r.i.sdiag_protocol	= IPPROTO_RAW;
+	req.r.i.idiag_ext	= 0;
+	req.r.i.idiag_states	= -1; /* All */
+	tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, ns, &req.r.i);
+	if (tmp)
+		err = tmp;
+
 	/* Collect IPv6 TCP sockets */
 	req.r.i.sdiag_family	= AF_INET6;
 	req.r.i.sdiag_protocol	= IPPROTO_TCP;
@@ -708,6 +734,15 @@ int collect_sockets(struct ns_id *ns)
 	if (tmp)
 		err = tmp;
 
+	/* Collect IPv6 RAW sockets */
+	req.r.i.sdiag_family	= AF_INET6;
+	req.r.i.sdiag_protocol	= IPPROTO_RAW;
+	req.r.i.idiag_ext	= 0;
+	req.r.i.idiag_states	= -1; /* All */
+	tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, ns, &req.r.i);
+	if (tmp)
+		err = tmp;
+
 	req.r.p.sdiag_family	= AF_PACKET;
 	req.r.p.sdiag_protocol	= 0;
 	req.r.p.pdiag_show	= PACKET_SHOW_INFO | PACKET_SHOW_MCLIST |
diff --git a/images/sk-inet.proto b/images/sk-inet.proto
index 09c5a47d2464..173c74a40df7 100644
--- a/images/sk-inet.proto
+++ b/images/sk-inet.proto
@@ -6,6 +6,8 @@ import "sk-opts.proto";
 
 message ip_opts_entry {
 	optional bool		freebind	= 1;
+	optional bool		hdrincl		= 2;
+	optional bool		nodefrag	= 3;
 }
 
 message inet_sk_entry {
-- 
2.7.4



More information about the CRIU mailing list