[CRIU] [PATCH 1/2] sk-inet: Add initial support for raw sockets

Cyrill Gorcunov gorcunov at openvz.org
Wed Nov 16 02:59:45 PST 2016


For raw sockets we need the DIAG module extension, so if
collecting such sockets fails, don't exit with an error
but warn the user instead; if we really meet a raw socket
we will exit later, at the socket lookup stage.

Strictly speaking we could use procfs parsing instead, but
that would be way more complex than the well-known diag
approach, and taking into account that raw sockets are
not that widely used, let's support them only when the
diag module is present in the system.

This patch adds initial raw socket support, complete
enough to handle the SO_IP_SET request from the
ipset tool (needed by modern containers). The
code might need extensions/fixes in the future.

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 criu/include/sk-inet.h | 12 +++++++++++
 criu/sk-inet.c         | 58 ++++++++++++++++++++++++++++++++++++--------------
 criu/sockets.c         | 35 ++++++++++++++++++++++++++++++
 images/sk-inet.proto   |  2 ++
 4 files changed, 91 insertions(+), 16 deletions(-)

diff --git a/criu/include/sk-inet.h b/criu/include/sk-inet.h
index a06a8ac161da..58c05887e04d 100644
--- a/criu/include/sk-inet.h
+++ b/criu/include/sk-inet.h
@@ -16,6 +16,18 @@
 #define TCP_REPAIR_OPTIONS	22
 #endif
 
+#ifndef IP_HDRINCL
+# define IP_HDRINCL		3
+#endif
+
+#ifndef IP_NODEFRAG
+# define IP_NODEFRAG		22
+#endif
+
+#ifndef IPV6_HDRINCL
+# define IPV6_HDRINCL		36
+#endif
+
 struct inet_sk_desc {
 	struct socket_desc	sd;
 	unsigned int		type;
diff --git a/criu/sk-inet.c b/criu/sk-inet.c
index 924cf8c09d60..42134d4d5b3b 100644
--- a/criu/sk-inet.c
+++ b/criu/sk-inet.c
@@ -94,7 +94,7 @@ static void show_one_inet_img(const char *act, const InetSkEntry *e)
 		e->state, src_addr);
 }
 
-static int can_dump_ipproto(int ino, int proto)
+static int can_dump_ipproto(int ino, int proto, int type)
 {
 	/* Make sure it's a proto we support */
 	switch (proto) {
@@ -104,8 +104,12 @@ static int can_dump_ipproto(int ino, int proto)
 	case IPPROTO_UDPLITE:
 		break;
 	default:
-		pr_err("Unsupported proto %d for socket %x\n", proto, ino);
-		return 0;
+		/* Raw sockets may have any protocol inside */
+		if (type != SOCK_RAW) {
+			pr_err("Unsupported proto %d (type %d) for socket %x\n",
+			       proto, type, ino);
+			return 0;
+		}
 	}
 
 	return 1;
@@ -135,9 +139,9 @@ static int can_dump_inet_sk(const struct inet_sk_desc *sk)
 		return 1;
 	}
 
-	if (sk->type != SOCK_STREAM) {
+	if (sk->type != SOCK_STREAM && sk->type != SOCK_RAW) {
 		pr_err("Can't dump %d inet socket %x. "
-				"Only can stream and dgram.\n",
+				"Only can stream, dgram and raw.\n",
 				sk->type, sk->sd.ino);
 		return 0;
 	}
@@ -240,12 +244,24 @@ err:
 	return NULL;
 }
 
-static int dump_ip_opts(int sk, IpOptsEntry *ioe)
+
+static int dump_ip_opts(int family, int type, int sk, IpOptsEntry *ioe)
 {
 	int ret = 0;
 
-	ret |= dump_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
-	ioe->has_freebind = ioe->freebind;
+	if (type == SOCK_RAW) {
+		if (family == AF_INET6) {
+			ret |= dump_opt(sk, SOL_IPV6, IPV6_HDRINCL, &ioe->hdrincl);
+		} else {
+			ret |= dump_opt(sk, SOL_IP, IP_HDRINCL, &ioe->hdrincl);
+			ret |= dump_opt(sk, SOL_IP, IP_NODEFRAG, &ioe->nodefrag);
+			ioe->has_nodefrag = ioe->nodefrag;
+		}
+		ioe->has_hdrincl = ioe->hdrincl;
+	} else {
+		ret |= dump_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
+		ioe->has_freebind = ioe->freebind;
+	}
 
 	return ret;
 }
@@ -275,14 +291,18 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
 	InetSkEntry ie = INET_SK_ENTRY__INIT;
 	IpOptsEntry ipopts = IP_OPTS_ENTRY__INIT;
 	SkOptsEntry skopts = SK_OPTS_ENTRY__INIT;
-	int ret = -1, err = -1, proto;
+	int ret = -1, err = -1, proto, type;
 
 	ret = do_dump_opt(lfd, SOL_SOCKET, SO_PROTOCOL,
 					&proto, sizeof(proto));
 	if (ret)
 		goto err;
+	ret = do_dump_opt(lfd, SOL_SOCKET, SO_TYPE,
+			  &type, sizeof(type));
+	if (ret)
+		goto err;
 
-	if (!can_dump_ipproto(p->stat.st_ino, proto))
+	if (!can_dump_ipproto(p->stat.st_ino, proto, type))
 		goto err;
 
 	sk = (struct inet_sk_desc *)lookup_socket(p->stat.st_ino, family, proto);
@@ -359,7 +379,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
 	memcpy(ie.src_addr, sk->src_addr, pb_repeated_size(&ie, src_addr));
 	memcpy(ie.dst_addr, sk->dst_addr, pb_repeated_size(&ie, dst_addr));
 
-	if (dump_ip_opts(lfd, &ipopts))
+	if (dump_ip_opts(family, sk->type, lfd, &ipopts))
 		goto err;
 
 	if (dump_socket_opts(lfd, &skopts))
@@ -376,7 +396,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
 
 	switch (proto) {
 	case IPPROTO_TCP:
-		err = dump_one_tcp(lfd, sk);
+		err = (sk->type != SOCK_RAW) ? dump_one_tcp(lfd, sk) : 0;
 		break;
 	default:
 		err = 0;
@@ -540,12 +560,18 @@ static int post_open_inet_sk(struct file_desc *d, int sk)
 	return 0;
 }
 
-int restore_ip_opts(int sk, IpOptsEntry *ioe)
+int restore_ip_opts(int family, int sk, IpOptsEntry *ioe)
 {
 	int ret = 0;
 
 	if (ioe->has_freebind)
 		ret |= restore_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
+	if (ioe->has_nodefrag)
+		ret |= restore_opt(sk, SOL_IP, IP_NODEFRAG, &ioe->nodefrag);
+	if (ioe->has_hdrincl)
+		ret |= restore_opt(sk, family == AF_INET6 ? SOL_IPV6 : SOL_IP,
+				   family == AF_INET6 ? IPV6_HDRINCL : IP_HDRINCL,
+				   &ioe->hdrincl);
 
 	return ret;
 }
@@ -565,7 +591,7 @@ static int open_inet_sk(struct file_desc *d)
 		return -1;
 	}
 
-	if ((ie->type != SOCK_STREAM) && (ie->type != SOCK_DGRAM)) {
+	if ((ie->type != SOCK_STREAM) && (ie->type != SOCK_DGRAM) && (ie->type != SOCK_RAW)) {
 		pr_err("Unsupported socket type: %d\n", ie->type);
 		return -1;
 	}
@@ -641,7 +667,7 @@ done:
 	if (rst_file_params(sk, ie->fown, ie->flags))
 		goto err;
 
-	if (ie->ip_opts && restore_ip_opts(sk, ie->ip_opts))
+	if (ie->ip_opts && restore_ip_opts(ie->family, sk, ie->ip_opts))
 		goto err;
 
 	if (restore_socket_opts(sk, ie->opts))
@@ -713,7 +739,7 @@ int inet_bind(int sk, struct inet_sk_info *ii)
 	 * sockets could not be bound to them in this moment
 	 * without setting IP_FREEBIND.
 	 */
-	if (ii->ie->family == AF_INET6) {
+	if (ii->ie->family == AF_INET6 && ii->ie->proto != IPPROTO_RAW) {
 		int yes = 1;
 
 		if (restore_opt(sk, SOL_IP, IP_FREEBIND, &yes))
diff --git a/criu/sockets.c b/criu/sockets.c
index b5c03fdf2984..57fa4c16a646 100644
--- a/criu/sockets.c
+++ b/criu/sockets.c
@@ -57,9 +57,11 @@ enum socket_cl_bits
 	INET_TCP_CL_BIT,
 	INET_UDP_CL_BIT,
 	INET_UDPLITE_CL_BIT,
+	INET_RAW_CL_BIT,
 	INET6_TCP_CL_BIT,
 	INET6_UDP_CL_BIT,
 	INET6_UDPLITE_CL_BIT,
+	INET6_RAW_CL_BIT,
 	UNIX_CL_BIT,
 	PACKET_CL_BIT,
 	_MAX_CL_BIT,
@@ -85,6 +87,8 @@ enum socket_cl_bits get_collect_bit_nr(unsigned int family, unsigned int proto)
 			return INET_UDP_CL_BIT;
 		if (proto == IPPROTO_UDPLITE)
 			return INET_UDPLITE_CL_BIT;
+		if (proto == IPPROTO_RAW)
+			return INET_RAW_CL_BIT;
 	}
 	if (family == AF_INET6) {
 		if (proto == IPPROTO_TCP)
@@ -93,6 +97,8 @@ enum socket_cl_bits get_collect_bit_nr(unsigned int family, unsigned int proto)
 			return INET6_UDP_CL_BIT;
 		if (proto == IPPROTO_UDPLITE)
 			return INET6_UDPLITE_CL_BIT;
+		if (proto == IPPROTO_RAW)
+			return INET6_RAW_CL_BIT;
 	}
 
 	pr_err("Unknown pair family %d proto %d\n", family, proto);
@@ -593,6 +599,9 @@ static int inet_receive_one(struct nlmsghdr *h, void *arg)
 	case IPPROTO_TCP:
 		type = SOCK_STREAM;
 		break;
+	case IPPROTO_RAW:
+		type = SOCK_RAW;
+		break;
 	case IPPROTO_UDP:
 	case IPPROTO_UDPLITE:
 		type = SOCK_DGRAM;
@@ -614,6 +623,14 @@ static int do_collect_req(int nl, struct sock_diag_req *req, int size,
 
 	if (tmp == 0)
 		set_collect_bit(req->r.n.sdiag_family, req->r.n.sdiag_protocol);
+	else if (tmp == -ENOENT &&
+		 ((req->r.n.sdiag_family == AF_INET ||
+		   req->r.n.sdiag_family == AF_INET6) &&
+		  req->r.n.sdiag_protocol == IPPROTO_RAW)) {
+		pr_warn("No support for DIAG module on family %s with protocol IPPROTO_RAW, may fail later\n",
+			req->r.n.sdiag_family == AF_INET ? "IPv4" : "IPv6");
+		tmp = 0;
+	}
 
 	return tmp;
 }
@@ -668,6 +685,15 @@ int collect_sockets(struct ns_id *ns)
 	if (tmp)
 		err = tmp;
 
+	/* Collect IPv4 RAW sockets */
+	req.r.i.sdiag_family	= AF_INET;
+	req.r.i.sdiag_protocol	= IPPROTO_RAW;
+	req.r.i.idiag_ext	= 0;
+	req.r.i.idiag_states	= -1; /* All */
+	tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, &req.r.i);
+	if (tmp)
+		err = tmp;
+
 	/* Collect IPv6 TCP sockets */
 	req.r.i.sdiag_family	= AF_INET6;
 	req.r.i.sdiag_protocol	= IPPROTO_TCP;
@@ -696,6 +722,15 @@ int collect_sockets(struct ns_id *ns)
 	if (tmp)
 		err = tmp;
 
+	/* Collect IPv6 RAW sockets */
+	req.r.i.sdiag_family	= AF_INET6;
+	req.r.i.sdiag_protocol	= IPPROTO_RAW;
+	req.r.i.idiag_ext	= 0;
+	req.r.i.idiag_states	= -1; /* All */
+	tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, &req.r.i);
+	if (tmp)
+		err = tmp;
+
 	req.r.p.sdiag_family	= AF_PACKET;
 	req.r.p.sdiag_protocol	= 0;
 	req.r.p.pdiag_show	= PACKET_SHOW_INFO | PACKET_SHOW_MCLIST |
diff --git a/images/sk-inet.proto b/images/sk-inet.proto
index 01dda875a247..6c5b8df585e7 100644
--- a/images/sk-inet.proto
+++ b/images/sk-inet.proto
@@ -6,6 +6,8 @@ import "sk-opts.proto";
 
 message ip_opts_entry {
 	optional bool		freebind	= 1;
+	optional bool		hdrincl		= 2;
+	optional bool		nodefrag	= 3;
 }
 
 message inet_sk_entry {
-- 
2.7.4



More information about the CRIU mailing list