[CRIU] [RFC] sk-inet: Add support for raw sockets

Cyrill Gorcunov gorcunov at openvz.org
Mon Sep 12 01:43:15 PDT 2016


For raw sockets we need a DIAG module extension, so if we
fail while collecting sockets, don't exit with an error but
warn the user; if we really do meet a raw socket, we will
exit later, at the socket lookup stage.

Strictly speaking we could use procfs parsing instead, but
this is going to be way more complex than the well-known diag
approach, and taking into account that raw sockets are
not that widely used, let's support them only when the diag
module is present in the system.

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
This patch requires testing, which I'm working on now;
I also need to examine whether we need to extend the in-kernel
code for raw socket queues to peek data. Thus this is just an
RFC for the sake of early review.

 criu/include/sk-inet.h | 12 ++++++++++++
 criu/sk-inet.c         | 37 ++++++++++++++++++++++++++++---------
 criu/sockets.c         | 35 +++++++++++++++++++++++++++++++++++
 images/sk-inet.proto   |  2 ++
 4 files changed, 77 insertions(+), 9 deletions(-)

diff --git a/criu/include/sk-inet.h b/criu/include/sk-inet.h
index 9d2bda6e01f5..721638663498 100644
--- a/criu/include/sk-inet.h
+++ b/criu/include/sk-inet.h
@@ -17,6 +17,18 @@
 #define TCP_REPAIR_OPTIONS	22
 #endif
 
+#ifndef IP_HDRINCL
+# define IP_HDRINCL		3
+#endif
+
+#ifndef IP_NODEFRAG
+# define IP_NODEFRAG		22
+#endif
+
+#ifndef IPV6_HDRINCL
+# define IPV6_HDRINCL		36
+#endif
+
 struct inet_sk_desc {
 	struct socket_desc	sd;
 	unsigned int		type;
diff --git a/criu/sk-inet.c b/criu/sk-inet.c
index f4bc51923de9..55677d2fba2f 100644
--- a/criu/sk-inet.c
+++ b/criu/sk-inet.c
@@ -101,6 +101,7 @@ static int can_dump_ipproto(int ino, int proto)
 	case IPPROTO_TCP:
 	case IPPROTO_UDP:
 	case IPPROTO_UDPLITE:
+	case IPPROTO_RAW:
 		break;
 	default:
 		pr_err("Unsupported proto %d for socket %x\n", proto, ino);
@@ -134,7 +135,7 @@ static int can_dump_inet_sk(const struct inet_sk_desc *sk)
 		return 1;
 	}
 
-	if (sk->type != SOCK_STREAM) {
+	if (sk->type != SOCK_STREAM && sk->type != SOCK_RAW) {
 		pr_err("Can't dump %d inet socket %x. "
 				"Only can stream and dgram.\n",
 				sk->type, sk->sd.ino);
@@ -239,12 +240,24 @@ err:
 	return NULL;
 }
 
-static int dump_ip_opts(int sk, IpOptsEntry *ioe)
+
+static int dump_ip_opts(int family, int type, int sk, IpOptsEntry *ioe)
 {
 	int ret = 0;
 
-	ret |= dump_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
-	ioe->has_freebind = ioe->freebind;
+	if (type == SOCK_RAW) {
+		if (family == AF_INET6) {
+			ret |= dump_opt(sk, SOL_IPV6, IPV6_HDRINCL, &ioe->hdrincl);
+		} else {
+			ret |= dump_opt(sk, SOL_IP, IP_HDRINCL, &ioe->hdrincl);
+			ret |= dump_opt(sk, SOL_IP, IP_NODEFRAG, &ioe->nodefrag);
+			ioe->has_nodefrag = ioe->nodefrag;
+		}
+		ioe->has_hdrincl = ioe->hdrincl;
+	} else {
+		ret |= dump_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
+		ioe->has_freebind = ioe->freebind;
+	}
 
 	return ret;
 }
@@ -358,7 +371,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
 	memcpy(ie.src_addr, sk->src_addr, pb_repeated_size(&ie, src_addr));
 	memcpy(ie.dst_addr, sk->dst_addr, pb_repeated_size(&ie, dst_addr));
 
-	if (dump_ip_opts(lfd, &ipopts))
+	if (dump_ip_opts(family, sk->type, lfd, &ipopts))
 		goto err;
 
 	if (dump_socket_opts(lfd, &skopts))
@@ -539,12 +552,18 @@ static int post_open_inet_sk(struct file_desc *d, int sk)
 	return 0;
 }
 
-int restore_ip_opts(int sk, IpOptsEntry *ioe)
+int restore_ip_opts(int family, int sk, IpOptsEntry *ioe)
 {
 	int ret = 0;
 
 	if (ioe->has_freebind)
 		ret |= restore_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
+	if (ioe->has_nodefrag)
+		ret |= restore_opt(sk, SOL_IP, IP_NODEFRAG, &ioe->nodefrag);
+	if (ioe->has_hdrincl)
+		ret |= restore_opt(sk, family == AF_INET6 ? SOL_IPV6 : SOL_IP,
+				   family == AF_INET6 ? IPV6_HDRINCL : IP_HDRINCL,
+				   &ioe->hdrincl);
 
 	return ret;
 }
@@ -564,7 +583,7 @@ static int open_inet_sk(struct file_desc *d)
 		return -1;
 	}
 
-	if ((ie->type != SOCK_STREAM) && (ie->type != SOCK_DGRAM)) {
+	if ((ie->type != SOCK_STREAM) && (ie->type != SOCK_DGRAM) && (ie->type != SOCK_RAW)) {
 		pr_err("Unsupported socket type: %d\n", ie->type);
 		return -1;
 	}
@@ -640,7 +659,7 @@ done:
 	if (rst_file_params(sk, ie->fown, ie->flags))
 		goto err;
 
-	if (ie->ip_opts && restore_ip_opts(sk, ie->ip_opts))
+	if (ie->ip_opts && restore_ip_opts(ie->family, sk, ie->ip_opts))
 		goto err;
 
 	if (restore_socket_opts(sk, ie->opts))
@@ -712,7 +731,7 @@ int inet_bind(int sk, struct inet_sk_info *ii)
 	 * sockets could not be bound to them in this moment
 	 * without setting IP_FREEBIND.
 	 */
-	if (ii->ie->family == AF_INET6) {
+	if (ii->ie->family == AF_INET6 && ii->ie->proto != IPPROTO_RAW) {
 		int yes = 1;
 
 		if (restore_opt(sk, SOL_IP, IP_FREEBIND, &yes))
diff --git a/criu/sockets.c b/criu/sockets.c
index ffc4f39274b8..942736504fa2 100644
--- a/criu/sockets.c
+++ b/criu/sockets.c
@@ -54,9 +54,11 @@ enum socket_cl_bits
 	INET_TCP_CL_BIT,
 	INET_UDP_CL_BIT,
 	INET_UDPLITE_CL_BIT,
+	INET_RAW_CL_BIT,
 	INET6_TCP_CL_BIT,
 	INET6_UDP_CL_BIT,
 	INET6_UDPLITE_CL_BIT,
+	INET6_RAW_CL_BIT,
 	UNIX_CL_BIT,
 	PACKET_CL_BIT,
 	_MAX_CL_BIT,
@@ -82,6 +84,8 @@ enum socket_cl_bits get_collect_bit_nr(unsigned int family, unsigned int proto)
 			return INET_UDP_CL_BIT;
 		if (proto == IPPROTO_UDPLITE)
 			return INET_UDPLITE_CL_BIT;
+		if (proto == IPPROTO_RAW)
+			return INET_RAW_CL_BIT;
 	}
 	if (family == AF_INET6) {
 		if (proto == IPPROTO_TCP)
@@ -90,6 +94,8 @@ enum socket_cl_bits get_collect_bit_nr(unsigned int family, unsigned int proto)
 			return INET6_UDP_CL_BIT;
 		if (proto == IPPROTO_UDPLITE)
 			return INET6_UDPLITE_CL_BIT;
+		if (proto == IPPROTO_RAW)
+			return INET6_RAW_CL_BIT;
 	}
 
 	pr_err("Unknown pair family %d proto %d\n", family, proto);
@@ -590,6 +596,9 @@ static int inet_receive_one(struct nlmsghdr *h, void *arg)
 	case IPPROTO_TCP:
 		type = SOCK_STREAM;
 		break;
+	case IPPROTO_RAW:
+		type = SOCK_RAW;
+		break;
 	case IPPROTO_UDP:
 	case IPPROTO_UDPLITE:
 		type = SOCK_DGRAM;
@@ -611,6 +620,14 @@ static int do_collect_req(int nl, struct sock_diag_req *req, int size,
 
 	if (tmp == 0)
 		set_collect_bit(req->r.n.sdiag_family, req->r.n.sdiag_protocol);
+	else if (tmp == -ENOENT &&
+		 ((req->r.n.sdiag_family == AF_INET ||
+		   req->r.n.sdiag_family == AF_INET6) &&
+		  req->r.n.sdiag_protocol == IPPROTO_RAW)) {
+		pr_warn("No support for DIAG module on family %s with protocol IPPROTO_RAW, may fail later\n",
+			req->r.n.sdiag_family == AF_INET ? "IPv4" : "IPv6");
+		tmp = 0;
+	}
 
 	return tmp;
 }
@@ -665,6 +682,15 @@ int collect_sockets(struct ns_id *ns)
 	if (tmp)
 		err = tmp;
 
+	/* Collect IPv4 RAW sockets */
+	req.r.i.sdiag_family	= AF_INET;
+	req.r.i.sdiag_protocol	= IPPROTO_RAW;
+	req.r.i.idiag_ext	= 0;
+	req.r.i.idiag_states	= -1; /* All */
+	tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, &req.r.i);
+	if (tmp)
+		err = tmp;
+
 	/* Collect IPv6 TCP sockets */
 	req.r.i.sdiag_family	= AF_INET6;
 	req.r.i.sdiag_protocol	= IPPROTO_TCP;
@@ -693,6 +719,15 @@ int collect_sockets(struct ns_id *ns)
 	if (tmp)
 		err = tmp;
 
+	/* Collect IPv6 RAW sockets */
+	req.r.i.sdiag_family	= AF_INET6;
+	req.r.i.sdiag_protocol	= IPPROTO_RAW;
+	req.r.i.idiag_ext	= 0;
+	req.r.i.idiag_states	= -1; /* All */
+	tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, &req.r.i);
+	if (tmp)
+		err = tmp;
+
 	req.r.p.sdiag_family	= AF_PACKET;
 	req.r.p.sdiag_protocol	= 0;
 	req.r.p.pdiag_show	= PACKET_SHOW_INFO | PACKET_SHOW_MCLIST |
diff --git a/images/sk-inet.proto b/images/sk-inet.proto
index 01dda875a247..6c5b8df585e7 100644
--- a/images/sk-inet.proto
+++ b/images/sk-inet.proto
@@ -6,6 +6,8 @@ import "sk-opts.proto";
 
 message ip_opts_entry {
 	optional bool		freebind	= 1;
+	optional bool		hdrincl		= 2;
+	optional bool		nodefrag	= 3;
 }
 
 message inet_sk_entry {
-- 
2.7.4



More information about the CRIU mailing list