[CRIU] [PATCH 1/2] sk-inet: Add initial support for raw sockets

Andrey Vagin avagin at virtuozzo.com
Wed Mar 15 15:33:18 PDT 2017


On Wed, Feb 22, 2017 at 04:53:27PM +0300, Cyrill Gorcunov wrote:
> For raw sockets we need the DIAG module extension, so if
> collecting sockets fails, don't exit with an error but
> warn the user; if we really meet a raw socket, we will
> exit later at the socket lookup stage.
> 
> Strictly speaking we can use procfs parsing instead, but
> this is going to be way more complex than the well-known diag
> approach, and taking into account that raw sockets are
> not that widely used, let's support them only when the diag
> module is present in the system.
> 
> In this patch initial raw socket support is added,
> complete enough to handle the SO_IP_SET request from the
> ipset tool (needed by modern containers). But the
> code might need extensions/fixes in the future.
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
>  criu/cr-check.c        | 11 ++++++++++
>  criu/include/sk-inet.h | 12 +++++++++++
>  criu/sk-inet.c         | 58 ++++++++++++++++++++++++++++++++++++--------------
>  criu/sockets.c         | 35 ++++++++++++++++++++++++++++++
>  images/sk-inet.proto   |  2 ++
>  5 files changed, 102 insertions(+), 16 deletions(-)
> 
> diff --git a/criu/cr-check.c b/criu/cr-check.c
> index c8261255fdeb..3e487a774403 100644
> --- a/criu/cr-check.c
> +++ b/criu/cr-check.c
> @@ -49,6 +49,7 @@
>  #include "cr_options.h"
>  #include "libnetlink.h"
>  #include "net.h"
> +#include "inet_diag.h"
>  #include "linux/userfaultfd.h"
>  #include "restorer.h"
>  
> @@ -1090,6 +1091,14 @@ static int check_sk_netns(void)
>  	return 0;
>  }
>  
> +static int check_net_diag_raw(void)
> +{
> +	check_sock_diag();
> +	return !socket_test_collect_bit(AF_INET, IPPROTO_RAW) &&
> +		!socket_test_collect_bit(AF_INET6, IPPROTO_RAW);
> +}
> +
> +
>  static int check_compat_cr(void)
>  {
>  	if (kdat_compat_sigreturn_test())
> @@ -1202,6 +1211,7 @@ int cr_check(void)
>  		ret |= check_userns();
>  		ret |= check_loginuid();
>  		ret |= check_sk_netns();
> +		ret |= check_net_diag_raw();
>  	}
>  
>  	/*
> @@ -1254,6 +1264,7 @@ static struct feature_list feature_list[] = {
>  	{ "lazy_pages", check_uffd },
>  	{ "compat_cr", check_compat_cr },
>  	{ "sk_ns", check_sk_netns },
> +	{ "net_diag_raw", check_net_diag_raw },
>  	{ NULL, NULL },
>  };
>  
> diff --git a/criu/include/sk-inet.h b/criu/include/sk-inet.h
> index bf6fb1d77ddf..5d996581c18d 100644
> --- a/criu/include/sk-inet.h
> +++ b/criu/include/sk-inet.h
> @@ -16,6 +16,18 @@
>  #define TCP_REPAIR_OPTIONS	22
>  #endif
>  
> +#ifndef IP_HDRINCL
> +# define IP_HDRINCL		3
> +#endif
> +
> +#ifndef IP_NODEFRAG
> +# define IP_NODEFRAG		22
> +#endif
> +
> +#ifndef IPV6_HDRINCL
> +# define IPV6_HDRINCL		36
> +#endif
> +
>  struct inet_sk_desc {
>  	struct socket_desc	sd;
>  	unsigned int		type;
> diff --git a/criu/sk-inet.c b/criu/sk-inet.c
> index ee6ce60e2b12..4efe6eb4cc11 100644
> --- a/criu/sk-inet.c
> +++ b/criu/sk-inet.c
> @@ -101,7 +101,7 @@ static void show_one_inet_img(const char *act, const InetSkEntry *e)
>  		e->state, src_addr);
>  }
>  
> -static int can_dump_ipproto(int ino, int proto)
> +static int can_dump_ipproto(int ino, int proto, int type)
>  {
>  	/* Make sure it's a proto we support */
>  	switch (proto) {
> @@ -111,8 +111,12 @@ static int can_dump_ipproto(int ino, int proto)
>  	case IPPROTO_UDPLITE:
>  		break;
>  	default:
> -		pr_err("Unsupported proto %d for socket %x\n", proto, ino);
> -		return 0;
> +		/* Raw sockets may have any protocol inside */
> +		if (type != SOCK_RAW) {

Maybe we can check the type outside of this function?

> +			pr_err("Unsupported proto %d (type %d) for socket %x\n",
> +			       proto, type, ino);
> +			return 0;
> +		}
>  	}
>  
>  	return 1;
> @@ -142,9 +146,9 @@ static int can_dump_inet_sk(const struct inet_sk_desc *sk)
>  		return 1;
>  	}
>  
> -	if (sk->type != SOCK_STREAM) {
> +	if (sk->type != SOCK_STREAM && sk->type != SOCK_RAW) {
>  		pr_err("Can't dump %d inet socket %x. "
> -				"Only can stream and dgram.\n",
> +				"Only can stream, dgram and raw.\n",
>  				sk->type, sk->sd.ino);
>  		return 0;
>  	}
> @@ -288,12 +292,24 @@ err:
>  	return NULL;
>  }
>  
> -static int dump_ip_opts(int sk, IpOptsEntry *ioe)
> +
> +static int dump_ip_opts(int family, int type, int sk, IpOptsEntry *ioe)
>  {
>  	int ret = 0;
>  
> -	ret |= dump_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
> -	ioe->has_freebind = ioe->freebind;
> +	if (type == SOCK_RAW) {
> +		if (family == AF_INET6) {
> +			ret |= dump_opt(sk, SOL_IPV6, IPV6_HDRINCL, &ioe->hdrincl);
> +		} else {
> +			ret |= dump_opt(sk, SOL_IP, IP_HDRINCL, &ioe->hdrincl);
> +			ret |= dump_opt(sk, SOL_IP, IP_NODEFRAG, &ioe->nodefrag);
> +			ioe->has_nodefrag = ioe->nodefrag;
> +		}
> +		ioe->has_hdrincl = ioe->hdrincl;
> +	} else {
> +		ret |= dump_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);

is IP_FREEBIND not suitable for raw sockets?

> +		ioe->has_freebind = ioe->freebind;
> +	}
>  
>  	return ret;
>  }
> @@ -323,14 +339,18 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
>  	InetSkEntry ie = INET_SK_ENTRY__INIT;
>  	IpOptsEntry ipopts = IP_OPTS_ENTRY__INIT;
>  	SkOptsEntry skopts = SK_OPTS_ENTRY__INIT;
> -	int ret = -1, err = -1, proto;
> +	int ret = -1, err = -1, proto, type;
>  
>  	ret = do_dump_opt(lfd, SOL_SOCKET, SO_PROTOCOL,
>  					&proto, sizeof(proto));
>  	if (ret)
>  		goto err;
> +	ret = do_dump_opt(lfd, SOL_SOCKET, SO_TYPE,
> +			  &type, sizeof(type));
> +	if (ret)
> +		goto err;
>  
> -	if (!can_dump_ipproto(p->stat.st_ino, proto))
> +	if (!can_dump_ipproto(p->stat.st_ino, proto, type))

something like this

	if (type != SOCK_RAW && !can_dump_ipproto(p->stat.st_ino, proto, type)) {

>  		goto err;
>  
>  	sk = (struct inet_sk_desc *)lookup_socket(p->stat.st_ino, family, proto);
> @@ -410,7 +430,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
>  	memcpy(ie.src_addr, sk->src_addr, pb_repeated_size(&ie, src_addr));
>  	memcpy(ie.dst_addr, sk->dst_addr, pb_repeated_size(&ie, dst_addr));
>  
> -	if (dump_ip_opts(lfd, &ipopts))
> +	if (dump_ip_opts(family, sk->type, lfd, &ipopts))
>  		goto err;
>  
>  	if (dump_socket_opts(lfd, &skopts))
> @@ -424,7 +444,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
>  
>  	switch (proto) {
>  	case IPPROTO_TCP:
> -		err = dump_one_tcp(lfd, sk);
> +		err = (sk->type != SOCK_RAW) ? dump_one_tcp(lfd, sk) : 0;
>  		break;
>  	default:
>  		err = 0;
> @@ -606,12 +626,18 @@ static int post_open_inet_sk(struct file_desc *d, int sk)
>  	return 0;
>  }
>  
> -int restore_ip_opts(int sk, IpOptsEntry *ioe)
> +int restore_ip_opts(int family, int sk, IpOptsEntry *ioe)
>  {
>  	int ret = 0;
>  
>  	if (ioe->has_freebind)
>  		ret |= restore_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
> +	if (ioe->has_nodefrag)
> +		ret |= restore_opt(sk, SOL_IP, IP_NODEFRAG, &ioe->nodefrag);
> +	if (ioe->has_hdrincl)
> +		ret |= restore_opt(sk, family == AF_INET6 ? SOL_IPV6 : SOL_IP,
> +				   family == AF_INET6 ? IPV6_HDRINCL : IP_HDRINCL,
> +				   &ioe->hdrincl);
>  
>  	return ret;
>  }
> @@ -635,7 +661,7 @@ static int open_inet_sk(struct file_desc *d, int *new_fd)
>  		return -1;
>  	}
>  
> -	if ((ie->type != SOCK_STREAM) && (ie->type != SOCK_DGRAM)) {
> +	if ((ie->type != SOCK_STREAM) && (ie->type != SOCK_DGRAM) && (ie->type != SOCK_RAW)) {
>  		pr_err("Unsupported socket type: %d\n", ie->type);
>  		return -1;
>  	}
> @@ -713,7 +739,7 @@ done:
>  	if (rst_file_params(sk, ie->fown, ie->flags))
>  		goto err;
>  
> -	if (ie->ip_opts && restore_ip_opts(sk, ie->ip_opts))
> +	if (ie->ip_opts && restore_ip_opts(ie->family, sk, ie->ip_opts))
>  		goto err;
>  
>  	if (restore_socket_opts(sk, ie->opts))
> @@ -780,7 +806,7 @@ int inet_bind(int sk, struct inet_sk_info *ii)
>  	 * sockets could not be bound to them in this moment
>  	 * without setting IP_FREEBIND.
>  	 */
> -	if (ii->ie->family == AF_INET6) {
> +	if (ii->ie->family == AF_INET6 && ii->ie->proto != IPPROTO_RAW) {
>  		int yes = 1;
>  
>  		if (restore_opt(sk, SOL_IP, IP_FREEBIND, &yes))
> diff --git a/criu/sockets.c b/criu/sockets.c
> index 420eee8252e2..06938f27ce05 100644
> --- a/criu/sockets.c
> +++ b/criu/sockets.c
> @@ -61,9 +61,11 @@ enum socket_cl_bits
>  	INET_TCP_CL_BIT,
>  	INET_UDP_CL_BIT,
>  	INET_UDPLITE_CL_BIT,
> +	INET_RAW_CL_BIT,
>  	INET6_TCP_CL_BIT,
>  	INET6_UDP_CL_BIT,
>  	INET6_UDPLITE_CL_BIT,
> +	INET6_RAW_CL_BIT,
>  	UNIX_CL_BIT,
>  	PACKET_CL_BIT,
>  	_MAX_CL_BIT,
> @@ -89,6 +91,8 @@ enum socket_cl_bits get_collect_bit_nr(unsigned int family, unsigned int proto)
>  			return INET_UDP_CL_BIT;
>  		if (proto == IPPROTO_UDPLITE)
>  			return INET_UDPLITE_CL_BIT;
> +		if (proto == IPPROTO_RAW)
> +			return INET_RAW_CL_BIT;
>  	}
>  	if (family == AF_INET6) {
>  		if (proto == IPPROTO_TCP)
> @@ -97,6 +101,8 @@ enum socket_cl_bits get_collect_bit_nr(unsigned int family, unsigned int proto)
>  			return INET6_UDP_CL_BIT;
>  		if (proto == IPPROTO_UDPLITE)
>  			return INET6_UDPLITE_CL_BIT;
> +		if (proto == IPPROTO_RAW)
> +			return INET6_RAW_CL_BIT;
>  	}
>  
>  	pr_err("Unknown pair family %d proto %d\n", family, proto);
> @@ -598,6 +604,9 @@ static int inet_receive_one(struct nlmsghdr *h, struct ns_id *ns, void *arg)
>  	case IPPROTO_TCP:
>  		type = SOCK_STREAM;
>  		break;
> +	case IPPROTO_RAW:
> +		type = SOCK_RAW;
> +		break;
>  	case IPPROTO_UDP:
>  	case IPPROTO_UDPLITE:
>  		type = SOCK_DGRAM;
> @@ -620,6 +629,14 @@ static int do_collect_req(int nl, struct sock_diag_req *req, int size,
>  
>  	if (tmp == 0)
>  		set_collect_bit(req->r.n.sdiag_family, req->r.n.sdiag_protocol);
> +	else if (tmp == -ENOENT &&
> +		 ((req->r.n.sdiag_family == AF_INET ||
> +		   req->r.n.sdiag_family == AF_INET6) &&
> +		  req->r.n.sdiag_protocol == IPPROTO_RAW)) {
> +		pr_warn("No support for DIAG module on family %s with protocol IPPROTO_RAW, may fail later\n",
> +			req->r.n.sdiag_family == AF_INET ? "IPv4" : "IPv6");
> +		tmp = 0;


Can you handle this error in collect_sockets(), like we do for netlink
and packet sockets?

> +	}
>  
>  	return tmp;
>  }
> @@ -677,6 +694,15 @@ int collect_sockets(struct ns_id *ns)
>  	if (tmp)
>  		err = tmp;
>  
> +	/* Collect IPv4 RAW sockets */
> +	req.r.i.sdiag_family	= AF_INET;
> +	req.r.i.sdiag_protocol	= IPPROTO_RAW;
> +	req.r.i.idiag_ext	= 0;
> +	req.r.i.idiag_states	= -1; /* All */
> +	tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, ns, &req.r.i);
> +	if (tmp)
> +		err = tmp;
> +
>  	/* Collect IPv6 TCP sockets */
>  	req.r.i.sdiag_family	= AF_INET6;
>  	req.r.i.sdiag_protocol	= IPPROTO_TCP;
> @@ -708,6 +734,15 @@ int collect_sockets(struct ns_id *ns)
>  	if (tmp)
>  		err = tmp;
>  
> +	/* Collect IPv6 RAW sockets */
> +	req.r.i.sdiag_family	= AF_INET6;
> +	req.r.i.sdiag_protocol	= IPPROTO_RAW;
> +	req.r.i.idiag_ext	= 0;
> +	req.r.i.idiag_states	= -1; /* All */
> +	tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, ns, &req.r.i);
> +	if (tmp)
> +		err = tmp;
> +
>  	req.r.p.sdiag_family	= AF_PACKET;
>  	req.r.p.sdiag_protocol	= 0;
>  	req.r.p.pdiag_show	= PACKET_SHOW_INFO | PACKET_SHOW_MCLIST |
> diff --git a/images/sk-inet.proto b/images/sk-inet.proto
> index 09c5a47d2464..173c74a40df7 100644
> --- a/images/sk-inet.proto
> +++ b/images/sk-inet.proto
> @@ -6,6 +6,8 @@ import "sk-opts.proto";
>  
>  message ip_opts_entry {
>  	optional bool		freebind	= 1;
> +	optional bool		hdrincl		= 2;
> +	optional bool		nodefrag	= 3;
>  }
>  
>  message inet_sk_entry {
> -- 
> 2.7.4
> 


More information about the CRIU mailing list