[CRIU] [PATCH 4/5] inet: remember ipv6 connections' ifindex for restore

Andrew Vagin avagin at virtuozzo.com
Tue Nov 24 03:08:16 PST 2015


On Mon, Nov 23, 2015 at 01:55:27PM -0700, Tycho Andersen wrote:
> For some ipv6 scope types (link local, among others), we need to tell the
> kernel the ifindex of the interface whose address we want to bind to as
> well as the address itself.
>

Can we use SO_BINDTODEVICE to get this information?

diff --git a/test/zdtm/live/static/bridge.c b/test/zdtm/live/static/bridge.c
index 06bff50..5bb9c40 100644
--- a/test/zdtm/live/static/bridge.c
+++ b/test/zdtm/live/static/bridge.c
@@ -77,6 +77,13 @@ int main(int argc, char **argv)
                goto out;
        }
 
+       {
+               char name[1024];
+               socklen_t len = 1024;
+               getsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, &name, &len);
+               pr_err("%s\n", name);
+       }
+
        /* Here, we grep for inet because some of the IPV6 DAD stuff can be
         * racy, and all we really care about is that the bridge got restored
         * with the right MAC, since we know DAD will succeed eventually.

[root at fc22-vm criu]# cat test/zdtm/live/static/bridge.out
10:58:52.252:     4: ERR: bridge.c:84: zdtmbr0
10:58:52.261:     4: PASS

> Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
> ---
>  include/namespaces.h   |  1 +
>  include/net.h          |  2 ++
>  net.c                  | 42 +++++++++++++++++++++++++++++++++++++-----
>  protobuf/sk-inet.proto |  1 +
>  sk-inet.c              | 21 +++++++++++++++++----
>  5 files changed, 58 insertions(+), 9 deletions(-)
> 
> diff --git a/include/namespaces.h b/include/namespaces.h
> index c655890..5544c1c 100644
> --- a/include/namespaces.h
> +++ b/include/namespaces.h
> @@ -42,6 +42,7 @@ struct ns_id {
>  
>  		struct {
>  			int nlsk;	/* for sockets collection */
> +			int rtsk;	/* for v6 address collection */
>  			int seqsk;	/* to talk to parasite daemons */
>  		} net;
>  	};
> diff --git a/include/net.h b/include/net.h
> index bcf0d4a..34d9788 100644
> --- a/include/net.h
> +++ b/include/net.h
> @@ -33,4 +33,6 @@ extern int move_veth_to_bridge(void);
>  #define PB_ALEN_INET	1
>  #define PB_ALEN_INET6	4
>  
> +extern int find_ifindex(void *v6addr, u32 *ifindex);
> +
>  #endif /* __CR_NET_H__ */
> diff --git a/net.c b/net.c
> index 1b9691b..86c0376 100644
> --- a/net.c
> +++ b/net.c
> @@ -245,6 +245,23 @@ static int attach_v6_addresses(NetDeviceEntry *nde)
>  	return 0;
>  }
>  
> +int find_ifindex(void *v6addr, u32 *ifindex)
> +{
> +	int i;
> +
> +	for (i = 0; i < n_v6_addresses; i++) {
> +		V6Address *cur = &v6_addresses[i];
> +
> +		if (memcmp(cur->addr, v6addr, IPV6_ADDRLEN))
> +			continue;
> +
> +		*ifindex = cur->ifindex;
> +		return 0;
> +	}
> +
> +	return -1;
> +}
> +
>  static int dump_one_netdev(int type, struct ifinfomsg *ifi,
>  		struct rtattr **tb, struct cr_imgset *fds,
>  		int (*dump)(NetDeviceEntry *, struct cr_imgset *))
> @@ -462,11 +479,6 @@ static int dump_links(struct cr_imgset *fds)
>  		goto out;
>  	}
>  
> -	if (collect_v6_addresses(sk) < 0) {
> -		pr_err("getting v6 addresses failed\n");
> -		return -1;
> -	}
> -
>  	memset(&req, 0, sizeof(req));
>  	req.nlh.nlmsg_len = sizeof(req);
>  	req.nlh.nlmsg_type = RTM_GETLINK;
> @@ -1297,6 +1309,13 @@ static int prep_ns_sockets(struct ns_id *ns, bool for_dump)
>  	} else
>  		ns->net.nlsk = -1;
>  
> +	ret = ns->net.rtsk = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
> +	if (ret < 0) {
> +		pr_perror("can't open rtnl socket for v6 addr dump");
> +		goto err_sq;
> +	}
> +
> +
>  	ret = ns->net.seqsk = socket(PF_UNIX, SOCK_SEQPACKET, 0);
>  	if (ret < 0) {
>  		pr_perror("Can't create seqsk for parasite");
> @@ -1332,6 +1351,19 @@ static int collect_net_ns(struct ns_id *ns, void *oarg)
>  	if (ret)
>  		return ret;
>  
> +	/* We need to collect the v6 addresses before we collect the sockets,
> +	 * because we need to find the ifindex for an address (so that we can
> +	 * bind() successfully on restore if something has bound to a v6
> +	 * address). Unfortunately, the inet_diag module doesn't export the
> +	 * ifindex of a connection, so we query all the addresses via netlink
> +	 * and keep track of their ifindices so we can find them later. (This
> +	 * is no extra work, since we need all this information for
> +	 * attach_v6_addresses anyways; we just have to do it earlier.)
> +	 */
> +	ret = collect_v6_addresses(ns->net.rtsk);
> +	if (ret)
> +		return ret;
> +
>  	if (!for_dump)
>  		return 0;
>  
> diff --git a/protobuf/sk-inet.proto b/protobuf/sk-inet.proto
> index ad49928..2d7527f 100644
> --- a/protobuf/sk-inet.proto
> +++ b/protobuf/sk-inet.proto
> @@ -33,4 +33,5 @@ message inet_sk_entry {
>  	required sk_opts_entry		opts		= 14;
>  	optional bool			v6only		= 15;
>  	optional ip_opts_entry		ip_opts		= 16;
> +	optional uint32			ifindex		= 17;
>  }
> diff --git a/sk-inet.c b/sk-inet.c
> index 9194dbc..42234d0 100644
> --- a/sk-inet.c
> +++ b/sk-inet.c
> @@ -296,6 +296,13 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
>  
>  		ie.v6only = val ? true : false;
>  		ie.has_v6only = true;
> +
> +		/* ifindex only matters on source ports for bind, so let's
> +		 * find only that ifindex. */
> +		if (find_ifindex(sk->src_addr, &ie.ifindex) < 0)
> +			pr_warn("ifindex for %d not found, can't bind\n", ie.id);
> +		else
> +			ie.has_ifindex = true;
>  	}
>  
>  	ie.src_addr = xmalloc(pb_repeated_size(&ie, src_addr));
> @@ -605,7 +612,7 @@ union sockaddr_inet {
>  };
>  
>  static int restore_sockaddr(union sockaddr_inet *sa,
> -		int family, u32 pb_port, u32 *pb_addr)
> +		int family, u32 pb_port, u32 *pb_addr, u32 ifindex)
>  {
>  	BUILD_BUG_ON(sizeof(sa->v4.sin_addr.s_addr) > PB_ALEN_INET * sizeof(u32));
>  	BUILD_BUG_ON(sizeof(sa->v6.sin6_addr.s6_addr) > PB_ALEN_INET6 * sizeof(u32));
> @@ -623,6 +630,12 @@ static int restore_sockaddr(union sockaddr_inet *sa,
>  		sa->v6.sin6_family = AF_INET6;
>  		sa->v6.sin6_port = htons(pb_port);
>  		memcpy(sa->v6.sin6_addr.s6_addr, pb_addr, sizeof(sa->v6.sin6_addr.s6_addr));
> +
> +		/* Here although the struct member is called scope_id, the
> +		 * kernel really wants ifindex. See
> +		 * /net/ipv6/af_inet6.c:inet6_bind for details.
> +		 */
> +		sa->v6.sin6_scope_id = ifindex;
>  		return sizeof(sa->v6);
>  	}
>  
> @@ -637,7 +650,7 @@ int inet_bind(int sk, struct inet_sk_info *ii)
>  	int addr_size;
>  
>  	addr_size = restore_sockaddr(&addr, ii->ie->family,
> -			ii->ie->src_port, ii->ie->src_addr);
> +			ii->ie->src_port, ii->ie->src_addr, ii->ie->ifindex);
>  
>  	/*
>  	 * ipv6 addresses go through a “tentative” phase and
> @@ -661,7 +674,7 @@ int inet_bind(int sk, struct inet_sk_info *ii)
>  	}
>  
>  	if (bind(sk, (struct sockaddr *)&addr, addr_size) == -1) {
> -		pr_perror("Can't bind inet socket");
> +		pr_perror("Can't bind inet socket (id %d)", ii->ie->id);
>  		return -1;
>  	}
>  
> @@ -685,7 +698,7 @@ int inet_connect(int sk, struct inet_sk_info *ii)
>  	int addr_size;
>  
>  	addr_size = restore_sockaddr(&addr, ii->ie->family,
> -			ii->ie->dst_port, ii->ie->dst_addr);
> +			ii->ie->dst_port, ii->ie->dst_addr, ii->ie->ifindex);
>  
>  	if (connect(sk, (struct sockaddr *)&addr, addr_size) == -1) {
>  		pr_perror("Can't connect inet socket back");
> -- 
> 2.6.2
> 
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu


More information about the CRIU mailing list