[CRIU] [PATCH 4/5] inet: remember ipv6 connections' ifindex for restore
Andrew Vagin
avagin at virtuozzo.com
Tue Nov 24 03:08:16 PST 2015
On Mon, Nov 23, 2015 at 01:55:27PM -0700, Tycho Andersen wrote:
> For some ipv6 scope types (link local, among others), we need to tell the
> kernel the ifindex of the interface whose address we want to bind to as
> well as the address itself.
>
Can we use SO_BINDTODEVICE to get this information?
diff --git a/test/zdtm/live/static/bridge.c b/test/zdtm/live/static/bridge.c
index 06bff50..5bb9c40 100644
--- a/test/zdtm/live/static/bridge.c
+++ b/test/zdtm/live/static/bridge.c
@@ -77,6 +77,13 @@ int main(int argc, char **argv)
goto out;
}
+ {
+ char name[1024];
+ socklen_t len = 1024;
+ getsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, &name, &len);
+ pr_err("%s\n", name);
+ }
+
/* Here, we grep for inet because some of the IPV6 DAD stuff can be
* racy, and all we really care about is that the bridge got restored
* with the right MAC, since we know DAD will succeed eventually.
[root at fc22-vm criu]# cat test/zdtm/live/static/bridge.out
10:58:52.252: 4: ERR: bridge.c:84: zdtmbr0
10:58:52.261: 4: PASS
> Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
> ---
> include/namespaces.h | 1 +
> include/net.h | 2 ++
> net.c | 42 +++++++++++++++++++++++++++++++++++++-----
> protobuf/sk-inet.proto | 1 +
> sk-inet.c | 21 +++++++++++++++++----
> 5 files changed, 58 insertions(+), 9 deletions(-)
>
> diff --git a/include/namespaces.h b/include/namespaces.h
> index c655890..5544c1c 100644
> --- a/include/namespaces.h
> +++ b/include/namespaces.h
> @@ -42,6 +42,7 @@ struct ns_id {
>
> struct {
> int nlsk; /* for sockets collection */
> + int rtsk; /* for v6 address collection */
> int seqsk; /* to talk to parasite daemons */
> } net;
> };
> diff --git a/include/net.h b/include/net.h
> index bcf0d4a..34d9788 100644
> --- a/include/net.h
> +++ b/include/net.h
> @@ -33,4 +33,6 @@ extern int move_veth_to_bridge(void);
> #define PB_ALEN_INET 1
> #define PB_ALEN_INET6 4
>
> +extern int find_ifindex(void *v6addr, u32 *ifindex);
> +
> #endif /* __CR_NET_H__ */
> diff --git a/net.c b/net.c
> index 1b9691b..86c0376 100644
> --- a/net.c
> +++ b/net.c
> @@ -245,6 +245,23 @@ static int attach_v6_addresses(NetDeviceEntry *nde)
> return 0;
> }
>
> +int find_ifindex(void *v6addr, u32 *ifindex)
> +{
> + int i;
> +
> + for (i = 0; i < n_v6_addresses; i++) {
> + V6Address *cur = &v6_addresses[i];
> +
> + if (memcmp(cur->addr, v6addr, IPV6_ADDRLEN))
> + continue;
> +
> + *ifindex = cur->ifindex;
> + return 0;
> + }
> +
> + return -1;
> +}
> +
> static int dump_one_netdev(int type, struct ifinfomsg *ifi,
> struct rtattr **tb, struct cr_imgset *fds,
> int (*dump)(NetDeviceEntry *, struct cr_imgset *))
> @@ -462,11 +479,6 @@ static int dump_links(struct cr_imgset *fds)
> goto out;
> }
>
> - if (collect_v6_addresses(sk) < 0) {
> - pr_err("getting v6 addresses failed\n");
> - return -1;
> - }
> -
> memset(&req, 0, sizeof(req));
> req.nlh.nlmsg_len = sizeof(req);
> req.nlh.nlmsg_type = RTM_GETLINK;
> @@ -1297,6 +1309,13 @@ static int prep_ns_sockets(struct ns_id *ns, bool for_dump)
> } else
> ns->net.nlsk = -1;
>
> + ret = ns->net.rtsk = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
> + if (ret < 0) {
> + pr_perror("can't open rtnl socket for v6 addr dump");
> + goto err_sq;
> + }
> +
> +
> ret = ns->net.seqsk = socket(PF_UNIX, SOCK_SEQPACKET, 0);
> if (ret < 0) {
> pr_perror("Can't create seqsk for parasite");
> @@ -1332,6 +1351,19 @@ static int collect_net_ns(struct ns_id *ns, void *oarg)
> if (ret)
> return ret;
>
> + /* We need to collect the v6 addresses before we collect the sockets,
> + * because we need to find the ifindex for an address (so that we can
> + * bind() successfully on restore if something has bound to a v6
> + * address). Unfortunately, the inet_diag module doesn't export the
> + * ifindex of a connection, so we query all the addresses via netlink
> + * and keep track of their ifindices so we can find them later. (This
> + * is no extra work, since we need all this information for
> + * attach_v6_addresses anyways; we just have to do it earlier.)
> + */
> + ret = collect_v6_addresses(ns->net.rtsk);
> + if (ret)
> + return ret;
> +
> if (!for_dump)
> return 0;
>
> diff --git a/protobuf/sk-inet.proto b/protobuf/sk-inet.proto
> index ad49928..2d7527f 100644
> --- a/protobuf/sk-inet.proto
> +++ b/protobuf/sk-inet.proto
> @@ -33,4 +33,5 @@ message inet_sk_entry {
> required sk_opts_entry opts = 14;
> optional bool v6only = 15;
> optional ip_opts_entry ip_opts = 16;
> + optional uint32 ifindex = 17;
> }
> diff --git a/sk-inet.c b/sk-inet.c
> index 9194dbc..42234d0 100644
> --- a/sk-inet.c
> +++ b/sk-inet.c
> @@ -296,6 +296,13 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
>
> ie.v6only = val ? true : false;
> ie.has_v6only = true;
> +
> + /* ifindex only matters on source ports for bind, so let's
> + * find only that ifindex. */
> + if (find_ifindex(sk->src_addr, &ie.ifindex) < 0)
> + pr_warn("ifindex for %d not found, can't bind\n", ie.id);
> + else
> + ie.has_ifindex = true;
> }
>
> ie.src_addr = xmalloc(pb_repeated_size(&ie, src_addr));
> @@ -605,7 +612,7 @@ union sockaddr_inet {
> };
>
> static int restore_sockaddr(union sockaddr_inet *sa,
> - int family, u32 pb_port, u32 *pb_addr)
> + int family, u32 pb_port, u32 *pb_addr, u32 ifindex)
> {
> BUILD_BUG_ON(sizeof(sa->v4.sin_addr.s_addr) > PB_ALEN_INET * sizeof(u32));
> BUILD_BUG_ON(sizeof(sa->v6.sin6_addr.s6_addr) > PB_ALEN_INET6 * sizeof(u32));
> @@ -623,6 +630,12 @@ static int restore_sockaddr(union sockaddr_inet *sa,
> sa->v6.sin6_family = AF_INET6;
> sa->v6.sin6_port = htons(pb_port);
> memcpy(sa->v6.sin6_addr.s6_addr, pb_addr, sizeof(sa->v6.sin6_addr.s6_addr));
> +
> + /* Here although the struct member is called scope_id, the
> + * kernel really wants ifindex. See
> + * /net/ipv6/af_inet6.c:inet6_bind for details.
> + */
> + sa->v6.sin6_scope_id = ifindex;
> return sizeof(sa->v6);
> }
>
> @@ -637,7 +650,7 @@ int inet_bind(int sk, struct inet_sk_info *ii)
> int addr_size;
>
> addr_size = restore_sockaddr(&addr, ii->ie->family,
> - ii->ie->src_port, ii->ie->src_addr);
> + ii->ie->src_port, ii->ie->src_addr, ii->ie->ifindex);
>
> /*
> * ipv6 addresses go through a “tentative” phase and
> @@ -661,7 +674,7 @@ int inet_bind(int sk, struct inet_sk_info *ii)
> }
>
> if (bind(sk, (struct sockaddr *)&addr, addr_size) == -1) {
> - pr_perror("Can't bind inet socket");
> + pr_perror("Can't bind inet socket (id %d)", ii->ie->id);
> return -1;
> }
>
> @@ -685,7 +698,7 @@ int inet_connect(int sk, struct inet_sk_info *ii)
> int addr_size;
>
> addr_size = restore_sockaddr(&addr, ii->ie->family,
> - ii->ie->dst_port, ii->ie->dst_addr);
> + ii->ie->dst_port, ii->ie->dst_addr, ii->ie->ifindex);
>
> if (connect(sk, (struct sockaddr *)&addr, addr_size) == -1) {
> pr_perror("Can't connect inet socket back");
> --
> 2.6.2
>
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
More information about the CRIU
mailing list