[CRIU] [PATCH v9 6/9] net: add support for macvlan link types

Pavel Emelyanov xemul at virtuozzo.com
Thu Oct 20 05:23:20 PDT 2016


> @@ -1004,6 +1077,218 @@ static int changeflags(int s, char *name, short flags)
>  	return 0;
>  }
>  
> +static int macvlan_link_info(NetDeviceEntry *nde, struct newlink_req *req)
> +{
> +	struct rtattr *macvlan_data;
> +	MacvlanLinkEntry *macvlan = nde->macvlan;
> +
> +	if (!macvlan) {
> +		pr_err("Missing macvlan link entry %d\n", nde->ifindex);
> +		return -1;
> +	}
> +
> +	addattr_l(&req->h, sizeof(*req), IFLA_INFO_KIND, "macvlan", 7);
> +
> +	macvlan_data = NLMSG_TAIL(&req->h);
> +	addattr_l(&req->h, sizeof(*req), IFLA_INFO_DATA, NULL, 0);
> +
> +	addattr_l(&req->h, sizeof(*req), IFLA_MACVLAN_MODE, &macvlan->mode, sizeof(macvlan->mode));
> +
> +	if (macvlan->has_flags)
> +		addattr_l(&req->h, sizeof(*req), IFLA_MACVLAN_FLAGS, &macvlan->flags, sizeof(macvlan->flags));
> +
> +	macvlan_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)macvlan_data;
> +
> +	return 0;
> +}
> +
> +static int userns_restore_one_link(void *arg, int fd, pid_t pid)
> +{
> +	int nlsk, ret;
> +	struct newlink_req *req = arg;
> +
> +	nlsk = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
> +	if (nlsk < 0) {
> +		pr_perror("Can't create nlk socket");
> +		return -1;
> +	}
> +
> +	addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &fd, sizeof(fd));
> +
> +	ret = do_rtnl_req(nlsk, req, req->h.nlmsg_len, restore_link_cb, NULL, NULL);
> +	close(nlsk);
> +	return ret;
> +}
> +
> +static int get_nsid_cb(struct nlmsghdr *nlh, void *arg)
> +{
> +	struct rtgenmsg *rthdr;
> +	struct rtattr *rta;
> +	int len, *netnsid = arg;
> +
> +	rthdr = NLMSG_DATA(nlh);
> +	len = nlh->nlmsg_len - NLMSG_SPACE(sizeof(*rthdr));
> +
> +	if (len < 0)
> +		return -1;
> +
> +	rta = NETNS_RTA(rthdr);
> +
> +	while (RTA_OK(rta, len)) {
> +		if (rta->rta_type == NETNSA_NSID)
> +			*netnsid = *((int *) RTA_DATA(rta));
> +		rta = RTA_NEXT(rta, len);
> +	}
> +
> +	if (netnsid < 0) {
> +		pr_err("Didn't get a netnsid back from netlink?\n");
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +static int get_criu_netnsid(int nlsk)
> +{
> +	static int netnsid = -1;
> +	struct {
> +		struct nlmsghdr n;
> +		struct rtgenmsg g;
> +		char buf[1024];
> +	} req;
> +	int ns_fd = get_service_fd(NS_FD_OFF), i;
> +
> +	if (netnsid > 0)
> +		return netnsid;
> +
> +	for (i = 0; i < 10; i++) {
> +		int ret;
> +
> +		memset(&req, 0, sizeof(req));
> +
> +		req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.g));
> +		req.n.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK;
> +		req.n.nlmsg_type = RTM_NEWNSID;
> +		req.n.nlmsg_seq = CR_NLMSG_SEQ;
> +
> +		addattr_l(&req.n, sizeof(req), NETNSA_FD, &ns_fd, sizeof(ns_fd));
> +		addattr_l(&req.n, sizeof(req), NETNSA_NSID, &i, sizeof(i));
> +
> +		ret = do_rtnl_req(nlsk, &req, req.n.nlmsg_len, NULL, NULL, NULL);
> +		if (ret < 0) {
> +			if (ret == -EEXIST) {
> +				req.n.nlmsg_type = RTM_GETNSID;
> +				ret = do_rtnl_req(nlsk, &req, req.n.nlmsg_len, get_nsid_cb, NULL, &netnsid);
> +				if (ret < 0) {
> +					pr_err("Couldn't get netnsid: %d\n", ret);
> +					return -1;
> +				}
> +
> +				return netnsid;
> +			}
> +			errno = -ret;
> +			pr_perror("couldn't create new netnsid");
> +			return -1;
> +		}
> +
> +		netnsid = i;
> +		return netnsid;
> +	}
> +
> +	pr_err("tried to create too many netnsids\n");
> +	return -1;
> +}
> +
> +static int restore_one_macvlan(NetDeviceEntry *nde, int nlsk)
> +{
> +	struct newlink_extras extras = {
> +		.netns_id = -1,
> +		.link = -1,
> +		.target_netns = -1,
> +	};
> +	char key[100], *val;
> +	int my_netns = -1, ret = -1, s;
> +
> +	snprintf(key, sizeof(key), "macvlan[%s]", nde->name);
> +	val = external_lookup_data(key);
> +	if (IS_ERR_OR_NULL(val)) {
> +		pr_err("a macvlan parent for %s is required\n", nde->name);
> +		return -1;
> +	}
> +
> +	extras.link = (int) (unsigned long) val;
> +
> +	extras.netns_id = get_criu_netnsid(nlsk);
> +	if (extras.netns_id < 0) {
> +		pr_err("failed to get criu's netnsid\n");
> +		return -1;
> +	}

OK, so .link and .netns_id are both used in userns and non-userns case
to identify the master device. Right?

Please, add a code comment about it.

> +
> +	my_netns = open_proc(PROC_SELF, "ns/net");
> +	if (my_netns < 0) {
> +		pr_perror("couldn't get my netns");
> +		return -1;
> +	}
> +
> +	if (root_ns_mask & CLONE_NEWUSER) {
> +		struct newlink_req req;
> +
> +		if (populate_newlink_req(&req, RTM_NEWLINK, nde, macvlan_link_info, &extras) < 0)
> +			goto out;
> +

You don't put the my_netns into extras since it's a file descriptor and
you need to set up IFLA_NET_NS_FD to the usernsD's value, right?

Please, add a code comment about it too.

> +		if (userns_call(userns_restore_one_link, 0, &req, sizeof(req), my_netns) < 0) {
> +			pr_err("couldn't restore macvlan interface %s via usernsd\n", nde->name);
> +			goto out;
> +		}
> +	} else {
> +		int root_nlsk, ns_fd = get_service_fd(NS_FD_OFF);
> +
> +		extras.target_netns = my_netns;
> +
> +		if (setns(ns_fd, CLONE_NEWNET) < 0) {
> +			pr_perror("couldn't setns to parent ns");
> +			goto out;
> +		}
> +
> +		root_nlsk = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
> +
> +		if (setns(my_netns, CLONE_NEWNET) < 0) {
> +			close(root_nlsk);
> +			pr_perror("couldn't setns back to my ns");
> +			goto out;
> +		}

Damn, we only need these two netns-s to create nlsk in criu net namespace, do we?
That's too bad :(

The proper fix as I see it would be to keep ns_id corresponding to this netns here
in this routine and create the socket only once for the first macvlan we meet and
keep on that ns_id. This would work as this routine is only called in the context 
of the root_item.

The problem here is that we don't allocate ns_id-s for net namespaces on restore,
only for mount namespaces. But :) Andrey's set with nested netns-s changes this.
And coordinating your set and Andrey's would slow things down.

So in order to move forward, can you please keep the criu ns's socket in static
variable somewhere on restore_links() stack with an FIXME comment saying that
this crap should be moved on ns_id?

And one tiny comment below :)

> +
> +		if (root_nlsk < 0) {
> +			pr_perror("Can't create nlk socket");
> +			goto out;
> +		}
> +
> +		ret = restore_one_link(nde, root_nlsk, macvlan_link_info, &extras);
> +		close(root_nlsk);
> +		if (ret < 0)
> +			return -1;
> +	}
> +
> +	/* We have to change the flags of the NDE manually here because
> +	 * we used IFLA_LINK_NETNSID to restore it, which creates the
> +	 * device and then shuts it down when it changes the device's
> +	 * namespace, but doesn't start it back up when it goes to the
> +	 * other namespace. So, we restore its state here.
> +	 */
> +	s = socket(AF_LOCAL, SOCK_STREAM, 0);
> +	if (s < 0) {
> +		pr_perror("couldn't open socket for flag changing");
> +		goto out;
> +	}
> +	ret = changeflags(s, nde->name, nde->flags);
> +	close(s);
> +
> +out:
> +	if (my_netns >= 0)
> +		close(my_netns);
> +	return ret;
> +}
> +
>  static int restore_link(NetDeviceEntry *nde, int nlsk)
>  {
>  	pr_info("Restoring link %s type %d\n", nde->name, nde->type);
> @@ -1013,14 +1298,15 @@ static int restore_link(NetDeviceEntry *nde, int nlsk)
>  	case ND_TYPE__EXTLINK:  /* see comment in images/netdev.proto */
>  		return restore_link_parms(nde, nlsk);
>  	case ND_TYPE__VENET:
> -		return restore_one_link(nde, nlsk, venet_link_info);
> +		return restore_one_link(nde, nlsk, venet_link_info, NULL);

Can we have restore_one_link()'s new argument in separate patch like you did it
in patch #1 adding (NULL at that time) arg for dump() callback?

>  	case ND_TYPE__VETH:
> -		return restore_one_link(nde, nlsk, veth_link_info);
> +		return restore_one_link(nde, nlsk, veth_link_info, NULL);
>  	case ND_TYPE__TUN:
>  		return restore_one_tun(nde, nlsk);
>  	case ND_TYPE__BRIDGE:
> -		return restore_one_link(nde, nlsk, bridge_link_info);
> -
> +		return restore_one_link(nde, nlsk, bridge_link_info, NULL);
> +	case ND_TYPE__MACVLAN:
> +		return restore_one_macvlan(nde, nlsk);
>  	default:
>  		pr_err("Unsupported link type %d\n", nde->type);
>  		break;



More information about the CRIU mailing list