[CRIU] [PATCH 3/5] net: add support for macvlan link types

Pavel Emelyanov xemul at virtuozzo.com
Wed Jun 15 05:00:37 PDT 2016


On 06/14/2016 07:45 PM, Tycho Andersen wrote:
> While this is in principle similar to how veths are handled, we have to do
> things in two different ways depending on whether or not there is a user
> namespace involved, because there is no way to ask the kernel to attach a
> macvlan NIC to a device in a net ns that we don't have CAP_NET_ADMIN in.
> 
> So we do it in two ways:
> 
> a. If we are in a user namespace, we create the device in usernsd and use
>    IFLA_NET_NS_PID to set the netns which it should be created in (saving
>    us a "move into this netns" step).
> 
> b. If we aren't in a user namespace, we could still be in a net namespace,
>    so we use IFLA_LINK_NETNSID to set namespace that the i/o device will be
>    in. This is sort of a hack, since we have to do it via NSID, which we
>    know is 0 because we don't allow namespace nesting.

Can we use IFLA_LINK_NETNSID for case "a" as well?

(more comments inline)

> Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
> ---
>  criu/net.c           | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  images/Makefile      |   1 +
>  images/macvlan.proto |   4 ++
>  images/netdev.proto  |   6 +++
>  4 files changed, 143 insertions(+)
>  create mode 100644 images/macvlan.proto
> 
> diff --git a/criu/net.c b/criu/net.c
> index 9954270..793b625 100644
> --- a/criu/net.c
> +++ b/criu/net.c
> @@ -476,6 +476,61 @@ static int dump_bridge(NetDeviceEntry *nde, struct cr_imgset *imgset, struct nla
>  	return write_netdev_img(nde, imgset, data);
>  }
>  
> +static int dump_macvlan(NetDeviceEntry *nde, struct cr_imgset *imgset, struct nlattr **tb)
> +{
> +	MacvlanLinkEntry macvlan = MACVLAN_LINK_ENTRY__INIT;
> +	int ret;
> +	struct nlattr *info[IFLA_INFO_MAX], *data[IFLA_MACVLAN_MAX];
> +
> +	if (!tb || !tb[IFLA_LINKINFO]) {
> +		pr_err("no for macvlan\n");
> +		return -1;
> +	}
> +
> +	if (tb[IFLA_LINK]) {
> +		nde->has_link = true;
> +		nde->link = *(int *)RTA_DATA(tb[IFLA_LINK]);
> +	}
> +
> +	if (tb[IFLA_LINK_NETNSID]) {
> +		nde->has_netns_id = true;
> +		nde->netns_id = *(int *)RTA_DATA(tb[IFLA_LINK_NETNSID]);
> +	}

This seems to be generic enough to be placed in dump_one_netdev.

> +
> +	ret = nla_parse_nested(info, IFLA_INFO_MAX, tb[IFLA_LINKINFO], NULL);
> +	if (ret < 0) {
> +		pr_err("failed to parse nested linkinfo\n");
> +		return -1;
> +	}
> +
> +	if (!info[IFLA_INFO_DATA]) {
> +		pr_err("no link info data for macvlan\n");
> +		return -1;
> +	}
> +
> +	ret = nla_parse_nested(data, IFLA_MACVLAN_MAX, info[IFLA_INFO_DATA], NULL);
> +	if (ret < 0) {
> +		pr_err("failed ot parse macvlan data\n");
> +		return -1;
> +	}
> +
> +	if (!data[IFLA_MACVLAN_MODE]) {
> +		pr_err("macvlan mode required for %s\n", nde->name);
> +		return -1;
> +	}
> +
> +	macvlan.mode = *((u32 *)RTA_DATA(data[IFLA_MACVLAN_MODE]));
> +
> +	if (data[IFLA_MACVLAN_FLAGS])
> +		macvlan.flags = *((u16 *) RTA_DATA(data[IFLA_MACVLAN_FLAGS]));
> +
> +	nde->macvlan = &macvlan;
> +	ret = write_netdev_img(nde, imgset, data);
> +
> +	nde->macvlan = NULL;
> +	return ret;

Just doing 'return write_netdev_img()' should be enough.

> +}
> +
>  static int dump_one_ethernet(struct ifinfomsg *ifi, char *kind,
>  		struct nlattr **tb, struct cr_imgset *fds)
>  {
> @@ -508,6 +563,8 @@ static int dump_one_ethernet(struct ifinfomsg *ifi, char *kind,
>  
>  		pr_warn("GRE tap device %s not supported natively\n", name);
>  	}
> +	if (!strcmp(kind, "macvlan"))
> +		return dump_one_netdev(ND_TYPE__MACVLAN, ifi, tb, fds, dump_macvlan);
>  
>  	return dump_unknown_device(ifi, kind, tb, fds);
>  }
> @@ -828,6 +885,22 @@ static int populate_newlink_req(struct newlink_req *req, int msg_type, NetDevice
>  		req->i.ifi_index = nde->ifindex;
>  	req->i.ifi_flags = nde->flags;
>  
> +	/* Note that this id isn't preserved anywhere, but since we don't
> +	 * support nested namespaces, right now there is only one peer
> +	 * namespace, the parent NS with an id of 0, so this works. In the
> +	 * future, we'll need to be more careful about munging this ID to be
> +	 * correct (or restoring namespaces in such a way that they get the
> +	 * same ID).
> +	 */
> +	if (nde->has_netns_id)
> +		addattr_l(&req->h, sizeof(*req), IFLA_LINK_NETNSID, &nde->netns_id, sizeof(nde->netns_id));
> +
> +	/* Like netns_id, this is not preserved across hosts (indeed, a link
> +	 * with this ifindex may not even exist). We add support for rewriting
> +	 * it in a later patch.
> +	 */
> +	if (nde->has_link)
> +		addattr_l(&req->h, sizeof(*req), IFLA_LINK, &nde->link, sizeof(nde->link));

Can you make the setting and getting of these attrs symmetrical -- either both
are get/set in the macvlan dump/restore code, or both are get/set in generic code,
or one (netnsid?) is get/set in generic code and the other in macvlan code?

>  	addattr_l(&req->h, sizeof(*req), IFLA_IFNAME, nde->name, strlen(nde->name));
>  	addattr_l(&req->h, sizeof(*req), IFLA_MTU, &nde->mtu, sizeof(nde->mtu));
> @@ -949,6 +1022,49 @@ static int bridge_link_info(NetDeviceEntry *nde, struct newlink_req *req)
>  	return 0;
>  }
>  
> +static int macvlan_link_info(NetDeviceEntry *nde, struct newlink_req *req)
> +{
> +	struct rtattr *macvlan_data;
> +	MacvlanLinkEntry *macvlan = nde->macvlan;
> +
> +	if (!macvlan) {
> +		pr_err("Missing macvlan link entry %d\n", nde->ifindex);
> +		return -1;
> +	}
> +
> +	addattr_l(&req->h, sizeof(*req), IFLA_INFO_KIND, "macvlan", 7);
> +
> +	macvlan_data = NLMSG_TAIL(&req->h);
> +	addattr_l(&req->h, sizeof(*req), IFLA_INFO_DATA, NULL, 0);
> +
> +	addattr_l(&req->h, sizeof(*req), IFLA_MACVLAN_MODE, &macvlan->mode, sizeof(macvlan->mode));
> +
> +	if (macvlan->has_flags)
> +		addattr_l(&req->h, sizeof(*req), IFLA_MACVLAN_FLAGS, &macvlan->flags, sizeof(macvlan->flags));
> +
> +	macvlan_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)macvlan_data;
> +
> +	return 0;
> +}
> +
> +static int userns_restore_one_link(void *arg, int fd, pid_t pid)
> +{
> +	int nlsk, ret;
> +	struct newlink_req *req = arg;
> +
> +	nlsk = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
> +	if (nlsk < 0) {
> +		pr_perror("Can't create nlk socket");
> +		return -1;
> +	}
> +
> +	addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_PID, &pid, sizeof(pid));
> +
> +	ret = do_rtnl_req(nlsk, req, req->h.nlmsg_len, restore_link_cb, NULL, NULL);
> +	close(nlsk);
> +	return ret;
> +}
> +
>  static int restore_link(NetDeviceEntry *nde, int nlsk)
>  {
>  	pr_info("Restoring link %s type %d\n", nde->name, nde->type);
> @@ -965,7 +1081,23 @@ static int restore_link(NetDeviceEntry *nde, int nlsk)
>  		return restore_one_tun(nde, nlsk);
>  	case ND_TYPE__BRIDGE:
>  		return restore_one_link(nde, nlsk, bridge_link_info);
> +	case ND_TYPE__MACVLAN: {
> +		if (root_ns_mask & CLONE_NEWNET) {

Shouldn't this be CLONE_NEWUSER instead?

> +			struct newlink_req req;
>  
> +			if (populate_newlink_req(&req, RTM_NEWLINK, nde, macvlan_link_info) < 0)
> +				return -1;
> +
> +			if (userns_call(userns_restore_one_link, 0, &req, sizeof(req), -1) < 0) {
> +				pr_err("couldn't restore macvlan interface %s via usernsd\n", nde->name);
> +				return -1;
> +			}
> +
> +			return 0;
> +		}
> +
> +		return restore_one_link(nde, nlsk, macvlan_link_info);
> +	}
>  	default:
>  		pr_err("Unsupported link type %d\n", nde->type);
>  		break;
> diff --git a/images/Makefile b/images/Makefile
> index cf50794..eb18526 100644
> --- a/images/Makefile
> +++ b/images/Makefile
> @@ -60,6 +60,7 @@ proto-obj-y	+= binfmt-misc.o
>  proto-obj-y	+= time.o
>  proto-obj-y	+= sysctl.o
>  proto-obj-y	+= autofs.o
> +proto-obj-y	+= macvlan.o
>  
>  CFLAGS		+= -iquote $(obj)/
>  
> diff --git a/images/macvlan.proto b/images/macvlan.proto
> new file mode 100644
> index 0000000..c9c9045
> --- /dev/null
> +++ b/images/macvlan.proto
> @@ -0,0 +1,4 @@
> +message macvlan_link_entry {
> +	required uint32	mode	= 1;
> +	optional uint32 flags	= 2;
> +}
> diff --git a/images/netdev.proto b/images/netdev.proto
> index 37cafb3..746db16 100644
> --- a/images/netdev.proto
> +++ b/images/netdev.proto
> @@ -1,3 +1,4 @@
> +import "macvlan.proto";
>  import "opts.proto";
>  import "tun.proto";
>  import "sysctl.proto";
> @@ -18,6 +19,7 @@ enum nd_type {
>  	 */
>  	VENET		= 5;
>  	BRIDGE		= 6;
> +	MACVLAN		= 7;
>  }
>  
>  message net_device_entry {
> @@ -36,6 +38,10 @@ message net_device_entry {
>  	repeated sysctl_entry conf4	= 9;
>  
>  	repeated sysctl_entry conf6	= 10;
> +
> +	optional int32			link		= 11;
> +	optional int32			netns_id	= 12;
> +	optional macvlan_link_entry	macvlan		= 13;
>  }
>  
>  message netns_entry {
> 



More information about the CRIU mailing list