[CRIU] [PATCH v2 5/6] net: add support for macvlan link types

Pavel Emelyanov xemul at virtuozzo.com
Mon Oct 17 04:09:22 PDT 2016


> diff --git a/Makefile.config b/Makefile.config
> index cce32fa..2259ad6 100644
> --- a/Makefile.config
> +++ b/Makefile.config
> @@ -28,7 +28,7 @@ export DEFINES += $(FEATURE_DEFINES)
>  export CFLAGS += $(FEATURE_DEFINES)
>  
>  FEATURES_LIST	:= TCP_REPAIR STRLCPY STRLCAT PTRACE_PEEKSIGINFO \
> -	SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW
> +	SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW NET_NAMESPACE_H

Can we put the feature test in a separate patch?

>  
>  # $1 - config name
>  define gen-feature-test
> diff --git a/criu/crtools.c b/criu/crtools.c
> index 8b5ec5d..8deb20a 100644
> --- a/criu/crtools.c
> +++ b/criu/crtools.c
> @@ -842,6 +842,7 @@ usage:
>  "                        Formats of RES on restore:\n"
>  "                            dev[VAL]:DEVPATH\n"
>  "                            veth[IFNAME]:OUTNAME{@BRIDGE}\n"
> +"                            macvlan[IFNAME]:OUTNAME\n"
>  "\n"
>  "* Special resources support:\n"
>  "     --" SK_EST_PARAM "  checkpoint/restore established TCP connections\n"
> diff --git a/criu/external.c b/criu/external.c
> index bc6c6d4..d8fee21 100644
> --- a/criu/external.c
> +++ b/criu/external.c
> @@ -3,6 +3,9 @@
>  #include "cr_options.h"
>  #include "xmalloc.h"
>  #include "external.h"
> +#include "util.h"
> +
> +#include "net.h"
>  
>  int add_external(char *key)
>  {
> @@ -12,6 +15,12 @@ int add_external(char *key)
>  	if (!ext)
>  		return -1;
>  	ext->id = key;
> +
> +	if (strstartswith(key, "macvlan") && macvlan_ext_add(ext) < 0) {
> +		xfree(ext);
> +		return -1;
> +	}
> +
>  	list_add(&ext->node, &opts.external);
>  
>  	return 0;
> diff --git a/criu/include/net.h b/criu/include/net.h
> index b4a6e99..f94d838 100644
> --- a/criu/include/net.h
> +++ b/criu/include/net.h
> @@ -4,6 +4,7 @@
>  #include <linux/netlink.h>
>  
>  #include "list.h"
> +#include "external.h"
>  
>  struct cr_imgset;
>  extern int dump_net_ns(int ns_id);
> @@ -17,6 +18,12 @@ struct veth_pair {
>  	char *bridge;
>  };
>  
> +struct macvlan_pair {
> +	struct list_head node;
> +	char *inside;
> +	int ifi_outside;
> +};

This struct is unused.

> +
>  extern int collect_net_namespaces(bool for_dump);
>  
>  extern int network_lock(void);
> @@ -30,6 +37,7 @@ extern int read_ns_sys_file(char *path, char *buf, int len);
>  extern int restore_link_parms(NetDeviceEntry *nde, int nlsk);
>  
>  extern int veth_pair_add(char *in, char *out);
> +extern int macvlan_ext_add(struct external *ext);
>  extern int move_veth_to_bridge(void);
>  
>  #endif /* __CR_NET_H__ */
> diff --git a/criu/net.c b/criu/net.c
> index b5f9818..aac077c 100644
> --- a/criu/net.c
> +++ b/criu/net.c
> @@ -10,6 +10,7 @@
>  #include <sys/wait.h>
>  #include <sched.h>
>  #include <sys/mount.h>
> +#include <sys/types.h>
>  #include <net/if.h>
>  #include <linux/sockios.h>
>  #include <libnl3/netlink/msg.h>
> @@ -34,6 +35,33 @@
>  #include "protobuf.h"
>  #include "images/netdev.pb-c.h"
>  
> +#ifdef CONFIG_HAS_NET_NAMESPACE_H
> +#include <linux/net_namespace.h>
> +#else
> +#define NETNSA_NSID	1
> +#define NETNSA_FD	3
> +#endif
> +
> +#ifndef IFLA_LINK_NETNSID
> +#define IFLA_LINK_NETNSID	37
> +#endif
> +
> +#ifndef RTM_NEWNSID
> +#define RTM_NEWNSID		88
> +#endif
> +
> +/* if we're being compiled with older headers, allow more macvlan attributes to
> + * be set in case we're talking to a newer kernel
> + */
> +#if IFLA_MACVLAN_MAX < 7
> +#undef IFLA_MACVLAN_MAX
> +#define IFLA_MACVLAN_MAX 7
> +#endif
> +
> +#ifndef IFLA_MACVLAN_FLAGS
> +#define IFLA_MACVLAN_FLAGS 2
> +#endif
> +
>  static int ns_sysfs_fd = -1;
>  
>  int read_ns_sys_file(char *path, char *buf, int len)
> @@ -354,6 +382,7 @@ static int dump_one_netdev(int type, struct ifinfomsg *ifi,
>  	SysctlEntry *confs6 = NULL;
>  	int size6 = ARRAY_SIZE(devconfs6);
>  	char stable_secret[MAX_STR_CONF_LEN + 1] = {};
> +	struct nlattr *info[IFLA_INFO_MAX], **arg = NULL;
>  
>  	if (!tb[IFLA_IFNAME]) {
>  		pr_err("No name for link %d\n", ifi->ifi_index);
> @@ -421,7 +450,16 @@ static int dump_one_netdev(int type, struct ifinfomsg *ifi,
>  	if (!dump)
>  		dump = write_netdev_img;
>  
> -	ret = dump(&netdev, fds, tb);
> +	if (tb[IFLA_LINKINFO]) {
> +		ret = nla_parse_nested(info, IFLA_INFO_MAX, tb[IFLA_LINKINFO], NULL);
> +		if (ret < 0) {
> +			pr_err("failed to parse nested linkinfo\n");
> +			return -1;
> +		}
> +		arg = info;

Shouldn't this rather be in dump_macvlan()? The tb is there already :)

> +	}
> +
> +	ret = dump(&netdev, fds, arg);
>  err_free:
>  	xfree(netdev.conf4);
>  	xfree(confs4);
> @@ -498,6 +536,37 @@ static int dump_bridge(NetDeviceEntry *nde, struct cr_imgset *imgset, struct nla
>  	return write_netdev_img(nde, imgset, data);
>  }
>  
> +static int dump_macvlan(NetDeviceEntry *nde, struct cr_imgset *imgset, struct nlattr **info)
> +{
> +	MacvlanLinkEntry macvlan = MACVLAN_LINK_ENTRY__INIT;
> +	int ret;
> +	struct nlattr *data[IFLA_MACVLAN_MAX];
> +
> +	if (!info || !info[IFLA_INFO_DATA]) {
> +		pr_err("no data for macvlan\n");
> +		return -1;
> +	}
> +
> +	ret = nla_parse_nested(data, IFLA_MACVLAN_MAX, info[IFLA_INFO_DATA], NULL);
> +	if (ret < 0) {
> +		pr_err("failed ot parse macvlan data\n");
> +		return -1;
> +	}
> +
> +	if (!data[IFLA_MACVLAN_MODE]) {
> +		pr_err("macvlan mode required for %s\n", nde->name);
> +		return -1;
> +	}
> +
> +	macvlan.mode = *((u32 *)RTA_DATA(data[IFLA_MACVLAN_MODE]));
> +
> +	if (data[IFLA_MACVLAN_FLAGS])
> +		macvlan.flags = *((u16 *) RTA_DATA(data[IFLA_MACVLAN_FLAGS]));
> +
> +	nde->macvlan = &macvlan;
> +	return write_netdev_img(nde, imgset, data);
> +}
> +
>  static int dump_one_ethernet(struct ifinfomsg *ifi, char *kind,
>  		struct nlattr **tb, struct cr_imgset *fds)
>  {
> @@ -530,6 +599,8 @@ static int dump_one_ethernet(struct ifinfomsg *ifi, char *kind,
>  
>  		pr_warn("GRE tap device %s not supported natively\n", name);
>  	}
> +	if (!strcmp(kind, "macvlan"))
> +		return dump_one_netdev(ND_TYPE__MACVLAN, ifi, tb, fds, dump_macvlan);
>  
>  	return dump_unknown_device(ifi, kind, tb, fds);
>  }
> @@ -831,8 +902,14 @@ struct newlink_req {
>  	char buf[1024];
>  };
>  
> +struct newlink_extras {
> +	int netns_id;
> +	int link;
> +	int target_netns;

Please document these fields.

> +};
> +
>  static int populate_newlink_req(struct newlink_req *req, int msg_type, NetDeviceEntry *nde,
> -		int (*link_info)(NetDeviceEntry *, struct newlink_req *))
> +		int (*link_info)(NetDeviceEntry *, struct newlink_req *), struct newlink_extras *extras)
>  {
>  	memset(req, 0, sizeof(*req));
>  
> @@ -850,6 +927,18 @@ static int populate_newlink_req(struct newlink_req *req, int msg_type, NetDevice
>  		req->i.ifi_index = nde->ifindex;
>  	req->i.ifi_flags = nde->flags;
>  
> +	if (extras) {
> +		if (extras->netns_id >= 0)
> +			addattr_l(&req->h, sizeof(*req), IFLA_LINK_NETNSID, &extras->netns_id, sizeof(extras->netns_id));
> +
> +		if (extras->link >= 0)
> +			addattr_l(&req->h, sizeof(*req), IFLA_LINK, &extras->link, sizeof(extras->link));
> +
> +		if (extras->target_netns >= 0) {
> +			addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &extras->target_netns, sizeof(extras->target_netns));
> +		}
> +
> +	}

Shouldn't this be in macvlan_link_info()?

>  
>  	addattr_l(&req->h, sizeof(*req), IFLA_IFNAME, nde->name, strlen(nde->name));
>  	addattr_l(&req->h, sizeof(*req), IFLA_MTU, &nde->mtu, sizeof(nde->mtu));
> @@ -879,11 +968,12 @@ static int populate_newlink_req(struct newlink_req *req, int msg_type, NetDevice
>  }
>  
>  static int do_rtm_link_req(int msg_type, NetDeviceEntry *nde, int nlsk,
> -		int (*link_info)(NetDeviceEntry *, struct newlink_req *))
> +		int (*link_info)(NetDeviceEntry *, struct newlink_req *),
> +		struct newlink_extras *extras)
>  {
>  	struct newlink_req req;
>  
> -	if (populate_newlink_req(&req, msg_type, nde, link_info) < 0)
> +	if (populate_newlink_req(&req, msg_type, nde, link_info, extras) < 0)
>  		return -1;
>  
>  	return do_rtnl_req(nlsk, &req, req.h.nlmsg_len, restore_link_cb, NULL, NULL);
> @@ -891,14 +981,15 @@ static int do_rtm_link_req(int msg_type, NetDeviceEntry *nde, int nlsk,
>  
>  int restore_link_parms(NetDeviceEntry *nde, int nlsk)
>  {
> -	return do_rtm_link_req(RTM_SETLINK, nde, nlsk, NULL);
> +	return do_rtm_link_req(RTM_SETLINK, nde, nlsk, NULL, NULL);
>  }
>  
>  static int restore_one_link(NetDeviceEntry *nde, int nlsk,
> -		int (*link_info)(NetDeviceEntry *, struct newlink_req *))
> +		int (*link_info)(NetDeviceEntry *, struct newlink_req *),
> +		struct newlink_extras *extras)
>  {
>  	pr_info("Restoring netdev %s idx %d\n", nde->name, nde->ifindex);
> -	return do_rtm_link_req(RTM_NEWLINK, nde, nlsk, link_info);
> +	return do_rtm_link_req(RTM_NEWLINK, nde, nlsk, link_info, extras);
>  }
>  
>  #ifndef VETH_INFO_MAX
> @@ -979,6 +1070,109 @@ static int bridge_link_info(NetDeviceEntry *nde, struct newlink_req *req)
>  	return 0;
>  }
>  
> +static int macvlan_link_info(NetDeviceEntry *nde, struct newlink_req *req)
> +{
> +	struct rtattr *macvlan_data;
> +	MacvlanLinkEntry *macvlan = nde->macvlan;
> +
> +	if (!macvlan) {
> +		pr_err("Missing macvlan link entry %d\n", nde->ifindex);
> +		return -1;
> +	}
> +
> +	addattr_l(&req->h, sizeof(*req), IFLA_INFO_KIND, "macvlan", 7);
> +
> +	macvlan_data = NLMSG_TAIL(&req->h);
> +	addattr_l(&req->h, sizeof(*req), IFLA_INFO_DATA, NULL, 0);
> +
> +	addattr_l(&req->h, sizeof(*req), IFLA_MACVLAN_MODE, &macvlan->mode, sizeof(macvlan->mode));
> +
> +	if (macvlan->has_flags)
> +		addattr_l(&req->h, sizeof(*req), IFLA_MACVLAN_FLAGS, &macvlan->flags, sizeof(macvlan->flags));
> +
> +	macvlan_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)macvlan_data;
> +
> +	return 0;
> +}
> +
> +static int userns_restore_one_link(void *arg, int fd, pid_t pid)
> +{
> +	int nlsk, ret;
> +	struct newlink_req *req = arg;
> +
> +	nlsk = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
> +	if (nlsk < 0) {
> +		pr_perror("Can't create nlk socket");
> +		return -1;
> +	}
> +
> +	addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_PID, &pid, sizeof(pid));
> +
> +	ret = do_rtnl_req(nlsk, req, req->h.nlmsg_len, restore_link_cb, NULL, NULL);
> +	close(nlsk);
> +	return ret;
> +}
> +
> +static int changeflags(NetDeviceEntry *nde, int nlsk)
> +{
> +	struct newlink_req req;
> +
> +	memzero(&req, sizeof(req));
> +
> +	req.h.nlmsg_len = NLMSG_LENGTH(sizeof(req.i));
> +	req.h.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK;
> +	req.h.nlmsg_type = RTM_SETLINK;
> +	req.h.nlmsg_seq = CR_NLMSG_SEQ;
> +	req.i.ifi_family = AF_PACKET;
> +	req.i.ifi_flags = nde->flags;
> +
> +	addattr_l(&req.h, sizeof(req), IFLA_IFNAME, nde->name, strlen(nde->name));
> +
> +	return do_rtnl_req(nlsk, &req, req.h.nlmsg_len, NULL, NULL, NULL);

We already have setflags code in move_to_bridge() that brings the device UP.
Can we merge that code with this?

> +}
> +
> +static int get_criu_netnsid(int nlsk)
> +{
> +	static int netnsid = -1;
> +	struct {
> +		struct nlmsghdr n;
> +		struct rtgenmsg g;
> +		char buf[1024];
> +	} req;
> +	int ns_fd = get_service_fd(NS_FD_OFF), i;
> +
> +	if (netnsid > 0)
> +		return netnsid;
> +
> +	for (i = 0; i < 10; i++) {
> +		int ret;
> +
> +		memset(&req, 0, sizeof(req));
> +
> +		req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.g));
> +		req.n.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK;
> +		req.n.nlmsg_type = RTM_NEWNSID;
> +		req.n.nlmsg_seq = CR_NLMSG_SEQ;
> +
> +		addattr_l(&req.n, sizeof(req), NETNSA_FD, &ns_fd, sizeof(ns_fd));
> +		addattr_l(&req.n, sizeof(req), NETNSA_NSID, &i, sizeof(i));
> +
> +		ret = do_rtnl_req(nlsk, &req, req.n.nlmsg_len, NULL, NULL, NULL);
> +		if (ret < 0) {
> +			if (ret == -EEXIST)
> +				continue;

Erm... Shouldn't we re-use the netnsid if it exists already?

> +			errno = -ret;
> +			pr_perror("couldn't create new netnsid");
> +		}
> +
> +		netnsid = i;
> +		return netnsid;
> +	}
> +
> +	pr_err("tried to create too many netnsids\n");
> +	return -1;
> +}
> +
>  static int restore_link(NetDeviceEntry *nde, int nlsk)
>  {
>  	pr_info("Restoring link %s type %d\n", nde->name, nde->type);
> @@ -988,14 +1182,93 @@ static int restore_link(NetDeviceEntry *nde, int nlsk)
>  	case ND_TYPE__EXTLINK:  /* see comment in images/netdev.proto */
>  		return restore_link_parms(nde, nlsk);
>  	case ND_TYPE__VENET:
> -		return restore_one_link(nde, nlsk, venet_link_info);
> +		return restore_one_link(nde, nlsk, venet_link_info, NULL);
>  	case ND_TYPE__VETH:
> -		return restore_one_link(nde, nlsk, veth_link_info);
> +		return restore_one_link(nde, nlsk, veth_link_info, NULL);
>  	case ND_TYPE__TUN:
>  		return restore_one_tun(nde, nlsk);
>  	case ND_TYPE__BRIDGE:
> -		return restore_one_link(nde, nlsk, bridge_link_info);
> +		return restore_one_link(nde, nlsk, bridge_link_info, NULL);
> +	case ND_TYPE__MACVLAN: {

Please move the whole case branch into a helper function.

> +		struct newlink_extras extras = {
> +			.netns_id = -1,
> +			.link = -1,
> +			.target_netns = -1,
> +		};
> +		char key[100], *val;
> +
> +		snprintf(key, sizeof(key), "macvlan[%s]", nde->name);
> +		val = external_lookup_data(key);
> +		if (IS_ERR_OR_NULL(val)) {
> +			pr_err("a macvlan parent for %s is required\n", nde->name);
> +			return -1;
> +		}
> +
> +		extras.link = (int) (unsigned long) val;
>  
> +		extras.netns_id = get_criu_netnsid(nlsk);
> +		if (extras.netns_id < 0) {
> +			pr_err("failed to get criu's netnsid\n");
> +			return -1;
> +		}
> +
> +		if (root_ns_mask & CLONE_NEWUSER) {
> +			struct newlink_req req;
> +
> +			if (populate_newlink_req(&req, RTM_NEWLINK, nde, macvlan_link_info, &extras) < 0)
> +				return -1;
> +
> +			if (userns_call(userns_restore_one_link, 0, &req, sizeof(req), -1) < 0) {
> +				pr_err("couldn't restore macvlan interface %s via usernsd\n", nde->name);
> +				return -1;
> +			}
> +		} else {
> +			int my_netns, ret, root_nlsk;
> +			int ns_fd = get_service_fd(NS_FD_OFF);
> +
> +			my_netns = open_proc(PROC_SELF, "ns/net");
> +			if (my_netns < 0) {
> +				pr_perror("couldn't get my netns");
> +				return -1;
> +			}
> +
> +			extras.target_netns = my_netns;

For the userns case, the target netns is identified by pid.
Can we do it the same way here?

> +
> +			if (setns(ns_fd, CLONE_NEWNET) < 0) {
> +				close(my_netns);
> +				pr_perror("couldn't setns to parent ns");
> +				return -1;
> +			}
> +
> +			root_nlsk = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

So we need a socket in criu netns for both cases -- userns and
non-userns, don't we?

> +			if (root_nlsk < 0) {
> +				close(my_netns);
> +				pr_perror("Can't create nlk socket");
> +				return -1;
> +			}
> +
> +			if (setns(my_netns, CLONE_NEWNET) < 0) {
> +				close(my_netns);
> +				close(root_nlsk);
> +				pr_perror("couldn't setns back to my ns");
> +				return -1;
> +			}
> +
> +			ret = restore_one_link(nde, root_nlsk, macvlan_link_info, &extras);
> +			close(my_netns);
> +			close(root_nlsk);
> +			if (ret < 0)
> +				return -1;
> +		}
> +
> +		/* We have to change the flags of the NDE manually here because
> +		 * we used IFLA_LINK_NETNSID to restore it, which creates the
> +		 * device and then shuts it down when it changes the device's
> +		 * namespace, but doesn't start it back up when it goes to the
> +		 * other namespace. So, we restore its state here.
> +		 */
> +		return changeflags(nde, nlsk);
> +	}
>  	default:
>  		pr_err("Unsupported link type %d\n", nde->type);
>  		break;
> @@ -1648,6 +1921,17 @@ int veth_pair_add(char *in, char *out)
>  	return add_external(e_str);
>  }
>  
> +int macvlan_ext_add(struct external *ext)
> +{
> +	ext->data = (void *) (unsigned long) if_nametoindex(external_val(ext));
> +	if (ext->data == 0) {
> +		pr_perror("can't get ifindex of %s", ext->id);
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
>  /*
>   * The setns() syscall (called by switch_ns()) can be extremely
>   * slow. If we call it two or more times from the same task the
> diff --git a/images/Makefile b/images/Makefile
> index cf50794..eb18526 100644
> --- a/images/Makefile
> +++ b/images/Makefile
> @@ -60,6 +60,7 @@ proto-obj-y	+= binfmt-misc.o
>  proto-obj-y	+= time.o
>  proto-obj-y	+= sysctl.o
>  proto-obj-y	+= autofs.o
> +proto-obj-y	+= macvlan.o
>  
>  CFLAGS		+= -iquote $(obj)/
>  
> diff --git a/images/macvlan.proto b/images/macvlan.proto
> new file mode 100644
> index 0000000..c9c9045
> --- /dev/null
> +++ b/images/macvlan.proto
> @@ -0,0 +1,4 @@
> +message macvlan_link_entry {
> +	required uint32	mode	= 1;
> +	optional uint32 flags	= 2;
> +}
> diff --git a/images/netdev.proto b/images/netdev.proto
> index 19b501c..2f2f3d1 100644
> --- a/images/netdev.proto
> +++ b/images/netdev.proto
> @@ -1,5 +1,6 @@
>  syntax = "proto2";
>  
> +import "macvlan.proto";
>  import "opts.proto";
>  import "tun.proto";
>  import "sysctl.proto";
> @@ -20,6 +21,7 @@ enum nd_type {
>  	 */
>  	VENET		= 5;
>  	BRIDGE		= 6;
> +	MACVLAN		= 7;
>  }
>  
>  message net_device_entry {
> @@ -38,6 +40,8 @@ message net_device_entry {
>  	repeated sysctl_entry conf4	= 9;
>  
>  	repeated sysctl_entry conf6	= 10;
> +
> +	optional macvlan_link_entry	macvlan		= 11;
>  }
>  
>  message netns_entry {
> diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak
> index ad50eb4..8812c47 100644
> --- a/scripts/feature-tests.mak
> +++ b/scripts/feature-tests.mak
> @@ -100,3 +100,14 @@ int main(int argc, char **argv)
>  }
>  
>  endef
> +
> +define FEATURE_TEST_NET_NAMESPACE_H
> +
> +#include <linux/net_namespace.h>
> +
> +int main(int argc, char **argv)
> +{
> +        return 0;
> +}
> +
> +endef




More information about the CRIU mailing list