[CRIU] [PATCH v2 5/6] net: add support for macvlan link types
Pavel Emelyanov
xemul at virtuozzo.com
Mon Oct 17 04:09:22 PDT 2016
> diff --git a/Makefile.config b/Makefile.config
> index cce32fa..2259ad6 100644
> --- a/Makefile.config
> +++ b/Makefile.config
> @@ -28,7 +28,7 @@ export DEFINES += $(FEATURE_DEFINES)
> export CFLAGS += $(FEATURE_DEFINES)
>
> FEATURES_LIST := TCP_REPAIR STRLCPY STRLCAT PTRACE_PEEKSIGINFO \
> - SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW
> + SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW NET_NAMESPACE_H
Can we add the feature test in a separate patch?
>
> # $1 - config name
> define gen-feature-test
> diff --git a/criu/crtools.c b/criu/crtools.c
> index 8b5ec5d..8deb20a 100644
> --- a/criu/crtools.c
> +++ b/criu/crtools.c
> @@ -842,6 +842,7 @@ usage:
> " Formats of RES on restore:\n"
> " dev[VAL]:DEVPATH\n"
> " veth[IFNAME]:OUTNAME{@BRIDGE}\n"
> +" macvlan[IFNAME]:OUTNAME\n"
> "\n"
> "* Special resources support:\n"
> " --" SK_EST_PARAM " checkpoint/restore established TCP connections\n"
> diff --git a/criu/external.c b/criu/external.c
> index bc6c6d4..d8fee21 100644
> --- a/criu/external.c
> +++ b/criu/external.c
> @@ -3,6 +3,9 @@
> #include "cr_options.h"
> #include "xmalloc.h"
> #include "external.h"
> +#include "util.h"
> +
> +#include "net.h"
>
> int add_external(char *key)
> {
> @@ -12,6 +15,12 @@ int add_external(char *key)
> if (!ext)
> return -1;
> ext->id = key;
> +
> + if (strstartswith(key, "macvlan") && macvlan_ext_add(ext) < 0) {
> + xfree(ext);
> + return -1;
> + }
> +
> list_add(&ext->node, &opts.external);
>
> return 0;
> diff --git a/criu/include/net.h b/criu/include/net.h
> index b4a6e99..f94d838 100644
> --- a/criu/include/net.h
> +++ b/criu/include/net.h
> @@ -4,6 +4,7 @@
> #include <linux/netlink.h>
>
> #include "list.h"
> +#include "external.h"
>
> struct cr_imgset;
> extern int dump_net_ns(int ns_id);
> @@ -17,6 +18,12 @@ struct veth_pair {
> char *bridge;
> };
>
> +struct macvlan_pair {
> + struct list_head node;
> + char *inside;
> + int ifi_outside;
> +};
The macvlan_pair struct is unused.
> +
> extern int collect_net_namespaces(bool for_dump);
>
> extern int network_lock(void);
> @@ -30,6 +37,7 @@ extern int read_ns_sys_file(char *path, char *buf, int len);
> extern int restore_link_parms(NetDeviceEntry *nde, int nlsk);
>
> extern int veth_pair_add(char *in, char *out);
> +extern int macvlan_ext_add(struct external *ext);
> extern int move_veth_to_bridge(void);
>
> #endif /* __CR_NET_H__ */
> diff --git a/criu/net.c b/criu/net.c
> index b5f9818..aac077c 100644
> --- a/criu/net.c
> +++ b/criu/net.c
> @@ -10,6 +10,7 @@
> #include <sys/wait.h>
> #include <sched.h>
> #include <sys/mount.h>
> +#include <sys/types.h>
> #include <net/if.h>
> #include <linux/sockios.h>
> #include <libnl3/netlink/msg.h>
> @@ -34,6 +35,33 @@
> #include "protobuf.h"
> #include "images/netdev.pb-c.h"
>
> +#ifdef CONFIG_HAS_NET_NAMESPACE_H
> +#include <linux/net_namespace.h>
> +#else
> +#define NETNSA_NSID 1
> +#define NETNSA_FD 3
> +#endif
> +
> +#ifndef IFLA_LINK_NETNSID
> +#define IFLA_LINK_NETNSID 37
> +#endif
> +
> +#ifndef RTM_NEWNSID
> +#define RTM_NEWNSID 88
> +#endif
> +
> +/* if we're being compiled with older headers, allow more macvlan attributes to
> + * be set in case we're talking to a newer kernel
> + */
> +#if IFLA_MACVLAN_MAX < 7
> +#undef IFLA_MACVLAN_MAX
> +#define IFLA_MACVLAN_MAX 7
> +#endif
> +
> +#ifndef IFLA_MACVLAN_FLAGS
> +#define IFLA_MACVLAN_FLAGS 2
> +#endif
> +
> static int ns_sysfs_fd = -1;
>
> int read_ns_sys_file(char *path, char *buf, int len)
> @@ -354,6 +382,7 @@ static int dump_one_netdev(int type, struct ifinfomsg *ifi,
> SysctlEntry *confs6 = NULL;
> int size6 = ARRAY_SIZE(devconfs6);
> char stable_secret[MAX_STR_CONF_LEN + 1] = {};
> + struct nlattr *info[IFLA_INFO_MAX], **arg = NULL;
>
> if (!tb[IFLA_IFNAME]) {
> pr_err("No name for link %d\n", ifi->ifi_index);
> @@ -421,7 +450,16 @@ static int dump_one_netdev(int type, struct ifinfomsg *ifi,
> if (!dump)
> dump = write_netdev_img;
>
> - ret = dump(&netdev, fds, tb);
> + if (tb[IFLA_LINKINFO]) {
> + ret = nla_parse_nested(info, IFLA_INFO_MAX, tb[IFLA_LINKINFO], NULL);
> + if (ret < 0) {
> + pr_err("failed to parse nested linkinfo\n");
> + return -1;
> + }
> + arg = info;
Shouldn't this rather be in dump_macvlan()? The tb is there already :)
> + }
> +
> + ret = dump(&netdev, fds, arg);
> err_free:
> xfree(netdev.conf4);
> xfree(confs4);
> @@ -498,6 +536,37 @@ static int dump_bridge(NetDeviceEntry *nde, struct cr_imgset *imgset, struct nla
> return write_netdev_img(nde, imgset, data);
> }
>
> +static int dump_macvlan(NetDeviceEntry *nde, struct cr_imgset *imgset, struct nlattr **info)
> +{
> + MacvlanLinkEntry macvlan = MACVLAN_LINK_ENTRY__INIT;
> + int ret;
> + struct nlattr *data[IFLA_MACVLAN_MAX];
> +
> + if (!info || !info[IFLA_INFO_DATA]) {
> + pr_err("no data for macvlan\n");
> + return -1;
> + }
> +
> + ret = nla_parse_nested(data, IFLA_MACVLAN_MAX, info[IFLA_INFO_DATA], NULL);
> + if (ret < 0) {
> + pr_err("failed ot parse macvlan data\n");
> + return -1;
> + }
> +
> + if (!data[IFLA_MACVLAN_MODE]) {
> + pr_err("macvlan mode required for %s\n", nde->name);
> + return -1;
> + }
> +
> + macvlan.mode = *((u32 *)RTA_DATA(data[IFLA_MACVLAN_MODE]));
> +
> + if (data[IFLA_MACVLAN_FLAGS])
> + macvlan.flags = *((u16 *) RTA_DATA(data[IFLA_MACVLAN_FLAGS]));
> +
> + nde->macvlan = &macvlan;
> + return write_netdev_img(nde, imgset, data);
> +}
> +
> static int dump_one_ethernet(struct ifinfomsg *ifi, char *kind,
> struct nlattr **tb, struct cr_imgset *fds)
> {
> @@ -530,6 +599,8 @@ static int dump_one_ethernet(struct ifinfomsg *ifi, char *kind,
>
> pr_warn("GRE tap device %s not supported natively\n", name);
> }
> + if (!strcmp(kind, "macvlan"))
> + return dump_one_netdev(ND_TYPE__MACVLAN, ifi, tb, fds, dump_macvlan);
>
> return dump_unknown_device(ifi, kind, tb, fds);
> }
> @@ -831,8 +902,14 @@ struct newlink_req {
> char buf[1024];
> };
>
> +struct newlink_extras {
> + int netns_id;
> + int link;
> + int target_netns;
Please, document these fields.
> +};
> +
> static int populate_newlink_req(struct newlink_req *req, int msg_type, NetDeviceEntry *nde,
> - int (*link_info)(NetDeviceEntry *, struct newlink_req *))
> + int (*link_info)(NetDeviceEntry *, struct newlink_req *), struct newlink_extras *extras)
> {
> memset(req, 0, sizeof(*req));
>
> @@ -850,6 +927,18 @@ static int populate_newlink_req(struct newlink_req *req, int msg_type, NetDevice
> req->i.ifi_index = nde->ifindex;
> req->i.ifi_flags = nde->flags;
>
> + if (extras) {
> + if (extras->netns_id >= 0)
> + addattr_l(&req->h, sizeof(*req), IFLA_LINK_NETNSID, &extras->netns_id, sizeof(extras->netns_id));
> +
> + if (extras->link >= 0)
> + addattr_l(&req->h, sizeof(*req), IFLA_LINK, &extras->link, sizeof(extras->link));
> +
> + if (extras->target_netns >= 0) {
> + addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &extras->target_netns, sizeof(extras->target_netns));
> + }
> +
> + }
Shouldn't this be in macvlan_link_info()?
>
> addattr_l(&req->h, sizeof(*req), IFLA_IFNAME, nde->name, strlen(nde->name));
> addattr_l(&req->h, sizeof(*req), IFLA_MTU, &nde->mtu, sizeof(nde->mtu));
> @@ -879,11 +968,12 @@ static int populate_newlink_req(struct newlink_req *req, int msg_type, NetDevice
> }
>
> static int do_rtm_link_req(int msg_type, NetDeviceEntry *nde, int nlsk,
> - int (*link_info)(NetDeviceEntry *, struct newlink_req *))
> + int (*link_info)(NetDeviceEntry *, struct newlink_req *),
> + struct newlink_extras *extras)
> {
> struct newlink_req req;
>
> - if (populate_newlink_req(&req, msg_type, nde, link_info) < 0)
> + if (populate_newlink_req(&req, msg_type, nde, link_info, extras) < 0)
> return -1;
>
> return do_rtnl_req(nlsk, &req, req.h.nlmsg_len, restore_link_cb, NULL, NULL);
> @@ -891,14 +981,15 @@ static int do_rtm_link_req(int msg_type, NetDeviceEntry *nde, int nlsk,
>
> int restore_link_parms(NetDeviceEntry *nde, int nlsk)
> {
> - return do_rtm_link_req(RTM_SETLINK, nde, nlsk, NULL);
> + return do_rtm_link_req(RTM_SETLINK, nde, nlsk, NULL, NULL);
> }
>
> static int restore_one_link(NetDeviceEntry *nde, int nlsk,
> - int (*link_info)(NetDeviceEntry *, struct newlink_req *))
> + int (*link_info)(NetDeviceEntry *, struct newlink_req *),
> + struct newlink_extras *extras)
> {
> pr_info("Restoring netdev %s idx %d\n", nde->name, nde->ifindex);
> - return do_rtm_link_req(RTM_NEWLINK, nde, nlsk, link_info);
> + return do_rtm_link_req(RTM_NEWLINK, nde, nlsk, link_info, extras);
> }
>
> #ifndef VETH_INFO_MAX
> @@ -979,6 +1070,109 @@ static int bridge_link_info(NetDeviceEntry *nde, struct newlink_req *req)
> return 0;
> }
>
> +static int macvlan_link_info(NetDeviceEntry *nde, struct newlink_req *req)
> +{
> + struct rtattr *macvlan_data;
> + MacvlanLinkEntry *macvlan = nde->macvlan;
> +
> + if (!macvlan) {
> + pr_err("Missing macvlan link entry %d\n", nde->ifindex);
> + return -1;
> + }
> +
> + addattr_l(&req->h, sizeof(*req), IFLA_INFO_KIND, "macvlan", 7);
> +
> + macvlan_data = NLMSG_TAIL(&req->h);
> + addattr_l(&req->h, sizeof(*req), IFLA_INFO_DATA, NULL, 0);
> +
> + addattr_l(&req->h, sizeof(*req), IFLA_MACVLAN_MODE, &macvlan->mode, sizeof(macvlan->mode));
> +
> + if (macvlan->has_flags)
> + addattr_l(&req->h, sizeof(*req), IFLA_MACVLAN_FLAGS, &macvlan->flags, sizeof(macvlan->flags));
> +
> + macvlan_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)macvlan_data;
> +
> + return 0;
> +}
> +
> +static int userns_restore_one_link(void *arg, int fd, pid_t pid)
> +{
> + int nlsk, ret;
> + struct newlink_req *req = arg;
> +
> + nlsk = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
> + if (nlsk < 0) {
> + pr_perror("Can't create nlk socket");
> + return -1;
> + }
> +
> + addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_PID, &pid, sizeof(pid));
> +
> + ret = do_rtnl_req(nlsk, req, req->h.nlmsg_len, restore_link_cb, NULL, NULL);
> + close(nlsk);
> + return ret;
> +}
> +
> +static int changeflags(NetDeviceEntry *nde, int nlsk)
> +{
> + struct newlink_req req;
> +
> + memzero(&req, sizeof(req));
> +
> + req.h.nlmsg_len = NLMSG_LENGTH(sizeof(req.i));
> + req.h.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK;
> + req.h.nlmsg_type = RTM_SETLINK;
> + req.h.nlmsg_seq = CR_NLMSG_SEQ;
> + req.i.ifi_family = AF_PACKET;
> + req.i.ifi_flags = nde->flags;
> +
> + addattr_l(&req.h, sizeof(req), IFLA_IFNAME, nde->name, strlen(nde->name));
> +
> + return do_rtnl_req(nlsk, &req, req.h.nlmsg_len, NULL, NULL, NULL);
We already have the setflags code in move_to_bridge() that brings the device UP.
Can we merge that code with this?
> +}
> +
> +static int get_criu_netnsid(int nlsk)
> +{
> + static int netnsid = -1;
> + struct {
> + struct nlmsghdr n;
> + struct rtgenmsg g;
> + char buf[1024];
> + } req;
> + int ns_fd = get_service_fd(NS_FD_OFF), i;
> +
> + if (netnsid > 0)
> + return netnsid;
> +
> + for (i = 0; i < 10; i++) {
> + int ret;
> +
> + memset(&req, 0, sizeof(req));
> +
> + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.g));
> + req.n.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK;
> + req.n.nlmsg_type = RTM_NEWNSID;
> + req.n.nlmsg_seq = CR_NLMSG_SEQ;
> +
> + addattr_l(&req.n, sizeof(req), NETNSA_FD, &ns_fd, sizeof(ns_fd));
> + addattr_l(&req.n, sizeof(req), NETNSA_NSID, &i, sizeof(i));
> +
> + ret = do_rtnl_req(nlsk, &req, req.n.nlmsg_len, NULL, NULL, NULL);
> + if (ret < 0) {
> + if (ret == -EEXIST)
> + continue;
Erm... Shouldn't we re-use the netnsid if it exists already?
> + errno = -ret;
> + pr_perror("couldn't create new netnsid");
> + }
> +
> + netnsid = i;
> + return netnsid;
> + }
> +
> + pr_err("tried to create too many netnsids\n");
> + return -1;
> +}
> +
> static int restore_link(NetDeviceEntry *nde, int nlsk)
> {
> pr_info("Restoring link %s type %d\n", nde->name, nde->type);
> @@ -988,14 +1182,93 @@ static int restore_link(NetDeviceEntry *nde, int nlsk)
> case ND_TYPE__EXTLINK: /* see comment in images/netdev.proto */
> return restore_link_parms(nde, nlsk);
> case ND_TYPE__VENET:
> - return restore_one_link(nde, nlsk, venet_link_info);
> + return restore_one_link(nde, nlsk, venet_link_info, NULL);
> case ND_TYPE__VETH:
> - return restore_one_link(nde, nlsk, veth_link_info);
> + return restore_one_link(nde, nlsk, veth_link_info, NULL);
> case ND_TYPE__TUN:
> return restore_one_tun(nde, nlsk);
> case ND_TYPE__BRIDGE:
> - return restore_one_link(nde, nlsk, bridge_link_info);
> + return restore_one_link(nde, nlsk, bridge_link_info, NULL);
> + case ND_TYPE__MACVLAN: {
Please, move the whole case branch into a helper function.
> + struct newlink_extras extras = {
> + .netns_id = -1,
> + .link = -1,
> + .target_netns = -1,
> + };
> + char key[100], *val;
> +
> + snprintf(key, sizeof(key), "macvlan[%s]", nde->name);
> + val = external_lookup_data(key);
> + if (IS_ERR_OR_NULL(val)) {
> + pr_err("a macvlan parent for %s is required\n", nde->name);
> + return -1;
> + }
> +
> + extras.link = (int) (unsigned long) val;
>
> + extras.netns_id = get_criu_netnsid(nlsk);
> + if (extras.netns_id < 0) {
> + pr_err("failed to get criu's netnsid\n");
> + return -1;
> + }
> +
> + if (root_ns_mask & CLONE_NEWUSER) {
> + struct newlink_req req;
> +
> + if (populate_newlink_req(&req, RTM_NEWLINK, nde, macvlan_link_info, &extras) < 0)
> + return -1;
> +
> + if (userns_call(userns_restore_one_link, 0, &req, sizeof(req), -1) < 0) {
> + pr_err("couldn't restore macvlan interface %s via usernsd\n", nde->name);
> + return -1;
> + }
> + } else {
> + int my_netns, ret, root_nlsk;
> + int ns_fd = get_service_fd(NS_FD_OFF);
> +
> + my_netns = open_proc(PROC_SELF, "ns/net");
> + if (my_netns < 0) {
> + pr_perror("couldn't get my netns");
> + return -1;
> + }
> +
> + extras.target_netns = my_netns;
For the userns case, the target netns is identified by pid.
Can we do it the same way here?
> +
> + if (setns(ns_fd, CLONE_NEWNET) < 0) {
> + close(my_netns);
> + pr_perror("couldn't setns to parent ns");
> + return -1;
> + }
> +
> + root_nlsk = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
So we need a socket in CRIU's netns for both cases -- userns and
non-userns, don't we?
> + if (root_nlsk < 0) {
> + close(my_netns);
> + pr_perror("Can't create nlk socket");
> + return -1;
> + }
> +
> + if (setns(my_netns, CLONE_NEWNET) < 0) {
> + close(my_netns);
> + close(root_nlsk);
> + pr_perror("couldn't setns back to my ns");
> + return -1;
> + }
> +
> + ret = restore_one_link(nde, root_nlsk, macvlan_link_info, &extras);
> + close(my_netns);
> + close(root_nlsk);
> + if (ret < 0)
> + return -1;
> + }
> +
> + /* We have to change the flags of the NDE manually here because
> + * we used IFLA_LINK_NETNSID to restore it, which creates the
> + * device and then shuts it down when it changes the device's
> + * namespace, but doesn't start it back up when it goes to the
> + * other namespace. So, we restore its state here.
> + */
> + return changeflags(nde, nlsk);
> + }
> default:
> pr_err("Unsupported link type %d\n", nde->type);
> break;
> @@ -1648,6 +1921,17 @@ int veth_pair_add(char *in, char *out)
> return add_external(e_str);
> }
>
> +int macvlan_ext_add(struct external *ext)
> +{
> + ext->data = (void *) (unsigned long) if_nametoindex(external_val(ext));
> + if (ext->data == 0) {
> + pr_perror("can't get ifindex of %s", ext->id);
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> /*
> * The setns() syscall (called by switch_ns()) can be extremely
> * slow. If we call it two or more times from the same task the
> diff --git a/images/Makefile b/images/Makefile
> index cf50794..eb18526 100644
> --- a/images/Makefile
> +++ b/images/Makefile
> @@ -60,6 +60,7 @@ proto-obj-y += binfmt-misc.o
> proto-obj-y += time.o
> proto-obj-y += sysctl.o
> proto-obj-y += autofs.o
> +proto-obj-y += macvlan.o
>
> CFLAGS += -iquote $(obj)/
>
> diff --git a/images/macvlan.proto b/images/macvlan.proto
> new file mode 100644
> index 0000000..c9c9045
> --- /dev/null
> +++ b/images/macvlan.proto
> @@ -0,0 +1,4 @@
> +message macvlan_link_entry {
> + required uint32 mode = 1;
> + optional uint32 flags = 2;
> +}
> diff --git a/images/netdev.proto b/images/netdev.proto
> index 19b501c..2f2f3d1 100644
> --- a/images/netdev.proto
> +++ b/images/netdev.proto
> @@ -1,5 +1,6 @@
> syntax = "proto2";
>
> +import "macvlan.proto";
> import "opts.proto";
> import "tun.proto";
> import "sysctl.proto";
> @@ -20,6 +21,7 @@ enum nd_type {
> */
> VENET = 5;
> BRIDGE = 6;
> + MACVLAN = 7;
> }
>
> message net_device_entry {
> @@ -38,6 +40,8 @@ message net_device_entry {
> repeated sysctl_entry conf4 = 9;
>
> repeated sysctl_entry conf6 = 10;
> +
> + optional macvlan_link_entry macvlan = 11;
> }
>
> message netns_entry {
> diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak
> index ad50eb4..8812c47 100644
> --- a/scripts/feature-tests.mak
> +++ b/scripts/feature-tests.mak
> @@ -100,3 +100,14 @@ int main(int argc, char **argv)
> }
>
> endef
> +
> +define FEATURE_TEST_NET_NAMESPACE_H
> +
> +#include <linux/net_namespace.h>
> +
> +int main(int argc, char **argv)
> +{
> + return 0;
> +}
> +
> +endef
More information about the CRIU
mailing list