[CRIU] [PATCH 3/5] net: add support for macvlan link types
Pavel Emelyanov
xemul at virtuozzo.com
Wed Jun 15 05:00:37 PDT 2016
On 06/14/2016 07:45 PM, Tycho Andersen wrote:
> While this is in principle similar to how veths are handled, we have to do
> things in two different ways depending on whether or not there is a user
> namespace involved, because there is no way to ask the kernel to attach a
> macvlan NIC to a device in a net ns that we don't have CAP_NET_ADMIN in.
>
> So we do it in two ways:
>
> a. If we are in a user namespace, we create the device in usernsd and use
> IFLA_NET_NS_PID to set the netns which it should be created in (saving
> us a "move into this netns" step).
>
> b. If we aren't in a user namespace, we could still be in a net namespace,
> so we use IFLA_LINK_NETNSID to set namespace that the i/o device will be
> in. This is sort of a hack, since we have to do it via NSID, which we
> know is 0 because we don't allow namespace nesting.
Can we use IFLA_LINK_NETNSID for case "a" as well?
(more comments inline)
> Signed-off-by: Tycho Andersen <tycho.andersen at canonical.com>
> ---
> criu/net.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++
> images/Makefile | 1 +
> images/macvlan.proto | 4 ++
> images/netdev.proto | 6 +++
> 4 files changed, 143 insertions(+)
> create mode 100644 images/macvlan.proto
>
> diff --git a/criu/net.c b/criu/net.c
> index 9954270..793b625 100644
> --- a/criu/net.c
> +++ b/criu/net.c
> @@ -476,6 +476,61 @@ static int dump_bridge(NetDeviceEntry *nde, struct cr_imgset *imgset, struct nla
> return write_netdev_img(nde, imgset, data);
> }
>
> +static int dump_macvlan(NetDeviceEntry *nde, struct cr_imgset *imgset, struct nlattr **tb)
> +{
> + MacvlanLinkEntry macvlan = MACVLAN_LINK_ENTRY__INIT;
> + int ret;
> + struct nlattr *info[IFLA_INFO_MAX], *data[IFLA_MACVLAN_MAX];
> +
> + if (!tb || !tb[IFLA_LINKINFO]) {
> + pr_err("no for macvlan\n");
> + return -1;
> + }
> +
> + if (tb[IFLA_LINK]) {
> + nde->has_link = true;
> + nde->link = *(int *)RTA_DATA(tb[IFLA_LINK]);
> + }
> +
> + if (tb[IFLA_LINK_NETNSID]) {
> + nde->has_netns_id = true;
> + nde->netns_id = *(int *)RTA_DATA(tb[IFLA_LINK_NETNSID]);
> + }
This seems to be generic enough to be placed in dump_one_netdev.
> +
> + ret = nla_parse_nested(info, IFLA_INFO_MAX, tb[IFLA_LINKINFO], NULL);
> + if (ret < 0) {
> + pr_err("failed to parse nested linkinfo\n");
> + return -1;
> + }
> +
> + if (!info[IFLA_INFO_DATA]) {
> + pr_err("no link info data for macvlan\n");
> + return -1;
> + }
> +
> + ret = nla_parse_nested(data, IFLA_MACVLAN_MAX, info[IFLA_INFO_DATA], NULL);
> + if (ret < 0) {
> + pr_err("failed ot parse macvlan data\n");
> + return -1;
> + }
> +
> + if (!data[IFLA_MACVLAN_MODE]) {
> + pr_err("macvlan mode required for %s\n", nde->name);
> + return -1;
> + }
> +
> + macvlan.mode = *((u32 *)RTA_DATA(data[IFLA_MACVLAN_MODE]));
> +
> + if (data[IFLA_MACVLAN_FLAGS])
> + macvlan.flags = *((u16 *) RTA_DATA(data[IFLA_MACVLAN_FLAGS]));
> +
> + nde->macvlan = &macvlan;
> + ret = write_netdev_img(nde, imgset, data);
> +
> + nde->macvlan = NULL;
> + return ret;
Just doing 'return write_netdev_img()' should be enough.
> +}
> +
> static int dump_one_ethernet(struct ifinfomsg *ifi, char *kind,
> struct nlattr **tb, struct cr_imgset *fds)
> {
> @@ -508,6 +563,8 @@ static int dump_one_ethernet(struct ifinfomsg *ifi, char *kind,
>
> pr_warn("GRE tap device %s not supported natively\n", name);
> }
> + if (!strcmp(kind, "macvlan"))
> + return dump_one_netdev(ND_TYPE__MACVLAN, ifi, tb, fds, dump_macvlan);
>
> return dump_unknown_device(ifi, kind, tb, fds);
> }
> @@ -828,6 +885,22 @@ static int populate_newlink_req(struct newlink_req *req, int msg_type, NetDevice
> req->i.ifi_index = nde->ifindex;
> req->i.ifi_flags = nde->flags;
>
> + /* Note that this id isn't preserved anywhere, but since we don't
> + * support nested namespaces, right now there is only one peer
> + * namespace, the parent NS with an id of 0, so this works. In the
> + * future, we'll need to be more careful about munging this ID to be
> + * correct (or restoring namespaces in such a way that they get the
> + * same ID).
> + */
> + if (nde->has_netns_id)
> + addattr_l(&req->h, sizeof(*req), IFLA_LINK_NETNSID, &nde->netns_id, sizeof(nde->netns_id));
> +
> + /* Like netns_id, this is not preserved across hosts (indeed, a link
> + * with this ifindex may not even exist). We add support for rewriting
> + * it in a later patch.
> + */
> + if (nde->has_link)
> + addattr_l(&req->h, sizeof(*req), IFLA_LINK, &nde->link, sizeof(nde->link));
Can you make the setting and getting of these attrs symmetrical -- either both
are get/set in the macvlan dump/restore code, or both are get/set in generic code,
or one (netnsid?) is get/set in generic code and the other in macvlan code?
> addattr_l(&req->h, sizeof(*req), IFLA_IFNAME, nde->name, strlen(nde->name));
> addattr_l(&req->h, sizeof(*req), IFLA_MTU, &nde->mtu, sizeof(nde->mtu));
> @@ -949,6 +1022,49 @@ static int bridge_link_info(NetDeviceEntry *nde, struct newlink_req *req)
> return 0;
> }
>
> +static int macvlan_link_info(NetDeviceEntry *nde, struct newlink_req *req)
> +{
> + struct rtattr *macvlan_data;
> + MacvlanLinkEntry *macvlan = nde->macvlan;
> +
> + if (!macvlan) {
> + pr_err("Missing macvlan link entry %d\n", nde->ifindex);
> + return -1;
> + }
> +
> + addattr_l(&req->h, sizeof(*req), IFLA_INFO_KIND, "macvlan", 7);
> +
> + macvlan_data = NLMSG_TAIL(&req->h);
> + addattr_l(&req->h, sizeof(*req), IFLA_INFO_DATA, NULL, 0);
> +
> + addattr_l(&req->h, sizeof(*req), IFLA_MACVLAN_MODE, &macvlan->mode, sizeof(macvlan->mode));
> +
> + if (macvlan->has_flags)
> + addattr_l(&req->h, sizeof(*req), IFLA_MACVLAN_FLAGS, &macvlan->flags, sizeof(macvlan->flags));
> +
> + macvlan_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)macvlan_data;
> +
> + return 0;
> +}
> +
> +static int userns_restore_one_link(void *arg, int fd, pid_t pid)
> +{
> + int nlsk, ret;
> + struct newlink_req *req = arg;
> +
> + nlsk = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
> + if (nlsk < 0) {
> + pr_perror("Can't create nlk socket");
> + return -1;
> + }
> +
> + addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_PID, &pid, sizeof(pid));
> +
> + ret = do_rtnl_req(nlsk, req, req->h.nlmsg_len, restore_link_cb, NULL, NULL);
> + close(nlsk);
> + return ret;
> +}
> +
> static int restore_link(NetDeviceEntry *nde, int nlsk)
> {
> pr_info("Restoring link %s type %d\n", nde->name, nde->type);
> @@ -965,7 +1081,23 @@ static int restore_link(NetDeviceEntry *nde, int nlsk)
> return restore_one_tun(nde, nlsk);
> case ND_TYPE__BRIDGE:
> return restore_one_link(nde, nlsk, bridge_link_info);
> + case ND_TYPE__MACVLAN: {
> + if (root_ns_mask & CLONE_NEWNET) {
Shouldn't this be CLONE_NEWUSER instead?
> + struct newlink_req req;
>
> + if (populate_newlink_req(&req, RTM_NEWLINK, nde, macvlan_link_info) < 0)
> + return -1;
> +
> + if (userns_call(userns_restore_one_link, 0, &req, sizeof(req), -1) < 0) {
> + pr_err("couldn't restore macvlan interface %s via usernsd\n", nde->name);
> + return -1;
> + }
> +
> + return 0;
> + }
> +
> + return restore_one_link(nde, nlsk, macvlan_link_info);
> + }
> default:
> pr_err("Unsupported link type %d\n", nde->type);
> break;
> diff --git a/images/Makefile b/images/Makefile
> index cf50794..eb18526 100644
> --- a/images/Makefile
> +++ b/images/Makefile
> @@ -60,6 +60,7 @@ proto-obj-y += binfmt-misc.o
> proto-obj-y += time.o
> proto-obj-y += sysctl.o
> proto-obj-y += autofs.o
> +proto-obj-y += macvlan.o
>
> CFLAGS += -iquote $(obj)/
>
> diff --git a/images/macvlan.proto b/images/macvlan.proto
> new file mode 100644
> index 0000000..c9c9045
> --- /dev/null
> +++ b/images/macvlan.proto
> @@ -0,0 +1,4 @@
> +message macvlan_link_entry {
> + required uint32 mode = 1;
> + optional uint32 flags = 2;
> +}
> diff --git a/images/netdev.proto b/images/netdev.proto
> index 37cafb3..746db16 100644
> --- a/images/netdev.proto
> +++ b/images/netdev.proto
> @@ -1,3 +1,4 @@
> +import "macvlan.proto";
> import "opts.proto";
> import "tun.proto";
> import "sysctl.proto";
> @@ -18,6 +19,7 @@ enum nd_type {
> */
> VENET = 5;
> BRIDGE = 6;
> + MACVLAN = 7;
> }
>
> message net_device_entry {
> @@ -36,6 +38,10 @@ message net_device_entry {
> repeated sysctl_entry conf4 = 9;
>
> repeated sysctl_entry conf6 = 10;
> +
> + optional int32 link = 11;
> + optional int32 netns_id = 12;
> + optional macvlan_link_entry macvlan = 13;
> }
>
> message netns_entry {
>
More information about the CRIU
mailing list