[CRIU] [PATCH 08/12] netns: restore internal veth devices
Pavel Emelyanov
xemul at virtuozzo.com
Tue Mar 21 08:18:02 PDT 2017
On 03/21/2017 12:10 AM, Andrei Vagin wrote:
> On Mon, Mar 13, 2017 at 01:50:15PM +0300, Pavel Emelyanov wrote:
>> On 03/01/2017 02:53 AM, Andrei Vagin wrote:
>>> From: Andrei Vagin <avagin at virtuozzo.com>
>>>
>>> When we dump a veth device, the kernel reports where a peer device lives
>>> and we use this information to restore this veth pair.
>>>
>>> On restore we set a net ns id for a peer and it is created in the required
>>> netns.
>>>
>>> Signed-off-by: Andrei Vagin <avagin at virtuozzo.com>
>>> ---
>>> criu/include/namespaces.h | 7 +++
>>> criu/namespaces.c | 5 ++-
>>> criu/net.c | 99 +++++++++++++++++++++++++++++++++++++++----
>>> scripts/build/Dockerfile.tmpl | 1 +
>>> scripts/travis/travis-tests | 2 +-
>>> 5 files changed, 104 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
>>> index 5df7679..2302cff 100644
>>> --- a/criu/include/namespaces.h
>>> +++ b/criu/include/namespaces.h
>>> @@ -88,6 +88,12 @@ struct netns_id {
>>> struct list_head node;
>>> };
>>>
>>> +struct net_link {
>>> + unsigned int ifindex;
>>> + bool created;
>>> + struct list_head node;
>>> +};
>>> +
>>> struct ns_id {
>>> unsigned int kid;
>>> unsigned int id;
>>> @@ -122,6 +128,7 @@ struct ns_id {
>>> int nlsk; /* for sockets collection */
>>> int seqsk; /* to talk to parasite daemons */
>>> struct list_head ids;
>>> + struct list_head links;
>>> } net;
>>> struct {
>>> UsernsEntry *e;
>>> diff --git a/criu/namespaces.c b/criu/namespaces.c
>>> index 8e170aa..797e5ee 100644
>>> --- a/criu/namespaces.c
>>> +++ b/criu/namespaces.c
>>> @@ -308,8 +308,10 @@ struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid,
>>> INIT_LIST_HEAD(&nsid->children);
>>> INIT_LIST_HEAD(&nsid->siblings);
>>>
>>> - if (nd == &net_ns_desc)
>>> + if (nd == &net_ns_desc) {
>>> INIT_LIST_HEAD(&nsid->net.ids);
>>> + INIT_LIST_HEAD(&nsid->net.links);
>>> + }
>>> }
>>>
>>> return nsid;
>>> @@ -437,6 +439,7 @@ static unsigned int generate_ns_id(int pid, unsigned int kid, struct ns_desc *nd
>>>
>>> if (nd == &net_ns_desc) {
>>> INIT_LIST_HEAD(&nsid->net.ids);
>>> + INIT_LIST_HEAD(&nsid->net.links);
>>> }
>>>
>>> found:
>>> diff --git a/criu/net.c b/criu/net.c
>>> index f889403..b48440e 100644
>>> --- a/criu/net.c
>>> +++ b/criu/net.c
>>> @@ -366,12 +366,22 @@ int write_netdev_img(NetDeviceEntry *nde, struct cr_imgset *fds, struct nlattr *
>>> return pb_write_one(img_from_set(fds, CR_FD_NETDEV), nde, PB_NETDEV);
>>> }
>>>
>>> +static int lookup_net_by_netid(struct ns_id *ns, int net_id)
>>> +{
>>> + struct netns_id *p;
>>> +
>>> + list_for_each_entry(p, &ns->net.ids, node)
>>> + if (p->net_id == net_id)
>>> + return p->id;
>>> +
>>> + return -1;
>>> +}
>>> +
>>> static int dump_one_netdev(int type, struct ifinfomsg *ifi,
>>> struct nlattr **tb, struct ns_id *ns, struct cr_imgset *fds,
>>> int (*dump)(NetDeviceEntry *, struct cr_imgset *, struct nlattr **info))
>>> {
>>> - int ret = -1;
>>> - int i;
>>> + int ret = -1, i, peer_ifindex;
>>> NetDeviceEntry netdev = NET_DEVICE_ENTRY__INIT;
>>> SysctlEntry *confs4 = NULL;
>>> int size4 = ARRAY_SIZE(devconfs4);
>>> @@ -391,6 +401,37 @@ static int dump_one_netdev(int type, struct ifinfomsg *ifi,
>>> netdev.flags = ifi->ifi_flags;
>>> netdev.name = RTA_DATA(tb[IFLA_IFNAME]);
>>>
>>> + if (kdat.has_nsid) {
>>> + peer_ifindex = ifi->ifi_index;
>>> + if (tb[IFLA_LINK])
>>> + peer_ifindex = nla_get_u32(tb[IFLA_LINK]);
>>> +
>>> + netdev.has_peer_ifindex = true;
>>> + netdev.peer_ifindex = peer_ifindex;
>>> + }
>>> +
>>> + if (kdat.has_nsid) {
>
> here we check that the kernel reports nsid for devices.
>
>>> + s32 nsid = -1;
>>> +
>>> + if (tb[IFLA_LINK_NETNSID])
>>> + nsid = nla_get_s32(tb[IFLA_LINK_NETNSID]);
>>> +
>>> + pr_debug("The peer link is in the %d netns with the %u index\n",
>>> + nsid, netdev.peer_ifindex);
>>> +
>>> + if (nsid == -1)
>>> + nsid = ns->id;
>>
>> This place is not clear. If the kernel didn't report the ns the veth peer
>> lives in, then ... what? We assume it lives in the current one? Is this correct?
>
> so here this means that the peer device lives in the current netns
Ah, again I mixed up -1 meaning "error" with -1 meaning "no value". Would you add a
code comment at this place?
>>
>>> + else
>>> + nsid = lookup_net_by_netid(ns, nsid);
>>> + if (nsid < 0) {
>>> + pr_warn("The %s veth is in an external netns\n",
>>> + netdev.name);
>>> + } else {
>>> + netdev.has_peer_nsid = true;
>>> + netdev.peer_nsid = nsid;
>>> + }
>>> + }
>>> +
>>> if (tb[IFLA_ADDRESS] && (type != ND_TYPE__LOOPBACK)) {
>>> netdev.has_address = true;
>>> netdev.address.data = nla_data(tb[IFLA_ADDRESS]);
>>> @@ -1017,9 +1058,11 @@ enum {
>>> #define IFLA_NET_NS_FD 28
>>> #endif
>>>
>>> -static void veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req)
>>> +static int veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req,
>>> + struct ns_id *ns, int ns_fd)
>>> {
>>> char key[100], *val;
>>> + struct ns_id *peer_ns = NULL;
>>>
>>> snprintf(key, sizeof(key), "veth[%s]", nde->name);
>>> val = external_lookup_by_key(key);
>>> @@ -1028,7 +1071,47 @@ static void veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req)
>>>
>>> aux = strchrnul(val, '@');
>>> addattr_l(&req->h, sizeof(*req), IFLA_IFNAME, val, aux - val);
>>> + addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &ns_fd, sizeof(ns_fd));
>>> + return 0;
>>> }
>>> +
>>> + if (nde->has_peer_nsid) {
>>> + if (ns && nde->peer_nsid == ns->id) {
>>> + struct net_link *link;
>>> +
>>> + list_for_each_entry(link, &ns->net.links, node)
>>> + if (link->ifindex == nde->peer_ifindex && link->created) {
>>> + pr_err("%d\n", nde->peer_ifindex);
>>> + req->h.nlmsg_type = RTM_SETLINK;
>>> + return 0;
>>> + }
>>> + }
>>> + peer_ns = lookup_ns_by_id(nde->peer_nsid, &net_ns_desc);
>>> + if (peer_ns->ns_populated) {
>>> + req->h.nlmsg_type = RTM_SETLINK;
>>> + return 0;
>>> + }
>>> + }
>>> +
>>> + if (peer_ns) {
>>> + if (ns && nde->peer_nsid == ns->id) {
>>> + struct net_link *link;
>>> +
>>> + link = xmalloc(sizeof(*link));
>>> + if (link == NULL)
>>> + return -1;
>>> +
>>> + link->ifindex = nde->ifindex;
>>> + link->created = true;
>>> + list_add(&link->node, &ns->net.links);
>>> + }
>>> +
>>> + addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &peer_ns->net.ns_fd, sizeof(int));
>>> + return 0;
>>> + }
>>> +
>>> + pr_err("Unknown peer net namespace");
>>> + return -1;
>>> }
>>>
>>> static int veth_link_info(struct ns_id *ns, NetDeviceEntry *nde, struct newlink_req *req)
>>> @@ -1037,17 +1120,17 @@ static int veth_link_info(struct ns_id *ns, NetDeviceEntry *nde, struct newlink_
>>> struct rtattr *veth_data, *peer_data;
>>> struct ifinfomsg ifm;
>>>
>>> - BUG_ON(ns_fd < 0);
>>> -
>>> addattr_l(&req->h, sizeof(*req), IFLA_INFO_KIND, "veth", 4);
>>>
>>> veth_data = NLMSG_TAIL(&req->h);
>>> addattr_l(&req->h, sizeof(*req), IFLA_INFO_DATA, NULL, 0);
>>> peer_data = NLMSG_TAIL(&req->h);
>>> memset(&ifm, 0, sizeof(ifm));
>>> +
>>> + ifm.ifi_index = nde->peer_ifindex;
>>> addattr_l(&req->h, sizeof(*req), VETH_INFO_PEER, &ifm, sizeof(ifm));
>>> - veth_peer_info(nde, req);
>>> - addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &ns_fd, sizeof(ns_fd));
>>> +
>>> + veth_peer_info(nde, req, ns, ns_fd);
>>> peer_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)peer_data;
>>> veth_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)veth_data;
>>>
>>> @@ -1253,7 +1336,7 @@ static int restore_links(struct ns_id *ns, NetnsEntry **netns)
>>>
>>> ret = restore_link(ns, nde, nlsk, criu_nlsk);
>>> if (ret) {
>>> - pr_err("Can't restore link\n");
>>> + pr_err("Can't restore link: %d\n", ret);
>>> goto exit;
>>> }
>>>
>>> diff --git a/scripts/build/Dockerfile.tmpl b/scripts/build/Dockerfile.tmpl
>>> index 6e35f87..0f7c214 100644
>>> --- a/scripts/build/Dockerfile.tmpl
>>> +++ b/scripts/build/Dockerfile.tmpl
>>> @@ -12,6 +12,7 @@ RUN apt-get update && apt-get install -y \
>>> libcap-dev \
>>> iptables \
>>> libnl-3-dev \
>>> + libnl-route-3-dev \
>>> libselinux-dev \
>>> pkg-config \
>>> git-core \
>>> diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests
>>> index 75d15f5..7b487cb 100755
>>> --- a/scripts/travis/travis-tests
>>> +++ b/scripts/travis/travis-tests
>>> @@ -4,7 +4,7 @@ set -x -e
>>> TRAVIS_PKGS="protobuf-c-compiler libprotobuf-c0-dev libaio-dev
>>> libprotobuf-dev protobuf-compiler python-ipaddr libcap-dev
>>> libnl-3-dev gcc-multilib libc6-dev-i386 gdb bash python-protobuf
>>> - libnet-dev util-linux"
>>> + libnet-dev util-linux libnl-route-3-dev"
>>>
>>> travis_prep () {
>>> [ -n "$SKIP_TRAVIS_PREP" ] && return
>>>
>>
> .
>
More information about the CRIU
mailing list