[CRIU] [PATCH 08/12] [v2] netns: restore internal veth devices
Andrei Vagin
avagin at openvz.org
Tue Mar 21 17:28:49 PDT 2017
From: Andrei Vagin <avagin at virtuozzo.com>
When we dump a veth device, the kernel reports where its peer device lives,
and we use this information to restore the veth pair.
On restore we set the net ns id for the peer, so that it is created in the
required netns.
v2: add more comments
Signed-off-by: Andrei Vagin <avagin at virtuozzo.com>
---
criu/include/namespaces.h | 7 +++
criu/namespaces.c | 5 ++-
criu/net.c | 101 ++++++++++++++++++++++++++++++++++++++----
scripts/build/Dockerfile.tmpl | 1 +
scripts/travis/travis-tests | 2 +-
5 files changed, 106 insertions(+), 10 deletions(-)
diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
index 2c71a1e..5fc3826 100644
--- a/criu/include/namespaces.h
+++ b/criu/include/namespaces.h
@@ -88,6 +88,12 @@ struct netns_id {
struct list_head node;
};
+struct net_link {
+ unsigned int ifindex;
+ bool created;
+ struct list_head node;
+};
+
struct ns_id {
unsigned int kid;
unsigned int id;
@@ -133,6 +139,7 @@ struct ns_id {
int nlsk; /* for sockets collection */
int seqsk; /* to talk to parasite daemons */
struct list_head ids;
+ struct list_head links;
} net;
struct {
UsernsEntry *e;
diff --git a/criu/namespaces.c b/criu/namespaces.c
index 5fe2461..c180977 100644
--- a/criu/namespaces.c
+++ b/criu/namespaces.c
@@ -308,8 +308,10 @@ struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid,
INIT_LIST_HEAD(&nsid->children);
INIT_LIST_HEAD(&nsid->siblings);
- if (nd == &net_ns_desc)
+ if (nd == &net_ns_desc) {
INIT_LIST_HEAD(&nsid->net.ids);
+ INIT_LIST_HEAD(&nsid->net.links);
+ }
}
return nsid;
@@ -433,6 +435,7 @@ static unsigned int generate_ns_id(int pid, unsigned int kid, struct ns_desc *nd
if (nd == &net_ns_desc) {
INIT_LIST_HEAD(&nsid->net.ids);
+ INIT_LIST_HEAD(&nsid->net.links);
}
found:
diff --git a/criu/net.c b/criu/net.c
index 4abaaa5..f7a76e2 100644
--- a/criu/net.c
+++ b/criu/net.c
@@ -366,12 +366,22 @@ int write_netdev_img(NetDeviceEntry *nde, struct cr_imgset *fds, struct nlattr *
return pb_write_one(img_from_set(fds, CR_FD_NETDEV), nde, PB_NETDEV);
}
+static int lookup_net_by_netid(struct ns_id *ns, int net_id)
+{
+ struct netns_id *p;
+
+ list_for_each_entry(p, &ns->net.ids, node)
+ if (p->netnsid_value == net_id)
+ return p->target_ns_id;
+
+ return -1;
+}
+
static int dump_one_netdev(int type, struct ifinfomsg *ifi,
struct nlattr **tb, struct ns_id *ns, struct cr_imgset *fds,
int (*dump)(NetDeviceEntry *, struct cr_imgset *, struct nlattr **info))
{
- int ret = -1;
- int i;
+ int ret = -1, i, peer_ifindex;
NetDeviceEntry netdev = NET_DEVICE_ENTRY__INIT;
SysctlEntry *confs4 = NULL;
int size4 = ARRAY_SIZE(devconfs4);
@@ -391,6 +401,39 @@ static int dump_one_netdev(int type, struct ifinfomsg *ifi,
netdev.flags = ifi->ifi_flags;
netdev.name = RTA_DATA(tb[IFLA_IFNAME]);
+ if (kdat.has_nsid) {
+ s32 nsid = -1;
+
+ peer_ifindex = ifi->ifi_index;
+ if (tb[IFLA_LINK])
+ peer_ifindex = nla_get_u32(tb[IFLA_LINK]);
+
+ netdev.has_peer_ifindex = true;
+ netdev.peer_ifindex = peer_ifindex;
+
+ if (tb[IFLA_LINK_NETNSID])
+ nsid = nla_get_s32(tb[IFLA_LINK_NETNSID]);
+
+ pr_debug("The peer link is in the %d netns with the %u index\n",
+ nsid, netdev.peer_ifindex);
+
+ if (nsid == -1)
+ nsid = ns->id;
+ else
+ nsid = lookup_net_by_netid(ns, nsid);
+ if (nsid < 0) {
+ pr_warn("The %s veth is in an external netns\n",
+ netdev.name);
+ } else {
+ netdev.has_peer_nsid = true;
+ netdev.peer_nsid = nsid;
+ }
+ }
+ /*
* If kdat.has_nsid is false, multiple network namespaces are not dumped,
+ * so if we are here, this means only one netns is dumped.
+ */
+
if (tb[IFLA_ADDRESS] && (type != ND_TYPE__LOOPBACK)) {
netdev.has_address = true;
netdev.address.data = nla_data(tb[IFLA_ADDRESS]);
@@ -1025,9 +1068,11 @@ enum {
#define IFLA_NET_NS_FD 28
#endif
-static void veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req)
+static int veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req,
+ struct ns_id *ns, int ns_fd)
{
char key[100], *val;
+ struct ns_id *peer_ns = NULL;
snprintf(key, sizeof(key), "veth[%s]", nde->name);
val = external_lookup_by_key(key);
@@ -1036,7 +1081,47 @@ static void veth_peer_info(NetDeviceEntry *nde, struct newlink_req *req)
aux = strchrnul(val, '@');
addattr_l(&req->h, sizeof(*req), IFLA_IFNAME, val, aux - val);
+ addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &ns_fd, sizeof(ns_fd));
+ return 0;
}
+
+ if (nde->has_peer_nsid) {
+ if (ns && nde->peer_nsid == ns->id) {
+ struct net_link *link;
+
+ list_for_each_entry(link, &ns->net.links, node)
+ if (link->ifindex == nde->peer_ifindex && link->created) {
+ pr_err("%d\n", nde->peer_ifindex);
+ req->h.nlmsg_type = RTM_SETLINK;
+ return 0;
+ }
+ }
+ peer_ns = lookup_ns_by_id(nde->peer_nsid, &net_ns_desc);
+ if (peer_ns->ns_populated) {
+ req->h.nlmsg_type = RTM_SETLINK;
+ return 0;
+ }
+ }
+
+ if (peer_ns) {
+ if (ns && nde->peer_nsid == ns->id) {
+ struct net_link *link;
+
+ link = xmalloc(sizeof(*link));
+ if (link == NULL)
+ return -1;
+
+ link->ifindex = nde->ifindex;
+ link->created = true;
+ list_add(&link->node, &ns->net.links);
+ }
+
+ addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &peer_ns->net.ns_fd, sizeof(int));
+ return 0;
+ }
+
+ pr_err("Unknown peer net namespace");
+ return -1;
}
static int veth_link_info(struct ns_id *ns, NetDeviceEntry *nde, struct newlink_req *req)
@@ -1045,17 +1130,17 @@ static int veth_link_info(struct ns_id *ns, NetDeviceEntry *nde, struct newlink_
struct rtattr *veth_data, *peer_data;
struct ifinfomsg ifm;
- BUG_ON(ns_fd < 0);
-
addattr_l(&req->h, sizeof(*req), IFLA_INFO_KIND, "veth", 4);
veth_data = NLMSG_TAIL(&req->h);
addattr_l(&req->h, sizeof(*req), IFLA_INFO_DATA, NULL, 0);
peer_data = NLMSG_TAIL(&req->h);
memset(&ifm, 0, sizeof(ifm));
+
+ ifm.ifi_index = nde->peer_ifindex;
addattr_l(&req->h, sizeof(*req), VETH_INFO_PEER, &ifm, sizeof(ifm));
- veth_peer_info(nde, req);
- addattr_l(&req->h, sizeof(*req), IFLA_NET_NS_FD, &ns_fd, sizeof(ns_fd));
+
+ veth_peer_info(nde, req, ns, ns_fd);
peer_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)peer_data;
veth_data->rta_len = (void *)NLMSG_TAIL(&req->h) - (void *)veth_data;
@@ -1261,7 +1346,7 @@ static int restore_links(struct ns_id *ns, NetnsEntry **netns)
ret = restore_link(ns, nde, nlsk, criu_nlsk);
if (ret) {
- pr_err("Can't restore link\n");
+ pr_err("Can't restore link: %d\n", ret);
goto exit;
}
diff --git a/scripts/build/Dockerfile.tmpl b/scripts/build/Dockerfile.tmpl
index 60e0e45..7987b01 100644
--- a/scripts/build/Dockerfile.tmpl
+++ b/scripts/build/Dockerfile.tmpl
@@ -13,6 +13,7 @@ RUN apt-get update && apt-get install -y \
libcap-dev \
iptables \
libnl-3-dev \
+ libnl-route-3-dev \
libselinux-dev \
pkg-config \
git-core \
diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests
index 50a51ff..37117a3 100755
--- a/scripts/travis/travis-tests
+++ b/scripts/travis/travis-tests
@@ -4,7 +4,7 @@ set -x -e
TRAVIS_PKGS="protobuf-c-compiler libprotobuf-c0-dev libaio-dev
libprotobuf-dev protobuf-compiler python-ipaddr libcap-dev
libnl-3-dev gcc-multilib libc6-dev-i386 gdb bash python-protobuf
- libnet-dev util-linux asciidoc"
+ libnet-dev util-linux asciidoc libnl-route-3-dev"
travis_prep () {
[ -n "$SKIP_TRAVIS_PREP" ] && return
--
2.7.4
More information about the CRIU
mailing list