[CRIU] [PATCHv2 1/2] net: add nftables c/r
Andrei Vagin (C)
avagin at gmail.com
Thu Nov 14 07:29:15 MSK 2019
On Wed, Nov 13, 2019 at 02:14:06PM +0000, Alexander Mikhalitsyn wrote:
> On Wed, 13 Nov 2019 00:45:20 -0800
> Andrei Vagin <avagin at gmail.com> wrote:
>
> > On Tue, Nov 12, 2019 at 07:06:42PM +0300, Alexander Mikhalitsyn wrote:
> > > From: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
> > >
> > > Starting with CentOS 8, nft is used instead of iptables. CRIU has never supported
> > > nft rules, so after c/r all rules are flushed.
> > >
> > > The path to the nft tool can be changed via the CR_NFTABLES environment
> > > variable, similar to CR_IPTABLES.
> >
> >
> > Can we use libnftnl? This should be faster, because we will not need to
> > fork a new process and exec a binary.
> >
> Good point, but we have some problems with that. Currently, the output of
> "nft list ruleset" has a special format (one that corresponds to the
> formal grammar defined in nftables/src/parser_bison.y). This format is
> not supported by libnftnl. Currently libnftnl only supports dumping the
> ruleset in "command format", i.e. as a sequence of nft commands that
> describes the current ruleset. But if we use this format at checkpoint
> time in CRIU, then we have a problem on restore: we (may) need to
> execute a lot of "nft ..." commands. In terms of performance that may
> be even worse than what we are doing now. Of course, we could pull this
> parser from nftables into CRIU, but it's a lot of code and we would then
> need additional compile-time deps - Bison, for example. I don't know
> what to choose... :)
Maybe we can do it this way:
on dump:
* create a netlink socket
* send a request to list all rules
* save raw netlink messages in an image file.
on restore:
* create a netlink socket
* send netlink messages from the image file into the socket.
"ip addr save" and "ip addr restore" work this way.
https://github.com/shemminger/iproute2/blob/master/ip/ipaddress.c#L1552
https://github.com/shemminger/iproute2/blob/master/ip/ipaddress.c#L1618
>
> > >
> > > Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
> > > Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn at virtuozzo.com>
> > > Signed-off-by: Alexander Mikhalitsyn <alexander at mihalicyn.com>
> > > ---
> > > criu/image-desc.c | 1 +
> > > criu/include/image-desc.h | 1 +
> > > criu/include/magic.h | 1 +
> > > criu/include/util.h | 2 ++
> > > criu/net.c | 65 +++++++++++++++++++++++++++++++++++++--
> > > criu/util.c | 39 +++++++++++++++++++++++
> > > 6 files changed, 107 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/criu/image-desc.c b/criu/image-desc.c
> > > index 81cd0748..ae5d817f 100644
> > > --- a/criu/image-desc.c
> > > +++ b/criu/image-desc.c
> > > @@ -76,6 +76,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
> > > FD_ENTRY_F(RULE, "rule-%u", O_NOBUF),
> > > FD_ENTRY_F(IPTABLES, "iptables-%u", O_NOBUF),
> > > FD_ENTRY_F(IP6TABLES, "ip6tables-%u", O_NOBUF),
> > > + FD_ENTRY_F(NFTABLES, "nftables-%u", O_NOBUF),
> > > FD_ENTRY_F(TMPFS_IMG, "tmpfs-%u.tar.gz", O_NOBUF),
> > > FD_ENTRY_F(TMPFS_DEV, "tmpfs-dev-%u.tar.gz", O_NOBUF),
> > > FD_ENTRY_F(AUTOFS, "autofs-%u", O_NOBUF),
> > > diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h
> > > index fea80a71..6db8bf94 100644
> > > --- a/criu/include/image-desc.h
> > > +++ b/criu/include/image-desc.h
> > > @@ -42,6 +42,7 @@ enum {
> > > CR_FD_RULE,
> > > CR_FD_IPTABLES,
> > > CR_FD_IP6TABLES,
> > > + CR_FD_NFTABLES,
> > > CR_FD_NETNS,
> > > CR_FD_NETNF_CT,
> > > CR_FD_NETNF_EXP,
> > > diff --git a/criu/include/magic.h b/criu/include/magic.h
> > > index 05101f43..1a583f4e 100644
> > > --- a/criu/include/magic.h
> > > +++ b/criu/include/magic.h
> > > @@ -103,6 +103,7 @@
> > > #define TMPFS_DEV_MAGIC RAW_IMAGE_MAGIC
> > > #define IPTABLES_MAGIC RAW_IMAGE_MAGIC
> > > #define IP6TABLES_MAGIC RAW_IMAGE_MAGIC
> > > +#define NFTABLES_MAGIC RAW_IMAGE_MAGIC
> > > #define NETNF_CT_MAGIC RAW_IMAGE_MAGIC
> > > #define NETNF_EXP_MAGIC RAW_IMAGE_MAGIC
> > >
> > > diff --git a/criu/include/util.h b/criu/include/util.h
> > > index a14be722..57b46dcc 100644
> > > --- a/criu/include/util.h
> > > +++ b/criu/include/util.h
> > > @@ -252,6 +252,8 @@ static inline bool issubpath(const char *path, const char *sub_path)
> > > (end == '/' || end == '\0');
> > > }
> > >
> > > +int check_cmd_exists(const char *cmd);
> > > +
> > > /*
> > > * mkdir -p
> > > */
> > > diff --git a/criu/net.c b/criu/net.c
> > > index fe9b51ad..4737b604 100644
> > > --- a/criu/net.c
> > > +++ b/criu/net.c
> > > @@ -1739,12 +1739,12 @@ static int run_ip_tool(char *arg1, char *arg2, char *arg3, char *arg4, int fdin,
> > > return 0;
> > > }
> > >
> > > -static int run_iptables_tool(char *def_cmd, int fdin, int fdout)
> > > +static int run_tool(const char *env_var, char *def_cmd, int fdin, int fdout)
> > > {
> > > int ret;
> > > char *cmd;
> > >
> > > - cmd = getenv("CR_IPTABLES");
> > > + cmd = getenv(env_var);
> > > if (!cmd)
> > > cmd = def_cmd;
> > > pr_debug("\tRunning %s for %s\n", cmd, def_cmd);
> > > @@ -1755,6 +1755,16 @@ static int run_iptables_tool(char *def_cmd, int fdin, int fdout)
> > > return ret;
> > > }
> > >
> > > +static int run_iptables_tool(char *def_cmd, int fdin, int fdout)
> > > +{
> > > + return run_tool("CR_IPTABLES", def_cmd, fdin, fdout);
> > > +}
> > > +
> > > +static int run_nftables_tool(char *def_cmd, int fdin, int fdout)
> > > +{
> > > + return run_tool("CR_NFTABLES", def_cmd, fdin, fdout);
> > > +}
> > > +
> > > static inline int dump_ifaddr(struct cr_imgset *fds)
> > > {
> > > struct cr_img *img = img_from_set(fds, CR_FD_IFADDR);
> > > @@ -1818,6 +1828,21 @@ static inline int dump_iptables(struct cr_imgset *fds)
> > > return 0;
> > > }
> > >
> > > +static inline int dump_nftables(struct cr_imgset *fds)
> > > +{
> > > + struct cr_img *img;
> > > +
> > > + /* we not dump nftables if nft utility isn't present */
> > > + if (!check_cmd_exists("nft"))
> > > + return 0;
> > > +
> > > + img = img_from_set(fds, CR_FD_NFTABLES);
> > > + if (run_nftables_tool("nft list ruleset", -1, img_raw_fd(img)))
> > > + return -1;
> > > +
> > > + return 0;
> > > +}
> > > +
> > > static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
> > > {
> > > void *buf, *o_buf;
> > > @@ -2082,6 +2107,38 @@ out:
> > > return ret;
> > > }
> > >
> > > +static inline int restore_nftables(int pid)
> >
> > why do we need to inline this function?
> >
> > > +{
> > > + int ret = -1;
> > > + struct cr_img *img;
> > > +
> > > + img = open_image(CR_FD_NFTABLES, O_RSTR, pid);
> > > + if (img == NULL)
> > > + return -1;
> > > + if (empty_image(img)) {
> > > + /* Backward compatibility */
> > > + pr_info("Skipping nft restore, no image");
> > > + ret = 0;
> > > + goto out;
> > > + }
> > > +
> > > + /*
> > > + * At this point we already know, that image may contain nftables info,
> > > + * if nft utility not present we need to warn user about possible
> > > + * problems
> > > + */
> > > + if (!check_cmd_exists("nft")) {
> > > + pr_warn("Skipping nft restore, no nft utility");
> >
> > This must be an error, and we need to return -1. If we have rules in an
> > image file, we don't want to ignore them.
> >
> > > + ret = 0;
> > > + goto out;
> > > + }
> > > +
> > > + ret = run_nftables_tool("nft -f /proc/self/fd/0", img_raw_fd(img), -1);
> >
> >
> > > +out:
> > > + close_image(img);
> > > + return ret;
> > > +}
> > > +
> > > int read_net_ns_img(void)
> > > {
> > > struct ns_id *ns;
> > > @@ -2299,6 +2356,8 @@ int dump_net_ns(struct ns_id *ns)
> > > ret = dump_rule(fds);
> > > if (!ret)
> > > ret = dump_iptables(fds);
> > > + if (!ret)
> > > + ret = dump_nftables(fds);
> > > if (!ret)
> > > ret = dump_netns_conf(ns, fds);
> > > } else if (ns->type != NS_ROOT) {
> > > @@ -2392,6 +2451,8 @@ static int prepare_net_ns_second_stage(struct ns_id *ns)
> > > ret = restore_rule(nsid);
> > > if (!ret)
> > > ret = restore_iptables(nsid);
> > > + if (!ret)
> > > + ret = restore_nftables(nsid);
> > > }
> > >
> > > if (!ret)
> > > diff --git a/criu/util.c b/criu/util.c
> > > index 2a3d7abc..b4b1564f 100644
> > > --- a/criu/util.c
> > > +++ b/criu/util.c
> > > @@ -795,6 +795,45 @@ struct vma_area *alloc_vma_area(void)
> > > return p;
> > > }
> > >
> > > +static int __check_cmd_exists(const char *path)
> > > +{
> > > + return (access(path, F_OK) == 0);
> > > +}
> > > +
> > > +int check_cmd_exists(const char *cmd)
> > > +{
> > > + int ret = 0;
> > > + char buf[255];
> > > + char *env_path, *dup, *s, *p;
> > > +
> > > + env_path = getenv("PATH");
> > > +
> > > + /* ok, the simply __check_cmd_exists */
> > > + if (!env_path || cmd[0] == '/')
> > > + return __check_cmd_exists(cmd);
> > > +
> > > + dup = strdup(env_path);
> > > +
> > dup = xstrdup(env_path);
> > if (dup == NULL)
> > return -1;
> >
> > > + /* let's try to find program in PATH */
> > > + s = dup;
> > > + p = NULL;
> > > + do {
> > > + p = strchr(s, ':');
> > > + if (p != NULL)
> > > + p[0] = 0;
> > > +
> > > + sprintf(buf, "%s/%s", s, cmd);
> > > + if ((ret = __check_cmd_exists(buf)))
> >
> > ret = __check_cmd_exists(buf);
> > if (ret)
> >
> > > + goto free;
> > > +
> > > + s = p + 1;
> > > + } while (p != NULL);
> > > +
> > > +free:
> > > + free(dup);
> >
> > xfree(dup);
> >
> > > + return ret;
> > > +}
> > > +
> > > int mkdirpat(int fd, const char *path, int mode)
> > > {
> > > size_t i;
> > > --
> > > 2.17.1
> > >
> > > _______________________________________________
> > > CRIU mailing list
> > > CRIU at openvz.org
> > > https://lists.openvz.org/mailman/listinfo/criu
More information about the CRIU
mailing list