[CRIU] [PATCHv2 1/2] net: add nftables c/r

Andrei Vagin (C) avagin at gmail.com
Thu Nov 14 07:29:15 MSK 2019


On Wed, Nov 13, 2019 at 02:14:06PM +0000, Alexander Mikhalitsyn wrote:
> On Wed, 13 Nov 2019 00:45:20 -0800
> Andrei Vagin <avagin at gmail.com> wrote:
> 
> > On Tue, Nov 12, 2019 at 07:06:42PM +0300, Alexander Mikhalitsyn wrote:
> > > From: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
> > > 
> > > Starting with CentOS 8, nft is used instead of iptables. But CRIU has never
> > > supported nft rules, so after c/r all rules are flushed.
> > > 
> > > The path to the nft tool can be changed via the CR_NFTABLES environment
> > > variable, similar to CR_IPTABLES.
> > 
> > 
> > Can we use libnftnl? This should be faster, because we will not need to
> > fork a new process and exec a binary.
> > 

> Good point, but we have some problems with that. Currently, when we run
> "nft list ruleset", its output has a special format (one that corresponds
> to the formal grammar defined in nftables/src/parser_bison.y). That format
> is not supported by libnftnl. Currently libnftnl only supports
> dumping the ruleset in "command format", as a sequence of nft commands that
> describe the current ruleset. But if we use this format on
> checkpoint in CRIU, then we have a problem on restore - we may need to
> execute a lot of "nft ..." commands. In terms of performance it may
> be even worse than what we are doing now. Of course, we could pull this parser
> from nftables into CRIU, but it's a lot of code and then we would need
> additional compile-time deps - Bison, for example. I don't know what to
> choose... :)

Maybe we can do it this way:

on dump:
  * create a netlink socket
  * send a request to list all rules
  * save raw netlink messages in an image file.

on restore:
  * create a netlink socket
  * send netlink messages from the image file into the socket.

"ip addr save" and "ip addr restore" work this way.
https://github.com/shemminger/iproute2/blob/master/ip/ipaddress.c#L1552
https://github.com/shemminger/iproute2/blob/master/ip/ipaddress.c#L1618

> 
> > > 
> > > Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
> > > Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn at virtuozzo.com>
> > > Signed-off-by: Alexander Mikhalitsyn <alexander at mihalicyn.com>
> > > ---
> > >  criu/image-desc.c         |  1 +
> > >  criu/include/image-desc.h |  1 +
> > >  criu/include/magic.h      |  1 +
> > >  criu/include/util.h       |  2 ++
> > >  criu/net.c                | 65 +++++++++++++++++++++++++++++++++++++--
> > >  criu/util.c               | 39 +++++++++++++++++++++++
> > >  6 files changed, 107 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/criu/image-desc.c b/criu/image-desc.c
> > > index 81cd0748..ae5d817f 100644
> > > --- a/criu/image-desc.c
> > > +++ b/criu/image-desc.c
> > > @@ -76,6 +76,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
> > >  	FD_ENTRY_F(RULE,	"rule-%u", O_NOBUF),
> > >  	FD_ENTRY_F(IPTABLES,	"iptables-%u", O_NOBUF),
> > >  	FD_ENTRY_F(IP6TABLES,	"ip6tables-%u", O_NOBUF),
> > > +	FD_ENTRY_F(NFTABLES,	"nftables-%u", O_NOBUF),
> > >  	FD_ENTRY_F(TMPFS_IMG,	"tmpfs-%u.tar.gz", O_NOBUF),
> > >  	FD_ENTRY_F(TMPFS_DEV,	"tmpfs-dev-%u.tar.gz", O_NOBUF),
> > >  	FD_ENTRY_F(AUTOFS,	"autofs-%u", O_NOBUF),
> > > diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h
> > > index fea80a71..6db8bf94 100644
> > > --- a/criu/include/image-desc.h
> > > +++ b/criu/include/image-desc.h
> > > @@ -42,6 +42,7 @@ enum {
> > >  	CR_FD_RULE,
> > >  	CR_FD_IPTABLES,
> > >  	CR_FD_IP6TABLES,
> > > +	CR_FD_NFTABLES,
> > >  	CR_FD_NETNS,
> > >  	CR_FD_NETNF_CT,
> > >  	CR_FD_NETNF_EXP,
> > > diff --git a/criu/include/magic.h b/criu/include/magic.h
> > > index 05101f43..1a583f4e 100644
> > > --- a/criu/include/magic.h
> > > +++ b/criu/include/magic.h
> > > @@ -103,6 +103,7 @@
> > >  #define TMPFS_DEV_MAGIC		RAW_IMAGE_MAGIC
> > >  #define IPTABLES_MAGIC		RAW_IMAGE_MAGIC
> > >  #define IP6TABLES_MAGIC		RAW_IMAGE_MAGIC
> > > +#define NFTABLES_MAGIC		RAW_IMAGE_MAGIC
> > >  #define NETNF_CT_MAGIC		RAW_IMAGE_MAGIC
> > >  #define NETNF_EXP_MAGIC		RAW_IMAGE_MAGIC
> > >  
> > > diff --git a/criu/include/util.h b/criu/include/util.h
> > > index a14be722..57b46dcc 100644
> > > --- a/criu/include/util.h
> > > +++ b/criu/include/util.h
> > > @@ -252,6 +252,8 @@ static inline bool issubpath(const char *path, const char *sub_path)
> > >  		(end == '/' || end == '\0');
> > >  }
> > >  
> > > +int check_cmd_exists(const char *cmd);
> > > +
> > >  /*
> > >   * mkdir -p
> > >   */
> > > diff --git a/criu/net.c b/criu/net.c
> > > index fe9b51ad..4737b604 100644
> > > --- a/criu/net.c
> > > +++ b/criu/net.c
> > > @@ -1739,12 +1739,12 @@ static int run_ip_tool(char *arg1, char *arg2, char *arg3, char *arg4, int fdin,
> > >  	return 0;
> > >  }
> > >  
> > > -static int run_iptables_tool(char *def_cmd, int fdin, int fdout)
> > > +static int run_tool(const char *env_var, char *def_cmd, int fdin, int fdout)
> > >  {
> > >  	int ret;
> > >  	char *cmd;
> > >  
> > > -	cmd = getenv("CR_IPTABLES");
> > > +	cmd = getenv(env_var);
> > >  	if (!cmd)
> > >  		cmd = def_cmd;
> > >  	pr_debug("\tRunning %s for %s\n", cmd, def_cmd);
> > > @@ -1755,6 +1755,16 @@ static int run_iptables_tool(char *def_cmd, int fdin, int fdout)
> > >  	return ret;
> > >  }
> > >  
> > > +static int run_iptables_tool(char *def_cmd, int fdin, int fdout)
> > > +{
> > > +	return run_tool("CR_IPTABLES", def_cmd, fdin, fdout);
> > > +}
> > > +
> > > +static int run_nftables_tool(char *def_cmd, int fdin, int fdout)
> > > +{
> > > +	return run_tool("CR_NFTABLES", def_cmd, fdin, fdout);
> > > +}
> > > +
> > >  static inline int dump_ifaddr(struct cr_imgset *fds)
> > >  {
> > >  	struct cr_img *img = img_from_set(fds, CR_FD_IFADDR);
> > > @@ -1818,6 +1828,21 @@ static inline int dump_iptables(struct cr_imgset *fds)
> > >  	return 0;
> > >  }
> > >  
> > > +static inline int dump_nftables(struct cr_imgset *fds)
> > > +{
> > > +	struct cr_img *img;
> > > +
> > > +	/* we not dump nftables if nft utility isn't present */
> > > +	if (!check_cmd_exists("nft"))
> > > +		return 0;
> > > +
> > > +	img = img_from_set(fds, CR_FD_NFTABLES);
> > > +	if (run_nftables_tool("nft list ruleset", -1, img_raw_fd(img)))
> > > +		return -1;
> > > +
> > > +	return 0;
> > > +}
> > > +
> > >  static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
> > >  {
> > >  	void *buf, *o_buf;
> > > @@ -2082,6 +2107,38 @@ out:
> > >  	return ret;
> > >  }
> > >  
> > > +static inline int restore_nftables(int pid)
> > 
> > why do we need to inline this function?
> > 
> > > +{
> > > +	int ret = -1;
> > > +	struct cr_img *img;
> > > +
> > > +	img = open_image(CR_FD_NFTABLES, O_RSTR, pid);
> > > +	if (img == NULL)
> > > +		return -1;
> > > +	if (empty_image(img)) {
> > > +		/* Backward compatibility */
> > > +		pr_info("Skipping nft restore, no image");
> > > +		ret = 0;
> > > +		goto out;
> > > +	}
> > > +
> > > +	/*
> > > +	 * At this point we already know, that image may contain nftables info,
> > > +	 * if nft utility not present we need to warn user about possible
> > > +	 * problems
> > > +	 */
> > > +	if (!check_cmd_exists("nft")) {
> > > +		pr_warn("Skipping nft restore, no nft utility");
> > 
> > This must be an error, and we need to return -1. If we have rules in an
> > image file, we don't want to ignore them.
> > 
> > > +		ret = 0;
> > > +		goto out;
> > > +	}
> > > +
> > > +	ret = run_nftables_tool("nft -f /proc/self/fd/0", img_raw_fd(img), -1);
> > 
> > 
> > > +out:
> > > +	close_image(img);
> > > +	return ret;
> > > +}
> > > +
> > >  int read_net_ns_img(void)
> > >  {
> > >  	struct ns_id *ns;
> > > @@ -2299,6 +2356,8 @@ int dump_net_ns(struct ns_id *ns)
> > >  			ret = dump_rule(fds);
> > >  		if (!ret)
> > >  			ret = dump_iptables(fds);
> > > +		if (!ret)
> > > +			ret = dump_nftables(fds);
> > >  		if (!ret)
> > >  			ret = dump_netns_conf(ns, fds);
> > >  	} else if (ns->type != NS_ROOT) {
> > > @@ -2392,6 +2451,8 @@ static int prepare_net_ns_second_stage(struct ns_id *ns)
> > >  			ret = restore_rule(nsid);
> > >  		if (!ret)
> > >  			ret = restore_iptables(nsid);
> > > +		if (!ret)
> > > +			ret = restore_nftables(nsid);
> > >  	}
> > >  
> > >  	if (!ret)
> > > diff --git a/criu/util.c b/criu/util.c
> > > index 2a3d7abc..b4b1564f 100644
> > > --- a/criu/util.c
> > > +++ b/criu/util.c
> > > @@ -795,6 +795,45 @@ struct vma_area *alloc_vma_area(void)
> > >  	return p;
> > >  }
> > >  
> > > +static int __check_cmd_exists(const char *path)
> > > +{
> > > +	return (access(path, F_OK) == 0);
> > > +}
> > > +
> > > +int check_cmd_exists(const char *cmd)
> > > +{
> > > +	int ret = 0;
> > > +	char buf[255];
> > > +	char *env_path, *dup, *s, *p;
> > > +
> > > +	env_path = getenv("PATH");
> > > +
> > > +	/* ok, the simply __check_cmd_exists */
> > > +	if (!env_path || cmd[0] == '/')
> > > +		return __check_cmd_exists(cmd);
> > > +
> > > +	dup = strdup(env_path);
> > > +
> > 	dup = xstrdup(env_path);
> > 	if (dup == NULL)
> > 		return -1;
> > 
> > > +	/* let's try to find program in PATH */
> > > +	s = dup;
> > > +	p = NULL;
> > > +	do {
> > > +		p = strchr(s, ':');
> > > +		if (p != NULL)
> > > +			p[0] = 0;
> > > +
> > > +		sprintf(buf, "%s/%s", s, cmd);
> > > +		if ((ret = __check_cmd_exists(buf)))
> > 
> > 		ret = __check_cmd_exists(buf);
> > 		if (ret)
> > 
> > > +			goto free;
> > > +
> > > +		s = p + 1;
> > > +	} while (p != NULL);
> > > +
> > > +free:
> > > +	free(dup);
> > 
> > 	xfree(dup);
> > 
> > > +	return ret;
> > > +}
> > > +
> > >  int mkdirpat(int fd, const char *path, int mode)
> > >  {
> > >  	size_t i;
> > > -- 
> > > 2.17.1
> > > 
> > > _______________________________________________
> > > CRIU mailing list
> > > CRIU at openvz.org
> > > https://lists.openvz.org/mailman/listinfo/criu


More information about the CRIU mailing list