[CRIU] [PATCH v3 3/5] net/sysctl: c/r all except *.conf.* and *.neigh.*

Pavel Emelyanov xemul at virtuozzo.com
Fri Jul 15 10:39:20 PDT 2016


On 07/14/2016 04:51 PM, Pavel Tikhomirov wrote:
> Add a prepare_sysctl_requests_filtered helper that searches a sysctl
> directory for all (filtered) sysctl paths and allocates a named sysctl_req
> for each of them, so that they can all be dumped later with sysctl_op.

NAK, sucking in everything based on just a name regex match is a bad idea.
Write out the needed sysctls by name and update the list from time to time.

> Add NamedSysctlEntry'es to dump sysctl name+value pairs to image.
> 
> Skip:
> 1. non-(readable)writable sysctls as we can do nothing for them through
> procfs
> 2. conf and neigh directories are per-device and will be restored
> separately after the devices are restored; they might also need special
> care, e.g. conf sysctls need to be restored in a special order
> 3. nf_log.xx if it is "NONE" as we can not set it with sysctl_op as
> sysctl_write_char prints "\n" at the end and nf_log_proc_dostring does
> not like '\n' =)
> 4. if sysctl do not exist on restore - !CTL_FLAGS_HAS
> 5. skip igmp_link_local_mcast_reports if it is not safe in the current
> kernel, see https://bugzilla.redhat.com/show_bug.cgi?id=1352177
> 
> *We have now 32 such net.* sysctls writable in VZ7 CT
> https://jira.sw.ru/browse/PSBM-48397
> 
> v3: skip igmp_link_local_mcast_reports
> Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
> ---
>  criu/include/sysctl.h |  16 ++++
>  criu/net.c            | 224 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  criu/sysctl.c         | 122 +++++++++++++++++++++++++++
>  images/netdev.proto   |  14 ++--
>  images/sysctl.proto   |   5 ++
>  5 files changed, 375 insertions(+), 6 deletions(-)
> 
> diff --git a/criu/include/sysctl.h b/criu/include/sysctl.h
> index e271f5e..95312d7 100644
> --- a/criu/include/sysctl.h
> +++ b/criu/include/sysctl.h
> @@ -9,6 +9,9 @@ struct sysctl_req {
>  };
>  
>  extern int sysctl_op(struct sysctl_req *req, size_t nr_req, int op, unsigned int ns);
> +extern int prepare_sysctl_requests_filtered(char *path, char *filter,
> +		struct sysctl_req **reqs, size_t *n_reqs);
> +extern void free_sysctl_requests(struct sysctl_req *reqs, size_t n_reqs);
>  
>  enum {
>  	CTL_READ,
> @@ -38,4 +41,17 @@ enum {
>  #define CTL_FLAGS_HAS		2
>  #define CTL_FLAGS_READ_EIO_SKIP	4
>  
> +/*
> + * Max sysctl path is 70 chars:
> + * "/proc/sys/net/ipv4/conf/virbr0-nic/igmpv2_unsolicited_report_interval"
> + */
> +#define PROC_PATH_MAX_LEN 100
> +/*
> + * We have only two sysctls longer than 256:
> + * /proc/sys/dev/cdrom/info - CDROM_STR_SIZE=1000
> + * /proc/sys/net/ipv4/tcp_allowed_congestion_control - TCP_CA_BUF_MAX=2048
> + * first one is readonly and second is hostonly
> + */
> +#define PROC_ARG_MAX_LEN 257
> +
>  #endif /* __CR_SYSCTL_H__ */
> diff --git a/criu/net.c b/criu/net.c
> index a6d5f00..3ab950b 100644
> --- a/criu/net.c
> +++ b/criu/net.c
> @@ -1144,6 +1144,219 @@ static inline int dump_iptables(struct cr_imgset *fds)
>  	return 0;
>  }
>  
> +static void free_nses(NamedSysctlEntry **nses, size_t n_nses)
> +{
> +	int i;
> +
> +	if (nses) {
> +		if (nses[0]) {
> +			for (i = 0; i < n_nses; i++)
> +				xfree(nses[i]->name);
> +
> +			if (nses[0]->se) {
> +				for (i = 0; i < n_nses; i++)
> +					xfree(nses[i]->se->sarg);
> +
> +				xfree(nses[0]->se);
> +			}
> +			xfree(nses[0]);
> +		}
> +		xfree(nses);
> +	}
> +}
> +
> +static int alloc_nses(NamedSysctlEntry ***nses, size_t n_nses)
> +{
> +	int i;
> +
> +	*nses = xmalloc(n_nses * sizeof(NamedSysctlEntry *));
> +	if (!*nses) {
> +		pr_perror("Failed to xmalloc nses pointers");
> +		return -1;
> +	}
> +
> +	(*nses)[0] = xmalloc(n_nses * sizeof(NamedSysctlEntry));
> +	if (!(*nses)[0]) {
> +		pr_perror("Failed to xmalloc nses");
> +		return -1;
> +	}
> +
> +	for (i = 0; i < n_nses; i++) {
> +		(*nses)[i] = (*nses)[0] + i;
> +		named_sysctl_entry__init((*nses)[i]);
> +	}
> +
> +	(*nses)[0]->se = xmalloc(n_nses * sizeof(SysctlEntry));
> +	if (!(*nses)[0]->se) {
> +		pr_perror("Failed to xmalloc se");
> +		return -1;
> +	}
> +
> +	for (i = 0; i < n_nses; i++) {
> +		(*nses)[i]->se = (*nses)[0]->se + i;
> +		sysctl_entry__init((*nses)[i]->se);
> +	}
> +
> +	for (i = 0; i < n_nses; i++) {
> +		SysctlEntry *se = (*nses)[i]->se;
> +
> +		se->sarg = xmalloc(sizeof(char) * PROC_ARG_MAX_LEN);
> +		if (!se->sarg) {
> +			pr_perror("Failed to xmalloc se sarg");
> +			return -1;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static int sysctl_arg_to_int(char *sarg, int *iarg)
> +{
> +	int ret;
> +	char buf[PROC_ARG_MAX_LEN];
> +
> +	ret = sscanf(sarg, "%d%s", iarg, buf);
> +	if (ret != 1)
> +		return 0;
> +	return 1;
> +}
> +
> +#define SYSCTL_NET_DIR "/proc/sys/net"
> +#define FILTER_MAX_LEN 60
> +#define CONF_OR_NEIGH_FILTER "conf|neigh"
> +#define IGMP_LL_MCAST_REPORTS_FILTER "|igmp_link_local_mcast_reports"
> +
> +static int dump_netns_sysctls(NetnsEntry *netns)
> +{
> +	struct sysctl_req *reqs = NULL;
> +	size_t n_reqs = 0;
> +	int ret = 0;
> +	int i;
> +	char filter[FILTER_MAX_LEN];
> +
> +	sprintf(filter, "%s%s", CONF_OR_NEIGH_FILTER,
> +		kdat.sysctl_igmp_link_local_mcast_reports_safe ?
> +		"" : IGMP_LL_MCAST_REPORTS_FILTER);
> +
> +	ret = prepare_sysctl_requests_filtered(SYSCTL_NET_DIR, filter,
> +	                             &reqs, &n_reqs);
> +	if (ret)
> +		goto err_req;
> +
> +	netns->n_nses = n_reqs;
> +	ret = alloc_nses(&netns->nses, netns->n_nses);
> +	if (ret)
> +		goto err_req;
> +
> +	for (i = 0; i < netns->n_nses; i++) {
> +		reqs[i].arg = netns->nses[i]->se->sarg;
> +		netns->nses[i]->name = reqs[i].name;
> +	}
> +
> +	ret = sysctl_op(reqs, netns->n_nses, CTL_READ, CLONE_NEWNET);
> +	if (ret != 0) {
> +		pr_err("Failed to read net sysctls\n");
> +		goto err_free;
> +	}
> +
> +	for (i = 0; i < netns->n_nses; i++) {
> +		char *sarg = netns->nses[i]->se->sarg;
> +
> +		if (reqs[i].flags & CTL_FLAGS_HAS) {
> +			/* Strip trailing newline */
> +			if (sarg[strlen(sarg) - 1] == '\n')
> +				sarg[strlen(sarg) - 1] = '\0';
> +
> +			if (sysctl_arg_to_int(sarg, &netns->nses[i]->se->iarg)) {
> +				netns->nses[i]->se->type = SYSCTL_TYPE__CTL_32;
> +				netns->nses[i]->se->has_iarg = true;
> +				netns->nses[i]->se->sarg = NULL;
> +				xfree(sarg);
> +				continue;
> +			}
> +			netns->nses[i]->se->type = SYSCTL_TYPE__CTL_STR;
> +
> +			/*
> +			 * Skip nf_log/xx if it is set to default "NONE"
> +			 */
> +			if (!strstr(reqs[i].name, "/proc/sys/net/netfilter/nf_log")
> +			     || strcmp(sarg, "NONE"))
> +				continue;
> +
> +			pr_info("Skipping net sysctl %s\n", reqs[i].name);
> +		}
> +
> +		netns->nses[i]->se->sarg = NULL;
> +		xfree(sarg);
> +	}
> +
> +err_free:
> +	xfree(reqs);
> +	return ret;
> +err_req:
> +	free_sysctl_requests(reqs, n_reqs);
> +	return ret;
> +}
> +
> +#define IGMP_LL_MCAST_REPORTS_PATH "/proc/sys/net/ipv4/igmp_link_local_mcast_reports"
> +
> +static int restore_netns_sysctls(NetnsEntry *netns)
> +{
> +	struct sysctl_req *req;
> +	int i, ri;
> +	int ret = 0;
> +
> +	req = xmalloc(sizeof(struct sysctl_req) * netns->n_nses);
> +	if (!req) {
> +		pr_perror("Failed to alloc sysctl_req array");
> +		return -1;
> +	}
> +
> +	for (i = 0, ri = 0; i < netns->n_nses; i++) {
> +		NamedSysctlEntry *nse = netns->nses[i];
> +
> +		if (!kdat.sysctl_igmp_link_local_mcast_reports_safe
> +		    && !strcmp(nse->name, IGMP_LL_MCAST_REPORTS_PATH))
> +			continue;
> +
> +		/* Skip restore not writable sysctls */
> +		if (access(nse->name, W_OK) != 0)
> +			continue;
> +
> +		switch (nse->se->type) {
> +			case SYSCTL_TYPE__CTL_32:
> +				/* skip non-existing sysctl */
> +				if (!nse->se->has_iarg)
> +					continue;
> +
> +				req[ri].type = CTL_32;
> +				req[ri].arg = &nse->se->iarg;
> +				break;
> +			case SYSCTL_TYPE__CTL_STR:
> +				/* skip non-existing sysctl */
> +				if (!nse->se->sarg)
> +					continue;
> +
> +				req[ri].type = CTL_STR(strlen(nse->se->sarg));
> +				req[ri].arg = nse->se->sarg;
> +				break;
> +			default:
> +				continue;
> +		}
> +
> +		req[ri].name = nse->name;
> +		req[ri].flags = 0;
> +		ri++;
> +	}
> +
> +	ret = sysctl_op(req, ri, CTL_WRITE, CLONE_NEWNET);
> +	if (ret < 0)
> +		pr_err("Failed to write net sysctls\n");
> +
> +	xfree(req);
> +	return ret;
> +}
> +
>  static int dump_netns_conf(struct cr_imgset *fds)
>  {
>  	int ret = -1;
> @@ -1211,6 +1424,10 @@ static int dump_netns_conf(struct cr_imgset *fds)
>  		}
>  	}
>  
> +	ret = dump_netns_sysctls(&netns);
> +	if (ret < 0)
> +		goto err_free;
> +
>  	ret = ipv4_conf_op("default", netns.def_conf4, size4, CTL_READ, NULL);
>  	if (ret < 0)
>  		goto err_free;
> @@ -1227,6 +1444,7 @@ static int dump_netns_conf(struct cr_imgset *fds)
>  
>  	ret = pb_write_one(img_from_set(fds, CR_FD_NETNS), &netns, PB_NETNS);
>  err_free:
> +	free_nses(netns.nses, netns.n_nses);
>  	xfree(netns.def_conf4);
>  	xfree(netns.all_conf4);
>  	xfree(def_confs4);
> @@ -1347,6 +1565,12 @@ static int restore_netns_conf(int pid, NetnsEntry **netns)
>  		return -1;
>  	}
>  
> +	if ((*netns)->nses) {
> +		ret = restore_netns_sysctls(*netns);
> +		if (ret)
> +			goto out;
> +	}
> +
>  	if ((*netns)->def_conf4) {
>  		ret = ipv4_conf_op("all", (*netns)->all_conf4, (*netns)->n_all_conf4, CTL_WRITE, NULL);
>  		if (ret)
> diff --git a/criu/sysctl.c b/criu/sysctl.c
> index 87bd267..eac058d 100644
> --- a/criu/sysctl.c
> +++ b/criu/sysctl.c
> @@ -6,6 +6,8 @@
>  #include <sys/types.h>
>  #include <sys/wait.h>
>  #include <sched.h>
> +#include <sys/stat.h>
> +#include <regex.h>
>  
>  #include "asm/types.h"
>  #include "namespaces.h"
> @@ -480,3 +482,123 @@ int sysctl_op(struct sysctl_req *req, size_t nr_req, int op, unsigned int ns)
>  	close(fd);
>  	return ret;
>  }
> +
> +static int match_pattern(char *string, char *pattern)
> +{
> +	int status;
> +	regex_t re;
> +
> +	if (regcomp(&re, pattern, REG_NOSUB|REG_EXTENDED) != 0) {
> +		pr_perror("Failed to regcomp \"%s\"", pattern);
> +		return 0;
> +	}
> +
> +	status = regexec(&re, string, (size_t) 0, NULL, 0);
> +	regfree(&re);
> +
> +	if (status != 0) {
> +		return 0;
> +	}
> +	return 1;
> +}
> +
> +void free_sysctl_requests(struct sysctl_req *reqs, size_t n_reqs)
> +{
> +	int i;
> +
> +	for (i = 0; i < n_reqs; i++)
> +		xfree(reqs[i].name);
> +	xfree(reqs);
> +}
> +
> +static int prepare_sysctl_request(char *name, struct sysctl_req **preq,
> +		size_t *n_reqs)
> +{
> +	struct sysctl_req *req;
> +
> +	req = xrealloc(*preq, ++(*n_reqs) * sizeof(struct sysctl_req));
> +	if (!req) {
> +		(*n_reqs)--;
> +		pr_perror("Failed to xrealloc requests");
> +		return -1;
> +	}
> +
> +	*preq = req;
> +	req = &(*preq)[*n_reqs - 1];
> +
> +	req->name = xmalloc(sizeof(char) * (strlen(name) + 1));
> +	if (!req->name) {
> +		pr_perror("Failed to xmalloc request name");
> +		return -1;
> +	}
> +
> +	sprintf(req->name, "%s", name);
> +	req->arg = NULL;
> +	req->type = CTL_STR(PROC_ARG_MAX_LEN);
> +	req->flags = CTL_FLAGS_OPTIONAL;
> +
> +	return 0;
> +}
> +
> +/*
> + * Search sysctls in directory, allocate and setup sysctl requests
> + * to dump them.
> + * @path - where to look sysctls for
> + * @filter - regex to filter unwanted subdirs
> + * @reqs - return pointer to requests
> + * @n_reqs - return number of sysctls found
> + */
> +int prepare_sysctl_requests_filtered(char *path, char *filter,
> +		struct sysctl_req **reqs, size_t *n_reqs)
> +{
> +	DIR *dp;
> +	struct dirent *de;
> +	int ret = 0;
> +
> +	dp = opendir(path);
> +	if (!dp) {
> +		pr_perror("Failed to open %s", path);
> +		return -1;
> +	}
> +
> +	while ((de = readdir(dp))) {
> +		char dir[PROC_PATH_MAX_LEN];
> +		struct stat st;
> +
> +		if (!strcmp(de->d_name, ".") ||
> +		    !strcmp(de->d_name, ".."))
> +			continue;
> +
> +		/* Skip specified directories */
> +		if (match_pattern(de->d_name, filter))
> +			continue;
> +
> +		sprintf(dir, "%s/%s", path, de->d_name);
> +
> +		ret = stat(dir, &st);
> +		if (ret == -1) {
> +			pr_perror("Failed to stat %s", dir);
> +			goto err_close;
> +		} else {
> +			if (S_ISDIR(st.st_mode)) {
> +				prepare_sysctl_requests_filtered(dir, filter, reqs, n_reqs);
> +			} else if (st.st_mode & S_IRUSR &&
> +				   st.st_mode & S_IWUSR) {
> +				/*
> +				 * Need the check above to exclude sysctls like
> +				 * net.netfilter.nf_conntrack_buckets, which are
> +				 * readonly, from being dumped to image. They can
> +				 * not be restored through procfs. And of course
> +				 * ones that can't be read
> +				 */
> +				ret = prepare_sysctl_request(dir, reqs, n_reqs);
> +				if (ret == -1)
> +					goto err_close;
> +			}
> +		}
> +	}
> +
> +err_close:
> +	closedir(dp);
> +	return ret;
> +}
> diff --git a/images/netdev.proto b/images/netdev.proto
> index 19b501c..08f7eb9 100644
> --- a/images/netdev.proto
> +++ b/images/netdev.proto
> @@ -41,12 +41,14 @@ message net_device_entry {
>  }
>  
>  message netns_entry {
> -	repeated int32 def_conf		= 1;
> -	repeated int32 all_conf		= 2;
> +	repeated int32 def_conf			= 1;
> +	repeated int32 all_conf			= 2;
>  
> -	repeated sysctl_entry def_conf4	= 3;
> -	repeated sysctl_entry all_conf4	= 4;
> +	repeated sysctl_entry def_conf4		= 3;
> +	repeated sysctl_entry all_conf4		= 4;
>  
> -	repeated sysctl_entry def_conf6	= 5;
> -	repeated sysctl_entry all_conf6	= 6;
> +	repeated sysctl_entry def_conf6		= 5;
> +	repeated sysctl_entry all_conf6		= 6;
> +
> +	repeated named_sysctl_entry nses	= 7;
>  }
> diff --git a/images/sysctl.proto b/images/sysctl.proto
> index 4ecdf27..5927386 100644
> --- a/images/sysctl.proto
> +++ b/images/sysctl.proto
> @@ -11,3 +11,8 @@ message sysctl_entry {
>  	optional int32 iarg		= 2;
>  	optional string sarg		= 3;
>  }
> +
> +message named_sysctl_entry {
> +	required string name		= 1;
> +	required sysctl_entry se	= 2;
> +}
> 



More information about the CRIU mailing list