[CRIU] [PATCH v3 3/5] net/sysctl: c/r all except *.conf.* and *.neigh.*

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Thu Jul 14 06:51:12 PDT 2016


Add the prepare_sysctl_requests_filtered helper, which walks a sysctl
directory, collects all (filtered) sysctl paths and allocates a named
sysctl_req for each of them, to later dump them all with sysctl_op.

Add NamedSysctlEntry'es to dump sysctl name+value pairs to image.

Skip:
1. sysctls that are not both readable and writable, as we can do nothing
for them through procfs
2. conf and neigh directories are per-device and will be restored after
devices restore separately, also they might need special care like conf
sysctls need special order
3. nf_log.xx if it is "NONE" as we can not set it with sysctl_op as
sysctl_write_char prints "\n" at the end and nf_log_proc_dostring does
not like '\n' =)
4. if sysctl do not exist on restore - !CTL_FLAGS_HAS
5. skip igmp_link_local_mcast_reports if it is not safe in current
kernel see https://bugzilla.redhat.com/show_bug.cgi?id=1352177

*We have now 32 such net.* sysctls writable in VZ7 CT
https://jira.sw.ru/browse/PSBM-48397

v3: skip igmp_link_local_mcast_reports
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
 criu/include/sysctl.h |  16 ++++
 criu/net.c            | 224 ++++++++++++++++++++++++++++++++++++++++++++++++++
 criu/sysctl.c         | 122 +++++++++++++++++++++++++++
 images/netdev.proto   |  14 ++--
 images/sysctl.proto   |   5 ++
 5 files changed, 375 insertions(+), 6 deletions(-)

diff --git a/criu/include/sysctl.h b/criu/include/sysctl.h
index e271f5e..95312d7 100644
--- a/criu/include/sysctl.h
+++ b/criu/include/sysctl.h
@@ -9,6 +9,9 @@ struct sysctl_req {
 };
 
 extern int sysctl_op(struct sysctl_req *req, size_t nr_req, int op, unsigned int ns);
+extern int prepare_sysctl_requests_filtered(char *path, char *filter,
+		struct sysctl_req **reqs, size_t *n_reqs);
+extern void free_sysctl_requests(struct sysctl_req *reqs, size_t n_reqs);
 
 enum {
 	CTL_READ,
@@ -38,4 +41,17 @@ enum {
 #define CTL_FLAGS_HAS		2
 #define CTL_FLAGS_READ_EIO_SKIP	4
 
+/*
+ * Max sysctl path is 70 chars:
+ * "/proc/sys/net/ipv4/conf/virbr0-nic/igmpv2_unsolicited_report_interval"
+ */
+#define PROC_PATH_MAX_LEN 100
+/*
+ * We have only two sysctls longer than 256:
+ * /proc/sys/dev/cdrom/info - CDROM_STR_SIZE=1000
+ * /proc/sys/net/ipv4/tcp_allowed_congestion_control - TCP_CA_BUF_MAX=2048
+ * first one is readonly and second is hostonly
+ */
+#define PROC_ARG_MAX_LEN 257
+
 #endif /* __CR_SYSCTL_H__ */
diff --git a/criu/net.c b/criu/net.c
index a6d5f00..3ab950b 100644
--- a/criu/net.c
+++ b/criu/net.c
@@ -1144,6 +1144,219 @@ static inline int dump_iptables(struct cr_imgset *fds)
 	return 0;
 }
 
+/*
+ * Release an nses array: the per-entry name and sarg strings, then the
+ * SysctlEntry and NamedSysctlEntry slabs that nses[0]->se and nses[0]
+ * point to, and finally the array of pointers itself.
+ */
+static void free_nses(NamedSysctlEntry **nses, size_t n_nses)
+{
+	size_t i;
+
+	/* nses[0] must not be read when the array holds no entries */
+	if (nses && n_nses && nses[0]) {
+		for (i = 0; i < n_nses; i++)
+			xfree(nses[i]->name);
+		for (i = 0; nses[0]->se && i < n_nses; i++)
+			xfree(nses[i]->se->sarg);
+		xfree(nses[0]->se);
+		xfree(nses[0]);
+	}
+	xfree(nses);
+}
+
+/*
+ * Allocate n_nses entries: two slabs anchored at (*nses)[0] and its ->se,
+ * plus per-entry sarg buffers. On -1 the caller runs free_nses() on *nses.
+ */
+static int alloc_nses(NamedSysctlEntry ***nses, size_t n_nses)
+{
+	NamedSysctlEntry **tab;
+	size_t i;
+
+	*nses = NULL;
+	if (!n_nses) /* a zero-sized array would have no tab[0] to anchor on */
+		return 0;
+	*nses = tab = xmalloc(n_nses * sizeof(*tab));
+	if (!tab) {
+		pr_perror("Failed to xmalloc nses pointers");
+		return -1;
+	}
+	tab[0] = xmalloc(n_nses * sizeof(*tab[0]));
+	if (!tab[0]) {
+		pr_perror("Failed to xmalloc nses");
+		return -1;
+	}
+	for (i = 0; i < n_nses; i++) {
+		tab[i] = tab[0] + i;
+		named_sysctl_entry__init(tab[i]);
+	}
+	tab[0]->se = xmalloc(n_nses * sizeof(*tab[0]->se));
+	if (!tab[0]->se) {
+		pr_perror("Failed to xmalloc se");
+		return -1;
+	}
+	for (i = 0; i < n_nses; i++) {
+		tab[i]->se = tab[0]->se + i;
+		sysctl_entry__init(tab[i]->se);
+	}
+	for (i = 0; i < n_nses; i++) {
+		tab[i]->se->sarg = xmalloc(PROC_ARG_MAX_LEN);
+		if (!tab[i]->se->sarg) {
+			pr_perror("Failed to xmalloc se sarg");
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/* Return 1 if sarg holds a single decimal integer (stored to *iarg), else 0 */
+static int sysctl_arg_to_int(char *sarg, int *iarg)
+{
+	char trailing;
+
+	/*
+	 * Exactly one conversion means only blanks follow the number.
+	 */
+	return sscanf(sarg, "%d %c", iarg, &trailing) == 1;
+}
+
+#define SYSCTL_NET_DIR "/proc/sys/net"
+#define FILTER_MAX_LEN 60
+#define CONF_OR_NEIGH_FILTER "conf|neigh"
+#define IGMP_LL_MCAST_REPORTS_FILTER "|igmp_link_local_mcast_reports"
+
+/*
+ * Read the filtered net sysctls into netns->nses: lone integers are kept
+ * as CTL_32, other values as CTL_STR, skipped entries keep no value.
+ */
+static int dump_netns_sysctls(NetnsEntry *netns)
+{
+	struct sysctl_req *reqs = NULL;
+	size_t n_reqs = 0, i;
+	int ret = 0;
+	char filter[FILTER_MAX_LEN];
+
+	sprintf(filter, "%s%s", CONF_OR_NEIGH_FILTER,
+		kdat.sysctl_igmp_link_local_mcast_reports_safe ?
+		"" : IGMP_LL_MCAST_REPORTS_FILTER);
+	ret = prepare_sysctl_requests_filtered(SYSCTL_NET_DIR, filter,
+	                             &reqs, &n_reqs);
+	/* Also leave when no sysctl was found: there is nothing to read */
+	if (ret || !n_reqs)
+		goto err_req;
+
+	netns->n_nses = n_reqs;
+	ret = alloc_nses(&netns->nses, netns->n_nses);
+	if (ret)
+		goto err_req;
+
+	/* Name strings are handed over to nses; reqs is freed shallowly below */
+	for (i = 0; i < netns->n_nses; i++) {
+		reqs[i].arg = netns->nses[i]->se->sarg;
+		netns->nses[i]->name = reqs[i].name;
+	}
+
+	ret = sysctl_op(reqs, netns->n_nses, CTL_READ, CLONE_NEWNET);
+	if (ret != 0) {
+		pr_err("Failed to read net sysctls\n");
+		goto err_free;
+	}
+
+	for (i = 0; i < netns->n_nses; i++) {
+		SysctlEntry *se = netns->nses[i]->se;
+		char *sarg = se->sarg;
+
+		if (reqs[i].flags & CTL_FLAGS_HAS) {
+			/* Strip trailing newline; an empty value has none */
+			size_t len = strlen(sarg);
+			if (len && sarg[len - 1] == '\n')
+				sarg[len - 1] = '\0';
+
+			if (sysctl_arg_to_int(sarg, &se->iarg)) {
+				se->type = SYSCTL_TYPE__CTL_32;
+				se->has_iarg = true;
+				se->sarg = NULL;
+				xfree(sarg);
+				continue;
+			}
+			se->type = SYSCTL_TYPE__CTL_STR;
+			/* Skip nf_log/xx if it is set to default "NONE" */
+			if (!strstr(reqs[i].name, "/proc/sys/net/netfilter/nf_log")
+			     || strcmp(sarg, "NONE"))
+				continue;
+
+			pr_info("Skipping net sysctl %s\n", reqs[i].name);
+		}
+		se->sarg = NULL;
+		xfree(sarg);
+	}
+
+err_free:
+	xfree(reqs);
+	return ret;
+err_req:
+	free_sysctl_requests(reqs, n_reqs);
+	return ret;
+}
+
+#define IGMP_LL_MCAST_REPORTS_PATH "/proc/sys/net/ipv4/igmp_link_local_mcast_reports"
+
+/*
+ * Write the sysctls saved in netns->nses back with one sysctl_op call.
+ * Left out are entries that are not writable here, that carry no value,
+ * and igmp_link_local_mcast_reports when kdat marks it as not safe.
+ * Returns the sysctl_op result, or -1 if the request array can't be got.
+ */
+static int restore_netns_sysctls(NetnsEntry *netns)
+{
+	struct sysctl_req *req, *cur;
+	size_t idx, n_used = 0;
+	int ret;
+
+	req = xmalloc(sizeof(struct sysctl_req) * netns->n_nses);
+	if (!req) {
+		pr_perror("Failed to alloc sysctl_req array");
+		return -1;
+	}
+
+	for (idx = 0; idx < netns->n_nses; idx++) {
+		NamedSysctlEntry *nse = netns->nses[idx];
+		SysctlEntry *se = nse->se;
+
+		if (!kdat.sysctl_igmp_link_local_mcast_reports_safe
+		    && !strcmp(nse->name, IGMP_LL_MCAST_REPORTS_PATH))
+			continue;
+
+		/* Skip restore not writable sysctls */
+		if (access(nse->name, W_OK) != 0)
+			continue;
+
+		cur = &req[n_used];
+		if (se->type == SYSCTL_TYPE__CTL_32 && se->has_iarg) {
+			cur->type = CTL_32;
+			cur->arg = &se->iarg;
+		} else if (se->type == SYSCTL_TYPE__CTL_STR && se->sarg) {
+			cur->type = CTL_STR(strlen(se->sarg));
+			cur->arg = se->sarg;
+		} else {
+			/* unknown type or no value dumped for this sysctl */
+			continue;
+		}
+
+		/* only a fully set up slot is counted in */
+		cur->name = nse->name;
+		cur->flags = 0;
+		n_used++;
+	}
+
+	/* req[0..n_used) now holds just the entries that were kept */
+	ret = sysctl_op(req, n_used, CTL_WRITE, CLONE_NEWNET);
+	if (ret < 0)
+		pr_err("Failed to write net sysctls\n");
+
+	xfree(req);
+	return ret;
+}
+
 static int dump_netns_conf(struct cr_imgset *fds)
 {
 	int ret = -1;
@@ -1211,6 +1424,10 @@ static int dump_netns_conf(struct cr_imgset *fds)
 		}
 	}
 
+	ret = dump_netns_sysctls(&netns);
+	if (ret < 0)
+		goto err_free;
+
 	ret = ipv4_conf_op("default", netns.def_conf4, size4, CTL_READ, NULL);
 	if (ret < 0)
 		goto err_free;
@@ -1227,6 +1444,7 @@ static int dump_netns_conf(struct cr_imgset *fds)
 
 	ret = pb_write_one(img_from_set(fds, CR_FD_NETNS), &netns, PB_NETNS);
 err_free:
+	free_nses(netns.nses, netns.n_nses);
 	xfree(netns.def_conf4);
 	xfree(netns.all_conf4);
 	xfree(def_confs4);
@@ -1347,6 +1565,12 @@ static int restore_netns_conf(int pid, NetnsEntry **netns)
 		return -1;
 	}
 
+	if ((*netns)->nses) {
+		ret = restore_netns_sysctls(*netns);
+		if (ret)
+			goto out;
+	}
+
 	if ((*netns)->def_conf4) {
 		ret = ipv4_conf_op("all", (*netns)->all_conf4, (*netns)->n_all_conf4, CTL_WRITE, NULL);
 		if (ret)
diff --git a/criu/sysctl.c b/criu/sysctl.c
index 87bd267..eac058d 100644
--- a/criu/sysctl.c
+++ b/criu/sysctl.c
@@ -6,6 +6,8 @@
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <sched.h>
+#include <sys/stat.h>
+#include <regex.h>
 
 #include "asm/types.h"
 #include "namespaces.h"
@@ -480,3 +482,123 @@ int sysctl_op(struct sysctl_req *req, size_t nr_req, int op, unsigned int ns)
 	close(fd);
 	return ret;
 }
+
+/* Return 1 if the extended regex @pattern matches inside @string, else 0 */
+static int match_pattern(char *string, char *pattern)
+{
+	char errbuf[128];
+	regex_t re;
+	int status;
+
+	status = regcomp(&re, pattern, REG_NOSUB|REG_EXTENDED);
+	if (status != 0) {
+		/* regcomp reports through its return code, not errno */
+		regerror(status, &re, errbuf, sizeof(errbuf));
+		pr_err("Failed to regcomp \"%s\": %s\n", pattern, errbuf);
+		return 0;
+	}
+	status = regexec(&re, string, (size_t) 0, NULL, 0);
+	regfree(&re);
+	return status == 0;
+}
+
+/* Free the name of each of the n_reqs requests, then the array itself. */
+void free_sysctl_requests(struct sysctl_req *reqs, size_t n_reqs)
+{
+	/* n_reqs doubles as the countdown index; order does not matter */
+	while (n_reqs--)
+		xfree(reqs[n_reqs].name);
+	xfree(reqs);
+}
+
+/*
+ * Grow the *preq array by one request (bumping *n_reqs) set up to read the
+ * sysctl @name as an optional string. Returns 0, or -1 if allocation fails.
+ */
+static int prepare_sysctl_request(char *name, struct sysctl_req **preq,
+		size_t *n_reqs)
+{
+	struct sysctl_req *grown, *slot;
+
+	grown = xrealloc(*preq, (*n_reqs + 1) * sizeof(struct sysctl_req));
+	if (!grown) {
+		pr_perror("Failed to xrealloc requests");
+		return -1;
+	}
+	*preq = grown;
+	slot = &grown[(*n_reqs)++];
+
+	slot->name = xmalloc(sizeof(char) * (strlen(name) + 1));
+	if (!slot->name) {
+		pr_perror("Failed to xmalloc request name");
+		return -1;
+	}
+	sprintf(slot->name, "%s", name);
+	slot->arg = NULL;
+	slot->type = CTL_STR(PROC_ARG_MAX_LEN);
+	slot->flags = CTL_FLAGS_OPTIONAL;
+	return 0;
+}
+
+/*
+ * Search sysctls in directory, allocate and setup sysctl requests
+ * to dump them.
+ * @path - where to look sysctls for
+ * @filter - regex, directory entries whose name matches it are skipped
+ * @reqs - return pointer to requests
+ * @n_reqs - return number of sysctls found
+ * Returns 0 on success and -1 on error, subdirectory errors included.
+ */
+int prepare_sysctl_requests_filtered(char *path, char *filter,
+		struct sysctl_req **reqs, size_t *n_reqs)
+{
+	DIR *dp;
+	struct dirent *de;
+	int ret = 0;
+
+	dp = opendir(path);
+	if (!dp) {
+		pr_perror("Failed to open %s", path);
+		return -1;
+	}
+
+	while ((de = readdir(dp))) {
+		char dir[PROC_PATH_MAX_LEN];
+		struct stat st;
+
+		if (!strcmp(de->d_name, ".") ||
+		    !strcmp(de->d_name, ".."))
+			continue;
+
+		/* Skip filtered out entries */
+		if (match_pattern(de->d_name, filter))
+			continue;
+
+		/* d_name may not fit: refuse instead of overrunning dir[] */
+		ret = snprintf(dir, sizeof(dir), "%s/%s", path, de->d_name);
+		if (ret < 0 || (size_t)ret >= sizeof(dir)) {
+			pr_err("Too long sysctl path %s/%s\n", path, de->d_name);
+			ret = -1;
+			break;
+		}
+		ret = stat(dir, &st);
+		if (ret == -1) {
+			pr_perror("Failed to stat %s", dir);
+			break;
+		}
+
+		/*
+		 * Read-only sysctls (e.g. net.netfilter.nf_conntrack_buckets) can
+		 * not be restored through procfs, unreadable ones can't be dumped.
+		 */
+		if (S_ISDIR(st.st_mode))
+			ret = prepare_sysctl_requests_filtered(dir, filter, reqs, n_reqs);
+		else if (st.st_mode & S_IRUSR && st.st_mode & S_IWUSR)
+			ret = prepare_sysctl_request(dir, reqs, n_reqs);
+		if (ret == -1)
+			break;
+	}
+
+	closedir(dp);
+	return ret;
+}
diff --git a/images/netdev.proto b/images/netdev.proto
index 19b501c..08f7eb9 100644
--- a/images/netdev.proto
+++ b/images/netdev.proto
@@ -41,12 +41,14 @@ message net_device_entry {
 }
 
 message netns_entry {
-	repeated int32 def_conf		= 1;
-	repeated int32 all_conf		= 2;
+	repeated int32 def_conf			= 1;
+	repeated int32 all_conf			= 2;
 
-	repeated sysctl_entry def_conf4	= 3;
-	repeated sysctl_entry all_conf4	= 4;
+	repeated sysctl_entry def_conf4		= 3;
+	repeated sysctl_entry all_conf4		= 4;
 
-	repeated sysctl_entry def_conf6	= 5;
-	repeated sysctl_entry all_conf6	= 6;
+	repeated sysctl_entry def_conf6		= 5;
+	repeated sysctl_entry all_conf6		= 6;
+
+	repeated named_sysctl_entry nses	= 7;
 }
diff --git a/images/sysctl.proto b/images/sysctl.proto
index 4ecdf27..5927386 100644
--- a/images/sysctl.proto
+++ b/images/sysctl.proto
@@ -11,3 +11,8 @@ message sysctl_entry {
 	optional int32 iarg		= 2;
 	optional string sarg		= 3;
 }
+
+message named_sysctl_entry {
+	required string name		= 1;
+	required sysctl_entry se	= 2;
+}
-- 
2.5.5



More information about the CRIU mailing list