[CRIU] [PATCH v3 3/5] net/sysctl: c/r all except *.conf.* and *.neigh.*
Pavel Tikhomirov
ptikhomirov at virtuozzo.com
Thu Jul 14 06:51:12 PDT 2016
Add a prepare_sysctl_requests_filtered helper that searches a sysctl
directory for all (filtered) sysctl paths and allocates a named sysctl_req
for each of them, so that they can later be dumped all at once with sysctl_op.
Add NamedSysctlEntry entries to dump sysctl name+value pairs to the image.
Skip:
1. non-(readable)writable sysctls as we can do nothing for them through
procfs
2. the conf and neigh directories: they are per-device and will be restored
separately after the devices are restored; they might also need special care,
e.g. conf sysctls need to be restored in a special order
3. nf_log.xx if it is "NONE" as we can not set it with sysctl_op as
sysctl_write_char prints "\n" at the end and nf_log_proc_dostring does
not like '\n' =)
4. sysctls that do not exist on restore - !CTL_FLAGS_HAS
5. skip igmp_link_local_mcast_reports if it is not safe in current
kernel see https://bugzilla.redhat.com/show_bug.cgi?id=1352177
*We have now 32 such net.* sysctls writable in VZ7 CT
https://jira.sw.ru/browse/PSBM-48397
v3: skip igmp_link_local_mcast_reports
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
criu/include/sysctl.h | 16 ++++
criu/net.c | 224 ++++++++++++++++++++++++++++++++++++++++++++++++++
criu/sysctl.c | 122 +++++++++++++++++++++++++++
images/netdev.proto | 14 ++--
images/sysctl.proto | 5 ++
5 files changed, 375 insertions(+), 6 deletions(-)
diff --git a/criu/include/sysctl.h b/criu/include/sysctl.h
index e271f5e..95312d7 100644
--- a/criu/include/sysctl.h
+++ b/criu/include/sysctl.h
@@ -9,6 +9,9 @@ struct sysctl_req {
};
extern int sysctl_op(struct sysctl_req *req, size_t nr_req, int op, unsigned int ns);
+extern int prepare_sysctl_requests_filtered(char *path, char *filter,
+ struct sysctl_req **reqs, size_t *n_reqs);
+extern void free_sysctl_requests(struct sysctl_req *reqs, size_t n_reqs);
enum {
CTL_READ,
@@ -38,4 +41,17 @@ enum {
#define CTL_FLAGS_HAS 2
#define CTL_FLAGS_READ_EIO_SKIP 4
+/*
+ * Max sysctl path is 70 chars:
+ * "/proc/sys/net/ipv4/conf/virbr0-nic/igmpv2_unsolicited_report_interval"
+ */
+#define PROC_PATH_MAX_LEN 100
+/*
+ * We have only two sysctls longer than 256:
+ * /proc/sys/dev/cdrom/info - CDROM_STR_SIZE=1000
+ * /proc/sys/net/ipv4/tcp_allowed_congestion_control - TCP_CA_BUF_MAX=2048
+ * first one is readonly and second is hostonly
+ */
+#define PROC_ARG_MAX_LEN 257
+
#endif /* __CR_SYSCTL_H__ */
diff --git a/criu/net.c b/criu/net.c
index a6d5f00..3ab950b 100644
--- a/criu/net.c
+++ b/criu/net.c
@@ -1144,6 +1144,219 @@ static inline int dump_iptables(struct cr_imgset *fds)
return 0;
}
+/*
+ * Release a NamedSysctlEntry array built by alloc_nses(), together with
+ * the per-entry name strings.
+ *
+ * Layout being torn down:
+ *   - nses           one array of n_nses pointers
+ *   - nses[0]        one contiguous array of n_nses NamedSysctlEntry
+ *   - nses[0]->se    one contiguous array of n_nses SysctlEntry
+ *   - name / sarg    separate allocation per entry
+ *
+ * Handles nses == NULL, n_nses == 0, nses[0] == NULL and
+ * nses[0]->se == NULL, so it can be used after alloc_nses() failed
+ * part-way through.
+ */
+static void free_nses(NamedSysctlEntry **nses, size_t n_nses)
+{
+	size_t i;
+
+	if (!nses)
+		return;
+
+	/*
+	 * A zero-length pointer array has no element 0, so nses[0] must
+	 * not be read in that case.
+	 */
+	if (n_nses && nses[0]) {
+		for (i = 0; i < n_nses; i++)
+			xfree(nses[i]->name);
+
+		if (nses[0]->se) {
+			for (i = 0; i < n_nses; i++)
+				xfree(nses[i]->se->sarg);
+
+			/* All SysctlEntry objects live in this single chunk */
+			xfree(nses[0]->se);
+		}
+
+		/* All NamedSysctlEntry objects live in this single chunk */
+		xfree(nses[0]);
+	}
+
+	xfree(nses);
+}
+
+/*
+ * Allocate and initialise an array of n_nses NamedSysctlEntry objects,
+ * each with its own SysctlEntry and a PROC_ARG_MAX_LEN byte sarg buffer.
+ *
+ * @nses   - out: array of pointers to the entries (NULL when n_nses is 0)
+ * @n_nses - number of entries to allocate
+ *
+ * The entries and the SysctlEntry objects are each allocated as one
+ * contiguous chunk, anchored at (*nses)[0] and (*nses)[0]->se.
+ *
+ * Returns 0 on success, -1 on allocation failure. On failure whatever
+ * was already allocated stays reachable through *nses and has to be
+ * released by the caller with free_nses().
+ */
+static int alloc_nses(NamedSysctlEntry ***nses, size_t n_nses)
+{
+	NamedSysctlEntry **arr;
+	size_t i;
+
+	/*
+	 * Nothing to allocate. Bail out before touching element 0, which
+	 * would not exist in a zero-sized array.
+	 */
+	if (!n_nses) {
+		*nses = NULL;
+		return 0;
+	}
+
+	arr = xmalloc(n_nses * sizeof(NamedSysctlEntry *));
+	*nses = arr;
+	if (!arr) {
+		pr_perror("Failed to xmalloc nses pointers");
+		return -1;
+	}
+
+	arr[0] = xmalloc(n_nses * sizeof(NamedSysctlEntry));
+	if (!arr[0]) {
+		pr_perror("Failed to xmalloc nses");
+		return -1;
+	}
+
+	for (i = 0; i < n_nses; i++) {
+		arr[i] = arr[0] + i;
+		named_sysctl_entry__init(arr[i]);
+	}
+
+	arr[0]->se = xmalloc(n_nses * sizeof(SysctlEntry));
+	if (!arr[0]->se) {
+		pr_perror("Failed to xmalloc se");
+		return -1;
+	}
+
+	for (i = 0; i < n_nses; i++) {
+		arr[i]->se = arr[0]->se + i;
+		sysctl_entry__init(arr[i]->se);
+	}
+
+	for (i = 0; i < n_nses; i++) {
+		SysctlEntry *se = arr[i]->se;
+
+		se->sarg = xmalloc(sizeof(char) * PROC_ARG_MAX_LEN);
+		if (!se->sarg) {
+			pr_perror("Failed to xmalloc se sarg");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Try to interpret a sysctl value as one single integer.
+ *
+ * Returns 1 when sscanf() converts a leading integer into *iarg and
+ * finds no further non-whitespace token behind it, 0 otherwise (note
+ * that *iarg may still have been written in the latter case).
+ */
+static int sysctl_arg_to_int(char *sarg, int *iarg)
+{
+	char rest[PROC_ARG_MAX_LEN];
+
+	return sscanf(sarg, "%d%s", iarg, rest) == 1;
+}
+
+#define SYSCTL_NET_DIR "/proc/sys/net"
+#define FILTER_MAX_LEN 60
+#define CONF_OR_NEIGH_FILTER "conf|neigh"
+#define IGMP_LL_MCAST_REPORTS_FILTER "|igmp_link_local_mcast_reports"
+
+/*
+ * Collect the sysctls found under SYSCTL_NET_DIR into netns->nses.
+ *
+ * Entries matching the "conf|neigh" filter are left out, and so is
+ * igmp_link_local_mcast_reports unless
+ * kdat.sysctl_igmp_link_local_mcast_reports_safe is set.
+ *
+ * Every value is read as a string. Values that parse as a single
+ * integer are stored as CTL_32 (iarg), everything else as CTL_STR
+ * (sarg). Entries whose request comes back without CTL_FLAGS_HAS, and
+ * nf_log entries set to "NONE", keep their name but get no value.
+ *
+ * Ownership: once the request names have been handed over to
+ * netns->nses they are released by free_nses(), so from that point on
+ * only the request array itself is freed here.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+static int dump_netns_sysctls(NetnsEntry *netns)
+{
+	struct sysctl_req *reqs = NULL;
+	size_t n_reqs = 0;
+	size_t i;
+	int ret;
+	char filter[FILTER_MAX_LEN];
+
+	snprintf(filter, sizeof(filter), "%s%s", CONF_OR_NEIGH_FILTER,
+		 kdat.sysctl_igmp_link_local_mcast_reports_safe ?
+		 "" : IGMP_LL_MCAST_REPORTS_FILTER);
+
+	ret = prepare_sysctl_requests_filtered(SYSCTL_NET_DIR, filter,
+					       &reqs, &n_reqs);
+	if (ret)
+		goto err_req;
+
+	netns->n_nses = n_reqs;
+	ret = alloc_nses(&netns->nses, netns->n_nses);
+	if (ret)
+		goto err_req;
+
+	/* Read straight into the image entries; names change owner here */
+	for (i = 0; i < netns->n_nses; i++) {
+		reqs[i].arg = netns->nses[i]->se->sarg;
+		netns->nses[i]->name = reqs[i].name;
+	}
+
+	ret = sysctl_op(reqs, netns->n_nses, CTL_READ, CLONE_NEWNET);
+	if (ret != 0) {
+		pr_err("Failed to read net sysctls\n");
+		goto err_free;
+	}
+
+	for (i = 0; i < netns->n_nses; i++) {
+		SysctlEntry *se = netns->nses[i]->se;
+		char *sarg = se->sarg;
+
+		if (reqs[i].flags & CTL_FLAGS_HAS) {
+			size_t len = strlen(sarg);
+
+			/*
+			 * Strip trailing newline. An empty value has no
+			 * last character to look at.
+			 */
+			if (len && sarg[len - 1] == '\n')
+				sarg[len - 1] = '\0';
+
+			if (sysctl_arg_to_int(sarg, &se->iarg)) {
+				se->type = SYSCTL_TYPE__CTL_32;
+				se->has_iarg = true;
+				se->sarg = NULL;
+				xfree(sarg);
+				continue;
+			}
+			se->type = SYSCTL_TYPE__CTL_STR;
+
+			/*
+			 * Skip nf_log/xx if it is set to default "NONE"
+			 */
+			if (!strstr(reqs[i].name, "/proc/sys/net/netfilter/nf_log")
+			    || strcmp(sarg, "NONE"))
+				continue;
+
+			pr_info("Skipping net sysctl %s\n", reqs[i].name);
+		}
+
+		/* No value to dump for this entry */
+		se->sarg = NULL;
+		xfree(sarg);
+	}
+
+err_free:
+	/* Names now belong to netns->nses, drop only the array */
+	xfree(reqs);
+	return ret;
+err_req:
+	free_sysctl_requests(reqs, n_reqs);
+	return ret;
+}
+
+#define IGMP_LL_MCAST_REPORTS_PATH "/proc/sys/net/ipv4/igmp_link_local_mcast_reports"
+
+/*
+ * Write the sysctls stored in netns->nses back via sysctl_op().
+ *
+ * An entry is left out when:
+ *  - it is igmp_link_local_mcast_reports and
+ *    kdat.sysctl_igmp_link_local_mcast_reports_safe is not set,
+ *  - access(name, W_OK) fails,
+ *  - it carries no value for its type, or its type is neither
+ *    CTL_32 nor CTL_STR.
+ *
+ * Returns the sysctl_op() result, or -1 if the request array can not
+ * be allocated.
+ */
+static int restore_netns_sysctls(NetnsEntry *netns)
+{
+	struct sysctl_req *wr;
+	int n_wr = 0;
+	int ret;
+	int i;
+
+	wr = xmalloc(sizeof(struct sysctl_req) * netns->n_nses);
+	if (!wr) {
+		pr_perror("Failed to alloc sysctl_req array");
+		return -1;
+	}
+
+	for (i = 0; i < netns->n_nses; i++) {
+		NamedSysctlEntry *nse = netns->nses[i];
+		struct sysctl_req *cur = &wr[n_wr];
+
+		if (!kdat.sysctl_igmp_link_local_mcast_reports_safe
+		    && strcmp(nse->name, IGMP_LL_MCAST_REPORTS_PATH) == 0)
+			continue;
+
+		/* Skip restore not writable sysctls */
+		if (access(nse->name, W_OK) != 0)
+			continue;
+
+		if (nse->se->type == SYSCTL_TYPE__CTL_32) {
+			/* skip non-existing sysctl */
+			if (!nse->se->has_iarg)
+				continue;
+
+			cur->type = CTL_32;
+			cur->arg = &nse->se->iarg;
+		} else if (nse->se->type == SYSCTL_TYPE__CTL_STR) {
+			/* skip non-existing sysctl */
+			if (!nse->se->sarg)
+				continue;
+
+			cur->type = CTL_STR(strlen(nse->se->sarg));
+			cur->arg = nse->se->sarg;
+		} else {
+			continue;
+		}
+
+		cur->name = nse->name;
+		cur->flags = 0;
+		n_wr++;
+	}
+
+	ret = sysctl_op(wr, n_wr, CTL_WRITE, CLONE_NEWNET);
+	if (ret < 0)
+		pr_err("Failed to write net sysctls\n");
+
+	xfree(wr);
+	return ret;
+}
+
static int dump_netns_conf(struct cr_imgset *fds)
{
int ret = -1;
@@ -1211,6 +1424,10 @@ static int dump_netns_conf(struct cr_imgset *fds)
}
}
+ ret = dump_netns_sysctls(&netns);
+ if (ret < 0)
+ goto err_free;
+
ret = ipv4_conf_op("default", netns.def_conf4, size4, CTL_READ, NULL);
if (ret < 0)
goto err_free;
@@ -1227,6 +1444,7 @@ static int dump_netns_conf(struct cr_imgset *fds)
ret = pb_write_one(img_from_set(fds, CR_FD_NETNS), &netns, PB_NETNS);
err_free:
+ free_nses(netns.nses, netns.n_nses);
xfree(netns.def_conf4);
xfree(netns.all_conf4);
xfree(def_confs4);
@@ -1347,6 +1565,12 @@ static int restore_netns_conf(int pid, NetnsEntry **netns)
return -1;
}
+ if ((*netns)->nses) {
+ ret = restore_netns_sysctls(*netns);
+ if (ret)
+ goto out;
+ }
+
if ((*netns)->def_conf4) {
ret = ipv4_conf_op("all", (*netns)->all_conf4, (*netns)->n_all_conf4, CTL_WRITE, NULL);
if (ret)
diff --git a/criu/sysctl.c b/criu/sysctl.c
index 87bd267..eac058d 100644
--- a/criu/sysctl.c
+++ b/criu/sysctl.c
@@ -6,6 +6,8 @@
#include <sys/types.h>
#include <sys/wait.h>
#include <sched.h>
+#include <sys/stat.h>
+#include <regex.h>
#include "asm/types.h"
#include "namespaces.h"
@@ -480,3 +482,123 @@ int sysctl_op(struct sysctl_req *req, size_t nr_req, int op, unsigned int ns)
close(fd);
return ret;
}
+
+/*
+ * Check whether @string matches the POSIX extended regex @pattern.
+ *
+ * The pattern is not anchored by this function, so a match anywhere in
+ * @string counts.
+ *
+ * Returns 1 on match, 0 on no match or when the pattern fails to
+ * compile. regcomp() reports failures through its return code rather
+ * than errno, so the message is obtained with regerror().
+ */
+static int match_pattern(char *string, char *pattern)
+{
+	regex_t re;
+	int err;
+
+	err = regcomp(&re, pattern, REG_NOSUB | REG_EXTENDED);
+	if (err != 0) {
+		char msg[128];
+
+		regerror(err, &re, msg, sizeof(msg));
+		pr_err("Failed to regcomp \"%s\": %s\n", pattern, msg);
+		return 0;
+	}
+
+	err = regexec(&re, string, (size_t) 0, NULL, 0);
+	regfree(&re);
+
+	return err == 0;
+}
+
+/*
+ * Free the name of each of the first @n_reqs requests in @reqs and
+ * then the request array itself.
+ */
+void free_sysctl_requests(struct sysctl_req *reqs, size_t n_reqs)
+{
+	size_t idx = 0;
+
+	while (idx < n_reqs) {
+		xfree(reqs[idx].name);
+		idx++;
+	}
+
+	xfree(reqs);
+}
+
+/*
+ * Grow the request array *preq by one slot and set the new slot up as
+ * an optional string request for sysctl @name (the name is copied).
+ *
+ * *n_reqs is bumped only when the array could be grown. If copying the
+ * name fails afterwards, the new slot is already counted and its name
+ * is NULL.
+ *
+ * Returns 0 on success, -1 on allocation failure.
+ */
+static int prepare_sysctl_request(char *name, struct sysctl_req **preq,
+				  size_t *n_reqs)
+{
+	size_t grown = *n_reqs + 1;
+	struct sysctl_req *arr, *slot;
+
+	arr = xrealloc(*preq, grown * sizeof(struct sysctl_req));
+	if (!arr) {
+		pr_perror("Failed to xrealloc requests");
+		return -1;
+	}
+
+	*preq = arr;
+	*n_reqs = grown;
+	slot = &arr[grown - 1];
+
+	slot->name = xmalloc(sizeof(char) * (strlen(name) + 1));
+	if (!slot->name) {
+		pr_perror("Failed to xmalloc request name");
+		return -1;
+	}
+	strcpy(slot->name, name);
+
+	slot->arg = NULL;
+	slot->type = CTL_STR(PROC_ARG_MAX_LEN);
+	slot->flags = CTL_FLAGS_OPTIONAL;
+
+	return 0;
+}
+
+/*
+ * Search sysctls in directory (recursively), allocate and setup sysctl
+ * requests to dump them.
+ * @path - where to look sysctls for
+ * @filter - regex matched against each entry name (files and
+ *           directories alike); matching entries are skipped
+ * @reqs - return pointer to requests
+ * @n_reqs - return number of sysctls found
+ *
+ * Only files that are both readable and writable by their owner are
+ * turned into requests.
+ *
+ * Returns 0 on success and -1 on failure, including a failure in any
+ * subdirectory. Requests collected before the failure stay in
+ * @reqs/@n_reqs and have to be released by the caller.
+ */
+int prepare_sysctl_requests_filtered(char *path, char *filter,
+				     struct sysctl_req **reqs, size_t *n_reqs)
+{
+	DIR *dp;
+	struct dirent *de;
+	int ret = 0;
+
+	dp = opendir(path);
+	if (!dp) {
+		pr_perror("Failed to open %s", path);
+		return -1;
+	}
+
+	while ((de = readdir(dp))) {
+		char dir[PROC_PATH_MAX_LEN];
+		struct stat st;
+		int len;
+
+		if (!strcmp(de->d_name, ".") ||
+		    !strcmp(de->d_name, ".."))
+			continue;
+
+		/* Skip specified directories */
+		if (match_pattern(de->d_name, filter))
+			continue;
+
+		/* Never write past the fixed-size path buffer */
+		len = snprintf(dir, sizeof(dir), "%s/%s", path, de->d_name);
+		if (len < 0 || (size_t)len >= sizeof(dir)) {
+			pr_err("Sysctl path %s/%s is too long\n",
+			       path, de->d_name);
+			ret = -1;
+			goto err_close;
+		}
+
+		ret = stat(dir, &st);
+		if (ret == -1) {
+			pr_perror("Failed to stat %s", dir);
+			goto err_close;
+		}
+
+		if (S_ISDIR(st.st_mode)) {
+			/* Do not lose errors that happen deeper in the tree */
+			ret = prepare_sysctl_requests_filtered(dir, filter,
+							       reqs, n_reqs);
+			if (ret)
+				goto err_close;
+		} else if ((st.st_mode & S_IRUSR) &&
+			   (st.st_mode & S_IWUSR)) {
+			/*
+			 * Need the check above to exclude sysctls like
+			 * net.netfilter.nf_conntrack_buckets, which are
+			 * readonly, from being dumped to image. They can
+			 * not be restored through procfs. And of course
+			 * ones that can't be read
+			 */
+			ret = prepare_sysctl_request(dir, reqs, n_reqs);
+			if (ret == -1)
+				goto err_close;
+		}
+	}
+
+err_close:
+	closedir(dp);
+	return ret;
+}
diff --git a/images/netdev.proto b/images/netdev.proto
index 19b501c..08f7eb9 100644
--- a/images/netdev.proto
+++ b/images/netdev.proto
@@ -41,12 +41,14 @@ message net_device_entry {
}
message netns_entry {
- repeated int32 def_conf = 1;
- repeated int32 all_conf = 2;
+ repeated int32 def_conf = 1;
+ repeated int32 all_conf = 2;
- repeated sysctl_entry def_conf4 = 3;
- repeated sysctl_entry all_conf4 = 4;
+ repeated sysctl_entry def_conf4 = 3;
+ repeated sysctl_entry all_conf4 = 4;
- repeated sysctl_entry def_conf6 = 5;
- repeated sysctl_entry all_conf6 = 6;
+ repeated sysctl_entry def_conf6 = 5;
+ repeated sysctl_entry all_conf6 = 6;
+
+ repeated named_sysctl_entry nses = 7;
}
diff --git a/images/sysctl.proto b/images/sysctl.proto
index 4ecdf27..5927386 100644
--- a/images/sysctl.proto
+++ b/images/sysctl.proto
@@ -11,3 +11,8 @@ message sysctl_entry {
optional int32 iarg = 2;
optional string sarg = 3;
}
+
+message named_sysctl_entry {
+ required string name = 1;
+ required sysctl_entry se = 2;
+}
--
2.5.5
More information about the CRIU
mailing list