[CRIU] [PATCH v3 3/5] net/sysctl: c/r all except *.conf.* and *.neigh.*
Pavel Emelyanov
xemul at virtuozzo.com
Fri Jul 15 10:39:20 PDT 2016
On 07/14/2016 04:51 PM, Pavel Tikhomirov wrote:
> Add a prepare_sysctl_requests_filtered helper that searches a sysctl
> directory for all (filtered) sysctl paths and allocates a named
> sysctl_req for each one, so they can all be dumped later with sysctl_op.
NAK, sucking in everything based on just a name regex match is a bad idea.
Write out the needed sysctls by name and update the list from time to time.
> Add NamedSysctlEntry entries to dump sysctl name+value pairs to the image.
>
> Skip:
> 1. sysctls that are not both readable and writable, as we can do nothing
> for them through procfs
> 2. conf and neigh directories, which are per-device and will be restored
> separately after the devices are restored; they might also need special
> care, e.g. conf sysctls need to be restored in a special order
> 3. nf_log.xx if it is "NONE" as we can not set it with sysctl_op as
> sysctl_write_char prints "\n" at the end and nf_log_proc_dostring does
> not like '\n' =)
> 4. sysctls that do not exist on restore - !CTL_FLAGS_HAS
> 5. igmp_link_local_mcast_reports if it is not safe in the current
> kernel, see https://bugzilla.redhat.com/show_bug.cgi?id=1352177
>
> *We now have 32 such writable net.* sysctls in a VZ7 CT
> https://jira.sw.ru/browse/PSBM-48397
>
> v3: skip igmp_link_local_mcast_reports
> Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
> ---
> criu/include/sysctl.h | 16 ++++
> criu/net.c | 224 ++++++++++++++++++++++++++++++++++++++++++++++++++
> criu/sysctl.c | 122 +++++++++++++++++++++++++++
> images/netdev.proto | 14 ++--
> images/sysctl.proto | 5 ++
> 5 files changed, 375 insertions(+), 6 deletions(-)
>
> diff --git a/criu/include/sysctl.h b/criu/include/sysctl.h
> index e271f5e..95312d7 100644
> --- a/criu/include/sysctl.h
> +++ b/criu/include/sysctl.h
> @@ -9,6 +9,9 @@ struct sysctl_req {
> };
>
> extern int sysctl_op(struct sysctl_req *req, size_t nr_req, int op, unsigned int ns);
> +extern int prepare_sysctl_requests_filtered(char *path, char *filter,
> + struct sysctl_req **reqs, size_t *n_reqs);
> +extern void free_sysctl_requests(struct sysctl_req *reqs, size_t n_reqs);
>
> enum {
> CTL_READ,
> @@ -38,4 +41,17 @@ enum {
> #define CTL_FLAGS_HAS 2
> #define CTL_FLAGS_READ_EIO_SKIP 4
>
> +/*
> + * Max sysctl path is 70 chars:
> + * "/proc/sys/net/ipv4/conf/virbr0-nic/igmpv2_unsolicited_report_interval"
> + */
> +#define PROC_PATH_MAX_LEN 100
> +/*
> + * We have only two sysctls longer than 256:
> + * /proc/sys/dev/cdrom/info - CDROM_STR_SIZE=1000
> + * /proc/sys/net/ipv4/tcp_allowed_congestion_control - TCP_CA_BUF_MAX=2048
> + * first one is readonly and second is hostonly
> + */
> +#define PROC_ARG_MAX_LEN 257
> +
> #endif /* __CR_SYSCTL_H__ */
> diff --git a/criu/net.c b/criu/net.c
> index a6d5f00..3ab950b 100644
> --- a/criu/net.c
> +++ b/criu/net.c
> @@ -1144,6 +1144,219 @@ static inline int dump_iptables(struct cr_imgset *fds)
> return 0;
> }
>
> +static void free_nses(NamedSysctlEntry **nses, size_t n_nses)
> +{
> + int i;
> +
> + if (nses) {
> + if (nses[0]) {
> + for (i = 0; i < n_nses; i++)
> + xfree(nses[i]->name);
> +
> + if (nses[0]->se) {
> + for (i = 0; i < n_nses; i++)
> + xfree(nses[i]->se->sarg);
> +
> + xfree(nses[0]->se);
> + }
> + xfree(nses[0]);
> + }
> + xfree(nses);
> + }
> +}
> +
> +static int alloc_nses(NamedSysctlEntry ***nses, size_t n_nses)
> +{
> + int i;
> +
> + *nses = xmalloc(n_nses * sizeof(NamedSysctlEntry *));
> + if (!*nses) {
> + pr_perror("Failed to xmalloc nses pointers");
> + return -1;
> + }
> +
> + (*nses)[0] = xmalloc(n_nses * sizeof(NamedSysctlEntry));
> + if (!(*nses)[0]) {
> + pr_perror("Failed to xmalloc nses");
> + return -1;
> + }
> +
> + for (i = 0; i < n_nses; i++) {
> + (*nses)[i] = (*nses)[0] + i;
> + named_sysctl_entry__init((*nses)[i]);
> + }
> +
> + (*nses)[0]->se = xmalloc(n_nses * sizeof(SysctlEntry));
> + if (!(*nses)[0]->se) {
> + pr_perror("Failed to xmalloc se");
> + return -1;
> + }
> +
> + for (i = 0; i < n_nses; i++) {
> + (*nses)[i]->se = (*nses)[0]->se + i;
> + sysctl_entry__init((*nses)[i]->se);
> + }
> +
> + for (i = 0; i < n_nses; i++) {
> + SysctlEntry *se = (*nses)[i]->se;
> +
> + se->sarg = xmalloc(sizeof(char) * PROC_ARG_MAX_LEN);
> + if (!se->sarg) {
> + pr_perror("Failed to xmalloc se sarg");
> + return -1;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static int sysctl_arg_to_int(char *sarg, int *iarg)
> +{
> + int ret;
> + char buf[PROC_ARG_MAX_LEN];
> +
> + ret = sscanf(sarg, "%d%s", iarg, buf);
> + if (ret != 1)
> + return 0;
> + return 1;
> +}
> +
> +#define SYSCTL_NET_DIR "/proc/sys/net"
> +#define FILTER_MAX_LEN 60
> +#define CONF_OR_NEIGH_FILTER "conf|neigh"
> +#define IGMP_LL_MCAST_REPORTS_FILTER "|igmp_link_local_mcast_reports"
> +
> +static int dump_netns_sysctls(NetnsEntry *netns)
> +{
> + struct sysctl_req *reqs = NULL;
> + size_t n_reqs = 0;
> + int ret = 0;
> + int i;
> + char filter[FILTER_MAX_LEN];
> +
> + sprintf(filter, "%s%s", CONF_OR_NEIGH_FILTER,
> + kdat.sysctl_igmp_link_local_mcast_reports_safe ?
> + "" : IGMP_LL_MCAST_REPORTS_FILTER);
> +
> + ret = prepare_sysctl_requests_filtered(SYSCTL_NET_DIR, filter,
> + &reqs, &n_reqs);
> + if (ret)
> + goto err_req;
> +
> + netns->n_nses = n_reqs;
> + ret = alloc_nses(&netns->nses, netns->n_nses);
> + if (ret)
> + goto err_req;
> +
> + for (i = 0; i < netns->n_nses; i++) {
> + reqs[i].arg = netns->nses[i]->se->sarg;
> + netns->nses[i]->name = reqs[i].name;
> + }
> +
> + ret = sysctl_op(reqs, netns->n_nses, CTL_READ, CLONE_NEWNET);
> + if (ret != 0) {
> + pr_err("Failed to read net sysctls\n");
> + goto err_free;
> + }
> +
> + for (i = 0; i < netns->n_nses; i++) {
> + char *sarg = netns->nses[i]->se->sarg;
> +
> + if (reqs[i].flags & CTL_FLAGS_HAS) {
> + /* Strip trailing newline */
> + if (sarg[strlen(sarg) - 1] == '\n')
> + sarg[strlen(sarg) - 1] = '\0';
> +
> + if (sysctl_arg_to_int(sarg, &netns->nses[i]->se->iarg)) {
> + netns->nses[i]->se->type = SYSCTL_TYPE__CTL_32;
> + netns->nses[i]->se->has_iarg = true;
> + netns->nses[i]->se->sarg = NULL;
> + xfree(sarg);
> + continue;
> + }
> + netns->nses[i]->se->type = SYSCTL_TYPE__CTL_STR;
> +
> + /*
> + * Skip nf_log/xx if it is set to default "NONE"
> + */
> + if (!strstr(reqs[i].name, "/proc/sys/net/netfilter/nf_log")
> + || strcmp(sarg, "NONE"))
> + continue;
> +
> + pr_info("Skipping net sysctl %s\n", reqs[i].name);
> + }
> +
> + netns->nses[i]->se->sarg = NULL;
> + xfree(sarg);
> + }
> +
> +err_free:
> + xfree(reqs);
> + return ret;
> +err_req:
> + free_sysctl_requests(reqs, n_reqs);
> + return ret;
> +}
> +
> +#define IGMP_LL_MCAST_REPORTS_PATH "/proc/sys/net/ipv4/igmp_link_local_mcast_reports"
> +
> +static int restore_netns_sysctls(NetnsEntry *netns)
> +{
> + struct sysctl_req *req;
> + int i, ri;
> + int ret = 0;
> +
> + req = xmalloc(sizeof(struct sysctl_req) * netns->n_nses);
> + if (!req) {
> + pr_perror("Failed to alloc sysctl_req array");
> + return -1;
> + }
> +
> + for (i = 0, ri = 0; i < netns->n_nses; i++) {
> + NamedSysctlEntry *nse = netns->nses[i];
> +
> + if (!kdat.sysctl_igmp_link_local_mcast_reports_safe
> + && !strcmp(nse->name, IGMP_LL_MCAST_REPORTS_PATH))
> + continue;
> +
> + /* Skip restore not writable sysctls */
> + if (access(nse->name, W_OK) != 0)
> + continue;
> +
> + switch (nse->se->type) {
> + case SYSCTL_TYPE__CTL_32:
> + /* skip non-existing sysctl */
> + if (!nse->se->has_iarg)
> + continue;
> +
> + req[ri].type = CTL_32;
> + req[ri].arg = &nse->se->iarg;
> + break;
> + case SYSCTL_TYPE__CTL_STR:
> + /* skip non-existing sysctl */
> + if (!nse->se->sarg)
> + continue;
> +
> + req[ri].type = CTL_STR(strlen(nse->se->sarg));
> + req[ri].arg = nse->se->sarg;
> + break;
> + default:
> + continue;
> + }
> +
> + req[ri].name = nse->name;
> + req[ri].flags = 0;
> + ri++;
> + }
> +
> + ret = sysctl_op(req, ri, CTL_WRITE, CLONE_NEWNET);
> + if (ret < 0)
> + pr_err("Failed to write net sysctls\n");
> +
> + xfree(req);
> + return ret;
> +}
> +
> static int dump_netns_conf(struct cr_imgset *fds)
> {
> int ret = -1;
> @@ -1211,6 +1424,10 @@ static int dump_netns_conf(struct cr_imgset *fds)
> }
> }
>
> + ret = dump_netns_sysctls(&netns);
> + if (ret < 0)
> + goto err_free;
> +
> ret = ipv4_conf_op("default", netns.def_conf4, size4, CTL_READ, NULL);
> if (ret < 0)
> goto err_free;
> @@ -1227,6 +1444,7 @@ static int dump_netns_conf(struct cr_imgset *fds)
>
> ret = pb_write_one(img_from_set(fds, CR_FD_NETNS), &netns, PB_NETNS);
> err_free:
> + free_nses(netns.nses, netns.n_nses);
> xfree(netns.def_conf4);
> xfree(netns.all_conf4);
> xfree(def_confs4);
> @@ -1347,6 +1565,12 @@ static int restore_netns_conf(int pid, NetnsEntry **netns)
> return -1;
> }
>
> + if ((*netns)->nses) {
> + ret = restore_netns_sysctls(*netns);
> + if (ret)
> + goto out;
> + }
> +
> if ((*netns)->def_conf4) {
> ret = ipv4_conf_op("all", (*netns)->all_conf4, (*netns)->n_all_conf4, CTL_WRITE, NULL);
> if (ret)
> diff --git a/criu/sysctl.c b/criu/sysctl.c
> index 87bd267..eac058d 100644
> --- a/criu/sysctl.c
> +++ b/criu/sysctl.c
> @@ -6,6 +6,8 @@
> #include <sys/types.h>
> #include <sys/wait.h>
> #include <sched.h>
> +#include <sys/stat.h>
> +#include <regex.h>
>
> #include "asm/types.h"
> #include "namespaces.h"
> @@ -480,3 +482,123 @@ int sysctl_op(struct sysctl_req *req, size_t nr_req, int op, unsigned int ns)
> close(fd);
> return ret;
> }
> +
> +static int match_pattern(char *string, char *pattern)
> +{
> + int status;
> + regex_t re;
> +
> + if (regcomp(&re, pattern, REG_NOSUB|REG_EXTENDED) != 0) {
> + pr_perror("Failed to regcomp \"%s\"", pattern);
> + return 0;
> + }
> +
> + status = regexec(&re, string, (size_t) 0, NULL, 0);
> + regfree(&re);
> +
> + if (status != 0) {
> + return 0;
> + }
> + return 1;
> +}
> +
> +void free_sysctl_requests(struct sysctl_req *reqs, size_t n_reqs)
> +{
> + int i;
> +
> + for (i = 0; i < n_reqs; i++)
> + xfree(reqs[i].name);
> + xfree(reqs);
> +}
> +
> +static int prepare_sysctl_request(char *name, struct sysctl_req **preq,
> + size_t *n_reqs)
> +{
> + struct sysctl_req *req;
> +
> + req = xrealloc(*preq, ++(*n_reqs) * sizeof(struct sysctl_req));
> + if (!req) {
> + (*n_reqs)--;
> + pr_perror("Failed to xrealloc requests");
> + return -1;
> + }
> +
> + *preq = req;
> + req = &(*preq)[*n_reqs - 1];
> +
> + req->name = xmalloc(sizeof(char) * (strlen(name) + 1));
> + if (!req->name) {
> + pr_perror("Failed to xmalloc request name");
> + return -1;
> + }
> +
> + sprintf(req->name, "%s", name);
> + req->arg = NULL;
> + req->type = CTL_STR(PROC_ARG_MAX_LEN);
> + req->flags = CTL_FLAGS_OPTIONAL;
> +
> + return 0;
> +}
> +
> +/*
> + * Search sysctls in directory, allocate and setup sysctl requests
> + * to dump them.
> + * @path - where to look sysctls for
> + * @filter - regex to filter unwanted subdirs
> + * @reqs - return pointer to requests
> + * @n_reqs - return number of sysctls found
> + */
> +int prepare_sysctl_requests_filtered(char *path, char *filter,
> + struct sysctl_req **reqs, size_t *n_reqs)
> +{
> + DIR *dp;
> + struct dirent *de;
> + int ret = 0;
> +
> + dp = opendir(path);
> + if (!dp) {
> + pr_perror("Failed to open %s", path);
> + return -1;
> + }
> +
> + while ((de = readdir(dp))) {
> + char dir[PROC_PATH_MAX_LEN];
> + struct stat st;
> +
> + if (!strcmp(de->d_name, ".") ||
> + !strcmp(de->d_name, ".."))
> + continue;
> +
> + /* Skip specified directories */
> + if (match_pattern(de->d_name, filter))
> + continue;
> +
> + sprintf(dir, "%s/%s", path, de->d_name);
> +
> + ret = stat(dir, &st);
> + if (ret == -1) {
> + pr_perror("Failed to stat %s", dir);
> + goto err_close;
> + } else {
> + if (S_ISDIR(st.st_mode)) {
> + prepare_sysctl_requests_filtered(dir, filter, reqs, n_reqs);
> + } else if (st.st_mode & S_IRUSR &&
> + st.st_mode & S_IWUSR) {
> + /*
> + * Need the check above to exclude sysctls like
> + * net.netfilter.nf_conntrack_buckets, which are
> + * readonly, from being dupmed to image. They can
> + * not be restored through procfs. And of course
> + * ones that can't be read
> + */
> + ret = prepare_sysctl_request(dir, reqs, n_reqs);
> + if (ret == -1)
> + goto err_close;
> + }
> + }
> + }
> +
> +err_close:
> + closedir(dp);
> + return ret;
> +}
> diff --git a/images/netdev.proto b/images/netdev.proto
> index 19b501c..08f7eb9 100644
> --- a/images/netdev.proto
> +++ b/images/netdev.proto
> @@ -41,12 +41,14 @@ message net_device_entry {
> }
>
> message netns_entry {
> - repeated int32 def_conf = 1;
> - repeated int32 all_conf = 2;
> + repeated int32 def_conf = 1;
> + repeated int32 all_conf = 2;
>
> - repeated sysctl_entry def_conf4 = 3;
> - repeated sysctl_entry all_conf4 = 4;
> + repeated sysctl_entry def_conf4 = 3;
> + repeated sysctl_entry all_conf4 = 4;
>
> - repeated sysctl_entry def_conf6 = 5;
> - repeated sysctl_entry all_conf6 = 6;
> + repeated sysctl_entry def_conf6 = 5;
> + repeated sysctl_entry all_conf6 = 6;
> +
> + repeated named_sysctl_entry nses = 7;
> }
> diff --git a/images/sysctl.proto b/images/sysctl.proto
> index 4ecdf27..5927386 100644
> --- a/images/sysctl.proto
> +++ b/images/sysctl.proto
> @@ -11,3 +11,8 @@ message sysctl_entry {
> optional int32 iarg = 2;
> optional string sarg = 3;
> }
> +
> +message named_sysctl_entry {
> + required string name = 1;
> + required sysctl_entry se = 2;
> +}
>
More information about the CRIU
mailing list