[Devel] [PATCH RHEL9 COMMIT] pid_ns: More fixes for pid_max virtualization

Konstantin Khorenko khorenko at virtuozzo.com
Tue Jun 7 17:16:01 MSK 2022


The commit is pushed to "branch-rh9-5.14.0-70.13.1.vz9.16.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh9-5.14.0-70.13.1.vz9.16.2
------>
commit efb9096755e890dd88752bc83e625a3b20856476
Author: Konstantin Khorenko <khorenko at virtuozzo.com>
Date:   Tue May 31 16:28:59 2022 +0300

    pid_ns: More fixes for pid_max virtualization
    
     * dropped the global "pid_max" variable, which had reappeared
     * fixed the remaining usage of the global "pid_max" variable:
       alloc_pid() now checks against the pid_max of the pid namespace
     * introduced "pid_max_ns_default" to be used on pid_ns creation instead
       of the hardcoded value. Should be useful on Nodes with many CPUs.
     * changed tracing code to use pid_max from init_pid_ns
     * dropped the extra PID_MAX_NS_DEFAULT define: there is no need for it
       until we really use different values for init_pid_ns and nested pid ns
     * moved pid_max_ns_default and init_pid_ns.pid_max initialization to
       pid_idr_init()
    
    https://jira.sw.ru/browse/PSBM-140308
    Fixes: f6a7abc88764 ("pid_ns: Virtualize pid_max")
    
    Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
    Reviewed-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
 include/linux/pid.h     |  2 +-
 include/linux/threads.h |  1 -
 kernel/pid.c            | 15 +++++++++------
 kernel/pid_namespace.c  |  2 +-
 kernel/trace/trace.c    |  4 ++--
 kernel/trace/trace.h    |  2 +-
 6 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 518fcfccb1ed..71e1189b5aba 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -110,7 +110,7 @@ extern void transfer_pid(struct task_struct *old, struct task_struct *new,
 struct pid_namespace;
 extern struct pid_namespace init_pid_ns;
 
-extern int pid_max;
+extern int pid_max_ns_default;
 extern int pid_max_min, pid_max_max;
 
 /*
diff --git a/include/linux/threads.h b/include/linux/threads.h
index f7dd16fc5606..18d5a74bcc3d 100644
--- a/include/linux/threads.h
+++ b/include/linux/threads.h
@@ -26,7 +26,6 @@
  * This controls the default maximum pid allocated to a process
  */
 #define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000)
-#define PID_MAX_NS_DEFAULT	(PID_MAX_DEFAULT)
 
 /*
  * A maximum of 4 million PIDs should be enough for a while.
diff --git a/kernel/pid.c b/kernel/pid.c
index ebe115ce2a34..67a21ca2bed4 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -59,7 +59,7 @@ struct pid init_struct_pid = {
 	}, }
 };
 
-int pid_max = PID_MAX_DEFAULT;
+int pid_max_ns_default;
 
 #define RESERVED_PIDS		300
 
@@ -76,7 +76,6 @@ struct pid_namespace init_pid_ns = {
 	.ns.count = REFCOUNT_INIT(2),
 	.idr = IDR_INIT(init_pid_ns.idr),
 	.pid_allocated = PIDNS_ADDING,
-	.pid_max = PID_MAX_DEFAULT,
 	.level = 0,
 	.child_reaper = &init_task,
 	.user_ns = &init_user_ns,
@@ -192,7 +191,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 			tid = set_tid[ns->level - i];
 
 			retval = -EINVAL;
-			if (tid < 1 || tid >= pid_max)
+			if (tid < 1 || tid >= tmp->pid_max)
 				goto out_free;
 			/*
 			 * Also fail if a PID != 1 is requested and
@@ -623,12 +622,16 @@ void __init pid_idr_init(void)
 	/* Verify no one has done anything silly: */
 	BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_ADDING);
 
-	/* bump default and minimum pid_max based on number of cpus */
-	pid_max = min(pid_max_max, max_t(int, pid_max,
+	/* setup default and init_pid_ns pid_max based on number of cpus */
+	pid_max_ns_default = min(pid_max_max, max_t(int, PID_MAX_DEFAULT,
 				PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
+	init_pid_ns.pid_max = pid_max_ns_default;
+
+	/* bump minimum pid_max based on number of cpus */
 	pid_max_min = max_t(int, pid_max_min,
 				PIDS_PER_CPU_MIN * num_possible_cpus());
-	pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);
+	pr_info("pid_max: default: %u minimum: %u\n",
+		pid_max_ns_default, pid_max_min);
 
 	idr_init(&init_pid_ns.idr);
 
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 450d613ba8d5..c7874b1bf4bd 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -110,7 +110,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
 	ns->user_ns = get_user_ns(user_ns);
 	ns->ucounts = ucounts;
 	ns->pid_allocated = PIDNS_ADDING;
-	ns->pid_max = PID_MAX_NS_DEFAULT;
+	ns->pid_max = pid_max_ns_default;
 
 	return ns;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 924d08a5538b..3717d7462f00 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -705,7 +705,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
 		return -ENOMEM;
 	}
 
-	pid_list->pid_max = READ_ONCE(pid_max);
+	pid_list->pid_max = READ_ONCE(init_pid_ns.pid_max);
 
 	/* Only truncating will shrink pid_max */
 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
@@ -5288,7 +5288,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
 
 	if (mask == TRACE_ITER_RECORD_TGID) {
 		if (!tgid_map) {
-			tgid_map_max = pid_max;
+			tgid_map_max = init_pid_ns.pid_max;
 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
 				       GFP_KERNEL);
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4a0e693000c6..30ab4375f7cb 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -648,7 +648,7 @@ extern unsigned long tracing_thresh;
 
 /* PID filtering */
 
-extern int pid_max;
+extern struct pid_namespace init_pid_ns;
 
 bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids,
 			     pid_t search_pid);


More information about the Devel mailing list