[Devel] [PATCH RHEL7 COMMIT] module: Optimize __module_address() using a latched RB-tree

Konstantin Khorenko khorenko at virtuozzo.com
Fri Sep 15 17:27:37 MSK 2017


The commit is pushed to "branch-rh7-3.10.0-693.1.1.vz7.37.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-693.1.1.vz7.37.4
------>
commit d514c18772100d3f459f383ee1dca501118bc5cc
Author: Peter Zijlstra <peterz at infradead.org>
Date:   Fri Sep 15 17:27:37 2017 +0300

    module: Optimize __module_address() using a latched RB-tree
    
    Currently __module_address() uses a linear search through all
    modules to find the module corresponding to the provided address.
    With many modules loaded, this can take a long time.
    
    One of the users of this is kernel_text_address(), which is employed
    by many stack unwinders, which in turn are used by perf-callchain and
    ftrace (possibly from NMI context).
    
    So by optimizing __module_address() we optimize the many stack
    unwinders used by both perf and tracing in performance-sensitive code.
    
    Cc: Rusty Russell <rusty at rustcorp.com.au>
    Cc: Steven Rostedt <rostedt at goodmis.org>
    Cc: Mathieu Desnoyers <mathieu.desnoyers at efficios.com>
    Cc: Oleg Nesterov <oleg at redhat.com>
    Cc: "Paul E. McKenney" <paulmck at linux.vnet.ibm.com>
    Signed-off-by: Peter Zijlstra (Intel) <peterz at infradead.org>
    Signed-off-by: Rusty Russell <rusty at rustcorp.com.au>
    
    https://jira.sw.ru/browse/PSBM-69081
    (cherry picked from commit 93c2e105f6bcee231c951ba0e56e84505c4b0483)
    Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
 include/linux/module.h |  32 +++++++++++---
 kernel/module.c        | 117 ++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 138 insertions(+), 11 deletions(-)
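
For background on the lookup technique: a latched RB-tree keeps two copies
of the tree plus a sequence counter. The writer (here serialized by
module_mutex) increments the counter around every modification, so a
lock-free reader, even one running in NMI context, either searches a copy
the writer is not currently touching or notices that the counter changed
and retries. Below is a simplified sketch of that pattern; the struct
layout and the rb_find_in() helper are illustrative stand-ins, not the
exact include/linux/rbtree_latch.h API.

/* Two tree copies guarded by a sequence counter (sketch, not kernel API). */
struct latch_root_sketch {
	unsigned int seq;		/* readers pick tree[seq & 1] */
	struct rb_root tree[2];		/* two copies of the same tree */
};

/* Writer side; callers serialize externally (the patch uses module_mutex). */
static void latch_modify_sketch(struct latch_root_sketch *root,
				void (*op)(struct rb_root *tree))
{
	root->seq++;			/* send readers to tree[1]... */
	smp_wmb();			/* ...before tree[0] is touched */
	op(&root->tree[0]);
	smp_wmb();			/* tree[0] is consistent again... */
	root->seq++;			/* ...so readers may come back */
	smp_wmb();
	op(&root->tree[1]);		/* catch the second copy up */
}

/* Reader side: no locks taken, so it is usable from any context. */
static struct rb_node *latch_find_sketch(struct latch_root_sketch *root,
					 unsigned long key)
{
	struct rb_node *node;
	unsigned int seq;

	do {
		seq = READ_ONCE(root->seq);
		smp_rmb();
		/* rb_find_in() is a hypothetical range-lookup helper. */
		node = rb_find_in(&root->tree[seq & 1], key);
		smp_rmb();
	} while (READ_ONCE(root->seq) != seq);	/* raced with writer: retry */

	return node;
}

The mod_tree_ops in the diff below plug into this scheme: .less orders
nodes by base address for insertion, and .comp classifies a lookup address
against a node's [start, start + size) range.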

diff --git a/include/linux/module.h b/include/linux/module.h
index a4155ca..48c7335 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -17,6 +17,7 @@
 #include <linux/moduleparam.h>
 #include <linux/tracepoint.h>
 #include <linux/export.h>
+#include <linux/rbtree_latch.h>
 
 #include <linux/percpu.h>
 #include <asm/module.h>
@@ -236,8 +237,14 @@ struct module_ext {
 #endif
 };
 
-struct module
-{
+struct module;
+
+struct mod_tree_node {
+	struct module *mod;
+	struct latch_tree_node node;
+};
+
+struct module {
 	enum module_state state;
 
 	/* Member of list of modules */
@@ -296,8 +303,15 @@ struct module
 	/* Startup function. */
 	int (*init)(void);
 
-	/* If this is non-NULL, vfree after init() returns */
-	void *module_init;
+	/*
+	 * If this is non-NULL, vfree() after init() returns.
+	 *
+	 * Cacheline align here, such that:
+	 *   module_init, module_core, init_size, core_size,
+	 *   init_text_size, core_text_size and mtn_core.node[0]
+	 * are on the same cacheline.
+	 */
+	void *module_init	____cacheline_aligned;
 
 	/* Here is the actual code + data, vfree'd on unload. */
 	void *module_core;
@@ -308,6 +322,14 @@ struct module
 	/* The size of the executable code in each section.  */
 	unsigned int init_text_size, core_text_size;
 
+	/*
+	 * We want mtn_core::{mod,node[0]} to be in the same cacheline as the
+	 * above entries such that a regular lookup will only touch one
+	 * cacheline.
+	 */
+	struct mod_tree_node	mtn_core;
+	struct mod_tree_node	mtn_init;
+
 	/* Size of RO sections of the module (text+rodata) */
 	unsigned int init_ro_size, core_ro_size;
 
@@ -392,7 +414,7 @@ struct module
 	ctor_fn_t *ctors;
 	unsigned int num_ctors;
 #endif
-};
+} ____cacheline_aligned;
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
 #endif
diff --git a/kernel/module.c b/kernel/module.c
index 3f5edae..952a958 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -105,6 +105,108 @@
 DEFINE_MUTEX(module_mutex);
 EXPORT_SYMBOL_GPL(module_mutex);
 static LIST_HEAD(modules);
+
+/*
+ * Use a latched RB-tree for __module_address(); this allows us to use
+ * RCU-sched lookups of the address from any context.
+ *
+ * Because modules have two address ranges (init and core), we need two
+ * latch_tree_node entries. Therefore we need the back-pointer from
+ * mod_tree_node.
+ *
+ * Because init ranges are short-lived, we mark them unlikely and have placed
+ * them outside the critical cacheline in struct module.
+ */
+
+static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n)
+{
+	struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node);
+	struct module *mod = mtn->mod;
+
+	if (unlikely(mtn == &mod->mtn_init))
+		return (unsigned long)mod->module_init;
+
+	return (unsigned long)mod->module_core;
+}
+
+static __always_inline unsigned long __mod_tree_size(struct latch_tree_node *n)
+{
+	struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node);
+	struct module *mod = mtn->mod;
+
+	if (unlikely(mtn == &mod->mtn_init))
+		return (unsigned long)mod->init_size;
+
+	return (unsigned long)mod->core_size;
+}
+
+static __always_inline bool
+mod_tree_less(struct latch_tree_node *a, struct latch_tree_node *b)
+{
+	return __mod_tree_val(a) < __mod_tree_val(b);
+}
+
+static __always_inline int
+mod_tree_comp(void *key, struct latch_tree_node *n)
+{
+	unsigned long val = (unsigned long)key;
+	unsigned long start, end;
+
+	start = __mod_tree_val(n);
+	if (val < start)
+		return -1;
+
+	end = start + __mod_tree_size(n);
+	if (val >= end)
+		return 1;
+
+	return 0;
+}
+
+static const struct latch_tree_ops mod_tree_ops = {
+	.less = mod_tree_less,
+	.comp = mod_tree_comp,
+};
+
+static struct latch_tree_root mod_tree __cacheline_aligned;
+
+/*
+ * These modifications (insert, remove_init and remove) are serialized by the
+ * module_mutex.
+ */
+static void mod_tree_insert(struct module *mod)
+{
+	mod->mtn_core.mod = mod;
+	mod->mtn_init.mod = mod;
+
+	latch_tree_insert(&mod->mtn_core.node, &mod_tree, &mod_tree_ops);
+	if (mod->init_size)
+		latch_tree_insert(&mod->mtn_init.node, &mod_tree, &mod_tree_ops);
+}
+
+static void mod_tree_remove_init(struct module *mod)
+{
+	if (mod->init_size)
+		latch_tree_erase(&mod->mtn_init.node, &mod_tree, &mod_tree_ops);
+}
+
+static void mod_tree_remove(struct module *mod)
+{
+	latch_tree_erase(&mod->mtn_core.node, &mod_tree, &mod_tree_ops);
+	mod_tree_remove_init(mod);
+}
+
+static struct module *mod_tree_find(unsigned long addr)
+{
+	struct latch_tree_node *ltn;
+
+	ltn = latch_tree_find((void *)addr, &mod_tree, &mod_tree_ops);
+	if (!ltn)
+		return NULL;
+
+	return container_of(ltn, struct mod_tree_node, node)->mod;
+}
+
 #ifdef CONFIG_KGDB_KDB
 struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
 #endif /* CONFIG_KGDB_KDB */
@@ -1974,6 +2076,7 @@ static void free_module(struct module *mod)
 	/* Now we can delete it from the lists */
 	mutex_lock(&module_mutex);
 	stop_machine(__unlink_module, mod, NULL);
+	mod_tree_remove(mod);
 	mutex_unlock(&module_mutex);
 
 	mutex_lock(&module_ext_mutex);
@@ -3274,6 +3377,7 @@ static int do_init_module(struct module *mod)
 	mod->symtab = mod->core_symtab;
 	mod->strtab = mod->core_strtab;
 #endif
+	mod_tree_remove_init(mod);
 	unset_module_init_ro_nx(mod);
 	module_free(mod, mod->module_init);
 	mod->module_init = NULL;
@@ -3323,6 +3427,7 @@ static int add_unformed_module(struct module *mod)
 		goto out;
 	}
 	list_add_rcu(&mod->list, &modules);
+	mod_tree_insert(mod);
 	err = 0;
 
 out:
@@ -3987,14 +4092,14 @@ struct module *__module_address(unsigned long addr)
 	if (addr < module_addr_min || addr > module_addr_max)
 		return NULL;
 
-	list_for_each_entry_rcu(mod, &modules, list) {
+	mod = mod_tree_find(addr);
+	if (mod) {
+		BUG_ON(!(within_module_core(addr, mod)
+		    || within_module_init(addr, mod)));
 		if (mod->state == MODULE_STATE_UNFORMED)
-			continue;
-		if (within_module_core(addr, mod)
-		    || within_module_init(addr, mod))
-			return mod;
+			mod = NULL;
 	}
-	return NULL;
+	return mod;
 }
 EXPORT_SYMBOL_GPL(__module_address);
 
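With the tree in place, the fast path of __module_address() no longer walks
the module list; a lookup under preempt_disable() (the RCU-sched read side)
is enough. An illustrative caller, roughly what an unwinder-style check
does; addr_in_module() is a made-up helper, not part of this patch:

static bool addr_in_module(unsigned long addr)
{
	struct module *mod;
	bool ret;

	preempt_disable();		/* RCU-sched read side */
	mod = __module_address(addr);	/* latched RB-tree lookup, lockless */
	ret = mod != NULL;
	preempt_enable();

	return ret;
}

is_module_text_address() does essentially this, with an additional check
against the module's text-only sizes.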

