[Devel] [PATCH RH8] ve/net/bridge: make net.bridge.* sysctl visible in Containers (r/o)

Vasily Averin vvs at virtuozzo.com
Mon May 31 10:40:26 MSK 2021


Kubernetes performs some prechecks before it starts; in particular, it
requires the "net.bridge.bridge-nf-call-ip[6]tables" sysctls to be enabled.

Thus let's make all "net.bridge.*" sysctls visible in Containers,
but in read-only mode, as they are not virtualized.

The implementation is not the minimal one needed to achieve this goal,
but it was chosen for two reasons:

1) it is now similar to the implementation of the netfilter sysctls
   (although the netfilter sysctls are fully virtualized)

2) if we ever have to fully virtualize bridge netfilter sysctls,
   we won't have to rewrite the code completely again.

https://jira.sw.ru/browse/PSBM-92107

Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>

v2: the per-net brnf_sysctl_header pointer is moved to struct net (a new
struct netns_brnf is introduced)

v3: style cleanup: drop unnecessary variable, redundant brackets
(cherry picked from vz7 commit 6245a3ec39f494556612ba76d68b0735289bd3a1)
VvS: reworked br_netfilter_init/fini
Signed-off-by: Vasily Averin <vvs at virtuozzo.com>
---
 include/net/net_namespace.h      |   4 ++
 include/net/netns/br_netfilter.h |   9 +++
 net/bridge/br_netfilter_hooks.c  | 110 +++++++++++++++++++++++++------
 3 files changed, 104 insertions(+), 19 deletions(-)
 create mode 100644 include/net/netns/br_netfilter.h

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 634d107dff8b..90ce08faf2cd 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -24,6 +24,7 @@
 #include <net/netns/sctp.h>
 #include <net/netns/dccp.h>
 #include <net/netns/netfilter.h>
+#include <net/netns/br_netfilter.h>
 #include <net/netns/x_tables.h>
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netns/conntrack.h>
@@ -133,6 +134,9 @@ struct net {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	struct netns_ct		ct;
 #endif
+#if defined(CONFIG_BRIDGE_NETFILTER) || defined(CONFIG_BRIDGE_NETFILTER_MODULE)
+	struct netns_brnf	brnf;
+#endif
 #if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE)
 	struct netns_nftables	nft;
 #endif
diff --git a/include/net/netns/br_netfilter.h b/include/net/netns/br_netfilter.h
new file mode 100644
index 000000000000..c842c0657e67
--- /dev/null
+++ b/include/net/netns/br_netfilter.h
@@ -0,0 +1,9 @@
+#ifndef __NETNS_BR_NETFILTER_H
+#define __NETNS_BR_NETFILTER_H
+
+struct netns_brnf {
+#ifdef CONFIG_SYSCTL
+	struct ctl_table_header *brnf_sysctl_header;
+#endif
+};
+#endif
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index e8ddf7d081bf..428e0009ec0f 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -53,7 +53,6 @@ struct brnf_net {
 };
 
 #ifdef CONFIG_SYSCTL
-static struct ctl_table_header *brnf_sysctl_header;
 static int brnf_call_iptables __read_mostly = 1;
 static int brnf_call_ip6tables __read_mostly = 1;
 static int brnf_call_arptables __read_mostly = 1;
@@ -1088,43 +1087,116 @@ static struct ctl_table brnf_table[] = {
 };
 #endif
 
+#ifdef CONFIG_SYSCTL
+static int br_netfilter_init_sysctl(struct net *net)
+{
+	struct ctl_table *table;
+	int num_entries, i;
+
+	table = kmemdup(brnf_table, sizeof(brnf_table), GFP_KERNEL);
+	if (!table)
+		goto out_kmemdup;
+
+	/*
+	 * Bridge netfilter sysctls are not virtualized, show them in RO mode
+	 * in non-init netns.
+	 */
+	if (!net_eq(net, &init_net)) {
+		num_entries = sizeof(brnf_table) / sizeof(struct ctl_table);
+		for (i = 0; i < num_entries; i++)
+			table[i].mode = 0444;
+	}
+
+	/* Don't export sysctls to unprivileged users */
+	if (ve_net_hide_sysctl(net))
+		table[0].procname = NULL;
+
+	net->brnf.brnf_sysctl_header = register_net_sysctl(net, "net/bridge",
+							   table);
+	if (!net->brnf.brnf_sysctl_header)
+		goto out_unregister_netfilter;
+
+	return 0;
+
+out_unregister_netfilter:
+	kfree(table);
+out_kmemdup:
+	return -ENOMEM;
+}
+
+static void br_netfilter_fini_sysctl(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = net->brnf.brnf_sysctl_header->ctl_table_arg;
+	unregister_net_sysctl_table(net->brnf.brnf_sysctl_header);
+	kfree(table);
+}
+#else
+static int br_netfilter_init_sysctl(struct net *net)
+{
+	return 0;
+}
+
+static void br_netfilter_fini_sysctl(struct net *net)
+{
+}
+#endif /* CONFIG_SYSCTL */
+
+static int br_netfilter_pernet_init(struct net *net)
+{
+	return br_netfilter_init_sysctl(net);
+}
+
+static void br_netfilter_pernet_exit(struct list_head *net_exit_list)
+{
+	struct net *net;
+
+	list_for_each_entry(net, net_exit_list, exit_list)
+		br_netfilter_fini_sysctl(net);
+}
+
+static struct pernet_operations br_netfilter_net_ops = {
+	.init		= br_netfilter_pernet_init,
+	.exit_batch	= br_netfilter_pernet_exit,
+};
+
 static int __init br_netfilter_init(void)
 {
 	int ret;
 
 	ret = register_pernet_subsys(&brnf_net_ops);
 	if (ret < 0)
-		return ret;
+		goto out_start;
 
 	ret = register_netdevice_notifier(&brnf_notifier);
-	if (ret < 0) {
-		unregister_pernet_subsys(&brnf_net_ops);
-		return ret;
-	}
+	if (ret < 0)
+		goto out_notifier;
+
+	ret = register_pernet_subsys(&br_netfilter_net_ops);
+	if (ret < 0)
+		goto out_pernet;
 
-#ifdef CONFIG_SYSCTL
-	brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table);
-	if (brnf_sysctl_header == NULL) {
-		printk(KERN_WARNING
-		       "br_netfilter: can't register to sysctl.\n");
-		unregister_netdevice_notifier(&brnf_notifier);
-		unregister_pernet_subsys(&brnf_net_ops);
-		return -ENOMEM;
-	}
-#endif
 	RCU_INIT_POINTER(nf_br_ops, &br_ops);
 	printk(KERN_NOTICE "Bridge firewalling registered\n");
+
 	return 0;
+
+out_pernet:
+	printk(KERN_WARNING "br_netfilter: can't register to sysctl.\n");
+	unregister_netdevice_notifier(&brnf_notifier);
+out_notifier:
+	unregister_pernet_subsys(&brnf_net_ops);
+out_start:
+	return ret;
 }
 
 static void __exit br_netfilter_fini(void)
 {
 	RCU_INIT_POINTER(nf_br_ops, NULL);
+	unregister_pernet_subsys(&br_netfilter_net_ops);
 	unregister_netdevice_notifier(&brnf_notifier);
 	unregister_pernet_subsys(&brnf_net_ops);
-#ifdef CONFIG_SYSCTL
-	unregister_net_sysctl_table(brnf_sysctl_header);
-#endif
 }
 
 module_init(br_netfilter_init);
-- 
2.25.1



More information about the Devel mailing list