[Devel] [PATCH RHEL7 COMMIT] netfilter: drop/reaquire nfnl_lock on request_module() in nft_log module
Vasily Averin
vvs at virtuozzo.com
Tue Jul 21 11:59:38 MSK 2020
The commit is pushed to "branch-rh7-3.10.0-1127.10.1.vz7.162.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.10.1.vz7.162.11
------>
commit 19745d654b4e86439306e342fa20328dd9b23946
Author: Konstantin Khorenko <khorenko at virtuozzo.com>
Date: Tue Jul 21 11:59:38 2020 +0300
netfilter: drop/reaquire nfnl_lock on request_module() in nft_log module
There could be a deadlock:
1. "nft" process calls request_module() with
nfnl_lock(NFNL_SUBSYS_NFTABLES) taken and waits for its completion:
[UN] PID: 7391 TASK: ffff939e154bc000 COMMAND: "nft"
wait_for_completion_killable
__call_usermodehelper_exec
__request_module
nf_logger_find_get
nft_log_init // ops->init()
nf_tables_newexpr
nf_tables_newrule // called with nfnl_lock taken
nfnetlink_rcv_batch // takes nfnl_lock
nfnetlink_rcv
netlink_unicast
netlink_sendmsg
2. request_module() tries to load the module, but waits on "net_mutex":
[UN] PID: 8913 TASK: ffff939e140ec000 COMMAND: "modprobe"
__mutex_lock_slowpath
mutex_lock // mutex_lock(&net_mutex);
(owner = 0xffff939e315f2000 == PID: 158 COMMAND: "kworker/u64:1")
register_pernet_subsys
init_module
do_one_initcall
load_module
sys_init_module
3. "kworker" holds net_mutex, but itself waits for nfnl_lock:
[UN] PID: 158 TASK: ffff939e315f2000 COMMAND: "kworker/u64:1"
__mutex_lock_slowpath
mutex_lock // nfnl_lock(NFNL_SUBSYS_NFTABLES)
(owner = 0xffff939e154bc000 PID: 7391 COMMAND: "nft")
nfnl_lock
nft_unregister_afinfo
nf_tables_inet_exit_net
ops_exit_list
cleanup_net
process_one_work
Reproducer:
===========
[console 1]: export i=0; while true; do
nft add table filter; \
nft add chain filter input; \
nft add rule filter input tcp dport 23 ct state new \
log prefix \"Connection to port 23:\" accept; \
nft flush ruleset; \
rmmod nft_log; rmmod nf_log_ipv4; rmmod nf_log_common; \
i=$(($i+1)); echo $i; \
done
[console 2]: modprobe nf_tables; \
export j=0; while true; do \
ip net add n1; \
ip net del n1; \
j=$(($j + 1)); echo $j; \
done
In mainstream this sutiation is impossible, it's fixed as a side effect
by: f102d66b335a4 ("netfilter: nf_tables: use dedicated mutex to guard
transactions"), but backporting it is quite heavy,
so let's just drop/reacquire nfnl lock around request_module() like it's
done in similar places.
v2: be ready in nfnetlink_rcv_batch() that -ENOENT could be returned
with nfnl lock dropped/reacquired like -EAGAIN => re-read ss.
https://jira.sw.ru/browse/PSBM-105534
https://bugzilla.redhat.com/show_bug.cgi?id=1858329
Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
Reviewed-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
include/net/netfilter/nf_log.h | 2 +-
net/netfilter/nf_log.c | 8 ++++---
net/netfilter/nfnetlink.c | 11 +++++++++-
net/netfilter/nft_log.c | 49 +++++++++++++++++++++++++++++++++++++++---
net/netfilter/xt_LOG.c | 2 +-
net/netfilter/xt_TRACE.c | 2 +-
6 files changed, 64 insertions(+), 10 deletions(-)
diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index 4b1b4c44e537b..2686caac8080e 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -64,7 +64,7 @@ int nf_log_bind_pf(struct net *net, u_int8_t pf,
const struct nf_logger *logger);
void nf_log_unbind_pf(struct net *net, u_int8_t pf);
-int nf_logger_find_get(int pf, enum nf_log_type type);
+int nf_logger_find_get_nolock(int pf, enum nf_log_type type);
void nf_logger_put(int pf, enum nf_log_type type);
void nf_logger_request_module(int pf, enum nf_log_type type);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index ff07ee54bbdd3..dd34fd081a89d 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -19,7 +19,9 @@
int sysctl_nf_log_all_netns __read_mostly;
EXPORT_SYMBOL(sysctl_nf_log_all_netns);
-static struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly;
+struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly;
+EXPORT_SYMBOL(loggers);
+
static DEFINE_MUTEX(nf_log_mutex);
#define nft_log_dereference(logger) \
@@ -164,7 +166,7 @@ void nf_logger_request_module(int pf, enum nf_log_type type)
}
EXPORT_SYMBOL_GPL(nf_logger_request_module);
-int nf_logger_find_get(int pf, enum nf_log_type type)
+int nf_logger_find_get_nolock(int pf, enum nf_log_type type)
{
struct nf_logger *logger;
int ret = -ENOENT;
@@ -184,7 +186,7 @@ int nf_logger_find_get(int pf, enum nf_log_type type)
rcu_read_unlock();
return ret;
}
-EXPORT_SYMBOL_GPL(nf_logger_find_get);
+EXPORT_SYMBOL_GPL(nf_logger_find_get_nolock);
void nf_logger_put(int pf, enum nf_log_type type)
{
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 83259e7be0fbd..a48f185f7c1a1 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -440,7 +440,16 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
} else if (status == NFNL_BATCH_DONE) {
ss->commit(oskb);
} else {
- ss->abort(oskb);
+ const struct nfnetlink_subsystem *ss2;
+
+ /* nf_logger_find_get_lock() can return -ENOENT after
+ * nfnl_unlock drop/reacquire, so need to reread ss like in
+ * -EGAIN case
+ */
+ ss2 = rcu_dereference_protected(table[subsys_id].subsys,
+ lockdep_is_held(&table[subsys_id].mutex));
+ if (ss2 == ss)
+ ss->abort(oskb);
}
nfnl_err_deliver(&err_list, oskb);
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index d21fe91d50fc0..71e35feb1fe27 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -15,6 +15,7 @@
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
+#include <linux/netfilter/nfnetlink.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_log.h>
#include <linux/netdevice.h>
@@ -46,6 +47,48 @@ static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = {
[NFTA_LOG_FLAGS] = { .type = NLA_U32 },
};
+extern struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly;
+
+/*
+ * In "nft_log" module we need to drop nfnl lock while performing
+ * request_module(), calls to nf_logger_find_get() in other
+ * modules are done without nfnl_lock taken.
+ *
+ * nf_logger_find_get
+ * nft_log_init
+ * nf_tables_newexpr
+ * nf_tables_newrule // nc->call_batch
+ * // called with nfnl_lock taken
+ *
+ * nfnetlink_rcv_batch // takes nfnl_lock(NFNL_SUBSYS_NFTABLES)
+ * nfnetlink_rcv
+ * netlink_unicast
+ * netlink_sendmsg
+ */
+static int nf_logger_find_get_lock(int pf, enum nf_log_type type)
+{
+ struct nf_logger *logger;
+ int ret = 0;
+
+ logger = loggers[pf][type];
+ if (logger == NULL) {
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ request_module("nf-logger-%u-%u", pf, type);
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ ret = -EAGAIN;
+ }
+
+ rcu_read_lock();
+ logger = rcu_dereference(loggers[pf][type]);
+
+ if ((logger == NULL) ||
+ ((ret == 0) && !try_module_get(logger->me))) {
+ ret = -ENOENT;
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
static int nft_log_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@@ -99,11 +142,11 @@ static int nft_log_init(const struct nft_ctx *ctx,
}
if (ctx->afi->family == NFPROTO_INET) {
- ret = nf_logger_find_get(NFPROTO_IPV4, li->type);
+ ret = nf_logger_find_get_lock(NFPROTO_IPV4, li->type);
if (ret < 0)
return ret;
- ret = nf_logger_find_get(NFPROTO_IPV6, li->type);
+ ret = nf_logger_find_get_lock(NFPROTO_IPV6, li->type);
if (ret < 0) {
nf_logger_put(NFPROTO_IPV4, li->type);
return ret;
@@ -111,7 +154,7 @@ static int nft_log_init(const struct nft_ctx *ctx,
return 0;
}
- return nf_logger_find_get(ctx->afi->family, li->type);
+ return nf_logger_find_get_lock(ctx->afi->family, li->type);
}
static void nft_log_destroy(const struct nft_ctx *ctx,
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index eb31d73e31ebf..89059468e0f80 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -61,7 +61,7 @@ static int log_tg_check(const struct xt_tgchk_param *par)
return -EINVAL;
}
- return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+ return nf_logger_find_get_nolock(par->family, NF_LOG_TYPE_LOG);
}
static void log_tg_destroy(const struct xt_tgdtor_param *par)
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index 858d189a13031..d0e77eea6b829 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -13,7 +13,7 @@ MODULE_ALIAS("ip6t_TRACE");
static int trace_tg_check(const struct xt_tgchk_param *par)
{
- return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+ return nf_logger_find_get_nolock(par->family, NF_LOG_TYPE_LOG);
}
static void trace_tg_destroy(const struct xt_tgdtor_param *par)
More information about the Devel
mailing list