[Devel] [PATCH v2 rh7] qed: Fix scheduling in a tasklet while getting stats
Konstantin Khorenko
khorenko at virtuozzo.com
Thu Jul 27 18:37:58 MSK 2023
Here we've got to a situation when tasklet called usleep(), thus welcome
to the "scheduling while atomic" BUG().
BUG: scheduling while atomic: swapper/24/0/0x00000100
[<ffffffffb41c6199>] schedule+0x29/0x70
[<ffffffffb41c5512>] schedule_hrtimeout_range_clock+0xb2/0x150
[<ffffffffb41c55c3>] schedule_hrtimeout_range+0x13/0x20
[<ffffffffb41c3bcf>] usleep_range+0x4f/0x70
[<ffffffffc08d3e58>] qed_ptt_acquire+0x38/0x100 [qed]
[<ffffffffc08eac48>] _qed_get_vport_stats+0x458/0x580 [qed]
[<ffffffffc08ead8c>] qed_get_vport_stats+0x1c/0xd0 [qed]
[<ffffffffc08dffd3>] qed_get_protocol_stats+0x93/0x100 [qed]
qed_mcp_send_protocol_stats
case MFW_DRV_MSG_GET_LAN_STATS:
case MFW_DRV_MSG_GET_FCOE_STATS:
case MFW_DRV_MSG_GET_ISCSI_STATS:
case MFW_DRV_MSG_GET_RDMA_STATS:
[<ffffffffc08e36d8>] qed_mcp_handle_events+0x2d8/0x890 [qed]
qed_int_assertion
qed_int_attentions
[<ffffffffc08d9490>] qed_int_sp_dpc+0xa50/0xdc0 [qed]
[<ffffffffb3aa7623>] tasklet_action+0x83/0x140
[<ffffffffb41d9125>] __do_softirq+0x125/0x2bb
[<ffffffffb41d560c>] call_softirq+0x1c/0x30
[<ffffffffb3a30645>] do_softirq+0x65/0xa0
[<ffffffffb3aa78d5>] irq_exit+0x105/0x110
[<ffffffffb41d8996>] do_IRQ+0x56/0xf0
There was a similar issue in mainstream, but for qede driver and caused
by reading stats via sysfs.
3rd version of the patch fixing that issue, which i have taken as a
base: https://www.spinics.net/lists/netdev/msg901089.html
ms final commit: 42510dffd0e2 ("qed/qede: Fix scheduling while atomic")
Unfortunately last version of the patch which is accepted by mainstream
is far away from version 3, it moves stats updates to a delayed work,
which is the correct way,
but in qed driver i do not see which exactly stats should i collect in a
delayed work for each device (and devices supported by qed driver are
quite different).
Taking into account that we do not have a node access to reproduce and
verify the changes, reworking the code to use delayed work seems
impossible.
So let's live with udelay() crutch which at least is technically clear.
https://jira.vzint.dev/browse/PSBM-146761
Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
---
v1->v2:
- Fixed kdoc for qed_get_protocol_stats_iscsi()
- Added kdoc descriptions for qed_ptt_acquire_context(),
qed_get_protocol_stats_fcoe(), qed_get_vport_stats() and
qed_get_vport_stats_context()
---
drivers/net/ethernet/qlogic/qed/qed_dev_api.h | 16 ++++++++++++
drivers/net/ethernet/qlogic/qed/qed_fcoe.c | 19 ++++++++++----
drivers/net/ethernet/qlogic/qed/qed_fcoe.h | 17 ++++++++++--
drivers/net/ethernet/qlogic/qed/qed_hw.c | 26 ++++++++++++++++---
drivers/net/ethernet/qlogic/qed/qed_iscsi.c | 19 ++++++++++----
drivers/net/ethernet/qlogic/qed/qed_iscsi.h | 17 ++++++++----
drivers/net/ethernet/qlogic/qed/qed_l2.c | 19 ++++++++++----
drivers/net/ethernet/qlogic/qed/qed_l2.h | 24 +++++++++++++++++
drivers/net/ethernet/qlogic/qed/qed_main.c | 6 ++---
9 files changed, 134 insertions(+), 29 deletions(-)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
index e4b4e3b78e8a..440047dc858f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
@@ -210,6 +210,22 @@ void qed_hw_remove(struct qed_dev *cdev);
*/
struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn);
+/**
+ * qed_ptt_acquire_context(): Allocate a PTT window honoring the context
+ * atomicy.
+ *
+ * @p_hwfn: HW device data.
+ * @is_atomic: Hint from the caller - if the func can sleep or not.
+ *
+ * Context: The function should not sleep in case is_atomic == true.
+ * Return: struct qed_ptt.
+ *
+ * Should be called at the entry point to the driver
+ * (at the beginning of an exported function).
+ */
+struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn,
+ bool is_atomic);
+
/**
* @brief qed_ptt_release - Release PTT Window
*
diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
index 46dc93d3b9b5..bbf3e4e41235 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
@@ -716,13 +716,14 @@ static void _qed_fcoe_get_pstats(struct qed_hwfn *p_hwfn,
}
static int qed_fcoe_get_stats(struct qed_hwfn *p_hwfn,
- struct qed_fcoe_stats *p_stats)
+ struct qed_fcoe_stats *p_stats,
+ bool is_atomic)
{
struct qed_ptt *p_ptt;
memset(p_stats, 0, sizeof(*p_stats));
- p_ptt = qed_ptt_acquire(p_hwfn);
+ p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic);
if (!p_ptt) {
DP_ERR(p_hwfn, "Failed to acquire ptt\n");
@@ -996,19 +997,27 @@ static int qed_fcoe_destroy_conn(struct qed_dev *cdev,
QED_SPQ_MODE_EBLOCK, NULL);
}
+static int qed_fcoe_stats_context(struct qed_dev *cdev,
+ struct qed_fcoe_stats *stats,
+ bool is_atomic)
+{
+ return qed_fcoe_get_stats(QED_LEADING_HWFN(cdev), stats, is_atomic);
+}
+
static int qed_fcoe_stats(struct qed_dev *cdev, struct qed_fcoe_stats *stats)
{
- return qed_fcoe_get_stats(QED_LEADING_HWFN(cdev), stats);
+ return qed_fcoe_stats_context(cdev, stats, false);
}
void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
- struct qed_mcp_fcoe_stats *stats)
+ struct qed_mcp_fcoe_stats *stats,
+ bool is_atomic)
{
struct qed_fcoe_stats proto_stats;
/* Retrieve FW statistics */
memset(&proto_stats, 0, sizeof(proto_stats));
- if (qed_fcoe_stats(cdev, &proto_stats)) {
+ if (qed_fcoe_stats_context(cdev, &proto_stats, is_atomic)) {
DP_VERBOSE(cdev, QED_MSG_STORAGE,
"Failed to collect FCoE statistics\n");
return;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.h b/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
index 027a76ac839a..da14cc02a55d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
@@ -54,8 +54,20 @@ int qed_fcoe_alloc(struct qed_hwfn *p_hwfn);
void qed_fcoe_setup(struct qed_hwfn *p_hwfn);
void qed_fcoe_free(struct qed_hwfn *p_hwfn);
+/**
+ * qed_get_protocol_stats_fcoe(): Fills provided statistics
+ * struct with statistics.
+ *
+ * @cdev: Qed dev pointer.
+ * @stats: Points to struct that will be filled with statistics.
+ * @is_atomic: Hint from the caller - if the func can sleep or not.
+ *
+ * Context: The function should not sleep in case is_atomic == true.
+ * Return: Void.
+ */
void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
- struct qed_mcp_fcoe_stats *stats);
+ struct qed_mcp_fcoe_stats *stats,
+ bool is_atomic);
#else /* CONFIG_QED_FCOE */
static inline int qed_fcoe_alloc(struct qed_hwfn *p_hwfn)
{
@@ -66,7 +78,8 @@ static inline void qed_fcoe_setup(struct qed_hwfn *p_hwfn) {}
static inline void qed_fcoe_free(struct qed_hwfn *p_hwfn) {}
static inline void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
- struct qed_mcp_fcoe_stats *stats)
+ struct qed_mcp_fcoe_stats *stats,
+ bool is_atomic)
{
}
#endif /* CONFIG_QED_FCOE */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
index 72ec1c6bdf70..86033b759a68 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
@@ -49,7 +49,10 @@
#include "qed_reg_addr.h"
#include "qed_sriov.h"
-#define QED_BAR_ACQUIRE_TIMEOUT 1000
+#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT 1000
+#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP 1000
+#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT 100000
+#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY 10
/* Invalid values */
#define QED_BAR_INVALID_OFFSET (cpu_to_le32(-1))
@@ -110,12 +113,22 @@ void qed_ptt_pool_free(struct qed_hwfn *p_hwfn)
}
struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn)
+{
+ return qed_ptt_acquire_context(p_hwfn, false);
+}
+
+struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn, bool is_atomic)
{
struct qed_ptt *p_ptt;
- unsigned int i;
+ unsigned int i, count;
+
+ if (is_atomic)
+ count = QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT;
+ else
+ count = QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT;
/* Take the free PTT from the list */
- for (i = 0; i < QED_BAR_ACQUIRE_TIMEOUT; i++) {
+ for (i = 0; i < count; i++) {
spin_lock_bh(&p_hwfn->p_ptt_pool->lock);
if (!list_empty(&p_hwfn->p_ptt_pool->free_list)) {
@@ -131,7 +144,12 @@ struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn)
}
spin_unlock_bh(&p_hwfn->p_ptt_pool->lock);
- usleep_range(1000, 2000);
+
+ if (is_atomic)
+ udelay(QED_BAR_ACQUIRE_TIMEOUT_UDELAY);
+ else
+ usleep_range(QED_BAR_ACQUIRE_TIMEOUT_USLEEP,
+ QED_BAR_ACQUIRE_TIMEOUT_USLEEP * 2);
}
DP_NOTICE(p_hwfn, "PTT acquire timeout - failed to allocate PTT\n");
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
index 4f8a685d1a55..40eaab4330f9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -1049,13 +1049,14 @@ static void _qed_iscsi_get_pstats(struct qed_hwfn *p_hwfn,
}
static int qed_iscsi_get_stats(struct qed_hwfn *p_hwfn,
- struct qed_iscsi_stats *stats)
+ struct qed_iscsi_stats *stats,
+ bool is_atomic)
{
struct qed_ptt *p_ptt;
memset(stats, 0, sizeof(*stats));
- p_ptt = qed_ptt_acquire(p_hwfn);
+ p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic);
if (!p_ptt) {
DP_ERR(p_hwfn, "Failed to acquire ptt\n");
return -EAGAIN;
@@ -1390,9 +1391,16 @@ static int qed_iscsi_destroy_conn(struct qed_dev *cdev,
QED_SPQ_MODE_EBLOCK, NULL);
}
+static int qed_iscsi_stats_context(struct qed_dev *cdev,
+ struct qed_iscsi_stats *stats,
+ bool is_atomic)
+{
+ return qed_iscsi_get_stats(QED_LEADING_HWFN(cdev), stats, is_atomic);
+}
+
static int qed_iscsi_stats(struct qed_dev *cdev, struct qed_iscsi_stats *stats)
{
- return qed_iscsi_get_stats(QED_LEADING_HWFN(cdev), stats);
+ return qed_iscsi_stats_context(cdev, stats, false);
}
static int qed_iscsi_change_mac(struct qed_dev *cdev,
@@ -1413,13 +1421,14 @@ static int qed_iscsi_change_mac(struct qed_dev *cdev,
}
void qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
- struct qed_mcp_iscsi_stats *stats)
+ struct qed_mcp_iscsi_stats *stats,
+ bool is_atomic)
{
struct qed_iscsi_stats proto_stats;
/* Retrieve FW statistics */
memset(&proto_stats, 0, sizeof(proto_stats));
- if (qed_iscsi_stats(cdev, &proto_stats)) {
+ if (qed_iscsi_stats_context(cdev, &proto_stats, is_atomic)) {
DP_VERBOSE(cdev, QED_MSG_STORAGE,
"Failed to collect ISCSI statistics\n");
return;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
index 225c75b02a06..5d84c8d382f1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
@@ -64,13 +64,19 @@ void qed_iscsi_setup(struct qed_hwfn *p_hwfn);
void qed_iscsi_free(struct qed_hwfn *p_hwfn);
/**
- * @brief - Fills provided statistics struct with statistics.
+ * qed_get_protocol_stats_iscsi(): Fills provided statistics
+ * struct with statistics.
*
- * @param cdev
- * @param stats - points to struct that will be filled with statistics.
+ * @cdev: Qed dev pointer.
+ * @stats: Points to struct that will be filled with statistics.
+ * @is_atomic: Hint from the caller - if the func can sleep or not.
+ *
+ * Context: The function should not sleep in case is_atomic == true.
+ * Return: Void.
*/
void qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
- struct qed_mcp_iscsi_stats *stats);
+ struct qed_mcp_iscsi_stats *stats,
+ bool is_atomic);
#else /* IS_ENABLED(CONFIG_QED_ISCSI) */
static inline int qed_iscsi_alloc(struct qed_hwfn *p_hwfn)
{
@@ -83,7 +89,8 @@ static inline void qed_iscsi_free(struct qed_hwfn *p_hwfn) {}
static inline void
qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
- struct qed_mcp_iscsi_stats *stats) {}
+ struct qed_mcp_iscsi_stats *stats,
+ bool is_atomic) {}
#endif /* IS_ENABLED(CONFIG_QED_ISCSI) */
#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index b3937935d2a8..8b3d9a06b979 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -1887,7 +1887,8 @@ static void __qed_get_vport_stats(struct qed_hwfn *p_hwfn,
}
static void _qed_get_vport_stats(struct qed_dev *cdev,
- struct qed_eth_stats *stats)
+ struct qed_eth_stats *stats,
+ bool is_atomic)
{
u8 fw_vport = 0;
int i;
@@ -1896,10 +1897,11 @@ static void _qed_get_vport_stats(struct qed_dev *cdev,
for_each_hwfn(cdev, i) {
struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
- struct qed_ptt *p_ptt = IS_PF(cdev) ? qed_ptt_acquire(p_hwfn)
- : NULL;
+ struct qed_ptt *p_ptt;
bool b_get_port_stats;
+ p_ptt = IS_PF(cdev) ? qed_ptt_acquire_context(p_hwfn, is_atomic)
+ : NULL;
if (IS_PF(cdev)) {
/* The main vport index is relative first */
if (qed_fw_vport(p_hwfn, 0, &fw_vport)) {
@@ -1924,6 +1926,13 @@ static void _qed_get_vport_stats(struct qed_dev *cdev,
}
void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
+{
+ qed_get_vport_stats_context(cdev, stats, false);
+}
+
+void qed_get_vport_stats_context(struct qed_dev *cdev,
+ struct qed_eth_stats *stats,
+ bool is_atomic)
{
u32 i;
@@ -1932,7 +1941,7 @@ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
return;
}
- _qed_get_vport_stats(cdev, stats);
+ _qed_get_vport_stats(cdev, stats, is_atomic);
if (!cdev->reset_stats)
return;
@@ -1984,7 +1993,7 @@ void qed_reset_vport_stats(struct qed_dev *cdev)
if (!cdev->reset_stats) {
DP_INFO(cdev, "Reset stats not allocated\n");
} else {
- _qed_get_vport_stats(cdev, cdev->reset_stats);
+ _qed_get_vport_stats(cdev, cdev->reset_stats, false);
cdev->reset_stats->common.link_change_count = 0;
}
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h
index 7127d5aaac42..3545235a1f6e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h
@@ -277,8 +277,32 @@ qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
enum spq_mode comp_mode,
struct qed_spq_comp_cb *p_comp_data);
+/**
+ * qed_get_vport_stats(): Fills provided statistics
+ * struct with statistics.
+ *
+ * @cdev: Qed dev pointer.
+ * @stats: Points to struct that will be filled with statistics.
+ *
+ * Return: Void.
+ */
void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats);
+/**
+ * qed_get_vport_stats_context(): Fills provided statistics
+ * struct with statistics.
+ *
+ * @cdev: Qed dev pointer.
+ * @stats: Points to struct that will be filled with statistics.
+ * @is_atomic: Hint from the caller - if the func can sleep or not.
+ *
+ * Context: The function should not sleep in case is_atomic == true.
+ * Return: Void.
+ */
+void qed_get_vport_stats_context(struct qed_dev *cdev,
+ struct qed_eth_stats *stats,
+ bool is_atomic);
+
void qed_reset_vport_stats(struct qed_dev *cdev);
/**
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index ed052f67ce50..80d6501ea62b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -2430,7 +2430,7 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
switch (type) {
case QED_MCP_LAN_STATS:
- qed_get_vport_stats(cdev, ð_stats);
+ qed_get_vport_stats_context(cdev, ð_stats, true);
stats->lan_stats.ucast_rx_pkts =
eth_stats.common.rx_ucast_pkts;
stats->lan_stats.ucast_tx_pkts =
@@ -2438,10 +2438,10 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
stats->lan_stats.fcs_err = -1;
break;
case QED_MCP_FCOE_STATS:
- qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats);
+ qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats, true);
break;
case QED_MCP_ISCSI_STATS:
- qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats);
+ qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats, true);
break;
default:
DP_VERBOSE(cdev, QED_MSG_SP,
--
2.24.3
More information about the Devel
mailing list