[Devel] [PATCH v2 rh7] qed: Fix scheduling in a tasklet while getting stats
Alexander Atanasov
alexander.atanasov at virtuozzo.com
Fri Jul 28 10:52:50 MSK 2023
On 27.07.23 18:37, Konstantin Khorenko wrote:
> Here we've got to a situation when tasklet called usleep(), thus welcome
> to the "scheduling while atomic" BUG().
>
> BUG: scheduling while atomic: swapper/24/0/0x00000100
>
> [<ffffffffb41c6199>] schedule+0x29/0x70
> [<ffffffffb41c5512>] schedule_hrtimeout_range_clock+0xb2/0x150
> [<ffffffffb41c55c3>] schedule_hrtimeout_range+0x13/0x20
> [<ffffffffb41c3bcf>] usleep_range+0x4f/0x70
> [<ffffffffc08d3e58>] qed_ptt_acquire+0x38/0x100 [qed]
> [<ffffffffc08eac48>] _qed_get_vport_stats+0x458/0x580 [qed]
> [<ffffffffc08ead8c>] qed_get_vport_stats+0x1c/0xd0 [qed]
> [<ffffffffc08dffd3>] qed_get_protocol_stats+0x93/0x100 [qed]
> qed_mcp_send_protocol_stats
> case MFW_DRV_MSG_GET_LAN_STATS:
> case MFW_DRV_MSG_GET_FCOE_STATS:
> case MFW_DRV_MSG_GET_ISCSI_STATS:
> case MFW_DRV_MSG_GET_RDMA_STATS:
> [<ffffffffc08e36d8>] qed_mcp_handle_events+0x2d8/0x890 [qed]
> qed_int_assertion
> qed_int_attentions
> [<ffffffffc08d9490>] qed_int_sp_dpc+0xa50/0xdc0 [qed]
> [<ffffffffb3aa7623>] tasklet_action+0x83/0x140
> [<ffffffffb41d9125>] __do_softirq+0x125/0x2bb
> [<ffffffffb41d560c>] call_softirq+0x1c/0x30
> [<ffffffffb3a30645>] do_softirq+0x65/0xa0
> [<ffffffffb3aa78d5>] irq_exit+0x105/0x110
> [<ffffffffb41d8996>] do_IRQ+0x56/0xf0
>
> There was a similar issue in mainstream, but for qede driver and caused
> by reading stats via sysfs.
>
> 3rd version of the patch fixing that issue, which i have taken as a
> base: https://www.spinics.net/lists/netdev/msg901089.html
>
> ms final commit: 42510dffd0e2 ("qed/qede: Fix scheduling while atomic")
>
> Unfortunately last version of the patch which is accepted by mainstream
> is far away from version 3, it moves stats updates to a delayed work,
> which is the correct way,
> but in qed driver i do not see which exactly stats should i collect in a
> delayed work for each device (and devices supported by qed driver are
> quite different).
>
> Taking into account that we do not have a node access to reproduce and
> verify the changes, reworking the code to use delayed work seems
> impossible.
>
> So let's live with udelay() crutch which at least is technically clear.
>
> https://jira.vzint.dev/browse/PSBM-146761
>
> Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
>
> ---
> v1->v2:
> - Fixed kdoc for qed_get_protocol_stats_iscsi()
>
> - Added kdoc descriptions for qed_ptt_acquire_context(),
> qed_get_protocol_stats_fcoe(), qed_get_vport_stats() and
> qed_get_vport_stats_context()
> ---
> drivers/net/ethernet/qlogic/qed/qed_dev_api.h | 16 ++++++++++++
> drivers/net/ethernet/qlogic/qed/qed_fcoe.c | 19 ++++++++++----
> drivers/net/ethernet/qlogic/qed/qed_fcoe.h | 17 ++++++++++--
> drivers/net/ethernet/qlogic/qed/qed_hw.c | 26 ++++++++++++++++---
> drivers/net/ethernet/qlogic/qed/qed_iscsi.c | 19 ++++++++++----
> drivers/net/ethernet/qlogic/qed/qed_iscsi.h | 17 ++++++++----
> drivers/net/ethernet/qlogic/qed/qed_l2.c | 19 ++++++++++----
> drivers/net/ethernet/qlogic/qed/qed_l2.h | 24 +++++++++++++++++
> drivers/net/ethernet/qlogic/qed/qed_main.c | 6 ++---
> 9 files changed, 134 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
> index e4b4e3b78e8a..440047dc858f 100644
> --- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
> +++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
> @@ -210,6 +210,22 @@ void qed_hw_remove(struct qed_dev *cdev);
> */
> struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn);
>
> +/**
> + * qed_ptt_acquire_context(): Allocate a PTT window honoring the context
> + * atomicy.
> + *
> + * @p_hwfn: HW device data.
> + * @is_atomic: Hint from the caller - if the func can sleep or not.
> + *
> + * Context: The function should not sleep in case is_atomic == true.
> + * Return: struct qed_ptt.
> + *
> + * Should be called at the entry point to the driver
> + * (at the beginning of an exported function).
> + */
> +struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn,
> + bool is_atomic);
> +
> /**
> * @brief qed_ptt_release - Release PTT Window
> *
> diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
> index 46dc93d3b9b5..bbf3e4e41235 100644
> --- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
> +++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
> @@ -716,13 +716,14 @@ static void _qed_fcoe_get_pstats(struct qed_hwfn *p_hwfn,
> }
>
> static int qed_fcoe_get_stats(struct qed_hwfn *p_hwfn,
> - struct qed_fcoe_stats *p_stats)
> + struct qed_fcoe_stats *p_stats,
> + bool is_atomic)
> {
> struct qed_ptt *p_ptt;
>
> memset(p_stats, 0, sizeof(*p_stats));
>
> - p_ptt = qed_ptt_acquire(p_hwfn);
> + p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic);
>
> if (!p_ptt) {
> DP_ERR(p_hwfn, "Failed to acquire ptt\n");
> @@ -996,19 +997,27 @@ static int qed_fcoe_destroy_conn(struct qed_dev *cdev,
> QED_SPQ_MODE_EBLOCK, NULL);
> }
>
> +static int qed_fcoe_stats_context(struct qed_dev *cdev,
> + struct qed_fcoe_stats *stats,
> + bool is_atomic)
> +{
> + return qed_fcoe_get_stats(QED_LEADING_HWFN(cdev), stats, is_atomic);
> +}
> +
> static int qed_fcoe_stats(struct qed_dev *cdev, struct qed_fcoe_stats *stats)
> {
> - return qed_fcoe_get_stats(QED_LEADING_HWFN(cdev), stats);
> + return qed_fcoe_stats_context(cdev, stats, false);
> }
>
> void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
> - struct qed_mcp_fcoe_stats *stats)
> + struct qed_mcp_fcoe_stats *stats,
> + bool is_atomic)
> {
> struct qed_fcoe_stats proto_stats;
>
> /* Retrieve FW statistics */
> memset(&proto_stats, 0, sizeof(proto_stats));
> - if (qed_fcoe_stats(cdev, &proto_stats)) {
> + if (qed_fcoe_stats_context(cdev, &proto_stats, is_atomic)) {
> DP_VERBOSE(cdev, QED_MSG_STORAGE,
> "Failed to collect FCoE statistics\n");
> return;
> diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.h b/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
> index 027a76ac839a..da14cc02a55d 100644
> --- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
> +++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
> @@ -54,8 +54,20 @@ int qed_fcoe_alloc(struct qed_hwfn *p_hwfn);
> void qed_fcoe_setup(struct qed_hwfn *p_hwfn);
>
> void qed_fcoe_free(struct qed_hwfn *p_hwfn);
> +/**
> + * qed_get_protocol_stats_fcoe(): Fills provided statistics
> + * struct with statistics.
> + *
> + * @cdev: Qed dev pointer.
> + * @stats: Points to struct that will be filled with statistics.
> + * @is_atomic: Hint from the caller - if the func can sleep or not.
> + *
> + * Context: The function should not sleep in case is_atomic == true.
> + * Return: Void.
> + */
> void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
> - struct qed_mcp_fcoe_stats *stats);
> + struct qed_mcp_fcoe_stats *stats,
> + bool is_atomic);
> #else /* CONFIG_QED_FCOE */
> static inline int qed_fcoe_alloc(struct qed_hwfn *p_hwfn)
> {
> @@ -66,7 +78,8 @@ static inline void qed_fcoe_setup(struct qed_hwfn *p_hwfn) {}
> static inline void qed_fcoe_free(struct qed_hwfn *p_hwfn) {}
>
> static inline void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
> - struct qed_mcp_fcoe_stats *stats)
> + struct qed_mcp_fcoe_stats *stats,
> + bool is_atomic)
> {
> }
> #endif /* CONFIG_QED_FCOE */
> diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
> index 72ec1c6bdf70..86033b759a68 100644
> --- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
> +++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
> @@ -49,7 +49,10 @@
> #include "qed_reg_addr.h"
> #include "qed_sriov.h"
>
> -#define QED_BAR_ACQUIRE_TIMEOUT 1000
> +#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT 1000
> +#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP 1000
> +#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT 100000
> +#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY 10
>
> /* Invalid values */
> #define QED_BAR_INVALID_OFFSET (cpu_to_le32(-1))
> @@ -110,12 +113,22 @@ void qed_ptt_pool_free(struct qed_hwfn *p_hwfn)
> }
>
> struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn)
> +{
> + return qed_ptt_acquire_context(p_hwfn, false);
> +}
> +
> +struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn, bool is_atomic)
> {
> struct qed_ptt *p_ptt;
> - unsigned int i;
> + unsigned int i, count;
> +
> + if (is_atomic)
> + count = QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT;
> + else
> + count = QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT;
>
> /* Take the free PTT from the list */
> - for (i = 0; i < QED_BAR_ACQUIRE_TIMEOUT; i++) {
> + for (i = 0; i < count; i++) {
> spin_lock_bh(&p_hwfn->p_ptt_pool->lock);
>
> if (!list_empty(&p_hwfn->p_ptt_pool->free_list)) {
> @@ -131,7 +144,12 @@ struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn)
> }
>
> spin_unlock_bh(&p_hwfn->p_ptt_pool->lock);
> - usleep_range(1000, 2000);
> +
> + if (is_atomic)
> + udelay(QED_BAR_ACQUIRE_TIMEOUT_UDELAY);
> + else
> + usleep_range(QED_BAR_ACQUIRE_TIMEOUT_USLEEP,
> + QED_BAR_ACQUIRE_TIMEOUT_USLEEP * 2);
> }
>
> DP_NOTICE(p_hwfn, "PTT acquire timeout - failed to allocate PTT\n");
> diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
> index 4f8a685d1a55..40eaab4330f9 100644
> --- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
> +++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
> @@ -1049,13 +1049,14 @@ static void _qed_iscsi_get_pstats(struct qed_hwfn *p_hwfn,
> }
>
> static int qed_iscsi_get_stats(struct qed_hwfn *p_hwfn,
> - struct qed_iscsi_stats *stats)
> + struct qed_iscsi_stats *stats,
> + bool is_atomic)
> {
> struct qed_ptt *p_ptt;
>
> memset(stats, 0, sizeof(*stats));
>
> - p_ptt = qed_ptt_acquire(p_hwfn);
> + p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic);
> if (!p_ptt) {
> DP_ERR(p_hwfn, "Failed to acquire ptt\n");
> return -EAGAIN;
> @@ -1390,9 +1391,16 @@ static int qed_iscsi_destroy_conn(struct qed_dev *cdev,
> QED_SPQ_MODE_EBLOCK, NULL);
> }
>
> +static int qed_iscsi_stats_context(struct qed_dev *cdev,
> + struct qed_iscsi_stats *stats,
> + bool is_atomic)
> +{
> + return qed_iscsi_get_stats(QED_LEADING_HWFN(cdev), stats, is_atomic);
> +}
> +
> static int qed_iscsi_stats(struct qed_dev *cdev, struct qed_iscsi_stats *stats)
> {
> - return qed_iscsi_get_stats(QED_LEADING_HWFN(cdev), stats);
> + return qed_iscsi_stats_context(cdev, stats, false);
> }
>
> static int qed_iscsi_change_mac(struct qed_dev *cdev,
> @@ -1413,13 +1421,14 @@ static int qed_iscsi_change_mac(struct qed_dev *cdev,
> }
>
> void qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
> - struct qed_mcp_iscsi_stats *stats)
> + struct qed_mcp_iscsi_stats *stats,
> + bool is_atomic)
> {
> struct qed_iscsi_stats proto_stats;
>
> /* Retrieve FW statistics */
> memset(&proto_stats, 0, sizeof(proto_stats));
> - if (qed_iscsi_stats(cdev, &proto_stats)) {
> + if (qed_iscsi_stats_context(cdev, &proto_stats, is_atomic)) {
> DP_VERBOSE(cdev, QED_MSG_STORAGE,
> "Failed to collect ISCSI statistics\n");
> return;
> diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
> index 225c75b02a06..5d84c8d382f1 100644
> --- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
> +++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
> @@ -64,13 +64,19 @@ void qed_iscsi_setup(struct qed_hwfn *p_hwfn);
> void qed_iscsi_free(struct qed_hwfn *p_hwfn);
>
> /**
> - * @brief - Fills provided statistics struct with statistics.
> + * qed_get_protocol_stats_iscsi(): Fills provided statistics
> + * struct with statistics.
> *
> - * @param cdev
> - * @param stats - points to struct that will be filled with statistics.
> + * @cdev: Qed dev pointer.
> + * @stats: Points to struct that will be filled with statistics.
> + * @is_atomic: Hint from the caller - if the func can sleep or not.
> + *
> + * Context: The function should not sleep in case is_atomic == true.
> + * Return: Void.
> */
> void qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
> - struct qed_mcp_iscsi_stats *stats);
> + struct qed_mcp_iscsi_stats *stats,
> + bool is_atomic);
> #else /* IS_ENABLED(CONFIG_QED_ISCSI) */
> static inline int qed_iscsi_alloc(struct qed_hwfn *p_hwfn)
> {
> @@ -83,7 +89,8 @@ static inline void qed_iscsi_free(struct qed_hwfn *p_hwfn) {}
>
> static inline void
> qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
> - struct qed_mcp_iscsi_stats *stats) {}
> + struct qed_mcp_iscsi_stats *stats,
> + bool is_atomic) {}
> #endif /* IS_ENABLED(CONFIG_QED_ISCSI) */
>
> #endif
> diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
> index b3937935d2a8..8b3d9a06b979 100644
> --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
> +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
> @@ -1887,7 +1887,8 @@ static void __qed_get_vport_stats(struct qed_hwfn *p_hwfn,
> }
>
> static void _qed_get_vport_stats(struct qed_dev *cdev,
> - struct qed_eth_stats *stats)
> + struct qed_eth_stats *stats,
> + bool is_atomic)
> {
> u8 fw_vport = 0;
> int i;
> @@ -1896,10 +1897,11 @@ static void _qed_get_vport_stats(struct qed_dev *cdev,
>
> for_each_hwfn(cdev, i) {
> struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
> - struct qed_ptt *p_ptt = IS_PF(cdev) ? qed_ptt_acquire(p_hwfn)
> - : NULL;
> + struct qed_ptt *p_ptt;
> bool b_get_port_stats;
>
> + p_ptt = IS_PF(cdev) ? qed_ptt_acquire_context(p_hwfn, is_atomic)
> + : NULL;
> if (IS_PF(cdev)) {
> /* The main vport index is relative first */
> if (qed_fw_vport(p_hwfn, 0, &fw_vport)) {
> @@ -1924,6 +1926,13 @@ static void _qed_get_vport_stats(struct qed_dev *cdev,
> }
>
> void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
> +{
> + qed_get_vport_stats_context(cdev, stats, false);
> +}
> +
> +void qed_get_vport_stats_context(struct qed_dev *cdev,
> + struct qed_eth_stats *stats,
> + bool is_atomic)
> {
> u32 i;
>
> @@ -1932,7 +1941,7 @@ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
> return;
> }
>
> - _qed_get_vport_stats(cdev, stats);
> + _qed_get_vport_stats(cdev, stats, is_atomic);
>
> if (!cdev->reset_stats)
> return;
> @@ -1984,7 +1993,7 @@ void qed_reset_vport_stats(struct qed_dev *cdev)
> if (!cdev->reset_stats) {
> DP_INFO(cdev, "Reset stats not allocated\n");
> } else {
> - _qed_get_vport_stats(cdev, cdev->reset_stats);
> + _qed_get_vport_stats(cdev, cdev->reset_stats, false);
> cdev->reset_stats->common.link_change_count = 0;
> }
> }
> diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h
> index 7127d5aaac42..3545235a1f6e 100644
> --- a/drivers/net/ethernet/qlogic/qed/qed_l2.h
> +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h
> @@ -277,8 +277,32 @@ qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
> enum spq_mode comp_mode,
> struct qed_spq_comp_cb *p_comp_data);
>
> +/**
> + * qed_get_vport_stats(): Fills provided statistics
> + * struct with statistics.
> + *
> + * @cdev: Qed dev pointer.
> + * @stats: Points to struct that will be filled with statistics.
> + *
> + * Return: Void.
> + */
> void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats);
>
> +/**
> + * qed_get_vport_stats_context(): Fills provided statistics
> + * struct with statistics.
> + *
> + * @cdev: Qed dev pointer.
> + * @stats: Points to struct that will be filled with statistics.
> + * @is_atomic: Hint from the caller - if the func can sleep or not.
> + *
> + * Context: The function should not sleep in case is_atomic == true.
> + * Return: Void.
> + */
> +void qed_get_vport_stats_context(struct qed_dev *cdev,
> + struct qed_eth_stats *stats,
> + bool is_atomic);
> +
> void qed_reset_vport_stats(struct qed_dev *cdev);
>
> /**
> diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
> index ed052f67ce50..80d6501ea62b 100644
> --- a/drivers/net/ethernet/qlogic/qed/qed_main.c
> +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
> @@ -2430,7 +2430,7 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
>
> switch (type) {
> case QED_MCP_LAN_STATS:
> - qed_get_vport_stats(cdev, ð_stats);
> + qed_get_vport_stats_context(cdev, ð_stats, true);
> stats->lan_stats.ucast_rx_pkts =
> eth_stats.common.rx_ucast_pkts;
> stats->lan_stats.ucast_tx_pkts =
> @@ -2438,10 +2438,10 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
> stats->lan_stats.fcs_err = -1;
> break;
> case QED_MCP_FCOE_STATS:
> - qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats);
> + qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats, true);
> break;
> case QED_MCP_ISCSI_STATS:
> - qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats);
> + qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats, true);
> break;
> default:
> DP_VERBOSE(cdev, QED_MSG_SP,
LGTM - udelay does not look good but i agree it's the best in that
situation.
--
Regards,
Alexander Atanasov
More information about the Devel
mailing list