[Devel] [PATCH v2 rh7] qed: Fix scheduling in a tasklet while getting stats

Alexander Atanasov alexander.atanasov at virtuozzo.com
Fri Jul 28 10:52:50 MSK 2023


On 27.07.23 18:37, Konstantin Khorenko wrote:
> Here we've got to a situation when tasklet called usleep(), thus welcome
> to the "scheduling while atomic" BUG().
> 
> BUG: scheduling while atomic: swapper/24/0/0x00000100
> 
>   [<ffffffffb41c6199>] schedule+0x29/0x70
>   [<ffffffffb41c5512>] schedule_hrtimeout_range_clock+0xb2/0x150
>   [<ffffffffb41c55c3>] schedule_hrtimeout_range+0x13/0x20
>   [<ffffffffb41c3bcf>] usleep_range+0x4f/0x70
>   [<ffffffffc08d3e58>] qed_ptt_acquire+0x38/0x100 [qed]
>   [<ffffffffc08eac48>] _qed_get_vport_stats+0x458/0x580 [qed]
>   [<ffffffffc08ead8c>] qed_get_vport_stats+0x1c/0xd0 [qed]
>   [<ffffffffc08dffd3>] qed_get_protocol_stats+0x93/0x100 [qed]
>                        qed_mcp_send_protocol_stats
>            case MFW_DRV_MSG_GET_LAN_STATS:
>            case MFW_DRV_MSG_GET_FCOE_STATS:
>            case MFW_DRV_MSG_GET_ISCSI_STATS:
>            case MFW_DRV_MSG_GET_RDMA_STATS:
>   [<ffffffffc08e36d8>] qed_mcp_handle_events+0x2d8/0x890 [qed]
>                        qed_int_assertion
>                        qed_int_attentions
>   [<ffffffffc08d9490>] qed_int_sp_dpc+0xa50/0xdc0 [qed]
>   [<ffffffffb3aa7623>] tasklet_action+0x83/0x140
>   [<ffffffffb41d9125>] __do_softirq+0x125/0x2bb
>   [<ffffffffb41d560c>] call_softirq+0x1c/0x30
>   [<ffffffffb3a30645>] do_softirq+0x65/0xa0
>   [<ffffffffb3aa78d5>] irq_exit+0x105/0x110
>   [<ffffffffb41d8996>] do_IRQ+0x56/0xf0
> 
> There was a similar issue in mainstream, but for qede driver and caused
> by reading stats via sysfs.
> 
> 3rd version of the patch fixing that issue, which i have taken as a
> base: https://www.spinics.net/lists/netdev/msg901089.html
> 
> ms final commit: 42510dffd0e2 ("qed/qede: Fix scheduling while atomic")
> 
> Unfortunately last version of the patch which is accepted by mainstream
> is far away from version 3, it moves stats updates to a delayed work,
> which is the correct way,
> but in qed driver i do not see which exactly stats should i collect in a
> delayed work for each device (and devices supported by qed driver are
> quite different).
> 
> Taking into account that we do not have a node access to reproduce and
> verify the changes, reworking the code to use delayed work seems
> impossible.
> 
> So let's live with udelay() crutch which at least is technically clear.
> 
> https://jira.vzint.dev/browse/PSBM-146761
> 
> Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
> 
> ---
> v1->v2:
>   - Fixed kdoc for qed_get_protocol_stats_iscsi()
> 
>   - Added kdoc descriptions for qed_ptt_acquire_context(),
>     qed_get_protocol_stats_fcoe(), qed_get_vport_stats() and
>     qed_get_vport_stats_context()
> ---
>   drivers/net/ethernet/qlogic/qed/qed_dev_api.h | 16 ++++++++++++
>   drivers/net/ethernet/qlogic/qed/qed_fcoe.c    | 19 ++++++++++----
>   drivers/net/ethernet/qlogic/qed/qed_fcoe.h    | 17 ++++++++++--
>   drivers/net/ethernet/qlogic/qed/qed_hw.c      | 26 ++++++++++++++++---
>   drivers/net/ethernet/qlogic/qed/qed_iscsi.c   | 19 ++++++++++----
>   drivers/net/ethernet/qlogic/qed/qed_iscsi.h   | 17 ++++++++----
>   drivers/net/ethernet/qlogic/qed/qed_l2.c      | 19 ++++++++++----
>   drivers/net/ethernet/qlogic/qed/qed_l2.h      | 24 +++++++++++++++++
>   drivers/net/ethernet/qlogic/qed/qed_main.c    |  6 ++---
>   9 files changed, 134 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
> index e4b4e3b78e8a..440047dc858f 100644
> --- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
> +++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
> @@ -210,6 +210,22 @@ void qed_hw_remove(struct qed_dev *cdev);
>    */
>   struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn);
>   
> +/**
> + * qed_ptt_acquire_context(): Allocate a PTT window honoring the context
> + *			      atomicy.
> + *
> + * @p_hwfn: HW device data.
> + * @is_atomic: Hint from the caller - if the func can sleep or not.
> + *
> + * Context: The function should not sleep in case is_atomic == true.
> + * Return: struct qed_ptt.
> + *
> + * Should be called at the entry point to the driver
> + * (at the beginning of an exported function).
> + */
> +struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn,
> +					bool is_atomic);
> +
>   /**
>    * @brief qed_ptt_release - Release PTT Window
>    *
> diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
> index 46dc93d3b9b5..bbf3e4e41235 100644
> --- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
> +++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
> @@ -716,13 +716,14 @@ static void _qed_fcoe_get_pstats(struct qed_hwfn *p_hwfn,
>   }
>   
>   static int qed_fcoe_get_stats(struct qed_hwfn *p_hwfn,
> -			      struct qed_fcoe_stats *p_stats)
> +			      struct qed_fcoe_stats *p_stats,
> +			      bool is_atomic)
>   {
>   	struct qed_ptt *p_ptt;
>   
>   	memset(p_stats, 0, sizeof(*p_stats));
>   
> -	p_ptt = qed_ptt_acquire(p_hwfn);
> +	p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic);
>   
>   	if (!p_ptt) {
>   		DP_ERR(p_hwfn, "Failed to acquire ptt\n");
> @@ -996,19 +997,27 @@ static int qed_fcoe_destroy_conn(struct qed_dev *cdev,
>   					QED_SPQ_MODE_EBLOCK, NULL);
>   }
>   
> +static int qed_fcoe_stats_context(struct qed_dev *cdev,
> +				  struct qed_fcoe_stats *stats,
> +				  bool is_atomic)
> +{
> +	return qed_fcoe_get_stats(QED_LEADING_HWFN(cdev), stats, is_atomic);
> +}
> +
>   static int qed_fcoe_stats(struct qed_dev *cdev, struct qed_fcoe_stats *stats)
>   {
> -	return qed_fcoe_get_stats(QED_LEADING_HWFN(cdev), stats);
> +	return qed_fcoe_stats_context(cdev, stats, false);
>   }
>   
>   void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
> -				 struct qed_mcp_fcoe_stats *stats)
> +				 struct qed_mcp_fcoe_stats *stats,
> +				 bool is_atomic)
>   {
>   	struct qed_fcoe_stats proto_stats;
>   
>   	/* Retrieve FW statistics */
>   	memset(&proto_stats, 0, sizeof(proto_stats));
> -	if (qed_fcoe_stats(cdev, &proto_stats)) {
> +	if (qed_fcoe_stats_context(cdev, &proto_stats, is_atomic)) {
>   		DP_VERBOSE(cdev, QED_MSG_STORAGE,
>   			   "Failed to collect FCoE statistics\n");
>   		return;
> diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.h b/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
> index 027a76ac839a..da14cc02a55d 100644
> --- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
> +++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
> @@ -54,8 +54,20 @@ int qed_fcoe_alloc(struct qed_hwfn *p_hwfn);
>   void qed_fcoe_setup(struct qed_hwfn *p_hwfn);
>   
>   void qed_fcoe_free(struct qed_hwfn *p_hwfn);
> +/**
> + * qed_get_protocol_stats_fcoe(): Fills provided statistics
> + *				  struct with statistics.
> + *
> + * @cdev: Qed dev pointer.
> + * @stats: Points to struct that will be filled with statistics.
> + * @is_atomic: Hint from the caller - if the func can sleep or not.
> + *
> + * Context: The function should not sleep in case is_atomic == true.
> + * Return: Void.
> + */
>   void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
> -				 struct qed_mcp_fcoe_stats *stats);
> +				 struct qed_mcp_fcoe_stats *stats,
> +				 bool is_atomic);
>   #else /* CONFIG_QED_FCOE */
>   static inline int qed_fcoe_alloc(struct qed_hwfn *p_hwfn)
>   {
> @@ -66,7 +78,8 @@ static inline void qed_fcoe_setup(struct qed_hwfn *p_hwfn) {}
>   static inline void qed_fcoe_free(struct qed_hwfn *p_hwfn) {}
>   
>   static inline void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
> -					       struct qed_mcp_fcoe_stats *stats)
> +					       struct qed_mcp_fcoe_stats *stats,
> +					       bool is_atomic)
>   {
>   }
>   #endif /* CONFIG_QED_FCOE */
> diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
> index 72ec1c6bdf70..86033b759a68 100644
> --- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
> +++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
> @@ -49,7 +49,10 @@
>   #include "qed_reg_addr.h"
>   #include "qed_sriov.h"
>   
> -#define QED_BAR_ACQUIRE_TIMEOUT 1000
> +#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT	1000
> +#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP		1000
> +#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT	100000
> +#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY		10
>   
>   /* Invalid values */
>   #define QED_BAR_INVALID_OFFSET          (cpu_to_le32(-1))
> @@ -110,12 +113,22 @@ void qed_ptt_pool_free(struct qed_hwfn *p_hwfn)
>   }
>   
>   struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn)
> +{
> +	return qed_ptt_acquire_context(p_hwfn, false);
> +}
> +
> +struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn, bool is_atomic)
>   {
>   	struct qed_ptt *p_ptt;
> -	unsigned int i;
> +	unsigned int i, count;
> +
> +	if (is_atomic)
> +		count = QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT;
> +	else
> +		count = QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT;
>   
>   	/* Take the free PTT from the list */
> -	for (i = 0; i < QED_BAR_ACQUIRE_TIMEOUT; i++) {
> +	for (i = 0; i < count; i++) {
>   		spin_lock_bh(&p_hwfn->p_ptt_pool->lock);
>   
>   		if (!list_empty(&p_hwfn->p_ptt_pool->free_list)) {
> @@ -131,7 +144,12 @@ struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn)
>   		}
>   
>   		spin_unlock_bh(&p_hwfn->p_ptt_pool->lock);
> -		usleep_range(1000, 2000);
> +
> +		if (is_atomic)
> +			udelay(QED_BAR_ACQUIRE_TIMEOUT_UDELAY);
> +		else
> +			usleep_range(QED_BAR_ACQUIRE_TIMEOUT_USLEEP,
> +				     QED_BAR_ACQUIRE_TIMEOUT_USLEEP * 2);
>   	}
>   
>   	DP_NOTICE(p_hwfn, "PTT acquire timeout - failed to allocate PTT\n");
> diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
> index 4f8a685d1a55..40eaab4330f9 100644
> --- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
> +++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
> @@ -1049,13 +1049,14 @@ static void _qed_iscsi_get_pstats(struct qed_hwfn *p_hwfn,
>   }
>   
>   static int qed_iscsi_get_stats(struct qed_hwfn *p_hwfn,
> -			       struct qed_iscsi_stats *stats)
> +			       struct qed_iscsi_stats *stats,
> +			       bool is_atomic)
>   {
>   	struct qed_ptt *p_ptt;
>   
>   	memset(stats, 0, sizeof(*stats));
>   
> -	p_ptt = qed_ptt_acquire(p_hwfn);
> +	p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic);
>   	if (!p_ptt) {
>   		DP_ERR(p_hwfn, "Failed to acquire ptt\n");
>   		return -EAGAIN;
> @@ -1390,9 +1391,16 @@ static int qed_iscsi_destroy_conn(struct qed_dev *cdev,
>   					   QED_SPQ_MODE_EBLOCK, NULL);
>   }
>   
> +static int qed_iscsi_stats_context(struct qed_dev *cdev,
> +				   struct qed_iscsi_stats *stats,
> +				   bool is_atomic)
> +{
> +	return qed_iscsi_get_stats(QED_LEADING_HWFN(cdev), stats, is_atomic);
> +}
> +
>   static int qed_iscsi_stats(struct qed_dev *cdev, struct qed_iscsi_stats *stats)
>   {
> -	return qed_iscsi_get_stats(QED_LEADING_HWFN(cdev), stats);
> +	return qed_iscsi_stats_context(cdev, stats, false);
>   }
>   
>   static int qed_iscsi_change_mac(struct qed_dev *cdev,
> @@ -1413,13 +1421,14 @@ static int qed_iscsi_change_mac(struct qed_dev *cdev,
>   }
>   
>   void qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
> -				  struct qed_mcp_iscsi_stats *stats)
> +				  struct qed_mcp_iscsi_stats *stats,
> +				  bool is_atomic)
>   {
>   	struct qed_iscsi_stats proto_stats;
>   
>   	/* Retrieve FW statistics */
>   	memset(&proto_stats, 0, sizeof(proto_stats));
> -	if (qed_iscsi_stats(cdev, &proto_stats)) {
> +	if (qed_iscsi_stats_context(cdev, &proto_stats, is_atomic)) {
>   		DP_VERBOSE(cdev, QED_MSG_STORAGE,
>   			   "Failed to collect ISCSI statistics\n");
>   		return;
> diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
> index 225c75b02a06..5d84c8d382f1 100644
> --- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
> +++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
> @@ -64,13 +64,19 @@ void qed_iscsi_setup(struct qed_hwfn *p_hwfn);
>   void qed_iscsi_free(struct qed_hwfn *p_hwfn);
>   
>   /**
> - * @brief - Fills provided statistics struct with statistics.
> + * qed_get_protocol_stats_iscsi(): Fills provided statistics
> + *				   struct with statistics.
>    *
> - * @param cdev
> - * @param stats - points to struct that will be filled with statistics.
> + * @cdev: Qed dev pointer.
> + * @stats: Points to struct that will be filled with statistics.
> + * @is_atomic: Hint from the caller - if the func can sleep or not.
> + *
> + * Context: The function should not sleep in case is_atomic == true.
> + * Return: Void.
>    */
>   void qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
> -				  struct qed_mcp_iscsi_stats *stats);
> +				  struct qed_mcp_iscsi_stats *stats,
> +				  bool is_atomic);
>   #else /* IS_ENABLED(CONFIG_QED_ISCSI) */
>   static inline int qed_iscsi_alloc(struct qed_hwfn *p_hwfn)
>   {
> @@ -83,7 +89,8 @@ static inline void qed_iscsi_free(struct qed_hwfn *p_hwfn) {}
>   
>   static inline void
>   qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
> -			     struct qed_mcp_iscsi_stats *stats) {}
> +			     struct qed_mcp_iscsi_stats *stats,
> +			     bool is_atomic) {}
>   #endif /* IS_ENABLED(CONFIG_QED_ISCSI) */
>   
>   #endif
> diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
> index b3937935d2a8..8b3d9a06b979 100644
> --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
> +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
> @@ -1887,7 +1887,8 @@ static void __qed_get_vport_stats(struct qed_hwfn *p_hwfn,
>   }
>   
>   static void _qed_get_vport_stats(struct qed_dev *cdev,
> -				 struct qed_eth_stats *stats)
> +				 struct qed_eth_stats *stats,
> +				 bool is_atomic)
>   {
>   	u8 fw_vport = 0;
>   	int i;
> @@ -1896,10 +1897,11 @@ static void _qed_get_vport_stats(struct qed_dev *cdev,
>   
>   	for_each_hwfn(cdev, i) {
>   		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
> -		struct qed_ptt *p_ptt = IS_PF(cdev) ? qed_ptt_acquire(p_hwfn)
> -						    :  NULL;
> +		struct qed_ptt *p_ptt;
>   		bool b_get_port_stats;
>   
> +		p_ptt = IS_PF(cdev) ? qed_ptt_acquire_context(p_hwfn, is_atomic)
> +				    : NULL;
>   		if (IS_PF(cdev)) {
>   			/* The main vport index is relative first */
>   			if (qed_fw_vport(p_hwfn, 0, &fw_vport)) {
> @@ -1924,6 +1926,13 @@ static void _qed_get_vport_stats(struct qed_dev *cdev,
>   }
>   
>   void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
> +{
> +	qed_get_vport_stats_context(cdev, stats, false);
> +}
> +
> +void qed_get_vport_stats_context(struct qed_dev *cdev,
> +				 struct qed_eth_stats *stats,
> +				 bool is_atomic)
>   {
>   	u32 i;
>   
> @@ -1932,7 +1941,7 @@ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
>   		return;
>   	}
>   
> -	_qed_get_vport_stats(cdev, stats);
> +	_qed_get_vport_stats(cdev, stats, is_atomic);
>   
>   	if (!cdev->reset_stats)
>   		return;
> @@ -1984,7 +1993,7 @@ void qed_reset_vport_stats(struct qed_dev *cdev)
>   	if (!cdev->reset_stats) {
>   		DP_INFO(cdev, "Reset stats not allocated\n");
>   	} else {
> -		_qed_get_vport_stats(cdev, cdev->reset_stats);
> +		_qed_get_vport_stats(cdev, cdev->reset_stats, false);
>   		cdev->reset_stats->common.link_change_count = 0;
>   	}
>   }
> diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h
> index 7127d5aaac42..3545235a1f6e 100644
> --- a/drivers/net/ethernet/qlogic/qed/qed_l2.h
> +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h
> @@ -277,8 +277,32 @@ qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
>   			    enum spq_mode comp_mode,
>   			    struct qed_spq_comp_cb *p_comp_data);
>   
> +/**
> + * qed_get_vport_stats(): Fills provided statistics
> + *			  struct with statistics.
> + *
> + * @cdev: Qed dev pointer.
> + * @stats: Points to struct that will be filled with statistics.
> + *
> + * Return: Void.
> + */
>   void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats);
>   
> +/**
> + * qed_get_vport_stats_context(): Fills provided statistics
> + *				  struct with statistics.
> + *
> + * @cdev: Qed dev pointer.
> + * @stats: Points to struct that will be filled with statistics.
> + * @is_atomic: Hint from the caller - if the func can sleep or not.
> + *
> + * Context: The function should not sleep in case is_atomic == true.
> + * Return: Void.
> + */
> +void qed_get_vport_stats_context(struct qed_dev *cdev,
> +				 struct qed_eth_stats *stats,
> +				 bool is_atomic);
> +
>   void qed_reset_vport_stats(struct qed_dev *cdev);
>   
>   /**
> diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
> index ed052f67ce50..80d6501ea62b 100644
> --- a/drivers/net/ethernet/qlogic/qed/qed_main.c
> +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
> @@ -2430,7 +2430,7 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
>   
>   	switch (type) {
>   	case QED_MCP_LAN_STATS:
> -		qed_get_vport_stats(cdev, &eth_stats);
> +		qed_get_vport_stats_context(cdev, &eth_stats, true);
>   		stats->lan_stats.ucast_rx_pkts =
>   					eth_stats.common.rx_ucast_pkts;
>   		stats->lan_stats.ucast_tx_pkts =
> @@ -2438,10 +2438,10 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
>   		stats->lan_stats.fcs_err = -1;
>   		break;
>   	case QED_MCP_FCOE_STATS:
> -		qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats);
> +		qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats, true);
>   		break;
>   	case QED_MCP_ISCSI_STATS:
> -		qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats);
> +		qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats, true);
>   		break;
>   	default:
>   		DP_VERBOSE(cdev, QED_MSG_SP,


LGTM - udelay does not look good but i agree it's the best in that 
situation.

-- 
Regards,
Alexander Atanasov



More information about the Devel mailing list