[Devel] [PATCH RHEL7 COMMIT] ms/IB/ipoib: Fix race condition in neigh creation

Konstantin Khorenko khorenko at virtuozzo.com
Mon Jul 9 17:22:28 MSK 2018


The commit is pushed to "branch-rh7-3.10.0-862.6.3.vz7.62.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-862.6.3.vz7.62.1
------>
commit 01bc37269f39bb9dfee8c8216a29063c0a752850
Author: Erez Shitrit <erezsh at mellanox.com>
Date:   Sun Dec 31 15:33:15 2017 +0200

    ms/IB/ipoib: Fix race condition in neigh creation
    
    When using enhanced mode for IPoIB, two threads may execute xmit in
    parallel to two different TX queues while the target is the same.
    In this case, both of them will add the same neighbor to the path's
    neigh link list and we might see the following message:
    
      list_add double add: new=ffff88024767a348, prev=ffff88024767a348...
      WARNING: lib/list_debug.c:31__list_add_valid+0x4e/0x70
      ipoib_start_xmit+0x477/0x680 [ib_ipoib]
      dev_hard_start_xmit+0xb9/0x3e0
      sch_direct_xmit+0xf9/0x250
      __qdisc_run+0x176/0x5d0
      __dev_queue_xmit+0x1f5/0xb10
      __dev_queue_xmit+0x55/0xb10
    
    Analysis:
    Two SKB are scheduled to be transmitted from two cores.
    In ipoib_start_xmit, both gets NULL when calling ipoib_neigh_get.
    Two calls to neigh_add_path are made. One thread takes the spin-lock
    and calls ipoib_neigh_alloc which creates the neigh structure,
    then (after the __path_find) the neigh is added to the path's neigh
    link list. When the second thread enters the critical section it also
    calls ipoib_neigh_alloc but in this case it gets the already allocated
    ipoib_neigh structure, which is already linked to the path's neigh
    link list and adds it again to the list. Which beside of triggering
    the list, it creates a loop in the linked list. This loop leads to
    endless loop inside path_rec_completion.
    
    Solution:
    Check list_empty(&neigh->list) before adding to the list.
    Add a similar fix in "ipoib_multicast.c::ipoib_mcast_send"
    
    Fixes: b63b70d87741 ('IPoIB: Use a private hash table for path lookup in xmit path')
    Signed-off-by: Erez Shitrit <erezsh at mellanox.com>
    Reviewed-by: Alex Vesker <valex at mellanox.com>
    Signed-off-by: Leon Romanovsky <leon at kernel.org>
    Signed-off-by: Jason Gunthorpe <jgg at mellanox.com>
    
    (cherry picked from commit 16ba3defb8bd01a9464ba4820a487f5b196b455b)
    https://jira.sw.ru/browse/PSBM-86455
    
    Signed-off-by: Vasily Averin <vvs at virtuozzo.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c      | 25 ++++++++++++++++++-------
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c |  5 ++++-
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 9efb260b968b..40a3d490d9f2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -907,8 +907,8 @@ static int path_rec_start(struct net_device *dev,
 	return 0;
 }
 
-static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
-			   struct net_device *dev)
+static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr,
+					  struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct rdma_netdev *rn = netdev_priv(dev);
@@ -922,7 +922,15 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
 		spin_unlock_irqrestore(&priv->lock, flags);
 		++dev->stats.tx_dropped;
 		dev_kfree_skb_any(skb);
-		return;
+		return NULL;
+	}
+
+	/* To avoid race condition, make sure that the
+	 * neigh will be added only once.
+	 */
+	if (unlikely(!list_empty(&neigh->list))) {
+		spin_unlock_irqrestore(&priv->lock, flags);
+		return neigh;
 	}
 
 	path = __path_find(dev, daddr + 4);
@@ -961,7 +969,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
 			path->ah->last_send = rn->send(dev, skb, path->ah->ah,
 						       IPOIB_QPN(daddr));
 			ipoib_neigh_put(neigh);
-			return;
+			return NULL;
 		}
 	} else {
 		neigh->ah  = NULL;
@@ -978,7 +986,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
 
 	spin_unlock_irqrestore(&priv->lock, flags);
 	ipoib_neigh_put(neigh);
-	return;
+	return NULL;
 
 err_path:
 	ipoib_neigh_free(neigh);
@@ -988,6 +996,8 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
 
 	spin_unlock_irqrestore(&priv->lock, flags);
 	ipoib_neigh_put(neigh);
+
+	return NULL;
 }
 
 static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
@@ -1096,8 +1106,9 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	case htons(ETH_P_TIPC):
 		neigh = ipoib_neigh_get(dev, phdr->hwaddr);
 		if (unlikely(!neigh)) {
-			neigh_add_path(skb, phdr->hwaddr, dev);
-			return NETDEV_TX_OK;
+			neigh = neigh_add_path(skb, phdr->hwaddr, dev);
+			if (likely(!neigh))
+				return NETDEV_TX_OK;
 		}
 		break;
 	case htons(ETH_P_ARP):
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 6308711e6c00..89c1882cf14e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -813,7 +813,10 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
 		spin_lock_irqsave(&priv->lock, flags);
 		if (!neigh) {
 			neigh = ipoib_neigh_alloc(daddr, dev);
-			if (neigh) {
+			/* Make sure that the neigh will be added only
+			 * once to mcast list.
+			 */
+			if (neigh && list_empty(&neigh->list)) {
 				kref_get(&mcast->ah->ref);
 				neigh->ah	= mcast->ah;
 				list_add_tail(&neigh->list, &mcast->neigh_list);


More information about the Devel mailing list