[Devel] [PATCH RHEL7 COMMIT] Revert: [fs] xfs: rework buffer dispose list tracking

Wed Jan 11 02:34:36 PST 2017

The commit is pushed to "branch-rh7-3.10.0-514.vz7.27.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-514.vz7.27.8
------>
commit d4dd61f05d16cabfdb0e277b45da08e99b55ebf5
Author: Dmitry Monakhov <dmonakhov at openvz.org>
Date:   Wed Jan 11 14:34:36 2017 +0400

    Revert: [fs] xfs: rework buffer dispose list tracking
    
    Patchset description:
    [7.3] rebase xfs lru patches
    
    rh7-3.10.0-514 already has 'fs-xfs-rework-buffer-dispose-list-tracking', but
    originally it depens on ms/xfs-convert-buftarg-LRU-to-generic, so
    In order to preserve original logic I've revert rhel's patch (1'st one),
    and reapply it later in natural order:
    TOC:
    0001-Revert-fs-xfs-rework-buffer-dispose-list-tracking.patch
    
    0002-ms-xfs-convert-buftarg-LRU-to-generic-code.patch
    0003-From-c70ded437bb646ace0dcbf3c7989d4edeed17f7e-Mon-Se.patch [not changed]
    0004-ms-xfs-rework-buffer-dispose-list-tracking.patch
    
    ===============================================================
    This patch description:
    
    RH Bugzilla: 1349175
    
    ms commit a408235726aa82c0358c9ec68124b6f4bc0a79df
    Author: Dave Chinner <dchinner at redhat.com>
    Date:   Wed Aug 28 10:18:06 2013 +1000
    
        xfs: rework buffer dispose list tracking
    
        In converting the buffer lru lists to use the generic code, the locking
        for marking the buffers as on the dispose list was lost.  This results in
        confusion in LRU buffer tracking and acocunting, resulting in reference
        counts being mucked up and filesystem beig unmountable.
    
        To fix this, introduce an internal buffer spinlock to protect the state
        field that holds the dispose list information.  Because there is now
        locking needed around xfs_buf_lru_add/del, and they are used in exactly
        one place each two lines apart, get rid of the wrappers and code the logic
        directly in place.
    
        Further, the LRU emptying code used on unmount is less than optimal.
        Convert it to use a dispose list as per a normal shrinker walk, and repeat
        the walk that fills the dispose list until the LRU is empty.  Thi avoids
        needing to drop and regain the LRU lock for every item being freed, and
        allows the same logic as the shrinker isolate call to be used.  Simpler,
        easier to understand.
    
        Signed-off-by: Dave Chinner <dchinner at redhat.com>
        Signed-off-by: Glauber Costa <glommer at openvz.org>
        Cc: "Theodore Ts'o" <tytso at mit.edu>
        Cc: Adrian Hunter <adrian.hunter at intel.com>
        Cc: Al Viro <viro at zeniv.linux.org.uk>
        Cc: Artem Bityutskiy <artem.bityutskiy at linux.intel.com>
        Cc: Arve Hjonnevag <arve at android.com>
        Cc: Carlos Maiolino <cmaiolino at redhat.com>
        Cc: Christoph Hellwig <hch at lst.de>
        Cc: Chuck Lever <chuck.lever at oracle.com>
        Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
        Cc: David Rientjes <rientjes at google.com>
        Cc: Gleb Natapov <gleb at redhat.com>
        Cc: Greg Thelen <gthelen at google.com>
        Cc: J. Bruce Fields <bfields at redhat.com>
        Cc: Jan Kara <jack at suse.cz>
        Cc: Jerome Glisse <jglisse at redhat.com>
        Cc: John Stultz <john.stultz at linaro.org>
        Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu at jp.fujitsu.com>
        Cc: Kent Overstreet <koverstreet at google.com>
        Cc: Kirill A. Shutemov <kirill.shutemov at linux.intel.com>
        Cc: Marcelo Tosatti <mtosatti at redhat.com>
        Cc: Mel Gorman <mgorman at suse.de>
        Cc: Steven Whitehouse <swhiteho at redhat.com>
        Cc: Thomas Hellstrom <thellstrom at vmware.com>
        Cc: Trond Myklebust <Trond.Myklebust at netapp.com>
        Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
        Signed-off-by: Al Viro <viro at zeniv.linux.org.uk>
    
    Signed-off-by: Brian Foster <bfoster at redhat.com>
    
    https://jira.sw.ru/browse/PSBM-55577
    
    Signed-off-by: Dmitry Monakhov <dmonakhov at openvz.org>
---
 fs/xfs/xfs_buf.c | 57 ++++++++------------------------------------------------
 fs/xfs/xfs_buf.h |  8 +++-----
 2 files changed, 11 insertions(+), 54 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index e380398..c0de0e2 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -96,7 +96,7 @@ xfs_buf_lru_add(
 		atomic_inc(&bp->b_hold);
 		list_add_tail(&bp->b_lru, &btp->bt_lru);
 		btp->bt_lru_nr++;
-		bp->b_state &= ~XFS_BSTATE_DISPOSE;
+		bp->b_lru_flags &= ~_XBF_LRU_DISPOSE;
 	}
 	spin_unlock(&btp->bt_lru_lock);
 }
@@ -198,21 +198,19 @@ xfs_buf_stale(
 	 */
 	xfs_buf_ioacct_dec(bp);
 
-	spin_lock(&bp->b_lock);
-	atomic_set(&bp->b_lru_ref, 0);
+	atomic_set(&(bp)->b_lru_ref, 0);
 	if (!list_empty(&bp->b_lru)) {
 		struct xfs_buftarg *btp = bp->b_target;
 
 		spin_lock(&btp->bt_lru_lock);
 		if (!list_empty(&bp->b_lru) &&
-		    !(bp->b_state & XFS_BSTATE_DISPOSE)) {
+		    !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) {
 			list_del_init(&bp->b_lru);
 			btp->bt_lru_nr--;
 			atomic_dec(&bp->b_hold);
 		}
 		spin_unlock(&btp->bt_lru_lock);
 	}
-	spin_unlock(&bp->b_lock);
 	ASSERT(atomic_read(&bp->b_hold) >= 1);
 }
 
@@ -1014,26 +1012,10 @@ xfs_buf_rele(
 	/* the last reference has been dropped ... */
 	xfs_buf_ioacct_dec(bp);
 	if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
-		/*
-		 * If the buffer is added to the LRU take a new
-		 * reference to the buffer for the LRU and clear the
-		 * (now stale) dispose list state flag
-		 */
 		xfs_buf_lru_add(bp);
 		spin_unlock(&pag->pag_buf_lock);
 	} else {
-		/*
-		 * most of the time buffers will already be removed from
-		 * the LRU, so optimise that case by checking for the
-		 * XFS_BSTATE_DISPOSE flag indicating the last list the
-		 * buffer was on was the disposal list
-		 */
-		if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
-			xfs_buf_lru_del(bp);
-		} else {
-			ASSERT(list_empty(&bp->b_lru));
-		}
-
+		xfs_buf_lru_del(bp);
 		ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
 		rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
 		spin_unlock(&pag->pag_buf_lock);
@@ -1620,7 +1602,6 @@ xfs_wait_buftarg(
 	struct xfs_buftarg	*btp)
 {
 	struct xfs_buf		*bp;
-	LIST_HEAD(dispose);
 
 	/*
 	 * First wait on the buftarg I/O count for all in-flight buffers to be
@@ -1643,22 +1624,18 @@ restart:
 	while (!list_empty(&btp->bt_lru)) {
 		bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
 		if (atomic_read(&bp->b_hold) > 1) {
-			/* need to wait, so skip it this pass */
 			trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
-skip:
 			list_move_tail(&bp->b_lru, &btp->bt_lru);
 			spin_unlock(&btp->bt_lru_lock);
 			delay(100);
 			goto restart;
 		}
-		if (!spin_trylock(&bp->b_lock))
-			goto skip;
-
 		/*
 		 * clear the LRU reference count so the buffer doesn't get
 		 * ignored in xfs_buf_rele().
 		 */
 		atomic_set(&bp->b_lru_ref, 0);
+		spin_unlock(&btp->bt_lru_lock);
 		if (bp->b_flags & XBF_WRITE_FAIL) {
 			xfs_alert(btp->bt_mount,
 "Corruption Alert: Buffer at block 0x%llx had permanent write failures!",
@@ -1666,17 +1643,10 @@ skip:
 			xfs_alert(btp->bt_mount,
 "Please run xfs_repair to determine the extent of the problem.");
 		}
-		bp->b_state |= XFS_BSTATE_DISPOSE;
-		list_move_tail(&bp->b_lru, &dispose);
-		spin_unlock(&bp->b_lock);
-	}
-	spin_unlock(&btp->bt_lru_lock);
-
-	while (!list_empty(&dispose)) {
-		bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
-		list_del_init(&bp->b_lru);
 		xfs_buf_rele(bp);
+		spin_lock(&btp->bt_lru_lock);
 	}
+	spin_unlock(&btp->bt_lru_lock);
 }
 
 int
@@ -1701,21 +1671,11 @@ xfs_buftarg_shrink(
 		bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
 
 		/*
-		 * we are inverting the lru lock/bp->b_lock here, so use a trylock.
-		 * If we fail to get the lock, just skip it.
-		 */
-		if (!spin_trylock(&bp->b_lock)) {
-			list_move_tail(&bp->b_lru, &btp->bt_lru);
-			continue;
-		}
-
-		/*
 		 * Decrement the b_lru_ref count unless the value is already
 		 * zero. If the value is already zero, we need to reclaim the
 		 * buffer, otherwise it gets another trip through the LRU.
 		 */
 		if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
-			spin_unlock(&bp->b_lock);
 			list_move_tail(&bp->b_lru, &btp->bt_lru);
 			continue;
 		}
@@ -1726,8 +1686,7 @@ xfs_buftarg_shrink(
 		 */
 		list_move(&bp->b_lru, &dispose);
 		btp->bt_lru_nr--;
-		bp->b_state |= XFS_BSTATE_DISPOSE;
-		spin_unlock(&bp->b_lock);
+		bp->b_lru_flags |= _XBF_LRU_DISPOSE;
 	}
 	spin_unlock(&btp->bt_lru_lock);
 
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 648b20e..180bf33 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -62,6 +62,7 @@ typedef enum {
 #define _XBF_KMEM	 (1 << 21)/* backed by heap memory */
 #define _XBF_DELWRI_Q	 (1 << 22)/* buffer on a delwri queue */
 #define _XBF_COMPOUND	 (1 << 23)/* compound buffer */
+#define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */
 #define _XBF_IN_FLIGHT	 (1 << 26) /* I/O in flight, for accounting purposes */
 
 typedef unsigned int xfs_buf_flags_t;
@@ -83,12 +84,9 @@ typedef unsigned int xfs_buf_flags_t;
 	{ _XBF_KMEM,		"KMEM" }, \
 	{ _XBF_DELWRI_Q,	"DELWRI_Q" }, \
 	{ _XBF_COMPOUND,	"COMPOUND" }, \
+	{ _XBF_LRU_DISPOSE,	"LRU_DISPOSE" }, \
 	{ _XBF_IN_FLIGHT,	"IN_FLIGHT" }
 
-/*
- * Internal state flags.
- */
-#define XFS_BSTATE_DISPOSE	(1 << 0)	/* buffer being discarded */
 
 /*
  * The xfs_buftarg contains 2 notions of "sector size" -
@@ -163,8 +161,8 @@ typedef struct xfs_buf {
 	 * bt_lru_lock and not by b_sema
 	 */
 	struct list_head	b_lru;		/* lru list */
+	xfs_buf_flags_t		b_lru_flags;	/* internal lru status flags */
 	spinlock_t		b_lock;		/* internal state lock */
-	unsigned int		b_state;	/* internal state flags */
 	int			b_io_error;	/* internal IO error state */
 	wait_queue_head_t	b_waiters;	/* unpin waiters */
 	struct list_head	b_list;