[Devel] [PATCH RHEL7 COMMIT] ms/dcache: dentry_kill(): don't try to remove from shrink list

Konstantin Khorenko khorenko at virtuozzo.com
Thu Mar 24 08:14:58 PDT 2016


The commit is pushed to "branch-rh7-3.10.0-327.10.1.vz7.12.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.10.1.vz7.12.3
------>
commit e33cae748d1afc09d565ef14d9514e019b5bead1
Author: Vladimir Davydov <vdavydov at virtuozzo.com>
Date:   Thu Mar 24 19:14:57 2016 +0400

    ms/dcache: dentry_kill(): don't try to remove from shrink list
    
    Backport mainstream commit:
    
      commit 41edf278fc2f042f4e22a12ed87d19c5201210e1
      Author: Al Viro <viro at zeniv.linux.org.uk>
      Date:   Thu May 1 10:30:00 2014 -0400
    
          dentry_kill(): don't try to remove from shrink list
    
          If the victim in on the shrink list, don't remove it from there.
          If shrink_dentry_list() manages to remove it from the list before
          we are done - fine, we'll just free it as usual.  If not - mark
          it with new flag (DCACHE_MAY_FREE) and leave it there.
    
          Eventually, shrink_dentry_list() will get to it, remove the sucker
          from shrink list and call dentry_kill(dentry, 0).  Which is where
          we'll deal with freeing.
    
          Since now dentry_kill(dentry, 0) may happen after or during
          dentry_kill(dentry, 1), we need to recognize that (by seeing
          DCACHE_DENTRY_KILLED already set), unlock everything
          and either free the sucker (in case DCACHE_MAY_FREE has been
          set) or leave it for ongoing dentry_kill(dentry, 1) to deal with.
    
          Signed-off-by: Al Viro <viro at zeniv.linux.org.uk>
    
    Dcache shrinker temporarily adds dentries to a shrink list before
    dropping them. This list is allocated on stack and not protected by any
    locks, therefore only the caller (i.e. the process scanning dcache) can
    safely remove entries from it. Currently, however, dentries can be
    removed from a shrink list by dentry_kill() and select_collect(). If
    this races with a shrinker invocation, we'll get list corruption:
    
      WARNING: at lib/list_debug.c:59 __list_del_entry+0xa1/0xd0()
      list_del corruption. prev->next should be ffff8801e1621980, but was ffff8801de3155c0
      CPU: 5 PID: 58031 Comm: trinity-main ve: 101 Not tainted 3.10.0-327.3.1.vz7.10.11 #1 10.11
       000000000000003b 00000000d7f09cf1 ffff8800a3ca3b80 ffffffff816306f3
       ffff8800a3ca3bb8 ffffffff8107b4c0 ffff8801e1621900 ffff8801e1621980
       ffff880207c172b8 ffff8801e27069c0 0000000000000000 ffff8800a3ca3c20
      Call Trace:
       [<ffffffff816306f3>] dump_stack+0x19/0x1b
       [<ffffffff8107b4c0>] warn_slowpath_common+0x70/0xb0
       [<ffffffff8107b55c>] warn_slowpath_fmt+0x5c/0x80
       [<ffffffff8130a0f1>] __list_del_entry+0xa1/0xd0
       [<ffffffff8121083c>] d_shrink_del+0x2c/0x80
       [<ffffffff81210fe5>] dentry_lru_del+0x25/0x30
       [<ffffffff812116f1>] dput+0x161/0x280
       [<ffffffff812057f5>] lookup_fast+0x275/0x2e0
       [<ffffffff8120833c>] path_lookupat+0x16c/0x7a0
       [<ffffffff81214dc7>] ? inode_init_always+0x107/0x1e0
       [<ffffffff811db380>] ? kmem_cache_alloc+0xf0/0x220
       [<ffffffff8120ab5f>] ? getname_flags+0x4f/0x1a0
       [<ffffffff8120899b>] filename_lookup+0x2b/0xc0
       [<ffffffff8120bc87>] user_path_at_empty+0x67/0xc0
       [<ffffffff81112962>] ? from_kgid_munged+0x12/0x20
       [<ffffffff811ff9e9>] ? cp_new_stat+0x149/0x180
       [<ffffffff8120bcf1>] user_path_at+0x11/0x20
       [<ffffffff811ff4f3>] vfs_fstatat+0x63/0xc0
       [<ffffffff811ffb04>] SYSC_newfstatat+0x24/0x60
       [<ffffffff8111c724>] ? __audit_syscall_entry+0xb4/0x110
       [<ffffffff81022923>] ? syscall_trace_enter+0x173/0x220
       [<ffffffff81640ff3>] ? tracesys+0x7e/0xe2
       [<ffffffff811ffd4e>] SyS_newfstatat+0xe/0x10
       [<ffffffff81641052>] tracesys+0xdd/0xe2
    
    https://jira.sw.ru/browse/PSBM-44210
    
    Signed-off-by: Vladimir Davydov <vdavydov at virtuozzo.com>
    Acked-by: Dmitry Monakhov <dmonakhov at virtuozzo.com>
---
 fs/dcache.c            | 41 ++++++++++++++++++++++++++++++++++-------
 include/linux/dcache.h |  2 ++
 2 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index d026a22..30d3d70 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -252,22 +252,38 @@ static void __d_free(struct rcu_head *head)
 	kmem_cache_free(dentry_cache, dentry); 
 }
 
+static void dentry_free(struct dentry *dentry)
+{
+	struct rcu_head *p = (struct rcu_head *)&dentry->d_alias;
+
+	/* if dentry was never visible to RCU, immediate free is OK */
+	if (!(dentry->d_flags & DCACHE_RCUACCESS))
+		__d_free(p);
+	else
+		call_rcu(p, __d_free);
+}
+
 /*
  * no locks, please.
  */
 static void d_free(struct dentry *dentry)
 {
-	struct rcu_head *p = (struct rcu_head *)&dentry->d_alias;
+	bool can_free = true;
+
 	BUG_ON((int)dentry->d_lockref.count > 0);
 	this_cpu_dec(nr_dentry);
 	if (dentry->d_op && dentry->d_op->d_release)
 		dentry->d_op->d_release(dentry);
 
-	/* if dentry was never visible to RCU, immediate free is OK */
-	if (!(dentry->d_flags & DCACHE_RCUACCESS))
-		__d_free(p);
-	else
-		call_rcu(p, __d_free);
+	spin_lock(&dentry->d_lock);
+	if (dentry->d_flags & DCACHE_SHRINK_LIST) {
+		dentry->d_flags |= DCACHE_MAY_FREE;
+		can_free = false;
+	}
+	spin_unlock(&dentry->d_lock);
+
+	if (likely(can_free))
+		dentry_free(dentry);
 }
 
 /**
@@ -527,6 +543,14 @@ dentry_kill(struct dentry *dentry, int unlock_on_failure)
 	struct inode *inode;
 	struct dentry *parent;
 
+	if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
+		bool can_free = dentry->d_flags & DCACHE_MAY_FREE;
+		spin_unlock(&dentry->d_lock);
+		if (likely(can_free))
+			dentry_free(dentry);
+		return NULL;
+	}
+
 	inode = dentry->d_inode;
 	if (inode && !spin_trylock(&inode->i_lock)) {
 relock:
@@ -558,7 +582,10 @@ relock:
 	if ((dentry->d_flags & DCACHE_OP_PRUNE) && !d_unhashed(dentry))
 		dentry->d_op->d_prune(dentry);
 
-	dentry_lru_del(dentry);
+	if (dentry->d_flags & DCACHE_LRU_LIST) {
+		if (!(dentry->d_flags & DCACHE_SHRINK_LIST))
+			d_lru_del(dentry);
+	}
 	/* if it was on the hash then remove it */
 	__d_drop(dentry);
 	return d_kill(dentry, parent);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 4a44f37..51dffd2 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -220,6 +220,8 @@ struct dentry_operations {
 #define DCACHE_SYMLINK_TYPE		0x03000000 /* Symlink */
 #define DCACHE_FILE_TYPE		0x04000000 /* Other file type */
 
+#define DCACHE_MAY_FREE			0x00800000
+
 extern seqlock_t rename_lock;
 
 static inline int dname_external(struct dentry *dentry)


More information about the Devel mailing list