[Devel] [PATCH RHEL7 COMMIT] rh/ksm: Revert rh patch "introduce ksm_max_page_sharing per page deduplication limit"

Konstantin Khorenko khorenko at virtuozzo.com
Fri Mar 17 07:55:17 PDT 2017


The commit is pushed to "branch-rh7-3.10.0-514.10.2.vz7.29.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-514.10.2.vz7.29.4
------>
commit fa51d4db2257065ef11e25a527643f9ec6f1f0e2
Author: Andrey Ryabinin <aryabinin at virtuozzo.com>
Date:   Fri Mar 17 18:55:16 2017 +0400

    rh/ksm: Revert rh patch "introduce ksm_max_page_sharing per page deduplication limit"
    
    It's broken, and a proper fix turned out to be non-trivial, so revert it
    for now. This returns the ksm code to its RHEL7.2 state.
    
    The patch was proposed by Red Hat for mainline inclusion in 2015 but was
    not accepted:
    http://marc.info/?t=144718119600007&r=1&w=2
    
    https://jira.sw.ru/browse/PSBM-60359
    
    Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
---
 mm/ksm.c | 732 ++++++---------------------------------------------------------
 1 file changed, 66 insertions(+), 666 deletions(-)

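For context, the reverted patch exposed the deduplication limit as a sysfs
tunable (default 256, minimum 2, see max_page_sharing_store() in the hunks
below) together with the read-only stable_node_chains and stable_node_dups
counters. A minimal userspace sketch of probing the knob — assuming a
pre-revert kernel where /sys/kernel/mm/ksm/max_page_sharing still exists —
could look like this; after this revert the open simply fails with ENOENT:

#include <stdio.h>

int main(void)
{
	char buf[32];
	/* Provided by the reverted patch; absent after this revert. */
	FILE *f = fopen("/sys/kernel/mm/ksm/max_page_sharing", "r");

	if (!f) {
		perror("max_page_sharing");	/* expected post-revert */
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("max_page_sharing = %s", buf);	/* default was 256 */
	fclose(f);
	return 0;
}
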
diff --git a/mm/ksm.c b/mm/ksm.c
index 5311d3c..ecdd302 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -126,12 +126,9 @@ struct ksm_scan {
  * struct stable_node - node of the stable rbtree
  * @node: rb node of this ksm page in the stable tree
  * @head: (overlaying parent) &migrate_nodes indicates temporarily on that list
- * @hlist_dup: linked into the stable_node->hlist with a stable_node chain
  * @list: linked into migrate_nodes, pending placement in the proper node tree
  * @hlist: hlist head of rmap_items using this ksm page
  * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong nid)
- * @chain_prune_time: time of the last full garbage collection
- * @rmap_hlist_len: number of rmap_item entries in hlist or STABLE_NODE_CHAIN
  * @nid: NUMA node id of stable tree in which linked (may not match kpfn)
  */
 struct stable_node {
@@ -139,24 +136,11 @@ struct stable_node {
 		struct rb_node node;	/* when node of stable tree */
 		struct {		/* when listed for migration */
 			struct list_head *head;
-			struct {
-				struct hlist_node hlist_dup;
-				struct list_head list;
-			};
+			struct list_head list;
 		};
 	};
 	struct hlist_head hlist;
-	union {
-		unsigned long kpfn;
-		unsigned long chain_prune_time;
-	};
-	/*
-	 * STABLE_NODE_CHAIN can be any negative number in
-	 * rmap_hlist_len negative range, but better not -1 to be able
-	 * to reliably detect underflows.
-	 */
-#define STABLE_NODE_CHAIN -1024
-	int rmap_hlist_len;
+	unsigned long kpfn;
 #ifdef CONFIG_NUMA
 	int nid;
 #endif
@@ -206,7 +190,6 @@ static struct rb_root *root_unstable_tree = one_unstable_tree;
 
 /* Recently migrated nodes of stable tree, pending proper placement */
 static LIST_HEAD(migrate_nodes);
-#define STABLE_NODE_DUP_HEAD ((struct list_head *)&migrate_nodes.prev)
 
 #define MM_SLOTS_HASH_BITS 10
 static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
@@ -234,18 +217,6 @@ static unsigned long ksm_pages_unshared;
 /* The number of rmap_items in use: to calculate pages_volatile */
 static unsigned long ksm_rmap_items;
 
-/* The number of stable_node chains */
-static unsigned long ksm_stable_node_chains;
-
-/* The number of stable_node dups linked to the stable_node chains */
-static unsigned long ksm_stable_node_dups;
-
-/* Delay in pruning stale stable_node_dups in the stable_node_chains */
-static int ksm_stable_node_chains_prune_millisecs = 2000;
-
-/* Maximum number of page slots sharing a stable node */
-static int ksm_max_page_sharing = 256;
-
 /* Number of pages ksmd should scan in one batch */
 static unsigned int ksm_thread_pages_to_scan = 100;
 
@@ -308,44 +279,6 @@ static void __init ksm_slab_free(void)
 	mm_slot_cache = NULL;
 }
 
-static __always_inline bool is_stable_node_chain(struct stable_node *chain)
-{
-	return chain->rmap_hlist_len == STABLE_NODE_CHAIN;
-}
-
-static __always_inline bool is_stable_node_dup(struct stable_node *dup)
-{
-	return dup->head == STABLE_NODE_DUP_HEAD;
-}
-
-static inline void stable_node_chain_add_dup(struct stable_node *dup,
-					     struct stable_node *chain)
-{
-	VM_BUG_ON(is_stable_node_dup(dup));
-	dup->head = STABLE_NODE_DUP_HEAD;
-	VM_BUG_ON(!is_stable_node_chain(chain));
-	hlist_add_head(&dup->hlist_dup, &chain->hlist);
-	ksm_stable_node_dups++;
-}
-
-static inline void __stable_node_dup_del(struct stable_node *dup)
-{
-	hlist_del(&dup->hlist_dup);
-	ksm_stable_node_dups--;
-}
-
-static inline void stable_node_dup_del(struct stable_node *dup)
-{
-	VM_BUG_ON(is_stable_node_chain(dup));
-	if (is_stable_node_dup(dup))
-		__stable_node_dup_del(dup);
-	else
-		rb_erase(&dup->node, root_stable_tree + NUMA(dup->nid));
-#ifdef CONFIG_DEBUG_VM
-	dup->head = NULL;
-#endif
-}
-
 static inline struct rmap_item *alloc_rmap_item(void)
 {
 	struct rmap_item *rmap_item;
@@ -370,8 +303,6 @@ static inline struct stable_node *alloc_stable_node(void)
 
 static inline void free_stable_node(struct stable_node *stable_node)
 {
-	VM_BUG_ON(stable_node->rmap_hlist_len &&
-		  !is_stable_node_chain(stable_node));
 	kmem_cache_free(stable_node_cache, stable_node);
 }
 
@@ -562,80 +493,25 @@ static inline int get_kpfn_nid(unsigned long kpfn)
 	return ksm_merge_across_nodes ? 0 : NUMA(pfn_to_nid(kpfn));
 }
 
-static struct stable_node *alloc_stable_node_chain(struct stable_node *dup,
-						   struct rb_root *root)
-{
-	struct stable_node *chain = alloc_stable_node();
-	VM_BUG_ON(is_stable_node_chain(dup));
-	if (likely(chain)) {
-		INIT_HLIST_HEAD(&chain->hlist);
-		chain->chain_prune_time = jiffies;
-		chain->rmap_hlist_len = STABLE_NODE_CHAIN;
-#if defined(CONFIG_DEBUG_VM) && defined(CONFIG_NUMA)
-		chain->nid = -1; /* debug */
-#endif
-		ksm_stable_node_chains++;
-
-		/*
-		 * Put the stable node chain in the first dimension of
-		 * the stable tree and at the same time remove the old
-		 * stable node.
-		 */
-		rb_replace_node(&dup->node, &chain->node, root);
-
-		/*
-		 * Move the old stable node to the second dimension
-		 * queued in the hlist_dup. The invariant is that all
-		 * dup stable_nodes in the chain->hlist point to pages
-		 * that are wrprotected and have the exact same
-		 * content.
-		 */
-		stable_node_chain_add_dup(dup, chain);
-	}
-	return chain;
-}
-
-static inline void free_stable_node_chain(struct stable_node *chain,
-					  struct rb_root *root)
-{
-	rb_erase(&chain->node, root);
-	free_stable_node(chain);
-	ksm_stable_node_chains--;
-}
-
 static void remove_node_from_stable_tree(struct stable_node *stable_node)
 {
 	struct rmap_item *rmap_item;
 
-	/* check it's not STABLE_NODE_CHAIN or negative */
-	BUG_ON(stable_node->rmap_hlist_len < 0);
-
 	hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
 		if (rmap_item->hlist.next)
 			ksm_pages_sharing--;
 		else
 			ksm_pages_shared--;
-		VM_BUG_ON(stable_node->rmap_hlist_len <= 0);
-		stable_node->rmap_hlist_len--;
 		put_anon_vma(rmap_item->anon_vma);
 		rmap_item->address &= PAGE_MASK;
 		cond_resched();
 	}
 
-	/*
-	 * We need the second aligned pointer of the migrate_nodes
-	 * list_head to stay clear from the rb_parent_color union
-	 * (aligned and different than any node) and also different
-	 * from &migrate_nodes. This will verify that future list.h changes
-	 * don't break STABLE_NODE_DUP_HEAD.
-	 */
-	BUILD_BUG_ON(STABLE_NODE_DUP_HEAD <= &migrate_nodes);
-	BUILD_BUG_ON(STABLE_NODE_DUP_HEAD >= &migrate_nodes + 1);
-
 	if (stable_node->head == &migrate_nodes)
 		list_del(&stable_node->list);
 	else
-		stable_node_dup_del(stable_node);
+		rb_erase(&stable_node->node,
+			 root_stable_tree + NUMA(stable_node->nid));
 	free_stable_node(stable_node);
 }
 
@@ -754,8 +630,6 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 			ksm_pages_sharing--;
 		else
 			ksm_pages_shared--;
-		VM_BUG_ON(stable_node->rmap_hlist_len <= 0);
-		stable_node->rmap_hlist_len--;
 
 		put_anon_vma(rmap_item->anon_vma);
 		rmap_item->address &= PAGE_MASK;
@@ -864,32 +738,6 @@ static int remove_stable_node(struct stable_node *stable_node)
 	return err;
 }
 
-static int remove_stable_node_chain(struct stable_node *stable_node,
-				    struct rb_root *root)
-{
-	struct stable_node *dup;
-	struct hlist_node *hlist_safe;
-
-	if (!is_stable_node_chain(stable_node)) {
-		VM_BUG_ON(is_stable_node_dup(stable_node));
-		if (remove_stable_node(stable_node))
-			return true;
-		else
-			return false;
-	}
-
-	hlist_for_each_entry_safe(dup, hlist_safe,
-				  &stable_node->hlist, hlist_dup) {
-		VM_BUG_ON(!is_stable_node_dup(dup));
-		if (remove_stable_node(dup))
-			return true;
-		cond_resched();
-	}
-	BUG_ON(!hlist_empty(&stable_node->hlist));
-	free_stable_node_chain(stable_node, root);
-	return false;
-}
-
 static int remove_all_stable_nodes(void)
 {
 	struct stable_node *stable_node;
@@ -901,8 +749,7 @@ static int remove_all_stable_nodes(void)
 		while (root_stable_tree[nid].rb_node) {
 			stable_node = rb_entry(root_stable_tree[nid].rb_node,
 						struct stable_node, node);
-			if (remove_stable_node_chain(stable_node,
-						     root_stable_tree + nid)) {
+			if (remove_stable_node(stable_node)) {
 				err = -EBUSY;
 				break;	/* proceed to next nid */
 			}
@@ -1298,163 +1145,6 @@ static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
 	return err ? NULL : page;
 }
 
-static __always_inline
-bool __is_page_sharing_candidate(struct stable_node *stable_node, int offset)
-{
-	VM_BUG_ON(stable_node->rmap_hlist_len < 0);
-	/*
-	 * Check that at least one mapping still exists, otherwise
-	 * there's no much point to merge and share with this
-	 * stable_node, as the underlying tree_page of the other
-	 * sharer is going to be freed soon.
-	 */
-	return stable_node->rmap_hlist_len &&
-		stable_node->rmap_hlist_len + offset < ksm_max_page_sharing;
-}
-
-static __always_inline
-bool is_page_sharing_candidate(struct stable_node *stable_node)
-{
-	return __is_page_sharing_candidate(stable_node, 0);
-}
-
-static struct stable_node *stable_node_dup(struct stable_node *stable_node,
-					   struct page **tree_page,
-					   struct rb_root *root,
-					   bool prune_stale_stable_nodes)
-{
-	struct stable_node *dup, *found = NULL;
-	struct hlist_node *hlist_safe;
-	struct page *_tree_page;
-	int nr = 0;
-	int found_rmap_hlist_len;
-
-	if (!prune_stale_stable_nodes ||
-	    time_before(jiffies, stable_node->chain_prune_time +
-			msecs_to_jiffies(
-				ksm_stable_node_chains_prune_millisecs)))
-		prune_stale_stable_nodes = false;
-	else
-		stable_node->chain_prune_time = jiffies;
-
-	hlist_for_each_entry_safe(dup, hlist_safe,
-				  &stable_node->hlist, hlist_dup) {
-		cond_resched();
-		/*
-		 * We must walk all stable_node_dup to prune the stale
-		 * stable nodes during lookup.
-		 *
-		 * get_ksm_page can drop the nodes from the
-		 * stable_node->hlist if they point to freed pages
-		 * (that's why we do a _safe walk). The "dup"
-		 * stable_node parameter itself will be freed from
-		 * under us if it returns NULL.
-		 */
-		_tree_page = get_ksm_page(dup, false);
-		if (!_tree_page)
-			continue;
-		nr += 1;
-		if (is_page_sharing_candidate(dup)) {
-			if (!found ||
-			    dup->rmap_hlist_len > found_rmap_hlist_len) {
-				if (found)
-					put_page(*tree_page);
-				found = dup;
-				found_rmap_hlist_len = found->rmap_hlist_len;
-				*tree_page = _tree_page;
-
-				if (!prune_stale_stable_nodes)
-					break;
-				/* skip put_page */
-				continue;
-			}
-		}
-		put_page(_tree_page);
-	}
-
-	/*
-	 * nr is relevant only if prune_stale_stable_nodes is true,
-	 * otherwise we may break the loop at nr == 1 even if there
-	 * are multiple entries.
-	 */
-	if (prune_stale_stable_nodes && found) {
-		if (nr == 1) {
-			/*
-			 * If there's not just one entry it would
-			 * corrupt memory, better BUG_ON. In KSM
-			 * context with no lock held it's not even
-			 * fatal.
-			 */
-			BUG_ON(stable_node->hlist.first->next);
-
-			/*
-			 * There's just one entry and it is below the
-			 * deduplication limit so drop the chain.
-			 */
-			rb_replace_node(&stable_node->node, &found->node,
-					root);
-			free_stable_node(stable_node);
-			ksm_stable_node_chains--;
-			ksm_stable_node_dups--;
-		} else if (__is_page_sharing_candidate(found, 1)) {
-			/*
-			 * Refile our candidate at the head
-			 * after the prune if our candidate
-			 * can accept one more future sharing
-			 * in addition to the one underway.
-			 */
-			hlist_del(&found->hlist_dup);
-			hlist_add_head(&found->hlist_dup,
-				       &stable_node->hlist);
-		}
-	}
-
-	return found;
-}
-
-static struct stable_node *stable_node_dup_any(struct stable_node *stable_node,
-					       struct rb_root *root)
-{
-	if (!is_stable_node_chain(stable_node))
-		return stable_node;
-	if (hlist_empty(&stable_node->hlist)) {
-		free_stable_node_chain(stable_node, root);
-		return NULL;
-	}
-	return hlist_entry(stable_node->hlist.first,
-			   typeof(*stable_node), hlist_dup);
-}
-
-static struct stable_node *__stable_node_chain(struct stable_node *stable_node,
-					       struct page **tree_page,
-					       struct rb_root *root,
-					       bool prune_stale_stable_nodes)
-{
-	if (!is_stable_node_chain(stable_node)) {
-		if (is_page_sharing_candidate(stable_node)) {
-			*tree_page = get_ksm_page(stable_node, false);
-			return stable_node;
-		}
-		return NULL;
-	}
-	return stable_node_dup(stable_node, tree_page, root,
-			       prune_stale_stable_nodes);
-}
-
-static __always_inline struct stable_node *chain_prune(struct stable_node *s_n,
-						       struct page **t_p,
-						       struct rb_root *root)
-{
-	return __stable_node_chain(s_n, t_p, root, true);
-}
-
-static __always_inline struct stable_node *chain(struct stable_node *s_n,
-						 struct page **t_p,
-						 struct rb_root *root)
-{
-	return __stable_node_chain(s_n, t_p, root, false);
-}
-
 /*
  * stable_tree_search - search for page inside the stable tree
  *
@@ -1470,7 +1160,7 @@ static struct page *stable_tree_search(struct page *page)
 	struct rb_root *root;
 	struct rb_node **new;
 	struct rb_node *parent;
-	struct stable_node *stable_node, *stable_node_dup, *stable_node_any;
+	struct stable_node *stable_node;
 	struct stable_node *page_node;
 
 	page_node = page_stable_node(page);
@@ -1492,32 +1182,7 @@ again:
 
 		cond_resched();
 		stable_node = rb_entry(*new, struct stable_node, node);
-		stable_node_any = NULL;
-		stable_node_dup = chain_prune(stable_node, &tree_page, root);
-		if (!stable_node_dup) {
-			/*
-			 * Either all stable_node dups were full in
-			 * this stable_node chain, or this chain was
-			 * empty and should be rb_erased.
-			 */
-			stable_node_any = stable_node_dup_any(stable_node,
-							      root);
-			if (!stable_node_any) {
-				/* rb_erase just run */
-				goto again;
-			}
-			/*
-			 * Take any of the stable_node dups page of
-			 * this stable_node chain to let the tree walk
-			 * continue. All KSM pages belonging to the
-			 * stable_node dups in a stable_node chain
-			 * have the same content and they're
-			 * wrprotected at all times. Any will work
-			 * fine to continue the walk.
-			 */
-			tree_page = get_ksm_page(stable_node_any, false);
-		}
-		VM_BUG_ON(!stable_node_dup ^ !!stable_node_any);
+		tree_page = get_ksm_page(stable_node, false);
 		if (!tree_page) {
 			/*
 			 * If we walked over a stale stable_node,
@@ -1540,34 +1205,6 @@ again:
 		else if (ret > 0)
 			new = &parent->rb_right;
 		else {
-			if (page_node) {
-				VM_BUG_ON(page_node->head != &migrate_nodes);
-				/*
-				 * Test if the migrated page should be merged
-				 * into a stable node dup. If the mapcount is
-				 * 1 we can migrate it with another KSM page
-				 * without adding it to the chain.
-				 */
-				if (page_mapcount(page) > 1)
-					goto chain_append;
-			}
-
-			if (!stable_node_dup) {
-				/*
-				 * If the stable_node is a chain and
-				 * we got a payload match in memcmp
-				 * but we cannot merge the scanned
-				 * page in any of the existing
-				 * stable_node dups because they're
-				 * all full, we need to wait the
-				 * scanned page to find itself a match
-				 * in the unstable tree to create a
-				 * brand new KSM page to add later to
-				 * the dups of this stable_node.
-				 */
-				return NULL;
-			}
-
 			/*
 			 * Lock and unlock the stable_node's page (which
 			 * might already have been migrated) so that page
@@ -1575,21 +1212,23 @@ again:
 			 * It would be more elegant to return stable_node
 			 * than kpage, but that involves more changes.
 			 */
-			tree_page = get_ksm_page(stable_node_dup, true);
-			if (unlikely(!tree_page))
-				/*
-				 * The tree may have been rebalanced,
-				 * so re-evaluate parent and new.
-				 */
-				goto again;
-			unlock_page(tree_page);
-
-			if (get_kpfn_nid(stable_node_dup->kpfn) !=
-			    NUMA(stable_node_dup->nid)) {
-				put_page(tree_page);
-				goto replace;
+			tree_page = get_ksm_page(stable_node, true);
+			if (tree_page) {
+				unlock_page(tree_page);
+				if (get_kpfn_nid(stable_node->kpfn) !=
+						NUMA(stable_node->nid)) {
+					put_page(tree_page);
+					goto replace;
+				}
+				return tree_page;
 			}
-			return tree_page;
+			/*
+			 * There is now a place for page_node, but the tree may
+			 * have been rebalanced, so re-evaluate parent and new.
+			 */
+			if (page_node)
+				goto again;
+			return NULL;
 		}
 	}
 
@@ -1600,72 +1239,22 @@ again:
 	DO_NUMA(page_node->nid = nid);
 	rb_link_node(&page_node->node, parent, new);
 	rb_insert_color(&page_node->node, root);
-out:
-	if (is_page_sharing_candidate(page_node)) {
-		get_page(page);
-		return page;
-	} else
-		return NULL;
+	get_page(page);
+	return page;
 
 replace:
-	if (stable_node_dup == stable_node) {
-		/* there is no chain */
-		if (page_node) {
-			VM_BUG_ON(page_node->head != &migrate_nodes);
-			list_del(&page_node->list);
-			DO_NUMA(page_node->nid = nid);
-			rb_replace_node(&stable_node->node, &page_node->node,
-					root);
-			if (is_page_sharing_candidate(page_node))
-				get_page(page);
-			else
-				page = NULL;
-		} else {
-			rb_erase(&stable_node->node, root);
-			page = NULL;
-		}
+	if (page_node) {
+		list_del(&page_node->list);
+		DO_NUMA(page_node->nid = nid);
+		rb_replace_node(&stable_node->node, &page_node->node, root);
+		get_page(page);
 	} else {
-		VM_BUG_ON(!is_stable_node_chain(stable_node));
-		__stable_node_dup_del(stable_node_dup);
-		if (page_node) {
-			VM_BUG_ON(page_node->head != &migrate_nodes);
-			list_del(&page_node->list);
-			DO_NUMA(page_node->nid = nid);
-			stable_node_chain_add_dup(page_node, stable_node);
-			if (is_page_sharing_candidate(page_node))
-				get_page(page);
-			else
-				page = NULL;
-		} else {
-			page = NULL;
-		}
+		rb_erase(&stable_node->node, root);
+		page = NULL;
 	}
-	stable_node_dup->head = &migrate_nodes;
-	list_add(&stable_node_dup->list, stable_node_dup->head);
+	stable_node->head = &migrate_nodes;
+	list_add(&stable_node->list, stable_node->head);
 	return page;
-
-chain_append:
-	/* stable_node_dup could be null if it reached the limit */
-	if (!stable_node_dup)
-		stable_node_dup = stable_node_any;
-	if (stable_node_dup == stable_node) {
-		/* chain is missing so create it */
-		stable_node = alloc_stable_node_chain(stable_node_dup,
-						      root);
-		if (!stable_node)
-			return NULL;
-	}
-	/*
-	 * Add this stable_node dup that was
-	 * migrated to the stable_node chain
-	 * of the current nid for this page
-	 * content.
-	 */
-	VM_BUG_ON(page_node->head != &migrate_nodes);
-	list_del(&page_node->list);
-	DO_NUMA(page_node->nid = nid);
-	stable_node_chain_add_dup(page_node, stable_node);
-	goto out;
 }
 
 /*
@@ -1682,8 +1271,7 @@ static struct stable_node *stable_tree_insert(struct page *kpage)
 	struct rb_root *root;
 	struct rb_node **new;
 	struct rb_node *parent;
-	struct stable_node *stable_node, *stable_node_dup, *stable_node_any;
-	bool need_chain = false;
+	struct stable_node *stable_node;
 
 	kpfn = page_to_pfn(kpage);
 	nid = get_kpfn_nid(kpfn);
@@ -1698,32 +1286,7 @@ again:
 
 		cond_resched();
 		stable_node = rb_entry(*new, struct stable_node, node);
-		stable_node_any = NULL;
-		stable_node_dup = chain(stable_node, &tree_page, root);
-		if (!stable_node_dup) {
-			/*
-			 * Either all stable_node dups were full in
-			 * this stable_node chain, or this chain was
-			 * empty and should be rb_erased.
-			 */
-			stable_node_any = stable_node_dup_any(stable_node,
-							      root);
-			if (!stable_node_any) {
-				/* rb_erase just run */
-				goto again;
-			}
-			/*
-			 * Take any of the stable_node dups page of
-			 * this stable_node chain to let the tree walk
-			 * continue. All KSM pages belonging to the
-			 * stable_node dups in a stable_node chain
-			 * have the same content and they're
-			 * wrprotected at all times. Any will work
-			 * fine to continue the walk.
-			 */
-			tree_page = get_ksm_page(stable_node_any, false);
-		}
-		VM_BUG_ON(!stable_node_dup ^ !!stable_node_any);
+		tree_page = get_ksm_page(stable_node, false);
 		if (!tree_page) {
 			/*
 			 * If we walked over a stale stable_node,
@@ -1746,37 +1309,27 @@ again:
 		else if (ret > 0)
 			new = &parent->rb_right;
 		else {
-			need_chain = true;
-			break;
+			/*
+			 * It is not a bug that stable_tree_search() didn't
+			 * find this node: because at that time our page was
+			 * not yet write-protected, so may have changed since.
+			 */
+			return NULL;
 		}
 	}
 
-	stable_node_dup = alloc_stable_node();
-	if (!stable_node_dup)
+	stable_node = alloc_stable_node();
+	if (!stable_node)
 		return NULL;
 
-	INIT_HLIST_HEAD(&stable_node_dup->hlist);
-	stable_node_dup->kpfn = kpfn;
-	set_page_stable_node(kpage, stable_node_dup);
-	stable_node_dup->rmap_hlist_len = 0;
-	DO_NUMA(stable_node_dup->nid = nid);
-	if (!need_chain) {
-		rb_link_node(&stable_node_dup->node, parent, new);
-		rb_insert_color(&stable_node_dup->node, root);
-	} else {
-		if (!is_stable_node_chain(stable_node)) {
-			struct stable_node *orig = stable_node;
-			/* chain is missing so create it */
-			stable_node = alloc_stable_node_chain(orig, root);
-			if (!stable_node) {
-				free_stable_node(stable_node_dup);
-				return NULL;
-			}
-		}
-		stable_node_chain_add_dup(stable_node_dup, stable_node);
-	}
+	INIT_HLIST_HEAD(&stable_node->hlist);
+	stable_node->kpfn = kpfn;
+	set_page_stable_node(kpage, stable_node);
+	DO_NUMA(stable_node->nid = nid);
+	rb_link_node(&stable_node->node, parent, new);
+	rb_insert_color(&stable_node->node, root);
 
-	return stable_node_dup;
+	return stable_node;
 }
 
 /*
@@ -1866,27 +1419,8 @@ struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
  * the same ksm page.
  */
 static void stable_tree_append(struct rmap_item *rmap_item,
-			       struct stable_node *stable_node,
-			       bool max_page_sharing_bypass)
+			       struct stable_node *stable_node)
 {
-	/*
-	 * rmap won't find this mapping if we don't insert the
-	 * rmap_item in the right stable_node
-	 * duplicate. page_migration could break later if rmap breaks,
-	 * so we can as well crash here. We really need to check for
-	 * rmap_hlist_len == STABLE_NODE_CHAIN, but we can as well check
-	 * for other negative values as an undeflow if detected here
-	 * for the first time (and not when decreasing rmap_hlist_len)
-	 * would be sign of memory corruption in the stable_node.
-	 */
-	BUG_ON(stable_node->rmap_hlist_len < 0);
-
-	stable_node->rmap_hlist_len++;
-	if (!max_page_sharing_bypass)
-		/* possibly non fatal but unexpected overflow, only warn */
-		WARN_ON_ONCE(stable_node->rmap_hlist_len >
-			     ksm_max_page_sharing);
-
 	rmap_item->head = stable_node;
 	rmap_item->address |= STABLE_FLAG;
 	hlist_add_head(&rmap_item->hlist, &stable_node->hlist);
@@ -1914,26 +1448,19 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 	struct page *kpage;
 	unsigned int checksum;
 	int err;
-	bool max_page_sharing_bypass = false;
 
 	stable_node = page_stable_node(page);
 	if (stable_node) {
 		if (stable_node->head != &migrate_nodes &&
-		    get_kpfn_nid(READ_ONCE(stable_node->kpfn)) !=
-		    NUMA(stable_node->nid)) {
-			stable_node_dup_del(stable_node);
+		    get_kpfn_nid(stable_node->kpfn) != NUMA(stable_node->nid)) {
+			rb_erase(&stable_node->node,
+				 root_stable_tree + NUMA(stable_node->nid));
 			stable_node->head = &migrate_nodes;
 			list_add(&stable_node->list, stable_node->head);
 		}
 		if (stable_node->head != &migrate_nodes &&
 		    rmap_item->head == stable_node)
 			return;
-		/*
-		 * If it's a KSM fork, allow it to go over the sharing limit
-		 * without warnings.
-		 */
-		if (!is_page_sharing_candidate(stable_node))
-			max_page_sharing_bypass = true;
 	}
 
 	/* We first start with searching the page inside the stable tree */
@@ -1953,8 +1480,7 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 			 * add its rmap_item to the stable tree.
 			 */
 			lock_page(kpage);
-			stable_tree_append(rmap_item, page_stable_node(kpage),
-					   max_page_sharing_bypass);
+			stable_tree_append(rmap_item, page_stable_node(kpage));
 			unlock_page(kpage);
 		}
 		put_page(kpage);
@@ -1987,10 +1513,8 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 			lock_page(kpage);
 			stable_node = stable_tree_insert(kpage);
 			if (stable_node) {
-				stable_tree_append(tree_rmap_item, stable_node,
-						   false);
-				stable_tree_append(rmap_item, stable_node,
-						   false);
+				stable_tree_append(tree_rmap_item, stable_node);
+				stable_tree_append(rmap_item, stable_node);
 			}
 			unlock_page(kpage);
 
@@ -2607,48 +2131,6 @@ static void wait_while_offlining(void)
 	}
 }
 
-static bool stable_node_dup_remove_range(struct stable_node *stable_node,
-					 unsigned long start_pfn,
-					 unsigned long end_pfn)
-{
-	if (stable_node->kpfn >= start_pfn &&
-	    stable_node->kpfn < end_pfn) {
-		/*
-		 * Don't get_ksm_page, page has already gone:
-		 * which is why we keep kpfn instead of page*
-		 */
-		remove_node_from_stable_tree(stable_node);
-		return true;
-	}
-	return false;
-}
-
-static bool stable_node_chain_remove_range(struct stable_node *stable_node,
-					   unsigned long start_pfn,
-					   unsigned long end_pfn,
-					   struct rb_root *root)
-{
-	struct stable_node *dup;
-	struct hlist_node *hlist_safe;
-
-	if (!is_stable_node_chain(stable_node)) {
-		VM_BUG_ON(is_stable_node_dup(stable_node));
-		return stable_node_dup_remove_range(stable_node, start_pfn,
-						    end_pfn);
-	}
-
-	hlist_for_each_entry_safe(dup, hlist_safe,
-				  &stable_node->hlist, hlist_dup) {
-		VM_BUG_ON(!is_stable_node_dup(dup));
-		stable_node_dup_remove_range(dup, start_pfn, end_pfn);
-	}
-	if (hlist_empty(&stable_node->hlist)) {
-		free_stable_node_chain(stable_node, root);
-		return true; /* notify caller that tree was rebalanced */
-	} else
-		return false;
-}
-
 static void ksm_check_stable_tree(unsigned long start_pfn,
 				  unsigned long end_pfn)
 {
@@ -2661,12 +2143,15 @@ static void ksm_check_stable_tree(unsigned long start_pfn,
 		node = rb_first(root_stable_tree + nid);
 		while (node) {
 			stable_node = rb_entry(node, struct stable_node, node);
-			if (stable_node_chain_remove_range(stable_node,
-							   start_pfn, end_pfn,
-							   root_stable_tree +
-							   nid))
+			if (stable_node->kpfn >= start_pfn &&
+			    stable_node->kpfn < end_pfn) {
+				/*
+				 * Don't get_ksm_page, page has already gone:
+				 * which is why we keep kpfn instead of page*
+				 */
+				remove_node_from_stable_tree(stable_node);
 				node = rb_first(root_stable_tree + nid);
-			else
+			} else
 				node = rb_next(node);
 			cond_resched();
 		}
@@ -2891,47 +2376,6 @@ static ssize_t merge_across_nodes_store(struct kobject *kobj,
 KSM_ATTR(merge_across_nodes);
 #endif
 
-static ssize_t max_page_sharing_show(struct kobject *kobj,
-				     struct kobj_attribute *attr, char *buf)
-{
-	return sprintf(buf, "%u\n", ksm_max_page_sharing);
-}
-
-static ssize_t max_page_sharing_store(struct kobject *kobj,
-				      struct kobj_attribute *attr,
-				      const char *buf, size_t count)
-{
-	int err;
-	int knob;
-
-	err = kstrtoint(buf, 10, &knob);
-	if (err)
-		return err;
-	/*
-	 * When a KSM page is created it is shared by 2 mappings. This
-	 * being a signed comparison, it implicitly verifies it's not
-	 * negative.
-	 */
-	if (knob < 2)
-		return -EINVAL;
-
-	if (READ_ONCE(ksm_max_page_sharing) == knob)
-		return count;
-
-	mutex_lock(&ksm_thread_mutex);
-	wait_while_offlining();
-	if (ksm_max_page_sharing != knob) {
-		if (ksm_pages_shared || remove_all_stable_nodes())
-			err = -EBUSY;
-		else
-			ksm_max_page_sharing = knob;
-	}
-	mutex_unlock(&ksm_thread_mutex);
-
-	return err ? err : count;
-}
-KSM_ATTR(max_page_sharing);
-
 static ssize_t pages_shared_show(struct kobject *kobj,
 				 struct kobj_attribute *attr, char *buf)
 {
@@ -2970,46 +2414,6 @@ static ssize_t pages_volatile_show(struct kobject *kobj,
 }
 KSM_ATTR_RO(pages_volatile);
 
-static ssize_t stable_node_dups_show(struct kobject *kobj,
-				     struct kobj_attribute *attr, char *buf)
-{
-	return sprintf(buf, "%lu\n", ksm_stable_node_dups);
-}
-KSM_ATTR_RO(stable_node_dups);
-
-static ssize_t stable_node_chains_show(struct kobject *kobj,
-				       struct kobj_attribute *attr, char *buf)
-{
-	return sprintf(buf, "%lu\n", ksm_stable_node_chains);
-}
-KSM_ATTR_RO(stable_node_chains);
-
-static ssize_t
-stable_node_chains_prune_millisecs_show(struct kobject *kobj,
-					struct kobj_attribute *attr,
-					char *buf)
-{
-	return sprintf(buf, "%u\n", ksm_stable_node_chains_prune_millisecs);
-}
-
-static ssize_t
-stable_node_chains_prune_millisecs_store(struct kobject *kobj,
-					 struct kobj_attribute *attr,
-					 const char *buf, size_t count)
-{
-	unsigned long msecs;
-	int err;
-
-	err = kstrtoul(buf, 10, &msecs);
-	if (err || msecs > UINT_MAX)
-		return -EINVAL;
-
-	ksm_stable_node_chains_prune_millisecs = msecs;
-
-	return count;
-}
-KSM_ATTR(stable_node_chains_prune_millisecs);
-
 static ssize_t full_scans_show(struct kobject *kobj,
 			       struct kobj_attribute *attr, char *buf)
 {
@@ -3029,10 +2433,6 @@ static struct attribute *ksm_attrs[] = {
 #ifdef CONFIG_NUMA
 	&merge_across_nodes_attr.attr,
 #endif
-	&max_page_sharing_attr.attr,
-	&stable_node_chains_attr.attr,
-	&stable_node_dups_attr.attr,
-	&stable_node_chains_prune_millisecs_attr.attr,
 	NULL,
 };
 

