All of lore.kernel.org
 help / color / mirror / Atom feed
* + maple-tree-add-new-data-structure-fix-8.patch added to mm-unstable branch
@ 2022-06-15 20:41 Andrew Morton
  0 siblings, 0 replies; only message in thread
From: Andrew Morton @ 2022-06-15 20:41 UTC (permalink / raw)
  To: mm-commits, quic_qiancai, Liam.Howlett, liam.howlett, akpm


The patch titled
     Subject: maple_tree: change spanning store to work on larger trees
has been added to the -mm mm-unstable branch.  Its filename is
     maple-tree-add-new-data-structure-fix-8.patch

This patch will shortly appear at
     https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/maple-tree-add-new-data-structure-fix-8.patch

This patch will later appear in the mm-unstable branch at
    git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days

------------------------------------------------------
From: Liam Howlett <liam.howlett@oracle.com>
Subject: maple_tree: change spanning store to work on larger trees
Date: Wed, 15 Jun 2022 14:19:39 +0000

Spanning store had an issue which could lead to double free during a large
tree modification.  Fix this by being more careful about how nodes are
added to the to-be-freed and to-be-destroyed list on this operation.

Link: https://lkml.kernel.org/r/20220615141921.417598-3-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Reported-by: Qian Cai <quic_qiancai@quicinc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 lib/maple_tree.c |  325 ++++++++++++++++++++++++++++-----------------
 1 file changed, 206 insertions(+), 119 deletions(-)

--- a/lib/maple_tree.c~maple-tree-add-new-data-structure-fix-8
+++ a/lib/maple_tree.c
@@ -1388,7 +1388,6 @@ static inline unsigned char ma_data_end(
 /*
  * mas_data_end() - Find the end of the data (slot).
  * @mas: the maple state
- * @type: the type of maple node
  *
  * This method is optimized to check the metadata of a node if the node type
  * supports data end metadata.
@@ -2273,6 +2272,31 @@ done:
 }
 
 /*
+ * mas_topiary_range() - Add a range of slots to the topiary.
+ * @mas: The maple state
+ * @destroy: The topiary to add the slots (usually destroy)
+ * @start: The starting slot inclusively
+ * @end: The end slot inclusively
+ */
+static inline void mas_topiary_range(struct ma_state *mas,
+	struct ma_topiary *destroy, unsigned char start, unsigned char end)
+{
+	void __rcu **slots;
+	unsigned offset;
+
+	MT_BUG_ON(mas->tree, mte_is_leaf(mas->node));
+	slots = ma_slots(mas_mn(mas), mte_node_type(mas->node));
+	for (offset = start; offset <= end; offset++) {
+		struct maple_enode *enode = mas_slot_locked(mas, slots, offset);
+
+		if (mte_dead_node(enode))
+			continue;
+
+		mat_add(destroy, enode);
+	}
+}
+
+/*
  * mast_topiary() - Add the portions of the tree to the removal list; either to
  * be freed or discarded (destroy walk).
  * @mast: The maple_subtree_state.
@@ -2280,48 +2304,62 @@ done:
 static inline void mast_topiary(struct maple_subtree_state *mast)
 {
 	MA_WR_STATE(wr_mas, mast->orig_l, NULL);
-	unsigned char l_off, r_off, offset;
-	unsigned long l_index;
-	struct maple_enode *child;
-	void __rcu **slots;
+	unsigned char r_start, r_end;
+	unsigned char l_start, l_end;
+	void **l_slots, **r_slots;
 
 	wr_mas.type = mte_node_type(mast->orig_l->node);
-	/* The left node is consumed, so add to the free list. */
-	l_index = mast->orig_l->index;
 	mast->orig_l->index = mast->orig_l->last;
 	mas_wr_node_walk(&wr_mas);
-	mast->orig_l->index = l_index;
-	l_off = mast->orig_l->offset;
-	r_off = mast->orig_r->offset;
-	if (mast->orig_l->node == mast->orig_r->node) {
-		slots = ma_slots(mte_to_node(mast->orig_l->node), wr_mas.type);
-		for (offset = l_off + 1; offset < r_off; offset++)
-			mat_add(mast->destroy, mas_slot_locked(mast->orig_l,
-							slots, offset));
+	l_start = mast->orig_l->offset + 1;
+	l_end = mas_data_end(mast->orig_l);
+	r_start = 0;
+	r_end = mast->orig_r->offset;
+
+	if (r_end)
+		r_end--;
+
+	l_slots = ma_slots(mas_mn(mast->orig_l),
+			   mte_node_type(mast->orig_l->node));
+
+	r_slots = ma_slots(mas_mn(mast->orig_r),
+			   mte_node_type(mast->orig_r->node));
+
+	if ((l_start < l_end) &&
+	    mte_dead_node(mas_slot_locked(mast->orig_l, l_slots, l_start))) {
+		l_start++;
+	}
 
+	if (mte_dead_node(mas_slot_locked(mast->orig_r, r_slots, r_end))) {
+		if (r_end)
+			r_end--;
+	}
+
+	if ((l_start > r_end) && (mast->orig_l->node == mast->orig_r->node))
 		return;
+
+	/* At the node where left and right sides meet, add the parts between */
+	if (mast->orig_l->node == mast->orig_r->node) {
+		return mas_topiary_range(mast->orig_l, mast->destroy,
+					     l_start, r_end);
 	}
 
 	/* mast->orig_r is different and consumed. */
 	if (mte_is_leaf(mast->orig_r->node))
 		return;
 
-	/* Now destroy l_off + 1 -> end and 0 -> r_off - 1 */
-	offset = l_off + 1;
-	slots = ma_slots(mte_to_node(mast->orig_l->node), wr_mas.type);
-	while (offset < mt_slots[wr_mas.type]) {
-		child = mas_slot_locked(mast->orig_l, slots, offset++);
-		if (!child)
-			break;
+	if (mte_dead_node(mas_slot_locked(mast->orig_l, l_slots, l_end)))
+		l_end--;
 
-		mat_add(mast->destroy, child);
-	}
 
-	slots = ma_slots(mte_to_node(mast->orig_r->node),
-			     mte_node_type(mast->orig_r->node));
-	for (offset = 0; offset < r_off; offset++)
-		mat_add(mast->destroy,
-				mas_slot_locked(mast->orig_l, slots, offset));
+	if (l_start <= l_end)
+		mas_topiary_range(mast->orig_l, mast->destroy, l_start, l_end);
+
+	if (mte_dead_node(mas_slot_locked(mast->orig_r, r_slots, r_start)))
+		r_start++;
+
+	if (r_start <= r_end)
+		mas_topiary_range(mast->orig_r, mast->destroy, 0, r_end);
 }
 
 /*
@@ -2329,19 +2367,13 @@ static inline void mast_topiary(struct m
  * @mast: The maple subtree state
  * @old_r: The encoded maple node to the right (next node).
  */
-static inline void mast_rebalance_next(struct maple_subtree_state *mast,
-				       struct maple_enode *old_r, bool free)
+static inline void mast_rebalance_next(struct maple_subtree_state *mast)
 {
 	unsigned char b_end = mast->bn->b_end;
 
 	mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node),
 		   mast->bn, b_end);
-	if (free)
-		mat_add(mast->free, old_r);
-
 	mast->orig_r->last = mast->orig_r->max;
-	if (old_r == mast->orig_l->node)
-		mast->orig_l->node = mast->orig_r->node;
 }
 
 /*
@@ -2349,17 +2381,13 @@ static inline void mast_rebalance_next(s
  * @mast: The maple subtree state
  * @old_l: The encoded maple node to the left (previous node)
  */
-static inline void mast_rebalance_prev(struct maple_subtree_state *mast,
-				       struct maple_enode *old_l)
+static inline void mast_rebalance_prev(struct maple_subtree_state *mast)
 {
 	unsigned char end = mas_data_end(mast->orig_l) + 1;
 	unsigned char b_end = mast->bn->b_end;
 
 	mab_shift_right(mast->bn, end);
 	mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0);
-	mat_add(mast->free, old_l);
-	if (mast->orig_r->node == old_l)
-		mast->orig_r->node = mast->orig_l->node;
 	mast->l->min = mast->orig_l->min;
 	mast->orig_l->index = mast->orig_l->min;
 	mast->bn->b_end = end + b_end;
@@ -2367,68 +2395,116 @@ static inline void mast_rebalance_prev(s
 }
 
 /*
- * mast_sibling_rebalance_right() - Rebalance from nodes with the same parents.
- * Check the right side, then the left.  Data is copied into the @mast->bn.
+ * mast_spanning_rebalance() - Rebalance nodes with nearest neighbour favouring
+ * the node to the right.  Checking the nodes to the right then the left at each
+ * level upwards until root is reached.  Free and destroy as needed.
+ * Data is copied into the @mast->bn.
  * @mast: The maple_subtree_state.
  */
 static inline
-bool mast_sibling_rebalance_right(struct maple_subtree_state *mast, bool free)
+bool mast_spanning_rebalance(struct maple_subtree_state *mast)
 {
-	struct maple_enode *old_r;
-	struct maple_enode *old_l;
+	struct ma_state r_tmp = *mast->orig_r;
+	struct ma_state l_tmp = *mast->orig_l;
+	struct maple_enode *ancestor = NULL;
+	unsigned char start, end;
+	unsigned char depth = 0;
 
-	old_r = mast->orig_r->node;
-	if (mas_next_sibling(mast->orig_r)) {
-		mast_rebalance_next(mast, old_r, free);
-		return true;
-	}
+	r_tmp = *mast->orig_r;
+	l_tmp = *mast->orig_l;
+	do {
+		mas_ascend(mast->orig_r);
+		mas_ascend(mast->orig_l);
+		depth++;
+		if (!ancestor &&
+		    (mast->orig_r->node == mast->orig_l->node)) {
+			ancestor = mast->orig_r->node;
+			end = mast->orig_r->offset - 1;
+			start = mast->orig_l->offset + 1;
+		}
 
-	old_l = mast->orig_l->node;
-	if (mas_prev_sibling(mast->orig_l)) {
-		mast->bn->type = mte_node_type(mast->orig_l->node);
-		mast_rebalance_prev(mast, old_l);
-		return true;
-	}
+		if (mast->orig_r->offset < mas_data_end(mast->orig_r)) {
+			if (!ancestor) {
+				ancestor = mast->orig_r->node;
+				start = 0;
+			}
 
-	return false;
-}
+			mast->orig_r->offset++;
+			do {
+				mas_descend(mast->orig_r);
+				mast->orig_r->offset = 0;
+				depth--;
+			} while (depth);
+
+			mast_rebalance_next(mast);
+			do {
+				unsigned char l_off = 0;
+				struct maple_enode *child = r_tmp.node;
+
+				mas_ascend(&r_tmp);
+				if (ancestor == r_tmp.node)
+					l_off = start;
+
+				if (r_tmp.offset)
+					r_tmp.offset--;
+
+				if (l_off < r_tmp.offset)
+					mas_topiary_range(&r_tmp, mast->destroy,
+							  l_off, r_tmp.offset);
 
-static inline int mas_prev_node(struct ma_state *mas, unsigned long min);
-static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
-				unsigned long max);
-/*
- * mast_cousin_rebalance_right() - Rebalance from nodes with different parents.
- * Check the right side, then the left.  Data is copied into the @mast->bn.
- * @mast: The maple_subtree_state.
- */
-static inline
-bool mast_cousin_rebalance_right(struct maple_subtree_state *mast, bool free)
-{
-	struct maple_enode *old_l = mast->orig_l->node;
-	struct maple_enode *old_r = mast->orig_r->node;
+				if (l_tmp.node != child)
+					mat_add(mast->free, child);
 
-	MA_STATE(tmp, mast->orig_r->tree, mast->orig_r->index, mast->orig_r->last);
+			} while (r_tmp.node != ancestor);
 
-	tmp = *mast->orig_r;
-	mas_next_node(mast->orig_r, mas_mn(mast->orig_r), ULONG_MAX);
-	if (!mas_is_none(mast->orig_r)) {
-		mast_rebalance_next(mast, old_r, free);
-		return true;
-	}
+			*mast->orig_l = l_tmp;
+			return true;
 
-	*mast->orig_r = *mast->orig_l;
-	*mast->r = *mast->l;
-	mas_prev_node(mast->orig_l, 0);
-	if (mas_is_none(mast->orig_l)) {
-		/* Making a new root with the contents of mast->bn */
-		*mast->orig_l = *mast->orig_r;
-		*mast->orig_r = tmp;
-		return false;
-	}
+		} else if (mast->orig_l->offset != 0) {
+			if (!ancestor) {
+				ancestor = mast->orig_l->node;
+				end = mas_data_end(mast->orig_l);
+			}
 
-	mast->orig_l->offset = 0;
-	mast_rebalance_prev(mast, old_l);
-	return true;
+			mast->orig_l->offset--;
+			do {
+				mas_descend(mast->orig_l);
+				mast->orig_l->offset =
+					mas_data_end(mast->orig_l);
+				depth--;
+			} while (depth);
+
+			mast_rebalance_prev(mast);
+			do {
+				unsigned char r_off;
+				struct maple_enode *child = l_tmp.node;
+
+				mas_ascend(&l_tmp);
+				if (ancestor == l_tmp.node)
+					r_off = end;
+				else
+					r_off = mas_data_end(&l_tmp);
+
+				if (l_tmp.offset < r_off)
+					l_tmp.offset++;
+
+				if (l_tmp.offset < r_off)
+					mas_topiary_range(&l_tmp, mast->destroy,
+							  l_tmp.offset, r_off);
+
+				if (r_tmp.node != child)
+					mat_add(mast->free, child);
+
+			} while (l_tmp.node != ancestor);
+
+			*mast->orig_r = r_tmp;
+			return true;
+		}
+	} while (!mte_is_root(mast->orig_r->node));
+
+	*mast->orig_r = r_tmp;
+	*mast->orig_l = l_tmp;
+	return false;
 }
 
 /*
@@ -2462,18 +2538,16 @@ mast_ascend_free(struct maple_subtree_st
 	 * The node may not contain the value so set slot to ensure all
 	 * of the nodes contents are freed or destroyed.
 	 */
-	if (mast->orig_r->max < mast->orig_r->last)
-		mast->orig_r->offset = mas_data_end(mast->orig_r) + 1;
-	else {
-		wr_mas.type = mte_node_type(mast->orig_r->node);
-		mas_wr_node_walk(&wr_mas);
-	}
+	wr_mas.type = mte_node_type(mast->orig_r->node);
+	mas_wr_node_walk(&wr_mas);
 	/* Set up the left side of things */
 	mast->orig_l->offset = 0;
 	mast->orig_l->index = mast->l->min;
 	wr_mas.mas = mast->orig_l;
 	wr_mas.type = mte_node_type(mast->orig_l->node);
 	mas_wr_node_walk(&wr_mas);
+
+	mast->bn->type = wr_mas.type;
 }
 
 /*
@@ -2881,7 +2955,7 @@ static int mas_spanning_rebalance(struct
 	struct maple_enode *left = NULL, *middle = NULL, *right = NULL;
 
 	MA_STATE(l_mas, mas->tree, mas->index, mas->index);
-	MA_STATE(r_mas, mas->tree, mas->index, mas->index);
+	MA_STATE(r_mas, mas->tree, mas->index, mas->last);
 	MA_STATE(m_mas, mas->tree, mas->index, mas->index);
 	MA_TOPIARY(free, mas->tree);
 	MA_TOPIARY(destroy, mas->tree);
@@ -2897,14 +2971,9 @@ static int mas_spanning_rebalance(struct
 	mast->destroy = &destroy;
 	l_mas.node = r_mas.node = m_mas.node = MAS_NONE;
 	if (!mas_is_root_limits(mast->orig_l) &&
-	    unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type])) {
-		/*
-		 * Do not free the current node as it may be freed in a bulk
-		 * free.
-		 */
-		if (!mast_sibling_rebalance_right(mast, false))
-			mast_cousin_rebalance_right(mast, false);
-	}
+	    unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type]))
+		mast_spanning_rebalance(mast);
+
 	mast->orig_l->depth = 0;
 
 	/*
@@ -2948,6 +3017,15 @@ static int mas_spanning_rebalance(struct
 
 		/* Copy anything necessary out of the right node. */
 		mast_combine_cp_right(mast);
+		if (mte_dead_node(mast->orig_l->node) ||
+		    mte_dead_node(mast->orig_r->node)) {
+			printk("FUCKED.  l %p is %s and r %p is %s\n",
+			       mas_mn(mast->orig_l),
+			       mte_dead_node(mast->orig_l->node) ? "dead" : "alive",
+			       mas_mn(mast->orig_r),
+			       mte_dead_node(mast->orig_r->node) ? "dead" : "alive");
+			printk("Writing %lu-%lu\n", mas->index, mas->last);
+		}
 		mast_topiary(mast);
 		mast->orig_l->last = mast->orig_l->max;
 
@@ -2961,15 +3039,14 @@ static int mas_spanning_rebalance(struct
 		if (mas_is_root_limits(mast->orig_l))
 			break;
 
-		/* Try to get enough data for the next iteration. */
-		if (!mast_sibling_rebalance_right(mast, true))
-			if (!mast_cousin_rebalance_right(mast, true))
-				break;
+		if (!mast_spanning_rebalance(mast))
+			break;
 
 		/* rebalancing from other nodes may require another loop. */
 		if (!count)
 			count++;
 	}
+
 	l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)),
 				mte_node_type(mast->orig_l->node));
 	mast->orig_l->depth++;
@@ -3042,6 +3119,7 @@ static inline int mas_rebalance(struct m
 	mast.orig_l = &l_mas;
 	mast.orig_r = &r_mas;
 	mast.bn = b_node;
+	mast.bn->type = mte_node_type(mas->node);
 
 	l_mas = r_mas = *mas;
 
@@ -3855,7 +3933,7 @@ done:
 	return 1;
 }
 /*
- * mas_spanning_store() - Create a subtree with the store operation completed
+ * mas_wr_spanning_store() - Create a subtree with the store operation completed
  * and new nodes where necessary, then place the sub-tree in the actual tree.
  * Note that mas is expected to point to the node which caused the store to
  * span.
@@ -3941,6 +4019,13 @@ static inline int mas_wr_spanning_store(
 	mast.bn = &b_node;
 	mast.orig_l = &l_mas;
 	mast.orig_r = &r_mas;
+		if (mte_dead_node(mast.orig_l->node) ||
+		    mte_dead_node(mast.orig_r->node)) {
+			printk("FUCKED.  l is %s and r is %s\n",
+		       mte_dead_node(mast.orig_l->node) ? "dead" : "alive",
+		       mte_dead_node(mast.orig_r->node) ? "dead" : "alive");
+			printk("Writing %lu-%lu\n", mas->index, mas->last);
+		}
 	/* Combine l_mas and r_mas and split them up evenly again. */
 	return mas_spanning_rebalance(mas, &mast, height + 1);
 }
@@ -5387,6 +5472,9 @@ static inline void __rcu **mas_destroy_d
 		node = mas_mn(mas);
 		slots = ma_slots(node, mte_node_type(mas->node));
 		next = mas_slot_locked(mas, slots, 0);
+		if ((mte_dead_node(next)))
+			next = mas_slot_locked(mas, slots, 1);
+
 		mte_set_node_dead(mas->node);
 		node->type = mte_node_type(mas->node);
 		node->piv_parent = prev;
@@ -5394,6 +5482,7 @@ static inline void __rcu **mas_destroy_d
 		offset = 0;
 		prev = mas->node;
 	} while (!mte_is_leaf(next));
+
 	return slots;
 }
 
@@ -5427,17 +5516,15 @@ static void mt_destroy_walk(struct maple
 		mas.node = node->piv_parent;
 		if (mas_mn(&mas) == node)
 			goto start_slots_free;
+
 		type = mte_node_type(mas.node);
 		slots = ma_slots(mte_to_node(mas.node), type);
-		if ((offset < mt_slots[type])) {
-			struct maple_enode *next = slots[offset];
+		if ((offset < mt_slots[type]) && mte_node_type(slots[offset]) &&
+		    mte_to_node(slots[offset])) {
+			struct maple_enode *parent = mas.node;
 
-			if (mte_node_type(next) && mte_to_node(next)) {
-				struct maple_enode *parent = mas.node;
-
-				mas.node = mas_slot_locked(&mas, slots, offset);
-				slots = mas_destroy_descend(&mas, parent, offset);
-			}
+			mas.node = mas_slot_locked(&mas, slots, offset);
+			slots = mas_destroy_descend(&mas, parent, offset);
 		}
 		node = mas_mn(&mas);
 	} while (start != mas.node);
_

Patches currently in -mm which might be from liam.howlett@oracle.com are

maple-tree-add-new-data-structure-fix.patch
maple-tree-add-new-data-structure-fix-2.patch
maple-tree-add-new-data-structure-fix-3.patch
maple-tree-add-new-data-structure-fix-4.patch
maple-tree-add-new-data-structure-fix-7.patch
maple-tree-add-new-data-structure-fix-8.patch
maple-tree-add-new-data-structure-fix-9.patch
lib-test_maple_tree-add-testing-for-maple-tree-fix.patch
lib-test_maple_tree-add-testing-for-maple-tree-fix-2.patch
mm-start-tracking-vmas-with-maple-tree-fix-2.patch
mm-start-tracking-vmas-with-maple-tree-fix-3.patch
mm-mmap-use-advanced-maple-tree-api-for-mmap_region-fix-2.patch
mm-mmap-change-do_brk_munmap-to-use-do_mas_align_munmap-fix.patch
mm-remove-the-vma-linked-list-fix.patch
mm-mlock-drop-dead-code-in-count_mm_mlocked_page_nr.patch


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-06-15 20:42 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-06-15 20:41 + maple-tree-add-new-data-structure-fix-8.patch added to mm-unstable branch Andrew Morton

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.