* [PATCH 2/3] mm/vmalloc: Add adjust_search_size parameter
From: Uladzislau Rezki (Sony) @ 2022-01-19 14:35 UTC
  To: Andrew Morton
  Cc: linux-mm, LKML, Christoph Hellwig, Matthew Wilcox,
	Nicholas Piggin, Uladzislau Rezki, Oleksiy Avramchenko,
	Uladzislau Rezki
From: Uladzislau Rezki <uladzislau.rezki@sony.com>
Extend the find_vmap_lowest_match() function with one more
parameter: an "adjust_search_size" boolean. It makes it
possible to control the accuracy of the search for a free
block when a specific alignment is required.
With this patch the search size is always adjusted, for
performance reasons, so that a request is served as fast
as possible. There is one exception though: short ranges
where the requested size exactly matches the passed
vstart/vend restriction together with a specific alignment
request. In such a scenario the adjustment would prevent a
successful allocation.
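For illustration, a minimal sketch of the idea (the helper
below is hypothetical; only the arithmetic mirrors the patch):

	/*
	 * A free block may start anywhere, so in the worst case up
	 * to (align - 1) bytes are skipped before the first address
	 * that satisfies the alignment. Searching for the padded
	 * length guarantees an aligned carve-out of "size" bytes
	 * fits inside the found block.
	 */
	static unsigned long padded_search_length(unsigned long size,
		unsigned long align)
	{
		return size + align - 1;
	}

The exception follows from the same arithmetic: when
(vend - vstart) == size the request can only succeed as an
exact fit, and the padded length would exceed the range,
making the search fail spuriously.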
Signed-off-by: Uladzislau Rezki <uladzislau.rezki@sony.com>
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 mm/vmalloc.c | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ed0f9eaa61a9..52ee67107046 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1192,22 +1192,28 @@ is_within_this_va(struct vmap_area *va, unsigned long size,
 /*
  * Find the first free block(lowest start address) in the tree,
  * that will accomplish the request corresponding to passing
- * parameters.
+ * parameters. Please note, with an alignment bigger than PAGE_SIZE,
+ * a search length is adjusted to account for worst case alignment
+ * overhead.
  */
 static __always_inline struct vmap_area *
-find_vmap_lowest_match(unsigned long size,
-	unsigned long align, unsigned long vstart)
+find_vmap_lowest_match(unsigned long size, unsigned long align,
+	unsigned long vstart, bool adjust_search_size)
 {
 	struct vmap_area *va;
 	struct rb_node *node;
+	unsigned long length;
 
 	/* Start from the root. */
 	node = free_vmap_area_root.rb_node;
 
+	/* Adjust the search size for alignment overhead. */
+	length = adjust_search_size ? size + align - 1 : size;
+
 	while (node) {
 		va = rb_entry(node, struct vmap_area, rb_node);
 
-		if (get_subtree_max_size(node->rb_left) >= size &&
+		if (get_subtree_max_size(node->rb_left) >= length &&
 				vstart < va->va_start) {
 			node = node->rb_left;
 		} else {
@@ -1217,9 +1223,9 @@ find_vmap_lowest_match(unsigned long size,
 			/*
 			 * Does not make sense to go deeper towards the right
 			 * sub-tree if it does not have a free block that is
-			 * equal or bigger to the requested search size.
+			 * equal or bigger to the requested search length.
 			 */
-			if (get_subtree_max_size(node->rb_right) >= size) {
+			if (get_subtree_max_size(node->rb_right) >= length) {
 				node = node->rb_right;
 				continue;
 			}
@@ -1235,7 +1241,7 @@ find_vmap_lowest_match(unsigned long size,
 				if (is_within_this_va(va, size, align, vstart))
 					return va;
 
-				if (get_subtree_max_size(node->rb_right) >= size &&
+				if (get_subtree_max_size(node->rb_right) >= length &&
 						vstart <= va->va_start) {
 					/*
 					 * Shift the vstart forward. Please note, we update it with
@@ -1283,7 +1289,7 @@ find_vmap_lowest_match_check(unsigned long size, unsigned long align)
 	get_random_bytes(&rnd, sizeof(rnd));
 	vstart = VMALLOC_START + rnd;
 
-	va_1 = find_vmap_lowest_match(size, align, vstart);
+	va_1 = find_vmap_lowest_match(size, align, vstart, false);
 	va_2 = find_vmap_lowest_linear_match(size, align, vstart);
 
 	if (va_1 != va_2)
@@ -1434,12 +1440,25 @@ static __always_inline unsigned long
 __alloc_vmap_area(unsigned long size, unsigned long align,
 	unsigned long vstart, unsigned long vend)
 {
+	bool adjust_search_size = true;
 	unsigned long nva_start_addr;
 	struct vmap_area *va;
 	enum fit_type type;
 	int ret;
 
-	va = find_vmap_lowest_match(size, align, vstart);
+	/*
+	 * Do not adjust when:
+	 *   a) align <= PAGE_SIZE, because it does not make any sense.
+	 *      All blocks(their start addresses) are at least PAGE_SIZE
+	 *      aligned anyway;
+	 *   b) a short range where a requested size corresponds to exactly
+	 *      specified [vstart:vend] interval and an alignment > PAGE_SIZE.
+	 *      With adjusted search length an allocation would not succeed.
+	 */
+	if (align <= PAGE_SIZE || (align > PAGE_SIZE && (vend - vstart) == size))
+		adjust_search_size = false;
+
+	va = find_vmap_lowest_match(size, align, vstart, adjust_search_size);
 	if (unlikely(!va))
 		return vend;
 
-- 
2.30.2
* [PATCH 3/3] mm/vmalloc: Eliminate an extra orig_gfp_mask
From: Uladzislau Rezki (Sony) @ 2022-01-19 14:35 UTC
  To: Andrew Morton
  Cc: linux-mm, LKML, Christoph Hellwig, Matthew Wilcox,
	Nicholas Piggin, Uladzislau Rezki, Oleksiy Avramchenko,
	Vasily Averin
That extra variable was introduced only to keep the originally
passed gfp_mask, because the mask is updated with __GFP_NOWARN
on entry, which broke the error handling messages.
Instead, keep the original gfp_mask unmodified and pass
gfp_mask together with an extra __GFP_NOWARN flag as a
parameter to the vm_area_alloc_pages() function. This makes
the code less confusing.
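The pattern, reduced to a sketch (a hypothetical helper, not
the exact kernel code; alloc_one() stands in for the call path
through vm_area_alloc_pages()):

	static struct page *alloc_one(gfp_t gfp_mask)
	{
		/* Suppress the allocator's own warning here... */
		struct page *page = alloc_pages(gfp_mask | __GFP_NOWARN, 0);

		/* ...and report with the caller's unmodified mask. */
		if (!page)
			warn_alloc(gfp_mask, NULL,
				"failed to allocate a page");

		return page;
	}

With the mask never mutated, every warn_alloc() call reports
exactly what the caller asked for.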
Cc: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 mm/vmalloc.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 52ee67107046..04edd32ba6bc 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2953,7 +2953,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 				 int node)
 {
 	const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
-	const gfp_t orig_gfp_mask = gfp_mask;
 	bool nofail = gfp_mask & __GFP_NOFAIL;
 	unsigned long addr = (unsigned long)area->addr;
 	unsigned long size = get_vm_area_size(area);
@@ -2967,7 +2966,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	max_small_pages = ALIGN(size, 1UL << page_shift) >> PAGE_SHIFT;
 
 	array_size = (unsigned long)max_small_pages * sizeof(struct page *);
-	gfp_mask |= __GFP_NOWARN;
 	if (!(gfp_mask & (GFP_DMA | GFP_DMA32)))
 		gfp_mask |= __GFP_HIGHMEM;
 
@@ -2980,7 +2978,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	}
 
 	if (!area->pages) {
-		warn_alloc(orig_gfp_mask, NULL,
+		warn_alloc(gfp_mask, NULL,
 			"vmalloc error: size %lu, failed to allocated page array size %lu",
 			nr_small_pages * PAGE_SIZE, array_size);
 		free_vm_area(area);
@@ -2990,8 +2988,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	set_vm_area_page_order(area, page_shift - PAGE_SHIFT);
 	page_order = vm_area_page_order(area);
 
-	area->nr_pages = vm_area_alloc_pages(gfp_mask, node,
-		page_order, nr_small_pages, area->pages);
+	area->nr_pages = vm_area_alloc_pages(gfp_mask | __GFP_NOWARN,
+		node, page_order, nr_small_pages, area->pages);
 
 	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
 	if (gfp_mask & __GFP_ACCOUNT) {
@@ -3007,7 +3005,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	 * allocation request, free them via __vfree() if any.
 	 */
 	if (area->nr_pages != nr_small_pages) {
-		warn_alloc(orig_gfp_mask, NULL,
+		warn_alloc(gfp_mask, NULL,
 			"vmalloc error: size %lu, page order %u, failed to allocate pages",
 			area->nr_pages * PAGE_SIZE, page_order);
 		goto fail;
@@ -3035,7 +3033,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 		memalloc_noio_restore(flags);
 
 	if (ret < 0) {
-		warn_alloc(orig_gfp_mask, NULL,
+		warn_alloc(gfp_mask, NULL,
 			"vmalloc error: size %lu, failed to map pages",
 			area->nr_pages * PAGE_SIZE);
 		goto fail;
-- 
2.30.2
* Re: [PATCH 1/3] mm/vmalloc: Move draining areas out of caller context
From: Christoph Hellwig @ 2022-01-20  9:12 UTC
  To: Uladzislau Rezki (Sony)
  Cc: Andrew Morton, linux-mm, LKML, Christoph Hellwig, Matthew Wilcox,
	Nicholas Piggin, Oleksiy Avramchenko
On Wed, Jan 19, 2022 at 03:35:38PM +0100, Uladzislau Rezki (Sony) wrote:
> +static void drain_vmap_area(struct work_struct *work)
Nit, but I prefer to have a _work postfix for workers just to keep
it easy to read.
>  	/* After this point, we may free va at any time */
>  	if (unlikely(nr_lazy > lazy_max_pages()))
> -		try_purge_vmap_area_lazy();
> +		if (!atomic_xchg(&drain_vmap_area_work_in_progress, 1))
> +			schedule_work(&drain_vmap_area_work);
Work items are defined to be single threaded, so I don't think we need
the drain_vmap_area_work_in_progress hack.
>  
>  /*
> -- 
> 2.30.2
> 
> 
---end quoted text---
* Re: [PATCH 1/3] mm/vmalloc: Move draining areas out of caller context
From: Uladzislau Rezki @ 2022-01-20 10:42 UTC
  To: Christoph Hellwig
  Cc: Uladzislau Rezki (Sony), Andrew Morton, linux-mm, LKML,
	Matthew Wilcox, Nicholas Piggin, Oleksiy Avramchenko
> On Wed, Jan 19, 2022 at 03:35:38PM +0100, Uladzislau Rezki (Sony) wrote:
> > +static void drain_vmap_area(struct work_struct *work)
> 
> Nit, but I prefer to have a _work postfix for workers just to keep
> it easy to read.
> 
Will fix it!
> >  	/* After this point, we may free va at any time */
> >  	if (unlikely(nr_lazy > lazy_max_pages()))
> > -		try_purge_vmap_area_lazy();
> > +		if (!atomic_xchg(&drain_vmap_area_work_in_progress, 1))
> > +			schedule_work(&drain_vmap_area_work);
> 
> Work items are defined to be single threaded, so I don't think we need
> the drain_vmap_area_work_in_progress hack.
> 
The motivation for that hack was to prevent the drain work from
being queued several times at once: schedule_work() checks only
the pending bit. If the work is already running, another caller
of vfree() will queue it one more time, since the pending bit is
cleared as soon as the worker starts executing.
Or am I missing something?
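To make that concrete, a minimal sketch of the guard (the patch
itself is not quoted in this thread, so the names are assumed
from the discussion and the drain body is a placeholder):

	static void drain_vmap_area_work_func(struct work_struct *work);
	static DECLARE_WORK(drain_vmap_area_work, drain_vmap_area_work_func);
	static atomic_t drain_vmap_area_work_in_progress;

	static void drain_vmap_area_work_func(struct work_struct *work)
	{
		/*
		 * WORK_STRUCT_PENDING is cleared before this function
		 * runs, so without the flag a concurrent vfree() could
		 * queue the work again while it is still draining.
		 */
		__purge_vmap_area_lazy_stub();	/* placeholder for the drain */
		atomic_set(&drain_vmap_area_work_in_progress, 0);
	}

	/* Caller side: at most one instance queued or running. */
	if (!atomic_xchg(&drain_vmap_area_work_in_progress, 1))
		schedule_work(&drain_vmap_area_work);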
--
Vlad Rezki