All of lore.kernel.org
 help / color / mirror / Atom feed
From: Baoquan He <bhe@redhat.com>
To: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Cc: linux-mm@kvack.org, Andrew Morton <akpm@linux-foundation.org>,
	LKML <linux-kernel@vger.kernel.org>,
	Lorenzo Stoakes <lstoakes@gmail.com>,
	Christoph Hellwig <hch@infradead.org>,
	Matthew Wilcox <willy@infradead.org>,
	"Liam R . Howlett" <Liam.Howlett@oracle.com>,
	Dave Chinner <david@fromorbit.com>,
	"Paul E . McKenney" <paulmck@kernel.org>,
	Joel Fernandes <joel@joelfernandes.org>,
	Oleksiy Avramchenko <oleksiy.avramchenko@sony.com>
Subject: Re: [PATCH 8/9] mm: vmalloc: Offload free_vmap_area_lock global lock
Date: Mon, 5 Jun 2023 08:43:39 +0800	[thread overview]
Message-ID: <ZH0vuwaSddREy9dz@MiWiFi-R3L-srv> (raw)
In-Reply-To: <20230522110849.2921-9-urezki@gmail.com>

On 05/22/23 at 01:08pm, Uladzislau Rezki (Sony) wrote:
......  
> +static unsigned long
> +this_cpu_zone_alloc_fill(struct cpu_vmap_zone *z,
> +	unsigned long size, unsigned long align,
> +	gfp_t gfp_mask, int node)
> +{
> +	unsigned long addr = VMALLOC_END;
> +	struct vmap_area *va;
> +
> +	/*
> +	 * It still can race. One task sets a progress to
> +	 * 1 a second one gets preempted on entry, the first
> +	 * zeroed the progress flag and second proceed with
> +	 * an extra prefetch.
> +	 */
> +	if (atomic_xchg(&z->fill_in_progress, 1))
> +		return addr;
> +
> +	va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);
> +	if (unlikely(!va))
> +		goto out;
> +
> +	spin_lock(&free_vmap_area_lock);
> +	addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list,
> +		cvz_size, 1, VMALLOC_START, VMALLOC_END);
> +	spin_unlock(&free_vmap_area_lock);

The 'z' is passed in from this_cpu_zone_alloc(), where it is obtained with
raw_cpu_ptr(&cpu_vmap_zone). Here, when we try to get a chunk of cvz_size
from free_vmap_area_root/free_vmap_area_list, how can we guarantee that it
belongs to the 'z' zone? As I understand it, __alloc_vmap_area() will
pick a sufficient address range sequentially, bottom up, from
free_vmap_area_root. Please correct me if I am wrong.

static unsigned long
this_cpu_zone_alloc(unsigned long size, unsigned long align, gfp_t gfp_mask, int node)
{
        struct cpu_vmap_zone *z = raw_cpu_ptr(&cpu_vmap_zone);
	......
	if (addr == VMALLOC_END && left < 4 * PAGE_SIZE)
                addr = this_cpu_zone_alloc_fill(z, size, align, gfp_mask, node);
}

> +
> +	if (addr == VMALLOC_END) {
> +		kmem_cache_free(vmap_area_cachep, va);
> +		goto out;
> +	}
> +
> +	va->va_start = addr;
> +	va->va_end = addr + cvz_size;
> +
> +	fbl_lock(z, FREE);
> +	va = merge_or_add_vmap_area_augment(va,
> +		&fbl_root(z, FREE), &fbl_head(z, FREE));
> +	addr = va_alloc(va, &fbl_root(z, FREE), &fbl_head(z, FREE),
> +		size, align, VMALLOC_START, VMALLOC_END);
> +	fbl_unlock(z, FREE);
> +
> +out:
> +	atomic_set(&z->fill_in_progress, 0);
> +	return addr;
> +}
> +
> +static unsigned long
> +this_cpu_zone_alloc(unsigned long size, unsigned long align, gfp_t gfp_mask, int node)
> +{
> +	struct cpu_vmap_zone *z = raw_cpu_ptr(&cpu_vmap_zone);
> +	unsigned long extra = align > PAGE_SIZE ? align : 0;
> +	unsigned long addr = VMALLOC_END, left = 0;
> +
> +	/*
> +	 * It is disabled, fallback to a global heap.
> +	 */
> +	if (cvz_size == ULONG_MAX)
> +		return addr;
> +
> +	/*
> +	 * Any allocation bigger/equal than one half of
                          ~~~~~~typo~~~~~~  bigger than/equal to
> +	 * a zone-size will fallback to a global heap.
> +	 */
> +	if (cvz_size / (size + extra) < 3)
> +		return addr;
> +
> +	if (RB_EMPTY_ROOT(&fbl_root(z, FREE)))
> +		goto fill;
> +
> +	fbl_lock(z, FREE);
> +	addr = __alloc_vmap_area(&fbl_root(z, FREE), &fbl_head(z, FREE),
> +		size, align, VMALLOC_START, VMALLOC_END);
> +
> +	if (addr == VMALLOC_END)
> +		left = get_subtree_max_size(fbl_root(z, FREE).rb_node);
> +	fbl_unlock(z, FREE);
> +
> +fill:
> +	/*
> +	 * A low watermark is 3 pages.
> +	 */
> +	if (addr == VMALLOC_END && left < 4 * PAGE_SIZE)
> +		addr = this_cpu_zone_alloc_fill(z, size, align, gfp_mask, node);
> +
> +	return addr;
> +}
> +
>  /*
>   * Allocate a region of KVA of the specified size and alignment, within the
>   * vstart and vend.
> @@ -1678,11 +1765,21 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
>  	 */
>  	kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask);
>  
> +	/*
> +	 * Fast path allocation, start with it.
> +	 */
> +	if (vstart == VMALLOC_START && vend == VMALLOC_END)
> +		addr = this_cpu_zone_alloc(size, align, gfp_mask, node);
> +	else
> +		addr = vend;
> +
>  retry:
> -	preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node);
> -	addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list,
> -		size, align, vstart, vend);
> -	spin_unlock(&free_vmap_area_lock);
> +	if (addr == vend) {
> +		preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node);
> +		addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list,
> +			size, align, vstart, vend);
> +		spin_unlock(&free_vmap_area_lock);
> +	}
>  
>  	trace_alloc_vmap_area(addr, size, align, vstart, vend, addr == vend);
>  
> @@ -1827,6 +1924,27 @@ purge_cpu_vmap_zone(struct cpu_vmap_zone *z)
>  	return num_purged_areas;
>  }
>  
> +static void
> +drop_cpu_vmap_cache(struct cpu_vmap_zone *z)
> +{
> +	struct vmap_area *va, *n_va;
> +	LIST_HEAD(free_head);
> +
> +	if (RB_EMPTY_ROOT(&fbl_root(z, FREE)))
> +		return;
> +
> +	fbl_lock(z, FREE);
> +	WRITE_ONCE(fbl(z, FREE, root.rb_node), NULL);
> +	list_replace_init(&fbl_head(z, FREE), &free_head);
> +	fbl_unlock(z, FREE);
> +
> +	spin_lock(&free_vmap_area_lock);
> +	list_for_each_entry_safe(va, n_va, &free_head, list)
> +		merge_or_add_vmap_area_augment(va,
> +			&free_vmap_area_root, &free_vmap_area_list);
> +	spin_unlock(&free_vmap_area_lock);
> +}
> +
>  /*
>   * Purges all lazily-freed vmap areas.
>   */
> @@ -1868,6 +1986,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
>  		for_each_possible_cpu(i) {
>  			z = per_cpu_ptr(&cpu_vmap_zone, i);
>  			num_purged_areas += purge_cpu_vmap_zone(z);
> +			drop_cpu_vmap_cache(z);
>  		}
>  	}
>  
> -- 
> 2.30.2
> 


  reply	other threads:[~2023-06-05  0:44 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-22 11:08 [PATCH 0/9] Mitigate a vmap lock contention Uladzislau Rezki (Sony)
2023-05-22 11:08 ` [PATCH 1/9] mm: vmalloc: Add va_alloc() helper Uladzislau Rezki (Sony)
2023-05-23  6:05   ` Christoph Hellwig
2023-05-23  9:57     ` Uladzislau Rezki
2023-05-27 19:55   ` Lorenzo Stoakes
2023-05-22 11:08 ` [PATCH 2/9] mm: vmalloc: Rename adjust_va_to_fit_type() function Uladzislau Rezki (Sony)
2023-05-23  6:06   ` Christoph Hellwig
2023-05-23 10:01     ` Uladzislau Rezki
2023-05-23 17:24   ` Liam R. Howlett
2023-05-24 11:51     ` Uladzislau Rezki
2023-05-27 21:50   ` Lorenzo Stoakes
2023-05-29 20:37     ` Uladzislau Rezki
2023-05-22 11:08 ` [PATCH 3/9] mm: vmalloc: Move vmap_init_free_space() down in vmalloc.c Uladzislau Rezki (Sony)
2023-05-23  6:06   ` Christoph Hellwig
2023-05-27 21:52   ` Lorenzo Stoakes
2023-05-22 11:08 ` [PATCH 4/9] mm: vmalloc: Add a per-CPU-zone infrastructure Uladzislau Rezki (Sony)
2023-05-23  6:08   ` Christoph Hellwig
2023-05-23 14:53     ` Uladzislau Rezki
2023-05-23 15:13       ` Christoph Hellwig
2023-05-23 15:32         ` Uladzislau Rezki
2023-05-22 11:08 ` [PATCH 5/9] mm: vmalloc: Insert busy-VA per-cpu zone Uladzislau Rezki (Sony)
2023-05-23  6:12   ` Christoph Hellwig
2023-05-23 15:00     ` Uladzislau Rezki
2023-05-22 11:08 ` [PATCH 6/9] mm: vmalloc: Support multiple zones in vmallocinfo Uladzislau Rezki (Sony)
2023-05-22 11:08 ` [PATCH 7/9] mm: vmalloc: Insert lazy-VA per-cpu zone Uladzislau Rezki (Sony)
2023-05-22 11:08 ` [PATCH 8/9] mm: vmalloc: Offload free_vmap_area_lock global lock Uladzislau Rezki (Sony)
2023-06-05  0:43   ` Baoquan He [this message]
2023-06-06  9:01     ` Uladzislau Rezki
2023-06-06 12:11       ` Baoquan He
2023-06-07  6:58         ` Uladzislau Rezki
2023-05-22 11:08 ` [PATCH 9/9] mm: vmalloc: Scale and activate cvz_size Uladzislau Rezki (Sony)
2023-05-23 11:59 ` [PATCH 0/9] Mitigate a vmap lock contention Hyeonggon Yoo
2023-05-23 15:12   ` Uladzislau Rezki
2023-05-23 18:04     ` Hyeonggon Yoo
2023-05-23 21:43       ` Dave Chinner
2023-05-24  1:30         ` Hyeonggon Yoo
2023-05-24  9:50       ` Uladzislau Rezki
2023-05-24 21:56         ` Dave Chinner
2023-05-25  7:59           ` Christoph Hellwig
2023-05-25 10:20           ` Uladzislau Rezki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZH0vuwaSddREy9dz@MiWiFi-R3L-srv \
    --to=bhe@redhat.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=david@fromorbit.com \
    --cc=hch@infradead.org \
    --cc=joel@joelfernandes.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lstoakes@gmail.com \
    --cc=oleksiy.avramchenko@sony.com \
    --cc=paulmck@kernel.org \
    --cc=urezki@gmail.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.