From: Tianyu Lan <ltykernel@gmail.com>
To: hch@infradead.org, robin.murphy@arm.com
Cc: parri.andrea@gmail.com, thomas.lendacky@amd.com,
wei.liu@kernel.org, Tianyu Lan <Tianyu.Lan@microsoft.com>,
linux-hyperv@vger.kernel.org, konrad.wilk@oracle.com,
linux-kernel@vger.kernel.org, kirill.shutemov@intel.com,
iommu@lists.linux-foundation.org, michael.h.kelley@microsoft.com,
andi.kleen@intel.com, brijesh.singh@amd.com, vkuznets@redhat.com,
kys@microsoft.com, hch@lst.de
Subject: Re: [RFC PATCH] swiotlb: Add Child IO TLB mem support
Date: Fri, 29 Apr 2022 22:25:53 +0800 [thread overview]
Message-ID: <c0e70b17-cdf1-4fd8-f807-e4b9ccad44fd@gmail.com> (raw)
In-Reply-To: <20220429142147.1725184-1-ltykernel@gmail.com>
On 4/29/2022 10:21 PM, Tianyu Lan wrote:
> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
>
> Traditionally swiotlb was not performance critical because it was only
> used for slow devices. But in some setups, like TDX/SEV confidential
> guests, all IO has to go through swiotlb. Currently swiotlb only has a
> single lock. Under high IO load with multiple CPUs this can lead to
> significant lock contention on the swiotlb lock.
>
> This patch adds child IO TLB mem support to resolve spinlock overhead
> among device's queues. Each device may allocate IO tlb mem and set up
> child IO TLB mem according to queue number. Swiotlb code allocates
> bounce buffer among child IO tlb mem iteratively.
>
Hi Robin and Christoph:
     Following Robin's idea, I drafted this patch. Please have a look
and check whether it is heading in the right direction.
Thanks.
> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
> ---
> include/linux/swiotlb.h | 7 +++
> kernel/dma/swiotlb.c | 96 ++++++++++++++++++++++++++++++++++++-----
> 2 files changed, 93 insertions(+), 10 deletions(-)
>
> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> index 7ed35dd3de6e..4a3f6a7b4b7e 100644
> --- a/include/linux/swiotlb.h
> +++ b/include/linux/swiotlb.h
> @@ -89,6 +89,9 @@ extern enum swiotlb_force swiotlb_force;
> * @late_alloc: %true if allocated using the page allocator
> * @force_bounce: %true if swiotlb bouncing is forced
> * @for_alloc: %true if the pool is used for memory allocation
> + * @child_nslot:The number of IO TLB slot in the child IO TLB mem.
> + * @num_child: The child io tlb mem number in the pool.
> + * @child_start:The child index to start searching in the next round.
> */
> struct io_tlb_mem {
> phys_addr_t start;
> @@ -102,6 +105,10 @@ struct io_tlb_mem {
> bool late_alloc;
> bool force_bounce;
> bool for_alloc;
> + unsigned int num_child;
> + unsigned int child_nslot;
> + unsigned int child_start;
> + struct io_tlb_mem *child;
> struct io_tlb_slot {
> phys_addr_t orig_addr;
> size_t alloc_size;
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index e2ef0864eb1e..382fa2288645 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -207,6 +207,25 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
> mem->force_bounce = true;
>
> spin_lock_init(&mem->lock);
> +
> + if (mem->num_child) {
> + mem->child_nslot = nslabs / mem->num_child;
> + mem->child_start = 0;
> +
> + /*
> + * Initialize child IO TLB mem, divide IO TLB pool
> + * into child number. Reuse parent mem->slot in the
> + * child mem->slot.
> + */
> + for (i = 0; i < mem->num_child; i++) {
> + mem->num_child = 0;
> + mem->child[i].slots = mem->slots + i * mem->child_nslot;
> + swiotlb_init_io_tlb_mem(&mem->child[i],
> + start + ((i * mem->child_nslot) << IO_TLB_SHIFT),
> + mem->child_nslot, late_alloc);
> + }
> + }
> +
> for (i = 0; i < mem->nslabs; i++) {
> mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
> mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
> @@ -336,16 +355,18 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
>
> mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
> get_order(array_size(sizeof(*mem->slots), nslabs)));
> - if (!mem->slots) {
> - free_pages((unsigned long)vstart, order);
> - return -ENOMEM;
> - }
> + if (!mem->slots)
> + goto error_slots;
>
> set_memory_decrypted((unsigned long)vstart, bytes >> PAGE_SHIFT);
> swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, true);
>
> swiotlb_print_info();
> return 0;
> +
> +error_slots:
> + free_pages((unsigned long)vstart, order);
> + return -ENOMEM;
> }
>
> void __init swiotlb_exit(void)
> @@ -483,10 +504,11 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index)
> * Find a suitable number of IO TLB entries size that will fit this request and
> * allocate a buffer from that IO TLB pool.
> */
> -static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
> - size_t alloc_size, unsigned int alloc_align_mask)
> +static int swiotlb_do_find_slots(struct io_tlb_mem *mem,
> + struct device *dev, phys_addr_t orig_addr,
> + size_t alloc_size,
> + unsigned int alloc_align_mask)
> {
> - struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
> unsigned long boundary_mask = dma_get_seg_boundary(dev);
> dma_addr_t tbl_dma_addr =
> phys_to_dma_unencrypted(dev, mem->start) & boundary_mask;
> @@ -565,6 +587,46 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
> return index;
> }
>
> +static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
> + size_t alloc_size, unsigned int alloc_align_mask)
> +{
> + struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
> + struct io_tlb_mem *child_mem = mem;
> + int start = 0, i = 0, index;
> +
> + if (mem->num_child) {
> + i = start = mem->child_start;
> + mem->child_start = (mem->child_start + 1) % mem->num_child;
> + child_mem = mem->child;
> + }
> +
> + do {
> + index = swiotlb_do_find_slots(child_mem + i, dev, orig_addr,
> + alloc_size, alloc_align_mask);
> + if (index >= 0)
> + return i * mem->child_nslot + index;
> + if (++i >= mem->num_child)
> + i = 0;
> + } while (i != start);
> +
> + return -1;
> +}
> +
> +static unsigned long mem_used(struct io_tlb_mem *mem)
> +{
> + int i;
> + unsigned long used = 0;
> +
> + if (mem->num_child) {
> + for (i = 0; i < mem->num_child; i++)
> + used += mem->child[i].used;
> + } else {
> + used = mem->used;
> + }
> +
> + return used;
> +}
> +
> phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
> size_t mapping_size, size_t alloc_size,
> unsigned int alloc_align_mask, enum dma_data_direction dir,
> @@ -594,7 +656,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
> if (!(attrs & DMA_ATTR_NO_WARN))
> dev_warn_ratelimited(dev,
> "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
> - alloc_size, mem->nslabs, mem->used);
> + alloc_size, mem->nslabs, mem_used(mem));
> return (phys_addr_t)DMA_MAPPING_ERROR;
> }
>
> @@ -617,9 +679,9 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
> return tlb_addr;
> }
>
> -static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
> +static void swiotlb_do_release_slots(struct io_tlb_mem *mem,
> + struct device *dev, phys_addr_t tlb_addr)
> {
> - struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
> unsigned long flags;
> unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
> int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
> @@ -660,6 +722,20 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
> spin_unlock_irqrestore(&mem->lock, flags);
> }
>
> +static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
> +{
> + struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
> + int index, offset;
> +
> + if (mem->num_child) {
> + offset = swiotlb_align_offset(dev, tlb_addr);
> + index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
> + mem = &mem->child[index / mem->child_nslot];
> + }
> +
> + swiotlb_do_release_slots(mem, dev, tlb_addr);
> +}
> +
> /*
> * tlb_addr is the physical address of the bounce buffer to unmap.
> */
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
next prev parent reply other threads:[~2022-04-29 14:26 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-04-28 14:14 [RFC PATCH 0/2] swiotlb: Introduce swiotlb device allocation function Tianyu Lan
2022-04-28 14:14 ` [RFC PATCH 1/2] swiotlb: Split up single swiotlb lock Tianyu Lan
2022-04-28 14:44 ` Robin Murphy
2022-04-28 14:45 ` Christoph Hellwig
2022-04-28 14:55 ` Andi Kleen
2022-04-28 15:05 ` Christoph Hellwig
2022-04-28 15:16 ` Andi Kleen
2022-04-28 15:07 ` Robin Murphy
2022-04-28 16:02 ` Andi Kleen
2022-04-28 16:59 ` Robin Murphy
2022-04-28 14:56 ` Robin Murphy
2022-04-28 15:54 ` Tianyu Lan
2022-04-29 14:21 ` [RFC PATCH] swiotlb: Add Child IO TLB mem support Tianyu Lan
2022-04-29 14:25 ` Tianyu Lan [this message]
2022-04-28 14:14 ` [RFC PATCH 2/2] Swiotlb: Add device bounce buffer allocation interface Tianyu Lan
2022-04-28 15:50 ` Tianyu Lan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=c0e70b17-cdf1-4fd8-f807-e4b9ccad44fd@gmail.com \
--to=ltykernel@gmail.com \
--cc=Tianyu.Lan@microsoft.com \
--cc=andi.kleen@intel.com \
--cc=brijesh.singh@amd.com \
--cc=hch@infradead.org \
--cc=hch@lst.de \
--cc=iommu@lists.linux-foundation.org \
--cc=kirill.shutemov@intel.com \
--cc=konrad.wilk@oracle.com \
--cc=kys@microsoft.com \
--cc=linux-hyperv@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=michael.h.kelley@microsoft.com \
--cc=parri.andrea@gmail.com \
--cc=robin.murphy@arm.com \
--cc=thomas.lendacky@amd.com \
--cc=vkuznets@redhat.com \
--cc=wei.liu@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox