public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Robin Murphy <robin.murphy@arm.com>
To: Alexander Lobakin <aleksander.lobakin@intel.com>,
	"David S. Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>,
	Marek Szyprowski <m.szyprowski@samsung.com>,
	Joerg Roedel <joro@8bytes.org>, Will Deacon <will@kernel.org>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	"Rafael J. Wysocki" <rafael@kernel.org>,
	Magnus Karlsson <magnus.karlsson@intel.com>,
	Maciej Fijalkowski <maciej.fijalkowski@intel.com>,
	Alexander Duyck <alexanderduyck@fb.com>,
	bpf@vger.kernel.org, netdev@vger.kernel.org,
	iommu@lists.linux.dev, linux-kernel@vger.kernel.org
Subject: Re: [PATCH net-next v3 1/7] dma: compile-out DMA sync op calls when not used
Date: Wed, 14 Feb 2024 17:20:50 +0000	[thread overview]
Message-ID: <893ad3a4-ba24-43cf-8200-b8cd7742622d@arm.com> (raw)
In-Reply-To: <20240214162201.4168778-2-aleksander.lobakin@intel.com>

On 2024-02-14 4:21 pm, Alexander Lobakin wrote:
> Some platforms do have DMA, but DMA there is always direct and coherent.
> Currently, even on such platforms DMA sync operations are compiled and
> called.
> Add a new hidden Kconfig symbol, DMA_NEED_SYNC, and set it only when
> either sync operations are needed or DMA ops or swiotlb are
> enabled. Set dma_need_sync() and dma_skip_sync() depending on this
> symbol state and don't call sync ops when dma_skip_sync() is true.
> The change allows for future optimizations of DMA sync calls depending
> on compile-time or runtime conditions.
> 
> Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
> ---
>   kernel/dma/Kconfig          |  4 ++
>   include/linux/dma-mapping.h | 80 +++++++++++++++++++++++++++++++------
>   kernel/dma/mapping.c        | 20 +++++-----
>   3 files changed, 81 insertions(+), 23 deletions(-)
> 
> diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
> index d62f5957f36b..1c9ff05b1ecb 100644
> --- a/kernel/dma/Kconfig
> +++ b/kernel/dma/Kconfig
> @@ -107,6 +107,10 @@ config DMA_BOUNCE_UNALIGNED_KMALLOC
>   	bool
>   	depends on SWIOTLB
>   
> +config DMA_NEED_SYNC
> +	def_bool ARCH_HAS_SYNC_DMA_FOR_DEVICE || ARCH_HAS_SYNC_DMA_FOR_CPU || \
> +		 ARCH_HAS_SYNC_DMA_FOR_CPU_ALL || DMA_OPS || SWIOTLB

I'm not sure DMA_OPS belongs here - several architectures have 
non-trivial ops without syncs, e.g. Alpha.

> +
>   config DMA_RESTRICTED_POOL
>   	bool "DMA Restricted Pool"
>   	depends on OF && OF_RESERVED_MEM && SWIOTLB
> diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
> index 4a658de44ee9..6c7640441214 100644
> --- a/include/linux/dma-mapping.h
> +++ b/include/linux/dma-mapping.h
> @@ -117,13 +117,13 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr,
>   		size_t size, enum dma_data_direction dir, unsigned long attrs);
>   void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
>   		enum dma_data_direction dir, unsigned long attrs);
> -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
> +void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
>   		enum dma_data_direction dir);
> -void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
> +void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
>   		size_t size, enum dma_data_direction dir);
> -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
> +void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
>   		    int nelems, enum dma_data_direction dir);
> -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
> +void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
>   		       int nelems, enum dma_data_direction dir);
>   void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
>   		gfp_t flag, unsigned long attrs);
> @@ -147,7 +147,7 @@ u64 dma_get_required_mask(struct device *dev);
>   bool dma_addressing_limited(struct device *dev);
>   size_t dma_max_mapping_size(struct device *dev);
>   size_t dma_opt_mapping_size(struct device *dev);
> -bool dma_need_sync(struct device *dev, dma_addr_t dma_addr);
> +bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr);
>   unsigned long dma_get_merge_boundary(struct device *dev);
>   struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size,
>   		enum dma_data_direction dir, gfp_t gfp, unsigned long attrs);
> @@ -195,19 +195,19 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr,
>   		size_t size, enum dma_data_direction dir, unsigned long attrs)
>   {
>   }
> -static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
> -		size_t size, enum dma_data_direction dir)
> +static inline void __dma_sync_single_for_cpu(struct device *dev,
> +		dma_addr_t addr, size_t size, enum dma_data_direction dir)

To me it would feel more logical to put all the wrappers inside the 
#ifdef CONFIG_HAS_DMA and not touch these stubs at all (what does it 
mean to skip an inline no-op?). Or in fact, if dma_skip_sync() is 
constant true for !HAS_DMA, then we could also just make the external 
function declarations unconditional and remove the stubs. Not a critical 
matter though, and I defer to whatever Christoph thinks is most 
maintainable.

>   {
>   }
> -static inline void dma_sync_single_for_device(struct device *dev,
> +static inline void __dma_sync_single_for_device(struct device *dev,
>   		dma_addr_t addr, size_t size, enum dma_data_direction dir)
>   {
>   }
> -static inline void dma_sync_sg_for_cpu(struct device *dev,
> +static inline void __dma_sync_sg_for_cpu(struct device *dev,
>   		struct scatterlist *sg, int nelems, enum dma_data_direction dir)
>   {
>   }
> -static inline void dma_sync_sg_for_device(struct device *dev,
> +static inline void __dma_sync_sg_for_device(struct device *dev,
>   		struct scatterlist *sg, int nelems, enum dma_data_direction dir)
>   {
>   }
> @@ -277,7 +277,7 @@ static inline size_t dma_opt_mapping_size(struct device *dev)
>   {
>   	return 0;
>   }
> -static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
> +static inline bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr)
>   {
>   	return false;
>   }
> @@ -348,18 +348,72 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr,
>   	return dma_unmap_page_attrs(dev, addr, size, dir, attrs);
>   }
>   
> +static inline void __dma_sync_single_range_for_cpu(struct device *dev,
> +		dma_addr_t addr, unsigned long offset, size_t size,
> +		enum dma_data_direction dir)
> +{
> +	__dma_sync_single_for_cpu(dev, addr + offset, size, dir);
> +}
> +
> +static inline void __dma_sync_single_range_for_device(struct device *dev,
> +		dma_addr_t addr, unsigned long offset, size_t size,
> +		enum dma_data_direction dir)
> +{
> +	__dma_sync_single_for_device(dev, addr + offset, size, dir);
> +}

There is no need to introduce these two.

> +
> +static inline bool dma_skip_sync(const struct device *dev)
> +{
> +	return !IS_ENABLED(CONFIG_DMA_NEED_SYNC);
> +}
> +
> +static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
> +{
> +	return !dma_skip_sync(dev) ? __dma_need_sync(dev, dma_addr) : false;
> +}

That's a bit of a mind-bender... is it actually just

	return !dma_skip_sync(dev) && __dma_need_sync(dev, dma_addr);

?

(I do still think the negative flag makes it all a little harder to 
follow in general than a positive "device needs to consider syncs" flag 
would.)

> +static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
> +		size_t size, enum dma_data_direction dir)
> +{
> +	if (!dma_skip_sync(dev))
> +		__dma_sync_single_for_cpu(dev, addr, size, dir);
> +}
> +
> +static inline void dma_sync_single_for_device(struct device *dev,
> +		dma_addr_t addr, size_t size, enum dma_data_direction dir)
> +{
> +	if (!dma_skip_sync(dev))
> +		__dma_sync_single_for_device(dev, addr, size, dir);
> +}
> +
> +static inline void dma_sync_sg_for_cpu(struct device *dev,
> +		struct scatterlist *sg, int nelems, enum dma_data_direction dir)
> +{
> +	if (!dma_skip_sync(dev))
> +		__dma_sync_sg_for_cpu(dev, sg, nelems, dir);
> +}
> +
> +static inline void dma_sync_sg_for_device(struct device *dev,
> +		struct scatterlist *sg, int nelems, enum dma_data_direction dir)
> +{
> +	if (!dma_skip_sync(dev))
> +		__dma_sync_sg_for_device(dev, sg, nelems, dir);
> +}
> +
>   static inline void dma_sync_single_range_for_cpu(struct device *dev,
>   		dma_addr_t addr, unsigned long offset, size_t size,
>   		enum dma_data_direction dir)
>   {
> -	return dma_sync_single_for_cpu(dev, addr + offset, size, dir);
> +	if (!dma_skip_sync(dev))
> +		__dma_sync_single_for_cpu(dev, addr + offset, size, dir);
>   }
>   
>   static inline void dma_sync_single_range_for_device(struct device *dev,
>   		dma_addr_t addr, unsigned long offset, size_t size,
>   		enum dma_data_direction dir)
>   {
> -	return dma_sync_single_for_device(dev, addr + offset, size, dir);
> +	if (!dma_skip_sync(dev))
> +		__dma_sync_single_for_device(dev, addr + offset, size, dir);
>   }

These two don't need changing either, since the dma_sync_single_* 
wrappers they call already perform the dma_skip_sync() check.

Thanks,
Robin.

  reply	other threads:[~2024-02-14 17:20 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-14 16:21 [PATCH net-next v3 0/7] dma: skip calling no-op sync ops when possible Alexander Lobakin
2024-02-14 16:21 ` [PATCH net-next v3 1/7] dma: compile-out DMA sync op calls when not used Alexander Lobakin
2024-02-14 17:20   ` Robin Murphy [this message]
2024-02-15  5:06     ` Christoph Hellwig
2024-02-19 12:53     ` Alexander Lobakin
2024-02-26 16:27       ` Robin Murphy
2024-02-14 18:09   ` Robin Murphy
2024-02-15  5:06     ` Christoph Hellwig
2024-02-14 16:21 ` [PATCH net-next v3 2/7] dma: avoid redundant calls for sync operations Alexander Lobakin
2024-02-14 17:55   ` Robin Murphy
2024-02-15  5:08     ` Christoph Hellwig
2024-02-15 11:36       ` Robin Murphy
2024-02-19 12:49     ` Alexander Lobakin
2024-02-26 15:45       ` Robin Murphy
2024-02-14 16:21 ` [PATCH net-next v3 3/7] iommu/dma: avoid expensive indirect " Alexander Lobakin
2024-02-14 17:58   ` Robin Murphy
2024-02-14 16:21 ` [PATCH net-next v3 4/7] page_pool: make sure frag API fields don't span between cachelines Alexander Lobakin
2024-02-14 16:21 ` [PATCH net-next v3 5/7] page_pool: don't use driver-set flags field directly Alexander Lobakin
2024-02-14 16:22 ` [PATCH net-next v3 6/7] page_pool: check for DMA sync shortcut earlier Alexander Lobakin
2024-02-14 16:22 ` [PATCH net-next v3 7/7] xsk: use generic DMA sync shortcut instead of a custom one Alexander Lobakin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=893ad3a4-ba24-43cf-8200-b8cd7742622d@arm.com \
    --to=robin.murphy@arm.com \
    --cc=aleksander.lobakin@intel.com \
    --cc=alexanderduyck@fb.com \
    --cc=bpf@vger.kernel.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=hch@lst.de \
    --cc=iommu@lists.linux.dev \
    --cc=joro@8bytes.org \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=m.szyprowski@samsung.com \
    --cc=maciej.fijalkowski@intel.com \
    --cc=magnus.karlsson@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=rafael@kernel.org \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.