All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 0/2] Fix examples/distributor build issue for non x86
@ 2015-12-06 15:24 Jerin Jacob
  2015-12-06 15:24 ` [PATCH 1/2] eal: introduce rte_prefetch_non_temporal Jerin Jacob
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Jerin Jacob @ 2015-12-06 15:24 UTC (permalink / raw)
  To: dev

Introduce rte_prefetch_non_temporal() to remove the IA-specific
_mm_prefetch(addr, 0) gcc intrinsic and build examples/distributor on
non-x86 platforms.

I am not sure the rte_prefetch_non_temporal() mapping is correct for
all the platforms. Architecture maintainers, please check the mapping of
rte_prefetch_non_temporal() for your specific architectures.

Jerin Jacob (2):
  eal: introduce rte_prefetch_non_temporal
  examples/distributor: remove IA specific __mm_prefetch

 examples/distributor/main.c                              |  9 +++++----
 lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h |  5 +++++
 lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h |  5 +++++
 lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h |  5 +++++
 lib/librte_eal/common/include/arch/tile/rte_prefetch.h   |  5 +++++
 lib/librte_eal/common/include/arch/x86/rte_prefetch.h    |  5 +++++
 lib/librte_eal/common/include/generic/rte_prefetch.h     | 12 ++++++++++++
 7 files changed, 42 insertions(+), 4 deletions(-)

--
2.1.0

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/2] eal: introduce rte_prefetch_non_temporal
  2015-12-06 15:24 [RFC PATCH 0/2] Fix examples/distributor build issue for non x86 Jerin Jacob
@ 2015-12-06 15:24 ` Jerin Jacob
  2016-02-11 11:00   ` Thomas Monjalon
  2016-02-11 11:43   ` Jan Viktorin
  2015-12-06 15:24 ` [PATCH 2/2] examples/distributor: remove IA specific __mm_prefetch Jerin Jacob
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 12+ messages in thread
From: Jerin Jacob @ 2015-12-06 15:24 UTC (permalink / raw)
  To: dev

non-temporal/transient/stream version of rte_prefetch0()

The non-temporal prefetch is intended as a hint that the processor
will use the prefetched data only once or for a short period,
unlike the rte_prefetch0() function, which implies that the
prefetched data will be used repeatedly.

Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
 lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h |  5 +++++
 lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h |  5 +++++
 lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h |  5 +++++
 lib/librte_eal/common/include/arch/tile/rte_prefetch.h   |  5 +++++
 lib/librte_eal/common/include/arch/x86/rte_prefetch.h    |  5 +++++
 lib/librte_eal/common/include/generic/rte_prefetch.h     | 12 ++++++++++++
 6 files changed, 37 insertions(+)

diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
index b716384..3157224 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
@@ -54,6 +54,11 @@ static inline void rte_prefetch2(const volatile void *p)
 	asm volatile ("pld [%0]" : : "r" (p));
 }
 
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+	rte_prefetch0(p);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
index f9cc62e..3ed46a4 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
@@ -54,6 +54,11 @@ static inline void rte_prefetch2(const volatile void *p)
 	asm volatile ("PRFM PLDL3KEEP, [%0]" : : "r" (p));
 }
 
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+	asm volatile ("PRFM PLDL1STRM, [%0]" : : "r" (p));
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
index fea3be1..cab6fe0 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
@@ -54,6 +54,11 @@ static inline void rte_prefetch2(const volatile void *p)
 	asm volatile ("dcbt 0,%[p],1" : : [p] "r" (p));
 }
 
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+	rte_prefetch0(p);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/arch/tile/rte_prefetch.h b/lib/librte_eal/common/include/arch/tile/rte_prefetch.h
index c94075c..19d3c6e 100644
--- a/lib/librte_eal/common/include/arch/tile/rte_prefetch.h
+++ b/lib/librte_eal/common/include/arch/tile/rte_prefetch.h
@@ -54,6 +54,11 @@ static inline void rte_prefetch2(const volatile void *p)
 	__builtin_prefetch((const void *)(uintptr_t)p, 0, 1);
 }
 
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+	rte_prefetch0(p);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/arch/x86/rte_prefetch.h b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
index 8e6e02c..5dac47e 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
@@ -55,6 +55,11 @@ static inline void rte_prefetch2(const volatile void *p)
 	asm volatile ("prefetcht2 %[p]" : : [p] "m" (*(const volatile char *)p));
 }
 
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+	asm volatile ("prefetchnta %[p]" : : [p] "m" (*(const volatile char *)p));
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/generic/rte_prefetch.h b/lib/librte_eal/common/include/generic/rte_prefetch.h
index 725715f..95c3fbc 100644
--- a/lib/librte_eal/common/include/generic/rte_prefetch.h
+++ b/lib/librte_eal/common/include/generic/rte_prefetch.h
@@ -68,4 +68,16 @@ static inline void rte_prefetch1(const volatile void *p);
  */
 static inline void rte_prefetch2(const volatile void *p);
 
+/**
+ * Prefetch a cache line into all cache levels(non-temporal/transient version)
+ *
+ * The non-temporal prefetch is intended as a prefetch hint that processor will
+ * use the prefetched data only once or short period, unlike the
+ * rte_prefetch0() function which imply that prefetched data to use repeatedly.
+ *
+ * @param p
+ *   Address to prefetch
+ */
+static inline void rte_prefetch_non_temporal(const volatile void *p);
+
 #endif /* _RTE_PREFETCH_H_ */
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/2] examples/distributor: remove IA specific __mm_prefetch
  2015-12-06 15:24 [RFC PATCH 0/2] Fix examples/distributor build issue for non x86 Jerin Jacob
  2015-12-06 15:24 ` [PATCH 1/2] eal: introduce rte_prefetch_non_temporal Jerin Jacob
@ 2015-12-06 15:24 ` Jerin Jacob
  2016-01-29  3:21 ` [RFC PATCH 0/2] Fix examples/distributor build issue for non x86 Jerin Jacob
  2016-02-12 11:13 ` [PATCH v2 " Jerin Jacob
  3 siblings, 0 replies; 12+ messages in thread
From: Jerin Jacob @ 2015-12-06 15:24 UTC (permalink / raw)
  To: dev

Use the rte_prefetch_non_temporal() abstraction instead of _mm_prefetch(x, 0)
in order to build the distributor application on non-x86 platforms.

Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
 examples/distributor/main.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/examples/distributor/main.c b/examples/distributor/main.c
index 4e74f8f..c0201a9 100644
--- a/examples/distributor/main.c
+++ b/examples/distributor/main.c
@@ -42,6 +42,7 @@
 #include <rte_cycles.h>
 #include <rte_malloc.h>
 #include <rte_debug.h>
+#include <rte_prefetch.h>
 #include <rte_distributor.h>
 
 #define RX_RING_SIZE 256
@@ -335,13 +336,13 @@ lcore_tx(struct rte_ring *in_r)
 
 			/* for traffic we receive, queue it up for transmit */
 			uint16_t i;
-			_mm_prefetch((void *)bufs[0], 0);
-			_mm_prefetch((void *)bufs[1], 0);
-			_mm_prefetch((void *)bufs[2], 0);
+			rte_prefetch_non_temporal((void *)bufs[0]);
+			rte_prefetch_non_temporal((void *)bufs[1]);
+			rte_prefetch_non_temporal((void *)bufs[2]);
 			for (i = 0; i < nb_rx; i++) {
 				struct output_buffer *outbuf;
 				uint8_t outp;
-				_mm_prefetch((void *)bufs[i + 3], 0);
+				rte_prefetch_non_temporal((void *)bufs[i + 3]);
 				/*
 				 * workers should update in_port to hold the
 				 * output port value
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH 0/2] Fix examples/distributor build issue for non x86
  2015-12-06 15:24 [RFC PATCH 0/2] Fix examples/distributor build issue for non x86 Jerin Jacob
  2015-12-06 15:24 ` [PATCH 1/2] eal: introduce rte_prefetch_non_temporal Jerin Jacob
  2015-12-06 15:24 ` [PATCH 2/2] examples/distributor: remove IA specific __mm_prefetch Jerin Jacob
@ 2016-01-29  3:21 ` Jerin Jacob
  2016-01-29 15:03   ` Bruce Richardson
  2016-02-12 11:13 ` [PATCH v2 " Jerin Jacob
  3 siblings, 1 reply; 12+ messages in thread
From: Jerin Jacob @ 2016-01-29  3:21 UTC (permalink / raw)
  To: dev; +Cc: viktorin

On Sun, Dec 06, 2015 at 08:54:28PM +0530, Jerin Jacob wrote:
> Introduced rte_prefetch_non_temporal() to remove IA specific _mm_prefect(addr, 0)
> gcc intrinsic and build examples/distributor for non 86 platform

ping for review.

> 
> Not sure the rte_prefetch_non_temporal mapping correct for
> all the platforms. Architecture maintainers please check the mapping for
> rte_prefetch_non_temporal() for specific architecures
> 
> Jerin Jacob (2):
>   eal: introduce rte_prefetch_non_temporal
>   examples/distributor: remove IA specific __mm_prefetch
> 
>  examples/distributor/main.c                              |  9 +++++----
>  lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h |  5 +++++
>  lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h |  5 +++++
>  lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h |  5 +++++
>  lib/librte_eal/common/include/arch/tile/rte_prefetch.h   |  5 +++++
>  lib/librte_eal/common/include/arch/x86/rte_prefetch.h    |  5 +++++
>  lib/librte_eal/common/include/generic/rte_prefetch.h     | 12 ++++++++++++
>  7 files changed, 42 insertions(+), 4 deletions(-)
> 
> --
> 2.1.0
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH 0/2] Fix examples/distributor build issue for non x86
  2016-01-29  3:21 ` [RFC PATCH 0/2] Fix examples/distributor build issue for non x86 Jerin Jacob
@ 2016-01-29 15:03   ` Bruce Richardson
  2016-01-29 16:53     ` Jerin Jacob
  0 siblings, 1 reply; 12+ messages in thread
From: Bruce Richardson @ 2016-01-29 15:03 UTC (permalink / raw)
  To: Jerin Jacob; +Cc: dev, viktorin

On Fri, Jan 29, 2016 at 08:51:41AM +0530, Jerin Jacob wrote:
> On Sun, Dec 06, 2015 at 08:54:28PM +0530, Jerin Jacob wrote:
> > Introduced rte_prefetch_non_temporal() to remove IA specific _mm_prefect(addr, 0)
> > gcc intrinsic and build examples/distributor for non 86 platform
> 
> ping for review.
> 

Is there much performance difference between making this a prefetch NT vs making
it an rte_prefetch0 on the platforms you have tested?

/Bruce

> > 
> > Not sure the rte_prefetch_non_temporal mapping correct for
> > all the platforms. Architecture maintainers please check the mapping for
> > rte_prefetch_non_temporal() for specific architecures
> > 
> > Jerin Jacob (2):
> >   eal: introduce rte_prefetch_non_temporal
> >   examples/distributor: remove IA specific __mm_prefetch
> > 
> >  examples/distributor/main.c                              |  9 +++++----
> >  lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h |  5 +++++
> >  lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h |  5 +++++
> >  lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h |  5 +++++
> >  lib/librte_eal/common/include/arch/tile/rte_prefetch.h   |  5 +++++
> >  lib/librte_eal/common/include/arch/x86/rte_prefetch.h    |  5 +++++
> >  lib/librte_eal/common/include/generic/rte_prefetch.h     | 12 ++++++++++++
> >  7 files changed, 42 insertions(+), 4 deletions(-)
> > 
> > --
> > 2.1.0
> > 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH 0/2] Fix examples/distributor build issue for non x86
  2016-01-29 15:03   ` Bruce Richardson
@ 2016-01-29 16:53     ` Jerin Jacob
  0 siblings, 0 replies; 12+ messages in thread
From: Jerin Jacob @ 2016-01-29 16:53 UTC (permalink / raw)
  To: Bruce Richardson; +Cc: dev, viktorin

On Fri, Jan 29, 2016 at 08:03:37AM -0700, Bruce Richardson wrote:
> On Fri, Jan 29, 2016 at 08:51:41AM +0530, Jerin Jacob wrote:
> > On Sun, Dec 06, 2015 at 08:54:28PM +0530, Jerin Jacob wrote:
> > > Introduced rte_prefetch_non_temporal() to remove IA specific _mm_prefect(addr, 0)
> > > gcc intrinsic and build examples/distributor for non 86 platform
> > 
> > ping for review.
> > 
> 
> Is there much performance difference between making this a prefetch NT vs making
> it an rte_prefetch0 on the platforms you have tested?

Not much difference. But I think it's worth keeping the abstraction, as
the IA and arm64 architectures support it.

Jerin

> 
> /Bruce
> 
> > > 
> > > Not sure the rte_prefetch_non_temporal mapping correct for
> > > all the platforms. Architecture maintainers please check the mapping for
> > > rte_prefetch_non_temporal() for specific architecures
> > > 
> > > Jerin Jacob (2):
> > >   eal: introduce rte_prefetch_non_temporal
> > >   examples/distributor: remove IA specific __mm_prefetch
> > > 
> > >  examples/distributor/main.c                              |  9 +++++----
> > >  lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h |  5 +++++
> > >  lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h |  5 +++++
> > >  lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h |  5 +++++
> > >  lib/librte_eal/common/include/arch/tile/rte_prefetch.h   |  5 +++++
> > >  lib/librte_eal/common/include/arch/x86/rte_prefetch.h    |  5 +++++
> > >  lib/librte_eal/common/include/generic/rte_prefetch.h     | 12 ++++++++++++
> > >  7 files changed, 42 insertions(+), 4 deletions(-)
> > > 
> > > --
> > > 2.1.0
> > > 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] eal: introduce rte_prefetch_non_temporal
  2015-12-06 15:24 ` [PATCH 1/2] eal: introduce rte_prefetch_non_temporal Jerin Jacob
@ 2016-02-11 11:00   ` Thomas Monjalon
  2016-02-11 11:43   ` Jan Viktorin
  1 sibling, 0 replies; 12+ messages in thread
From: Thomas Monjalon @ 2016-02-11 11:00 UTC (permalink / raw)
  To: Jerin Jacob; +Cc: dev, viktorin

Please arch maintainers, your ack would be appreciated.

2015-12-06 20:54, Jerin Jacob:
> +static inline void rte_prefetch_non_temporal(const volatile void *p)
> +{
> +	rte_prefetch0(p);
> +}

A comment about using the same instruction for temporal and non-temporal?

> +/**
> + * Prefetch a cache line into all cache levels(non-temporal/transient version)

space missing before paren

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] eal: introduce rte_prefetch_non_temporal
  2015-12-06 15:24 ` [PATCH 1/2] eal: introduce rte_prefetch_non_temporal Jerin Jacob
  2016-02-11 11:00   ` Thomas Monjalon
@ 2016-02-11 11:43   ` Jan Viktorin
  1 sibling, 0 replies; 12+ messages in thread
From: Jan Viktorin @ 2016-02-11 11:43 UTC (permalink / raw)
  To: Jerin Jacob; +Cc: dev

I am OK with this patch.

On Sun, 6 Dec 2015 20:54:29 +0530
Jerin Jacob <jerin.jacob@caviumnetworks.com> wrote:

> non-temporal/transient/stream version of rte_prefetch0()
> 
> The non-temporal prefetch is intended as a prefetch hint that processor
> will use the prefetched data only once or short period,
> unlike the rte_prefetch0() function which imply that
> prefetched data to use repeatedly.
> 
> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
Acked-by: Jan Viktorin <viktorin@rehivetech.com>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v2 0/2] Fix examples/distributor build issue for non x86
  2015-12-06 15:24 [RFC PATCH 0/2] Fix examples/distributor build issue for non x86 Jerin Jacob
                   ` (2 preceding siblings ...)
  2016-01-29  3:21 ` [RFC PATCH 0/2] Fix examples/distributor build issue for non x86 Jerin Jacob
@ 2016-02-12 11:13 ` Jerin Jacob
  2016-02-12 11:13   ` [PATCH v2 1/2] eal: introduce rte_prefetch_non_temporal Jerin Jacob
                     ` (2 more replies)
  3 siblings, 3 replies; 12+ messages in thread
From: Jerin Jacob @ 2016-02-12 11:13 UTC (permalink / raw)
  To: dev; +Cc: viktorin

Introduce rte_prefetch_non_temporal() to remove the IA-specific
_mm_prefetch(addr, 0) gcc intrinsic and build examples/distributor on
non-x86 platforms.

I am not sure the rte_prefetch_non_temporal() mapping is correct for
all the platforms. Architecture maintainers, please check the mapping of
rte_prefetch_non_temporal() for your specific architectures.

v1..v2

Addressed Thomas's review comments[1]
[1] http://dpdk.org/dev/patchwork/patch/9369/

Jerin Jacob (2):
  eal: introduce rte_prefetch_non_temporal
  examples/distributor: remove IA specific __mm_prefetch

 examples/distributor/main.c                              |  9 +++++----
 lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h |  6 ++++++
 lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h |  5 +++++
 lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h |  6 ++++++
 lib/librte_eal/common/include/arch/tile/rte_prefetch.h   |  6 ++++++
 lib/librte_eal/common/include/arch/x86/rte_prefetch.h    |  5 +++++
 lib/librte_eal/common/include/generic/rte_prefetch.h     | 12 ++++++++++++
 7 files changed, 45 insertions(+), 4 deletions(-)

-- 
2.1.0

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v2 1/2] eal: introduce rte_prefetch_non_temporal
  2016-02-12 11:13 ` [PATCH v2 " Jerin Jacob
@ 2016-02-12 11:13   ` Jerin Jacob
  2016-02-12 11:13   ` [PATCH v2 2/2] examples/distributor: remove IA specific __mm_prefetch Jerin Jacob
  2016-02-16  6:28   ` [PATCH v2 0/2] Fix examples/distributor build issue for non x86 Thomas Monjalon
  2 siblings, 0 replies; 12+ messages in thread
From: Jerin Jacob @ 2016-02-12 11:13 UTC (permalink / raw)
  To: dev; +Cc: viktorin

non-temporal/transient/stream version of rte_prefetch0()

The non-temporal prefetch is intended as a hint that the processor
will use the prefetched data only once or for a short period,
unlike the rte_prefetch0() function, which implies that the
prefetched data will be used repeatedly.

Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
Acked-by: Jan Viktorin <viktorin@rehivetech.com>
---
 lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h |  6 ++++++
 lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h |  5 +++++
 lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h |  6 ++++++
 lib/librte_eal/common/include/arch/tile/rte_prefetch.h   |  6 ++++++
 lib/librte_eal/common/include/arch/x86/rte_prefetch.h    |  5 +++++
 lib/librte_eal/common/include/generic/rte_prefetch.h     | 12 ++++++++++++
 6 files changed, 40 insertions(+)

diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
index b716384..5aeed22 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
@@ -54,6 +54,12 @@ static inline void rte_prefetch2(const volatile void *p)
 	asm volatile ("pld [%0]" : : "r" (p));
 }
 
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+	/* non-temporal version not available, fallback to rte_prefetch0 */
+	rte_prefetch0(p);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
index f9cc62e..3ed46a4 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h
@@ -54,6 +54,11 @@ static inline void rte_prefetch2(const volatile void *p)
 	asm volatile ("PRFM PLDL3KEEP, [%0]" : : "r" (p));
 }
 
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+	asm volatile ("PRFM PLDL1STRM, [%0]" : : "r" (p));
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
index fea3be1..bcc7185 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h
@@ -54,6 +54,12 @@ static inline void rte_prefetch2(const volatile void *p)
 	asm volatile ("dcbt 0,%[p],1" : : [p] "r" (p));
 }
 
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+	/* non-temporal version not available, fallback to rte_prefetch0 */
+	rte_prefetch0(p);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/arch/tile/rte_prefetch.h b/lib/librte_eal/common/include/arch/tile/rte_prefetch.h
index c94075c..7a1bb93 100644
--- a/lib/librte_eal/common/include/arch/tile/rte_prefetch.h
+++ b/lib/librte_eal/common/include/arch/tile/rte_prefetch.h
@@ -54,6 +54,12 @@ static inline void rte_prefetch2(const volatile void *p)
 	__builtin_prefetch((const void *)(uintptr_t)p, 0, 1);
 }
 
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+	/* non-temporal version not available, fallback to rte_prefetch0 */
+	rte_prefetch0(p);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/arch/x86/rte_prefetch.h b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
index 8e6e02c..5dac47e 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h
@@ -55,6 +55,11 @@ static inline void rte_prefetch2(const volatile void *p)
 	asm volatile ("prefetcht2 %[p]" : : [p] "m" (*(const volatile char *)p));
 }
 
+static inline void rte_prefetch_non_temporal(const volatile void *p)
+{
+	asm volatile ("prefetchnta %[p]" : : [p] "m" (*(const volatile char *)p));
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/generic/rte_prefetch.h b/lib/librte_eal/common/include/generic/rte_prefetch.h
index 725715f..07e409e 100644
--- a/lib/librte_eal/common/include/generic/rte_prefetch.h
+++ b/lib/librte_eal/common/include/generic/rte_prefetch.h
@@ -68,4 +68,16 @@ static inline void rte_prefetch1(const volatile void *p);
  */
 static inline void rte_prefetch2(const volatile void *p);
 
+/**
+ * Prefetch a cache line into all cache levels (non-temporal/transient version)
+ *
+ * The non-temporal prefetch is intended as a prefetch hint that processor will
+ * use the prefetched data only once or short period, unlike the
+ * rte_prefetch0() function which imply that prefetched data to use repeatedly.
+ *
+ * @param p
+ *   Address to prefetch
+ */
+static inline void rte_prefetch_non_temporal(const volatile void *p);
+
 #endif /* _RTE_PREFETCH_H_ */
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v2 2/2] examples/distributor: remove IA specific __mm_prefetch
  2016-02-12 11:13 ` [PATCH v2 " Jerin Jacob
  2016-02-12 11:13   ` [PATCH v2 1/2] eal: introduce rte_prefetch_non_temporal Jerin Jacob
@ 2016-02-12 11:13   ` Jerin Jacob
  2016-02-16  6:28   ` [PATCH v2 0/2] Fix examples/distributor build issue for non x86 Thomas Monjalon
  2 siblings, 0 replies; 12+ messages in thread
From: Jerin Jacob @ 2016-02-12 11:13 UTC (permalink / raw)
  To: dev; +Cc: viktorin

Use the rte_prefetch_non_temporal() abstraction instead of _mm_prefetch(x, 0)
in order to build the distributor application on non-x86 platforms.

Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
 examples/distributor/main.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/examples/distributor/main.c b/examples/distributor/main.c
index 4e74f8f..c0201a9 100644
--- a/examples/distributor/main.c
+++ b/examples/distributor/main.c
@@ -42,6 +42,7 @@
 #include <rte_cycles.h>
 #include <rte_malloc.h>
 #include <rte_debug.h>
+#include <rte_prefetch.h>
 #include <rte_distributor.h>
 
 #define RX_RING_SIZE 256
@@ -335,13 +336,13 @@ lcore_tx(struct rte_ring *in_r)
 
 			/* for traffic we receive, queue it up for transmit */
 			uint16_t i;
-			_mm_prefetch((void *)bufs[0], 0);
-			_mm_prefetch((void *)bufs[1], 0);
-			_mm_prefetch((void *)bufs[2], 0);
+			rte_prefetch_non_temporal((void *)bufs[0]);
+			rte_prefetch_non_temporal((void *)bufs[1]);
+			rte_prefetch_non_temporal((void *)bufs[2]);
 			for (i = 0; i < nb_rx; i++) {
 				struct output_buffer *outbuf;
 				uint8_t outp;
-				_mm_prefetch((void *)bufs[i + 3], 0);
+				rte_prefetch_non_temporal((void *)bufs[i + 3]);
 				/*
 				 * workers should update in_port to hold the
 				 * output port value
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v2 0/2] Fix examples/distributor build issue for non x86
  2016-02-12 11:13 ` [PATCH v2 " Jerin Jacob
  2016-02-12 11:13   ` [PATCH v2 1/2] eal: introduce rte_prefetch_non_temporal Jerin Jacob
  2016-02-12 11:13   ` [PATCH v2 2/2] examples/distributor: remove IA specific __mm_prefetch Jerin Jacob
@ 2016-02-16  6:28   ` Thomas Monjalon
  2 siblings, 0 replies; 12+ messages in thread
From: Thomas Monjalon @ 2016-02-16  6:28 UTC (permalink / raw)
  To: Jerin Jacob; +Cc: dev, viktorin

2016-02-12 16:43, Jerin Jacob:
> Introduced rte_prefetch_non_temporal() to remove IA specific _mm_prefect(addr, 0)
> gcc intrinsic and build examples/distributor for non 86 platform
> 
> Not sure the rte_prefetch_non_temporal mapping correct for
> all the platforms. Architecture maintainers please check the mapping for
> rte_prefetch_non_temporal() for specific architecures
> 
> v1..v2
> 
> Addessed Thomas's review comments[1]
> [1] http://dpdk.org/dev/patchwork/patch/9369/
> 
> Jerin Jacob (2):
>   eal: introduce rte_prefetch_non_temporal
>   examples/distributor: remove IA specific __mm_prefetch

Applied, thanks

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2016-02-16  6:30 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-06 15:24 [RFC PATCH 0/2] Fix examples/distributor build issue for non x86 Jerin Jacob
2015-12-06 15:24 ` [PATCH 1/2] eal: introduce rte_prefetch_non_temporal Jerin Jacob
2016-02-11 11:00   ` Thomas Monjalon
2016-02-11 11:43   ` Jan Viktorin
2015-12-06 15:24 ` [PATCH 2/2] examples/distributor: remove IA specific __mm_prefetch Jerin Jacob
2016-01-29  3:21 ` [RFC PATCH 0/2] Fix examples/distributor build issue for non x86 Jerin Jacob
2016-01-29 15:03   ` Bruce Richardson
2016-01-29 16:53     ` Jerin Jacob
2016-02-12 11:13 ` [PATCH v2 " Jerin Jacob
2016-02-12 11:13   ` [PATCH v2 1/2] eal: introduce rte_prefetch_non_temporal Jerin Jacob
2016-02-12 11:13   ` [PATCH v2 2/2] examples/distributor: remove IA specific __mm_prefetch Jerin Jacob
2016-02-16  6:28   ` [PATCH v2 0/2] Fix examples/distributor build issue for non x86 Thomas Monjalon

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.