* [PATCH v2 16/29] net/bnxt: use eal I/O device memory read/write API
From: Jerin Jacob @ 2016-12-27 9:49 UTC (permalink / raw)
To: dev
Cc: konstantin.ananyev, thomas.monjalon, bruce.richardson, jianbo.liu,
viktorin, santosh.shukla, Stephen Hurd, Ajit Khaparde,
Jerin Jacob
In-Reply-To: <1482832175-27199-1-git-send-email-jerin.jacob@caviumnetworks.com>
From: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Replace raw I/O device memory read/write accesses with the EAL
abstraction for I/O device memory access (rte_read*/rte_write*) to fix
portability issues across different architectures.
CC: Stephen Hurd <stephen.hurd@broadcom.com>
CC: Ajit Khaparde <ajit.khaparde@broadcom.com>
Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
drivers/net/bnxt/bnxt_cpr.h | 13 ++++++++-----
drivers/net/bnxt/bnxt_hwrm.c | 7 +++++--
drivers/net/bnxt/bnxt_txr.h | 6 +++---
3 files changed, 16 insertions(+), 10 deletions(-)
diff --git a/drivers/net/bnxt/bnxt_cpr.h b/drivers/net/bnxt/bnxt_cpr.h
index f9f2adb..83e5376 100644
--- a/drivers/net/bnxt/bnxt_cpr.h
+++ b/drivers/net/bnxt/bnxt_cpr.h
@@ -34,6 +34,8 @@
#ifndef _BNXT_CPR_H_
#define _BNXT_CPR_H_
+#include <rte_io.h>
+
#define CMP_VALID(cmp, raw_cons, ring) \
(!!(((struct cmpl_base *)(cmp))->info3_v & CMPL_BASE_V) == \
!((raw_cons) & ((ring)->ring_size)))
@@ -50,13 +52,14 @@
#define DB_CP_FLAGS (DB_KEY_CP | DB_IDX_VALID | DB_IRQ_DIS)
#define B_CP_DB_REARM(cpr, raw_cons) \
- (*(uint32_t *)((cpr)->cp_doorbell) = (DB_CP_REARM_FLAGS | \
- RING_CMP(cpr->cp_ring_struct, raw_cons)))
+ rte_write32((DB_CP_REARM_FLAGS | \
+ RING_CMP(((cpr)->cp_ring_struct), raw_cons)), \
+ ((cpr)->cp_doorbell))
#define B_CP_DIS_DB(cpr, raw_cons) \
- rte_smp_wmb(); \
- (*(uint32_t *)((cpr)->cp_doorbell) = (DB_CP_FLAGS | \
- RING_CMP(cpr->cp_ring_struct, raw_cons)))
+ rte_write32((DB_CP_FLAGS | \
+ RING_CMP(((cpr)->cp_ring_struct), raw_cons)), \
+ ((cpr)->cp_doorbell))
struct bnxt_ring;
struct bnxt_cp_ring_info {
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 07e7124..c182152 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -50,6 +50,8 @@
#include "bnxt_vnic.h"
#include "hsi_struct_def_dpdk.h"
+#include <rte_io.h>
+
#define HWRM_CMD_TIMEOUT 2000
/*
@@ -72,7 +74,7 @@ static int bnxt_hwrm_send_message_locked(struct bnxt *bp, void *msg,
/* Write request msg to hwrm channel */
for (i = 0; i < msg_len; i += 4) {
bar = (uint8_t *)bp->bar0 + i;
- *(volatile uint32_t *)bar = *data;
+ rte_write32(*data, bar);
data++;
}
@@ -80,11 +82,12 @@ static int bnxt_hwrm_send_message_locked(struct bnxt *bp, void *msg,
for (; i < bp->max_req_len; i += 4) {
bar = (uint8_t *)bp->bar0 + i;
*(volatile uint32_t *)bar = 0;
+ rte_write32(0, bar);
}
/* Ring channel doorbell */
bar = (uint8_t *)bp->bar0 + 0x100;
- *(volatile uint32_t *)bar = 1;
+ rte_write32(1, bar);
/* Poll for the valid bit */
for (i = 0; i < HWRM_CMD_TIMEOUT; i++) {
diff --git a/drivers/net/bnxt/bnxt_txr.h b/drivers/net/bnxt/bnxt_txr.h
index 4c16101..5b09711 100644
--- a/drivers/net/bnxt/bnxt_txr.h
+++ b/drivers/net/bnxt/bnxt_txr.h
@@ -34,12 +34,12 @@
#ifndef _BNXT_TXR_H_
#define _BNXT_TXR_H_
+#include <rte_io.h>
+
#define MAX_TX_RINGS 16
#define BNXT_TX_PUSH_THRESH 92
-#define B_TX_DB(db, prod) \
- rte_smp_wmb(); \
- (*(uint32_t *)db = (DB_KEY_TX | prod))
+#define B_TX_DB(db, prod) rte_write32((DB_KEY_TX | (prod)), db)
struct bnxt_tx_ring_info {
uint16_t tx_prod;
--
2.5.5
^ permalink raw reply related
* [PATCH v2 17/29] net/bnx2x: use eal I/O device memory read/write API
From: Jerin Jacob @ 2016-12-27 9:49 UTC (permalink / raw)
To: dev
Cc: konstantin.ananyev, thomas.monjalon, bruce.richardson, jianbo.liu,
viktorin, santosh.shukla, Harish Patil, Rasesh Mody, Jerin Jacob
In-Reply-To: <1482832175-27199-1-git-send-email-jerin.jacob@caviumnetworks.com>
From: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Replace raw I/O device memory read/write accesses with the EAL
abstraction for I/O device memory access (rte_read*/rte_write*) to fix
portability issues across different architectures.
CC: Harish Patil <harish.patil@cavium.com>
CC: Rasesh Mody <rasesh.mody@cavium.com>
Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
drivers/net/bnx2x/bnx2x.h | 26 ++++++++++----------------
1 file changed, 10 insertions(+), 16 deletions(-)
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 5cefea4..59064d8 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -18,6 +18,7 @@
#include <rte_byteorder.h>
#include <rte_spinlock.h>
+#include <rte_io.h>
#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
#ifndef __LITTLE_ENDIAN
@@ -1419,8 +1420,7 @@ bnx2x_reg_write8(struct bnx2x_softc *sc, size_t offset, uint8_t val)
{
PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%02x",
(unsigned long)offset, val);
- *((volatile uint8_t*)
- ((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val;
+ rte_write8(val, ((uint8_t *)sc->bar[BAR0].base_addr + offset));
}
static inline void
@@ -1433,8 +1433,8 @@ bnx2x_reg_write16(struct bnx2x_softc *sc, size_t offset, uint16_t val)
#endif
PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%04x",
(unsigned long)offset, val);
- *((volatile uint16_t*)
- ((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val;
+ rte_write16(val, ((uint8_t *)sc->bar[BAR0].base_addr + offset));
+
}
static inline void
@@ -1448,8 +1448,7 @@ bnx2x_reg_write32(struct bnx2x_softc *sc, size_t offset, uint32_t val)
PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x",
(unsigned long)offset, val);
- *((volatile uint32_t*)
- ((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val;
+ rte_write32(val, ((uint8_t *)sc->bar[BAR0].base_addr + offset));
}
static inline uint8_t
@@ -1457,8 +1456,7 @@ bnx2x_reg_read8(struct bnx2x_softc *sc, size_t offset)
{
uint8_t val;
- val = (uint8_t)(*((volatile uint8_t*)
- ((uintptr_t)sc->bar[BAR0].base_addr + offset)));
+ val = rte_read8((uint8_t *)sc->bar[BAR0].base_addr + offset);
PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%02x",
(unsigned long)offset, val);
@@ -1476,8 +1474,7 @@ bnx2x_reg_read16(struct bnx2x_softc *sc, size_t offset)
(unsigned long)offset);
#endif
- val = (uint16_t)(*((volatile uint16_t*)
- ((uintptr_t)sc->bar[BAR0].base_addr + offset)));
+ val = rte_read16(((uint8_t *)sc->bar[BAR0].base_addr + offset));
PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x",
(unsigned long)offset, val);
@@ -1495,8 +1492,7 @@ bnx2x_reg_read32(struct bnx2x_softc *sc, size_t offset)
(unsigned long)offset);
#endif
- val = (uint32_t)(*((volatile uint32_t*)
- ((uintptr_t)sc->bar[BAR0].base_addr + offset)));
+ val = rte_read32(((uint8_t *)sc->bar[BAR0].base_addr + offset));
PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x",
(unsigned long)offset, val);
@@ -1560,11 +1556,9 @@ bnx2x_reg_read32(struct bnx2x_softc *sc, size_t offset)
#define DPM_TRIGGER_TYPE 0x40
/* Doorbell macro */
-#define BNX2X_DB_WRITE(db_bar, val) \
- *((volatile uint32_t *)(db_bar)) = (val)
+#define BNX2X_DB_WRITE(db_bar, val) rte_write32_relaxed((val), (db_bar))
-#define BNX2X_DB_READ(db_bar) \
- *((volatile uint32_t *)(db_bar))
+#define BNX2X_DB_READ(db_bar) rte_read32_relaxed(db_bar)
#define DOORBELL_ADDR(sc, offset) \
(volatile uint32_t *)(((char *)(sc)->bar[BAR1].base_addr + (offset)))
--
2.5.5
^ permalink raw reply related
* [PATCH v2 18/29] net/cxgbe: use eal I/O device memory read/write API
From: Jerin Jacob @ 2016-12-27 9:49 UTC (permalink / raw)
To: dev
Cc: konstantin.ananyev, thomas.monjalon, bruce.richardson, jianbo.liu,
viktorin, santosh.shukla, Rahul Lakkireddy, Jerin Jacob
In-Reply-To: <1482832175-27199-1-git-send-email-jerin.jacob@caviumnetworks.com>
From: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Replace raw I/O device memory read/write accesses with the EAL
abstraction for I/O device memory access (rte_read*/rte_write*) to fix
portability issues across different architectures.
CC: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
drivers/net/cxgbe/base/adapter.h | 34 ++++++++++++++++++++++++++++------
drivers/net/cxgbe/cxgbe_compat.h | 8 +++++++-
drivers/net/cxgbe/sge.c | 10 +++++-----
3 files changed, 40 insertions(+), 12 deletions(-)
diff --git a/drivers/net/cxgbe/base/adapter.h b/drivers/net/cxgbe/base/adapter.h
index 5e3bd50..beb1e3e 100644
--- a/drivers/net/cxgbe/base/adapter.h
+++ b/drivers/net/cxgbe/base/adapter.h
@@ -37,6 +37,7 @@
#define __T4_ADAPTER_H__
#include <rte_mbuf.h>
+#include <rte_io.h>
#include "cxgbe_compat.h"
#include "t4_regs_values.h"
@@ -324,7 +325,7 @@ struct adapter {
int use_unpacked_mode; /* unpacked rx mode state */
};
-#define CXGBE_PCI_REG(reg) (*((volatile uint32_t *)(reg)))
+#define CXGBE_PCI_REG(reg) rte_read32(reg)
static inline uint64_t cxgbe_read_addr64(volatile void *addr)
{
@@ -350,16 +351,21 @@ static inline uint32_t cxgbe_read_addr(volatile void *addr)
#define CXGBE_READ_REG64(adap, reg) \
cxgbe_read_addr64(CXGBE_PCI_REG_ADDR((adap), (reg)))
-#define CXGBE_PCI_REG_WRITE(reg, value) ({ \
- CXGBE_PCI_REG((reg)) = (value); })
+#define CXGBE_PCI_REG_WRITE(reg, value) rte_write32((value), (reg))
+
+#define CXGBE_PCI_REG_WRITE_RELAXED(reg, value) \
+ rte_write32_relaxed((value), (reg))
#define CXGBE_WRITE_REG(adap, reg, value) \
CXGBE_PCI_REG_WRITE(CXGBE_PCI_REG_ADDR((adap), (reg)), (value))
+#define CXGBE_WRITE_REG_RELAXED(adap, reg, value) \
+ CXGBE_PCI_REG_WRITE_RELAXED(CXGBE_PCI_REG_ADDR((adap), (reg)), (value))
+
static inline uint64_t cxgbe_write_addr64(volatile void *addr, uint64_t val)
{
- CXGBE_PCI_REG(addr) = val;
- CXGBE_PCI_REG(((volatile uint8_t *)(addr) + 4)) = (val >> 32);
+ CXGBE_PCI_REG_WRITE(addr, val);
+ CXGBE_PCI_REG_WRITE(((volatile uint8_t *)(addr) + 4), (val >> 32));
return val;
}
@@ -383,7 +389,7 @@ static inline u32 t4_read_reg(struct adapter *adapter, u32 reg_addr)
}
/**
- * t4_write_reg - write a HW register
+ * t4_write_reg - write a HW register with barrier
* @adapter: the adapter
* @reg_addr: the register address
* @val: the value to write
@@ -398,6 +404,22 @@ static inline void t4_write_reg(struct adapter *adapter, u32 reg_addr, u32 val)
}
/**
+ * t4_write_reg_relaxed - write a HW register with no barrier
+ * @adapter: the adapter
+ * @reg_addr: the register address
+ * @val: the value to write
+ *
+ * Write a 32-bit value into the given HW register.
+ */
+static inline void t4_write_reg_relaxed(struct adapter *adapter, u32 reg_addr,
+ u32 val)
+{
+ CXGBE_DEBUG_REG(adapter, "setting register 0x%x to 0x%x\n", reg_addr,
+ val);
+ CXGBE_WRITE_REG_RELAXED(adapter, reg_addr, val);
+}
+
+/**
* t4_read_reg64 - read a 64-bit HW register
* @adapter: the adapter
* @reg_addr: the register address
diff --git a/drivers/net/cxgbe/cxgbe_compat.h b/drivers/net/cxgbe/cxgbe_compat.h
index e68f8f5..1551cbf 100644
--- a/drivers/net/cxgbe/cxgbe_compat.h
+++ b/drivers/net/cxgbe/cxgbe_compat.h
@@ -45,6 +45,7 @@
#include <rte_cycles.h>
#include <rte_spinlock.h>
#include <rte_log.h>
+#include <rte_io.h>
#define dev_printf(level, fmt, args...) \
RTE_LOG(level, PMD, "rte_cxgbe_pmd: " fmt, ## args)
@@ -254,7 +255,7 @@ static inline unsigned long ilog2(unsigned long n)
static inline void writel(unsigned int val, volatile void __iomem *addr)
{
- *(volatile unsigned int *)addr = val;
+ rte_write32(val, addr);
}
static inline void writeq(u64 val, volatile void __iomem *addr)
@@ -263,4 +264,9 @@ static inline void writeq(u64 val, volatile void __iomem *addr)
writel(val >> 32, (void *)((uintptr_t)addr + 4));
}
+static inline void writel_relaxed(unsigned int val, volatile void __iomem *addr)
+{
+ rte_write32_relaxed(val, addr);
+}
+
#endif /* _CXGBE_COMPAT_H_ */
diff --git a/drivers/net/cxgbe/sge.c b/drivers/net/cxgbe/sge.c
index 736f08c..fc03a0c 100644
--- a/drivers/net/cxgbe/sge.c
+++ b/drivers/net/cxgbe/sge.c
@@ -338,12 +338,12 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
* mechanism.
*/
if (unlikely(!q->bar2_addr)) {
- t4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL),
- val | V_QID(q->cntxt_id));
+ t4_write_reg_relaxed(adap, MYPF_REG(A_SGE_PF_KDOORBELL),
+ val | V_QID(q->cntxt_id));
} else {
- writel(val | V_QID(q->bar2_qid),
- (void *)((uintptr_t)q->bar2_addr +
- SGE_UDB_KDOORBELL));
+ writel_relaxed(val | V_QID(q->bar2_qid),
+ (void *)((uintptr_t)q->bar2_addr +
+ SGE_UDB_KDOORBELL));
/*
* This Write memory Barrier will force the write to
--
2.5.5
^ permalink raw reply related
* [PATCH v2 19/29] net/e1000: use eal I/O device memory read/write API
From: Jerin Jacob @ 2016-12-27 9:49 UTC (permalink / raw)
To: dev
Cc: konstantin.ananyev, thomas.monjalon, bruce.richardson, jianbo.liu,
viktorin, santosh.shukla, Wenzhuo Lu, Jerin Jacob
In-Reply-To: <1482832175-27199-1-git-send-email-jerin.jacob@caviumnetworks.com>
From: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Replace raw I/O device memory read/write accesses with the EAL
abstraction for I/O device memory access (rte_read*/rte_write*) to fix
portability issues across different architectures.
CC: Wenzhuo Lu <wenzhuo.lu@intel.com>
Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
drivers/net/e1000/base/e1000_osdep.h | 18 ++++++++++--------
drivers/net/e1000/em_rxtx.c | 2 +-
drivers/net/e1000/igb_rxtx.c | 2 +-
3 files changed, 12 insertions(+), 10 deletions(-)
diff --git a/drivers/net/e1000/base/e1000_osdep.h b/drivers/net/e1000/base/e1000_osdep.h
index 47a1948..b886804 100644
--- a/drivers/net/e1000/base/e1000_osdep.h
+++ b/drivers/net/e1000/base/e1000_osdep.h
@@ -44,6 +44,7 @@
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_byteorder.h>
+#include <rte_io.h>
#include "../e1000_logs.h"
@@ -94,17 +95,18 @@ typedef int bool;
#define E1000_WRITE_FLUSH(a) E1000_READ_REG(a, E1000_STATUS)
-#define E1000_PCI_REG(reg) (*((volatile uint32_t *)(reg)))
+#define E1000_PCI_REG(reg) rte_read32(reg)
-#define E1000_PCI_REG16(reg) (*((volatile uint16_t *)(reg)))
+#define E1000_PCI_REG16(reg) rte_read16(reg)
-#define E1000_PCI_REG_WRITE(reg, value) do { \
- E1000_PCI_REG((reg)) = (rte_cpu_to_le_32(value)); \
-} while (0)
+#define E1000_PCI_REG_WRITE(reg, value) \
+ rte_write32((rte_cpu_to_le_32(value)), reg)
-#define E1000_PCI_REG_WRITE16(reg, value) do { \
- E1000_PCI_REG16((reg)) = (rte_cpu_to_le_16(value)); \
-} while (0)
+#define E1000_PCI_REG_WRITE_RELAXED(reg, value) \
+ rte_write32_relaxed((rte_cpu_to_le_32(value)), reg)
+
+#define E1000_PCI_REG_WRITE16(reg, value) \
+ rte_write16((rte_cpu_to_le_16(value)), reg)
#define E1000_PCI_REG_ADDR(hw, reg) \
((volatile uint32_t *)((char *)(hw)->hw_addr + (reg)))
diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c
index 41f51c0..6ec38d4 100644
--- a/drivers/net/e1000/em_rxtx.c
+++ b/drivers/net/e1000/em_rxtx.c
@@ -610,7 +610,7 @@ eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
(unsigned) txq->port_id, (unsigned) txq->queue_id,
(unsigned) tx_id, (unsigned) nb_tx);
- E1000_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
+ E1000_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
txq->tx_tail = tx_id;
return nb_tx;
diff --git a/drivers/net/e1000/igb_rxtx.c b/drivers/net/e1000/igb_rxtx.c
index dbd37ac..61edbfb 100644
--- a/drivers/net/e1000/igb_rxtx.c
+++ b/drivers/net/e1000/igb_rxtx.c
@@ -605,7 +605,7 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
/*
* Set the Transmit Descriptor Tail (TDT).
*/
- E1000_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
+ E1000_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
(unsigned) txq->port_id, (unsigned) txq->queue_id,
(unsigned) tx_id, (unsigned) nb_tx);
--
2.5.5
^ permalink raw reply related
* [PATCH v2 20/29] net/ena: use eal I/O device memory read/write API
From: Jerin Jacob @ 2016-12-27 9:49 UTC (permalink / raw)
To: dev
Cc: konstantin.ananyev, thomas.monjalon, bruce.richardson, jianbo.liu,
viktorin, santosh.shukla, Jan Medala, Jakub Palider, Jerin Jacob
In-Reply-To: <1482832175-27199-1-git-send-email-jerin.jacob@caviumnetworks.com>
From: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Replace raw I/O device memory read/write accesses with the EAL
abstraction for I/O device memory access (rte_read*/rte_write*) to fix
portability issues across different architectures.
CC: Jan Medala <jan@semihalf.com>
CC: Jakub Palider <jpa@semihalf.com>
Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
Acked-by: Jan Medala <jan@semihalf.com>
---
drivers/net/ena/base/ena_eth_com.h | 2 +-
drivers/net/ena/base/ena_plat_dpdk.h | 11 +++++++++--
2 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ena/base/ena_eth_com.h b/drivers/net/ena/base/ena_eth_com.h
index 71a880c..ee62685 100644
--- a/drivers/net/ena/base/ena_eth_com.h
+++ b/drivers/net/ena/base/ena_eth_com.h
@@ -118,7 +118,7 @@ static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
ena_trc_dbg("write submission queue doorbell for queue: %d tail: %d\n",
io_sq->qid, tail);
- ENA_REG_WRITE32(tail, io_sq->db_addr);
+ ENA_REG_WRITE32_RELAXED(tail, io_sq->db_addr);
return 0;
}
diff --git a/drivers/net/ena/base/ena_plat_dpdk.h b/drivers/net/ena/base/ena_plat_dpdk.h
index 87c3bf1..09d540a 100644
--- a/drivers/net/ena/base/ena_plat_dpdk.h
+++ b/drivers/net/ena/base/ena_plat_dpdk.h
@@ -48,6 +48,7 @@
#include <rte_malloc.h>
#include <rte_memzone.h>
#include <rte_spinlock.h>
+#include <rte_io.h>
#include <sys/time.h>
@@ -226,15 +227,21 @@ typedef uint64_t dma_addr_t;
static inline void writel(u32 value, volatile void *addr)
{
- *(volatile u32 *)addr = value;
+ rte_write32(value, addr);
+}
+
+static inline void writel_relaxed(u32 value, volatile void *addr)
+{
+ rte_write32_relaxed(value, addr);
}
static inline u32 readl(const volatile void *addr)
{
- return *(const volatile u32 *)addr;
+ return rte_read32(addr);
}
#define ENA_REG_WRITE32(value, reg) writel((value), (reg))
+#define ENA_REG_WRITE32_RELAXED(value, reg) writel_relaxed((value), (reg))
#define ENA_REG_READ32(reg) readl((reg))
#define ATOMIC32_INC(i32_ptr) rte_atomic32_inc(i32_ptr)
--
2.5.5
^ permalink raw reply related
* [PATCH v2 21/29] net/enic: use eal I/O device memory read/write API
From: Jerin Jacob @ 2016-12-27 9:49 UTC (permalink / raw)
To: dev
Cc: konstantin.ananyev, thomas.monjalon, bruce.richardson, jianbo.liu,
viktorin, santosh.shukla, John Daley, Nelson Escobar, Jerin Jacob
In-Reply-To: <1482832175-27199-1-git-send-email-jerin.jacob@caviumnetworks.com>
From: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Replace raw I/O device memory read/write accesses with the EAL
abstraction for I/O device memory access (rte_read*/rte_write*) to fix
portability issues across different architectures.
CC: John Daley <johndale@cisco.com>
CC: Nelson Escobar <neescoba@cisco.com>
Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
drivers/net/enic/enic_compat.h | 27 +++++++++++++++++++--------
drivers/net/enic/enic_rxtx.c | 9 +++++----
2 files changed, 24 insertions(+), 12 deletions(-)
diff --git a/drivers/net/enic/enic_compat.h b/drivers/net/enic/enic_compat.h
index 5dbd983..fc58bb4 100644
--- a/drivers/net/enic/enic_compat.h
+++ b/drivers/net/enic/enic_compat.h
@@ -41,6 +41,7 @@
#include <rte_atomic.h>
#include <rte_malloc.h>
#include <rte_log.h>
+#include <rte_io.h>
#define ENIC_PAGE_ALIGN 4096UL
#define ENIC_ALIGN ENIC_PAGE_ALIGN
@@ -95,42 +96,52 @@ typedef unsigned long long dma_addr_t;
static inline uint32_t ioread32(volatile void *addr)
{
- return *(volatile uint32_t *)addr;
+ return rte_read32(addr);
}
static inline uint16_t ioread16(volatile void *addr)
{
- return *(volatile uint16_t *)addr;
+ return rte_read16(addr);
}
static inline uint8_t ioread8(volatile void *addr)
{
- return *(volatile uint8_t *)addr;
+ return rte_read8(addr);
}
static inline void iowrite32(uint32_t val, volatile void *addr)
{
- *(volatile uint32_t *)addr = val;
+ rte_write32(val, addr);
+}
+
+static inline void iowrite32_relaxed(uint32_t val, volatile void *addr)
+{
+ rte_write32_relaxed(val, addr);
}
static inline void iowrite16(uint16_t val, volatile void *addr)
{
- *(volatile uint16_t *)addr = val;
+ rte_write16(val, addr);
}
static inline void iowrite8(uint8_t val, volatile void *addr)
{
- *(volatile uint8_t *)addr = val;
+ rte_write8(val, addr);
}
static inline unsigned int readl(volatile void __iomem *addr)
{
- return *(volatile unsigned int *)addr;
+ return rte_read32(addr);
+}
+
+static inline unsigned int readl_relaxed(volatile void __iomem *addr)
+{
+ return rte_read32_relaxed(addr);
}
static inline void writel(unsigned int val, volatile void __iomem *addr)
{
- *(volatile unsigned int *)addr = val;
+ rte_write32(val, addr);
}
#define min_t(type, x, y) ({ \
diff --git a/drivers/net/enic/enic_rxtx.c b/drivers/net/enic/enic_rxtx.c
index f762a26..382d1ab 100644
--- a/drivers/net/enic/enic_rxtx.c
+++ b/drivers/net/enic/enic_rxtx.c
@@ -380,10 +380,11 @@ enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
rte_mb();
if (data_rq->in_use)
- iowrite32(data_rq->posted_index,
- &data_rq->ctrl->posted_index);
+ iowrite32_relaxed(data_rq->posted_index,
+ &data_rq->ctrl->posted_index);
rte_compiler_barrier();
- iowrite32(sop_rq->posted_index, &sop_rq->ctrl->posted_index);
+ iowrite32_relaxed(sop_rq->posted_index,
+ &sop_rq->ctrl->posted_index);
}
@@ -550,7 +551,7 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
}
post:
rte_wmb();
- iowrite32(head_idx, &wq->ctrl->posted_index);
+ iowrite32_relaxed(head_idx, &wq->ctrl->posted_index);
done:
wq->ring.desc_avail = wq_desc_avail;
wq->head_idx = head_idx;
--
2.5.5
^ permalink raw reply related
* [PATCH v2 22/29] net/fm10k: use eal I/O device memory read/write API
From: Jerin Jacob @ 2016-12-27 9:49 UTC (permalink / raw)
To: dev
Cc: konstantin.ananyev, thomas.monjalon, bruce.richardson, jianbo.liu,
viktorin, santosh.shukla, Jing Chen, Jerin Jacob
In-Reply-To: <1482832175-27199-1-git-send-email-jerin.jacob@caviumnetworks.com>
From: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Replace raw I/O device memory read/write accesses with the EAL
abstraction for I/O device memory access (rte_read*/rte_write*) to fix
portability issues across different architectures.
CC: Jing Chen <jing.d.chen@intel.com>
Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
drivers/net/fm10k/base/fm10k_osdep.h | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/drivers/net/fm10k/base/fm10k_osdep.h b/drivers/net/fm10k/base/fm10k_osdep.h
index a21daa2..f07b678 100644
--- a/drivers/net/fm10k/base/fm10k_osdep.h
+++ b/drivers/net/fm10k/base/fm10k_osdep.h
@@ -39,6 +39,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include <rte_atomic.h>
#include <rte_byteorder.h>
#include <rte_cycles.h>
+#include <rte_io.h>
+
#include "../fm10k_logs.h"
/* TODO: this does not look like it should be used... */
@@ -88,17 +90,16 @@ typedef int bool;
#endif
/* offsets are WORD offsets, not BYTE offsets */
-#define FM10K_WRITE_REG(hw, reg, val) \
- ((((volatile uint32_t *)(hw)->hw_addr)[(reg)]) = ((uint32_t)(val)))
-#define FM10K_READ_REG(hw, reg) \
- (((volatile uint32_t *)(hw)->hw_addr)[(reg)])
+#define FM10K_WRITE_REG(hw, reg, val) \
+ rte_write32((val), ((hw)->hw_addr + (reg)))
+
+#define FM10K_READ_REG(hw, reg) rte_read32(((hw)->hw_addr + (reg)))
+
#define FM10K_WRITE_FLUSH(a) FM10K_READ_REG(a, FM10K_CTRL)
-#define FM10K_PCI_REG(reg) (*((volatile uint32_t *)(reg)))
+#define FM10K_PCI_REG(reg) rte_read32(reg)
-#define FM10K_PCI_REG_WRITE(reg, value) do { \
- FM10K_PCI_REG((reg)) = (value); \
-} while (0)
+#define FM10K_PCI_REG_WRITE(reg, value) rte_write32((value), (reg))
/* not implemented */
#define FM10K_READ_PCI_WORD(hw, reg) 0
--
2.5.5
^ permalink raw reply related
* [PATCH v2 23/29] net/i40e: use eal I/O device memory read/write API
From: Jerin Jacob @ 2016-12-27 9:49 UTC (permalink / raw)
To: dev
Cc: konstantin.ananyev, thomas.monjalon, bruce.richardson, jianbo.liu,
viktorin, santosh.shukla, Helin Zhang, Jingjing Wu, Satha Rao,
Jerin Jacob
In-Reply-To: <1482832175-27199-1-git-send-email-jerin.jacob@caviumnetworks.com>
From: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Replace raw I/O device memory read/write accesses with the EAL
abstraction for I/O device memory access (rte_read*/rte_write*) to fix
portability issues across different architectures.
CC: Helin Zhang <helin.zhang@intel.com>
CC: Jingjing Wu <jingjing.wu@intel.com>
Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Signed-off-by: Satha Rao <skoteshwar@caviumnetworks.com>
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
drivers/net/i40e/base/i40e_osdep.h | 10 +++++++---
drivers/net/i40e/i40e_rxtx.c | 4 ++--
2 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/drivers/net/i40e/base/i40e_osdep.h b/drivers/net/i40e/base/i40e_osdep.h
index 38e7ba5..c57ecde 100644
--- a/drivers/net/i40e/base/i40e_osdep.h
+++ b/drivers/net/i40e/base/i40e_osdep.h
@@ -44,6 +44,7 @@
#include <rte_cycles.h>
#include <rte_spinlock.h>
#include <rte_log.h>
+#include <rte_io.h>
#include "../i40e_logs.h"
@@ -153,15 +154,18 @@ do { \
* I40E_PRTQF_FD_MSK
*/
-#define I40E_PCI_REG(reg) (*((volatile uint32_t *)(reg)))
+#define I40E_PCI_REG(reg) rte_read32(reg)
#define I40E_PCI_REG_ADDR(a, reg) \
((volatile uint32_t *)((char *)(a)->hw_addr + (reg)))
static inline uint32_t i40e_read_addr(volatile void *addr)
{
return rte_le_to_cpu_32(I40E_PCI_REG(addr));
}
-#define I40E_PCI_REG_WRITE(reg, value) \
- do { I40E_PCI_REG((reg)) = rte_cpu_to_le_32(value); } while (0)
+
+#define I40E_PCI_REG_WRITE(reg, value) \
+ rte_write32((rte_cpu_to_le_32(value)), reg)
+#define I40E_PCI_REG_WRITE_RELAXED(reg, value) \
+ rte_write32_relaxed((rte_cpu_to_le_32(value)), reg)
#define I40E_WRITE_FLUSH(a) I40E_READ_REG(a, I40E_GLGEN_STAT)
#define I40EVF_WRITE_FLUSH(a) I40E_READ_REG(a, I40E_VFGEN_RSTAT)
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 7ae7d9f..5c41a90 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1228,7 +1228,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
(unsigned) txq->port_id, (unsigned) txq->queue_id,
(unsigned) tx_id, (unsigned) nb_tx);
- I40E_PCI_REG_WRITE(txq->qtx_tail, tx_id);
+ I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id);
txq->tx_tail = tx_id;
return nb_tx;
@@ -1380,7 +1380,7 @@ tx_xmit_pkts(struct i40e_tx_queue *txq,
/* Update the tx tail register */
rte_wmb();
- I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
+ I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, txq->tx_tail);
return nb_pkts;
}
--
2.5.5
^ permalink raw reply related
* [PATCH v2 24/29] net/ixgbe: use eal I/O device memory read/write API
From: Jerin Jacob @ 2016-12-27 9:49 UTC (permalink / raw)
To: dev
Cc: konstantin.ananyev, thomas.monjalon, bruce.richardson, jianbo.liu,
viktorin, santosh.shukla, Helin Zhang, Jerin Jacob
In-Reply-To: <1482832175-27199-1-git-send-email-jerin.jacob@caviumnetworks.com>
From: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Replace raw I/O device memory read/write accesses with the EAL
abstraction for I/O device memory access (rte_read*/rte_write*) to fix
portability issues across different architectures.
CC: Helin Zhang <helin.zhang@intel.com>
CC: Konstantin Ananyev <konstantin.ananyev@intel.com>
Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
drivers/net/ixgbe/base/ixgbe_osdep.h | 11 +++++++----
drivers/net/ixgbe/ixgbe_rxtx.c | 13 +++++++------
2 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ixgbe/base/ixgbe_osdep.h b/drivers/net/ixgbe/base/ixgbe_osdep.h
index 77f0af5..9b874b8 100644
--- a/drivers/net/ixgbe/base/ixgbe_osdep.h
+++ b/drivers/net/ixgbe/base/ixgbe_osdep.h
@@ -44,6 +44,7 @@
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_byteorder.h>
+#include <rte_io.h>
#include "../ixgbe_logs.h"
#include "../ixgbe_bypass_defines.h"
@@ -121,16 +122,18 @@ typedef int bool;
#define prefetch(x) rte_prefetch0(x)
-#define IXGBE_PCI_REG(reg) (*((volatile uint32_t *)(reg)))
+#define IXGBE_PCI_REG(reg) rte_read32(reg)
static inline uint32_t ixgbe_read_addr(volatile void* addr)
{
return rte_le_to_cpu_32(IXGBE_PCI_REG(addr));
}
-#define IXGBE_PCI_REG_WRITE(reg, value) do { \
- IXGBE_PCI_REG((reg)) = (rte_cpu_to_le_32(value)); \
-} while(0)
+#define IXGBE_PCI_REG_WRITE(reg, value) \
+ rte_write32((rte_cpu_to_le_32(value)), reg)
+
+#define IXGBE_PCI_REG_WRITE_RELAXED(reg, value) \
+ rte_write32_relaxed((rte_cpu_to_le_32(value)), reg)
#define IXGBE_PCI_REG_ADDR(hw, reg) \
((volatile uint32_t *)((char *)(hw)->hw_addr + (reg)))
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index b2d9f45..81544bb 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -321,7 +321,7 @@ tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
/* update tail pointer */
rte_wmb();
- IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);
+ IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
return nb_pkts;
}
@@ -897,7 +897,7 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
(unsigned) txq->port_id, (unsigned) txq->queue_id,
(unsigned) tx_id, (unsigned) nb_tx);
- IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id);
+ IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
txq->tx_tail = tx_id;
return nb_tx;
@@ -1581,7 +1581,8 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
/* update tail pointer */
rte_wmb();
- IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger);
+ IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
+ cur_free_trigger);
}
if (rxq->rx_tail >= rxq->nb_rx_desc)
@@ -1985,8 +1986,8 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
if (!ixgbe_rx_alloc_bufs(rxq, false)) {
rte_wmb();
- IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr,
- next_rdt);
+ IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
+ next_rdt);
nb_hold -= rxq->rx_free_thresh;
} else {
PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
@@ -2157,7 +2158,7 @@ ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
rte_wmb();
- IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id);
+ IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
nb_hold = 0;
}
--
2.5.5
^ permalink raw reply related
* [PATCH v2 25/29] net/nfp: use eal I/O device memory read/write API
From: Jerin Jacob @ 2016-12-27 9:49 UTC (permalink / raw)
To: dev
Cc: konstantin.ananyev, thomas.monjalon, bruce.richardson, jianbo.liu,
viktorin, santosh.shukla, Alejandro Lucero, Jerin Jacob
In-Reply-To: <1482832175-27199-1-git-send-email-jerin.jacob@caviumnetworks.com>
From: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Replace raw I/O device memory read/write accesses with the EAL
abstraction for I/O device memory access (rte_read*/rte_write*) to fix
portability issues across different architectures.
CC: Alejandro Lucero <alejandro.lucero@netronome.com>
Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
drivers/net/nfp/nfp_net_pmd.h | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/net/nfp/nfp_net_pmd.h b/drivers/net/nfp/nfp_net_pmd.h
index c180972..f11b32e 100644
--- a/drivers/net/nfp/nfp_net_pmd.h
+++ b/drivers/net/nfp/nfp_net_pmd.h
@@ -121,25 +121,26 @@ struct nfp_net_adapter;
#define NFD_CFG_MINOR_VERSION_of(x) (((x) >> 0) & 0xff)
#include <linux/types.h>
+#include <rte_io.h>
static inline uint8_t nn_readb(volatile const void *addr)
{
- return *((volatile const uint8_t *)(addr));
+ return rte_read8(addr);
}
static inline void nn_writeb(uint8_t val, volatile void *addr)
{
- *((volatile uint8_t *)(addr)) = val;
+ rte_write8(val, addr);
}
static inline uint32_t nn_readl(volatile const void *addr)
{
- return *((volatile const uint32_t *)(addr));
+ return rte_read32(addr);
}
static inline void nn_writel(uint32_t val, volatile void *addr)
{
- *((volatile uint32_t *)(addr)) = val;
+ rte_write32(val, addr);
}
static inline uint64_t nn_readq(volatile void *addr)
--
2.5.5
^ permalink raw reply related
* [PATCH v2 26/29] net/qede: use eal I/O device memory read/write API
From: Jerin Jacob @ 2016-12-27 9:49 UTC (permalink / raw)
To: dev
Cc: konstantin.ananyev, thomas.monjalon, bruce.richardson, jianbo.liu,
viktorin, santosh.shukla, Harish Patil, Rasesh Mody, Jerin Jacob
In-Reply-To: <1482832175-27199-1-git-send-email-jerin.jacob@caviumnetworks.com>
From: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Replace the raw I/O device memory read/write access with eal
abstraction for I/O device memory read/write access to fix
portability issues across different architectures.
CC: Harish Patil <harish.patil@cavium.com>
CC: Rasesh Mody <rasesh.mody@cavium.com>
Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
drivers/net/qede/base/bcm_osal.h | 20 +++++++++++---------
drivers/net/qede/base/ecore_int_api.h | 28 +++++++++++++++++++++++-----
drivers/net/qede/base/ecore_spq.c | 3 ++-
drivers/net/qede/qede_rxtx.c | 2 +-
4 files changed, 37 insertions(+), 16 deletions(-)
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 0b446f2..33d43c6 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -18,6 +18,7 @@
#include <rte_cycles.h>
#include <rte_debug.h>
#include <rte_ether.h>
+#include <rte_io.h>
/* Forward declaration */
struct ecore_dev;
@@ -113,18 +114,18 @@ void *osal_dma_alloc_coherent_aligned(struct ecore_dev *, dma_addr_t *,
/* HW reads/writes */
-#define DIRECT_REG_RD(_dev, _reg_addr) \
- (*((volatile u32 *) (_reg_addr)))
+#define DIRECT_REG_RD(_dev, _reg_addr) rte_read32(_reg_addr)
#define REG_RD(_p_hwfn, _reg_offset) \
DIRECT_REG_RD(_p_hwfn, \
((u8 *)(uintptr_t)(_p_hwfn->regview) + (_reg_offset)))
-#define DIRECT_REG_WR16(_reg_addr, _val) \
- (*((volatile u16 *)(_reg_addr)) = _val)
+#define DIRECT_REG_WR16(_reg_addr, _val) rte_write16((_val), (_reg_addr))
-#define DIRECT_REG_WR(_dev, _reg_addr, _val) \
- (*((volatile u32 *)(_reg_addr)) = _val)
+#define DIRECT_REG_WR(_dev, _reg_addr, _val) rte_write32((_val), (_reg_addr))
+
+#define DIRECT_REG_WR_RELAXED(_dev, _reg_addr, _val) \
+ rte_write32_relaxed((_val), (_reg_addr))
#define REG_WR(_p_hwfn, _reg_offset, _val) \
DIRECT_REG_WR(NULL, \
@@ -134,9 +135,10 @@ void *osal_dma_alloc_coherent_aligned(struct ecore_dev *, dma_addr_t *,
DIRECT_REG_WR16(((u8 *)(uintptr_t)(_p_hwfn->regview) + \
(_reg_offset)), (u16)_val)
-#define DOORBELL(_p_hwfn, _db_addr, _val) \
- DIRECT_REG_WR(_p_hwfn, \
- ((u8 *)(uintptr_t)(_p_hwfn->doorbells) + (_db_addr)), (u32)_val)
+#define DOORBELL(_p_hwfn, _db_addr, _val) \
+ DIRECT_REG_WR_RELAXED((_p_hwfn), \
+ ((u8 *)(uintptr_t)(_p_hwfn->doorbells) + \
+ (_db_addr)), (u32)_val)
/* Mutexes */
diff --git a/drivers/net/qede/base/ecore_int_api.h b/drivers/net/qede/base/ecore_int_api.h
index fc873e7..a0d6a43 100644
--- a/drivers/net/qede/base/ecore_int_api.h
+++ b/drivers/net/qede/base/ecore_int_api.h
@@ -120,19 +120,37 @@ static OSAL_INLINE void __internal_ram_wr(void *p_hwfn,
}
#ifdef ECORE_CONFIG_DIRECT_HWFN
+static OSAL_INLINE void __internal_ram_wr_relaxed(struct ecore_hwfn *p_hwfn,
+ void OSAL_IOMEM * addr,
+ int size, u32 *data)
+#else
+static OSAL_INLINE void __internal_ram_wr_relaxed(void *p_hwfn,
+ void OSAL_IOMEM * addr,
+ int size, u32 *data)
+#endif
+{
+ unsigned int i;
+
+ for (i = 0; i < size / sizeof(*data); i++)
+ DIRECT_REG_WR_RELAXED(p_hwfn, &((u32 OSAL_IOMEM *)addr)[i],
+ data[i]);
+}
+
+#ifdef ECORE_CONFIG_DIRECT_HWFN
static OSAL_INLINE void internal_ram_wr(struct ecore_hwfn *p_hwfn,
- void OSAL_IOMEM *addr,
- int size, u32 *data)
+ void OSAL_IOMEM * addr,
+ int size, u32 *data)
{
- __internal_ram_wr(p_hwfn, addr, size, data);
+ __internal_ram_wr_relaxed(p_hwfn, addr, size, data);
}
#else
static OSAL_INLINE void internal_ram_wr(void OSAL_IOMEM *addr,
- int size, u32 *data)
+ int size, u32 *data)
{
- __internal_ram_wr(OSAL_NULL, addr, size, data);
+ __internal_ram_wr_relaxed(OSAL_NULL, addr, size, data);
}
#endif
+
#endif
struct ecore_hwfn;
diff --git a/drivers/net/qede/base/ecore_spq.c b/drivers/net/qede/base/ecore_spq.c
index 0d744dd..6e5ce5d 100644
--- a/drivers/net/qede/base/ecore_spq.c
+++ b/drivers/net/qede/base/ecore_spq.c
@@ -248,7 +248,8 @@ static enum _ecore_status_t ecore_spq_hw_post(struct ecore_hwfn *p_hwfn,
/* make sure the SPQE is updated before the doorbell */
OSAL_WMB(p_hwfn->p_dev);
- DOORBELL(p_hwfn, DB_ADDR(p_spq->cid, DQ_DEMS_LEGACY), *(u32 *)&db);
+ DOORBELL(p_hwfn, DB_ADDR(p_spq->cid, DQ_DEMS_LEGACY),
+ *(u32 *)&db);
/* make sure doorbell is rang */
OSAL_WMB(p_hwfn->p_dev);
diff --git a/drivers/net/qede/qede_rxtx.c b/drivers/net/qede/qede_rxtx.c
index 2e181c8..e1e9956 100644
--- a/drivers/net/qede/qede_rxtx.c
+++ b/drivers/net/qede/qede_rxtx.c
@@ -1246,7 +1246,7 @@ qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
txq->tx_db.data.bd_prod = bd_prod;
rte_wmb();
rte_compiler_barrier();
- DIRECT_REG_WR(edev, txq->doorbell_addr, txq->tx_db.raw);
+ DIRECT_REG_WR_RELAXED(edev, txq->doorbell_addr, txq->tx_db.raw);
rte_wmb();
/* Check again for Tx completions */
--
2.5.5
^ permalink raw reply related
* [PATCH v2 27/29] net/thunderx: use eal I/O device memory read/write API
From: Jerin Jacob @ 2016-12-27 9:49 UTC (permalink / raw)
To: dev
Cc: konstantin.ananyev, thomas.monjalon, bruce.richardson, jianbo.liu,
viktorin, santosh.shukla, Jerin Jacob
In-Reply-To: <1482832175-27199-1-git-send-email-jerin.jacob@caviumnetworks.com>
Replace the raw I/O device memory read/write access with eal
abstraction for I/O device memory read/write access to fix portability
issues across different architectures.
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
drivers/net/thunderx/base/nicvf_plat.h | 36 ++++------------------------------
1 file changed, 4 insertions(+), 32 deletions(-)
diff --git a/drivers/net/thunderx/base/nicvf_plat.h b/drivers/net/thunderx/base/nicvf_plat.h
index 83c1844..3754e1b 100644
--- a/drivers/net/thunderx/base/nicvf_plat.h
+++ b/drivers/net/thunderx/base/nicvf_plat.h
@@ -69,31 +69,15 @@
#include <rte_ether.h>
#define NICVF_MAC_ADDR_SIZE ETHER_ADDR_LEN
+#include <rte_io.h>
+#define nicvf_addr_write(addr, val) rte_write64_relaxed((val), (void *)(addr))
+#define nicvf_addr_read(addr) rte_read64_relaxed((void *)(addr))
+
/* ARM64 specific functions */
#if defined(RTE_ARCH_ARM64)
#define nicvf_prefetch_store_keep(_ptr) ({\
asm volatile("prfm pstl1keep, %a0\n" : : "p" (_ptr)); })
-static inline void __attribute__((always_inline))
-nicvf_addr_write(uintptr_t addr, uint64_t val)
-{
- asm volatile(
- "str %x[val], [%x[addr]]"
- :
- : [val] "r" (val), [addr] "r" (addr));
-}
-
-static inline uint64_t __attribute__((always_inline))
-nicvf_addr_read(uintptr_t addr)
-{
- uint64_t val;
-
- asm volatile(
- "ldr %x[val], [%x[addr]]"
- : [val] "=r" (val)
- : [addr] "r" (addr));
- return val;
-}
#define NICVF_LOAD_PAIR(reg1, reg2, addr) ({ \
asm volatile( \
@@ -106,18 +90,6 @@ nicvf_addr_read(uintptr_t addr)
#define nicvf_prefetch_store_keep(_ptr) do {} while (0)
-static inline void __attribute__((always_inline))
-nicvf_addr_write(uintptr_t addr, uint64_t val)
-{
- *(volatile uint64_t *)addr = val;
-}
-
-static inline uint64_t __attribute__((always_inline))
-nicvf_addr_read(uintptr_t addr)
-{
- return *(volatile uint64_t *)addr;
-}
-
#define NICVF_LOAD_PAIR(reg1, reg2, addr) \
do { \
reg1 = nicvf_addr_read((uintptr_t)addr); \
--
2.5.5
^ permalink raw reply related
* [PATCH v2 28/29] net/virtio: use eal I/O device memory read/write API
From: Jerin Jacob @ 2016-12-27 9:49 UTC (permalink / raw)
To: dev
Cc: konstantin.ananyev, thomas.monjalon, bruce.richardson, jianbo.liu,
viktorin, santosh.shukla, Huawei Xie, Yuanhan Liu, Jerin Jacob
In-Reply-To: <1482832175-27199-1-git-send-email-jerin.jacob@caviumnetworks.com>
From: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Replace the raw I/O device memory read/write access with eal
abstraction for I/O device memory read/write access to fix
portability issues across different architectures.
CC: Huawei Xie <huawei.xie@intel.com>
CC: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
Acked-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
drivers/net/virtio/virtio_pci.c | 97 +++++++++++++----------------------------
1 file changed, 31 insertions(+), 66 deletions(-)
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index 9b47165..7c1cb4c 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -37,6 +37,8 @@
#include <fcntl.h>
#endif
+#include <rte_io.h>
+
#include "virtio_pci.h"
#include "virtio_logs.h"
#include "virtqueue.h"
@@ -316,48 +318,11 @@ static const struct virtio_pci_ops legacy_ops = {
.notify_queue = legacy_notify_queue,
};
-
-static inline uint8_t
-io_read8(uint8_t *addr)
-{
- return *(volatile uint8_t *)addr;
-}
-
-static inline void
-io_write8(uint8_t val, uint8_t *addr)
-{
- *(volatile uint8_t *)addr = val;
-}
-
-static inline uint16_t
-io_read16(uint16_t *addr)
-{
- return *(volatile uint16_t *)addr;
-}
-
-static inline void
-io_write16(uint16_t val, uint16_t *addr)
-{
- *(volatile uint16_t *)addr = val;
-}
-
-static inline uint32_t
-io_read32(uint32_t *addr)
-{
- return *(volatile uint32_t *)addr;
-}
-
-static inline void
-io_write32(uint32_t val, uint32_t *addr)
-{
- *(volatile uint32_t *)addr = val;
-}
-
static inline void
io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
{
- io_write32(val & ((1ULL << 32) - 1), lo);
- io_write32(val >> 32, hi);
+ rte_write32(val & ((1ULL << 32) - 1), lo);
+ rte_write32(val >> 32, hi);
}
static void
@@ -369,13 +334,13 @@ modern_read_dev_config(struct virtio_hw *hw, size_t offset,
uint8_t old_gen, new_gen;
do {
- old_gen = io_read8(&hw->common_cfg->config_generation);
+ old_gen = rte_read8(&hw->common_cfg->config_generation);
p = dst;
for (i = 0; i < length; i++)
- *p++ = io_read8((uint8_t *)hw->dev_cfg + offset + i);
+ *p++ = rte_read8((uint8_t *)hw->dev_cfg + offset + i);
- new_gen = io_read8(&hw->common_cfg->config_generation);
+ new_gen = rte_read8(&hw->common_cfg->config_generation);
} while (old_gen != new_gen);
}
@@ -387,7 +352,7 @@ modern_write_dev_config(struct virtio_hw *hw, size_t offset,
const uint8_t *p = src;
for (i = 0; i < length; i++)
- io_write8(*p++, (uint8_t *)hw->dev_cfg + offset + i);
+ rte_write8((*p++), (((uint8_t *)hw->dev_cfg) + offset + i));
}
static uint64_t
@@ -395,11 +360,11 @@ modern_get_features(struct virtio_hw *hw)
{
uint32_t features_lo, features_hi;
- io_write32(0, &hw->common_cfg->device_feature_select);
- features_lo = io_read32(&hw->common_cfg->device_feature);
+ rte_write32(0, &hw->common_cfg->device_feature_select);
+ features_lo = rte_read32(&hw->common_cfg->device_feature);
- io_write32(1, &hw->common_cfg->device_feature_select);
- features_hi = io_read32(&hw->common_cfg->device_feature);
+ rte_write32(1, &hw->common_cfg->device_feature_select);
+ features_hi = rte_read32(&hw->common_cfg->device_feature);
return ((uint64_t)features_hi << 32) | features_lo;
}
@@ -407,25 +372,25 @@ modern_get_features(struct virtio_hw *hw)
static void
modern_set_features(struct virtio_hw *hw, uint64_t features)
{
- io_write32(0, &hw->common_cfg->guest_feature_select);
- io_write32(features & ((1ULL << 32) - 1),
- &hw->common_cfg->guest_feature);
+ rte_write32(0, &hw->common_cfg->guest_feature_select);
+ rte_write32(features & ((1ULL << 32) - 1),
+ &hw->common_cfg->guest_feature);
- io_write32(1, &hw->common_cfg->guest_feature_select);
- io_write32(features >> 32,
- &hw->common_cfg->guest_feature);
+ rte_write32(1, &hw->common_cfg->guest_feature_select);
+ rte_write32(features >> 32,
+ &hw->common_cfg->guest_feature);
}
static uint8_t
modern_get_status(struct virtio_hw *hw)
{
- return io_read8(&hw->common_cfg->device_status);
+ return rte_read8(&hw->common_cfg->device_status);
}
static void
modern_set_status(struct virtio_hw *hw, uint8_t status)
{
- io_write8(status, &hw->common_cfg->device_status);
+ rte_write8(status, &hw->common_cfg->device_status);
}
static void
@@ -438,21 +403,21 @@ modern_reset(struct virtio_hw *hw)
static uint8_t
modern_get_isr(struct virtio_hw *hw)
{
- return io_read8(hw->isr);
+ return rte_read8(hw->isr);
}
static uint16_t
modern_set_config_irq(struct virtio_hw *hw, uint16_t vec)
{
- io_write16(vec, &hw->common_cfg->msix_config);
- return io_read16(&hw->common_cfg->msix_config);
+ rte_write16(vec, &hw->common_cfg->msix_config);
+ return rte_read16(&hw->common_cfg->msix_config);
}
static uint16_t
modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
{
- io_write16(queue_id, &hw->common_cfg->queue_select);
- return io_read16(&hw->common_cfg->queue_size);
+ rte_write16(queue_id, &hw->common_cfg->queue_select);
+ return rte_read16(&hw->common_cfg->queue_size);
}
static int
@@ -470,7 +435,7 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
ring[vq->vq_nentries]),
VIRTIO_PCI_VRING_ALIGN);
- io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
+ rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
io_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo,
&hw->common_cfg->queue_desc_hi);
@@ -479,11 +444,11 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
io_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo,
&hw->common_cfg->queue_used_hi);
- notify_off = io_read16(&hw->common_cfg->queue_notify_off);
+ notify_off = rte_read16(&hw->common_cfg->queue_notify_off);
vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
notify_off * hw->notify_off_multiplier);
- io_write16(1, &hw->common_cfg->queue_enable);
+ rte_write16(1, &hw->common_cfg->queue_enable);
PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
PMD_INIT_LOG(DEBUG, "\t desc_addr: %" PRIx64, desc_addr);
@@ -498,7 +463,7 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
static void
modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
{
- io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
+ rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
io_write64_twopart(0, &hw->common_cfg->queue_desc_lo,
&hw->common_cfg->queue_desc_hi);
@@ -507,13 +472,13 @@ modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
io_write64_twopart(0, &hw->common_cfg->queue_used_lo,
&hw->common_cfg->queue_used_hi);
- io_write16(0, &hw->common_cfg->queue_enable);
+ rte_write16(0, &hw->common_cfg->queue_enable);
}
static void
modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq)
{
- io_write16(1, vq->notify_addr);
+ rte_write16(1, vq->notify_addr);
}
static const struct virtio_pci_ops modern_ops = {
--
2.5.5
^ permalink raw reply related
* [PATCH v2 29/29] net/vmxnet3: use eal I/O device memory read/write API
From: Jerin Jacob @ 2016-12-27 9:49 UTC (permalink / raw)
To: dev
Cc: konstantin.ananyev, thomas.monjalon, bruce.richardson, jianbo.liu,
viktorin, santosh.shukla, Yong Wang, Jerin Jacob
In-Reply-To: <1482832175-27199-1-git-send-email-jerin.jacob@caviumnetworks.com>
From: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Replace the raw I/O device memory read/write access with eal
abstraction for I/O device memory read/write access to fix
portability issues across different architectures.
CC: Yong Wang <yongwang@vmware.com>
Signed-off-by: Santosh Shukla <santosh.shukla@caviumnetworks.com>
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
drivers/net/vmxnet3/vmxnet3_ethdev.h | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.h b/drivers/net/vmxnet3/vmxnet3_ethdev.h
index 7d3b11e..85c00e4 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethdev.h
+++ b/drivers/net/vmxnet3/vmxnet3_ethdev.h
@@ -34,6 +34,8 @@
#ifndef _VMXNET3_ETHDEV_H_
#define _VMXNET3_ETHDEV_H_
+#include <rte_io.h>
+
#define VMXNET3_MAX_MAC_ADDRS 1
/* UPT feature to negotiate */
@@ -120,7 +122,7 @@ struct vmxnet3_hw {
/* Config space read/writes */
-#define VMXNET3_PCI_REG(reg) (*((volatile uint32_t *)(reg)))
+#define VMXNET3_PCI_REG(reg) rte_read32(reg)
static inline uint32_t
vmxnet3_read_addr(volatile void *addr)
@@ -128,9 +130,7 @@ vmxnet3_read_addr(volatile void *addr)
return VMXNET3_PCI_REG(addr);
}
-#define VMXNET3_PCI_REG_WRITE(reg, value) do { \
- VMXNET3_PCI_REG((reg)) = (value); \
-} while(0)
+#define VMXNET3_PCI_REG_WRITE(reg, value) rte_write32((value), (reg))
#define VMXNET3_PCI_BAR0_REG_ADDR(hw, reg) \
((volatile uint32_t *)((char *)(hw)->hw_addr0 + (reg)))
--
2.5.5
^ permalink raw reply related
* [PATCH v2 0/4] eal/common: introduce rte_memset and related test
From: Zhiyong Yang @ 2016-12-27 10:04 UTC (permalink / raw)
To: dev
Cc: yuanhan.liu, thomas.monjalon, bruce.richardson,
konstantin.ananyev, pablo.de.lara.guarch
In-Reply-To: <1480926387-63838-2-git-send-email-zhiyong.yang@intel.com>
DPDK code suffers a significant performance drop in some cases when calling
the glibc function memset. For reference, see the discussion about memset at
http://dpdk.org/ml/archives/dev/2016-October/048628.html
It is necessary to introduce a more efficient function to fix this.
One important thing about rte_memset is that we can get clear control
on what instruction flow is used.
This patchset introduces rte_memset to provide a more efficient
implementation, which brings a clear perf improvement, especially
for small sizes (N bytes), in most application scenarios.
Patch 1 implements rte_memset in the file rte_memset.h on the IA platform.
The file supports three types of instruction sets including sse & avx
(128bits), avx2(256bits) and avx512(512bits). rte_memset makes use of
vectorization and inline functions to improve the perf on IA. In addition,
cache line and memory alignment are fully taken into consideration.
Patch 2 implements a functional autotest to validate that the function
works correctly.
Patch 3 implements performance autotest separately in cache and memory.
We can see the perf of rte_memset is obviously better than glibc memset
especially for small N bytes.
Patch 4 uses rte_memset instead of copy_virtio_net_hdr, which brings a 3%~4%
performance improvement on the IA platform in virtio/vhost non-mergeable
loopback testing.
Changes in V2:
Patch 1:
Rename rte_memset.h -> rte_memset_64.h and create a file rte_memset.h
for each arch.
Patch 3:
Add the perf comparison data between rte_memset and memset on Haswell.
Patch 4:
Modify release_17_02.rst description.
Zhiyong Yang (4):
eal/common: introduce rte_memset on IA platform
app/test: add functional autotest for rte_memset
app/test: add performance autotest for rte_memset
lib/librte_vhost: improve vhost perf using rte_memset
app/test/Makefile | 3 +
app/test/test_memset.c | 158 +++++++++
app/test/test_memset_perf.c | 348 +++++++++++++++++++
doc/guides/rel_notes/release_17_02.rst | 7 +
.../common/include/arch/arm/rte_memset.h | 36 ++
.../common/include/arch/ppc_64/rte_memset.h | 36 ++
.../common/include/arch/tile/rte_memset.h | 36 ++
.../common/include/arch/x86/rte_memset.h | 51 +++
.../common/include/arch/x86/rte_memset_64.h | 378 +++++++++++++++++++++
lib/librte_eal/common/include/generic/rte_memset.h | 52 +++
lib/librte_vhost/virtio_net.c | 18 +-
11 files changed, 1116 insertions(+), 7 deletions(-)
create mode 100644 app/test/test_memset.c
create mode 100644 app/test/test_memset_perf.c
create mode 100644 lib/librte_eal/common/include/arch/arm/rte_memset.h
create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memset.h
create mode 100644 lib/librte_eal/common/include/arch/tile/rte_memset.h
create mode 100644 lib/librte_eal/common/include/arch/x86/rte_memset.h
create mode 100644 lib/librte_eal/common/include/arch/x86/rte_memset_64.h
create mode 100644 lib/librte_eal/common/include/generic/rte_memset.h
--
2.7.4
^ permalink raw reply
* [PATCH v2 1/4] eal/common: introduce rte_memset on IA platform
From: Zhiyong Yang @ 2016-12-27 10:04 UTC (permalink / raw)
To: dev
Cc: yuanhan.liu, thomas.monjalon, bruce.richardson,
konstantin.ananyev, pablo.de.lara.guarch, Zhiyong Yang
In-Reply-To: <1482833098-38096-1-git-send-email-zhiyong.yang@intel.com>
A performance drop has been observed in some cases when DPDK code calls the
glibc function memset. Please refer to the discussion about memset at
http://dpdk.org/ml/archives/dev/2016-October/048628.html
It is necessary to introduce a more efficient function to fix this.
One important thing about rte_memset is that we can get clear control
on what instruction flow is used. This patch supports instruction sets
such as sse & avx(128 bits), avx2(256 bits) and avx512(512bits).
rte_memset makes full use of vectorization and inline function to improve
the perf on IA. In addition, cache line and memory alignment are fully
taken into consideration.
Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---
Changes in V2:
Rename rte_memset.h -> rte_memset_64.h and create a file rte_memset.h
for each arch.
.../common/include/arch/arm/rte_memset.h | 36 ++
.../common/include/arch/ppc_64/rte_memset.h | 36 ++
.../common/include/arch/tile/rte_memset.h | 36 ++
.../common/include/arch/x86/rte_memset.h | 51 +++
.../common/include/arch/x86/rte_memset_64.h | 378 +++++++++++++++++++++
lib/librte_eal/common/include/generic/rte_memset.h | 52 +++
6 files changed, 589 insertions(+)
create mode 100644 lib/librte_eal/common/include/arch/arm/rte_memset.h
create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memset.h
create mode 100644 lib/librte_eal/common/include/arch/tile/rte_memset.h
create mode 100644 lib/librte_eal/common/include/arch/x86/rte_memset.h
create mode 100644 lib/librte_eal/common/include/arch/x86/rte_memset_64.h
create mode 100644 lib/librte_eal/common/include/generic/rte_memset.h
diff --git a/lib/librte_eal/common/include/arch/arm/rte_memset.h b/lib/librte_eal/common/include/arch/arm/rte_memset.h
new file mode 100644
index 0000000..6945f6d
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_memset.h
@@ -0,0 +1,36 @@
+/*
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of RehiveTech nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMSET_ARM_H_
+#define _RTE_MEMSET_ARM_H_
+
+#define rte_memset memset
+
+#endif /* _RTE_MEMSET_ARM_H_ */
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memset.h b/lib/librte_eal/common/include/arch/ppc_64/rte_memset.h
new file mode 100644
index 0000000..0d73f05
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memset.h
@@ -0,0 +1,36 @@
+/*
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of IBM Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_MEMSET_PPC_64_H_
+#define _RTE_MEMSET_PPC_64_H_
+
+#define rte_memset memset
+
+#endif /* _RTE_MEMSET_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/tile/rte_memset.h b/lib/librte_eal/common/include/arch/tile/rte_memset.h
new file mode 100644
index 0000000..e8a1aa1
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/tile/rte_memset.h
@@ -0,0 +1,36 @@
+/*
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of EZchip Semiconductor nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_MEMSET_TILE_H_
+#define _RTE_MEMSET_TILE_H_
+
+#define rte_memset memset
+
+#endif /* _RTE_MEMSET_TILE_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_memset.h b/lib/librte_eal/common/include/arch/x86/rte_memset.h
new file mode 100644
index 0000000..86e0812
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_memset.h
@@ -0,0 +1,51 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMSET_X86_H_
+#define _RTE_MEMSET_X86_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef RTE_ARCH_X86_64
+#include "rte_memset_64.h"
+#else
+#define rte_memset memset
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMSET_X86_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_memset_64.h b/lib/librte_eal/common/include/arch/x86/rte_memset_64.h
new file mode 100644
index 0000000..f25d344
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_memset_64.h
@@ -0,0 +1,378 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMSET_X86_64_H_
+#define _RTE_MEMSET_X86_64_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ *
+ * Functions for vectorised implementation of memset().
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <rte_vect.h>
+
+static inline void *
+rte_memset(void *dst, int a, size_t n) __attribute__((always_inline));
+
+/**
+ * Fill fewer than 16 bytes at dst with the byte value of a.
+ *
+ * @param dst
+ *   Start of the area to fill; no alignment is required on x86.
+ * @param a
+ *   Fill value; only its low 8 bits are used.
+ * @param n
+ *   Number of bytes to fill. Callers in this file only pass n < 16.
+ */
+static inline void
+rte_memset_less16(void *dst, int a, size_t n)
+{
+	/*
+	 * Replicate the fill byte into all eight byte lanes with unsigned
+	 * arithmetic. Building the pattern with "<< 16" on a promoted
+	 * uint16_t overflows signed int for fill bytes >= 0x80, which is
+	 * undefined behaviour.
+	 */
+	const uint64_t fill64 = (uint64_t)(uint8_t)a * 0x0101010101010101ULL;
+	uint8_t *p = (uint8_t *)dst;
+
+	if (n >= 8) {
+		/* 8..15 bytes: a head and a tail store that may overlap. */
+		*(uint64_t *)p = fill64;
+		*(uint64_t *)(p + n - 8) = fill64;
+		return;
+	}
+
+	/* 0..7 bytes: one store per set bit of n, smallest store first. */
+	if (n & 0x01) {
+		*p = (uint8_t)fill64;
+		p += 1;
+	}
+	if (n & 0x02) {
+		*(uint16_t *)p = (uint16_t)fill64;
+		p += 2;
+	}
+	if (n & 0x04)
+		*(uint32_t *)p = (uint32_t)fill64;
+}
+
+static inline void
+rte_memset16(uint8_t *dst, int8_t a)
+{
+ __m128i xmm0;
+
+ xmm0 = _mm_set1_epi8(a);
+ _mm_storeu_si128((__m128i *)dst, xmm0);
+}
+
+static inline void
+rte_memset_17to32(void *dst, int a, size_t n)
+{
+ rte_memset16((uint8_t *)dst, a);
+ rte_memset16((uint8_t *)dst - 16 + n, a);
+}
+
+#ifdef RTE_MACHINE_CPUFLAG_AVX512F
+
+/**
+ * AVX512 implementation below (built when the AVX512F CPU flag is set)
+ */
+
+static inline void
+rte_memset32(uint8_t *dst, int8_t a)
+{
+ __m256i ymm0;
+
+ ymm0 = _mm256_set1_epi8(a);
+ _mm256_storeu_si256((__m256i *)dst, ymm0);
+}
+
+static inline void
+rte_memset64(uint8_t *dst, int8_t a)
+{
+ __m512i zmm0;
+
+ zmm0 = _mm512_set1_epi8(a);
+ _mm512_storeu_si512((void *)dst, zmm0);
+}
+
+static inline void
+rte_memset128blocks(uint8_t *dst, int8_t a, size_t n)
+{
+ __m512i zmm0;
+
+ zmm0 = _mm512_set1_epi8(a);
+ while (n >= 128) {
+ n -= 128;
+ _mm512_store_si512((void *)(dst + 0 * 64), zmm0);
+ _mm512_store_si512((void *)(dst + 1 * 64), zmm0);
+ dst = dst + 128;
+ }
+}
+
+static inline void *
+rte_memset(void *dst, int a, size_t n)
+{
+ void *ret = dst;
+ size_t dstofss;
+ size_t bits;
+
+ if (n < 16) {
+ rte_memset_less16(dst, a, n);
+ return ret;
+ } else if (n == 16) {
+ rte_memset16((uint8_t *)dst, a);
+ return ret;
+ }
+ if (n <= 32) {
+ rte_memset_17to32(dst, a, n);
+ return ret;
+ }
+ if (n <= 64) {
+ rte_memset32((uint8_t *)dst, a);
+ rte_memset32((uint8_t *)dst - 32 + n, a);
+ return ret;
+ }
+ if (n >= 256) {
+ dstofss = ((uintptr_t)dst & 0x3F);
+ if (dstofss > 0) {
+ dstofss = 64 - dstofss;
+ n -= dstofss;
+ rte_memset64((uint8_t *)dst, a);
+ dst = (uint8_t *)dst + dstofss;
+ }
+ rte_memset128blocks((uint8_t *)dst, a, n);
+ bits = n;
+ n = n & 127;
+ bits -= n;
+ dst = (uint8_t *)dst + bits;
+ }
+ if (n > 128) {
+ n -= 128;
+ rte_memset64((uint8_t *)dst, a);
+ rte_memset64((uint8_t *)dst + 64, a);
+ dst = (uint8_t *)dst + 128;
+ }
+ if (n > 64) {
+ rte_memset64((uint8_t *)dst, a);
+ rte_memset64((uint8_t *)dst - 64 + n, a);
+ return ret;
+ }
+ if (n > 0)
+ rte_memset64((uint8_t *)dst - 64 + n, a);
+ return ret;
+}
+
+#elif defined RTE_MACHINE_CPUFLAG_AVX2
+
+/**
+ * AVX2 implementation below
+ */
+
+static inline void
+rte_memset32(uint8_t *dst, int8_t a)
+{
+ __m256i ymm0;
+
+ ymm0 = _mm256_set1_epi8(a);
+ _mm256_storeu_si256((__m256i *)dst, ymm0);
+}
+
+static inline void
+rte_memset_33to64(void *dst, int a, size_t n)
+{
+ rte_memset32((uint8_t *)dst, a);
+ rte_memset32((uint8_t *)dst - 32 + n, a);
+}
+
+static inline void
+rte_memset64blocks(uint8_t *dst, int8_t a, size_t n)
+{
+ __m256i ymm0;
+
+ ymm0 = _mm256_set1_epi8(a);
+ while (n >= 64) {
+ n -= 64;
+ _mm256_store_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0);
+ _mm256_store_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm0);
+ dst = (uint8_t *)dst + 64;
+
+ }
+}
+
+static inline void *
+rte_memset(void *dst, int a, size_t n)
+{
+ void *ret = dst;
+ size_t dstofss;
+ size_t bits;
+
+ if (n < 16) {
+ rte_memset_less16(dst, a, n);
+ return ret;
+ } else if (n == 16) {
+ rte_memset16((uint8_t *)dst, a);
+ return ret;
+ }
+ if (n <= 32) {
+ rte_memset_17to32(dst, a, n);
+ return ret;
+ }
+ if (n <= 64) {
+ rte_memset_33to64(dst, a, n);
+ return ret;
+ }
+ if (n > 64) {
+ dstofss = (uintptr_t)dst & 0x1F;
+ if (dstofss > 0) {
+ dstofss = 32 - dstofss;
+ n -= dstofss;
+ rte_memset32((uint8_t *)dst, a);
+ dst = (uint8_t *)dst + dstofss;
+ }
+ rte_memset64blocks((uint8_t *)dst, a, n);
+ bits = n;
+ n = n & 63;
+ bits -= n;
+ dst = (uint8_t *)dst + bits;
+ }
+ if (n > 32) {
+ rte_memset_33to64(dst, a, n);
+ return ret;
+ }
+ if (n > 0)
+ rte_memset32((uint8_t *)dst - 32 + n, a);
+ return ret;
+}
+
+#else /* RTE_MACHINE_CPUFLAG */
+
+/**
+ * SSE && AVX implementation below
+ */
+
+static inline void
+rte_memset32(uint8_t *dst, int8_t a)
+{
+ __m128i xmm0 = _mm_set1_epi8(a);
+
+ _mm_storeu_si128((__m128i *)dst, xmm0);
+ _mm_storeu_si128((__m128i *)(dst + 16), xmm0);
+}
+
+static inline void
+rte_memset16blocks(uint8_t *dst, int8_t a, size_t n)
+{
+ __m128i xmm0 = _mm_set1_epi8(a);
+
+ while (n >= 16) {
+ n -= 16;
+ _mm_store_si128((__m128i *)(dst + 0 * 16), xmm0);
+ dst = (uint8_t *)dst + 16;
+ }
+}
+
+static inline void
+rte_memset64blocks(uint8_t *dst, int8_t a, size_t n)
+{
+ __m128i xmm0 = _mm_set1_epi8(a);
+
+ while (n >= 64) {
+ n -= 64;
+ _mm_store_si128((__m128i *)(dst + 0 * 16), xmm0);
+ _mm_store_si128((__m128i *)(dst + 1 * 16), xmm0);
+ _mm_store_si128((__m128i *)(dst + 2 * 16), xmm0);
+ _mm_store_si128((__m128i *)(dst + 3 * 16), xmm0);
+ dst = (uint8_t *)dst + 64;
+ }
+}
+
+static inline void *
+rte_memset(void *dst, int a, size_t n)
+{
+ void *ret = dst;
+ size_t dstofss;
+ size_t bits;
+
+ if (n < 16) {
+ rte_memset_less16(dst, a, n);
+ return ret;
+ } else if (n == 16) {
+ rte_memset16((uint8_t *)dst, a);
+ return ret;
+ }
+ if (n <= 32) {
+ rte_memset_17to32(dst, a, n);
+ return ret;
+ }
+ if (n <= 48) {
+ rte_memset32((uint8_t *)dst, a);
+ rte_memset16((uint8_t *)dst - 16 + n, a);
+ return ret;
+ }
+ if (n <= 64) {
+ rte_memset32((uint8_t *)dst, a);
+ rte_memset16((uint8_t *)dst + 32, a);
+ rte_memset16((uint8_t *)dst - 16 + n, a);
+ return ret;
+ }
+ if (n > 64) {
+ dstofss = (uintptr_t)dst & 0xF;
+ if (dstofss > 0) {
+ dstofss = 16 - dstofss;
+ n -= dstofss;
+ rte_memset16((uint8_t *)dst, a);
+ dst = (uint8_t *)dst + dstofss;
+ }
+ rte_memset64blocks((uint8_t *)dst, a, n);
+ bits = n;
+ n &= 63;
+ bits -= n;
+ dst = (uint8_t *)dst + bits;
+ rte_memset16blocks((uint8_t *)dst, a, n);
+ bits = n;
+ n &= 0xf;
+ bits -= n;
+ dst = (uint8_t *)dst + bits;
+ if (n > 0) {
+ rte_memset16((uint8_t *)dst - 16 + n, a);
+ return ret;
+ }
+ }
+ return ret;
+}
+
+#endif /* RTE_MACHINE_CPUFLAG */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMSET_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_memset.h b/lib/librte_eal/common/include/generic/rte_memset.h
new file mode 100644
index 0000000..b03a7d0
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_memset.h
@@ -0,0 +1,52 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMSET_H_
+#define _RTE_MEMSET_H_
+
+/**
+ * @file
+ *
+ * Functions for vectorised implementation of memset().
+ */
+#ifdef _RTE_MEMSET_X86_64_H_
+
+static void *
+rte_memset(void *dst, int a, size_t n);
+
+#else
+
+#define rte_memset memset
+
+#endif
+#endif /* _RTE_MEMSET_H_ */
--
2.7.4
^ permalink raw reply related
* [PATCH v2 2/4] app/test: add functional autotest for rte_memset
From: Zhiyong Yang @ 2016-12-27 10:04 UTC (permalink / raw)
To: dev
Cc: yuanhan.liu, thomas.monjalon, bruce.richardson,
konstantin.ananyev, pablo.de.lara.guarch, Zhiyong Yang
In-Reply-To: <1482833098-38096-1-git-send-email-zhiyong.yang@intel.com>
This file implements the functional autotest for rte_memset, which
validates that the new function rte_memset works
correctly. The implementation of test_memcpy.c is used as a reference.
Usage:
step 1: run ./x86_64-native-linuxapp-gcc/app/test
step 2: run command memset_autotest at the run time.
Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---
app/test/Makefile | 2 +
app/test/test_memset.c | 158 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 160 insertions(+)
create mode 100644 app/test/test_memset.c
diff --git a/app/test/Makefile b/app/test/Makefile
index 5be023a..82da3f3 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -123,6 +123,8 @@ SRCS-y += test_logs.c
SRCS-y += test_memcpy.c
SRCS-y += test_memcpy_perf.c
+SRCS-y += test_memset.c
+
SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash.c
SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_thash.c
SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_perf.c
diff --git a/app/test/test_memset.c b/app/test/test_memset.c
new file mode 100644
index 0000000..c9020bf
--- /dev/null
+++ b/app/test/test_memset.c
@@ -0,0 +1,158 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <rte_common.h>
+#include <rte_random.h>
+#include <rte_memset.h>
+#include "test.h"
+
+/*
+ * Set this to the maximum buffer size you want to test. If it is 0, then the
+ * values in the buf_sizes[] array below will be used.
+ */
+#define TEST_VALUE_RANGE 0
+#define MAX_INT8 127
+#define MIN_INT8 -128
+/* List of buffer sizes to test */
+#if TEST_VALUE_RANGE == 0
+static size_t buf_sizes[] = {
+ 0, 1, 7, 8, 9, 15, 16, 17, 31, 32, 33, 63, 64, 65, 127, 128, 129,
+ 255, 256, 257, 320, 384, 511, 512, 513, 1023, 1024, 1025, 1518,
+ 1522, 1600, 2048, 3072, 4096, 5120, 6144, 7168, 8192
+};
+/* MUST be as large as largest packet size above */
+#define BUFFER_SIZE 8192
+#else /* TEST_VALUE_RANGE != 0 */
+static size_t buf_sizes[TEST_VALUE_RANGE];
+#define BUFFER_SIZE TEST_VALUE_RANGE
+#endif /* TEST_VALUE_RANGE == 0 */
+
+/* Data is aligned on this many bytes (power of 2) */
+#define ALIGNMENT_UNIT 32
+
+/*
+ * Create two buffers and fill the reference buffer with random values; the
+ * destination buffer (dest_buff) starts as a copy of it. rte_memset() is then
+ * applied to `size` bytes of dest_buff starting at offset `off_dst`, using
+ * the fill value `ch`. The test checks the returned pointer, that every byte
+ * of the target area was set, and that the bytes outside it are unchanged.
+ *
+ * Returns 0 on success, -1 on the first mismatch.
+ */
+static int
+test_single_memset(unsigned int off_dst, int ch, size_t size)
+{
+	unsigned int i;
+	uint8_t dest_buff[BUFFER_SIZE + ALIGNMENT_UNIT];
+	uint8_t ref_buff[BUFFER_SIZE + ALIGNMENT_UNIT];
+	void *ret;
+
+	/* Setup buffers */
+	for (i = 0; i < BUFFER_SIZE + ALIGNMENT_UNIT; i++) {
+		ref_buff[i] = (uint8_t) rte_rand();
+		dest_buff[i] = ref_buff[i];
+	}
+	/* Do the rte_memset */
+	ret = rte_memset(dest_buff + off_dst, ch, size);
+	/* Like memset(), rte_memset() must return its dst argument */
+	if (ret != (dest_buff + off_dst)) {
+		printf("rte_memset() returned %p, not %p\n",
+			ret, (void *)(dest_buff + off_dst));
+		return -1;
+	}
+	/* Check nothing before offset was affected */
+	for (i = 0; i < off_dst; i++) {
+		if (dest_buff[i] != ref_buff[i]) {
+			printf("rte_memset() failed for %u bytes (offsets=%u): "
+				"[modified before start of dst].\n",
+				(unsigned int)size, off_dst);
+			return -1;
+		}
+	}
+	/* Check every byte was set; only the low 8 bits of ch are stored */
+	for (i = 0; i < size; i++) {
+		if (dest_buff[i + off_dst] != (uint8_t)ch) {
+			printf("rte_memset() failed for %u bytes (offsets=%u): "
+				"[didn't memset byte %u].\n",
+				(unsigned int)size, off_dst, i);
+			return -1;
+		}
+	}
+	/* Check nothing after memset was affected */
+	for (i = off_dst + size; i < BUFFER_SIZE + ALIGNMENT_UNIT; i++) {
+		if (dest_buff[i] != ref_buff[i]) {
+			printf("rte_memset() failed for %u bytes (offsets=%u): "
+				"[memset too many].\n",
+				(unsigned int)size, off_dst);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Check functionality for various buffer sizes and data offsets/alignments.
+ */
+static int
+func_test(void)
+{
+ unsigned int off_dst, i;
+ unsigned int num_buf_sizes = sizeof(buf_sizes) / sizeof(buf_sizes[0]);
+ int ret;
+ int j;
+
+ for (j = MIN_INT8; j <= MAX_INT8; j++) {
+ for (off_dst = 0; off_dst < ALIGNMENT_UNIT; off_dst++) {
+ for (i = 0; i < num_buf_sizes; i++) {
+ ret = test_single_memset(off_dst, j,
+ buf_sizes[i]);
+ if (ret != 0)
+ return -1;
+ }
+ }
+ }
+ return 0;
+}
+
+static int
+test_memset(void)
+{
+ int ret;
+
+ ret = func_test();
+ if (ret != 0)
+ return -1;
+ return 0;
+}
+
+REGISTER_TEST_COMMAND(memset_autotest, test_memset);
--
2.7.4
^ permalink raw reply related
* [PATCH v2 3/4] app/test: add performance autotest for rte_memset
From: Zhiyong Yang @ 2016-12-27 10:04 UTC (permalink / raw)
To: dev
Cc: yuanhan.liu, thomas.monjalon, bruce.richardson,
konstantin.ananyev, pablo.de.lara.guarch, Zhiyong Yang
In-Reply-To: <1482833098-38096-1-git-send-email-zhiyong.yang@intel.com>
This file implements the perf autotest for rte_memset. Running it
prints performance data comparing rte_memset against memset.
The results show that rte_memset clearly outperforms glibc memset,
especially for small sizes of N bytes.
The first column shows the N size for memset & rte_memset.
The second column lists a set of numbers for rte_memset Vs memset perf
in cache.
The third column lists a set of numbers for rte_memset Vs memset perf
in memory.
The following data was collected on Haswell.
** rte_memset() - memset perf tests
(C = compile-time constant) **
======== ======= ======== ======= ========
Size memset in cache memset in mem
(bytes) (ticks) (ticks)
------- -------------- ---------------
============= 32B aligned ================
1 3 - 8 14 - 115
3 4 - 8 19 - 125
6 3 - 7 19 - 125
8 3 - 6 19 - 124
12 3 - 6 19 - 124
15 3 - 6 19 - 125
16 3 - 8 13 - 125
32 3 - 7 19 - 133
64 3 - 7 28 - 162
65 6 - 8 41 - 182
128 6 - 13 54 - 199
192 8 - 13 77 - 273
255 8 - 16 100 - 222
512 17 - 14 187 - 247
768 22 - 20 270 - 362
1024 29 - 28 329 - 377
2048 63 - 57 564 - 601
4096 104 - 102 993 - 1025
8192 200 - 211 1831 - 2270
------ -------------- -------------- ------
C 6 2 - 2 19 - 19
C 64 2 - 6 28 - 33
C 128 3 - 12 54 - 59
C 192 5 - 29 77 - 83
C 256 6 - 35 100 - 105
C 512 12 - 60 188 - 195
C 768 18 - 20 271 - 362
C 1024 24 - 29 329 - 377
Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---
Change in V2:
Add perf comparison data between rte_memset and memset on Haswell.
app/test/Makefile | 1 +
app/test/test_memset_perf.c | 348 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 349 insertions(+)
create mode 100644 app/test/test_memset_perf.c
diff --git a/app/test/Makefile b/app/test/Makefile
index 82da3f3..1c3e7f1 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -124,6 +124,7 @@ SRCS-y += test_memcpy.c
SRCS-y += test_memcpy_perf.c
SRCS-y += test_memset.c
+SRCS-y += test_memset_perf.c
SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash.c
SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_thash.c
diff --git a/app/test/test_memset_perf.c b/app/test/test_memset_perf.c
new file mode 100644
index 0000000..83b15b5
--- /dev/null
+++ b/app/test/test_memset_perf.c
@@ -0,0 +1,348 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_random.h>
+#include <rte_malloc.h>
+#include <rte_memset.h>
+#include "test.h"
+
+/*
+ * Set this to the maximum buffer size you want to test. If it is 0, then the
+ * values in the buf_sizes[] array below will be used.
+ */
+#define TEST_VALUE_RANGE 0
+
+/* List of buffer sizes to test */
+#if TEST_VALUE_RANGE == 0
+static size_t buf_sizes[] = {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 15, 16, 17, 31, 32, 33, 63, 64, 65,
+ 70, 85, 96, 105, 115, 127, 128, 129, 161, 191, 192, 193, 255, 256,
+ 257, 319, 320, 321, 383, 384, 385, 447, 448, 449, 511, 512, 513,
+ 767, 768, 769, 1023, 1024, 1025, 1518, 1522, 1536, 1600, 2048, 2560,
+ 3072, 3584, 4096, 4608, 5120, 5632, 6144, 6656, 7168, 7680, 8192
+};
+/* MUST be as large as largest packet size above */
+#define SMALL_BUFFER_SIZE 8192
+#else /* TEST_VALUE_RANGE != 0 */
+static size_t buf_sizes[TEST_VALUE_RANGE];
+#define SMALL_BUFFER_SIZE TEST_VALUE_RANGE
+#endif /* TEST_VALUE_RANGE == 0 */
+
+/*
+ * Arrays of this size are used for measuring uncached memory accesses by
+ * picking a random location within the buffer. Make this smaller if there are
+ * memory allocation errors.
+ */
+#define LARGE_BUFFER_SIZE (100 * 1024 * 1024)
+
+/* How many times to run timing loop for performance tests */
+#define TEST_ITERATIONS 1000000
+#define TEST_BATCH_SIZE 100
+
+/* Data is aligned on this many bytes (power of 2) */
+#ifdef RTE_MACHINE_CPUFLAG_AVX512F
+#define ALIGNMENT_UNIT 64
+#elif defined RTE_MACHINE_CPUFLAG_AVX2
+#define ALIGNMENT_UNIT 32
+#else /* RTE_MACHINE_CPUFLAG */
+#define ALIGNMENT_UNIT 16
+#endif /* RTE_MACHINE_CPUFLAG */
+
+/*
+ * Pointers used in performance tests. The two large buffers are for uncached
+ * access where random addresses within the buffer are used for each
+ * memset. The two small buffers are for cached access.
+ */
+static uint8_t *large_buf_read, *large_buf_write;
+static uint8_t *small_buf_read, *small_buf_write;
+
+/* Initialise data buffers. */
+static int
+init_buffers(void)
+{
+ unsigned int i;
+
+ large_buf_read = rte_malloc("memset", LARGE_BUFFER_SIZE
+ + ALIGNMENT_UNIT, ALIGNMENT_UNIT);
+ if (large_buf_read == NULL)
+ goto error_large_buf_read;
+
+ large_buf_write = rte_malloc("memset", LARGE_BUFFER_SIZE
+ + ALIGNMENT_UNIT, ALIGNMENT_UNIT);
+ if (large_buf_write == NULL)
+ goto error_large_buf_write;
+
+ small_buf_read = rte_malloc("memset", SMALL_BUFFER_SIZE
+ + ALIGNMENT_UNIT, ALIGNMENT_UNIT);
+ if (small_buf_read == NULL)
+ goto error_small_buf_read;
+
+ small_buf_write = rte_malloc("memset", SMALL_BUFFER_SIZE
+ + ALIGNMENT_UNIT, ALIGNMENT_UNIT);
+ if (small_buf_write == NULL)
+ goto error_small_buf_write;
+
+ for (i = 0; i < LARGE_BUFFER_SIZE; i++)
+ large_buf_read[i] = rte_rand();
+ for (i = 0; i < SMALL_BUFFER_SIZE; i++)
+ small_buf_read[i] = rte_rand();
+
+ return 0;
+
+error_small_buf_write:
+ rte_free(small_buf_read);
+error_small_buf_read:
+ rte_free(large_buf_write);
+error_large_buf_write:
+ rte_free(large_buf_read);
+error_large_buf_read:
+ printf("ERROR: not enough memory\n");
+ return -1;
+}
+
+/* Cleanup data buffers */
+static void
+free_buffers(void)
+{
+ rte_free(large_buf_read);
+ rte_free(large_buf_write);
+ rte_free(small_buf_read);
+ rte_free(small_buf_write);
+}
+
+/*
+ * Get a random offset into large array, with enough space needed to perform
+ * max memset size. Offset is aligned, uoffset is used for unalignment setting.
+ */
+static inline size_t
+get_rand_offset(size_t uoffset)
+{
+ return ((rte_rand() % (LARGE_BUFFER_SIZE - SMALL_BUFFER_SIZE)) &
+ ~(ALIGNMENT_UNIT - 1)) + uoffset;
+}
+
+/* Fill in destination addresses. */
+static inline void
+fill_addr_arrays(size_t *dst_addr, int is_dst_cached, size_t dst_uoffset)
+{
+ unsigned int i;
+
+ for (i = 0; i < TEST_BATCH_SIZE; i++)
+ dst_addr[i] = (is_dst_cached) ? dst_uoffset :
+ get_rand_offset(dst_uoffset);
+}
+
+/*
+ * WORKAROUND: For some reason the first test doing an uncached write
+ * takes a very long time (~25 times longer than is expected). So we do
+ * it once without timing.
+ */
+static void
+do_uncached_write(uint8_t *dst, int is_dst_cached, size_t size)
+{
+ unsigned int i, j;
+ size_t dst_addrs[TEST_BATCH_SIZE];
+ int ch = rte_rand() & 0xff;
+
+ for (i = 0; i < (TEST_ITERATIONS / TEST_BATCH_SIZE); i++) {
+ fill_addr_arrays(dst_addrs, is_dst_cached, 0);
+ for (j = 0; j < TEST_BATCH_SIZE; j++)
+ rte_memset(dst+dst_addrs[j], ch, size);
+ }
+}
+
+/*
+ * Run a single memset performance test. This is a macro to ensure that if
+ * the "size" parameter is a constant it won't be converted to a variable.
+ */
+#define SINGLE_PERF_TEST(dst, is_dst_cached, dst_uoffset, size) \
+do { \
+ unsigned int iter, t; \
+ size_t dst_addrs[TEST_BATCH_SIZE]; \
+ uint64_t start_time, total_time = 0; \
+ uint64_t total_time2 = 0; \
+ int ch = rte_rand() & 0xff; \
+ \
+ for (iter = 0; iter < (TEST_ITERATIONS / TEST_BATCH_SIZE); iter++) {\
+ fill_addr_arrays(dst_addrs, is_dst_cached, dst_uoffset); \
+ start_time = rte_rdtsc(); \
+ for (t = 0; t < TEST_BATCH_SIZE; t++) \
+ rte_memset(dst+dst_addrs[t], ch, size); \
+ total_time += rte_rdtsc() - start_time; \
+ } \
+ for (iter = 0; iter < (TEST_ITERATIONS / TEST_BATCH_SIZE); iter++) {\
+ fill_addr_arrays(dst_addrs, is_dst_cached, dst_uoffset); \
+ start_time = rte_rdtsc(); \
+ for (t = 0; t < TEST_BATCH_SIZE; t++) \
+ memset(dst+dst_addrs[t], ch, size); \
+ total_time2 += rte_rdtsc() - start_time; \
+ } \
+ printf("%8.0f -", (double)total_time / TEST_ITERATIONS); \
+ printf("%5.0f", (double)total_time2 / TEST_ITERATIONS); \
+} while (0)
+
+/* Run aligned memset tests. */
+#define ALL_PERF_TESTS_FOR_SIZE(n) \
+do { \
+ if (__builtin_constant_p(n)) \
+ printf("\nC%6u", (unsigned int)n); \
+ else \
+ printf("\n%7u", (unsigned int)n); \
+ SINGLE_PERF_TEST(small_buf_write, 1, 0, n); \
+ SINGLE_PERF_TEST(large_buf_write, 0, 0, n); \
+} while (0)
+
+/* Run unaligned memset tests */
+#define ALL_PERF_TESTS_FOR_SIZE_UNALIGNED(n) \
+do { \
+ if (__builtin_constant_p(n)) \
+ printf("\nC%6u", (unsigned int)n); \
+ else \
+ printf("\n%7u", (unsigned int)n); \
+ SINGLE_PERF_TEST(small_buf_write, 1, 1, n); \
+ SINGLE_PERF_TEST(large_buf_write, 0, 1, n); \
+} while (0)
+
+/* Run memset tests for constant length */
+#define ALL_PERF_TEST_FOR_CONSTANT \
+do { \
+ TEST_CONSTANT(6U); TEST_CONSTANT(64U); TEST_CONSTANT(128U); \
+ TEST_CONSTANT(192U); TEST_CONSTANT(256U); TEST_CONSTANT(512U); \
+ TEST_CONSTANT(768U); TEST_CONSTANT(1024U); TEST_CONSTANT(1536U); \
+} while (0)
+
+/* Run all memset tests for aligned constant cases */
+static inline void
+perf_test_constant_aligned(void)
+{
+#define TEST_CONSTANT ALL_PERF_TESTS_FOR_SIZE
+ ALL_PERF_TEST_FOR_CONSTANT;
+#undef TEST_CONSTANT
+}
+
+/* Run all memset tests for unaligned constant cases */
+static inline void
+perf_test_constant_unaligned(void)
+{
+#define TEST_CONSTANT ALL_PERF_TESTS_FOR_SIZE_UNALIGNED
+ ALL_PERF_TEST_FOR_CONSTANT;
+#undef TEST_CONSTANT
+}
+
+/* Run all memset tests for aligned variable cases */
+static inline void
+perf_test_variable_aligned(void)
+{
+ unsigned int n = sizeof(buf_sizes) / sizeof(buf_sizes[0]);
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ ALL_PERF_TESTS_FOR_SIZE((size_t)buf_sizes[i]);
+}
+
+/* Run all memset tests for unaligned variable cases */
+static inline void
+perf_test_variable_unaligned(void)
+{
+ unsigned int n = sizeof(buf_sizes) / sizeof(buf_sizes[0]);
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ ALL_PERF_TESTS_FOR_SIZE_UNALIGNED((size_t)buf_sizes[i]);
+}
+
+/* Run all memset tests */
+static int
+perf_test(void)
+{
+ int ret;
+
+ ret = init_buffers();
+ if (ret != 0)
+ return ret;
+
+#if TEST_VALUE_RANGE != 0
+ /* Set up buf_sizes array, if required */
+ unsigned int i;
+
+ for (i = 0; i < TEST_VALUE_RANGE; i++)
+ buf_sizes[i] = i;
+#endif
+
+ /* See function comment */
+ do_uncached_write(large_buf_write, 0, SMALL_BUFFER_SIZE);
+
+ printf("\n** rte_memset() - memset perf tests \t\n \
+ (C = compile-time constant) **\n"
+ "======== ======= ======== ======= ========\n"
+ " Size memset in cache memset in mem\n"
+ "(bytes) (ticks) (ticks)\n"
+ "------- -------------- ---------------");
+
+ printf("\n============= %2dB aligned ================", ALIGNMENT_UNIT);
+ /* Do aligned tests where size is a variable */
+ perf_test_variable_aligned();
+ printf("\n------ -------------- -------------- ------");
+ /* Do aligned tests where size is a compile-time constant */
+ perf_test_constant_aligned();
+ printf("\n============= Unaligned ===================");
+ /* Do unaligned tests where size is a variable */
+ perf_test_variable_unaligned();
+ printf("\n------ -------------- -------------- ------");
+ /* Do unaligned tests where size is a compile-time constant */
+ perf_test_constant_unaligned();
+ printf("\n====== ============== ============== =======\n\n");
+
+ free_buffers();
+
+ return 0;
+}
+
+static int
+test_memset_perf(void)
+{
+ int ret;
+
+ ret = perf_test();
+ if (ret != 0)
+ return -1;
+ return 0;
+}
+
+REGISTER_TEST_COMMAND(memset_perf_autotest, test_memset_perf);
--
2.7.4
^ permalink raw reply related
* [PATCH v2 4/4] lib/librte_vhost: improve vhost perf using rte_memset
From: Zhiyong Yang @ 2016-12-27 10:04 UTC (permalink / raw)
To: dev
Cc: yuanhan.liu, thomas.monjalon, bruce.richardson,
konstantin.ananyev, pablo.de.lara.guarch, Zhiyong Yang
In-Reply-To: <1482833098-38096-1-git-send-email-zhiyong.yang@intel.com>
Using rte_memset instead of copy_virtio_net_hdr can bring 3%~4%
performance improvements on IA platform from virtio/vhost
non-mergeable loopback testing.
Two key points have been considered:
1. One variable initialization could be saved, which involves memory
store.
2. copy_virtio_net_hdr involves both load (from stack, the virtio_hdr
var) and store (to virtio driver memory), while rte_memset just involves
store.
Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---
Changes in V2:
Modify release_17_02.rst description.
doc/guides/rel_notes/release_17_02.rst | 7 +++++++
lib/librte_vhost/virtio_net.c | 18 +++++++++++-------
2 files changed, 18 insertions(+), 7 deletions(-)
diff --git a/doc/guides/rel_notes/release_17_02.rst b/doc/guides/rel_notes/release_17_02.rst
index 180af82..3d39cde 100644
--- a/doc/guides/rel_notes/release_17_02.rst
+++ b/doc/guides/rel_notes/release_17_02.rst
@@ -52,6 +52,13 @@ New Features
See the :ref:`Generic flow API <Generic_flow_API>` documentation for more
information.
+* **Introduced rte_memset on IA platform.**
+
+ A performance drop was observed in some cases on Ivybridge when DPDK code calls
+ the glibc function memset. A more efficient function was needed to replace it.
+ The function rte_memset supports three types of instruction sets: sse & avx
+ (128 bits), avx2 (256 bits) and avx512 (512 bits), and has better performance
+ than glibc memset.
Resolved Issues
---------------
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 595f67c..392b31b 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -37,6 +37,7 @@
#include <rte_mbuf.h>
#include <rte_memcpy.h>
+#include <rte_memset.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_virtio_net.h>
@@ -194,7 +195,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs,
uint32_t cpy_len;
struct vring_desc *desc;
uint64_t desc_addr;
- struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
+ struct virtio_net_hdr *virtio_hdr;
desc = &descs[desc_idx];
desc_addr = gpa_to_vva(dev, desc->addr);
@@ -208,8 +209,9 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs,
rte_prefetch0((void *)(uintptr_t)desc_addr);
- virtio_enqueue_offload(m, &virtio_hdr.hdr);
- copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
+ virtio_hdr = (struct virtio_net_hdr *)(uintptr_t)desc_addr;
+ rte_memset(virtio_hdr, 0, sizeof(*virtio_hdr));
+ virtio_enqueue_offload(m, virtio_hdr);
vhost_log_write(dev, desc->addr, dev->vhost_hlen);
PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
@@ -459,7 +461,6 @@ static inline int __attribute__((always_inline))
copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
struct buf_vector *buf_vec, uint16_t num_buffers)
{
- struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
uint32_t vec_idx = 0;
uint64_t desc_addr;
uint32_t mbuf_offset, mbuf_avail;
@@ -480,7 +481,6 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
hdr_phys_addr = buf_vec[vec_idx].buf_addr;
rte_prefetch0((void *)(uintptr_t)hdr_addr);
- virtio_hdr.num_buffers = num_buffers;
LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n",
dev->vid, num_buffers);
@@ -512,8 +512,12 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
}
if (hdr_addr) {
- virtio_enqueue_offload(hdr_mbuf, &virtio_hdr.hdr);
- copy_virtio_net_hdr(dev, hdr_addr, virtio_hdr);
+ struct virtio_net_hdr_mrg_rxbuf *hdr =
+ (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;
+
+ rte_memset(&(hdr->hdr), 0, sizeof(hdr->hdr));
+ hdr->num_buffers = num_buffers;
+ virtio_enqueue_offload(hdr_mbuf, &(hdr->hdr));
vhost_log_write(dev, hdr_phys_addr, dev->vhost_hlen);
PRINT_PACKET(dev, (uintptr_t)hdr_addr,
dev->vhost_hlen, 0);
--
2.7.4
^ permalink raw reply related
* [PATCH 1/2] net/ixgbe: remove unused global variable
From: Jerin Jacob @ 2016-12-27 10:09 UTC (permalink / raw)
To: dev; +Cc: konstantin.ananyev, helin.zhang, thomas.monjalon, Jerin Jacob
Removed unused "reg_info" global variable from ixgbe driver.
cat build/app/testpmd.map | grep "Allocating common symbols" -A 15
Allocating common symbols
Common symbol size file
reg_info 0x18 build/lib/librte_pmd_ixgbe.a(ixgbe_ethdev.o)
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
drivers/net/ixgbe/ixgbe_regs.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ixgbe/ixgbe_regs.h b/drivers/net/ixgbe/ixgbe_regs.h
index 773e169..2aa4820 100644
--- a/drivers/net/ixgbe/ixgbe_regs.h
+++ b/drivers/net/ixgbe/ixgbe_regs.h
@@ -41,7 +41,7 @@ struct reg_info {
uint32_t count;
uint32_t stride;
const char *name;
-} reg_info;
+};
static const struct reg_info ixgbe_regs_general[] = {
{IXGBE_CTRL, 1, 1, "IXGBE_CTRL"},
--
2.5.5
^ permalink raw reply related
* [PATCH 2/2] app/testpmd: remove explicit ixgbe link request
From: Jerin Jacob @ 2016-12-27 10:09 UTC (permalink / raw)
To: dev; +Cc: konstantin.ananyev, helin.zhang, thomas.monjalon, Jerin Jacob
In-Reply-To: <1482833398-30145-1-git-send-email-jerin.jacob@caviumnetworks.com>
Moved the explicit ixgbe driver link request from the
app/testpmd Makefile to mk/rte.app.mk in order to:
1) Maintain the correct link ordering (from higher level libraries
to lower level libraries).
2) In shared lib configuration, allow any application, not just
testpmd, to use the PMD-specific APIs exposed by ixgbe.
Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
---
app/test-pmd/Makefile | 2 --
mk/rte.app.mk | 2 +-
2 files changed, 1 insertion(+), 3 deletions(-)
diff --git a/app/test-pmd/Makefile b/app/test-pmd/Makefile
index 5988c3e..96e0c67 100644
--- a/app/test-pmd/Makefile
+++ b/app/test-pmd/Makefile
@@ -59,8 +59,6 @@ SRCS-y += csumonly.c
SRCS-y += icmpecho.c
SRCS-$(CONFIG_RTE_LIBRTE_IEEE1588) += ieee1588fwd.c
-_LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += -lrte_pmd_ixgbe
-
CFLAGS_cmdline.o := -D_GNU_SOURCE
# this application needs libraries first
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index f75f0e2..aee235c 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -101,6 +101,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_CFGFILE) += -lrte_cfgfile
_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += -lrte_pmd_bond
_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += -lrte_pmd_xenvirt -lxenstore
+_LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += -lrte_pmd_ixgbe
ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),n)
# plugins (link only if static libraries)
@@ -114,7 +115,6 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_ENA_PMD) += -lrte_pmd_ena
_LDLIBS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += -lrte_pmd_enic
_LDLIBS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += -lrte_pmd_fm10k
_LDLIBS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += -lrte_pmd_i40e
-_LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += -lrte_pmd_ixgbe
_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += -lrte_pmd_mlx4 -libverbs
_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += -lrte_pmd_mlx5 -libverbs
_LDLIBS-$(CONFIG_RTE_LIBRTE_MPIPE_PMD) += -lrte_pmd_mpipe -lgxio
--
2.5.5
^ permalink raw reply related
* Re: [PATCH] net/mlx5: fix multi segment packet send
From: Adrien Mazarguil @ 2016-12-27 10:21 UTC (permalink / raw)
To: Shahaf Shuler; +Cc: dev, stable
In-Reply-To: <1482766116-6202-1-git-send-email-shahafs@mellanox.com>
Hi Shahaf,
On Mon, Dec 26, 2016 at 05:28:36PM +0200, Shahaf Shuler wrote:
> Dseg pointer is not initialised when the first segment is inlined
> causing a segmentation fault in such situation.
>
> Fixes: 2a66cf378954 ("net/mlx5: support inline send")
>
> CC: stable@dpdk.org
> Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
Thanks for fixing this bug, a few comments below.
> ---
> drivers/net/mlx5/mlx5_rxtx.c | 6 +++++-
> 1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index 97810e8..d6688c6 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -483,7 +483,7 @@
> assert(addr <= addr_end);
> }
> /*
> - * 2 DWORDs consumed by the WQE header + 1 DSEG +
> + * 2 DWORDs consumed by the WQE header + ETH segment +
> * the size of the inline part of the packet.
> */
> ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
> @@ -498,6 +498,10 @@
> } else if (!segs_n) {
> goto next_pkt;
> } else {
> + /* dseg will be advance as part of next_seg*/
Nit-picking here, there is a missing space in the above comment.
> + dseg = (volatile rte_v128u32_t *)
rte_v128u32_t does not exist (yet) in the tree, this patch therefore depends
on "eal: define generic vector types" [1]. Such dependencies should be
mentioned in the notes section of a patch (after the three-dash line).
Regarding stable@dpdk.org, in case the vector types patch is not applied on
the stable branch, you'll also have to provide your own definition.
> + ((uintptr_t)wqe +
> + ((ds - 1) * MLX5_WQE_DWORD_SIZE));
> goto next_seg;
> }
> } else {
> --
> 1.8.3.1
>
Otherwise,
Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
[1] http://dpdk.org/ml/archives/dev/2016-November/050261.html
--
Adrien Mazarguil
6WIND
^ permalink raw reply
* [PATCH v3 0/4] new API 'rte_eth_dev_fw_info_get'
From: Qiming Yang @ 2016-12-27 12:30 UTC (permalink / raw)
To: dev, thomas.monjalon; +Cc: remy.horton, ferruh.yigit, Qiming Yang
In-Reply-To: <1481008582-69416-1-git-send-email-qiming.yang@intel.com>
These four patches add a new function ``rte_eth_dev_fw_info_get()``
to fetch firmware-related information for a given device.
The information includes the major firmware version, minor firmware
version, patch number and etrack id.
Qiming Yang (4):
ethdev: add firmware information get
net/e1000: add firmware version get
net/ixgbe: add firmware version get
net/i40e: add firmware version get
doc/guides/nics/features/default.ini | 1 +
doc/guides/nics/features/i40e.ini | 1 +
doc/guides/nics/features/igb.ini | 1 +
doc/guides/nics/features/ixgbe.ini | 1 +
doc/guides/rel_notes/release_17_02.rst | 4 ++++
drivers/net/e1000/igb_ethdev.c | 43 ++++++++++++++++++++++++++++++++++
drivers/net/i40e/i40e_ethdev.c | 15 ++++++++++++
drivers/net/ixgbe/ixgbe_ethdev.c | 17 ++++++++++++++
lib/librte_ether/rte_ethdev.c | 14 +++++++++++
lib/librte_ether/rte_ethdev.h | 23 ++++++++++++++++++
lib/librte_ether/rte_ether_version.map | 1 +
11 files changed, 121 insertions(+)
--
2.7.4
^ permalink raw reply
* [PATCH v3 1/4] ethdev: add firmware information get
From: Qiming Yang @ 2016-12-27 12:30 UTC (permalink / raw)
To: dev, thomas.monjalon; +Cc: remy.horton, ferruh.yigit, Qiming Yang
In-Reply-To: <1482841816-54143-1-git-send-email-qiming.yang@intel.com>
This patch adds a new API 'rte_eth_dev_fw_info_get' for fetching
firmware-related information for a given device.
Signed-off-by: Qiming Yang <qiming.yang@intel.com>
Acked-by: Remy Horton <remy.horton@intel.com>
---
v2 changes:
* modified some comment statements.
v3 changes:
* change API, use rte_eth_dev_fw_info_get(uint8_t port_id,
uint32_t *fw_major, uint32_t *fw_minor, uint32_t *fw_patch,
uint32_t *etrack_id) instead of rte_eth_dev_fwver_get(uint8_t port_id,
char *fw_version, int fw_length).
Add statement in /doc/guides/nics/features/default.ini and
release_17_02.rst.
---
---
doc/guides/nics/features/default.ini | 1 +
doc/guides/rel_notes/release_17_02.rst | 4 ++++
lib/librte_ether/rte_ethdev.c | 14 ++++++++++++++
lib/librte_ether/rte_ethdev.h | 23 +++++++++++++++++++++++
lib/librte_ether/rte_ether_version.map | 1 +
5 files changed, 43 insertions(+)
diff --git a/doc/guides/nics/features/default.ini b/doc/guides/nics/features/default.ini
index f1bf9bf..8237ee4 100644
--- a/doc/guides/nics/features/default.ini
+++ b/doc/guides/nics/features/default.ini
@@ -66,3 +66,4 @@ x86-64 =
Usage doc =
Design doc =
Perf doc =
+FW version =
diff --git a/doc/guides/rel_notes/release_17_02.rst b/doc/guides/rel_notes/release_17_02.rst
index 180af82..f6dc6c0 100644
--- a/doc/guides/rel_notes/release_17_02.rst
+++ b/doc/guides/rel_notes/release_17_02.rst
@@ -52,6 +52,10 @@ New Features
See the :ref:`Generic flow API <Generic_flow_API>` documentation for more
information.
+* **Added firmware information get API.**
+ Added a new function ``rte_eth_dev_fw_info_get()`` to fetch firmware-related
+ information for a given device. The information includes the major firmware version,
+ minor firmware version, patch number and etrack id.
Resolved Issues
---------------
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 280f0db..f399f09 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1586,6 +1586,20 @@ rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id, uint16_t rx_queue_id,
}
void
+rte_eth_dev_fw_info_get(uint8_t port_id, uint32_t *fw_major, uint32_t *fw_minor,
+ uint32_t *fw_patch, uint32_t *etrack_id)
+{
+ struct rte_eth_dev *dev;
+
+ RTE_ETH_VALID_PORTID_OR_RET(port_id);
+ dev = &rte_eth_devices[port_id];
+
+ RTE_FUNC_PTR_OR_RET(*dev->dev_ops->fw_version_get);
+ (*dev->dev_ops->fw_version_get)(dev, fw_major, fw_minor,
+ fw_patch, etrack_id);
+}
+
+void
rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info)
{
struct rte_eth_dev *dev;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index fb51754..829f652 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1150,6 +1150,11 @@ typedef uint32_t (*eth_rx_queue_count_t)(struct rte_eth_dev *dev,
typedef int (*eth_rx_descriptor_done_t)(void *rxq, uint16_t offset);
/**< @internal Check DD bit of specific RX descriptor */
+typedef void (*eth_fw_version_get_t)(struct rte_eth_dev *dev,
+ uint32_t *fw_major, uint32_t *fw_minor,
+ uint32_t *fw_patch, uint32_t *etrack_id);
+/**< @internal Get firmware information of an Ethernet device. */
+
typedef void (*eth_rxq_info_get_t)(struct rte_eth_dev *dev,
uint16_t rx_queue_id, struct rte_eth_rxq_info *qinfo);
@@ -1457,6 +1462,7 @@ struct eth_dev_ops {
eth_txq_info_get_t txq_info_get; /**< retrieve TX queue information. */
eth_dev_supported_ptypes_get_t dev_supported_ptypes_get;
/**< Get packet types supported and identified by device. */
+ eth_fw_version_get_t fw_version_get; /**< Get firmware version. */
vlan_filter_set_t vlan_filter_set; /**< Filter VLAN Setup. */
vlan_tpid_set_t vlan_tpid_set; /**< Outer/Inner VLAN TPID Setup. */
@@ -2395,6 +2401,23 @@ void rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr);
void rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info);
/**
+ * Retrieve the firmware version of a device.
+ *
+ * @param port_id
+ * The port identifier of the device.
+ * @param fw_major
+ * A pointer to store the major firmware version of a device.
+ * @param fw_minor
+ * A pointer to store the minor firmware version of a device.
+ * @param fw_patch
+ * A pointer to store the firmware patch number of a device.
+ * @param etrack_id
+ * A pointer to store the nvm version of a device.
+ */
+void rte_eth_dev_fw_info_get(uint8_t port_id, uint32_t *fw_major,
+ uint32_t *fw_minor, uint32_t *fw_patch, uint32_t *etrack_id);
+
+/**
* Retrieve the supported packet types of an Ethernet device.
*
* When a packet type is announced as supported, it *must* be recognized by
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index a021781..a89bfaa 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -156,5 +156,6 @@ DPDK_17.02 {
rte_flow_flush;
rte_flow_query;
rte_flow_validate;
+ rte_eth_dev_fw_info_get;
} DPDK_16.11;
--
2.7.4
^ permalink raw reply related
* [PATCH v3 2/4] net/e1000: add firmware version get
From: Qiming Yang @ 2016-12-27 12:30 UTC (permalink / raw)
To: dev, thomas.monjalon; +Cc: remy.horton, ferruh.yigit, Qiming Yang
In-Reply-To: <1482841816-54143-1-git-send-email-qiming.yang@intel.com>
This patch adds a new function eth_igb_fw_version_get.
Signed-off-by: Qiming Yang <qiming.yang@intel.com>
---
v3 changes:
* use eth_igb_fw_version_get(struct rte_eth_dev *dev, u32 *fw_major,
u32 *fw_minor, u32 *fw_patch, u32 *etrack_id) instead
of eth_igb_fw_version_get(struct rte_eth_dev *dev, char *fw_version,
int fw_length). Add statement in /doc/guides/nics/features/igb.ini.
---
---
doc/guides/nics/features/igb.ini | 1 +
drivers/net/e1000/igb_ethdev.c | 43 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 44 insertions(+)
diff --git a/doc/guides/nics/features/igb.ini b/doc/guides/nics/features/igb.ini
index 9fafe72..ffd87ba 100644
--- a/doc/guides/nics/features/igb.ini
+++ b/doc/guides/nics/features/igb.ini
@@ -39,6 +39,7 @@ EEPROM dump = Y
Registers dump = Y
BSD nic_uio = Y
Linux UIO = Y
+FW version = Y
Linux VFIO = Y
x86-32 = Y
x86-64 = Y
diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
index 4a15447..25344b7 100644
--- a/drivers/net/e1000/igb_ethdev.c
+++ b/drivers/net/e1000/igb_ethdev.c
@@ -120,6 +120,8 @@ static int eth_igb_xstats_get_names(struct rte_eth_dev *dev,
unsigned limit);
static void eth_igb_stats_reset(struct rte_eth_dev *dev);
static void eth_igb_xstats_reset(struct rte_eth_dev *dev);
+static void eth_igb_fw_version_get(struct rte_eth_dev *dev, u32 *fw_major,
+ u32 *fw_minor, u32 *fw_patch, u32 *etrack_id);
static void eth_igb_infos_get(struct rte_eth_dev *dev,
struct rte_eth_dev_info *dev_info);
static const uint32_t *eth_igb_supported_ptypes_get(struct rte_eth_dev *dev);
@@ -389,6 +391,7 @@ static const struct eth_dev_ops eth_igb_ops = {
.xstats_get_names = eth_igb_xstats_get_names,
.stats_reset = eth_igb_stats_reset,
.xstats_reset = eth_igb_xstats_reset,
+ .fw_version_get = eth_igb_fw_version_get,
.dev_infos_get = eth_igb_infos_get,
.dev_supported_ptypes_get = eth_igb_supported_ptypes_get,
.mtu_set = eth_igb_mtu_set,
@@ -1981,6 +1984,46 @@ eth_igbvf_stats_reset(struct rte_eth_dev *dev)
}
static void
+eth_igb_fw_version_get(struct rte_eth_dev *dev, u32 *fw_major, u32 *fw_minor,
+ u32 *fw_patch, u32 *etrack_id)
+{
+ struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+ struct e1000_fw_version fw;
+
+ e1000_get_fw_version(hw, &fw);
+
+ switch (hw->mac.type) {
+ case e1000_i210:
+ case e1000_i211:
+ if (!(e1000_get_flash_presence_i210(hw))) {
+ *fw_major = fw.invm_major;
+ *fw_minor = fw.invm_minor;
+ break;
+ }
+ /* fall through */
+ default:
+ /* if option rom is valid, display its version too*/
+ if (fw.or_valid) {
+ *fw_major = fw.eep_major;
+ *fw_minor = fw.eep_minor;
+ *etrack_id = fw.etrack_id;
+ *fw_patch = fw.or_patch;
+ /* no option rom */
+ } else {
+ if (fw.etrack_id != 0X0000) {
+ *fw_major = fw.eep_major;
+ *fw_minor = fw.eep_minor;
+ *etrack_id = fw.etrack_id;
+ } else {
+ *fw_major = fw.eep_major;
+ *fw_minor = fw.eep_minor;
+ }
+ }
+ break;
+ }
+}
+
+static void
eth_igb_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
--
2.7.4
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox