* [PATCH net-next V1 01/14] net/mlx4_en: Code cleanups in tx path
2014-10-06 6:15 [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow Amir Vadai
@ 2014-10-06 6:15 ` Amir Vadai
2014-10-06 6:15 ` [PATCH net-next V1 02/14] net/mlx4_en: Align tx path structures to cache lines Amir Vadai
` (13 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Amir Vadai @ 2014-10-06 6:15 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet
Cc: netdev, Yevgeny Petrilin, Or Gerlitz, Ido Shamay, Amir Vadai
From: Eric Dumazet <edumazet@google.com>
- Remove unused variable ring->poll_cnt
- No need to set some fields if using blueflame
- Add missing const's
- Use unlikely
- Remove unneeded new line
- Make some comments more precise
- struct mlx4_bf @offset field reduced to unsigned int to save space
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx4/en_tx.c | 47 +++++++++++++++-------------
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 -
include/linux/mlx4/device.h | 2 +-
3 files changed, 26 insertions(+), 24 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 0c50125..d2f06a7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -191,7 +191,6 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
ring->prod = 0;
ring->cons = 0xffffffff;
ring->last_nr_txbb = 1;
- ring->poll_cnt = 0;
memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info));
memset(ring->buf, 0, ring->buf_size);
@@ -512,7 +511,8 @@ static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
return ring->buf + index * TXBB_SIZE;
}
-static int is_inline(int inline_thold, struct sk_buff *skb, void **pfrag)
+static bool is_inline(int inline_thold, const struct sk_buff *skb,
+ void **pfrag)
{
void *ptr;
@@ -535,7 +535,7 @@ static int is_inline(int inline_thold, struct sk_buff *skb, void **pfrag)
return 0;
}
-static int inline_size(struct sk_buff *skb)
+static int inline_size(const struct sk_buff *skb)
{
if (skb->len + CTRL_SIZE + sizeof(struct mlx4_wqe_inline_seg)
<= MLX4_INLINE_ALIGN)
@@ -546,7 +546,8 @@ static int inline_size(struct sk_buff *skb)
sizeof(struct mlx4_wqe_inline_seg), 16);
}
-static int get_real_size(struct sk_buff *skb, struct net_device *dev,
+static int get_real_size(const struct sk_buff *skb,
+ struct net_device *dev,
int *lso_header_size)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -581,8 +582,10 @@ static int get_real_size(struct sk_buff *skb, struct net_device *dev,
return real_size;
}
-static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct sk_buff *skb,
- int real_size, u16 *vlan_tag, int tx_ind, void *fragptr)
+static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
+ const struct sk_buff *skb,
+ int real_size, u16 *vlan_tag,
+ int tx_ind, void *fragptr)
{
struct mlx4_wqe_inline_seg *inl = &tx_desc->inl;
int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl;
@@ -642,7 +645,8 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
return fallback(dev, skb) % rings_p_up + up * rings_p_up;
}
-static void mlx4_bf_copy(void __iomem *dst, unsigned long *src, unsigned bytecnt)
+static void mlx4_bf_copy(void __iomem *dst, const void *src,
+ unsigned int bytecnt)
{
__iowrite64_copy(dst, src, bytecnt / 8);
}
@@ -736,11 +740,10 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
tx_info->skb = skb;
tx_info->nr_txbb = nr_txbb;
+ data = &tx_desc->data;
if (lso_header_size)
data = ((void *)&tx_desc->lso + ALIGN(lso_header_size + 4,
DS_SIZE));
- else
- data = &tx_desc->data;
/* valid only for none inline segments */
tx_info->data_offset = (void *)data - (void *)tx_desc;
@@ -753,9 +756,9 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
if (is_inline(ring->inline_thold, skb, &fragptr)) {
tx_info->inl = 1;
} else {
- /* Map fragments */
+ /* Map fragments if any */
for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) {
- struct skb_frag_struct *frag;
+ const struct skb_frag_struct *frag;
dma_addr_t dma;
frag = &skb_shinfo(skb)->frags[i];
@@ -772,7 +775,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
--data;
}
- /* Map linear part */
+ /* Map linear part if needed */
if (tx_info->linear) {
u32 byte_count = skb_headlen(skb) - lso_header_size;
dma_addr_t dma;
@@ -795,18 +798,14 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
* For timestamping add flag to skb_shinfo and
* set flag for further reference
*/
- if (ring->hwtstamp_tx_type == HWTSTAMP_TX_ON &&
- skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
+ if (unlikely(ring->hwtstamp_tx_type == HWTSTAMP_TX_ON &&
+ skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
tx_info->ts_requested = 1;
}
/* Prepare ctrl segement apart opcode+ownership, which depends on
* whether LSO is used */
- tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag);
- tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN *
- !!vlan_tx_tag_present(skb);
- tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f;
tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
@@ -852,7 +851,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
tx_info->nr_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
ring->packets++;
-
}
ring->bytes += tx_info->nr_bytes;
netdev_tx_sent_queue(ring->tx_queue, tx_info->nr_bytes);
@@ -874,7 +872,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
ring->prod += nr_txbb;
/* If we used a bounce buffer then copy descriptor back into place */
- if (bounce)
+ if (unlikely(bounce))
tx_desc = mlx4_en_bounce_to_desc(priv, ring, index, desc_size);
skb_tx_timestamp(skb);
@@ -894,13 +892,18 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
wmb();
- mlx4_bf_copy(ring->bf.reg + ring->bf.offset, (unsigned long *) &tx_desc->ctrl,
- desc_size);
+ mlx4_bf_copy(ring->bf.reg + ring->bf.offset, &tx_desc->ctrl,
+ desc_size);
wmb();
ring->bf.offset ^= ring->bf.buf_size;
} else {
+ tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag);
+ tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN *
+ !!vlan_tx_tag_present(skb);
+ tx_desc->ctrl.fence_size = real_size;
+
/* Ensure new descriptor hits memory
* before setting ownership of this descriptor to HW
*/
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 84c9d5d..e54b653 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -263,7 +263,6 @@ struct mlx4_en_tx_ring {
u32 buf_size;
u32 doorbell_qpn;
void *buf;
- u16 poll_cnt;
struct mlx4_en_tx_info *tx_info;
u8 *bounce_buf;
u8 queue_index;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index b2f8ab9..37e4404 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -583,7 +583,7 @@ struct mlx4_uar {
};
struct mlx4_bf {
- unsigned long offset;
+ unsigned int offset;
int buf_size;
struct mlx4_uar *uar;
void __iomem *reg;
--
1.8.3.1
* [PATCH net-next V1 02/14] net/mlx4_en: Align tx path structures to cache lines
2014-10-06 6:15 [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow Amir Vadai
2014-10-06 6:15 ` [PATCH net-next V1 01/14] net/mlx4_en: Code cleanups in tx path Amir Vadai
@ 2014-10-06 6:15 ` Amir Vadai
2014-10-06 6:15 ` [PATCH net-next V1 03/14] net/mlx4_en: Avoid calling bswap in tx fast path Amir Vadai
` (12 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Amir Vadai @ 2014-10-06 6:15 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet
Cc: netdev, Yevgeny Petrilin, Or Gerlitz, Ido Shamay, Amir Vadai
From: Eric Dumazet <edumazet@google.com>
Reorganize struct mlx4_en_tx_ring to have:
- One cache line containing last_nr_txbb & cons & wake_queue, used by tx
completion.
- One cache line containing fields dirtied by mlx4_en_xmit()
- The following part is read-mostly and shared between CPUs.
Align struct mlx4_en_tx_info to a cache line
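To illustrate the layout idea, here is a minimal sketch with hypothetical
field names (not the actual driver structure): ____cacheline_aligned_in_smp
on a member starts a fresh cache line, keeping each writer off the other
group's line.

#include <linux/cache.h>
#include <linux/types.h>

struct tx_ring_sketch {
	/* line dirtied only by the tx-completion (consumer) path */
	u32 last_nr_txbb;
	u32 cons;

	/* line dirtied only by the xmit (producer) path */
	u32 prod ____cacheline_aligned_in_smp;
	unsigned long bytes;
	unsigned long packets;

	/* remaining fields are read-mostly, shared by all cpus */
	u32 size_mask ____cacheline_aligned_in_smp;
	void *buf;
} ____cacheline_aligned_in_smp;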
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 86 +++++++++++++++-------------
1 file changed, 46 insertions(+), 40 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index e54b653..b7bde95 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -216,13 +216,13 @@ enum cq_type {
struct mlx4_en_tx_info {
struct sk_buff *skb;
- u32 nr_txbb;
- u32 nr_bytes;
- u8 linear;
- u8 data_offset;
- u8 inl;
- u8 ts_requested;
-};
+ u32 nr_txbb;
+ u32 nr_bytes;
+ u8 linear;
+ u8 data_offset;
+ u8 inl;
+ u8 ts_requested;
+} ____cacheline_aligned_in_smp;
#define MLX4_EN_BIT_DESC_OWN 0x80000000
@@ -253,40 +253,46 @@ struct mlx4_en_rx_alloc {
};
struct mlx4_en_tx_ring {
+ /* cache line used and dirtied in tx completion
+ * (mlx4_en_free_tx_buf())
+ */
+ u32 last_nr_txbb;
+ u32 cons;
+ unsigned long wake_queue;
+
+ /* cache line used and dirtied in mlx4_en_xmit() */
+ u32 prod ____cacheline_aligned_in_smp;
+ unsigned long bytes;
+ unsigned long packets;
+ unsigned long tx_csum;
+ unsigned long tso_packets;
+ unsigned long xmit_more;
+ struct mlx4_bf bf;
+ unsigned long queue_stopped;
+
+ /* Following part should be mostly read */
+ cpumask_t affinity_mask;
+ struct mlx4_qp qp;
struct mlx4_hwq_resources wqres;
- u32 size ; /* number of TXBBs */
- u32 size_mask;
- u16 stride;
- u16 cqn; /* index of port CQ associated with this ring */
- u32 prod;
- u32 cons;
- u32 buf_size;
- u32 doorbell_qpn;
- void *buf;
- struct mlx4_en_tx_info *tx_info;
- u8 *bounce_buf;
- u8 queue_index;
- cpumask_t affinity_mask;
- u32 last_nr_txbb;
- struct mlx4_qp qp;
- struct mlx4_qp_context context;
- int qpn;
- enum mlx4_qp_state qp_state;
- struct mlx4_srq dummy;
- unsigned long bytes;
- unsigned long packets;
- unsigned long tx_csum;
- unsigned long queue_stopped;
- unsigned long wake_queue;
- unsigned long tso_packets;
- unsigned long xmit_more;
- struct mlx4_bf bf;
- bool bf_enabled;
- bool bf_alloced;
- struct netdev_queue *tx_queue;
- int hwtstamp_tx_type;
- int inline_thold;
-};
+ u32 size; /* number of TXBBs */
+ u32 size_mask;
+ u16 stride;
+ u16 cqn; /* index of port CQ associated with this ring */
+ u32 buf_size;
+ u32 doorbell_qpn;
+ void *buf;
+ struct mlx4_en_tx_info *tx_info;
+ u8 *bounce_buf;
+ struct mlx4_qp_context context;
+ int qpn;
+ enum mlx4_qp_state qp_state;
+ u8 queue_index;
+ bool bf_enabled;
+ bool bf_alloced;
+ struct netdev_queue *tx_queue;
+ int hwtstamp_tx_type;
+ int inline_thold;
+} ____cacheline_aligned_in_smp;
struct mlx4_en_rx_desc {
/* actual number of entries depends on rx ring stride */
--
1.8.3.1
* [PATCH net-next V1 03/14] net/mlx4_en: Avoid calling bswap in tx fast path
2014-10-06 6:15 [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow Amir Vadai
2014-10-06 6:15 ` [PATCH net-next V1 01/14] net/mlx4_en: Code cleanups in tx path Amir Vadai
2014-10-06 6:15 ` [PATCH net-next V1 02/14] net/mlx4_en: Align tx path structures to cache lines Amir Vadai
@ 2014-10-06 6:15 ` Amir Vadai
2014-10-06 6:15 ` [PATCH net-next V1 04/14] net/mlx4_en: tx_info allocated with kmalloc() instead of vmalloc() Amir Vadai
` (11 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Amir Vadai @ 2014-10-06 6:15 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet
Cc: netdev, Yevgeny Petrilin, Or Gerlitz, Ido Shamay, Amir Vadai
From: Eric Dumazet <edumazet@google.com>
- doorbell_qpn is stored in big-endian form (cpu_to_be32()) to avoid a
bswap() in the fast path.
- mdev->mr.key is cached, already byte-swapped, in ring->mr_key to likewise
avoid a bswap() and an access to a cold cache line.
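The idea in isolation, condensed from the diff below: swap once on the slow
path, then the hot path uses the pre-swapped values directly and the
doorbell write becomes a plain iowrite32() instead of iowrite32be().

	/* ring activation (slow path): byte-swap once */
	ring->doorbell_qpn = cpu_to_be32(ring->qp.qpn << 8);
	ring->mr_key = cpu_to_be32(mdev->mr.key);

	/* xmit (fast path): values are already big-endian */
	data->lkey = ring->mr_key;
	iowrite32(ring->doorbell_qpn,
		  ring->bf.uar->map + MLX4_SEND_DOORBELL);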
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx4/en_tx.c | 17 ++++++++++-------
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 3 ++-
2 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index d2f06a7..3ea17f9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -195,7 +195,8 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
memset(ring->buf, 0, ring->buf_size);
ring->qp_state = MLX4_QP_STATE_RST;
- ring->doorbell_qpn = ring->qp.qpn << 8;
+ ring->doorbell_qpn = cpu_to_be32(ring->qp.qpn << 8);
+ ring->mr_key = cpu_to_be32(mdev->mr.key);
mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn,
ring->cqn, user_prio, &ring->context);
@@ -654,7 +655,6 @@ static void mlx4_bf_copy(void __iomem *dst, const void *src,
netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
- struct mlx4_en_dev *mdev = priv->mdev;
struct device *ddev = priv->ddev;
struct mlx4_en_tx_ring *ring;
struct mlx4_en_tx_desc *tx_desc;
@@ -769,7 +769,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_drop_unmap;
data->addr = cpu_to_be64(dma);
- data->lkey = cpu_to_be32(mdev->mr.key);
+ data->lkey = ring->mr_key;
wmb();
data->byte_count = cpu_to_be32(skb_frag_size(frag));
--data;
@@ -787,7 +787,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_drop_unmap;
data->addr = cpu_to_be64(dma);
- data->lkey = cpu_to_be32(mdev->mr.key);
+ data->lkey = ring->mr_key;
wmb();
data->byte_count = cpu_to_be32(byte_count);
}
@@ -879,9 +879,12 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
send_doorbell = !skb->xmit_more || netif_xmit_stopped(ring->tx_queue);
+ real_size = (real_size / 16) & 0x3f;
+
if (ring->bf_enabled && desc_size <= MAX_BF && !bounce &&
!vlan_tx_tag_present(skb) && send_doorbell) {
- tx_desc->ctrl.bf_qpn |= cpu_to_be32(ring->doorbell_qpn);
+ tx_desc->ctrl.bf_qpn = ring->doorbell_qpn |
+ cpu_to_be32(real_size);
op_own |= htonl((bf_index & 0xffff) << 8);
/* Ensure new descriptor hits memory
@@ -911,8 +914,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
tx_desc->ctrl.owner_opcode = op_own;
if (send_doorbell) {
wmb();
- iowrite32be(ring->doorbell_qpn,
- ring->bf.uar->map + MLX4_SEND_DOORBELL);
+ iowrite32(ring->doorbell_qpn,
+ ring->bf.uar->map + MLX4_SEND_DOORBELL);
} else {
ring->xmit_more++;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index b7bde95..ab34461 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -279,7 +279,8 @@ struct mlx4_en_tx_ring {
u16 stride;
u16 cqn; /* index of port CQ associated with this ring */
u32 buf_size;
- u32 doorbell_qpn;
+ __be32 doorbell_qpn;
+ __be32 mr_key;
void *buf;
struct mlx4_en_tx_info *tx_info;
u8 *bounce_buf;
--
1.8.3.1
* [PATCH net-next V1 04/14] net/mlx4_en: tx_info allocated with kmalloc() instead of vmalloc()
2014-10-06 6:15 [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow Amir Vadai
` (2 preceding siblings ...)
2014-10-06 6:15 ` [PATCH net-next V1 03/14] net/mlx4_en: Avoid calling bswap in tx fast path Amir Vadai
@ 2014-10-06 6:15 ` Amir Vadai
2014-10-06 6:15 ` [PATCH net-next V1 05/14] net/mlx4_en: Avoid a cache line miss in TX completion for single frag skb's Amir Vadai
` (10 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Amir Vadai @ 2014-10-06 6:15 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet
Cc: netdev, Yevgeny Petrilin, Or Gerlitz, Ido Shamay, Amir Vadai
From: Eric Dumazet <edumazet@google.com>
Try to allocate tx_info using kmalloc_node() first; only on failure, fall
back to vmalloc().
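The pattern, as sketched from the diff below: __GFP_NOWARN silences the
allocation-failure warning for large kmalloc orders, and kvfree() releases
either flavour of allocation.

	tmp = size * sizeof(struct mlx4_en_tx_info);
	ring->tx_info = kmalloc_node(tmp, GFP_KERNEL | __GFP_NOWARN, node);
	if (!ring->tx_info)
		ring->tx_info = vmalloc(tmp);	/* fallback, may not be node-local */
	if (!ring->tx_info)
		goto err_ring;

	/* on teardown, kvfree() handles both kmalloc()ed and vmalloc()ed memory */
	kvfree(ring->tx_info);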
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx4/en_tx.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 3ea17f9..02ade59 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -68,7 +68,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
ring->inline_thold = priv->prof->inline_thold;
tmp = size * sizeof(struct mlx4_en_tx_info);
- ring->tx_info = vmalloc_node(tmp, node);
+ ring->tx_info = kmalloc_node(tmp, GFP_KERNEL | __GFP_NOWARN, node);
if (!ring->tx_info) {
ring->tx_info = vmalloc(tmp);
if (!ring->tx_info) {
@@ -151,7 +151,7 @@ err_bounce:
kfree(ring->bounce_buf);
ring->bounce_buf = NULL;
err_info:
- vfree(ring->tx_info);
+ kvfree(ring->tx_info);
ring->tx_info = NULL;
err_ring:
kfree(ring);
@@ -174,7 +174,7 @@ void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
kfree(ring->bounce_buf);
ring->bounce_buf = NULL;
- vfree(ring->tx_info);
+ kvfree(ring->tx_info);
ring->tx_info = NULL;
kfree(ring);
*pring = NULL;
--
1.8.3.1
* [PATCH net-next V1 05/14] net/mlx4_en: Avoid a cache line miss in TX completion for single frag skb's
2014-10-06 6:15 [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow Amir Vadai
` (3 preceding siblings ...)
2014-10-06 6:15 ` [PATCH net-next V1 04/14] net/mlx4_en: tx_info allocated with kmalloc() instead of vmalloc() Amir Vadai
@ 2014-10-06 6:15 ` Amir Vadai
2014-10-06 6:15 ` [PATCH net-next V1 06/14] net/mlx4_en: Use prefetch in tx path Amir Vadai
` (9 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Amir Vadai @ 2014-10-06 6:15 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet
Cc: netdev, Yevgeny Petrilin, Or Gerlitz, Ido Shamay, Amir Vadai
From: Eric Dumazet <edumazet@google.com>
Add map0_dma/map0_byte_count to mlx4_en_tx_info to avoid a cache
line miss in TX completion for frames having one dma element. (We avoid
reading back the tx descriptor.)
Note this could be extended to 2/3 dma elements later, as we have free
room in mlx4_en_tx_info.
Also, mlx4_en_free_tx_desc() no longer accesses skb_shinfo(). We use a
new nr_maps field in mlx4_en_tx_info to avoid 2 or 3 cache misses.
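Schematically (condensed from the diff below): the xmit path caches the
first mapping in the already-hot tx_info, and the completion path unmaps
single-map frames from that copy without reading the descriptor ring.

	/* xmit: remember the first dma mapping in tx_info */
	tx_info->map0_dma = dma;
	tx_info->map0_byte_count = byte_count;
	tx_info->nr_maps = skb_shinfo(skb)->nr_frags + tx_info->linear;

	/* completion: no descriptor read-back for the first mapping */
	if (tx_info->linear)
		dma_unmap_single(priv->ddev, tx_info->map0_dma,
				 tx_info->map0_byte_count, PCI_DMA_TODEVICE);
	else
		dma_unmap_page(priv->ddev, tx_info->map0_dma,
			       tx_info->map0_byte_count, PCI_DMA_TODEVICE);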
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx4/en_tx.c | 83 +++++++++++++++-------------
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 3 +
2 files changed, 49 insertions(+), 37 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 02ade59..772ae6f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -259,38 +259,40 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring *ring,
int index, u8 owner, u64 timestamp)
{
- struct mlx4_en_dev *mdev = priv->mdev;
struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
- struct sk_buff *skb = tx_info->skb;
- struct skb_frag_struct *frag;
void *end = ring->buf + ring->buf_size;
- int frags = skb_shinfo(skb)->nr_frags;
+ struct sk_buff *skb = tx_info->skb;
+ int nr_maps = tx_info->nr_maps;
int i;
- struct skb_shared_hwtstamps hwts;
- if (timestamp) {
- mlx4_en_fill_hwtstamps(mdev, &hwts, timestamp);
+ if (unlikely(timestamp)) {
+ struct skb_shared_hwtstamps hwts;
+
+ mlx4_en_fill_hwtstamps(priv->mdev, &hwts, timestamp);
skb_tstamp_tx(skb, &hwts);
}
/* Optimize the common case when there are no wraparounds */
if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
if (!tx_info->inl) {
- if (tx_info->linear) {
+ if (tx_info->linear)
dma_unmap_single(priv->ddev,
- (dma_addr_t) be64_to_cpu(data->addr),
- be32_to_cpu(data->byte_count),
- PCI_DMA_TODEVICE);
- ++data;
- }
-
- for (i = 0; i < frags; i++) {
- frag = &skb_shinfo(skb)->frags[i];
+ tx_info->map0_dma,
+ tx_info->map0_byte_count,
+ PCI_DMA_TODEVICE);
+ else
+ dma_unmap_page(priv->ddev,
+ tx_info->map0_dma,
+ tx_info->map0_byte_count,
+ PCI_DMA_TODEVICE);
+ for (i = 1; i < nr_maps; i++) {
+ data++;
dma_unmap_page(priv->ddev,
- (dma_addr_t) be64_to_cpu(data[i].addr),
- skb_frag_size(frag), PCI_DMA_TODEVICE);
+ (dma_addr_t)be64_to_cpu(data->addr),
+ be32_to_cpu(data->byte_count),
+ PCI_DMA_TODEVICE);
}
}
} else {
@@ -299,23 +301,25 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
data = ring->buf + ((void *)data - end);
}
- if (tx_info->linear) {
+ if (tx_info->linear)
dma_unmap_single(priv->ddev,
- (dma_addr_t) be64_to_cpu(data->addr),
- be32_to_cpu(data->byte_count),
- PCI_DMA_TODEVICE);
- ++data;
- }
-
- for (i = 0; i < frags; i++) {
+ tx_info->map0_dma,
+ tx_info->map0_byte_count,
+ PCI_DMA_TODEVICE);
+ else
+ dma_unmap_page(priv->ddev,
+ tx_info->map0_dma,
+ tx_info->map0_byte_count,
+ PCI_DMA_TODEVICE);
+ for (i = 1; i < nr_maps; i++) {
+ data++;
/* Check for wraparound before unmapping */
if ((void *) data >= end)
data = ring->buf;
- frag = &skb_shinfo(skb)->frags[i];
dma_unmap_page(priv->ddev,
- (dma_addr_t) be64_to_cpu(data->addr),
- skb_frag_size(frag), PCI_DMA_TODEVICE);
- ++data;
+ (dma_addr_t)be64_to_cpu(data->addr),
+ be32_to_cpu(data->byte_count),
+ PCI_DMA_TODEVICE);
}
}
}
@@ -751,19 +755,22 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
tx_info->linear = (lso_header_size < skb_headlen(skb) &&
!is_inline(ring->inline_thold, skb, NULL)) ? 1 : 0;
- data += skb_shinfo(skb)->nr_frags + tx_info->linear - 1;
+ tx_info->nr_maps = skb_shinfo(skb)->nr_frags + tx_info->linear;
+ data += tx_info->nr_maps - 1;
if (is_inline(ring->inline_thold, skb, &fragptr)) {
tx_info->inl = 1;
} else {
+ dma_addr_t dma = 0;
+ u32 byte_count = 0;
+
/* Map fragments if any */
for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) {
const struct skb_frag_struct *frag;
- dma_addr_t dma;
-
frag = &skb_shinfo(skb)->frags[i];
+ byte_count = skb_frag_size(frag);
dma = skb_frag_dma_map(ddev, frag,
- 0, skb_frag_size(frag),
+ 0, byte_count,
DMA_TO_DEVICE);
if (dma_mapping_error(ddev, dma))
goto tx_drop_unmap;
@@ -771,14 +778,13 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
data->addr = cpu_to_be64(dma);
data->lkey = ring->mr_key;
wmb();
- data->byte_count = cpu_to_be32(skb_frag_size(frag));
+ data->byte_count = cpu_to_be32(byte_count);
--data;
}
/* Map linear part if needed */
if (tx_info->linear) {
- u32 byte_count = skb_headlen(skb) - lso_header_size;
- dma_addr_t dma;
+ byte_count = skb_headlen(skb) - lso_header_size;
dma = dma_map_single(ddev, skb->data +
lso_header_size, byte_count,
@@ -792,6 +798,9 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
data->byte_count = cpu_to_be32(byte_count);
}
tx_info->inl = 0;
+ /* tx completion can avoid cache line miss for common cases */
+ tx_info->map0_dma = dma;
+ tx_info->map0_byte_count = byte_count;
}
/*
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index ab34461..a904030 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -216,12 +216,15 @@ enum cq_type {
struct mlx4_en_tx_info {
struct sk_buff *skb;
+ dma_addr_t map0_dma;
+ u32 map0_byte_count;
u32 nr_txbb;
u32 nr_bytes;
u8 linear;
u8 data_offset;
u8 inl;
u8 ts_requested;
+ u8 nr_maps;
} ____cacheline_aligned_in_smp;
--
1.8.3.1
* [PATCH net-next V1 06/14] net/mlx4_en: Use prefetch in tx path
2014-10-06 6:15 [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow Amir Vadai
` (4 preceding siblings ...)
2014-10-06 6:15 ` [PATCH net-next V1 05/14] net/mlx4_en: Avoid a cache line miss in TX completion for single frag skb's Amir Vadai
@ 2014-10-06 6:15 ` Amir Vadai
2014-10-06 6:15 ` [PATCH net-next V1 07/14] net/mlx4_en: Avoid false sharing in mlx4_en_process_tx_cq() Amir Vadai
` (8 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Amir Vadai @ 2014-10-06 6:15 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet
Cc: netdev, Yevgeny Petrilin, Or Gerlitz, Ido Shamay, Amir Vadai
From: Eric Dumazet <edumazet@google.com>
mlx4_en_free_tx_desc() uses a prefetchw(&skb->users) to speed up
consume_skb().
mlx4_en_xmit() and the tx completion path also issue a
prefetchw(&ring->tx_queue->dql) to speed up the BQL update.
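A minimal sketch of the pattern: issue the write-prefetch early, so the
cache line arrives in exclusive state before the code that dirties it runs.

#include <linux/prefetch.h>

	/* completion path, well before the skb is released */
	prefetchw(&skb->users);
	/* ... dma unmapping, timestamp handling ... */
	consume_skb(skb);	/* refcount cache line is already hot */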
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx4/en_tx.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 772ae6f..9328e6e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -37,6 +37,7 @@
#include <linux/mlx4/qp.h>
#include <linux/skbuff.h>
#include <linux/if_vlan.h>
+#include <linux/prefetch.h>
#include <linux/vmalloc.h>
#include <linux/tcp.h>
#include <linux/ip.h>
@@ -267,6 +268,11 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
int nr_maps = tx_info->nr_maps;
int i;
+ /* We do not touch skb here, so prefetch skb->users location
+ * to speedup consume_skb()
+ */
+ prefetchw(&skb->users);
+
if (unlikely(timestamp)) {
struct skb_shared_hwtstamps hwts;
@@ -385,6 +391,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
if (!priv->port_up)
return true;
+ prefetchw(&ring->tx_queue->dql.limit);
index = cons_index & size_mask;
cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
ring_index = ring->cons & size_mask;
@@ -722,6 +729,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
}
}
+ prefetchw(&ring->tx_queue->dql);
+
/* Track current inflight packets for performance analysis */
AVG_PERF_COUNTER(priv->pstats.inflight_avg,
(u32) (ring->prod - ring->cons - 1));
--
1.8.3.1
* [PATCH net-next V1 07/14] net/mlx4_en: Avoid false sharing in mlx4_en_process_tx_cq()
2014-10-06 6:15 [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow Amir Vadai
` (5 preceding siblings ...)
2014-10-06 6:15 ` [PATCH net-next V1 06/14] net/mlx4_en: Use prefetch in tx path Amir Vadai
@ 2014-10-06 6:15 ` Amir Vadai
2014-10-06 6:15 ` [PATCH net-next V1 08/14] net/mlx4_en: mlx4_en_xmit() reads ring->cons once, and ahead of time to avoid stalls Amir Vadai
` (7 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Amir Vadai @ 2014-10-06 6:15 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet
Cc: netdev, Yevgeny Petrilin, Or Gerlitz, Ido Shamay, Amir Vadai
From: Eric Dumazet <edumazet@google.com>
mlx4_en_process_tx_cq() now fetches ring->last_nr_txbb and ring->cons
only once, and writes them back only once, to avoid false sharing.
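In outline (condensed from the diff below): read the shared fields into
locals once, run the completion loop on the locals, then publish the
results with a single store each.

	u32 last_nr_txbb = ACCESS_ONCE(ring->last_nr_txbb);
	u32 ring_cons = ACCESS_ONCE(ring->cons);

	/* ... process completed CQEs using only the local copies ... */

	/* we want to dirty this cache line once */
	ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb;
	ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped;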
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx4/en_tx.c | 22 +++++++++++++++-------
1 file changed, 15 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 9328e6e..63e1f24 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -387,6 +387,8 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
u64 timestamp = 0;
int done = 0;
int budget = priv->tx_work_limit;
+ u32 last_nr_txbb;
+ u32 ring_cons;
if (!priv->port_up)
return true;
@@ -394,7 +396,9 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
prefetchw(&ring->tx_queue->dql.limit);
index = cons_index & size_mask;
cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
- ring_index = ring->cons & size_mask;
+ last_nr_txbb = ACCESS_ONCE(ring->last_nr_txbb);
+ ring_cons = ACCESS_ONCE(ring->cons);
+ ring_index = ring_cons & size_mask;
stamp_index = ring_index;
/* Process all completed CQEs */
@@ -419,19 +423,19 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
new_index = be16_to_cpu(cqe->wqe_index) & size_mask;
do {
- txbbs_skipped += ring->last_nr_txbb;
- ring_index = (ring_index + ring->last_nr_txbb) & size_mask;
+ txbbs_skipped += last_nr_txbb;
+ ring_index = (ring_index + last_nr_txbb) & size_mask;
if (ring->tx_info[ring_index].ts_requested)
timestamp = mlx4_en_get_cqe_ts(cqe);
/* free next descriptor */
- ring->last_nr_txbb = mlx4_en_free_tx_desc(
+ last_nr_txbb = mlx4_en_free_tx_desc(
priv, ring, ring_index,
- !!((ring->cons + txbbs_skipped) &
+ !!((ring_cons + txbbs_skipped) &
ring->size), timestamp);
mlx4_en_stamp_wqe(priv, ring, stamp_index,
- !!((ring->cons + txbbs_stamp) &
+ !!((ring_cons + txbbs_stamp) &
ring->size));
stamp_index = ring_index;
txbbs_stamp = txbbs_skipped;
@@ -452,7 +456,11 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
mcq->cons_index = cons_index;
mlx4_cq_set_ci(mcq);
wmb();
- ring->cons += txbbs_skipped;
+
+ /* we want to dirty this cache line once */
+ ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb;
+ ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped;
+
netdev_tx_completed_queue(ring->tx_queue, packets, bytes);
/*
--
1.8.3.1
* [PATCH net-next V1 08/14] net/mlx4_en: mlx4_en_xmit() reads ring->cons once, and ahead of time to avoid stalls
2014-10-06 6:15 [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow Amir Vadai
` (6 preceding siblings ...)
2014-10-06 6:15 ` [PATCH net-next V1 07/14] net/mlx4_en: Avoid false sharing in mlx4_en_process_tx_cq() Amir Vadai
@ 2014-10-06 6:15 ` Amir Vadai
2014-10-06 6:15 ` [PATCH net-next V1 09/14] net/mlx4_en: Use local var in tx flow for skb_shinfo(skb) Amir Vadai
` (6 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Amir Vadai @ 2014-10-06 6:15 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet
Cc: netdev, Yevgeny Petrilin, Or Gerlitz, Ido Shamay, Amir Vadai
From: Eric Dumazet <edumazet@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx4/en_tx.c | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 63e1f24..4b018ce 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -691,10 +691,17 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
void *fragptr;
bool bounce = false;
bool send_doorbell;
+ u32 ring_cons;
if (!priv->port_up)
goto tx_drop;
+ tx_ind = skb_get_queue_mapping(skb);
+ ring = priv->tx_ring[tx_ind];
+
+ /* fetch ring->cons far ahead before needing it to avoid stall */
+ ring_cons = ACCESS_ONCE(ring->cons);
+
real_size = get_real_size(skb, dev, &lso_header_size);
if (unlikely(!real_size))
goto tx_drop;
@@ -708,13 +715,11 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_drop;
}
- tx_ind = skb->queue_mapping;
- ring = priv->tx_ring[tx_ind];
if (vlan_tx_tag_present(skb))
vlan_tag = vlan_tx_tag_get(skb);
/* Check available TXBBs And 2K spare for prefetch */
- if (unlikely(((int)(ring->prod - ring->cons)) >
+ if (unlikely(((int)(ring->prod - ring_cons)) >
ring->size - HEADROOM - MAX_DESC_TXBBS)) {
/* every full Tx ring stops queue */
netif_tx_stop_queue(ring->tx_queue);
@@ -728,7 +733,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
*/
wmb();
- if (unlikely(((int)(ring->prod - ring->cons)) <=
+ ring_cons = ACCESS_ONCE(ring->cons);
+ if (unlikely(((int)(ring->prod - ring_cons)) <=
ring->size - HEADROOM - MAX_DESC_TXBBS)) {
netif_tx_wake_queue(ring->tx_queue);
ring->wake_queue++;
@@ -741,7 +747,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
/* Track current inflight packets for performance analysis */
AVG_PERF_COUNTER(priv->pstats.inflight_avg,
- (u32) (ring->prod - ring->cons - 1));
+ (u32)(ring->prod - ring_cons - 1));
/* Packet is good - grab an index and transmit it */
index = ring->prod & ring->size_mask;
--
1.8.3.1
* [PATCH net-next V1 09/14] net/mlx4_en: Use local var in tx flow for skb_shinfo(skb)
2014-10-06 6:15 [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow Amir Vadai
` (7 preceding siblings ...)
2014-10-06 6:15 ` [PATCH net-next V1 08/14] net/mlx4_en: mlx4_en_xmit() reads ring->cons once, and ahead of time to avoid stalls Amir Vadai
@ 2014-10-06 6:15 ` Amir Vadai
2014-10-06 6:15 ` [PATCH net-next V1 10/14] net/mlx4_en: Use local var for skb_headlen(skb) Amir Vadai
` (5 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Amir Vadai @ 2014-10-06 6:15 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet
Cc: netdev, Yevgeny Petrilin, Or Gerlitz, Ido Shamay, Amir Vadai
From: Eric Dumazet <edumazet@google.com>
Access skb_shinfo(skb) once in the tx flow.
Also, rename the @i variable to @i_frag to avoid confusion, since the
"goto tx_drop_unmap;" error path relies on its value.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx4/en_tx.c | 62 +++++++++++++++++-------------
1 file changed, 36 insertions(+), 26 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 4b018ce..aa05b09 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -532,13 +532,14 @@ static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
}
static bool is_inline(int inline_thold, const struct sk_buff *skb,
+ const struct skb_shared_info *shinfo,
void **pfrag)
{
void *ptr;
if (inline_thold && !skb_is_gso(skb) && skb->len <= inline_thold) {
- if (skb_shinfo(skb)->nr_frags == 1) {
- ptr = skb_frag_address_safe(&skb_shinfo(skb)->frags[0]);
+ if (shinfo->nr_frags == 1) {
+ ptr = skb_frag_address_safe(&shinfo->frags[0]);
if (unlikely(!ptr))
return 0;
@@ -546,7 +547,7 @@ static bool is_inline(int inline_thold, const struct sk_buff *skb,
*pfrag = ptr;
return 1;
- } else if (unlikely(skb_shinfo(skb)->nr_frags))
+ } else if (unlikely(shinfo->nr_frags))
return 0;
else
return 1;
@@ -567,18 +568,19 @@ static int inline_size(const struct sk_buff *skb)
}
static int get_real_size(const struct sk_buff *skb,
+ const struct skb_shared_info *shinfo,
struct net_device *dev,
int *lso_header_size)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
int real_size;
- if (skb_is_gso(skb)) {
+ if (shinfo->gso_size) {
if (skb->encapsulation)
*lso_header_size = (skb_inner_transport_header(skb) - skb->data) + inner_tcp_hdrlen(skb);
else
*lso_header_size = skb_transport_offset(skb) + tcp_hdrlen(skb);
- real_size = CTRL_SIZE + skb_shinfo(skb)->nr_frags * DS_SIZE +
+ real_size = CTRL_SIZE + shinfo->nr_frags * DS_SIZE +
ALIGN(*lso_header_size + 4, DS_SIZE);
if (unlikely(*lso_header_size != skb_headlen(skb))) {
/* We add a segment for the skb linear buffer only if
@@ -593,8 +595,8 @@ static int get_real_size(const struct sk_buff *skb,
}
} else {
*lso_header_size = 0;
- if (!is_inline(priv->prof->inline_thold, skb, NULL))
- real_size = CTRL_SIZE + (skb_shinfo(skb)->nr_frags + 1) * DS_SIZE;
+ if (!is_inline(priv->prof->inline_thold, skb, shinfo, NULL))
+ real_size = CTRL_SIZE + (shinfo->nr_frags + 1) * DS_SIZE;
else
real_size = inline_size(skb);
}
@@ -604,6 +606,7 @@ static int get_real_size(const struct sk_buff *skb,
static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
const struct sk_buff *skb,
+ const struct skb_shared_info *shinfo,
int real_size, u16 *vlan_tag,
int tx_ind, void *fragptr)
{
@@ -619,9 +622,9 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
MIN_PKT_LEN - skb->len);
}
skb_copy_from_linear_data(skb, inl + 1, skb_headlen(skb));
- if (skb_shinfo(skb)->nr_frags)
+ if (shinfo->nr_frags)
memcpy(((void *)(inl + 1)) + skb_headlen(skb), fragptr,
- skb_frag_size(&skb_shinfo(skb)->frags[0]));
+ skb_frag_size(&shinfo->frags[0]));
} else {
inl->byte_count = cpu_to_be32(1 << 31 | spc);
@@ -639,9 +642,10 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
inl = (void *) (inl + 1) + spc;
skb_copy_from_linear_data_offset(skb, spc, inl + 1,
skb_headlen(skb) - spc);
- if (skb_shinfo(skb)->nr_frags)
+ if (shinfo->nr_frags)
memcpy(((void *)(inl + 1)) + skb_headlen(skb) - spc,
- fragptr, skb_frag_size(&skb_shinfo(skb)->frags[0]));
+ fragptr,
+ skb_frag_size(&shinfo->frags[0]));
}
wmb();
@@ -673,6 +677,7 @@ static void mlx4_bf_copy(void __iomem *dst, const void *src,
netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
struct mlx4_en_priv *priv = netdev_priv(dev);
struct device *ddev = priv->ddev;
struct mlx4_en_tx_ring *ring;
@@ -686,7 +691,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
u32 index, bf_index;
__be32 op_own;
u16 vlan_tag = 0;
- int i;
+ int i_frag;
int lso_header_size;
void *fragptr;
bool bounce = false;
@@ -702,7 +707,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
/* fetch ring->cons far ahead before needing it to avoid stall */
ring_cons = ACCESS_ONCE(ring->cons);
- real_size = get_real_size(skb, dev, &lso_header_size);
+ real_size = get_real_size(skb, shinfo, dev, &lso_header_size);
if (unlikely(!real_size))
goto tx_drop;
@@ -776,21 +781,22 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
tx_info->data_offset = (void *)data - (void *)tx_desc;
tx_info->linear = (lso_header_size < skb_headlen(skb) &&
- !is_inline(ring->inline_thold, skb, NULL)) ? 1 : 0;
+ !is_inline(ring->inline_thold, skb, shinfo, NULL)) ? 1 : 0;
- tx_info->nr_maps = skb_shinfo(skb)->nr_frags + tx_info->linear;
+ tx_info->nr_maps = shinfo->nr_frags + tx_info->linear;
data += tx_info->nr_maps - 1;
- if (is_inline(ring->inline_thold, skb, &fragptr)) {
+ if (is_inline(ring->inline_thold, skb, shinfo, &fragptr)) {
tx_info->inl = 1;
} else {
dma_addr_t dma = 0;
u32 byte_count = 0;
/* Map fragments if any */
- for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) {
+ for (i_frag = shinfo->nr_frags - 1; i_frag >= 0; i_frag--) {
const struct skb_frag_struct *frag;
- frag = &skb_shinfo(skb)->frags[i];
+
+ frag = &shinfo->frags[i_frag];
byte_count = skb_frag_size(frag);
dma = skb_frag_dma_map(ddev, frag,
0, byte_count,
@@ -831,8 +837,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
* set flag for further reference
*/
if (unlikely(ring->hwtstamp_tx_type == HWTSTAMP_TX_ON &&
- skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
- skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+ shinfo->tx_flags & SKBTX_HW_TSTAMP)) {
+ shinfo->tx_flags |= SKBTX_IN_PROGRESS;
tx_info->ts_requested = 1;
}
@@ -858,6 +864,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
/* Handle LSO (TSO) packets */
if (lso_header_size) {
+ int i;
+
/* Mark opcode as LSO */
op_own = cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6)) |
((ring->prod & ring->size) ?
@@ -865,15 +873,16 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
/* Fill in the LSO prefix */
tx_desc->lso.mss_hdr_size = cpu_to_be32(
- skb_shinfo(skb)->gso_size << 16 | lso_header_size);
+ shinfo->gso_size << 16 | lso_header_size);
/* Copy headers;
* note that we already verified that it is linear */
memcpy(tx_desc->lso.header, skb->data, lso_header_size);
ring->tso_packets++;
- i = ((skb->len - lso_header_size) / skb_shinfo(skb)->gso_size) +
- !!((skb->len - lso_header_size) % skb_shinfo(skb)->gso_size);
+
+ i = ((skb->len - lso_header_size) / shinfo->gso_size) +
+ !!((skb->len - lso_header_size) % shinfo->gso_size);
tx_info->nr_bytes = skb->len + (i - 1) * lso_header_size;
ring->packets += i;
} else {
@@ -889,7 +898,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len);
if (tx_info->inl) {
- build_inline_wqe(tx_desc, skb, real_size, &vlan_tag, tx_ind, fragptr);
+ build_inline_wqe(tx_desc, skb, shinfo, real_size, &vlan_tag,
+ tx_ind, fragptr);
tx_info->inl = 1;
}
@@ -958,8 +968,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
tx_drop_unmap:
en_err(priv, "DMA mapping error\n");
- for (i++; i < skb_shinfo(skb)->nr_frags; i++) {
- data++;
+ while (++i_frag < shinfo->nr_frags) {
+ ++data;
dma_unmap_page(ddev, (dma_addr_t) be64_to_cpu(data->addr),
be32_to_cpu(data->byte_count),
PCI_DMA_TODEVICE);
--
1.8.3.1
* [PATCH net-next V1 10/14] net/mlx4_en: Use local var for skb_headlen(skb)
2014-10-06 6:15 [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow Amir Vadai
` (8 preceding siblings ...)
2014-10-06 6:15 ` [PATCH net-next V1 09/14] net/mlx4_en: Use local var in tx flow for skb_shinfo(skb) Amir Vadai
@ 2014-10-06 6:15 ` Amir Vadai
2014-10-06 6:16 ` [PATCH net-next V1 11/14] net/mlx4_en: tx_info->ts_requested was not cleared Amir Vadai
` (4 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Amir Vadai @ 2014-10-06 6:15 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet
Cc: netdev, Yevgeny Petrilin, Or Gerlitz, Ido Shamay, Amir Vadai
From: Eric Dumazet <edumazet@google.com>
Access skb_headlen() once in the tx flow.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx4/en_tx.c | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index aa05b09..e00841a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -612,6 +612,7 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
{
struct mlx4_wqe_inline_seg *inl = &tx_desc->inl;
int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl;
+ unsigned int hlen = skb_headlen(skb);
if (skb->len <= spc) {
if (likely(skb->len >= MIN_PKT_LEN)) {
@@ -621,19 +622,19 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
memset(((void *)(inl + 1)) + skb->len, 0,
MIN_PKT_LEN - skb->len);
}
- skb_copy_from_linear_data(skb, inl + 1, skb_headlen(skb));
+ skb_copy_from_linear_data(skb, inl + 1, hlen);
if (shinfo->nr_frags)
- memcpy(((void *)(inl + 1)) + skb_headlen(skb), fragptr,
+ memcpy(((void *)(inl + 1)) + hlen, fragptr,
skb_frag_size(&shinfo->frags[0]));
} else {
inl->byte_count = cpu_to_be32(1 << 31 | spc);
- if (skb_headlen(skb) <= spc) {
- skb_copy_from_linear_data(skb, inl + 1, skb_headlen(skb));
- if (skb_headlen(skb) < spc) {
- memcpy(((void *)(inl + 1)) + skb_headlen(skb),
- fragptr, spc - skb_headlen(skb));
- fragptr += spc - skb_headlen(skb);
+ if (hlen <= spc) {
+ skb_copy_from_linear_data(skb, inl + 1, hlen);
+ if (hlen < spc) {
+ memcpy(((void *)(inl + 1)) + hlen,
+ fragptr, spc - hlen);
+ fragptr += spc - hlen;
}
inl = (void *) (inl + 1) + spc;
memcpy(((void *)(inl + 1)), fragptr, skb->len - spc);
@@ -641,9 +642,9 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
skb_copy_from_linear_data(skb, inl + 1, spc);
inl = (void *) (inl + 1) + spc;
skb_copy_from_linear_data_offset(skb, spc, inl + 1,
- skb_headlen(skb) - spc);
+ hlen - spc);
if (shinfo->nr_frags)
- memcpy(((void *)(inl + 1)) + skb_headlen(skb) - spc,
+ memcpy(((void *)(inl + 1)) + hlen - spc,
fragptr,
skb_frag_size(&shinfo->frags[0]));
}
--
1.8.3.1
* [PATCH net-next V1 11/14] net/mlx4_en: tx_info->ts_requested was not cleared
2014-10-06 6:15 [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow Amir Vadai
` (9 preceding siblings ...)
2014-10-06 6:15 ` [PATCH net-next V1 10/14] net/mlx4_en: Use local var for skb_headlen(skb) Amir Vadai
@ 2014-10-06 6:16 ` Amir Vadai
2014-10-06 6:16 ` [PATCH net-next V1 12/14] net/mlx4_en: Enable the compiler to make is_inline() inlined Amir Vadai
` (3 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Amir Vadai @ 2014-10-06 6:16 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet
Cc: netdev, Yevgeny Petrilin, Or Gerlitz, Ido Shamay, Amir Vadai
From: Eric Dumazet <edumazet@google.com>
Properly clear tx_info->ts_requested
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx4/en_tx.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index e00841a..2c03b55 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -837,6 +837,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
* For timestamping add flag to skb_shinfo and
* set flag for further reference
*/
+ tx_info->ts_requested = 0;
if (unlikely(ring->hwtstamp_tx_type == HWTSTAMP_TX_ON &&
shinfo->tx_flags & SKBTX_HW_TSTAMP)) {
shinfo->tx_flags |= SKBTX_IN_PROGRESS;
--
1.8.3.1
* [PATCH net-next V1 12/14] net/mlx4_en: Enable the compiler to make is_inline() inlined
2014-10-06 6:15 [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow Amir Vadai
` (10 preceding siblings ...)
2014-10-06 6:16 ` [PATCH net-next V1 11/14] net/mlx4_en: tx_info->ts_requested was not cleared Amir Vadai
@ 2014-10-06 6:16 ` Amir Vadai
2014-10-06 6:16 ` [PATCH net-next V1 13/14] ethtool: Ethtool parameter to dynamically change tx_copybreak Amir Vadai
` (2 subsequent siblings)
14 siblings, 0 replies; 21+ messages in thread
From: Amir Vadai @ 2014-10-06 6:16 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet
Cc: netdev, Yevgeny Petrilin, Or Gerlitz, Ido Shamay, Amir Vadai
From: Eric Dumazet <edumazet@google.com>
Reorganize the code so that is_inline() is called only once, allowing the
compiler to inline it.
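Sketch of the resulting flow (condensed from the diff below): the inline
decision and the frag pointer are computed once inside get_real_size()
and returned through out-parameters.

	bool inline_ok;
	void *fragptr = NULL;

	real_size = get_real_size(skb, shinfo, dev, &lso_header_size,
				  &inline_ok, &fragptr);
	/* ... descriptor setup ... */
	tx_info->inl = inline_ok;
	if (!tx_info->inl) {
		/* dma-map the linear part and the fragments */
	}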
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx4/en_tx.c | 67 +++++++++++++++++-------------
1 file changed, 38 insertions(+), 29 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 2c03b55..f0080c5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -531,29 +531,32 @@ static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
return ring->buf + index * TXBB_SIZE;
}
+/* Decide if skb can be inlined in tx descriptor to avoid dma mapping
+ *
+ * It seems strange we do not simply use skb_copy_bits().
+ * This would allow to inline all skbs iff skb->len <= inline_thold
+ *
+ * Note that caller already checked skb was not a gso packet
+ */
static bool is_inline(int inline_thold, const struct sk_buff *skb,
const struct skb_shared_info *shinfo,
void **pfrag)
{
void *ptr;
- if (inline_thold && !skb_is_gso(skb) && skb->len <= inline_thold) {
- if (shinfo->nr_frags == 1) {
- ptr = skb_frag_address_safe(&shinfo->frags[0]);
- if (unlikely(!ptr))
- return 0;
-
- if (pfrag)
- *pfrag = ptr;
+ if (skb->len > inline_thold || !inline_thold)
+ return false;
- return 1;
- } else if (unlikely(shinfo->nr_frags))
- return 0;
- else
- return 1;
+ if (shinfo->nr_frags == 1) {
+ ptr = skb_frag_address_safe(&shinfo->frags[0]);
+ if (unlikely(!ptr))
+ return false;
+ *pfrag = ptr;
+ return true;
}
-
- return 0;
+ if (shinfo->nr_frags)
+ return false;
+ return true;
}
static int inline_size(const struct sk_buff *skb)
@@ -570,12 +573,15 @@ static int inline_size(const struct sk_buff *skb)
static int get_real_size(const struct sk_buff *skb,
const struct skb_shared_info *shinfo,
struct net_device *dev,
- int *lso_header_size)
+ int *lso_header_size,
+ bool *inline_ok,
+ void **pfrag)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
int real_size;
if (shinfo->gso_size) {
+ *inline_ok = false;
if (skb->encapsulation)
*lso_header_size = (skb_inner_transport_header(skb) - skb->data) + inner_tcp_hdrlen(skb);
else
@@ -595,10 +601,14 @@ static int get_real_size(const struct sk_buff *skb,
}
} else {
*lso_header_size = 0;
- if (!is_inline(priv->prof->inline_thold, skb, shinfo, NULL))
- real_size = CTRL_SIZE + (shinfo->nr_frags + 1) * DS_SIZE;
- else
+ *inline_ok = is_inline(priv->prof->inline_thold, skb,
+ shinfo, pfrag);
+
+ if (*inline_ok)
real_size = inline_size(skb);
+ else
+ real_size = CTRL_SIZE +
+ (shinfo->nr_frags + 1) * DS_SIZE;
}
return real_size;
@@ -694,9 +704,10 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
u16 vlan_tag = 0;
int i_frag;
int lso_header_size;
- void *fragptr;
+ void *fragptr = NULL;
bool bounce = false;
bool send_doorbell;
+ bool inline_ok;
u32 ring_cons;
if (!priv->port_up)
@@ -708,7 +719,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
/* fetch ring->cons far ahead before needing it to avoid stall */
ring_cons = ACCESS_ONCE(ring->cons);
- real_size = get_real_size(skb, shinfo, dev, &lso_header_size);
+ real_size = get_real_size(skb, shinfo, dev, &lso_header_size,
+ &inline_ok, &fragptr);
if (unlikely(!real_size))
goto tx_drop;
@@ -781,15 +793,15 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
/* valid only for none inline segments */
tx_info->data_offset = (void *)data - (void *)tx_desc;
+ tx_info->inl = inline_ok;
+
tx_info->linear = (lso_header_size < skb_headlen(skb) &&
- !is_inline(ring->inline_thold, skb, shinfo, NULL)) ? 1 : 0;
+ !inline_ok) ? 1 : 0;
tx_info->nr_maps = shinfo->nr_frags + tx_info->linear;
data += tx_info->nr_maps - 1;
- if (is_inline(ring->inline_thold, skb, shinfo, &fragptr)) {
- tx_info->inl = 1;
- } else {
+ if (!tx_info->inl) {
dma_addr_t dma = 0;
u32 byte_count = 0;
@@ -827,7 +839,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
wmb();
data->byte_count = cpu_to_be32(byte_count);
}
- tx_info->inl = 0;
/* tx completion can avoid cache line miss for common cases */
tx_info->map0_dma = dma;
tx_info->map0_byte_count = byte_count;
@@ -899,11 +910,9 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
netdev_tx_sent_queue(ring->tx_queue, tx_info->nr_bytes);
AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len);
- if (tx_info->inl) {
+ if (tx_info->inl)
build_inline_wqe(tx_desc, skb, shinfo, real_size, &vlan_tag,
tx_ind, fragptr);
- tx_info->inl = 1;
- }
if (skb->encapsulation) {
struct iphdr *ipv4 = (struct iphdr *)skb_inner_network_header(skb);
--
1.8.3.1
* [PATCH net-next V1 13/14] ethtool: Ethtool parameter to dynamically change tx_copybreak
2014-10-06 6:15 [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow Amir Vadai
` (11 preceding siblings ...)
2014-10-06 6:16 ` [PATCH net-next V1 12/14] net/mlx4_en: Enable the compiler to make is_inline() inlined Amir Vadai
@ 2014-10-06 6:16 ` Amir Vadai
2014-10-06 6:16 ` [PATCH net-next V1 14/14] net/mlx4_en: Use the new tx_copybreak to set inline threshold Amir Vadai
2014-10-06 12:51 ` [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow Eric Dumazet
14 siblings, 0 replies; 21+ messages in thread
From: Amir Vadai @ 2014-10-06 6:16 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet
Cc: netdev, Yevgeny Petrilin, Or Gerlitz, Ido Shamay, Amir Vadai
From: Eric Dumazet <edumazet@google.com>
Use the new ethtool [sg]et_tunable() hooks to get/set tx_copybreak (the
inline threshold).
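Once a driver wires up the hooks, userspace can drive this through
ethtool's tunable interface; with a sufficiently recent ethtool binary the
syntax should be along the lines of "ethtool --set-tunable eth0
tx-copybreak 128" and "ethtool --get-tunable eth0 tx-copybreak" (exact
option names depend on the ethtool version).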
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
---
include/uapi/linux/ethtool.h | 1 +
net/core/ethtool.c | 1 +
2 files changed, 2 insertions(+)
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 7a364f2..99b4305 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -212,6 +212,7 @@ struct ethtool_value {
enum tunable_id {
ETHTOOL_ID_UNSPEC,
ETHTOOL_RX_COPYBREAK,
+ ETHTOOL_TX_COPYBREAK,
};
enum tunable_type_id {
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 27e61b8..1600aa2 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1625,6 +1625,7 @@ static int ethtool_tunable_valid(const struct ethtool_tunable *tuna)
{
switch (tuna->id) {
case ETHTOOL_RX_COPYBREAK:
+ case ETHTOOL_TX_COPYBREAK:
if (tuna->len != sizeof(u32) ||
tuna->type_id != ETHTOOL_TUNABLE_U32)
return -EINVAL;
--
1.8.3.1
* [PATCH net-next V1 14/14] net/mlx4_en: Use the new tx_copybreak to set inline threshold
2014-10-06 6:15 [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow Amir Vadai
` (12 preceding siblings ...)
2014-10-06 6:16 ` [PATCH net-next V1 13/14] ethtool: Ethtool parameter to dynamically change tx_copybreak Amir Vadai
@ 2014-10-06 6:16 ` Amir Vadai
2014-10-06 12:51 ` [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow Eric Dumazet
14 siblings, 0 replies; 21+ messages in thread
From: Amir Vadai @ 2014-10-06 6:16 UTC (permalink / raw)
To: David S. Miller, Eric Dumazet
Cc: netdev, Yevgeny Petrilin, Or Gerlitz, Ido Shamay, Amir Vadai
From: Eric Dumazet <edumazet@google.com>
Instead of setting the inline threshold only at driver load time through a
module parameter, use set_tunable() to set it dynamically.
There is no need to store the threshold per ring; use the netdev-global
priv->prof->inline_thold instead.
The initial value is still taken from the module parameter, so backward
compatibility is kept.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 44 +++++++++++++++++++++++++
drivers/net/ethernet/mellanox/mlx4/en_tx.c | 1 -
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 -
3 files changed, 44 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 42c9f8b..b2d9e1f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1267,6 +1267,48 @@ static u32 mlx4_en_get_priv_flags(struct net_device *dev)
return priv->pflags;
}
+static int mlx4_en_get_tunable(struct net_device *dev,
+ const struct ethtool_tunable *tuna,
+ void *data)
+{
+ const struct mlx4_en_priv *priv = netdev_priv(dev);
+ int ret = 0;
+
+ switch (tuna->id) {
+ case ETHTOOL_TX_COPYBREAK:
+ *(u32 *)data = priv->prof->inline_thold;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int mlx4_en_set_tunable(struct net_device *dev,
+ const struct ethtool_tunable *tuna,
+ const void *data)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ int val, ret = 0;
+
+ switch (tuna->id) {
+ case ETHTOOL_TX_COPYBREAK:
+ val = *(u32 *)data;
+ if (val < MIN_PKT_LEN || val > MAX_INLINE)
+ ret = -EINVAL;
+ else
+ priv->prof->inline_thold = val;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
const struct ethtool_ops mlx4_en_ethtool_ops = {
.get_drvinfo = mlx4_en_get_drvinfo,
@@ -1297,6 +1339,8 @@ const struct ethtool_ops mlx4_en_ethtool_ops = {
.get_ts_info = mlx4_en_get_ts_info,
.set_priv_flags = mlx4_en_set_priv_flags,
.get_priv_flags = mlx4_en_get_priv_flags,
+ .get_tunable = mlx4_en_get_tunable,
+ .set_tunable = mlx4_en_set_tunable,
};
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index f0080c5..92a7cf4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -66,7 +66,6 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
ring->size = size;
ring->size_mask = size - 1;
ring->stride = stride;
- ring->inline_thold = priv->prof->inline_thold;
tmp = size * sizeof(struct mlx4_en_tx_info);
ring->tx_info = kmalloc_node(tmp, GFP_KERNEL | __GFP_NOWARN, node);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index a904030..8fef658 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -295,7 +295,6 @@ struct mlx4_en_tx_ring {
bool bf_alloced;
struct netdev_queue *tx_queue;
int hwtstamp_tx_type;
- int inline_thold;
} ____cacheline_aligned_in_smp;
struct mlx4_en_rx_desc {
--
1.8.3.1
* Re: [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow
2014-10-06 6:15 [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow Amir Vadai
` (13 preceding siblings ...)
2014-10-06 6:16 ` [PATCH net-next V1 14/14] net/mlx4_en: Use the new tx_copybreak to set inline threshold Amir Vadai
@ 2014-10-06 12:51 ` Eric Dumazet
2014-10-06 12:54 ` Amir Vadai
2014-10-06 16:30 ` [PATCH net-next] net/mlx4_en: remove NETDEV_TX_BUSY Eric Dumazet
14 siblings, 2 replies; 21+ messages in thread
From: Eric Dumazet @ 2014-10-06 12:51 UTC (permalink / raw)
To: Amir Vadai
Cc: David S. Miller, Eric Dumazet, netdev, Yevgeny Petrilin,
Or Gerlitz, Ido Shamay
On Mon, 2014-10-06 at 09:15 +0300, Amir Vadai wrote:
> Changes from V0:
> - Patch 14/14 ("Use the new tx_copybreak to set inline threshold"):
> - Use the same coding convention as currently used in en_ethtool.c
> - Patch 1/14 ("Code cleanups in tx path") and Patch 9/14 ("Use local var in
> tx flow for skb_shinfo(skb)"):
> - The local var shinfo was used by mistake in Patch 1/14 while it is only
> declared in Patch 9/14. Fixed for the sake of future bisections.
Hmm... patches were already merged. I am afraid you need to provide one
patch on top of current net-next, if still needed. Too bad for the
bisection.
Thanks !
* Re: [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow
2014-10-06 12:51 ` [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow Eric Dumazet
@ 2014-10-06 12:54 ` Amir Vadai
2014-10-06 16:30 ` [PATCH net-next] net/mlx4_en: remove NETDEV_TX_BUSY Eric Dumazet
1 sibling, 0 replies; 21+ messages in thread
From: Amir Vadai @ 2014-10-06 12:54 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S. Miller, Eric Dumazet, netdev, Yevgeny Petrilin,
Or Gerlitz, Ido Shamay
On 10/6/2014 3:51 PM, Eric Dumazet wrote:
> On Mon, 2014-10-06 at 09:15 +0300, Amir Vadai wrote:
>
>> Changes from V0:
>> - Patch 14/14 ("Use the new tx_copybreak to set inline threshold"):
>> - Use the same coding convention as currently used in en_ethtool.c
>> - Patch 1/14 ("Code cleanups in tx path") and Patch 9/14 ("Use local var in
>> tx flow for skb_shinfo(skb)"):
>> - The local var shinfo was used by mistake in Patch 1/14 while it is only
>> declared in Patch 9/14. Fixed for the sake of future bisections.
>
> Hmm... patches were already merged. I am afraid you need to provide one
> patch on top of current net-next, if still needed. Too bad for the
> bisection.
>
> Thanks !
>
>
Yeah, I missed it somehow. Sorry for the spam.
Amir
* [PATCH net-next] net/mlx4_en: remove NETDEV_TX_BUSY
2014-10-06 12:51 ` [PATCH net-next V1 00/14] net/mlx4_en: Optimizations to TX flow Eric Dumazet
2014-10-06 12:54 ` Amir Vadai
@ 2014-10-06 16:30 ` Eric Dumazet
2014-10-07 8:30 ` Amir Vadai
1 sibling, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2014-10-06 16:30 UTC (permalink / raw)
To: Amir Vadai
Cc: David S. Miller, Eric Dumazet, netdev, Yevgeny Petrilin,
Or Gerlitz, Ido Shamay
From: Eric Dumazet <edumazet@google.com>
Drivers should avoid NETDEV_TX_BUSY as much as possible.
They should stop the tx queue before the qdisc even tries to push
another packet, to avoid requeues.
For a driver supporting skb->xmit_more, this is likely to be a
prerequisite anyway; otherwise we could have a tx deadlock: we need to
force a doorbell if the TX ring is full.
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
drivers/net/ethernet/mellanox/mlx4/en_tx.c | 48 +++++++++----------
1 file changed, 24 insertions(+), 24 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 92a7cf46d9af9260d601a0d65ec5bba4..44004e331e4e760f18cfa69ef99a2b2b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -706,6 +706,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
void *fragptr = NULL;
bool bounce = false;
bool send_doorbell;
+ bool stop_queue;
bool inline_ok;
u32 ring_cons;
@@ -735,30 +736,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
if (vlan_tx_tag_present(skb))
vlan_tag = vlan_tx_tag_get(skb);
- /* Check available TXBBs And 2K spare for prefetch */
- if (unlikely(((int)(ring->prod - ring_cons)) >
- ring->size - HEADROOM - MAX_DESC_TXBBS)) {
- /* every full Tx ring stops queue */
- netif_tx_stop_queue(ring->tx_queue);
- ring->queue_stopped++;
-
- /* If queue was emptied after the if, and before the
- * stop_queue - need to wake the queue, or else it will remain
- * stopped forever.
- * Need a memory barrier to make sure ring->cons was not
- * updated before queue was stopped.
- */
- wmb();
-
- ring_cons = ACCESS_ONCE(ring->cons);
- if (unlikely(((int)(ring->prod - ring_cons)) <=
- ring->size - HEADROOM - MAX_DESC_TXBBS)) {
- netif_tx_wake_queue(ring->tx_queue);
- ring->wake_queue++;
- } else {
- return NETDEV_TX_BUSY;
- }
- }
prefetchw(&ring->tx_queue->dql);
@@ -929,6 +906,13 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
skb_tx_timestamp(skb);
+ /* Check available TXBBs and 2K spare for prefetch */
+ stop_queue = (int)(ring->prod - ring_cons) >
+ ring->size - HEADROOM - MAX_DESC_TXBBS;
+ if (unlikely(stop_queue)) {
+ netif_tx_stop_queue(ring->tx_queue);
+ ring->queue_stopped++;
+ }
send_doorbell = !skb->xmit_more || netif_xmit_stopped(ring->tx_queue);
real_size = (real_size / 16) & 0x3f;
@@ -973,6 +957,22 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
}
}
+ if (unlikely(stop_queue)) {
+ /* If the queue was emptied between the if (stop_queue) test and
+ * netif_tx_stop_queue(), we must wake it here, or else it would
+ * remain stopped forever.
+ * A memory barrier makes sure the read of ring->cons below is not
+ * reordered before the queue was stopped.
+ */
+ smp_rmb();
+
+ ring_cons = ACCESS_ONCE(ring->cons);
+ if (unlikely(((int)(ring->prod - ring_cons)) <=
+ ring->size - HEADROOM - MAX_DESC_TXBBS)) {
+ netif_tx_wake_queue(ring->tx_queue);
+ ring->wake_queue++;
+ }
+ }
return NETDEV_TX_OK;
tx_drop_unmap:
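To make the ordering argument in the changelog easier to follow, here is
a condensed sketch of the pattern this patch adopts. The ring layout and
helper names below are illustrative, not the mlx4 code itself;
toy_post_descriptors()/toy_ring_doorbell() stand in for the real
descriptor write and doorbell ring.

#include <linux/netdevice.h>

/* Illustrative ring, not mlx4's. */
struct toy_ring {
	struct netdev_queue *txq;
	u32 prod;		/* producer index, xmit path only */
	u32 cons;		/* consumer index, completion path */
	int room_limit;		/* size - headroom - max TXBBs per packet */
};

void toy_post_descriptors(struct toy_ring *ring, struct sk_buff *skb);
void toy_ring_doorbell(struct toy_ring *ring);

static bool toy_ring_nearly_full(const struct toy_ring *ring, u32 cons)
{
	return (int)(ring->prod - cons) > ring->room_limit;
}

static netdev_tx_t toy_xmit(struct sk_buff *skb, struct toy_ring *ring)
{
	bool stop_queue;

	toy_post_descriptors(ring, skb);	/* always consume the skb */

	/* Stop the queue *before* the doorbell decision, so the qdisc
	 * never pushes a packet that would have to be refused.
	 */
	stop_queue = toy_ring_nearly_full(ring, ACCESS_ONCE(ring->cons));
	if (unlikely(stop_queue))
		netif_tx_stop_queue(ring->txq);

	/* xmit_more may defer the doorbell only while the queue is
	 * running; a stopped queue forces the doorbell, which is what
	 * breaks the deadlock on a full ring.
	 */
	if (!skb->xmit_more || netif_xmit_stopped(ring->txq))
		toy_ring_doorbell(ring);

	if (unlikely(stop_queue)) {
		/* Completions may have freed room between the fullness
		 * test and netif_tx_stop_queue(); the barrier orders the
		 * stop against the re-read of ring->cons.
		 */
		smp_rmb();
		if (!toy_ring_nearly_full(ring, ACCESS_ONCE(ring->cons)))
			netif_tx_wake_queue(ring->txq);
	}
	return NETDEV_TX_OK;		/* NETDEV_TX_BUSY is never returned */
}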
* Re: [PATCH net-next] net/mlx4_en: remove NETDEV_TX_BUSY
2014-10-06 16:30 ` [PATCH net-next] net/mlx4_en: remove NETDEV_TX_BUSY Eric Dumazet
@ 2014-10-07 8:30 ` Amir Vadai
2014-10-07 12:38 ` Eric Dumazet
2014-10-07 17:21 ` David Miller
0 siblings, 2 replies; 21+ messages in thread
From: Amir Vadai @ 2014-10-07 8:30 UTC (permalink / raw)
To: Eric Dumazet, Amir Vadai
Cc: David S. Miller, Eric Dumazet, netdev, Yevgeny Petrilin,
Or Gerlitz, Ido Shamay
On 10/6/2014 7:30 PM, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> Drivers should avoid NETDEV_TX_BUSY as much as possible.
>
> They should stop the tx queue before the qdisc even tries to push
> another packet, to avoid requeues.
>
> For a driver supporting skb->xmit_more, this is likely to be a
> prerequisite anyway; otherwise we could have a tx deadlock: we need to
> force a doorbell if the TX ring is full.
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
Reviewed. Also verified that it fixes the deadlock (by sending a large
burst - larger than the ring size). Before this fix, the last packet of
the burst wasn't sent, so no doorbell was rung and the queue was
stalled.
BTW, another nice optimization that we hope to send soon is to arm the
CQ only when ringing the doorbell.
Acked-by: Amir Vadai <amirv@mellanox.com>
* Re: [PATCH net-next] net/mlx4_en: remove NETDEV_TX_BUSY
2014-10-07 8:30 ` Amir Vadai
@ 2014-10-07 12:38 ` Eric Dumazet
2014-10-07 17:21 ` David Miller
1 sibling, 0 replies; 21+ messages in thread
From: Eric Dumazet @ 2014-10-07 12:38 UTC (permalink / raw)
To: amirv
Cc: David S. Miller, Eric Dumazet, netdev, Yevgeny Petrilin,
Or Gerlitz, Ido Shamay
On Tue, 2014-10-07 at 11:30 +0300, Amir Vadai wrote:
> Reviewed. Also verified that it fixes the deadlock (by sending a large
> burst - larger than the ring size). Before this fix, the last packet of
> the burst wasn't sent, so no doorbell was rung and the queue was
> stalled.
>
> BTW, another nice optimization that we hope to send soon is to arm the
> CQ only when ringing the doorbell.
>
> Acked-by: Amir Vadai <amirv@mellanox.com>
This sounds great, thanks Amir
* Re: [PATCH net-next] net/mlx4_en: remove NETDEV_TX_BUSY
2014-10-07 8:30 ` Amir Vadai
2014-10-07 12:38 ` Eric Dumazet
@ 2014-10-07 17:21 ` David Miller
1 sibling, 0 replies; 21+ messages in thread
From: David Miller @ 2014-10-07 17:21 UTC (permalink / raw)
To: amirv, amirv.mellanox
Cc: eric.dumazet, edumazet, netdev, yevgenyp, ogerlitz, idos
From: Amir Vadai <amirv.mellanox@gmail.com>
Date: Tue, 07 Oct 2014 11:30:06 +0300
> On 10/6/2014 7:30 PM, Eric Dumazet wrote:
>> From: Eric Dumazet <edumazet@google.com>
>>
>> Drivers should avoid NETDEV_TX_BUSY as much as possible.
>>
>> They should stop the tx queue before the qdisc even tries to push
>> another packet, to avoid requeues.
>>
>> For a driver supporting skb->xmit_more, this is likely to be a
>> prerequisite anyway; otherwise we could have a tx deadlock: we need to
>> force a doorbell if the TX ring is full.
>>
>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>> ---
>
> Reviewed. Also verified that it fixes the deadlock (by sending a large
> burst - larger than the ring size). Before this fix, the last packet of
> the burst wasn't sent, so no doorbell was rung and the queue was
> stalled.
>
> BTW, another nice optimization that we hope to send soon is to arm the
> CQ only when ringing the doorbell.
>
> Acked-by: Amir Vadai <amirv@mellanox.com>
Applied, thanks everyone.