From: Eric Dumazet <dada1@cosmosbay.com>
To: David Miller <davem@davemloft.net>
Cc: Andrew Dickinson <andrew@whydna.net>,
jelaas@gmail.com, netdev@vger.kernel.org
Subject: [PATCH] net: skb_tx_hash() improvements
Date: Fri, 01 May 2009 10:29:09 +0200 [thread overview]
Message-ID: <49FAB2D5.60508@cosmosbay.com> (raw)
In-Reply-To: <49FA932B.4030405@cosmosbay.com>
David, here is the followup I promised
Thanks
[PATCH] net: skb_tx_hash() improvements
When skb_rx_queue_recorded() is true, we don't want to use jhash distribution,
as the device driver told us exactly which queue was selected at RX time.
jhash makes a statistical shuffle, but this won't work with only 8 different inputs.
We also need to implement a true reciprocal division, so as not to disturb
symmetric setups (when the number of tx queues matches the number of rx queues)
and cpu affinities.
This patch introduces a new helper, dev_real_num_tx_queues_set()
to set both real_num_tx_queues and its reciprocal value,
and makes all drivers use this helper.
Many thanks to Andrew Dickinson for letting us see the light here :)
Reported-by: Andrew Dickinson <andrew@whydna.net>
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
---
drivers/net/bnx2.c | 2 +-
drivers/net/bnx2x_main.c | 2 +-
drivers/net/cxgb3/cxgb3_main.c | 2 +-
drivers/net/igb/igb_main.c | 2 +-
drivers/net/ixgbe/ixgbe_main.c | 2 +-
drivers/net/mv643xx_eth.c | 2 +-
drivers/net/myri10ge/myri10ge.c | 4 ++--
drivers/net/niu.c | 2 +-
drivers/net/vxge/vxge-main.c | 2 +-
include/linux/netdevice.h | 2 ++
net/core/dev.c | 26 ++++++++++++++++++--------
11 files changed, 30 insertions(+), 18 deletions(-)
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index d478391..1f674c1 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -5951,7 +5951,7 @@ bnx2_setup_int_mode(struct bnx2 *bp, int dis_msi)
}
bp->num_tx_rings = rounddown_pow_of_two(bp->irq_nvecs);
- bp->dev->real_num_tx_queues = bp->num_tx_rings;
+ dev_real_num_tx_queues_set(bp->dev, bp->num_tx_rings);
bp->num_rx_rings = bp->irq_nvecs;
}
diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c
index ad5ef25..d5c641b 100644
--- a/drivers/net/bnx2x_main.c
+++ b/drivers/net/bnx2x_main.c
@@ -6800,7 +6800,7 @@ static void bnx2x_set_int_mode(struct bnx2x *bp)
}
break;
}
- bp->dev->real_num_tx_queues = bp->num_tx_queues;
+ dev_real_num_tx_queues_set(bp->dev, bp->num_tx_queues);
}
static void bnx2x_set_rx_mode(struct net_device *dev);
diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
index 7ea4841..a84abf3 100644
--- a/drivers/net/cxgb3/cxgb3_main.c
+++ b/drivers/net/cxgb3/cxgb3_main.c
@@ -1220,7 +1220,7 @@ static int cxgb_open(struct net_device *dev)
"Could not initialize offload capabilities\n");
}
- dev->real_num_tx_queues = pi->nqsets;
+ dev_real_num_tx_queues_set(dev, pi->nqsets);
link_start(dev);
t3_port_intr_enable(adapter, pi->port_id);
netif_tx_start_all_queues(dev);
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 08c8014..48c530d 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -691,7 +691,7 @@ msi_only:
adapter->flags |= IGB_FLAG_HAS_MSI;
out:
/* Notify the stack of the (possibly) reduced Tx Queue count. */
- adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
+ dev_real_num_tx_queues_set(adapter->netdev, adapter->num_tx_queues);
return;
}
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 07e778d..4b4369b 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -2737,7 +2737,7 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
done:
/* Notify the stack of the (possibly) reduced Tx Queue count. */
- adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
+ dev_real_num_tx_queues_set(adapter->netdev, adapter->num_tx_queues);
}
static void ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter,
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index b3185bf..cb6d859 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -2904,7 +2904,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
mp->dev = dev;
set_params(mp, pd);
- dev->real_num_tx_queues = mp->txq_count;
+ dev_real_num_tx_queues_set(dev, mp->txq_count);
if (pd->phy_addr != MV643XX_ETH_PHY_NONE)
mp->phy = phy_scan(mp, pd->phy_addr);
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index f2c4a66..bfb6a11 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -968,7 +968,7 @@ static int myri10ge_reset(struct myri10ge_priv *mgp)
* RX queues, so if we get an error, first retry using a
* single TX queue before giving up */
if (status != 0 && mgp->dev->real_num_tx_queues > 1) {
- mgp->dev->real_num_tx_queues = 1;
+ dev_real_num_tx_queues_set(mgp->dev, 1);
cmd.data0 = mgp->num_slices;
cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
status = myri10ge_send_cmd(mgp,
@@ -3862,7 +3862,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
dev_err(&pdev->dev, "failed to alloc slice state\n");
goto abort_with_firmware;
}
- netdev->real_num_tx_queues = mgp->num_slices;
+ dev_real_num_tx_queues_set(netdev, mgp->num_slices);
status = myri10ge_reset(mgp);
if (status != 0) {
dev_err(&pdev->dev, "failed reset\n");
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 2b17453..a6eac3b 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -4501,7 +4501,7 @@ static int niu_alloc_channels(struct niu *np)
np->num_rx_rings = parent->rxchan_per_port[port];
np->num_tx_rings = parent->txchan_per_port[port];
- np->dev->real_num_tx_queues = np->num_tx_rings;
+ dev_real_num_tx_queues_set(np->dev, np->num_tx_rings);
np->rx_rings = kzalloc(np->num_rx_rings * sizeof(struct rx_ring_info),
GFP_KERNEL);
diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c
index b7f08f3..15602ab 100644
--- a/drivers/net/vxge/vxge-main.c
+++ b/drivers/net/vxge/vxge-main.c
@@ -3331,7 +3331,7 @@ int __devinit vxge_device_register(struct __vxge_hw_device *hldev,
ndev->features |= NETIF_F_GRO;
if (vdev->config.tx_steering_type == TX_MULTIQ_STEERING)
- ndev->real_num_tx_queues = no_of_vpath;
+ dev_real_num_tx_queues_set(ndev, no_of_vpath);
#ifdef NETIF_F_LLTX
ndev->features |= NETIF_F_LLTX;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5a96a1a..f3939ec 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -790,6 +790,7 @@ struct net_device
/* Number of TX queues currently active in device */
unsigned int real_num_tx_queues;
+ unsigned int rec_real_num_tx_queues; /* reciprocal value */
unsigned long tx_queue_len; /* Max frames per queue allowed */
spinlock_t tx_global_lock;
@@ -1782,6 +1783,7 @@ static inline void netif_addr_unlock_bh(struct net_device *dev)
extern void ether_setup(struct net_device *dev);
+extern void dev_real_num_tx_queues_set(struct net_device *dev, unsigned int count);
/* Support for loadable net-drivers */
extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
void (*setup)(struct net_device *),
diff --git a/net/core/dev.c b/net/core/dev.c
index 308a7d0..dfb8f32 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -126,6 +126,7 @@
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>
+#include <linux/reciprocal_div.h>
#include "net-sysfs.h"
@@ -1735,19 +1736,28 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
{
u32 hash;
- if (skb_rx_queue_recorded(skb)) {
+ if (skb_rx_queue_recorded(skb))
hash = skb_get_rx_queue(skb);
- } else if (skb->sk && skb->sk->sk_hash) {
- hash = skb->sk->sk_hash;
- } else
- hash = skb->protocol;
+ else {
+ if (skb->sk && skb->sk->sk_hash)
+ hash = skb->sk->sk_hash;
+ else
+ hash = skb->protocol;
- hash = jhash_1word(hash, skb_tx_hashrnd);
+ hash = jhash_1word(hash, skb_tx_hashrnd);
+ }
+ return (u16) reciprocal_divide(hash, dev->rec_real_num_tx_queues);
- return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
}
EXPORT_SYMBOL(skb_tx_hash);
+void dev_real_num_tx_queues_set(struct net_device *dev, unsigned int count)
+{
+ dev->real_num_tx_queues = count;
+ dev->rec_real_num_tx_queues = reciprocal_value(count);
+}
+EXPORT_SYMBOL(dev_real_num_tx_queues_set);
+
static struct netdev_queue *dev_pick_tx(struct net_device *dev,
struct sk_buff *skb)
{
@@ -4781,7 +4791,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
dev->_tx = tx;
dev->num_tx_queues = queue_count;
- dev->real_num_tx_queues = queue_count;
+ dev_real_num_tx_queues_set(dev, queue_count);
dev->gso_max_size = GSO_MAX_SIZE;
next prev parent reply other threads:[~2009-05-01 8:29 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-04-29 23:00 tx queue hashing hot-spots and poor performance (multiq, ixgbe) Andrew Dickinson
2009-04-30 9:07 ` Jens Låås
2009-04-30 9:24 ` David Miller
2009-04-30 10:51 ` Jens Låås
2009-04-30 11:05 ` David Miller
2009-04-30 14:04 ` Andrew Dickinson
2009-04-30 14:08 ` David Miller
2009-04-30 23:53 ` Andrew Dickinson
2009-05-01 4:19 ` Andrew Dickinson
2009-05-01 7:32 ` Eric Dumazet
2009-05-01 7:47 ` Eric Dumazet
2009-05-01 6:14 ` Eric Dumazet
2009-05-01 6:19 ` Andrew Dickinson
2009-05-01 6:40 ` Eric Dumazet
2009-05-01 7:23 ` Andrew Dickinson
2009-05-01 7:31 ` Eric Dumazet
2009-05-01 7:34 ` Andrew Dickinson
2009-05-01 21:37 ` Brandeburg, Jesse
2009-05-01 8:29 ` Eric Dumazet [this message]
2009-05-01 8:52 ` [PATCH] net: skb_tx_hash() improvements Eric Dumazet
2009-05-01 9:29 ` Eric Dumazet
2009-05-01 16:17 ` David Miller
2009-05-03 21:44 ` David Miller
2009-05-04 6:12 ` Eric Dumazet
2009-05-01 16:08 ` tx queue hashing hot-spots and poor performance (multiq, ixgbe) David Miller
2009-05-01 16:48 ` Eric Dumazet
2009-05-01 17:22 ` David Miller
2009-05-01 10:20 ` Jesper Dangaard Brouer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=49FAB2D5.60508@cosmosbay.com \
--to=dada1@cosmosbay.com \
--cc=andrew@whydna.net \
--cc=davem@davemloft.net \
--cc=jelaas@gmail.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).