From: Eric Dumazet <eric.dumazet@gmail.com>
To: xiaosuo@gmail.com
Cc: "David S. Miller" <davem@davemloft.net>,
netdev@vger.kernel.org, Tom Herbert <therbert@google.com>
Subject: Re: [PATCH] ifb: add multi-queue support
Date: Tue, 10 Nov 2009 10:07:57 +0100 [thread overview]
Message-ID: <4AF92D6D.8060300@gmail.com> (raw)
In-Reply-To: <4AF924A5.1050303@gmail.com>
Changli Gao a écrit :
> ifb: add multi-queue support
>
> Add multi-queue support, and one kernel thread is created for per queue.
> It can used to emulate multi-queue NIC in software, and distribute work
> among CPUs.
> gentux linux # modprobe ifb numtxqs=2
> gentux linux # ifconfig ifb0 up
> gentux linux # pgrep ifb0
> 18508
> 18509
> gentux linux # taskset -p 1 18508
> pid 18508's current affinity mask: 3
> pid 18508's new affinity mask: 1
> gentux linux # taskset -p 2 18509
> pid 18509's current affinity mask: 3
> pid 18509's new affinity mask: 2
> gentux linux # tc qdisc add dev br0 ingress
> gentux linux # tc filter add dev br0 parent ffff: protocol ip basic
> action mirred egress redirect dev ifb0
Seems pretty cool !
I have some comments
>
> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
> ----
> drivers/net/ifb.c | 309
> ++++++++++++++++++++++++++++++++----------------------
> 1 file changed, 186 insertions(+), 123 deletions(-)
>
> diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
> index 030913f..6e04188 100644
> --- a/drivers/net/ifb.c
> +++ b/drivers/net/ifb.c
> @@ -33,139 +33,101 @@
> #include <linux/etherdevice.h>
> #include <linux/init.h>
> #include <linux/moduleparam.h>
> +#include <linux/wait.h>
> +#include <linux/sched.h>
> +#include <linux/kthread.h>
> +#include <linux/ip.h>
> +#include <linux/ipv6.h>
> +#include <net/ip.h>
> #include <net/pkt_sched.h>
> #include <net/net_namespace.h>
>
> -#define TX_TIMEOUT (2*HZ)
> -
> #define TX_Q_LIMIT 32
> +
> struct ifb_private {
> - struct tasklet_struct ifb_tasklet;
> - int tasklet_pending;
> - /* mostly debug stats leave in for now */
> - unsigned long st_task_enter; /* tasklet entered */
> - unsigned long st_txq_refl_try; /* transmit queue refill attempt */
> - unsigned long st_rxq_enter; /* receive queue entered */
> - unsigned long st_rx2tx_tran; /* receive to trasmit transfers */
> - unsigned long st_rxq_notenter; /*receiveQ not entered, resched */
> - unsigned long st_rx_frm_egr; /* received from egress path */
> - unsigned long st_rx_frm_ing; /* received from ingress path */
> - unsigned long st_rxq_check;
> - unsigned long st_rxq_rsch;
> - struct sk_buff_head rq;
> - struct sk_buff_head tq;
> + struct net_device *dev;
> + struct sk_buff_head rq;
> + struct sk_buff_head tq;
> + wait_queue_head_t wq;
> + struct task_struct *task;
> };
Maybe you should allocate true per_cpu structure, to avoid cache line sharing
and get appropriate NUMA properties.
At least use a __cacheline_aligned_in_smp ...
>
> +/* Number of ifb devices to be set up by this module. */
> static int numifbs = 2;
> +module_param(numifbs, int, 0444);
> +MODULE_PARM_DESC(numifbs, "Number of ifb devices");
>
> -static void ri_tasklet(unsigned long dev);
> -static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev);
> -static int ifb_open(struct net_device *dev);
> -static int ifb_close(struct net_device *dev);
> +/* Number of TX queues per ifb */
> +static int numtxqs = 1;
> +module_param(numtxqs, int, 0444);
> +MODULE_PARM_DESC(numtxqs, "Number of TX queues per ifb");
>
> -static void ri_tasklet(unsigned long dev)
> +static int ifb_thread(void *priv)
> {
> -
> - struct net_device *_dev = (struct net_device *)dev;
> - struct ifb_private *dp = netdev_priv(_dev);
> - struct net_device_stats *stats = &_dev->stats;
> - struct netdev_queue *txq;
> + struct ifb_private *dp = (struct ifb_private*)priv;
A space is required before * : (struct ifb_private *)priv;
(in many places in your patch)
> + struct net_device *dev = dp->dev;
> + struct net_device_stats *stats = &dev->stats;
Here you use a net_device_stats that is shared by all your queues,
your updates wont be protected and some will be lost.
You should use txq->tx_ counters.
> + unsigned int num = dp - (struct ifb_private*)netdev_priv(dev);
> + struct netdev_queue *txq = netdev_get_tx_queue(dev, num);
> struct sk_buff *skb;
> -
> - txq = netdev_get_tx_queue(_dev, 0);
> - dp->st_task_enter++;
> - if ((skb = skb_peek(&dp->tq)) == NULL) {
> - dp->st_txq_refl_try++;
> - if (__netif_tx_trylock(txq)) {
> - dp->st_rxq_enter++;
> - while ((skb = skb_dequeue(&dp->rq)) != NULL) {
> + DEFINE_WAIT(wait);
> +
> + while (1) {
> + /* move skb from rq to tq */
> + while (1) {
> + prepare_to_wait(&dp->wq, &wait, TASK_UNINTERRUPTIBLE);
> + while (!__netif_tx_trylock(txq))
> + yield();
> + while ((skb = skb_dequeue(&dp->rq)) != NULL)
> skb_queue_tail(&dp->tq, skb);
> - dp->st_rx2tx_tran++;
> - }
> + if (netif_queue_stopped(dev))
> + netif_wake_queue(dev);
> __netif_tx_unlock(txq);
> - } else {
> - /* reschedule */
> - dp->st_rxq_notenter++;
> - goto resched;
> + if (kthread_should_stop() || !skb_queue_empty(&dp->tq))
> + break;
> + schedule();
> }
> - }
> -
> - while ((skb = skb_dequeue(&dp->tq)) != NULL) {
> - u32 from = G_TC_FROM(skb->tc_verd);
> -
> - skb->tc_verd = 0;
> - skb->tc_verd = SET_TC_NCLS(skb->tc_verd);
> - stats->tx_packets++;
> - stats->tx_bytes +=skb->len;
> -
> - skb->dev = dev_get_by_index(&init_net, skb->iif);
> - if (!skb->dev) {
> - dev_kfree_skb(skb);
> - stats->tx_dropped++;
> + finish_wait(&dp->wq, &wait);
> + if (kthread_should_stop())
> break;
> - }
> - dev_put(skb->dev);
> - skb->iif = _dev->ifindex;
> -
> - if (from & AT_EGRESS) {
> - dp->st_rx_frm_egr++;
> - dev_queue_xmit(skb);
> - } else if (from & AT_INGRESS) {
> - dp->st_rx_frm_ing++;
> - skb_pull(skb, skb->dev->hard_header_len);
> - netif_rx(skb);
> - } else
> - BUG();
> - }
>
> - if (__netif_tx_trylock(txq)) {
> - dp->st_rxq_check++;
> - if ((skb = skb_peek(&dp->rq)) == NULL) {
> - dp->tasklet_pending = 0;
> - if (netif_queue_stopped(_dev))
> - netif_wake_queue(_dev);
> - } else {
> - dp->st_rxq_rsch++;
> - __netif_tx_unlock(txq);
> - goto resched;
> + /* transfer packets */
> + while ((skb = skb_dequeue(&dp->tq)) != NULL) {
> + u32 from = G_TC_FROM(skb->tc_verd);
> +
> + skb->tc_verd = 0;
> + skb->tc_verd = SET_TC_NCLS(skb->tc_verd);
> + stats->tx_packets++;
> + stats->tx_bytes +=skb->len;
Use : txq->tx_packets++
txq->tx_bytes += skb->len;
> +
> + skb->dev = dev_get_by_index(&init_net, skb->iif);
> + if (!skb->dev) {
> + dev_kfree_skb(skb);
> + stats->tx_dropped++;
txq->tx_dropped ?
> + break;
> + }
> + dev_put(skb->dev);
> + skb->iif = dev->ifindex;
> +
> + if (from & AT_EGRESS) {
> + dev_queue_xmit(skb);
> + } else if (from & AT_INGRESS) {
> + skb_pull(skb, skb->dev->hard_header_len);
> + netif_rx_ni(skb);
> + } else
> + BUG();
> }
> - __netif_tx_unlock(txq);
> - } else {
> -resched:
> - dp->tasklet_pending = 1;
> - tasklet_schedule(&dp->ifb_tasklet);
> }
>
> -}
> -
> -static const struct net_device_ops ifb_netdev_ops = {
> - .ndo_open = ifb_open,
> - .ndo_stop = ifb_close,
> - .ndo_start_xmit = ifb_xmit,
> - .ndo_validate_addr = eth_validate_addr,
> -};
> -
> -static void ifb_setup(struct net_device *dev)
> -{
> - /* Initialize the device structure. */
> - dev->destructor = free_netdev;
> - dev->netdev_ops = &ifb_netdev_ops;
> -
> - /* Fill in device structure with ethernet-generic values. */
> - ether_setup(dev);
> - dev->tx_queue_len = TX_Q_LIMIT;
> -
> - dev->flags |= IFF_NOARP;
> - dev->flags &= ~IFF_MULTICAST;
> - dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
> - random_ether_addr(dev->dev_addr);
> + return 0;
> }
>
> static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
> {
> - struct ifb_private *dp = netdev_priv(dev);
> struct net_device_stats *stats = &dev->stats;
> u32 from = G_TC_FROM(skb->tc_verd);
> + int num = skb_get_queue_mapping(skb);
> + struct ifb_private *dp = ((struct ifb_private*)netdev_priv(dev)) + num;
>
> stats->rx_packets++;
> stats->rx_bytes+=skb->len;
Not sure how to solve this problem (several cpus can updates counter in //)
Thanks
next prev parent reply other threads:[~2009-11-10 9:08 UTC|newest]
Thread overview: 62+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-11-10 8:30 [PATCH] ifb: add multi-queue support Changli Gao
2009-11-10 9:07 ` Eric Dumazet [this message]
2009-11-10 9:43 ` Changli Gao
2009-11-10 10:57 ` Eric Dumazet
2009-11-10 11:14 ` Changli Gao
2009-11-10 11:41 ` Patrick McHardy
2009-11-10 12:14 ` Changli Gao
2009-11-10 12:19 ` Patrick McHardy
2009-11-10 12:37 ` Changli Gao
2009-11-10 12:45 ` Patrick McHardy
2009-11-10 13:06 ` Changli Gao
2009-11-10 13:34 ` Eric Dumazet
2009-11-10 13:49 ` Changli Gao
2009-11-10 16:45 ` Stephen Hemminger
2009-11-11 6:30 ` Changli Gao
2009-11-10 10:29 ` Patrick McHardy
2009-11-10 10:48 ` Changli Gao
2009-11-10 10:55 ` Eric Dumazet
-- strict thread matches above, loose matches on Subject: below --
2009-11-11 9:51 Changli Gao
2009-11-11 9:56 ` Changli Gao
2009-11-11 10:30 ` Eric Dumazet
2009-11-11 10:57 ` Changli Gao
2009-11-11 15:59 ` Patrick McHardy
2009-11-12 3:12 ` Changli Gao
2009-11-12 8:52 ` Jarek Poplawski
2009-11-12 9:32 ` Changli Gao
2009-11-12 15:10 ` Patrick McHardy
2009-11-13 1:28 ` Changli Gao
2009-11-12 9:44 ` Changli Gao
2009-11-12 9:48 ` Changli Gao
2009-11-12 15:11 ` Patrick McHardy
2009-11-13 1:32 ` Changli Gao
2009-11-13 7:18 ` Patrick McHardy
2009-11-12 12:48 ` Eric Dumazet
2009-11-13 1:26 ` Changli Gao
2009-11-13 5:56 ` Eric Dumazet
2009-11-13 6:16 ` Changli Gao
2009-11-13 7:45 ` Jarek Poplawski
2009-11-13 8:54 ` Changli Gao
2009-11-13 9:18 ` Jarek Poplawski
2009-11-13 9:38 ` Changli Gao
2009-11-13 9:57 ` Jarek Poplawski
2009-11-13 11:25 ` Changli Gao
2009-11-13 12:32 ` Jarek Poplawski
2009-11-13 13:10 ` Eric Dumazet
2009-11-13 16:15 ` Stephen Hemminger
2009-11-13 23:28 ` Changli Gao
2009-11-13 23:32 ` Stephen Hemminger
2009-11-13 23:42 ` Changli Gao
2009-11-14 12:53 ` Eric Dumazet
2009-11-14 13:30 ` Changli Gao
2009-11-13 13:55 ` Eric Dumazet
2009-11-13 4:37 ` Changli Gao
2009-11-16 16:39 ` Stephen Hemminger
2009-11-17 3:10 ` David Miller
2009-11-17 5:38 ` Changli Gao
2009-11-17 6:02 ` Stephen Hemminger
2009-11-13 4:42 Changli Gao
2009-11-13 4:46 ` Changli Gao
2009-11-16 7:31 Changli Gao
2009-11-16 8:19 ` Eric Dumazet
2009-11-16 8:43 ` Changli Gao
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4AF92D6D.8060300@gmail.com \
--to=eric.dumazet@gmail.com \
--cc=davem@davemloft.net \
--cc=netdev@vger.kernel.org \
--cc=therbert@google.com \
--cc=xiaosuo@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.