Re: [PATCH 4/5] ifb: add multiqueue support

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Changli Gao <xiaosuo@gmail.com>
To: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Jamal Hadi Salim <hadi@cyberus.ca>,
	"David S. Miller" <davem@davemloft.net>,
	netdev@vger.kernel.org
Subject: Re: [PATCH 4/5] ifb: add multiqueue support
Date: Tue, 14 Dec 2010 07:42:37 +0800	[thread overview]
Message-ID: <AANLkTim=9JfQQ__Pii0c8XUvYFSLmj_gtK77twJ9iAbR@mail.gmail.com> (raw)
In-Reply-To: <1292257573.2759.41.camel@edumazet-laptop>

On Tue, Dec 14, 2010 at 12:26 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le lundi 13 décembre 2010 à 22:43 +0800, Changli Gao a écrit :
>> Each ifb NIC has nr_cpu_ids rx queues and nr_cpu_ids queues. Packets
>> transmitted to ifb are enqueued to the corresponding per cpu tx queues,
>> and processed in the corresponding per cpu tasklet latter.
>>
>> The stats are converted to the u64 ones.
>>
>> tq is a stack variable now. It makes ifb_q_private smaller and tx queue
>> locked only once in ri_tasklet.
>>
>
> Might be good to say the tx_queue_len is multiplied by number of online
> cpus ;)

Thanks for completion.

>
>> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
>> ---
>>  drivers/net/ifb.c |  211 ++++++++++++++++++++++++++++++++++++------------------
>>  1 file changed, 141 insertions(+), 70 deletions(-)
>> diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
>> index 57c5cfb..16c767b 100644
>> --- a/drivers/net/ifb.c
>> +++ b/drivers/net/ifb.c
>> @@ -37,56 +37,63 @@
>>  #include <net/net_namespace.h>
>>
>>  #define TX_Q_LIMIT    32
>> +struct ifb_q_private {
>> +     struct tasklet_struct   ifb_tasklet;
>> +     struct sk_buff_head     rq;
>> +     struct u64_stats_sync   syncp;
>> +     u64                     rx_packets;
>> +     u64                     rx_bytes;
>> +     u64                     rx_dropped;
>> +};
>> +
>>  struct ifb_private {
>> -     struct tasklet_struct   ifb_tasklet;
>> -     int     tasklet_pending;
>> -     struct sk_buff_head     rq;
>> -     struct sk_buff_head     tq;
>> +     struct ifb_q_private __percpu   *q;
>
> You probably could use dev->ml_priv (lstats/tstats/dstats)
> so that ifb_private just disapears (we save a full cache line)

Good idea. Thanks.

>
>>  };
>>
>>  static int numifbs = 2;
>>
>> -static void ri_tasklet(unsigned long dev)
>> +static void ri_tasklet(unsigned long _dev)
>>  {
>> -
>> -     struct net_device *_dev = (struct net_device *)dev;
>> -     struct ifb_private *dp = netdev_priv(_dev);
>> -     struct net_device_stats *stats = &_dev->stats;
>> +     struct net_device *dev = (struct net_device *)_dev;
>> +     struct ifb_private *p = netdev_priv(dev);
>> +     struct ifb_q_private *qp;
>>       struct netdev_queue *txq;
>>       struct sk_buff *skb;
>> -
>> -     txq = netdev_get_tx_queue(_dev, 0);
>> -     skb = skb_peek(&dp->tq);
>> -     if (skb == NULL) {
>> -             if (__netif_tx_trylock(txq)) {
>> -                     skb_queue_splice_tail_init(&dp->rq, &dp->tq);
>> -                     __netif_tx_unlock(txq);
>> -             } else {
>> -                     /* reschedule */
>> -                     goto resched;
>> -             }
>> +     struct sk_buff_head tq;
>> +
>> +     __skb_queue_head_init(&tq);
>> +     txq = netdev_get_tx_queue(dev, raw_smp_processor_id());
>> +     qp = per_cpu_ptr(p->q, raw_smp_processor_id());
>
>        qp = this_cpu_ptr(dev->ifb_qp); is faster

and less, thanks. :)

>
>> +     if (!__netif_tx_trylock(txq)) {
>> +             tasklet_schedule(&qp->ifb_tasklet);
>> +             return;
>>       }
>> +     skb_queue_splice_tail_init(&qp->rq, &tq);
>> +     if (netif_tx_queue_stopped(txq))
>> +             netif_tx_wake_queue(txq);
>> +     __netif_tx_unlock(txq);
>>
>> -     while ((skb = skb_dequeue(&dp->tq)) != NULL) {
>> +     while ((skb = __skb_dequeue(&tq)) != NULL) {
>>               u32 from = G_TC_FROM(skb->tc_verd);
>>
>>               skb->tc_verd = 0;
>>               skb->tc_verd = SET_TC_NCLS(skb->tc_verd);
>> -             stats->tx_packets++;
>> -             stats->tx_bytes += skb->len;
>> +             u64_stats_update_begin(&qp->syncp);
>> +             txq->tx_packets++;
>> +             txq->tx_bytes += skb->len;
>>
>>               rcu_read_lock();
>>               skb->dev = dev_get_by_index_rcu(&init_net, skb->skb_iif);
>>               if (!skb->dev) {
>>                       rcu_read_unlock();
>> +                     txq->tx_dropped++;
>> +                     u64_stats_update_end(&qp->syncp);
>>                       dev_kfree_skb(skb);
>> -                     stats->tx_dropped++;
>> -                     if (skb_queue_len(&dp->tq) != 0)
>> -                             goto resched;
>> -                     break;
>> +                     continue;
>>               }
>>               rcu_read_unlock();
>> -             skb->skb_iif = _dev->ifindex;
>> +             u64_stats_update_end(&qp->syncp);
>> +             skb->skb_iif = dev->ifindex;
>
> Why is this necessary ? shouldnt skb->skb_iif already be dev->ifindex ?

No. In fact, act_mirred use skb_iff to save the original dev.

        skb2->skb_iif = skb->dev->ifindex;
        skb2->dev = dev;

>
>>
>>               if (from & AT_EGRESS) {
>>                       dev_queue_xmit(skb);
>> @@ -96,48 +103,32 @@ static void ri_tasklet(unsigned long dev)
>>               } else
>>                       BUG();
>>       }
>> -
>> -     if (__netif_tx_trylock(txq)) {
>> -             skb = skb_peek(&dp->rq);
>> -             if (skb == NULL) {
>> -                     dp->tasklet_pending = 0;
>> -                     if (netif_queue_stopped(_dev))
>> -                             netif_wake_queue(_dev);
>> -             } else {
>> -                     __netif_tx_unlock(txq);
>> -                     goto resched;
>> -             }
>> -             __netif_tx_unlock(txq);
>> -     } else {
>> -resched:
>> -             dp->tasklet_pending = 1;
>> -             tasklet_schedule(&dp->ifb_tasklet);
>> -     }
>> -
>>  }
>>
>>  static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
>>  {
>> -     struct ifb_private *dp = netdev_priv(dev);
>> -     struct net_device_stats *stats = &dev->stats;
>> +     struct ifb_private *p = netdev_priv(dev);
>> +     struct ifb_q_private *qp = per_cpu_ptr(p->q,
>> +                                            skb_get_queue_mapping(skb));
>
> Would be good to add a
>
> WARN_ON(skb_get_queue_mapping(skb) != smp_processor_id());

I think it maybe what Jamal wants too. Thanks.

>
>
>>       u32 from = G_TC_FROM(skb->tc_verd);
>>
>> -     stats->rx_packets++;
>> -     stats->rx_bytes += skb->len;
>> +     u64_stats_update_begin(&qp->syncp);
>> +     qp->rx_packets++;
>> +     qp->rx_bytes += skb->len;
>>
>>       if (!(from & (AT_INGRESS|AT_EGRESS)) || !skb->skb_iif) {
>> +             qp->rx_dropped++;
>> +             u64_stats_update_end(&qp->syncp);
>>               dev_kfree_skb(skb);
>> -             stats->rx_dropped++;
>>               return NETDEV_TX_OK;
>>       }
>> +     u64_stats_update_end(&qp->syncp);
>>
>> -     __skb_queue_tail(&dp->rq, skb);
>> -     if (!dp->tasklet_pending) {
>> -             dp->tasklet_pending = 1;
>> -             tasklet_schedule(&dp->ifb_tasklet);
>> -     }
>> +     __skb_queue_tail(&qp->rq, skb);
>> +     if (skb_queue_len(&qp->rq) == 1)
>> +             tasklet_schedule(&qp->ifb_tasklet);
>>
>> -     if (skb_queue_len(&dp->rq) >= dev->tx_queue_len)
>> +     if (skb_queue_len(&qp->rq) >= dev->tx_queue_len)
>
> This seems wrong...
> You need to change to netif_tx_stop_queue(txq)

Thanks for catching this. I missed it.

>
>>               netif_stop_queue(dev);
>>
>>       return NETDEV_TX_OK;
>> @@ -145,33 +136,103 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
>>
>>  static int ifb_close(struct net_device *dev)
>>  {
>> -     struct ifb_private *dp = netdev_priv(dev);
>> -
>> -     tasklet_kill(&dp->ifb_tasklet);
>> -     netif_stop_queue(dev);
>> -     __skb_queue_purge(&dp->rq);
>> -     __skb_queue_purge(&dp->tq);
>> +     struct ifb_private *p = netdev_priv(dev);
>> +     struct ifb_q_private *qp;
>> +     int cpu;
>> +
>> +     for_each_possible_cpu(cpu) {
>> +             qp = per_cpu_ptr(p->q, cpu);
>> +             tasklet_kill(&qp->ifb_tasklet);
>> +             netif_tx_stop_queue(netdev_get_tx_queue(dev, cpu));
>> +             __skb_queue_purge(&qp->rq);
>> +     }
>>
>>       return 0;
>>  }
>>
>>  static int ifb_open(struct net_device *dev)
>>  {
>> -     struct ifb_private *dp = netdev_priv(dev);
>> +     int cpu;
>> +
>
>
>
>> +     for_each_possible_cpu(cpu)
>> +             netif_tx_start_queue(netdev_get_tx_queue(dev, cpu));
>> +
>> +     return 0;
>> +}
>> +
>> +static int ifb_init(struct net_device *dev)
>> +{
>> +     struct ifb_private *p = netdev_priv(dev);
>> +     struct ifb_q_private *q;
>> +     int cpu;
>>
>> -     tasklet_init(&dp->ifb_tasklet, ri_tasklet, (unsigned long)dev);
>> -     __skb_queue_head_init(&dp->rq);
>> -     __skb_queue_head_init(&dp->tq);
>> -     netif_start_queue(dev);
>> +     p->q = alloc_percpu(struct ifb_q_private);
>> +     if (!p->q)
>> +             return -ENOMEM;
>
> Hmm, maybe  use netdev_queue_numa_node_write() somewhere, so that
> qdisc_alloc() can use NUMA affinities.

I'll add it, thanks.

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

next prev parent reply	other threads:[~2010-12-13 23:42 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-12-13 14:43 [PATCH 1/5] ifb: remove function declarations Changli Gao
2010-12-13 14:43 ` [PATCH 2/5] ifb: code cleanup Changli Gao
2010-12-15 12:32   ` jamal
2010-12-13 14:43 ` [PATCH 3/5] ifb: fix tx_queue_len overlimit Changli Gao
2010-12-15 12:33   ` jamal
2010-12-15 12:36     ` Changli Gao
2010-12-15 12:42       ` jamal
2010-12-13 14:43 ` [PATCH 4/5] ifb: add multiqueue support Changli Gao
2010-12-13 16:26   ` Eric Dumazet
2010-12-13 23:42     ` Changli Gao [this message]
2010-12-13 17:05   ` Eric Dumazet
2010-12-13 23:46     ` Changli Gao
2010-12-15 12:34   ` jamal
2010-12-13 14:43 ` [PATCH 5/5] net: add skb.old_queue_mapping Changli Gao
2010-12-13 16:56   ` Eric Dumazet
2010-12-13 17:58     ` David Miller
2010-12-13 23:58     ` Changli Gao
2010-12-15  2:59       ` Changli Gao
2010-12-15 12:40   ` jamal
2010-12-15 12:52     ` Changli Gao
2010-12-15 12:31 ` [PATCH 1/5] ifb: remove function declarations jamal

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='AANLkTim=9JfQQ__Pii0c8XUvYFSLmj_gtK77twJ9iAbR@mail.gmail.com' \
    --to=xiaosuo@gmail.com \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=hadi@cyberus.ca \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).