From: Changli Gao <xiaosuo@gmail.com>
To: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Jamal Hadi Salim <hadi@cyberus.ca>,
"David S. Miller" <davem@davemloft.net>,
netdev@vger.kernel.org
Subject: Re: [PATCH 4/5] ifb: add multiqueue support
Date: Tue, 14 Dec 2010 07:42:37 +0800 [thread overview]
Message-ID: <AANLkTim=9JfQQ__Pii0c8XUvYFSLmj_gtK77twJ9iAbR@mail.gmail.com> (raw)
In-Reply-To: <1292257573.2759.41.camel@edumazet-laptop>
On Tue, Dec 14, 2010 at 12:26 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le lundi 13 décembre 2010 à 22:43 +0800, Changli Gao a écrit :
>> Each ifb NIC has nr_cpu_ids rx queues and nr_cpu_ids queues. Packets
>> transmitted to ifb are enqueued to the corresponding per cpu tx queues,
>> and processed in the corresponding per cpu tasklet latter.
>>
>> The stats are converted to the u64 ones.
>>
>> tq is a stack variable now. It makes ifb_q_private smaller and tx queue
>> locked only once in ri_tasklet.
>>
>
> Might be good to say the tx_queue_len is multiplied by number of online
> cpus ;)
Thanks for completion.
>
>> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
>> ---
>> drivers/net/ifb.c | 211 ++++++++++++++++++++++++++++++++++++------------------
>> 1 file changed, 141 insertions(+), 70 deletions(-)
>> diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
>> index 57c5cfb..16c767b 100644
>> --- a/drivers/net/ifb.c
>> +++ b/drivers/net/ifb.c
>> @@ -37,56 +37,63 @@
>> #include <net/net_namespace.h>
>>
>> #define TX_Q_LIMIT 32
>> +struct ifb_q_private {
>> + struct tasklet_struct ifb_tasklet;
>> + struct sk_buff_head rq;
>> + struct u64_stats_sync syncp;
>> + u64 rx_packets;
>> + u64 rx_bytes;
>> + u64 rx_dropped;
>> +};
>> +
>> struct ifb_private {
>> - struct tasklet_struct ifb_tasklet;
>> - int tasklet_pending;
>> - struct sk_buff_head rq;
>> - struct sk_buff_head tq;
>> + struct ifb_q_private __percpu *q;
>
> You probably could use dev->ml_priv (lstats/tstats/dstats)
> so that ifb_private just disapears (we save a full cache line)
Good idea. Thanks.
>
>> };
>>
>> static int numifbs = 2;
>>
>> -static void ri_tasklet(unsigned long dev)
>> +static void ri_tasklet(unsigned long _dev)
>> {
>> -
>> - struct net_device *_dev = (struct net_device *)dev;
>> - struct ifb_private *dp = netdev_priv(_dev);
>> - struct net_device_stats *stats = &_dev->stats;
>> + struct net_device *dev = (struct net_device *)_dev;
>> + struct ifb_private *p = netdev_priv(dev);
>> + struct ifb_q_private *qp;
>> struct netdev_queue *txq;
>> struct sk_buff *skb;
>> -
>> - txq = netdev_get_tx_queue(_dev, 0);
>> - skb = skb_peek(&dp->tq);
>> - if (skb == NULL) {
>> - if (__netif_tx_trylock(txq)) {
>> - skb_queue_splice_tail_init(&dp->rq, &dp->tq);
>> - __netif_tx_unlock(txq);
>> - } else {
>> - /* reschedule */
>> - goto resched;
>> - }
>> + struct sk_buff_head tq;
>> +
>> + __skb_queue_head_init(&tq);
>> + txq = netdev_get_tx_queue(dev, raw_smp_processor_id());
>> + qp = per_cpu_ptr(p->q, raw_smp_processor_id());
>
> qp = this_cpu_ptr(dev->ifb_qp); is faster
and less, thanks. :)
>
>> + if (!__netif_tx_trylock(txq)) {
>> + tasklet_schedule(&qp->ifb_tasklet);
>> + return;
>> }
>> + skb_queue_splice_tail_init(&qp->rq, &tq);
>> + if (netif_tx_queue_stopped(txq))
>> + netif_tx_wake_queue(txq);
>> + __netif_tx_unlock(txq);
>>
>> - while ((skb = skb_dequeue(&dp->tq)) != NULL) {
>> + while ((skb = __skb_dequeue(&tq)) != NULL) {
>> u32 from = G_TC_FROM(skb->tc_verd);
>>
>> skb->tc_verd = 0;
>> skb->tc_verd = SET_TC_NCLS(skb->tc_verd);
>> - stats->tx_packets++;
>> - stats->tx_bytes += skb->len;
>> + u64_stats_update_begin(&qp->syncp);
>> + txq->tx_packets++;
>> + txq->tx_bytes += skb->len;
>>
>> rcu_read_lock();
>> skb->dev = dev_get_by_index_rcu(&init_net, skb->skb_iif);
>> if (!skb->dev) {
>> rcu_read_unlock();
>> + txq->tx_dropped++;
>> + u64_stats_update_end(&qp->syncp);
>> dev_kfree_skb(skb);
>> - stats->tx_dropped++;
>> - if (skb_queue_len(&dp->tq) != 0)
>> - goto resched;
>> - break;
>> + continue;
>> }
>> rcu_read_unlock();
>> - skb->skb_iif = _dev->ifindex;
>> + u64_stats_update_end(&qp->syncp);
>> + skb->skb_iif = dev->ifindex;
>
> Why is this necessary ? shouldnt skb->skb_iif already be dev->ifindex ?
No. In fact, act_mirred use skb_iff to save the original dev.
skb2->skb_iif = skb->dev->ifindex;
skb2->dev = dev;
>
>>
>> if (from & AT_EGRESS) {
>> dev_queue_xmit(skb);
>> @@ -96,48 +103,32 @@ static void ri_tasklet(unsigned long dev)
>> } else
>> BUG();
>> }
>> -
>> - if (__netif_tx_trylock(txq)) {
>> - skb = skb_peek(&dp->rq);
>> - if (skb == NULL) {
>> - dp->tasklet_pending = 0;
>> - if (netif_queue_stopped(_dev))
>> - netif_wake_queue(_dev);
>> - } else {
>> - __netif_tx_unlock(txq);
>> - goto resched;
>> - }
>> - __netif_tx_unlock(txq);
>> - } else {
>> -resched:
>> - dp->tasklet_pending = 1;
>> - tasklet_schedule(&dp->ifb_tasklet);
>> - }
>> -
>> }
>>
>> static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
>> {
>> - struct ifb_private *dp = netdev_priv(dev);
>> - struct net_device_stats *stats = &dev->stats;
>> + struct ifb_private *p = netdev_priv(dev);
>> + struct ifb_q_private *qp = per_cpu_ptr(p->q,
>> + skb_get_queue_mapping(skb));
>
> Would be good to add a
>
> WARN_ON(skb_get_queue_mapping(skb) != smp_processor_id());
I think it maybe what Jamal wants too. Thanks.
>
>
>> u32 from = G_TC_FROM(skb->tc_verd);
>>
>> - stats->rx_packets++;
>> - stats->rx_bytes += skb->len;
>> + u64_stats_update_begin(&qp->syncp);
>> + qp->rx_packets++;
>> + qp->rx_bytes += skb->len;
>>
>> if (!(from & (AT_INGRESS|AT_EGRESS)) || !skb->skb_iif) {
>> + qp->rx_dropped++;
>> + u64_stats_update_end(&qp->syncp);
>> dev_kfree_skb(skb);
>> - stats->rx_dropped++;
>> return NETDEV_TX_OK;
>> }
>> + u64_stats_update_end(&qp->syncp);
>>
>> - __skb_queue_tail(&dp->rq, skb);
>> - if (!dp->tasklet_pending) {
>> - dp->tasklet_pending = 1;
>> - tasklet_schedule(&dp->ifb_tasklet);
>> - }
>> + __skb_queue_tail(&qp->rq, skb);
>> + if (skb_queue_len(&qp->rq) == 1)
>> + tasklet_schedule(&qp->ifb_tasklet);
>>
>> - if (skb_queue_len(&dp->rq) >= dev->tx_queue_len)
>> + if (skb_queue_len(&qp->rq) >= dev->tx_queue_len)
>
> This seems wrong...
> You need to change to netif_tx_stop_queue(txq)
Thanks for catching this. I missed it.
>
>> netif_stop_queue(dev);
>>
>> return NETDEV_TX_OK;
>> @@ -145,33 +136,103 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
>>
>> static int ifb_close(struct net_device *dev)
>> {
>> - struct ifb_private *dp = netdev_priv(dev);
>> -
>> - tasklet_kill(&dp->ifb_tasklet);
>> - netif_stop_queue(dev);
>> - __skb_queue_purge(&dp->rq);
>> - __skb_queue_purge(&dp->tq);
>> + struct ifb_private *p = netdev_priv(dev);
>> + struct ifb_q_private *qp;
>> + int cpu;
>> +
>> + for_each_possible_cpu(cpu) {
>> + qp = per_cpu_ptr(p->q, cpu);
>> + tasklet_kill(&qp->ifb_tasklet);
>> + netif_tx_stop_queue(netdev_get_tx_queue(dev, cpu));
>> + __skb_queue_purge(&qp->rq);
>> + }
>>
>> return 0;
>> }
>>
>> static int ifb_open(struct net_device *dev)
>> {
>> - struct ifb_private *dp = netdev_priv(dev);
>> + int cpu;
>> +
>
>
>
>> + for_each_possible_cpu(cpu)
>> + netif_tx_start_queue(netdev_get_tx_queue(dev, cpu));
>> +
>> + return 0;
>> +}
>> +
>> +static int ifb_init(struct net_device *dev)
>> +{
>> + struct ifb_private *p = netdev_priv(dev);
>> + struct ifb_q_private *q;
>> + int cpu;
>>
>> - tasklet_init(&dp->ifb_tasklet, ri_tasklet, (unsigned long)dev);
>> - __skb_queue_head_init(&dp->rq);
>> - __skb_queue_head_init(&dp->tq);
>> - netif_start_queue(dev);
>> + p->q = alloc_percpu(struct ifb_q_private);
>> + if (!p->q)
>> + return -ENOMEM;
>
> Hmm, maybe use netdev_queue_numa_node_write() somewhere, so that
> qdisc_alloc() can use NUMA affinities.
I'll add it, thanks.
--
Regards,
Changli Gao(xiaosuo@gmail.com)
next prev parent reply other threads:[~2010-12-13 23:42 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-12-13 14:43 [PATCH 1/5] ifb: remove function declarations Changli Gao
2010-12-13 14:43 ` [PATCH 2/5] ifb: code cleanup Changli Gao
2010-12-15 12:32 ` jamal
2010-12-13 14:43 ` [PATCH 3/5] ifb: fix tx_queue_len overlimit Changli Gao
2010-12-15 12:33 ` jamal
2010-12-15 12:36 ` Changli Gao
2010-12-15 12:42 ` jamal
2010-12-13 14:43 ` [PATCH 4/5] ifb: add multiqueue support Changli Gao
2010-12-13 16:26 ` Eric Dumazet
2010-12-13 23:42 ` Changli Gao [this message]
2010-12-13 17:05 ` Eric Dumazet
2010-12-13 23:46 ` Changli Gao
2010-12-15 12:34 ` jamal
2010-12-13 14:43 ` [PATCH 5/5] net: add skb.old_queue_mapping Changli Gao
2010-12-13 16:56 ` Eric Dumazet
2010-12-13 17:58 ` David Miller
2010-12-13 23:58 ` Changli Gao
2010-12-15 2:59 ` Changli Gao
2010-12-15 12:40 ` jamal
2010-12-15 12:52 ` Changli Gao
2010-12-15 12:31 ` [PATCH 1/5] ifb: remove function declarations jamal
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='AANLkTim=9JfQQ__Pii0c8XUvYFSLmj_gtK77twJ9iAbR@mail.gmail.com' \
--to=xiaosuo@gmail.com \
--cc=davem@davemloft.net \
--cc=eric.dumazet@gmail.com \
--cc=hadi@cyberus.ca \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).