From: Changli Gao
Subject: Re: [PATCH 4/5] ifb: add multiqueue support
Date: Tue, 14 Dec 2010 07:42:37 +0800
To: Eric Dumazet
Cc: Jamal Hadi Salim, "David S. Miller", netdev@vger.kernel.org
References: <1292251414-5154-1-git-send-email-xiaosuo@gmail.com>
 <1292251414-5154-4-git-send-email-xiaosuo@gmail.com>
 <1292257573.2759.41.camel@edumazet-laptop>
In-Reply-To: <1292257573.2759.41.camel@edumazet-laptop>

On Tue, Dec 14, 2010 at 12:26 AM, Eric Dumazet wrote:
> On Monday 13 December 2010 at 22:43 +0800, Changli Gao wrote:
>> Each ifb NIC has nr_cpu_ids rx queues and nr_cpu_ids tx queues. Packets
>> transmitted to ifb are enqueued to the corresponding per cpu tx queues,
>> and processed in the corresponding per cpu tasklet later.
>>
>> The stats are converted to u64 ones.
>>
>> tq is a stack variable now. This makes ifb_q_private smaller, and the
>> tx queue is locked only once in ri_tasklet.
>>
>
> Might be good to say the tx_queue_len is multiplied by the number of
> online cpus ;)

Thanks, I'll add that to the changelog.

>
>> Signed-off-by: Changli Gao
>> ---
>>  drivers/net/ifb.c |  211 ++++++++++++++++++++++++++++++++++++-----------------
>>  1 file changed, 141 insertions(+), 70 deletions(-)
>> diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
>> index 57c5cfb..16c767b 100644
>> --- a/drivers/net/ifb.c
>> +++ b/drivers/net/ifb.c
>> @@ -37,56 +37,63 @@
>>  #include
>>
>>  #define TX_Q_LIMIT    32
>> +struct ifb_q_private {
>> +        struct tasklet_struct   ifb_tasklet;
>> +        struct sk_buff_head     rq;
>> +        struct u64_stats_sync   syncp;
>> +        u64                     rx_packets;
>> +        u64                     rx_bytes;
>> +        u64                     rx_dropped;
>> +};
>> +
>>  struct ifb_private {
>> -        struct tasklet_struct   ifb_tasklet;
>> -        int     tasklet_pending;
>> -        struct sk_buff_head     rq;
>> -        struct sk_buff_head     tq;
>> +        struct ifb_q_private __percpu  *q;
>
> You probably could use dev->ml_priv (lstats/tstats/dstats)
> so that ifb_private just disappears (we save a full cache line)

Good idea. Thanks.
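Maybe something like this (untested sketch only, folding in your
this_cpu_ptr() hint below; the exact hookup is illustrative):

static int ifb_init(struct net_device *dev)
{
        struct ifb_q_private __percpu *q;

        q = alloc_percpu(struct ifb_q_private);
        if (!q)
                return -ENOMEM;
        /* keep the per-cpu pointer in the ml_priv union slot,
         * so struct ifb_private goes away entirely */
        dev->ml_priv = q;
        return 0;
}

and in the hot paths:

        struct ifb_q_private __percpu *q = dev->ml_priv;
        struct ifb_q_private *qp = this_cpu_ptr(q);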
>
>>  };
>>
>>  static int numifbs = 2;
>>
>> -static void ri_tasklet(unsigned long dev)
>> +static void ri_tasklet(unsigned long _dev)
>>  {
>> -
>> -        struct net_device *_dev = (struct net_device *)dev;
>> -        struct ifb_private *dp = netdev_priv(_dev);
>> -        struct net_device_stats *stats = &_dev->stats;
>> +        struct net_device *dev = (struct net_device *)_dev;
>> +        struct ifb_private *p = netdev_priv(dev);
>> +        struct ifb_q_private *qp;
>>          struct netdev_queue *txq;
>>          struct sk_buff *skb;
>> -
>> -        txq = netdev_get_tx_queue(_dev, 0);
>> -        skb = skb_peek(&dp->tq);
>> -        if (skb == NULL) {
>> -                if (__netif_tx_trylock(txq)) {
>> -                        skb_queue_splice_tail_init(&dp->rq, &dp->tq);
>> -                        __netif_tx_unlock(txq);
>> -                } else {
>> -                        /* reschedule */
>> -                        goto resched;
>> -                }
>> +        struct sk_buff_head tq;
>> +
>> +        __skb_queue_head_init(&tq);
>> +        txq = netdev_get_tx_queue(dev, raw_smp_processor_id());
>> +        qp = per_cpu_ptr(p->q, raw_smp_processor_id());
>
>        qp = this_cpu_ptr(dev->ifb_qp);

It is faster and shorter, thanks. :)

>
>> +        if (!__netif_tx_trylock(txq)) {
>> +                tasklet_schedule(&qp->ifb_tasklet);
>> +                return;
>>          }
>> +        skb_queue_splice_tail_init(&qp->rq, &tq);
>> +        if (netif_tx_queue_stopped(txq))
>> +                netif_tx_wake_queue(txq);
>> +        __netif_tx_unlock(txq);
>>
>> -        while ((skb = skb_dequeue(&dp->tq)) != NULL) {
>> +        while ((skb = __skb_dequeue(&tq)) != NULL) {
>>                  u32 from = G_TC_FROM(skb->tc_verd);
>>
>>                  skb->tc_verd = 0;
>>                  skb->tc_verd = SET_TC_NCLS(skb->tc_verd);
>> -                stats->tx_packets++;
>> -                stats->tx_bytes += skb->len;
>> +                u64_stats_update_begin(&qp->syncp);
>> +                txq->tx_packets++;
>> +                txq->tx_bytes += skb->len;
>>
>>                  rcu_read_lock();
>>                  skb->dev = dev_get_by_index_rcu(&init_net, skb->skb_iif);
>>                  if (!skb->dev) {
>>                          rcu_read_unlock();
>> +                        txq->tx_dropped++;
>> +                        u64_stats_update_end(&qp->syncp);
>>                          dev_kfree_skb(skb);
>> -                        stats->tx_dropped++;
>> -                        if (skb_queue_len(&dp->tq) != 0)
>> -                                goto resched;
>> -                        break;
>> +                        continue;
>>                  }
>>                  rcu_read_unlock();
>> -                skb->skb_iif = _dev->ifindex;
>> +                u64_stats_update_end(&qp->syncp);
>> +                skb->skb_iif = dev->ifindex;
>
> Why is this necessary? Shouldn't skb->skb_iif already be dev->ifindex?

No.
In fact, act_mirred uses skb_iif to save the original dev:

        skb2->skb_iif = skb->dev->ifindex;
        skb2->dev = dev;

>
>>
>>                  if (from & AT_EGRESS) {
>>                          dev_queue_xmit(skb);
>> @@ -96,48 +103,32 @@ static void ri_tasklet(unsigned long dev)
>>                  } else
>>                          BUG();
>>          }
>> -
>> -        if (__netif_tx_trylock(txq)) {
>> -                skb = skb_peek(&dp->rq);
>> -                if (skb == NULL) {
>> -                        dp->tasklet_pending = 0;
>> -                        if (netif_queue_stopped(_dev))
>> -                                netif_wake_queue(_dev);
>> -                } else {
>> -                        __netif_tx_unlock(txq);
>> -                        goto resched;
>> -                }
>> -                __netif_tx_unlock(txq);
>> -        } else {
>> -resched:
>> -                dp->tasklet_pending = 1;
>> -                tasklet_schedule(&dp->ifb_tasklet);
>> -        }
>> -
>>  }
>>
>>  static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
>>  {
>> -        struct ifb_private *dp = netdev_priv(dev);
>> -        struct net_device_stats *stats = &dev->stats;
>> +        struct ifb_private *p = netdev_priv(dev);
>> +        struct ifb_q_private *qp = per_cpu_ptr(p->q,
>> +                                               skb_get_queue_mapping(skb));
>
> Would be good to add a
>
> WARN_ON(skb_get_queue_mapping(skb) != smp_processor_id());

I think it may be what Jamal wants too. Thanks.

>
>
>>          u32 from = G_TC_FROM(skb->tc_verd);
>>
>> -        stats->rx_packets++;
>> -        stats->rx_bytes += skb->len;
>> +        u64_stats_update_begin(&qp->syncp);
>> +        qp->rx_packets++;
>> +        qp->rx_bytes += skb->len;
>>
>>          if (!(from & (AT_INGRESS|AT_EGRESS)) || !skb->skb_iif) {
>> +                qp->rx_dropped++;
>> +                u64_stats_update_end(&qp->syncp);
>>                  dev_kfree_skb(skb);
>> -                stats->rx_dropped++;
>>                  return NETDEV_TX_OK;
>>          }
>> +        u64_stats_update_end(&qp->syncp);
>>
>> -        __skb_queue_tail(&dp->rq, skb);
>> -        if (!dp->tasklet_pending) {
>> -                dp->tasklet_pending = 1;
>> -                tasklet_schedule(&dp->ifb_tasklet);
>> -        }
>> +        __skb_queue_tail(&qp->rq, skb);
>> +        if (skb_queue_len(&qp->rq) == 1)
>> +                tasklet_schedule(&qp->ifb_tasklet);
>>
>> -        if (skb_queue_len(&dp->rq) >= dev->tx_queue_len)
>> +        if (skb_queue_len(&qp->rq) >= dev->tx_queue_len)
>
> This seems wrong...
> You need to change to netif_tx_stop_queue(txq)

Thanks for catching this. I missed it.
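Something like this, I think (untested sketch; where I look up txq is
just illustrative):

        struct netdev_queue *txq;

        txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
        /* stop only this cpu's tx queue, not the whole device */
        if (skb_queue_len(&qp->rq) >= dev->tx_queue_len)
                netif_tx_stop_queue(txq);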
>
>>                  netif_stop_queue(dev);
>>
>>          return NETDEV_TX_OK;
>> @@ -145,33 +136,103 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
>>
>>  static int ifb_close(struct net_device *dev)
>>  {
>> -        struct ifb_private *dp = netdev_priv(dev);
>> -
>> -        tasklet_kill(&dp->ifb_tasklet);
>> -        netif_stop_queue(dev);
>> -        __skb_queue_purge(&dp->rq);
>> -        __skb_queue_purge(&dp->tq);
>> +        struct ifb_private *p = netdev_priv(dev);
>> +        struct ifb_q_private *qp;
>> +        int cpu;
>> +
>> +        for_each_possible_cpu(cpu) {
>> +                qp = per_cpu_ptr(p->q, cpu);
>> +                tasklet_kill(&qp->ifb_tasklet);
>> +                netif_tx_stop_queue(netdev_get_tx_queue(dev, cpu));
>> +                __skb_queue_purge(&qp->rq);
>> +        }
>>
>>          return 0;
>>  }
>>
>>  static int ifb_open(struct net_device *dev)
>>  {
>> -        struct ifb_private *dp = netdev_priv(dev);
>> +        int cpu;
>> +
>
>> +        for_each_possible_cpu(cpu)
>> +                netif_tx_start_queue(netdev_get_tx_queue(dev, cpu));
>> +
>> +        return 0;
>> +}
>> +
>> +static int ifb_init(struct net_device *dev)
>> +{
>> +        struct ifb_private *p = netdev_priv(dev);
>> +        struct ifb_q_private *q;
>> +        int cpu;
>>
>> -        tasklet_init(&dp->ifb_tasklet, ri_tasklet, (unsigned long)dev);
>> -        __skb_queue_head_init(&dp->rq);
>> -        __skb_queue_head_init(&dp->tq);
>> -        netif_start_queue(dev);
>> +        p->q = alloc_percpu(struct ifb_q_private);
>> +        if (!p->q)
>> +                return -ENOMEM;
>
> Hmm, maybe use netdev_queue_numa_node_write() somewhere, so that
> qdisc_alloc() can use NUMA affinities.

I'll add it, thanks.
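Maybe along these lines (untested sketch, in ifb_init() after the
per-cpu allocation; using cpu_to_node() is my assumption):

        /* record each tx queue's NUMA node so qdisc_alloc() can
         * allocate the qdisc on the matching node */
        for_each_possible_cpu(cpu)
                netdev_queue_numa_node_write(netdev_get_tx_queue(dev, cpu),
                                             cpu_to_node(cpu));

-- 
Regards,
Changli Gao (xiaosuo@gmail.com)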