From: Krishna Kumar <krkumar2@in.ibm.com>
To: davem@davemloft.net, rdreier@cisco.com
Cc: johnpol@2ka.mipt.ru, Robert.Olsson@data.slu.se,
rick.jones2@hp.com, herbert@gondor.apana.org.au,
gaagaan@gmail.com, kumarkr@linux.ibm.com,
peter.p.waskiewicz.jr@intel.com, mcarlson@broadcom.com,
kaber@trash.net, jagana@us.ibm.com,
general@lists.openfabrics.org, mchan@broadcom.com, tgraf@suug.ch,
jeff@garzik.org, sri@us.ibm.com, hadi@cyberus.ca,
netdev@vger.kernel.org, Krishna Kumar <krkumar2@in.ibm.com>,
xma@us.ibm.com
Subject: [PATCH 09/10] IPoIB batching xmit handler support.
Date: Fri, 20 Jul 2007 12:03:36 +0530 [thread overview]
Message-ID: <20070720063336.26341.2955.sendpatchset@localhost.localdomain> (raw)
In-Reply-To: <20070720063149.26341.84076.sendpatchset@localhost.localdomain>
Add an IPoIB batching xmit handler.
Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
---
ipoib_main.c | 215 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 files changed, 210 insertions(+), 5 deletions(-)
diff -ruNp org/drivers/infiniband/ulp/ipoib/ipoib_main.c new/drivers/infiniband/ulp/ipoib/ipoib_main.c
--- org/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-07-20 07:49:28.000000000 +0530
+++ new/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-07-20 08:30:22.000000000 +0530
@@ -558,7 +558,8 @@ static void neigh_add_path(struct sk_buf
goto err_drop;
}
} else
- ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+ ipoib_send(dev, skb, path->ah,
+ IPOIB_QPN(skb->dst->neighbour->ha), 1);
} else {
neigh->ah = NULL;
@@ -638,7 +639,7 @@ static void unicast_arp_send(struct sk_b
ipoib_dbg(priv, "Send unicast ARP to %04x\n",
be16_to_cpu(path->pathrec.dlid));
- ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
+ ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr), 1);
} else if ((path->query || !path_rec_start(dev, path)) &&
skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
/* put pseudoheader back on for next time */
@@ -704,7 +705,8 @@ static int ipoib_start_xmit(struct sk_bu
goto out;
}
- ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+ ipoib_send(dev, skb, neigh->ah,
+ IPOIB_QPN(skb->dst->neighbour->ha), 1);
goto out;
}
@@ -753,6 +755,177 @@ out:
return NETDEV_TX_OK;
}
+#define XMIT_QUEUED_SKBS() \
+ do { \
+ if (num_skbs) { \
+ ipoib_send(dev, NULL, old_neigh->ah, old_qpn, \
+ num_skbs); \
+ num_skbs = 0; \
+ } \
+ } while (0)
+
+/*
+ * Batching xmit handler: sends up to dev->xmit_slots skbs from dev->skb_blist.
+ * TODO: Merge with ipoib_start_xmit to share code via a transparent wrapper.
+ */
+static int ipoib_start_xmit_frames(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct sk_buff *skb;
+ struct sk_buff_head *blist;
+ int max_skbs, num_skbs = 0, tx_ring_index = -1;
+ u32 qpn, old_qpn = 0;
+ struct ipoib_neigh *neigh, *old_neigh = NULL;
+ unsigned long flags;
+
+ if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags)))
+ return NETDEV_TX_LOCKED;
+
+ blist = dev->skb_blist;
+
+ /*
+ * Send at most xmit_slots skbs. This also prevents the device getting
+ * full as ipoib_send modifies the xmit_slots and we use the same
+ * value to figure how many skbs to send.
+ */
+ max_skbs = dev->xmit_slots;
+
+ while (max_skbs-- > 0 && (skb = __skb_dequeue(blist)) != NULL) {
+ /*
+ * From here on, ipoib_send() cannot stop the queue as it
+ * uses the same initialization as 'max_skbs'. So we can
+ * optimize to not check for queue stopped for every skb.
+ */
+ if (likely(skb->dst && skb->dst->neighbour)) {
+ if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
+ XMIT_QUEUED_SKBS();
+ ipoib_path_lookup(skb, dev);
+ continue;
+ }
+
+ neigh = *to_ipoib_neigh(skb->dst->neighbour);
+
+ if (ipoib_cm_get(neigh)) {
+ if (ipoib_cm_up(neigh)) {
+ XMIT_QUEUED_SKBS();
+ ipoib_cm_send(dev, skb,
+ ipoib_cm_get(neigh));
+ continue;
+ }
+ } else if (neigh->ah) {
+ if (unlikely(memcmp(&neigh->dgid.raw,
+ skb->dst->neighbour->ha + 4,
+ sizeof(union ib_gid)))) {
+ /* Flush before freeing: the batch may use this neigh */
+ XMIT_QUEUED_SKBS();
+ spin_lock(&priv->lock);
+ /*
+ * It's safe to call ipoib_put_ah() inside
+ * priv->lock here, because we know that
+ * path->ah will always hold one more reference,
+ * so ipoib_put_ah() will never do more than
+ * decrement the ref count.
+ */
+ ipoib_put_ah(neigh->ah);
+ list_del(&neigh->list);
+ ipoib_neigh_free(dev, neigh);
+ spin_unlock(&priv->lock);
+ ipoib_path_lookup(skb, dev);
+ continue;
+ }
+
+ qpn = IPOIB_QPN(skb->dst->neighbour->ha);
+ if (neigh != old_neigh || qpn != old_qpn) {
+ /*
+ * Sending to a different destination
+ * from earlier skb's - send all
+ * existing skbs (if any).
+ */
+ if (tx_ring_index == -1) {
+ /*
+ * First time, find where to
+ * store skb.
+ */
+ tx_ring_index = priv->tx_head &
+ (ipoib_sendq_size - 1);
+ } else {
+ /* Some skbs to send */
+ XMIT_QUEUED_SKBS();
+ }
+ old_neigh = neigh;
+ old_qpn = qpn;
+ }
+
+ if (ipoib_process_skb(dev, skb, priv, num_skbs,
+ tx_ring_index, neigh->ah,
+ qpn))
+ continue;
+
+ num_skbs++;
+
+ /* Queue'd one skb, get index for next skb */
+ if (max_skbs)
+ tx_ring_index = (tx_ring_index + 1) &
+ (ipoib_sendq_size - 1);
+ continue;
+ }
+
+ if (skb_queue_len(&neigh->queue) <
+ IPOIB_MAX_PATH_REC_QUEUE) {
+ spin_lock(&priv->lock);
+ __skb_queue_tail(&neigh->queue, skb);
+ spin_unlock(&priv->lock);
+ } else {
+ dev_kfree_skb_any(skb);
+ ++priv->stats.tx_dropped;
+ ++max_skbs;
+ }
+ } else {
+ struct ipoib_pseudoheader *phdr =
+ (struct ipoib_pseudoheader *) skb->data;
+ skb_pull(skb, sizeof *phdr);
+
+ if (phdr->hwaddr[4] == 0xff) {
+ /* Add in the P_Key for multicast*/
+ phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff;
+ phdr->hwaddr[9] = priv->pkey & 0xff;
+
+ XMIT_QUEUED_SKBS();
+ ipoib_mcast_send(dev, phdr->hwaddr + 4, skb);
+ } else {
+ /* unicast GID -- should be ARP or RARP reply */
+
+ if ((be16_to_cpup((__be16 *) skb->data) !=
+ ETH_P_ARP) &&
+ (be16_to_cpup((__be16 *) skb->data) !=
+ ETH_P_RARP)) {
+ ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x "
+ IPOIB_GID_FMT "\n",
+ skb->dst ? "neigh" : "dst",
+ be16_to_cpup((__be16 *)
+ skb->data),
+ IPOIB_QPN(phdr->hwaddr),
+ IPOIB_GID_RAW_ARG(phdr->hwaddr
+ + 4));
+ dev_kfree_skb_any(skb);
+ ++priv->stats.tx_dropped;
+ ++max_skbs;
+ continue;
+ }
+ XMIT_QUEUED_SKBS();
+ unicast_arp_send(skb, dev, phdr);
+ }
+ }
+ }
+
+ /* Send out last packets (if any) */
+ XMIT_QUEUED_SKBS();
+
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+ return skb_queue_empty(blist) ? NETDEV_TX_OK : NETDEV_TX_BUSY;
+}
+
static struct net_device_stats *ipoib_get_stats(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -898,11 +1071,35 @@ int ipoib_dev_init(struct net_device *de
/* priv->tx_head & tx_tail are already 0 */
- if (ipoib_ib_dev_init(dev, ca, port))
+ /* Allocate tx_sge */
+ priv->tx_sge = kmalloc(ipoib_sendq_size * sizeof *priv->tx_sge,
+ GFP_KERNEL);
+ if (!priv->tx_sge) {
+ printk(KERN_WARNING "%s: failed to allocate TX sge (%d entries)\n",
+ ca->name, ipoib_sendq_size);
goto out_tx_ring_cleanup;
+ }
+
+ /* Allocate tx_wr */
+ priv->tx_wr = kmalloc(ipoib_sendq_size * sizeof *priv->tx_wr,
+ GFP_KERNEL);
+ if (!priv->tx_wr) {
+ printk(KERN_WARNING "%s: failed to allocate TX wr (%d entries)\n",
+ ca->name, ipoib_sendq_size);
+ goto out_tx_sge_cleanup;
+ }
+
+ if (ipoib_ib_dev_init(dev, ca, port))
+ goto out_tx_wr_cleanup;
return 0;
+out_tx_wr_cleanup:
+ kfree(priv->tx_wr);
+
+out_tx_sge_cleanup:
+ kfree(priv->tx_sge);
+
out_tx_ring_cleanup:
kfree(priv->tx_ring);
@@ -930,9 +1127,13 @@ void ipoib_dev_cleanup(struct net_device
kfree(priv->rx_ring);
kfree(priv->tx_ring);
+ kfree(priv->tx_sge);
+ kfree(priv->tx_wr);
priv->rx_ring = NULL;
priv->tx_ring = NULL;
+ priv->tx_sge = NULL;
+ priv->tx_wr = NULL;
}
static void ipoib_setup(struct net_device *dev)
@@ -943,6 +1144,7 @@ static void ipoib_setup(struct net_devic
dev->stop = ipoib_stop;
dev->change_mtu = ipoib_change_mtu;
dev->hard_start_xmit = ipoib_start_xmit;
+ dev->hard_start_xmit_batch = ipoib_start_xmit_frames;
dev->get_stats = ipoib_get_stats;
dev->tx_timeout = ipoib_timeout;
dev->hard_header = ipoib_hard_header;
@@ -963,7 +1165,10 @@ static void ipoib_setup(struct net_devic
dev->addr_len = INFINIBAND_ALEN;
dev->type = ARPHRD_INFINIBAND;
dev->tx_queue_len = ipoib_sendq_size * 2;
- dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;
+ dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX |
+ NETIF_F_BATCH_SKBS;
+
+ dev->xmit_slots = ipoib_sendq_size;
/* MTU will be reset when mcast join happens */
dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN;
next prev parent reply other threads:[~2007-07-20 6:34 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-07-20 6:31 [ofa-general] [PATCH 00/10] Implement batching skb API Krishna Kumar
2007-07-20 6:32 ` [ofa-general] [PATCH 01/10] HOWTO documentation for Batching SKB Krishna Kumar
2007-07-20 6:32 ` [PATCH 02/10] Networking include file changes Krishna Kumar
2007-07-20 9:59 ` Patrick McHardy
2007-07-20 17:25 ` [ofa-general] " Sridhar Samudrala
2007-07-21 6:30 ` Krishna Kumar2
2007-07-23 5:59 ` Sridhar Samudrala
2007-07-23 6:27 ` Krishna Kumar2
2007-07-20 6:32 ` [ofa-general] [PATCH 03/10] dev.c changes Krishna Kumar
2007-07-20 10:04 ` [ofa-general] " Patrick McHardy
2007-07-20 10:27 ` Krishna Kumar2
2007-07-20 11:20 ` [ofa-general] " Patrick McHardy
2007-07-20 11:52 ` Krishna Kumar2
2007-07-20 11:55 ` Patrick McHardy
2007-07-20 12:09 ` Krishna Kumar2
2007-07-20 12:25 ` Krishna Kumar2
2007-07-20 12:37 ` Patrick McHardy
2007-07-20 17:44 ` Sridhar Samudrala
2007-07-21 6:44 ` Krishna Kumar2
2007-07-20 6:32 ` [PATCH 04/10] net-sysfs.c changes Krishna Kumar
2007-07-20 10:07 ` [ofa-general] " Patrick McHardy
2007-07-20 10:28 ` Krishna Kumar2
2007-07-20 11:21 ` Patrick McHardy
2007-07-20 16:22 ` Stephen Hemminger
2007-07-21 6:46 ` Krishna Kumar2
2007-07-23 9:56 ` Stephen Hemminger
2007-07-20 6:32 ` [ofa-general] [PATCH 05/10] sch_generic.c changes Krishna Kumar
2007-07-20 10:11 ` [ofa-general] " Patrick McHardy
2007-07-20 10:32 ` Krishna Kumar2
2007-07-20 11:24 ` Patrick McHardy
2007-07-20 18:16 ` Patrick McHardy
2007-07-21 6:56 ` Krishna Kumar2
2007-07-22 17:03 ` Patrick McHardy
2007-07-20 6:33 ` [ofa-general] [PATCH 06/10] IPoIB header file changes Krishna Kumar
2007-07-20 6:33 ` [ofa-general] [PATCH 07/10] IPoIB verb changes Krishna Kumar
2007-07-20 6:33 ` [ofa-general] [PATCH 08/10] IPoIB multicast/CM changes Krishna Kumar
2007-07-20 6:33 ` Krishna Kumar [this message]
2007-07-20 6:33 ` [PATCH 10/10] IPoIB batching in internal xmit/handler routines Krishna Kumar
2007-07-20 7:18 ` [ofa-general] Re: [PATCH 00/10] Implement batching skb API Stephen Hemminger
2007-07-20 7:30 ` Krishna Kumar2
2007-07-20 7:57 ` [ofa-general] " Stephen Hemminger
2007-07-20 7:47 ` Krishna Kumar2
2007-07-21 13:46 ` [ofa-general] TCP and batching WAS(Re: " jamal
2007-07-23 9:44 ` Stephen Hemminger
2007-07-20 12:54 ` [ofa-general] " Evgeniy Polyakov
2007-07-20 13:02 ` Krishna Kumar2
2007-07-23 4:23 ` Krishna Kumar2
2007-07-21 13:18 ` [ofa-general] " jamal
2007-07-22 6:27 ` Krishna Kumar2
2007-07-22 12:51 ` jamal
2007-07-23 4:49 ` Krishna Kumar2
2007-07-23 12:32 ` jamal
2007-07-24 3:44 ` [ofa-general] " Krishna Kumar2
2007-07-24 19:28 ` jamal
2007-07-25 2:41 ` Krishna Kumar2
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070720063336.26341.2955.sendpatchset@localhost.localdomain \
--to=krkumar2@in.ibm.com \
--cc=Robert.Olsson@data.slu.se \
--cc=davem@davemloft.net \
--cc=gaagaan@gmail.com \
--cc=general@lists.openfabrics.org \
--cc=hadi@cyberus.ca \
--cc=herbert@gondor.apana.org.au \
--cc=jagana@us.ibm.com \
--cc=jeff@garzik.org \
--cc=johnpol@2ka.mipt.ru \
--cc=kaber@trash.net \
--cc=kumarkr@linux.ibm.com \
--cc=mcarlson@broadcom.com \
--cc=mchan@broadcom.com \
--cc=netdev@vger.kernel.org \
--cc=peter.p.waskiewicz.jr@intel.com \
--cc=rdreier@cisco.com \
--cc=rick.jones2@hp.com \
--cc=sri@us.ibm.com \
--cc=tgraf@suug.ch \
--cc=xma@us.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).