From: Krishna Kumar <krkumar2@in.ibm.com>
To: davem@davemloft.net, rdreier@cisco.com
Cc: johnpol@2ka.mipt.ru, Robert.Olsson@data.slu.se,
rick.jones2@hp.com, herbert@gondor.apana.org.au,
gaagaan@gmail.com, kumarkr@linux.ibm.com,
peter.p.waskiewicz.jr@intel.com, mcarlson@broadcom.com,
kaber@trash.net, jagana@us.ibm.com,
general@lists.openfabrics.org, mchan@broadcom.com, tgraf@suug.ch,
jeff@garzik.org, sri@us.ibm.com, hadi@cyberus.ca,
netdev@vger.kernel.org, Krishna Kumar <krkumar2@in.ibm.com>,
xma@us.ibm.com
Subject: [PATCH 09/10] IPoIB batching xmit handler support.
Date: Fri, 20 Jul 2007 12:03:36 +0530 [thread overview]
Message-ID: <20070720063336.26341.2955.sendpatchset@localhost.localdomain> (raw)
In-Reply-To: <20070720063149.26341.84076.sendpatchset@localhost.localdomain>
Add an IPoIB batching xmit handler.
Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
---
ipoib_main.c | 215 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 files changed, 210 insertions(+), 5 deletions(-)
diff -ruNp org/drivers/infiniband/ulp/ipoib/ipoib_main.c new/drivers/infiniband/ulp/ipoib/ipoib_main.c
--- org/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-07-20 07:49:28.000000000 +0530
+++ new/drivers/infiniband/ulp/ipoib/ipoib_main.c 2007-07-20 08:30:22.000000000 +0530
@@ -558,7 +558,8 @@ static void neigh_add_path(struct sk_buf
goto err_drop;
}
} else
- ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+ ipoib_send(dev, skb, path->ah,
+ IPOIB_QPN(skb->dst->neighbour->ha), 1);
} else {
neigh->ah = NULL;
@@ -638,7 +639,7 @@ static void unicast_arp_send(struct sk_b
ipoib_dbg(priv, "Send unicast ARP to %04x\n",
be16_to_cpu(path->pathrec.dlid));
- ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
+ ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr), 1);
} else if ((path->query || !path_rec_start(dev, path)) &&
skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
/* put pseudoheader back on for next time */
@@ -704,7 +705,8 @@ static int ipoib_start_xmit(struct sk_bu
goto out;
}
- ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+ ipoib_send(dev, skb, neigh->ah,
+ IPOIB_QPN(skb->dst->neighbour->ha), 1);
goto out;
}
@@ -753,6 +755,177 @@ out:
return NETDEV_TX_OK;
}
+#define XMIT_QUEUED_SKBS() \
+ do { \
+ if (num_skbs) { \
+ ipoib_send(dev, NULL, old_neigh->ah, old_qpn, \
+ num_skbs); \
+ num_skbs = 0; \
+ } \
+ } while (0)
+
+/*
+ * TODO: Merge with ipoib_start_xmit to use the same code and have a
+ * transparent wrapper caller to xmit's, etc.
+ */
+static int ipoib_start_xmit_frames(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct sk_buff *skb;
+ struct sk_buff_head *blist;
+ int max_skbs, num_skbs = 0, tx_ring_index = -1;
+ u32 qpn, old_qpn = 0;
+ struct ipoib_neigh *neigh, *old_neigh = NULL;
+ unsigned long flags;
+
+ if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags)))
+ return NETDEV_TX_LOCKED;
+
+ blist = dev->skb_blist;
+
+ /*
+ * Send at most xmit_slots skbs. This also prevents the device getting
+ * full as ipoib_send modifies the xmit_slots and we use the same
+ * value to figure how many skbs to send.
+ */
+ max_skbs = dev->xmit_slots;
+
+ while (max_skbs-- > 0 && (skb = __skb_dequeue(blist)) != NULL) {
+ /*
+ * From here on, ipoib_send() cannot stop the queue as it
+ * uses the same initialization as 'max_skbs'. So we can
+ * optimize to not check for queue stopped for every skb.
+ */
+ if (likely(skb->dst && skb->dst->neighbour)) {
+ if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
+ XMIT_QUEUED_SKBS();
+ ipoib_path_lookup(skb, dev);
+ continue;
+ }
+
+ neigh = *to_ipoib_neigh(skb->dst->neighbour);
+
+ if (ipoib_cm_get(neigh)) {
+ if (ipoib_cm_up(neigh)) {
+ XMIT_QUEUED_SKBS();
+ ipoib_cm_send(dev, skb,
+ ipoib_cm_get(neigh));
+ continue;
+ }
+ } else if (neigh->ah) {
+ if (unlikely(memcmp(&neigh->dgid.raw,
+ skb->dst->neighbour->ha + 4,
+ sizeof(union ib_gid)))) {
+ spin_lock(&priv->lock);
+ /*
+ * It's safe to call ipoib_put_ah()
+ * inside priv->lock here, because we
+ * know that path->ah will always hold
+ * one more reference, so ipoib_put_ah()
+ * will never do more than decrement
+ * the ref count.
+ */
+ ipoib_put_ah(neigh->ah);
+ list_del(&neigh->list);
+ ipoib_neigh_free(dev, neigh);
+ spin_unlock(&priv->lock);
+ XMIT_QUEUED_SKBS();
+ ipoib_path_lookup(skb, dev);
+ continue;
+ }
+
+ qpn = IPOIB_QPN(skb->dst->neighbour->ha);
+ if (neigh != old_neigh || qpn != old_qpn) {
+ /*
+ * Sending to a different destination
+ * from earlier skb's - send all
+ * existing skbs (if any).
+ */
+ if (tx_ring_index == -1) {
+ /*
+ * First time, find where to
+ * store skb.
+ */
+ tx_ring_index = priv->tx_head &
+ (ipoib_sendq_size - 1);
+ } else {
+ /* Some skbs to send */
+ XMIT_QUEUED_SKBS();
+ }
+ old_neigh = neigh;
+ old_qpn = IPOIB_QPN(skb->dst->neighbour->ha);
+ }
+
+ if (ipoib_process_skb(dev, skb, priv, num_skbs,
+ tx_ring_index, neigh->ah,
+ qpn))
+ continue;
+
+ num_skbs++;
+
+ /* Queued one skb, get index for next skb */
+ if (max_skbs)
+ tx_ring_index = (tx_ring_index + 1) &
+ (ipoib_sendq_size - 1);
+ continue;
+ }
+
+ if (skb_queue_len(&neigh->queue) <
+ IPOIB_MAX_PATH_REC_QUEUE) {
+ spin_lock(&priv->lock);
+ __skb_queue_tail(&neigh->queue, skb);
+ spin_unlock(&priv->lock);
+ } else {
+ dev_kfree_skb_any(skb);
+ ++priv->stats.tx_dropped;
+ ++max_skbs;
+ }
+ } else {
+ struct ipoib_pseudoheader *phdr =
+ (struct ipoib_pseudoheader *) skb->data;
+ skb_pull(skb, sizeof *phdr);
+
+ if (phdr->hwaddr[4] == 0xff) {
+ /* Add in the P_Key for multicast */
+ phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff;
+ phdr->hwaddr[9] = priv->pkey & 0xff;
+
+ XMIT_QUEUED_SKBS();
+ ipoib_mcast_send(dev, phdr->hwaddr + 4, skb);
+ } else {
+ /* unicast GID -- should be ARP or RARP reply */
+
+ if ((be16_to_cpup((__be16 *) skb->data) !=
+ ETH_P_ARP) &&
+ (be16_to_cpup((__be16 *) skb->data) !=
+ ETH_P_RARP)) {
+ ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x "
+ IPOIB_GID_FMT "\n",
+ skb->dst ? "neigh" : "dst",
+ be16_to_cpup((__be16 *)
+ skb->data),
+ IPOIB_QPN(phdr->hwaddr),
+ IPOIB_GID_RAW_ARG(phdr->hwaddr
+ + 4));
+ dev_kfree_skb_any(skb);
+ ++priv->stats.tx_dropped;
+ ++max_skbs;
+ continue;
+ }
+ XMIT_QUEUED_SKBS();
+ unicast_arp_send(skb, dev, phdr);
+ }
+ }
+ }
+
+ /* Send out last packets (if any) */
+ XMIT_QUEUED_SKBS();
+
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+ return skb_queue_empty(blist) ? NETDEV_TX_OK : NETDEV_TX_BUSY;
+}
+
static struct net_device_stats *ipoib_get_stats(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -898,11 +1071,35 @@ int ipoib_dev_init(struct net_device *de
/* priv->tx_head & tx_tail are already 0 */
- if (ipoib_ib_dev_init(dev, ca, port))
+ /* Allocate tx_sge */
+ priv->tx_sge = kmalloc(ipoib_sendq_size * sizeof *priv->tx_sge,
+ GFP_KERNEL);
+ if (!priv->tx_sge) {
+ printk(KERN_WARNING "%s: failed to allocate TX sge (%d entries)\n",
+ ca->name, ipoib_sendq_size);
goto out_tx_ring_cleanup;
+ }
+
+ /* Allocate tx_wr */
+ priv->tx_wr = kmalloc(ipoib_sendq_size * sizeof *priv->tx_wr,
+ GFP_KERNEL);
+ if (!priv->tx_wr) {
+ printk(KERN_WARNING "%s: failed to allocate TX wr (%d entries)\n",
+ ca->name, ipoib_sendq_size);
+ goto out_tx_sge_cleanup;
+ }
+
+ if (ipoib_ib_dev_init(dev, ca, port))
+ goto out_tx_wr_cleanup;
return 0;
+out_tx_wr_cleanup:
+ kfree(priv->tx_wr);
+
+out_tx_sge_cleanup:
+ kfree(priv->tx_sge);
+
out_tx_ring_cleanup:
kfree(priv->tx_ring);
@@ -930,9 +1127,13 @@ void ipoib_dev_cleanup(struct net_device
kfree(priv->rx_ring);
kfree(priv->tx_ring);
+ kfree(priv->tx_sge);
+ kfree(priv->tx_wr);
priv->rx_ring = NULL;
priv->tx_ring = NULL;
+ priv->tx_sge = NULL;
+ priv->tx_wr = NULL;
}
static void ipoib_setup(struct net_device *dev)
@@ -943,6 +1144,7 @@ static void ipoib_setup(struct net_devic
dev->stop = ipoib_stop;
dev->change_mtu = ipoib_change_mtu;
dev->hard_start_xmit = ipoib_start_xmit;
+ dev->hard_start_xmit_batch = ipoib_start_xmit_frames;
dev->get_stats = ipoib_get_stats;
dev->tx_timeout = ipoib_timeout;
dev->hard_header = ipoib_hard_header;
@@ -963,7 +1165,10 @@ static void ipoib_setup(struct net_devic
dev->addr_len = INFINIBAND_ALEN;
dev->type = ARPHRD_INFINIBAND;
dev->tx_queue_len = ipoib_sendq_size * 2;
- dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;
+ dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX |
+ NETIF_F_BATCH_SKBS;
+
+ dev->xmit_slots = ipoib_sendq_size;
/* MTU will be reset when mcast join happens */
dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN;
next prev parent reply other threads:[~2007-07-20 6:34 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-07-20 6:31 [ofa-general] [PATCH 00/10] Implement batching skb API Krishna Kumar
2007-07-20 6:32 ` [ofa-general] [PATCH 01/10] HOWTO documentation for Batching SKB Krishna Kumar
2007-07-20 6:32 ` [PATCH 02/10] Networking include file changes Krishna Kumar
2007-07-20 9:59 ` Patrick McHardy
2007-07-20 17:25 ` [ofa-general] " Sridhar Samudrala
2007-07-21 6:30 ` Krishna Kumar2
2007-07-23 5:59 ` Sridhar Samudrala
2007-07-23 6:27 ` Krishna Kumar2
2007-07-20 6:32 ` [ofa-general] [PATCH 03/10] dev.c changes Krishna Kumar
2007-07-20 10:04 ` [ofa-general] " Patrick McHardy
2007-07-20 10:27 ` Krishna Kumar2
2007-07-20 11:20 ` [ofa-general] " Patrick McHardy
2007-07-20 11:52 ` Krishna Kumar2
2007-07-20 11:55 ` Patrick McHardy
2007-07-20 12:09 ` Krishna Kumar2
2007-07-20 12:25 ` Krishna Kumar2
2007-07-20 12:37 ` Patrick McHardy
2007-07-20 17:44 ` Sridhar Samudrala
2007-07-21 6:44 ` Krishna Kumar2
2007-07-20 6:32 ` [PATCH 04/10] net-sysfs.c changes Krishna Kumar
2007-07-20 10:07 ` [ofa-general] " Patrick McHardy
2007-07-20 10:28 ` Krishna Kumar2
2007-07-20 11:21 ` Patrick McHardy
2007-07-20 16:22 ` Stephen Hemminger
2007-07-21 6:46 ` Krishna Kumar2
2007-07-23 9:56 ` Stephen Hemminger
2007-07-20 6:32 ` [ofa-general] [PATCH 05/10] sch_generic.c changes Krishna Kumar
2007-07-20 10:11 ` [ofa-general] " Patrick McHardy
2007-07-20 10:32 ` Krishna Kumar2
2007-07-20 11:24 ` Patrick McHardy
2007-07-20 18:16 ` Patrick McHardy
2007-07-21 6:56 ` Krishna Kumar2
2007-07-22 17:03 ` Patrick McHardy
2007-07-20 6:33 ` [ofa-general] [PATCH 06/10] IPoIB header file changes Krishna Kumar
2007-07-20 6:33 ` [ofa-general] [PATCH 07/10] IPoIB verb changes Krishna Kumar
2007-07-20 6:33 ` [ofa-general] [PATCH 08/10] IPoIB multicast/CM changes Krishna Kumar
2007-07-20 6:33 ` Krishna Kumar [this message]
2007-07-20 6:33 ` [PATCH 10/10] IPoIB batching in internal xmit/handler routines Krishna Kumar
2007-07-20 7:18 ` [ofa-general] Re: [PATCH 00/10] Implement batching skb API Stephen Hemminger
2007-07-20 7:30 ` Krishna Kumar2
2007-07-20 7:57 ` [ofa-general] " Stephen Hemminger
2007-07-20 7:47 ` Krishna Kumar2
2007-07-21 13:46 ` [ofa-general] TCP and batching WAS(Re: " jamal
2007-07-23 9:44 ` Stephen Hemminger
2007-07-20 12:54 ` [ofa-general] " Evgeniy Polyakov
2007-07-20 13:02 ` Krishna Kumar2
2007-07-23 4:23 ` Krishna Kumar2
2007-07-21 13:18 ` [ofa-general] " jamal
2007-07-22 6:27 ` Krishna Kumar2
2007-07-22 12:51 ` jamal
2007-07-23 4:49 ` Krishna Kumar2
2007-07-23 12:32 ` jamal
2007-07-24 3:44 ` [ofa-general] " Krishna Kumar2
2007-07-24 19:28 ` jamal
2007-07-25 2:41 ` Krishna Kumar2
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070720063336.26341.2955.sendpatchset@localhost.localdomain \
--to=krkumar2@in.ibm.com \
--cc=Robert.Olsson@data.slu.se \
--cc=davem@davemloft.net \
--cc=gaagaan@gmail.com \
--cc=general@lists.openfabrics.org \
--cc=hadi@cyberus.ca \
--cc=herbert@gondor.apana.org.au \
--cc=jagana@us.ibm.com \
--cc=jeff@garzik.org \
--cc=johnpol@2ka.mipt.ru \
--cc=kaber@trash.net \
--cc=kumarkr@linux.ibm.com \
--cc=mcarlson@broadcom.com \
--cc=mchan@broadcom.com \
--cc=netdev@vger.kernel.org \
--cc=peter.p.waskiewicz.jr@intel.com \
--cc=rdreier@cisco.com \
--cc=rick.jones2@hp.com \
--cc=sri@us.ibm.com \
--cc=tgraf@suug.ch \
--cc=xma@us.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.