netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Krishna Kumar <krkumar2@in.ibm.com>
To: davem@davemloft.net, rdreier@cisco.com
Cc: johnpol@2ka.mipt.ru, Robert.Olsson@data.slu.se,
	rick.jones2@hp.com, herbert@gondor.apana.org.au,
	gaagaan@gmail.com, kumarkr@linux.ibm.com,
	peter.p.waskiewicz.jr@intel.com, mcarlson@broadcom.com,
	kaber@trash.net, jagana@us.ibm.com,
	general@lists.openfabrics.org, mchan@broadcom.com, tgraf@suug.ch,
	jeff@garzik.org, sri@us.ibm.com, hadi@cyberus.ca,
	netdev@vger.kernel.org, Krishna Kumar <krkumar2@in.ibm.com>,
	xma@us.ibm.com
Subject: [PATCH 09/10] IPoIB batching xmit handler support.
Date: Fri, 20 Jul 2007 12:03:36 +0530	[thread overview]
Message-ID: <20070720063336.26341.2955.sendpatchset@localhost.localdomain> (raw)
In-Reply-To: <20070720063149.26341.84076.sendpatchset@localhost.localdomain>

Add a IPoIB batching xmit handler.

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
---
 ipoib_main.c |  215 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 210 insertions(+), 5 deletions(-)

diff -ruNp org/drivers/infiniband/ulp/ipoib/ipoib_main.c new/drivers/infiniband/ulp/ipoib/ipoib_main.c
--- org/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-07-20 07:49:28.000000000 +0530
+++ new/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-07-20 08:30:22.000000000 +0530
@@ -558,7 +558,8 @@ static void neigh_add_path(struct sk_buf
 				goto err_drop;
 			}
 		} else
-			ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+			ipoib_send(dev, skb, path->ah,
+				   IPOIB_QPN(skb->dst->neighbour->ha), 1);
 	} else {
 		neigh->ah  = NULL;
 
@@ -638,7 +639,7 @@ static void unicast_arp_send(struct sk_b
 		ipoib_dbg(priv, "Send unicast ARP to %04x\n",
 			  be16_to_cpu(path->pathrec.dlid));
 
-		ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
+		ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr), 1);
 	} else if ((path->query || !path_rec_start(dev, path)) &&
 		   skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
 		/* put pseudoheader back on for next time */
@@ -704,7 +705,8 @@ static int ipoib_start_xmit(struct sk_bu
 				goto out;
 			}
 
-			ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+			ipoib_send(dev, skb, neigh->ah,
+				   IPOIB_QPN(skb->dst->neighbour->ha), 1);
 			goto out;
 		}
 
@@ -753,6 +755,177 @@ out:
 	return NETDEV_TX_OK;
 }
 
+#define	XMIT_QUEUED_SKBS()						\
+	do {								\
+		if (num_skbs) {						\
+			ipoib_send(dev, NULL, old_neigh->ah, old_qpn,	\
+				   num_skbs);				\
+			num_skbs = 0;					\
+		}							\
+	} while (0)
+
+/*
+ * TODO: Merge with ipoib_start_xmit to use the same code and have a
+ * transparent wrapper caller to xmit's, etc.
+ */
+static int ipoib_start_xmit_frames(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct sk_buff *skb;
+	struct sk_buff_head *blist;
+	int max_skbs, num_skbs = 0, tx_ring_index = -1;
+	u32 qpn, old_qpn = 0;
+	struct ipoib_neigh *neigh, *old_neigh = NULL;
+	unsigned long flags;
+
+	if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags)))
+		return NETDEV_TX_LOCKED;
+
+	blist = dev->skb_blist;
+
+	/*
+	 * Send atmost xmit_slots skbs. This also prevents the device getting
+	 * full as ipoib_send modifies the xmit_slots and we use the same
+	 * value to figure how many skbs to send.
+	 */
+	max_skbs = dev->xmit_slots;
+
+	while (max_skbs-- > 0 && (skb = __skb_dequeue(blist)) != NULL) {
+		/*
+		 * From here on, ipoib_send() cannot stop the queue as it
+		 * uses the same initialization as 'max_skbs'. So we can
+		 * optimize to not check for queue stopped for every skb.
+		 */
+		if (likely(skb->dst && skb->dst->neighbour)) {
+			if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
+				XMIT_QUEUED_SKBS();
+				ipoib_path_lookup(skb, dev);
+				continue;
+			}
+
+			neigh = *to_ipoib_neigh(skb->dst->neighbour);
+
+			if (ipoib_cm_get(neigh)) {
+				if (ipoib_cm_up(neigh)) {
+					XMIT_QUEUED_SKBS();
+					ipoib_cm_send(dev, skb,
+						      ipoib_cm_get(neigh));
+					continue;
+				}
+			} else if (neigh->ah) {
+				if (unlikely(memcmp(&neigh->dgid.raw,
+						    skb->dst->neighbour->ha + 4,
+						    sizeof(union ib_gid)))) {
+					spin_lock(&priv->lock);
+					/*
+					 * It's safe to call ipoib_put_ah()
+					 * inside priv->lock here, because we
+					 * know that path->ah will always hold
+					 * one more reference, so ipoib_put_ah()
+					 * will never do more than decrement
+					 * the ref count.
+					 */
+					ipoib_put_ah(neigh->ah);
+					list_del(&neigh->list);
+					ipoib_neigh_free(dev, neigh);
+					spin_unlock(&priv->lock);
+					XMIT_QUEUED_SKBS();
+					ipoib_path_lookup(skb, dev);
+					continue;
+				}
+
+				qpn = IPOIB_QPN(skb->dst->neighbour->ha);
+				if (neigh != old_neigh || qpn != old_qpn) {
+					/*
+					 * Sending to a different destination
+					 * from earlier skb's - send all
+					 * existing skbs (if any).
+					 */
+					if (tx_ring_index == -1) {
+						/*
+						 * First time, find where to
+						 * store skb.
+						 */
+						tx_ring_index = priv->tx_head &
+							(ipoib_sendq_size - 1);
+					} else {
+						/* Some skbs to send */
+						XMIT_QUEUED_SKBS();
+					}
+					old_neigh = neigh;
+					old_qpn = IPOIB_QPN(skb->dst->neighbour->ha);
+				}
+
+				if (ipoib_process_skb(dev, skb, priv, num_skbs,
+						      tx_ring_index, neigh->ah,
+						      qpn))
+					continue;
+
+				num_skbs++;
+
+				/* Queue'd one skb, get index for next skb */
+				if (max_skbs)
+					tx_ring_index = (tx_ring_index + 1) &
+							(ipoib_sendq_size - 1);
+				continue;
+			}
+
+			if (skb_queue_len(&neigh->queue) <
+			    IPOIB_MAX_PATH_REC_QUEUE) {
+				spin_lock(&priv->lock);
+				__skb_queue_tail(&neigh->queue, skb);
+				spin_unlock(&priv->lock);
+			} else {
+				dev_kfree_skb_any(skb);
+				++priv->stats.tx_dropped;
+				++max_skbs;
+			}
+		} else {
+			struct ipoib_pseudoheader *phdr =
+				(struct ipoib_pseudoheader *) skb->data;
+			skb_pull(skb, sizeof *phdr);
+
+			if (phdr->hwaddr[4] == 0xff) {
+				/* Add in the P_Key for multicast*/
+				phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff;
+				phdr->hwaddr[9] = priv->pkey & 0xff;
+
+				XMIT_QUEUED_SKBS();
+				ipoib_mcast_send(dev, phdr->hwaddr + 4, skb);
+			} else {
+				/* unicast GID -- should be ARP or RARP reply */
+
+				if ((be16_to_cpup((__be16 *) skb->data) !=
+				    ETH_P_ARP) &&
+				    (be16_to_cpup((__be16 *) skb->data) !=
+				    ETH_P_RARP)) {
+					ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x "
+						IPOIB_GID_FMT "\n",
+						skb->dst ? "neigh" : "dst",
+						be16_to_cpup((__be16 *)
+						skb->data),
+						IPOIB_QPN(phdr->hwaddr),
+						IPOIB_GID_RAW_ARG(phdr->hwaddr
+								  + 4));
+					dev_kfree_skb_any(skb);
+					++priv->stats.tx_dropped;
+					++max_skbs;
+					continue;
+				}
+				XMIT_QUEUED_SKBS();
+				unicast_arp_send(skb, dev, phdr);
+			}
+		}
+	}
+
+	/* Send out last packets (if any) */
+	XMIT_QUEUED_SKBS();
+
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+	return skb_queue_empty(blist) ? NETDEV_TX_OK : NETDEV_TX_BUSY;
+}
+
 static struct net_device_stats *ipoib_get_stats(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -898,11 +1071,35 @@ int ipoib_dev_init(struct net_device *de
 
 	/* priv->tx_head & tx_tail are already 0 */
 
-	if (ipoib_ib_dev_init(dev, ca, port))
+	/* Allocate tx_sge */
+	priv->tx_sge = kmalloc(ipoib_sendq_size * sizeof *priv->tx_sge,
+			       GFP_KERNEL);
+	if (!priv->tx_sge) {
+		printk(KERN_WARNING "%s: failed to allocate TX sge (%d entries)\n",
+		       ca->name, ipoib_sendq_size);
 		goto out_tx_ring_cleanup;
+	}
+
+	/* Allocate tx_wr */
+	priv->tx_wr = kmalloc(ipoib_sendq_size * sizeof *priv->tx_wr,
+			      GFP_KERNEL);
+	if (!priv->tx_wr) {
+		printk(KERN_WARNING "%s: failed to allocate TX wr (%d entries)\n",
+		       ca->name, ipoib_sendq_size);
+		goto out_tx_sge_cleanup;
+	}
+
+	if (ipoib_ib_dev_init(dev, ca, port))
+		goto out_tx_wr_cleanup;
 
 	return 0;
 
+out_tx_wr_cleanup:
+	kfree(priv->tx_wr);
+
+out_tx_sge_cleanup:
+	kfree(priv->tx_sge);
+
 out_tx_ring_cleanup:
 	kfree(priv->tx_ring);
 
@@ -930,9 +1127,13 @@ void ipoib_dev_cleanup(struct net_device
 
 	kfree(priv->rx_ring);
 	kfree(priv->tx_ring);
+	kfree(priv->tx_sge);
+	kfree(priv->tx_wr);
 
 	priv->rx_ring = NULL;
 	priv->tx_ring = NULL;
+	priv->tx_sge = NULL;
+	priv->tx_wr = NULL;
 }
 
 static void ipoib_setup(struct net_device *dev)
@@ -943,6 +1144,7 @@ static void ipoib_setup(struct net_devic
 	dev->stop 		 = ipoib_stop;
 	dev->change_mtu 	 = ipoib_change_mtu;
 	dev->hard_start_xmit 	 = ipoib_start_xmit;
+	dev->hard_start_xmit_batch = ipoib_start_xmit_frames;
 	dev->get_stats 		 = ipoib_get_stats;
 	dev->tx_timeout 	 = ipoib_timeout;
 	dev->hard_header 	 = ipoib_hard_header;
@@ -963,7 +1165,10 @@ static void ipoib_setup(struct net_devic
 	dev->addr_len 		 = INFINIBAND_ALEN;
 	dev->type 		 = ARPHRD_INFINIBAND;
 	dev->tx_queue_len 	 = ipoib_sendq_size * 2;
-	dev->features            = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;
+	dev->features            = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX |
+					NETIF_F_BATCH_SKBS;
+
+	dev->xmit_slots		= ipoib_sendq_size;
 
 	/* MTU will be reset when mcast join happens */
 	dev->mtu 		 = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN;

  parent reply	other threads:[~2007-07-20  6:34 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-07-20  6:31 [ofa-general] [PATCH 00/10] Implement batching skb API Krishna Kumar
2007-07-20  6:32 ` [ofa-general] [PATCH 01/10] HOWTO documentation for Batching SKB Krishna Kumar
2007-07-20  6:32 ` [PATCH 02/10] Networking include file changes Krishna Kumar
2007-07-20  9:59   ` Patrick McHardy
2007-07-20 17:25   ` [ofa-general] " Sridhar Samudrala
2007-07-21  6:30     ` Krishna Kumar2
2007-07-23  5:59       ` Sridhar Samudrala
2007-07-23  6:27         ` Krishna Kumar2
2007-07-20  6:32 ` [ofa-general] [PATCH 03/10] dev.c changes Krishna Kumar
2007-07-20 10:04   ` [ofa-general] " Patrick McHardy
2007-07-20 10:27     ` Krishna Kumar2
2007-07-20 11:20       ` [ofa-general] " Patrick McHardy
2007-07-20 11:52         ` Krishna Kumar2
2007-07-20 11:55           ` Patrick McHardy
2007-07-20 12:09         ` Krishna Kumar2
2007-07-20 12:25         ` Krishna Kumar2
2007-07-20 12:37           ` Patrick McHardy
2007-07-20 17:44   ` Sridhar Samudrala
2007-07-21  6:44     ` Krishna Kumar2
2007-07-20  6:32 ` [PATCH 04/10] net-sysfs.c changes Krishna Kumar
2007-07-20 10:07   ` [ofa-general] " Patrick McHardy
2007-07-20 10:28     ` Krishna Kumar2
2007-07-20 11:21       ` Patrick McHardy
2007-07-20 16:22         ` Stephen Hemminger
2007-07-21  6:46           ` Krishna Kumar2
2007-07-23  9:56             ` Stephen Hemminger
2007-07-20  6:32 ` [ofa-general] [PATCH 05/10] sch_generic.c changes Krishna Kumar
2007-07-20 10:11   ` [ofa-general] " Patrick McHardy
2007-07-20 10:32     ` Krishna Kumar2
2007-07-20 11:24       ` Patrick McHardy
2007-07-20 18:16   ` Patrick McHardy
2007-07-21  6:56     ` Krishna Kumar2
2007-07-22 17:03       ` Patrick McHardy
2007-07-20  6:33 ` [ofa-general] [PATCH 06/10] IPoIB header file changes Krishna Kumar
2007-07-20  6:33 ` [ofa-general] [PATCH 07/10] IPoIB verb changes Krishna Kumar
2007-07-20  6:33 ` [ofa-general] [PATCH 08/10] IPoIB multicast/CM changes Krishna Kumar
2007-07-20  6:33 ` Krishna Kumar [this message]
2007-07-20  6:33 ` [PATCH 10/10] IPoIB batching in internal xmit/handler routines Krishna Kumar
2007-07-20  7:18 ` [ofa-general] Re: [PATCH 00/10] Implement batching skb API Stephen Hemminger
2007-07-20  7:30   ` Krishna Kumar2
2007-07-20  7:57     ` [ofa-general] " Stephen Hemminger
2007-07-20  7:47   ` Krishna Kumar2
2007-07-21 13:46   ` [ofa-general] TCP and batching WAS(Re: " jamal
2007-07-23  9:44     ` Stephen Hemminger
2007-07-20 12:54 ` [ofa-general] " Evgeniy Polyakov
2007-07-20 13:02   ` Krishna Kumar2
2007-07-23  4:23   ` Krishna Kumar2
2007-07-21 13:18 ` [ofa-general] " jamal
2007-07-22  6:27   ` Krishna Kumar2
2007-07-22 12:51     ` jamal
2007-07-23  4:49       ` Krishna Kumar2
2007-07-23 12:32         ` jamal
2007-07-24  3:44           ` [ofa-general] " Krishna Kumar2
2007-07-24 19:28             ` jamal
2007-07-25  2:41               ` Krishna Kumar2

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070720063336.26341.2955.sendpatchset@localhost.localdomain \
    --to=krkumar2@in.ibm.com \
    --cc=Robert.Olsson@data.slu.se \
    --cc=davem@davemloft.net \
    --cc=gaagaan@gmail.com \
    --cc=general@lists.openfabrics.org \
    --cc=hadi@cyberus.ca \
    --cc=herbert@gondor.apana.org.au \
    --cc=jagana@us.ibm.com \
    --cc=jeff@garzik.org \
    --cc=johnpol@2ka.mipt.ru \
    --cc=kaber@trash.net \
    --cc=kumarkr@linux.ibm.com \
    --cc=mcarlson@broadcom.com \
    --cc=mchan@broadcom.com \
    --cc=netdev@vger.kernel.org \
    --cc=peter.p.waskiewicz.jr@intel.com \
    --cc=rdreier@cisco.com \
    --cc=rick.jones2@hp.com \
    --cc=sri@us.ibm.com \
    --cc=tgraf@suug.ch \
    --cc=xma@us.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).