From: Krishna Kumar <krkumar2@in.ibm.com>
To: johnpol@2ka.mipt.ru, sri@us.ibm.com,
shemminger@linux-foundation.org, davem@davemloft.net,
kaber@trash.net
Cc: jagana@us.ibm.com, Robert.Olsson@data.slu.se, rick.jones2@hp.com,
herbert@gondor.apana.org.au, gaagaan@gmail.com,
kumarkr@linux.ibm.com, rdreier@cisco.com,
peter.p.waskiewicz.jr@intel.com, mcarlson@broadcom.com,
jeff@garzik.org, general@lists.openfabrics.org,
mchan@broadcom.com, tgraf@suug.ch, hadi@cyberus.ca,
netdev@vger.kernel.org, Krishna Kumar <krkumar2@in.ibm.com>,
xma@us.ibm.com
Subject: [PATCH 8/9 Rev3] [IPoIB] Post and work completion handler changes
Date: Wed, 08 Aug 2007 15:02:35 +0530 [thread overview]
Message-ID: <20070808093235.15396.3273.sendpatchset@localhost.localdomain> (raw)
In-Reply-To: <20070808093114.15396.22797.sendpatchset@localhost.localdomain>
IPoIB internal post and work completion handler changes.
Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
---
ipoib_ib.c | 217 ++++++++++++++++++++++++++++++++++++++++++++++++-------------
1 files changed, 173 insertions(+), 44 deletions(-)
diff -ruNp ORG/drivers/infiniband/ulp/ipoib/ipoib_ib.c NEW/drivers/infiniband/ulp/ipoib/ipoib_ib.c
--- ORG/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2007-07-17 08:48:35.000000000 +0530
+++ NEW/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2007-08-07 13:11:19.000000000 +0530
@@ -242,6 +242,8 @@ repost:
static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
+ int i, num_completions;
+ unsigned int tx_ring_index;
unsigned int wr_id = wc->wr_id;
struct ipoib_tx_buf *tx_req;
unsigned long flags;
@@ -255,18 +257,56 @@ static void ipoib_ib_handle_tx_wc(struct
return;
}
- tx_req = &priv->tx_ring[wr_id];
+ /*
+ * Handle skb completions from tx_prev_tail to wr_id. Two issues:
+ * - Need to stop other WC's from mangling same skb's if they
+ * run at the same time. Use tx_prev_tail to demarcate WC's.
+ * - Handle WC's from earlier (possibly multiple) post_sends in
+ * this iteration as we move from tx_prev_tail to wr_id, since
+ * if the last WR (which is the one which requested completion
+ * notification) failed to be sent for any of those earlier
+ * request(s), no completion notification is generated for
+ * successful WR's of those earlier request(s).
+ */
+ spin_lock_irqsave(&priv->comp_lock, flags);
+
+ /* Get start index */
+ tx_ring_index = priv->tx_prev_tail & (ipoib_sendq_size - 1);
+
+ /* Find number of WC's */
+ num_completions = wr_id - tx_ring_index + 1;
+ if (unlikely(num_completions <= 0))
+ num_completions += ipoib_sendq_size;
+
+ /* Save new start index for any parallel WC's */
+ priv->tx_prev_tail += num_completions;
+
+ spin_unlock_irqrestore(&priv->comp_lock, flags);
- ib_dma_unmap_single(priv->ca, tx_req->mapping,
- tx_req->skb->len, DMA_TO_DEVICE);
+ tx_req = &priv->tx_ring[tx_ring_index];
+ for (i = 0; i < num_completions; i++) {
+ if (likely(tx_req->skb)) {
+ ib_dma_unmap_single(priv->ca, tx_req->mapping,
+ tx_req->skb->len, DMA_TO_DEVICE);
- ++priv->stats.tx_packets;
- priv->stats.tx_bytes += tx_req->skb->len;
+ ++priv->stats.tx_packets;
+ priv->stats.tx_bytes += tx_req->skb->len;
- dev_kfree_skb_any(tx_req->skb);
+ dev_kfree_skb_any(tx_req->skb);
+ }
+ /*
+ * else this skb failed synchronously when posted and was
+ * freed immediately.
+ */
+
+ if (likely(++tx_ring_index != ipoib_sendq_size))
+ tx_req++;
+ else
+ tx_req = &priv->tx_ring[0];
+ }
spin_lock_irqsave(&priv->tx_lock, flags);
- ++priv->tx_tail;
+ priv->tx_tail += num_completions;
if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags)) &&
priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) {
clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
@@ -340,29 +380,57 @@ void ipoib_ib_completion(struct ib_cq *c
netif_rx_schedule(dev_ptr);
}
-static inline int post_send(struct ipoib_dev_priv *priv,
- unsigned int wr_id,
- struct ib_ah *address, u32 qpn,
- u64 addr, int len)
+/*
+ * post_send : Post WR(s) to the device.
+ *
+ * num_skbs is the number of WR's, first_wr is the first slot in tx_wr[] (or
+ * tx_sge[]). first_wr is normally zero unless a previous post_send returned
+ * error and we are trying to post the untried WR's, in which case first_wr
+ * is the index to the first untried WR.
+ *
+ * Break the WR chain before posting so the provider knows how many WR's to
+ * process; the chain is restored after the post.
+ */
+static inline int post_send(struct ipoib_dev_priv *priv, u32 qpn,
+ int first_wr, int num_skbs,
+ struct ib_send_wr **bad_wr)
{
- struct ib_send_wr *bad_wr;
+ int ret;
+ struct ib_send_wr *last_wr, *next_wr;
+
+ last_wr = &priv->tx_wr[first_wr + num_skbs - 1];
+
+ /* Set Completion Notification for last WR */
+ last_wr->send_flags = IB_SEND_SIGNALED;
- priv->tx_sge.addr = addr;
- priv->tx_sge.length = len;
+ /* Terminate the last WR */
+ next_wr = last_wr->next;
+ last_wr->next = NULL;
- priv->tx_wr.wr_id = wr_id;
- priv->tx_wr.wr.ud.remote_qpn = qpn;
- priv->tx_wr.wr.ud.ah = address;
+ /* Send all the WR's in one doorbell */
+ ret = ib_post_send(priv->qp, &priv->tx_wr[first_wr], bad_wr);
- return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
+ /* Restore send_flags & WR chain */
+ last_wr->send_flags = 0;
+ last_wr->next = next_wr;
+
+ return ret;
}
-void ipoib_send(struct net_device *dev, struct sk_buff *skb,
- struct ipoib_ah *address, u32 qpn)
+/*
+ * Map skb & store skb/mapping in tx_ring; and details of the WR in tx_wr
+ * to pass to the provider.
+ *
+ * Returns:
+ * 1: Error and the skb is freed.
+ * 0: skb processed successfully.
+ */
+int ipoib_process_skb(struct net_device *dev, struct sk_buff *skb,
+ struct ipoib_dev_priv *priv, struct ipoib_ah *address,
+ u32 qpn, int wr_num)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ipoib_tx_buf *tx_req;
u64 addr;
+ unsigned int tx_ring_index;
if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
@@ -370,7 +438,7 @@ void ipoib_send(struct net_device *dev,
++priv->stats.tx_dropped;
++priv->stats.tx_errors;
ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
- return;
+ return 1;
}
ipoib_dbg_data(priv, "sending packet, length=%d address=%p qpn=0x%06x\n",
@@ -383,35 +451,96 @@ void ipoib_send(struct net_device *dev,
* means we have to make sure everything is properly recorded and
* our state is consistent before we call post_send().
*/
- tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
- tx_req->skb = skb;
- addr = ib_dma_map_single(priv->ca, skb->data, skb->len,
- DMA_TO_DEVICE);
+ addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE);
if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
++priv->stats.tx_errors;
dev_kfree_skb_any(skb);
- return;
+ return 1;
}
- tx_req->mapping = addr;
- if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
- address->ah, qpn, addr, skb->len))) {
- ipoib_warn(priv, "post_send failed\n");
- ++priv->stats.tx_errors;
- ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
- dev_kfree_skb_any(skb);
- } else {
- dev->trans_start = jiffies;
+ tx_ring_index = priv->tx_head & (ipoib_sendq_size - 1);
+
+ /* Save till completion handler executes */
+ priv->tx_ring[tx_ring_index].skb = skb;
+ priv->tx_ring[tx_ring_index].mapping = addr;
+
+ /* Set WR values for the provider to use */
+ priv->tx_sge[wr_num].addr = addr;
+ priv->tx_sge[wr_num].length = skb->len;
+
+ priv->tx_wr[wr_num].wr_id = tx_ring_index;
+ priv->tx_wr[wr_num].wr.ud.remote_qpn = qpn;
+ priv->tx_wr[wr_num].wr.ud.ah = address->ah;
+
+ priv->tx_head++;
+
+ if (unlikely(priv->tx_head - priv->tx_tail == ipoib_sendq_size)) {
+ ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
+ netif_stop_queue(dev);
+ set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
+ }
- address->last_send = priv->tx_head;
- ++priv->tx_head;
+ return 0;
+}
- if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
- ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
- netif_stop_queue(dev);
- set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
+/*
+ * Send num_skbs to the device. If an skb is passed to this function, it is
+ * the single, unprocessed skb send case; otherwise it means that all skbs are
+ * already processed and put on priv->tx_wr,tx_sge,tx_ring, etc.
+ */
+void ipoib_send(struct net_device *dev, struct sk_buff *skb,
+ struct ipoib_ah *address, u32 qpn, int num_skbs)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ int first_wr = 0;
+
+ if (skb && ipoib_process_skb(dev, skb, priv, address, qpn, 0))
+ return;
+
+ /* Send all skb's in one post */
+ do {
+ struct ib_send_wr *bad_wr;
+
+ if (unlikely((post_send(priv, qpn, first_wr, num_skbs,
+ &bad_wr)))) {
+ int done;
+
+ ipoib_warn(priv, "post_send failed\n");
+
+ /* Get number of WR's that finished successfully */
+ done = bad_wr - &priv->tx_wr[first_wr];
+
+ /* Handle 1 error */
+ priv->stats.tx_errors++;
+ ib_dma_unmap_single(priv->ca,
+ priv->tx_sge[first_wr + done].addr,
+ priv->tx_sge[first_wr + done].length,
+ DMA_TO_DEVICE);
+
+ /* Free failed WR & reset for WC handler to recognize */
+ dev_kfree_skb_any(priv->tx_ring[bad_wr->wr_id].skb);
+ priv->tx_ring[bad_wr->wr_id].skb = NULL;
+
+ /* Handle 'n' successes */
+ if (done) {
+ dev->trans_start = jiffies;
+ address->last_send = priv->tx_head - (num_skbs -
+ done) - 1;
+ }
+
+ /* Get count of skbs that were not tried */
+ num_skbs -= (done + 1);
+ /* + 1 for WR that was tried & failed */
+
+ /* Get start index for next iteration */
+ first_wr += (done + 1);
+ } else {
+ dev->trans_start = jiffies;
+
+ address->last_send = priv->tx_head - 1;
+ num_skbs = 0;
}
- }
+ } while (num_skbs);
}
static void __ipoib_reap_ah(struct net_device *dev)
next prev parent reply other threads:[~2007-08-08 9:33 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-08-08 9:31 [ofa-general] [PATCH 0/9 Rev3] Implement batching skb API and support in IPoIB Krishna Kumar
2007-08-08 9:31 ` [PATCH 1/9 Rev3] [Doc] HOWTO Documentation for batching Krishna Kumar
2007-08-08 9:31 ` [PATCH 2/9 Rev3] [core] Add skb_blist & hard_start_xmit_batch Krishna Kumar
2007-08-08 10:59 ` [ofa-general] " Stephen Hemminger
2007-08-08 11:24 ` Krishna Kumar2
2007-08-08 12:01 ` Evgeniy Polyakov
2007-08-09 3:09 ` Krishna Kumar2
2007-08-08 9:31 ` [PATCH 3/9 Rev3] [sched] Modify qdisc_run to support batching Krishna Kumar
2007-08-08 12:14 ` [ofa-general] " Evgeniy Polyakov
2007-08-09 3:13 ` Krishna Kumar2
2007-08-08 14:05 ` Patrick McHardy
2007-08-08 15:26 ` [ofa-general] " jamal
2007-08-09 4:06 ` Krishna Kumar2
2007-08-08 9:31 ` [PATCH 4/9 Rev3] [ethtool] Add ethtool support Krishna Kumar
2007-08-08 9:32 ` [ofa-general] [PATCH 5/9 Rev3] [IPoIB] Header file changes Krishna Kumar
2007-08-08 9:32 ` [ofa-general] [PATCH 6/9 Rev3] [IPoIB] CM & Multicast changes Krishna Kumar
2007-08-08 9:32 ` [ofa-general] [PATCH 7/9 Rev3] [IPoIB] Verb changes Krishna Kumar
2007-08-08 9:32 ` Krishna Kumar [this message]
2007-08-08 9:32 ` [PATCH 9/9 Rev3] [IPoIB] Implement the new batching API Krishna Kumar
2007-08-08 10:49 ` [ofa-general] Re: [PATCH 0/9 Rev3] Implement batching skb API and support in IPoIB David Miller
2007-08-08 11:09 ` Krishna Kumar2
2007-08-08 22:01 ` [ofa-general] " David Miller
2007-08-09 4:19 ` Krishna Kumar2
2007-08-09 4:27 ` David Miller
2007-08-09 6:26 ` Krishna Kumar2
2007-08-08 13:42 ` [ofa-general] " Herbert Xu
2007-08-08 15:14 ` jamal
2007-08-08 20:55 ` Stephen Hemminger
2007-08-08 22:40 ` jamal
2007-08-08 22:22 ` David Miller
2007-08-08 22:53 ` jamal
2007-08-09 0:06 ` Shirley Ma
2007-08-09 3:19 ` [ofa-general] " Krishna Kumar2
2007-08-14 9:02 ` Krishna Kumar2
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070808093235.15396.3273.sendpatchset@localhost.localdomain \
--to=krkumar2@in.ibm.com \
--cc=Robert.Olsson@data.slu.se \
--cc=davem@davemloft.net \
--cc=gaagaan@gmail.com \
--cc=general@lists.openfabrics.org \
--cc=hadi@cyberus.ca \
--cc=herbert@gondor.apana.org.au \
--cc=jagana@us.ibm.com \
--cc=jeff@garzik.org \
--cc=johnpol@2ka.mipt.ru \
--cc=kaber@trash.net \
--cc=kumarkr@linux.ibm.com \
--cc=mcarlson@broadcom.com \
--cc=mchan@broadcom.com \
--cc=netdev@vger.kernel.org \
--cc=peter.p.waskiewicz.jr@intel.com \
--cc=rdreier@cisco.com \
--cc=rick.jones2@hp.com \
--cc=shemminger@linux-foundation.org \
--cc=sri@us.ibm.com \
--cc=tgraf@suug.ch \
--cc=xma@us.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.