From: Hariprasad Shenai <hariprasad-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org>
To: netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Cc: davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org,
roland-BHEL68pLQRGGvPXPguhicg@public.gmane.org,
santosh-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org,
dm-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org,
kumaras-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org,
swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org,
leedom-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org,
nirranjan-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org,
hariprasad-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org
Subject: [PATCHv4 net-next 26/32] iw_cxgb4: endpoint timeout fixes
Date: Fri, 7 Mar 2014 16:03:23 +0530 [thread overview]
Message-ID: <1394188409-9739-27-git-send-email-hariprasad@chelsio.com> (raw)
In-Reply-To: <1394188409-9739-1-git-send-email-hariprasad-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org>
From: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
1) timedout endpoint processing can be starved. If there is continual
CPL messages flowing into the driver, the endpoint timeout processing
can be starved. This condition exposed the other bugs below.
Solution: In process_work(), call process_timedout_eps() after each CPL
is processed.
2) Connection events can be processed even though the endpoint is on
the timeout list. If the endpoint is scheduled for timeout processing,
then we must ignore MPA Start Requests and Replies.
Solution: Change stop_ep_timer() to return 1 if the ep has already been
queued for timeout processing. All the callers of stop_ep_timer() need
to check this and act accordingly. There are just a few cases where
the caller needs to do something different if stop_ep_timer() returns 1:
1) in process_mpa_reply(), ignore the reply and process_timeout()
will abort the connection.
2) in process_mpa_request, ignore the request and process_timeout()
will abort the connection.
It is ok for callers of stop_ep_timer() to abort the connection since
that will leave the state in ABORTING or DEAD, and process_timeout()
now ignores timeouts when the ep is in these states.
3) Double insertion on the timeout list. Since the endpoint timers are
used for connection setup and teardown, we need to guard against the
possibility that an endpoint is already on the timeout list. This is
a rare condition and only seen under heavy load and in the presense of
the above 2 bugs.
Solution: In ep_timeout(), don't queue the endpoint if it is already on
the queue.
Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
---
drivers/infiniband/hw/cxgb4/cm.c | 89 +++++++++++++++++++++++++---------------
1 file changed, 56 insertions(+), 33 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 31e725e..64126bb 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -178,12 +178,15 @@ static void start_ep_timer(struct c4iw_ep *ep)
add_timer(&ep->timer);
}
-static void stop_ep_timer(struct c4iw_ep *ep)
+static int stop_ep_timer(struct c4iw_ep *ep)
{
PDBG("%s ep %p stopping\n", __func__, ep);
del_timer_sync(&ep->timer);
- if (!test_and_set_bit(TIMEOUT, &ep->com.flags))
+ if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
c4iw_put_ep(&ep->com);
+ return 0;
+ }
+ return 1;
}
static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
@@ -1188,12 +1191,11 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
/*
- * Stop mpa timer. If it expired, then the state has
- * changed and we bail since ep_timeout already aborted
- * the connection.
+ * Stop mpa timer. If it expired, then
+ * we ignore the MPA reply. process_timeout()
+ * will abort the connection.
*/
- stop_ep_timer(ep);
- if (ep->com.state != MPA_REQ_SENT)
+ if (stop_ep_timer(ep))
return;
/*
@@ -1398,15 +1400,12 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- if (ep->com.state != MPA_REQ_WAIT)
- return;
-
/*
* If we get more than the supported amount of private data
* then we must fail this connection.
*/
if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
abort_connection(ep, skb, GFP_KERNEL);
return;
}
@@ -1436,13 +1435,13 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
if (mpa->revision > mpa_rev) {
printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
" Received = %d\n", __func__, mpa_rev, mpa->revision);
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
abort_connection(ep, skb, GFP_KERNEL);
return;
}
if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
abort_connection(ep, skb, GFP_KERNEL);
return;
}
@@ -1453,7 +1452,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
* Fail if there's too much private data.
*/
if (plen > MPA_MAX_PRIVATE_DATA) {
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
abort_connection(ep, skb, GFP_KERNEL);
return;
}
@@ -1462,7 +1461,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
* If plen does not account for pkt size
*/
if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
abort_connection(ep, skb, GFP_KERNEL);
return;
}
@@ -1519,18 +1518,24 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
ep->mpa_attr.p2p_type);
- __state_set(&ep->com, MPA_REQ_RCVD);
- stop_ep_timer(ep);
-
- /* drive upcall */
- mutex_lock(&ep->parent_ep->com.mutex);
- if (ep->parent_ep->com.state != DEAD) {
- if (connect_request_upcall(ep))
+ /*
+ * If the endpoint timer already expired, then we ignore
+ * the start request. process_timeout() will abort
+ * the connection.
+ */
+ if (!stop_ep_timer(ep)) {
+ __state_set(&ep->com, MPA_REQ_RCVD);
+
+ /* drive upcall */
+ mutex_lock(&ep->parent_ep->com.mutex);
+ if (ep->parent_ep->com.state != DEAD) {
+ if (connect_request_upcall(ep))
+ abort_connection(ep, skb, GFP_KERNEL);
+ } else {
abort_connection(ep, skb, GFP_KERNEL);
- } else {
- abort_connection(ep, skb, GFP_KERNEL);
+ }
+ mutex_unlock(&ep->parent_ep->com.mutex);
}
- mutex_unlock(&ep->parent_ep->com.mutex);
return;
}
@@ -2320,7 +2325,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
disconnect = 0;
break;
case MORIBUND:
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
if (ep->com.cm_id && ep->com.qp) {
attrs.next_state = C4IW_QP_STATE_IDLE;
c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
@@ -2380,10 +2385,10 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
case CONNECTING:
break;
case MPA_REQ_WAIT:
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
break;
case MPA_REQ_SENT:
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1))
connect_reply_upcall(ep, -ECONNRESET);
else {
@@ -2488,7 +2493,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
__state_set(&ep->com, MORIBUND);
break;
case MORIBUND:
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
if ((ep->com.cm_id) && (ep->com.qp)) {
attrs.next_state = C4IW_QP_STATE_IDLE;
c4iw_modify_qp(ep->com.qp->rhp,
@@ -3083,7 +3088,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
close = 1;
if (abrupt) {
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
ep->com.state = ABORTING;
} else
ep->com.state = MORIBUND;
@@ -3518,6 +3523,16 @@ static void process_timeout(struct c4iw_ep *ep)
__state_set(&ep->com, ABORTING);
close_complete_upcall(ep, -ETIMEDOUT);
break;
+ case ABORTING:
+ case DEAD:
+
+ /*
+ * These states are expected if the ep timed out at the same
+ * time as another thread was calling stop_ep_timer().
+ * So we silently do nothing for these states.
+ */
+ abort = 0;
+ break;
default:
WARN(1, "%s unexpected state ep %p tid %u state %u\n",
__func__, ep, ep->hwtid, ep->com.state);
@@ -3539,6 +3554,8 @@ static void process_timedout_eps(void)
tmp = timeout_list.next;
list_del(tmp);
+ tmp->next = NULL;
+ tmp->prev = NULL;
spin_unlock_irq(&timeout_lock);
ep = list_entry(tmp, struct c4iw_ep, entry);
process_timeout(ep);
@@ -3555,6 +3572,7 @@ static void process_work(struct work_struct *work)
unsigned int opcode;
int ret;
+ process_timedout_eps();
while ((skb = skb_dequeue(&rxq))) {
rpl = cplhdr(skb);
dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
@@ -3564,8 +3582,8 @@ static void process_work(struct work_struct *work)
ret = work_handlers[opcode](dev, skb);
if (!ret)
kfree_skb(skb);
+ process_timedout_eps();
}
- process_timedout_eps();
}
static DECLARE_WORK(skb_work, process_work);
@@ -3577,8 +3595,13 @@ static void ep_timeout(unsigned long arg)
spin_lock(&timeout_lock);
if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
- list_add_tail(&ep->entry, &timeout_list);
- kickit = 1;
+ /*
+ * Only insert if it is not already on the list.
+ */
+ if (!ep->entry.next) {
+ list_add_tail(&ep->entry, &timeout_list);
+ kickit = 1;
+ }
}
spin_unlock(&timeout_lock);
if (kickit)
--
1.8.4
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2014-03-07 10:33 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-03-07 10:32 [PATCHv4 net-next 00/32] Misc. fixes for cxgb4 and iw_cxgb4 Hariprasad Shenai
2014-03-07 10:32 ` [PATCHv4 net-next 01/32] cxgb4: Fix some small bugs in t4_sge_init_soft() when our Page Size is 64KB Hariprasad Shenai
2014-03-07 10:32 ` [PATCHv4 net-next 02/32] cxgb4: Add code to dump SGE registers when hitting idma hangs Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 03/32] cxgb4: Rectify emitting messages about SGE Ingress DMA channels being potentially stuck Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 04/32] cxgb4: Updates for T5 SGE's Egress Congestion Threshold Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 06/32] cxgb4: Calculate len properly for LSO path Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 07/32] iw_cxgb4: cap CQ size at T4_MAX_IQ_SIZE Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 08/32] iw_cxgb4: Allow loopback connections Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 09/32] iw_cxgb4: release neigh entry in error paths Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 10/32] iw_cxgb4: Treat CPL_ERR_KEEPALV_NEG_ADVICE as negative advice Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 12/32] iw_cxgb4: use the BAR2/WC path for kernel QPs and T5 devices Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 13/32] iw_cxgb4: Fix incorrect BUG_ON conditions Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 14/32] iw_cxgb4: Mind the sq_sig_all/sq_sig_type QP attributes Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 15/32] iw_cxgb4: default peer2peer mode to 1 Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 17/32] iw_cxgb4: don't leak skb in c4iw_uld_rx_handler() Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 18/32] iw_cxgb4: fix possible memory leak in RX_PKT processing Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 19/32] iw_cxgb4: ignore read reponse type 1 CQEs Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 20/32] iw_cxgb4: connect_request_upcall fixes Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 21/32] iw_cxgb4: adjust tcp snd/rcv window based on link speed Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 22/32] iw_cxgb4: update snd_seq when sending MPA messages Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 23/32] iw_cxgb4: lock around accept/reject downcalls Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 24/32] iw_cxgb4: drop RX_DATA packets if the endpoint is gone Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 25/32] iw_cxgb4: rx_data() needs to hold the ep mutex Hariprasad Shenai
[not found] ` <1394188409-9739-1-git-send-email-hariprasad-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org>
2014-03-07 10:33 ` [PATCHv4 net-next 05/32] cxgb4: use spinlock_irqsave/spinlock_irqrestore for db lock Hariprasad Shenai
[not found] ` <1394188409-9739-6-git-send-email-hariprasad-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org>
2014-03-08 5:43 ` David Miller
2014-03-07 10:33 ` [PATCHv4 net-next 11/32] cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 16/32] iw_cxgb4: save the correct map length for fast_reg_page_lists Hariprasad Shenai
2014-03-07 10:33 ` Hariprasad Shenai [this message]
2014-03-07 10:33 ` [PATCHv4 net-next 27/32] iw_cxgb4: rmb() after reading valid gen bit Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 28/32] iw_cxgb4: wc_wmb() needed after DB writes Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 29/32] iw_cxgb4: SQ flush fix Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 30/32] iw_cxgb4: minor fixes Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 31/32] iw_cxgb4: Max fastreg depth depends on DSGL support Hariprasad Shenai
2014-03-07 10:33 ` [PATCHv4 net-next 32/32] iw_cxgb4: Use pr_warn_ratelimited Hariprasad Shenai
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1394188409-9739-27-git-send-email-hariprasad@chelsio.com \
--to=hariprasad-ut6up61k2wzbdgjk7y7tuq@public.gmane.org \
--cc=davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org \
--cc=dm-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org \
--cc=kumaras-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org \
--cc=leedom-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org \
--cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=nirranjan-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org \
--cc=roland-BHEL68pLQRGGvPXPguhicg@public.gmane.org \
--cc=santosh-ut6Up61K2wZBDgjK7y7TUQ@public.gmane.org \
--cc=swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).