All of lore.kernel.org
 help / color / mirror / Atom feed
From: Oleg Drokin <green@linuxhacker.ru>
To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	linux-kernel@vger.kernel.org, devel@driverdev.osuosl.org
Cc: Isaac Huang <he.huang@intel.com>, Oleg Drokin <oleg.drokin@intel.com>
Subject: [PATCH 18/18] staging/lustre/lnet: abort messages whose MD has been unlinked
Date: Sun, 22 Jun 2014 21:32:22 -0400	[thread overview]
Message-ID: <1403487142-4880-19-git-send-email-green@linuxhacker.ru> (raw)
In-Reply-To: <1403487142-4880-1-git-send-email-green@linuxhacker.ru>

From: Isaac Huang <he.huang@intel.com>

If LNetMDUnlink has been called, all outgoing messages
on that MD should be aborted before lnet_ni_send() is
called.

Signed-off-by: Isaac Huang <he.huang@intel.com>
Reviewed-on: http://review.whamcloud.com/8041
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-4006
Reviewed-by: Liang Zhen <liang.zhen@intel.com>
Reviewed-by: Doug Oucharek <doug.s.oucharek@intel.com>
Signed-off-by: Oleg Drokin <oleg.drokin@intel.com>
---
 .../staging/lustre/include/linux/lnet/lib-types.h  |  1 +
 drivers/staging/lustre/lnet/lnet/lib-md.c          | 10 ++---
 drivers/staging/lustre/lnet/lnet/lib-me.c          | 11 ++---
 drivers/staging/lustre/lnet/lnet/lib-move.c        | 49 +++++++++++++++-------
 4 files changed, 45 insertions(+), 26 deletions(-)

diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h
index a63654b..6816aa0 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-types.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h
@@ -280,6 +280,7 @@ typedef struct lnet_libmd {
 
 #define LNET_MD_FLAG_ZOMBIE	   (1 << 0)
 #define LNET_MD_FLAG_AUTO_UNLINK      (1 << 1)
+#define LNET_MD_FLAG_ABORTED	 (1 << 2)
 
 #ifdef LNET_USE_LIB_FREELIST
 typedef struct {
diff --git a/drivers/staging/lustre/lnet/lnet/lib-md.c b/drivers/staging/lustre/lnet/lnet/lib-md.c
index ae643f2..d68c6e0 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-md.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-md.c
@@ -387,7 +387,8 @@ EXPORT_SYMBOL(LNetMDBind);
 
 /**
  * Unlink the memory descriptor from any ME it may be linked to and release
- * the internal resources associated with it.
+ * the internal resources associated with it. As a result, active messages
+ * associated with the MD may get aborted.
  *
  * This function does not free the memory region associated with the MD;
  * i.e., the memory the user allocated for this MD. If the ME associated with
@@ -433,12 +434,11 @@ LNetMDUnlink (lnet_handle_md_t mdh)
 		return -ENOENT;
 	}
 
+	md->md_flags |= LNET_MD_FLAG_ABORTED;
 	/* If the MD is busy, lnet_md_unlink just marks it for deletion, and
-	 * when the NAL is done, the completion event flags that the MD was
+	 * when the LND is done, the completion event flags that the MD was
 	 * unlinked.  Otherwise, we enqueue an event now... */
-
-	if (md->md_eq != NULL &&
-	    md->md_refcount == 0) {
+	if (md->md_eq != NULL && md->md_refcount == 0) {
 		lnet_build_unlink_event(md, &ev);
 		lnet_eq_enqueue_event(md->md_eq, &ev);
 	}
diff --git a/drivers/staging/lustre/lnet/lnet/lib-me.c b/drivers/staging/lustre/lnet/lnet/lib-me.c
index 0081075..0e42209 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-me.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-me.c
@@ -246,11 +246,12 @@ LNetMEUnlink(lnet_handle_me_t meh)
 	}
 
 	md = me->me_md;
-	if (md != NULL &&
-	    md->md_eq != NULL &&
-	    md->md_refcount == 0) {
-		lnet_build_unlink_event(md, &ev);
-		lnet_eq_enqueue_event(md->md_eq, &ev);
+	if (md != NULL) {
+		md->md_flags |= LNET_MD_FLAG_ABORTED;
+		if (md->md_eq != NULL && md->md_refcount == 0) {
+			lnet_build_unlink_event(md, &ev);
+			lnet_eq_enqueue_event(md->md_eq, &ev);
+		}
 	}
 
 	lnet_me_unlink(me);
diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c
index bbf43ae..95bf41f 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-move.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-move.c
@@ -773,26 +773,30 @@ lnet_peer_alive_locked(lnet_peer_t *lp)
 	return 0;
 }
 
-int
+/**
+ * \param msg The message to be sent.
+ * \param do_send True if lnet_ni_send() should be called in this function.
+ *	  lnet_send() is going to lnet_net_unlock immediately after this, so
+ *	  it sets do_send FALSE and I don't do the unlock/send/lock bit.
+ *
+ * \retval 0 If \a msg sent or OK to send.
+ * \retval EAGAIN If \a msg blocked for credit.
+ * \retval EHOSTUNREACH If the next hop of the message appears dead.
+ * \retval ECANCELED If the MD of the message has been unlinked.
+ */
+static int
 lnet_post_send_locked(lnet_msg_t *msg, int do_send)
 {
-	/* lnet_send is going to lnet_net_unlock immediately after this,
-	 * so it sets do_send FALSE and I don't do the unlock/send/lock bit.
-	 * I return EAGAIN if msg blocked, EHOSTUNREACH if msg_txpeer
-	 * appears dead, and 0 if sent or OK to send */
-	struct lnet_peer	*lp = msg->msg_txpeer;
-	struct lnet_ni		*ni = lp->lp_ni;
-	struct lnet_tx_queue	*tq;
-	int			cpt;
+	lnet_peer_t		*lp = msg->msg_txpeer;
+	lnet_ni_t		*ni = lp->lp_ni;
+	int			cpt = msg->msg_tx_cpt;
+	struct lnet_tx_queue	*tq = ni->ni_tx_queues[cpt];
 
 	/* non-lnet_send() callers have checked before */
 	LASSERT(!do_send || msg->msg_tx_delayed);
 	LASSERT(!msg->msg_receiving);
 	LASSERT(msg->msg_tx_committed);
 
-	cpt = msg->msg_tx_cpt;
-	tq = ni->ni_tx_queues[cpt];
-
 	/* NB 'lp' is always the next hop */
 	if ((msg->msg_target.pid & LNET_PID_USERFLAG) == 0 &&
 	    lnet_peer_alive_locked(lp) == 0) {
@@ -809,6 +813,20 @@ lnet_post_send_locked(lnet_msg_t *msg, int do_send)
 		return EHOSTUNREACH;
 	}
 
+	if (msg->msg_md != NULL &&
+	    (msg->msg_md->md_flags & LNET_MD_FLAG_ABORTED) != 0) {
+		lnet_net_unlock(cpt);
+
+		CNETERR("Aborting message for %s: LNetM[DE]Unlink() already "
+			"called on the MD/ME.\n",
+			libcfs_id2str(msg->msg_target));
+		if (do_send)
+			lnet_finalize(ni, msg, -ECANCELED);
+
+		lnet_net_lock(cpt);
+		return ECANCELED;
+	}
+
 	if (!msg->msg_peertxcredit) {
 		LASSERT((lp->lp_txcredits < 0) ==
 			 !list_empty(&lp->lp_txq));
@@ -1327,13 +1345,13 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid)
 	rc = lnet_post_send_locked(msg, 0);
 	lnet_net_unlock(cpt);
 
-	if (rc == EHOSTUNREACH)
-		return -EHOSTUNREACH;
+	if (rc == EHOSTUNREACH || rc == ECANCELED)
+		return -rc;
 
 	if (rc == 0)
 		lnet_ni_send(src_ni, msg);
 
-	return 0;
+	return 0; /* rc == 0 or EAGAIN */
 }
 
 static void
@@ -2288,7 +2306,6 @@ LNetGet(lnet_nid_t self, lnet_handle_md_t mdh,
 		lnet_res_unlock(cpt);
 
 		lnet_msg_free(msg);
-
 		return -ENOENT;
 	}
 
-- 
1.9.0


      parent reply	other threads:[~2014-06-23  1:33 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-06-23  1:32 [PATCH 00/18] Lustre fixes Oleg Drokin
2014-06-23  1:32 ` [PATCH 01/18] staging/lustre/libcfs: revert changes to libcfs_sock_ioctl Oleg Drokin
2014-06-23  1:32 ` [PATCH 02/18] staging/lustre/ptlrpc: Protect request buffer changing Oleg Drokin
2014-06-23  1:32 ` [PATCH 03/18] staging/lustre/llite: Only kill SGID/SUID bits Oleg Drokin
2014-06-23  1:32 ` [PATCH 04/18] staging/lustre: fix frong ldlm flags type used Oleg Drokin
2014-06-23  1:32 ` [PATCH 05/18] staging/lustre/ptlrpc: fix NULL pointer dereference of {exp,imp}_obd Oleg Drokin
2014-06-23  1:32 ` [PATCH 06/18] staging/lustre/mgc: mgc import reconnect race Oleg Drokin
2014-06-23  1:32 ` [PATCH 07/18] staging/lustre/osc: get rid of old checksum initial value Oleg Drokin
2014-06-23  1:32 ` [PATCH 08/18] staging/lustre/ptlrpc: race at req processing Oleg Drokin
2014-06-23  1:32 ` [PATCH 09/18] staging/lustre/mgc: replace hard-coded MGC_ENQUEUE_LIMIT value Oleg Drokin
2014-06-23  1:32 ` [PATCH 10/18] staging/lustre/ptlrpc: Add schedule point to ptlrpc_check_set() Oleg Drokin
2014-06-23  1:32 ` [PATCH 11/18] staging/lustre/obdclass: Fix uninitialized variables Oleg Drokin
2014-06-23  1:32 ` [PATCH 12/18] staging/lustre/osc: osc_extent_truncate()) ASSERTION( !ext->oe_urgent ) failed Oleg Drokin
2014-06-23  1:32 ` [PATCH 13/18] staging/lustre/llite: Fix uninitialized variable Oleg Drokin
2014-06-23  1:32 ` [PATCH 14/18] staging/lustre/ptlrpc: unlink request buffer correctly Oleg Drokin
2014-06-23  1:32 ` [PATCH 15/18] staging/lustre/obdclass: runtime load lustre client when needed Oleg Drokin
2014-06-23  1:32 ` [PATCH 16/18] staging/lustre/vvp: release mmap_sem in error case Oleg Drokin
2014-06-23  1:32 ` [PATCH 17/18] staging/lustre/llite: fix a flag bug of vvp_io_kernel_fault() Oleg Drokin
2014-06-23  1:32 ` Oleg Drokin [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1403487142-4880-19-git-send-email-green@linuxhacker.ru \
    --to=green@linuxhacker.ru \
    --cc=devel@driverdev.osuosl.org \
    --cc=gregkh@linuxfoundation.org \
    --cc=he.huang@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=oleg.drokin@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.