public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
From: Jason Gunthorpe <jgg-uk2M96/98Pc@public.gmane.org>
To: Honggang LI <honli-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Cc: Bart Van Assche <Bart.VanAssche-Sjgp3cTcYWE@public.gmane.org>,
	"linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org"
	<linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>
Subject: Re: [PATCH rdma-core] srp_daemon: Install signal handler for ibsrpdm
Date: Fri, 15 Dec 2017 10:28:00 -0700	[thread overview]
Message-ID: <20171215172800.GA12434@ziepe.ca> (raw)
In-Reply-To: <20171215013628.GA743-Y5OA6DF/u0nid9cnFhDO8BcY2uh10dtjAL8bYrjMMd8@public.gmane.org>

On Fri, Dec 15, 2017 at 09:36:28AM +0800, Honggang LI wrote:
> On Thu, Dec 14, 2017 at 02:59:34PM +0000, Bart Van Assche wrote:
> > On Thu, 2017-12-14 at 19:02 +0800, Honggang LI wrote:
> > > fd3005f0cd34 moves the signal handler setup from ibsrpdm path. So,
> > > default signal handler will be used when it receives signal SIGINT.
> > > ibsrpdm will exit with non-zero exit code as default signal handler
> > > killed it.
> > 
> > Can you explain why you think that ibsrpdm needs a signal handler?
> 
> main
>   ibsrpdm
>     alloc_res
>       pthread_create(&res->res.async_ev_thread [1]
>     ....
>     free_res
>       if (res->async_ev_thread) { pthread_kill(res->async_ev_thread, SIGINT); [2]
> 
> 
> The ibsrpdm program create a thread in [1], and send a SIGINT in [2].
> The default behavior of SIGINT is to terminate the process.

Yuk, no, using signals like this is horrifyingly buggy.

Here is a sketch on how to fix it properly. All the users of
pthread_kill should be eliminated.

Though overall, there is really no reason to even cleanup the threads,
just call exit?

diff --git a/srp_daemon/srp_daemon.c b/srp_daemon/srp_daemon.c
index cec36db2e0f12e..fe307705ad6a51 100644
--- a/srp_daemon/srp_daemon.c
+++ b/srp_daemon/srp_daemon.c
@@ -61,6 +61,7 @@
 #include <string.h>
 #include <signal.h>
 #include <sys/syslog.h>
+#include <sys/eventfd.h>
 #include <infiniband/umad.h>
 #include <infiniband/umad_types.h>
 #include <infiniband/umad_sa.h>
@@ -1887,7 +1888,9 @@ static void free_res(struct resources *res)
 		modify_qp_to_err(res->ud_res->qp);
 
 	if (res->reconnect_thread) {
-		pthread_kill(res->reconnect_thread, SIGINT);
+		uint64_t val = 1;
+
+		write(res->sync_res->stop_event_fd, &val, sizeof(val));
 		pthread_join(res->reconnect_thread, &status);
 	}
 	if (res->async_ev_thread) {
@@ -1947,6 +1950,7 @@ static struct resources *alloc_res(void)
 			goto err;
 	}
 
+	res->sync_res.stop_event_fd = eventfd(0, EFD_CLOEXEC);
 	ret = pthread_create(&res->res.async_ev_thread, NULL,
 			     run_thread_listen_to_events, &res->res);
 	if (ret)
diff --git a/srp_daemon/srp_daemon.h b/srp_daemon/srp_daemon.h
index 5d268ed395e17e..8cbee09d7c688b 100644
--- a/srp_daemon/srp_daemon.h
+++ b/srp_daemon/srp_daemon.h
@@ -245,6 +245,7 @@ enum {
 };
 
 struct sync_resources {
+	int stop_event_fd;
 	int stop_threads;
 	int next_task;
 	struct timespec next_recalc_time;
diff --git a/srp_daemon/srp_handle_traps.c b/srp_daemon/srp_handle_traps.c
index 6b36b15cc84c16..cedc2b0ab8e5af 100644
--- a/srp_daemon/srp_handle_traps.c
+++ b/srp_daemon/srp_handle_traps.c
@@ -44,6 +44,7 @@
 #include <infiniband/verbs.h>
 #include <infiniband/umad_sa.h>
 #include <infiniband/umad_sm.h>
+#include <poll.h>
 
 #include "srp_ib_types.h"
 
@@ -788,69 +789,94 @@ int register_to_traps(struct resources *res, int subscribe)
 
 }
 
-void *run_thread_listen_to_events(void *res_in)
+static int do_async_event(struct resources *res)
 {
-	struct resources *res = (struct resources *)res_in;
 	struct ibv_async_event event;
 
-	while (!stop_threads(res->sync_res)) {
-		if (ibv_get_async_event(res->ud_res->ib_ctx, &event)) {
-			if (errno != EINTR)
-				pr_err("ibv_get_async_event failed (errno = %d)\n",
-				       errno);
-			break;
+	if (ibv_get_async_event(res->ud_res->ib_ctx, &event)) {
+		if (errno != EINTR)
+			pr_err("ibv_get_async_event failed (errno = %d)\n",
+			       errno);
+		return -1;
+	}
+
+	pr_debug("event_type %d, port %d\n", event.event_type,
+		 event.element.port_num);
+
+	switch (event.event_type) {
+	case IBV_EVENT_PORT_ACTIVE:
+	case IBV_EVENT_SM_CHANGE:
+	case IBV_EVENT_LID_CHANGE:
+	case IBV_EVENT_CLIENT_REREGISTER:
+	case IBV_EVENT_PKEY_CHANGE:
+		if (event.element.port_num == config->port_num) {
+			pthread_mutex_lock(&res->sync_res->mutex);
+			__schedule_rescan(res->sync_res, 0);
+			wake_up_main_loop(0);
+			pthread_mutex_unlock(&res->sync_res->mutex);
 		}
+		break;
 
-		pr_debug("event_type %d, port %d\n",
-			 event.event_type, event.element.port_num);
-
-		switch (event.event_type) {
-		case IBV_EVENT_PORT_ACTIVE:
-		case IBV_EVENT_SM_CHANGE:
-		case IBV_EVENT_LID_CHANGE:
-		case IBV_EVENT_CLIENT_REREGISTER:
-		case IBV_EVENT_PKEY_CHANGE:
-			if (event.element.port_num == config->port_num) {
-				pthread_mutex_lock(&res->sync_res->mutex);
-				__schedule_rescan(res->sync_res, 0);
-				wake_up_main_loop(0);
-				pthread_mutex_unlock(&res->sync_res->mutex);
-			}
-		  	break;
-
-		case IBV_EVENT_DEVICE_FATAL:
-		case IBV_EVENT_CQ_ERR:
-		case IBV_EVENT_QP_FATAL:
-		  /* clean and restart */
-			pr_err("Critical event %d, raising catastrophic "
-			       "error signal\n", event.event_type);
-			raise(SRP_CATAS_ERR);
-			break;
+	case IBV_EVENT_DEVICE_FATAL:
+	case IBV_EVENT_CQ_ERR:
+	case IBV_EVENT_QP_FATAL:
+		/* clean and restart */
+		pr_err("Critical event %d, raising catastrophic "
+		       "error signal\n",
+		       event.event_type);
+		raise(SRP_CATAS_ERR);
+		break;
 
- 	      	 /*
+		/*
 
-		case IBV_EVENT_PORT_ERR:
-		case IBV_EVENT_QP_REQ_ERR:
-		case IBV_EVENT_QP_ACCESS_ERR:
-		case IBV_EVENT_COMM_EST:
-		case IBV_EVENT_SQ_DRAINED:
-		case IBV_EVENT_PATH_MIG:
-		case IBV_EVENT_PATH_MIG_ERR:
-		case IBV_EVENT_SRQ_ERR:
-		case IBV_EVENT_SRQ_LIMIT_REACHED:
-		case IBV_EVENT_QP_LAST_WQE_REACHED:
+	       case IBV_EVENT_PORT_ERR:
+	       case IBV_EVENT_QP_REQ_ERR:
+	       case IBV_EVENT_QP_ACCESS_ERR:
+	       case IBV_EVENT_COMM_EST:
+	       case IBV_EVENT_SQ_DRAINED:
+	       case IBV_EVENT_PATH_MIG:
+	       case IBV_EVENT_PATH_MIG_ERR:
+	       case IBV_EVENT_SRQ_ERR:
+	       case IBV_EVENT_SRQ_LIMIT_REACHED:
+	       case IBV_EVENT_QP_LAST_WQE_REACHED:
 
-		*/
+	       */
 
 
-		default:
+	default:
+		break;
+	}
+
+	ibv_ack_async_event(&event);
+	return 0;
+}
+
+void *run_thread_listen_to_events(void *res_in)
+{
+	struct pollfd fds[2];
+	struct resources *res = (struct resources *)res_in;
+
+	fds[0].fd = res->ud_res->ib_ctx->async_fd;
+	fds[0].events = POLLIN;
+	fds[1].fd = res->sync_res->stop_event_fd;
+	fds[1].events = POLLIN;
+
+	while (1) {
+		if (poll(fds, 2, -1) == -1) {
+			if (errno == EINTR)
+				continue;
+			pr_err("ibv_get_async_event failed (errno = %d)\n",
+			       errno);
 			break;
 		}
 
-		ibv_ack_async_event(&event);
+		if (fds[1].revents & POLLIN)
+			break;
 
+		if (fds[0].revents & POLLIN)
+			if (do_async_event(res))
+				break;
 	}
 
 	return NULL;
 }
-
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  parent reply	other threads:[~2017-12-15 17:28 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-12-14 11:02 [PATCH rdma-core] srp_daemon: Install signal handler for ibsrpdm Honggang LI
     [not found] ` <20171214110241.4701-1-honli-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2017-12-14 14:59   ` Bart Van Assche
     [not found]     ` <1513263572.2986.2.camel-Sjgp3cTcYWE@public.gmane.org>
2017-12-15  1:36       ` Honggang LI
     [not found]         ` <20171215013628.GA743-Y5OA6DF/u0nid9cnFhDO8BcY2uh10dtjAL8bYrjMMd8@public.gmane.org>
2017-12-15 17:28           ` Jason Gunthorpe [this message]
     [not found]             ` <20171215172800.GA12434-uk2M96/98Pc@public.gmane.org>
2017-12-15 17:50               ` Bart Van Assche
     [not found]                 ` <1513360253.2571.23.camel-Sjgp3cTcYWE@public.gmane.org>
2017-12-15 17:59                   ` Jason Gunthorpe
2017-12-19 12:20               ` Honggang LI
     [not found]                 ` <20171219122050.GA19682-Y5OA6DF/u0nid9cnFhDO8BcY2uh10dtjAL8bYrjMMd8@public.gmane.org>
2017-12-19 16:57                   ` Bart Van Assche
     [not found]                     ` <1513702653.2535.3.camel-Sjgp3cTcYWE@public.gmane.org>
2017-12-19 19:12                       ` Honggang LI
2017-12-19 21:13                   ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171215172800.GA12434@ziepe.ca \
    --to=jgg-uk2m96/98pc@public.gmane.org \
    --cc=Bart.VanAssche-Sjgp3cTcYWE@public.gmane.org \
    --cc=honli-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox