From: Jason Gunthorpe <jgg-uk2M96/98Pc@public.gmane.org>
To: Honggang LI <honli-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Cc: Bart Van Assche <Bart.VanAssche-Sjgp3cTcYWE@public.gmane.org>,
"linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org"
<linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>
Subject: Re: [PATCH rdma-core] srp_daemon: Install signal handler for ibsrpdm
Date: Fri, 15 Dec 2017 10:28:00 -0700 [thread overview]
Message-ID: <20171215172800.GA12434@ziepe.ca> (raw)
In-Reply-To: <20171215013628.GA743-Y5OA6DF/u0nid9cnFhDO8BcY2uh10dtjAL8bYrjMMd8@public.gmane.org>
On Fri, Dec 15, 2017 at 09:36:28AM +0800, Honggang LI wrote:
> On Thu, Dec 14, 2017 at 02:59:34PM +0000, Bart Van Assche wrote:
> > On Thu, 2017-12-14 at 19:02 +0800, Honggang LI wrote:
> > > fd3005f0cd34 moves the signal handler setup from ibsrpdm path. So,
> > > default signal handler will be used when it receives signal SIGINT.
> > > ibsrpdm will exit with non-zero exit code as default signal handler
> > > killed it.
> >
> > Can you explain why you think that ibsrpdm needs a signal handler?
>
> main
> ibsrpdm
> alloc_res
> pthread_create(&res->res.async_ev_thread [1]
> ....
> free_res
> if (res->async_ev_thread) { pthread_kill(res->async_ev_thread, SIGINT); [2]
>
>
> The ibsrpdm program create a thread in [1], and send a SIGINT in [2].
> The default behavior of SIGINT is to terminate the process.
Yuk, no, using signals like this is horrifyingly buggy.
Here is a sketch on how to fix it properly. All the users of
pthread_kill should be eliminated.
Though overall, there is really no reason to even cleanup the threads,
just call exit?
diff --git a/srp_daemon/srp_daemon.c b/srp_daemon/srp_daemon.c
index cec36db2e0f12e..fe307705ad6a51 100644
--- a/srp_daemon/srp_daemon.c
+++ b/srp_daemon/srp_daemon.c
@@ -61,6 +61,7 @@
#include <string.h>
#include <signal.h>
#include <sys/syslog.h>
+#include <sys/eventfd.h>
#include <infiniband/umad.h>
#include <infiniband/umad_types.h>
#include <infiniband/umad_sa.h>
@@ -1887,7 +1888,9 @@ static void free_res(struct resources *res)
modify_qp_to_err(res->ud_res->qp);
if (res->reconnect_thread) {
- pthread_kill(res->reconnect_thread, SIGINT);
+ uint64_t val = 1;
+
+ write(res->sync_res->stop_event_fd, &val, sizeof(val));
pthread_join(res->reconnect_thread, &status);
}
if (res->async_ev_thread) {
@@ -1947,6 +1950,7 @@ static struct resources *alloc_res(void)
goto err;
}
+ res->sync_res.stop_event_fd = eventfd(0, EFD_CLOEXEC);
ret = pthread_create(&res->res.async_ev_thread, NULL,
run_thread_listen_to_events, &res->res);
if (ret)
diff --git a/srp_daemon/srp_daemon.h b/srp_daemon/srp_daemon.h
index 5d268ed395e17e..8cbee09d7c688b 100644
--- a/srp_daemon/srp_daemon.h
+++ b/srp_daemon/srp_daemon.h
@@ -245,6 +245,7 @@ enum {
};
struct sync_resources {
+ int stop_event_fd;
int stop_threads;
int next_task;
struct timespec next_recalc_time;
diff --git a/srp_daemon/srp_handle_traps.c b/srp_daemon/srp_handle_traps.c
index 6b36b15cc84c16..cedc2b0ab8e5af 100644
--- a/srp_daemon/srp_handle_traps.c
+++ b/srp_daemon/srp_handle_traps.c
@@ -44,6 +44,7 @@
#include <infiniband/verbs.h>
#include <infiniband/umad_sa.h>
#include <infiniband/umad_sm.h>
+#include <poll.h>
#include "srp_ib_types.h"
@@ -788,69 +789,94 @@ int register_to_traps(struct resources *res, int subscribe)
}
-void *run_thread_listen_to_events(void *res_in)
+static int do_async_event(struct resources *res)
{
- struct resources *res = (struct resources *)res_in;
struct ibv_async_event event;
- while (!stop_threads(res->sync_res)) {
- if (ibv_get_async_event(res->ud_res->ib_ctx, &event)) {
- if (errno != EINTR)
- pr_err("ibv_get_async_event failed (errno = %d)\n",
- errno);
- break;
+ if (ibv_get_async_event(res->ud_res->ib_ctx, &event)) {
+ if (errno != EINTR)
+ pr_err("ibv_get_async_event failed (errno = %d)\n",
+ errno);
+ return -1;
+ }
+
+ pr_debug("event_type %d, port %d\n", event.event_type,
+ event.element.port_num);
+
+ switch (event.event_type) {
+ case IBV_EVENT_PORT_ACTIVE:
+ case IBV_EVENT_SM_CHANGE:
+ case IBV_EVENT_LID_CHANGE:
+ case IBV_EVENT_CLIENT_REREGISTER:
+ case IBV_EVENT_PKEY_CHANGE:
+ if (event.element.port_num == config->port_num) {
+ pthread_mutex_lock(&res->sync_res->mutex);
+ __schedule_rescan(res->sync_res, 0);
+ wake_up_main_loop(0);
+ pthread_mutex_unlock(&res->sync_res->mutex);
}
+ break;
- pr_debug("event_type %d, port %d\n",
- event.event_type, event.element.port_num);
-
- switch (event.event_type) {
- case IBV_EVENT_PORT_ACTIVE:
- case IBV_EVENT_SM_CHANGE:
- case IBV_EVENT_LID_CHANGE:
- case IBV_EVENT_CLIENT_REREGISTER:
- case IBV_EVENT_PKEY_CHANGE:
- if (event.element.port_num == config->port_num) {
- pthread_mutex_lock(&res->sync_res->mutex);
- __schedule_rescan(res->sync_res, 0);
- wake_up_main_loop(0);
- pthread_mutex_unlock(&res->sync_res->mutex);
- }
- break;
-
- case IBV_EVENT_DEVICE_FATAL:
- case IBV_EVENT_CQ_ERR:
- case IBV_EVENT_QP_FATAL:
- /* clean and restart */
- pr_err("Critical event %d, raising catastrophic "
- "error signal\n", event.event_type);
- raise(SRP_CATAS_ERR);
- break;
+ case IBV_EVENT_DEVICE_FATAL:
+ case IBV_EVENT_CQ_ERR:
+ case IBV_EVENT_QP_FATAL:
+ /* clean and restart */
+ pr_err("Critical event %d, raising catastrophic "
+ "error signal\n",
+ event.event_type);
+ raise(SRP_CATAS_ERR);
+ break;
- /*
+ /*
- case IBV_EVENT_PORT_ERR:
- case IBV_EVENT_QP_REQ_ERR:
- case IBV_EVENT_QP_ACCESS_ERR:
- case IBV_EVENT_COMM_EST:
- case IBV_EVENT_SQ_DRAINED:
- case IBV_EVENT_PATH_MIG:
- case IBV_EVENT_PATH_MIG_ERR:
- case IBV_EVENT_SRQ_ERR:
- case IBV_EVENT_SRQ_LIMIT_REACHED:
- case IBV_EVENT_QP_LAST_WQE_REACHED:
+ case IBV_EVENT_PORT_ERR:
+ case IBV_EVENT_QP_REQ_ERR:
+ case IBV_EVENT_QP_ACCESS_ERR:
+ case IBV_EVENT_COMM_EST:
+ case IBV_EVENT_SQ_DRAINED:
+ case IBV_EVENT_PATH_MIG:
+ case IBV_EVENT_PATH_MIG_ERR:
+ case IBV_EVENT_SRQ_ERR:
+ case IBV_EVENT_SRQ_LIMIT_REACHED:
+ case IBV_EVENT_QP_LAST_WQE_REACHED:
- */
+ */
- default:
+ default:
+ break;
+ }
+
+ ibv_ack_async_event(&event);
+ return 0;
+}
+
+void *run_thread_listen_to_events(void *res_in)
+{
+ struct pollfd fds[2];
+ struct resources *res = (struct resources *)res_in;
+
+ fds[0].fd = res->ud_res->ib_ctx->async_fd;
+ fds[0].events = POLLIN;
+ fds[1].fd = res->sync_res->stop_event_fd;
+ fds[1].events = POLLIN;
+
+ while (1) {
+ if (poll(fds, 2, -1) == -1) {
+ if (errno == EINTR)
+ continue;
+ pr_err("ibv_get_async_event failed (errno = %d)\n",
+ errno);
break;
}
- ibv_ack_async_event(&event);
+ if (fds[1].revents & POLLIN)
+ break;
+ if (fds[0].revents & POLLIN)
+ if (do_async_event(res))
+ break;
}
return NULL;
}
-
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2017-12-15 17:28 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-12-14 11:02 [PATCH rdma-core] srp_daemon: Install signal handler for ibsrpdm Honggang LI
[not found] ` <20171214110241.4701-1-honli-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2017-12-14 14:59 ` Bart Van Assche
[not found] ` <1513263572.2986.2.camel-Sjgp3cTcYWE@public.gmane.org>
2017-12-15 1:36 ` Honggang LI
[not found] ` <20171215013628.GA743-Y5OA6DF/u0nid9cnFhDO8BcY2uh10dtjAL8bYrjMMd8@public.gmane.org>
2017-12-15 17:28 ` Jason Gunthorpe [this message]
[not found] ` <20171215172800.GA12434-uk2M96/98Pc@public.gmane.org>
2017-12-15 17:50 ` Bart Van Assche
[not found] ` <1513360253.2571.23.camel-Sjgp3cTcYWE@public.gmane.org>
2017-12-15 17:59 ` Jason Gunthorpe
2017-12-19 12:20 ` Honggang LI
[not found] ` <20171219122050.GA19682-Y5OA6DF/u0nid9cnFhDO8BcY2uh10dtjAL8bYrjMMd8@public.gmane.org>
2017-12-19 16:57 ` Bart Van Assche
[not found] ` <1513702653.2535.3.camel-Sjgp3cTcYWE@public.gmane.org>
2017-12-19 19:12 ` Honggang LI
2017-12-19 21:13 ` Jason Gunthorpe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171215172800.GA12434@ziepe.ca \
--to=jgg-uk2m96/98pc@public.gmane.org \
--cc=Bart.VanAssche-Sjgp3cTcYWE@public.gmane.org \
--cc=honli-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
--cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox