All of lore.kernel.org
 help / color / mirror / Atom feed
From: Minlan Wang via lttng-dev <lttng-dev@lists.lttng.org>
To: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: lttng-dev <lttng-dev@lists.lttng.org>, paulmck <paulmck@kernel.org>
Subject: Re: [lttng-dev] urcu workqueue thread uses 99% of cpu while workqueue is empty
Date: Mon, 20 Jun 2022 23:52:06 -0400	[thread overview]
Message-ID: <20220621035206.GA267474@localhost.localdomain> (raw)
In-Reply-To: <1711122492.5944.1655473043420.JavaMail.zimbra@efficios.com>

[-- Attachment #1: Type: text/plain, Size: 8525 bytes --]

Hi, Mathieu,
On Fri, Jun 17, 2022 at 09:37:23AM -0400, Mathieu Desnoyers wrote:
> Can you try the following patch on top of v0.12.1 + your debug patch ?
> 
> diff --git a/src/workqueue.c b/src/workqueue.c
> index eed1e21e..19fc89c8 100644
> --- a/src/workqueue.c
> +++ b/src/workqueue.c
> @@ -142,6 +142,7 @@ static void futex_wait(int32_t *futex)
>  
>  		return;
>  	}
> +wait:
>  	while ((ret = futex_async(futex, FUTEX_WAIT, -1, NULL, NULL, 0))) {
>  		err = errno;
>  		fprintf(stderr, "%lu: wq %p: %s failed, errno %d\n",
> @@ -161,7 +162,16 @@ static void futex_wait(int32_t *futex)
>  
>  	fprintf(stderr, "%lu: wq %p: %s wait return %d\n",
>  		pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__, ret);
> -
> +	/*
> +	 * Prior queued wakeups can cause futex wait to return even
> +	 * though the futex value is still -1. If this happens, wait
> +	 * again.
> +	 */
> +	if (uatomic_read(futex) == -1) {
> +		fprintf(stderr, "%lu: wq %p: %s futex == -1, wait again\n",
> +			pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__);
> +		goto wait;
> +	}
>  }
>  
>  static void futex_wake_up(int32_t *futex)
> 
I tried this patch on top of my debug patch, the issue disappeared.
The log shows that "futex_wait futex == -1, wait again" did happen several times.
I put the complete log in the attachment.

> 
> We should not rely too much on the ordering of fprintf stderr output between threads,
> because those can be interleaved in any order (each fprintf to stderr is unbuffered).
> If we want more precision in the order of events I would recommend using LTTng-UST for
> fine-grained user-space tracing. This might explain why the futex has an unexpected value
> at line 2099.
I tried this patch on top of previous 2 patches to use lttng_ust_tracepoint in
workqueue.c, but always failed in compile example code with this error:
---
make[5]: Entering directory `/home/mlwang/debug/userspace-rcu/doc/examples/rculfqueue'
  CC       cds_lfq_enqueue.o
  CCLD     cds_lfq_enqueue
../../../src/.libs//liburcu-cds.so: undefined reference to `lttng_ust_tracepoint_provider_workqueue'
---
the patch is as following:

From 2fa8082914da55b6e3815dffd2f749798bf23735 Mon Sep 17 00:00:00 2001
From: root <root@localhost.localdomain>
Date: Tue, 21 Jun 2022 11:42:56 +0800
Subject: [PATCH] add lttng_ust_tracepoint in workqueue.c

---
 src/Makefile.am       |  4 +--
 src/liburcu-cds.pc.in |  2 +-
 src/workqueue.c       |  5 ++++
 src/workqueue.h       |  1 +
 src/workqueue_tp.c    |  4 +++
 src/workqueue_tp.h    | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 95 insertions(+), 3 deletions(-)
 create mode 100644 src/workqueue_tp.c
 create mode 100644 src/workqueue_tp.h

diff --git a/src/Makefile.am b/src/Makefile.am
index 88ccc1f..5a18e31 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -2,7 +2,7 @@ AM_CPPFLAGS += -I$(top_srcdir)/include -I$(top_builddir)/include -I$(top_srcdir)
 
 #Add the -version-info directly here since we are only building
 # library that use the version-info
-AM_LDFLAGS=-version-info $(URCU_LIBRARY_VERSION)
+AM_LDFLAGS=-version-info $(URCU_LIBRARY_VERSION) -llttng-ust -ldl
 if USE_CYGWIN
 AM_LDFLAGS+=-no-undefined
 endif
@@ -56,7 +56,7 @@ liburcu_bp_la_SOURCES = urcu-bp.c urcu-pointer.c $(COMPAT)
 liburcu_bp_la_LIBADD = liburcu-common.la
 
 liburcu_cds_la_SOURCES = rculfqueue.c rculfstack.c lfstack.c \
-	workqueue.c workqueue.h $(RCULFHASH) $(COMPAT)
+	workqueue.c workqueue.h workqueue_tp.c workqueue_tp.h $(RCULFHASH) $(COMPAT)
 liburcu_cds_la_LIBADD = liburcu-common.la
 
 pkgconfigdir = $(libdir)/pkgconfig
diff --git a/src/liburcu-cds.pc.in b/src/liburcu-cds.pc.in
index e3d13af..f4e5e3e 100644
--- a/src/liburcu-cds.pc.in
+++ b/src/liburcu-cds.pc.in
@@ -7,5 +7,5 @@ Name: Userspace RCU Concurrent Data Structures
 Description: Data structures leveraging RCU and atomic operations to provide efficient concurrency-aware storage
 Version: @PACKAGE_VERSION@
 Requires:
-Libs: -L${libdir} -lurcu-cds
+Libs: -L${libdir} -lurcu-cds -llttng-ust -ldl
 Cflags: -I${includedir} 
diff --git a/src/workqueue.c b/src/workqueue.c
index 19fc89c..c29c070 100644
--- a/src/workqueue.c
+++ b/src/workqueue.c
@@ -145,6 +145,7 @@ static void futex_wait(int32_t *futex)
 wait:
 	while ((ret = futex_async(futex, FUTEX_WAIT, -1, NULL, NULL, 0))) {
 		err = errno;
+		lttng_ust_tracepoint(workqueue, futex_wait_err, (long)futex, *futex, err);
 		fprintf(stderr, "%lu: wq %p: %s failed, errno %d\n",
 			pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__, err);
 		switch (err) {
@@ -160,6 +161,7 @@ wait:
 		}
 	}
 
+	lttng_ust_tracepoint(workqueue, futex_wait_return, (long)futex, *futex, ret);
 	fprintf(stderr, "%lu: wq %p: %s wait return %d\n",
 		pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__, ret);
 	/*
@@ -168,6 +170,7 @@ wait:
 	 * again.
 	 */
 	if (uatomic_read(futex) == -1) {
+		lttng_ust_tracepoint(workqueue, futex_wait_again, (long)futex, *futex);
 		fprintf(stderr, "%lu: wq %p: %s futex == -1, wait again\n",
 			pthread_self(), caa_container_of(futex, struct urcu_workqueue, futex), __func__);
 		goto wait;
@@ -184,11 +187,13 @@ static void futex_wake_up(int32_t *futex)
 	if (caa_unlikely(old == -1)) {
 		old = uatomic_xchg(futex, 0);
 		if (old == -1) {
+			lttng_ust_tracepoint(workqueue, futex_wake_up_success, (long)futex, old);
 			fprintf(stderr, "%lu: wq %p, wakeup succeed: old %d\n",
 				pthread_self(),
 				caa_container_of(futex, struct urcu_workqueue, futex),
 				old);
 		} else {
+			lttng_ust_tracepoint(workqueue, futex_wake_up_fail, (long)futex, old);
 			fprintf(stderr, "%lu: wq %p, wakeup failed: old %d\n",
 				pthread_self(),
 				caa_container_of(futex, struct urcu_workqueue, futex),
diff --git a/src/workqueue.h b/src/workqueue.h
index 52b6973..63931a4 100644
--- a/src/workqueue.h
+++ b/src/workqueue.h
@@ -28,6 +28,7 @@
 #include <pthread.h>
 
 #include <urcu/wfcqueue.h>
+#include "workqueue_tp.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/src/workqueue_tp.c b/src/workqueue_tp.c
new file mode 100644
index 0000000..a5f9586
--- /dev/null
+++ b/src/workqueue_tp.c
@@ -0,0 +1,4 @@
+#define LTTNG_UST_TRACEPOINT_CREATE_PROBES
+#define LTTNG_UST_TRACEPOINT_DEFINE
+
+#include "workqueue_tp.h"
diff --git a/src/workqueue_tp.h b/src/workqueue_tp.h
new file mode 100644
index 0000000..e2bce31
--- /dev/null
+++ b/src/workqueue_tp.h
@@ -0,0 +1,82 @@
+#undef	LTTNG_UST_TRACEPOINT_PROVIDER
+#define	LTTNG_UST_TRACEPOINT_PROVIVER workqueue
+
+#undef	LTTNG_UST_TRACEPOINT_INCLUDE
+#define	LTTNG_UST_TRACEPOINT_INCLUDE "./workqueue_tp.h"
+
+#if !defined(_WORKQUEUE_TP_H) || defined(LTTNG_USG_TRACEPOINT_HEADER_MULTI_READ)
+#define	_WORKQUEUE_TP_H
+
+#include <lttng/tracepoint.h>
+
+LTTNG_UST_TRACEPOINT_EVENT(
+	workqueue,
+	futex_wait_err,
+	LTTNG_UST_TP_ARGS( 
+		long, futex_p_arg,
+		int, futex_arg,
+		int, err_arg
+	),
+	LTTNG_UST_TP_FIELDS(
+		lttng_ust_field_integer_hex(long, futex_p_field, futex_p_arg)
+		lttng_ust_field_integer(int, futex_field, futex_arg)
+		lttng_ust_field_integer(int, err_field, err_arg)
+	)
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+	workqueue,
+	futex_wait_return,
+	LTTNG_UST_TP_ARGS( 
+		long, futex_p_arg,
+		int, futex_arg,
+		int, ret_arg
+	),
+	LTTNG_UST_TP_FIELDS(
+		lttng_ust_field_integer_hex(long, futex_p_field, futex_p_arg)
+		lttng_ust_field_integer(int, futex_field, futex_arg)
+		lttng_ust_field_integer(int, ret_field, ret_arg)
+	)
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+	workqueue,
+	futex_wait_again,
+	LTTNG_UST_TP_ARGS( 
+		long, futex_p_arg,
+		int, futex_arg
+	),
+	LTTNG_UST_TP_FIELDS(
+		lttng_ust_field_integer_hex(long, futex_p_field, futex_p_arg)
+		lttng_ust_field_integer(int, futex_field, futex_arg)
+	)
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+	workqueue,
+	futex_wake_up_success,
+	LTTNG_UST_TP_ARGS( 
+		long, futex_p_arg,
+		int, futex_arg
+	),
+	LTTNG_UST_TP_FIELDS(
+		lttng_ust_field_integer_hex(long, futex_p_field, futex_p_arg)
+		lttng_ust_field_integer(int, futex_field, futex_arg)
+	)
+)
+
+LTTNG_UST_TRACEPOINT_EVENT(
+	workqueue,
+	futex_wake_up_fail,
+	LTTNG_UST_TP_ARGS( 
+		long, futex_p_arg,
+		int, futex_arg
+	),
+	LTTNG_UST_TP_FIELDS(
+		lttng_ust_field_integer_hex(long, futex_p_field, futex_p_arg)
+		lttng_ust_field_integer(int, futex_field, futex_arg)
+	)
+)
+
+#endif /* _WORKQUEUE_TP_H */
+#include <lttng/tracepoint-event.h>
-- 
1.8.3.1

Is "perf probe" accurate in tracing the futex event here?  Maybe i can try that.

B.R
Minlan

[-- Attachment #2: log.tar.bz2 --]
[-- Type: application/x-bzip2, Size: 153355 bytes --]

[-- Attachment #3: Type: text/plain, Size: 156 bytes --]

_______________________________________________
lttng-dev mailing list
lttng-dev@lists.lttng.org
https://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev

  reply	other threads:[~2022-06-21  3:52 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-14  3:55 [lttng-dev] urcu workqueue thread uses 99% of cpu while workqueue is empty Minlan Wang via lttng-dev
2022-06-14 13:39 ` Mathieu Desnoyers via lttng-dev
2022-06-14 13:40   ` Mathieu Desnoyers via lttng-dev
2022-06-14 14:19 ` Mathieu Desnoyers via lttng-dev
2022-06-14 15:53 ` Mathieu Desnoyers via lttng-dev
2022-06-15  3:49   ` Minlan Wang via lttng-dev
2022-06-15 13:35     ` Mathieu Desnoyers via lttng-dev
2022-06-15 14:15     ` Mathieu Desnoyers via lttng-dev
2022-06-16  7:09       ` Minlan Wang via lttng-dev
2022-06-16  8:09       ` Minlan Wang via lttng-dev
2022-06-17 13:37         ` Mathieu Desnoyers via lttng-dev
2022-06-21  3:52           ` Minlan Wang via lttng-dev [this message]
2022-06-21 13:12             ` [lttng-dev] ***UNCHECKED*** " Mathieu Desnoyers via lttng-dev
2022-06-22  7:45               ` Minlan Wang via lttng-dev
2022-06-22 13:19                 ` [lttng-dev] ***UNCHECKED*** " Mathieu Desnoyers via lttng-dev
2022-06-22 20:28                   ` Mathieu Desnoyers via lttng-dev
2022-06-22 20:52                     ` Mathieu Desnoyers via lttng-dev
2022-06-23  9:08                       ` Minlan Wang via lttng-dev
     [not found]                   ` <20220623034528.GA271179@localhost.localdomain>
2022-06-23  3:57                     ` Minlan Wang via lttng-dev
2022-06-23 14:09                       ` Mathieu Desnoyers via lttng-dev
2022-06-24  6:21                         ` Minlan Wang via lttng-dev
2022-06-23  8:36                   ` [lttng-dev] [PART 4/4] " Minlan Wang via lttng-dev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220621035206.GA267474@localhost.localdomain \
    --to=lttng-dev@lists.lttng.org \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=paulmck@kernel.org \
    --cc=wangminlan@szsandstone.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.