* [PATCH] rcu_nest: Update description of rcu_nest.[hc]
@ 2019-05-29 4:47 Junchang Wang
2019-05-29 13:30 ` Paul E. McKenney
0 siblings, 1 reply; 2+ messages in thread
From: Junchang Wang @ 2019-05-29 4:47 UTC (permalink / raw)
To: paulmck, akiyks; +Cc: perfbook, Junchang Wang
Commit 61e0eb9cdb89 ("rcu_nest: Fix concurrency issues") fixes a few
concurrency issues in rcu_nest.h and rcu_nest.c. This patch accordingly
updates the description in Appendix B Toy RCU Implementations. Besides, the
new scheme, which directly extracts snippet from rcu_nest.[hc], is applied.
Signed-off-by: Junchang Wang <junchangwang@gmail.com>
---
Thanks again for writing and sharing perfbook. I can always find great
pleasure in reading through a chapter of this book :-).
CodeSamples/defer/rcu_nest.c | 35 ++++++++------
CodeSamples/defer/rcu_nest.h | 36 +++++++++------
appendix/toyrcu/toyrcu.tex | 108 +++++++++++++++----------------------------
3 files changed, 81 insertions(+), 98 deletions(-)
diff --git a/CodeSamples/defer/rcu_nest.c b/CodeSamples/defer/rcu_nest.c
index 362f466..7e36866 100644
--- a/CodeSamples/defer/rcu_nest.c
+++ b/CodeSamples/defer/rcu_nest.c
@@ -21,45 +21,52 @@
#include "../api.h"
#include "rcu_nest.h"
-void synchronize_rcu(void)
+//\begin{snippet}[labelbase=ln:defer:rcu_nest:synchronize,gobbleblank=yes,commandchars=\%\@\$]
+void synchronize_rcu(void) //\lnlbl{syn:b}
{
int t;
-
+ //\fcvblank
/* Memory barrier ensures mutation seen before grace period. */
- smp_mb();
+ smp_mb(); //\lnlbl{syn:mb1}
/* Only one synchronize_rcu() at a time. */
- spin_lock(&rcu_gp_lock);
+ spin_lock(&rcu_gp_lock); //\lnlbl{syn:spinlock}
/* Advance to a new grace-period number, enforce ordering. */
- WRITE_ONCE(rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR_BOTTOM_BIT);
- smp_mb();
+ WRITE_ONCE(rcu_gp_ctr, rcu_gp_ctr + //\lnlbl{syn:incgp1}
+ RCU_GP_CTR_BOTTOM_BIT); //\lnlbl{syn:incgp2}
+ smp_mb(); //\lnlbl{syn:mb2}
/*
* Wait until all threads are either out of their RCU read-side
* critical sections or are aware of the new grace period.
*/
- for_each_thread(t) {
- while (rcu_gp_ongoing(t) &&
- ((READ_ONCE(per_thread(rcu_reader_gp, t)) -
- rcu_gp_ctr) < 0)) {
+ for_each_thread(t) { //\lnlbl{syn:scan:b}
+ while (rcu_gp_ongoing(t) && //\lnlbl{syn:ongoing}
+ ((READ_ONCE(per_thread(rcu_reader_gp, t)) -//\lnlbl{syn:lt1}
+ rcu_gp_ctr) < 0)) { //\lnlbl{syn:lt2}
+#ifndef FCV_SNIPPET
/*@@@ poll(NULL, 0, 10); */
barrier();
+#else
+ poll(NULL, 0, 10); //\lnlbl{syn:poll}
+#endif
}
- }
+ } //\lnlbl{syn:scan:e}
/* Let other synchronize_rcu() instances move ahead. */
- spin_unlock(&rcu_gp_lock);
+ spin_unlock(&rcu_gp_lock); //\lnlbl{syn:spinunlock}
/* Ensure that any subsequent free()s happen -after- above checks. */
- smp_mb();
-}
+ smp_mb(); //\lnlbl{syn:mb3}
+} //\lnlbl{syn:e}
+//\end{snippet}
#ifdef TEST
#define RCU_READ_NESTABLE
diff --git a/CodeSamples/defer/rcu_nest.h b/CodeSamples/defer/rcu_nest.h
index 7e7b7de..19a0956 100644
--- a/CodeSamples/defer/rcu_nest.h
+++ b/CodeSamples/defer/rcu_nest.h
@@ -42,25 +42,28 @@ static void rcu_init(void)
init_per_thread(rcu_reader_gp, 0);
}
-static void rcu_read_lock(void)
+//\begin{snippet}[labelbase=ln:defer:rcu_nest:read_lock_unlock,gobbleblank=yes,commandchars=\%\@\$]
+static void rcu_read_lock(void) //\lnlbl{lock:b}
{
unsigned long tmp;
unsigned long *rrgp;
-
+ //\fcvblank
/*
* If this is the outermost RCU read-side critical section,
* copy the global grace-period counter. In either case,
* increment the nesting count held in the low-order bits.
*/
- rrgp = &__get_thread_var(rcu_reader_gp);
+ rrgp = &__get_thread_var(rcu_reader_gp); //\lnlbl{lock:readgp}
+#ifndef FCV_SNIPPET
retry:
- tmp = *rrgp;
- if ((tmp & RCU_GP_CTR_NEST_MASK) == 0)
- tmp = READ_ONCE(rcu_gp_ctr);
- tmp++;
- WRITE_ONCE(*rrgp, tmp);
- smp_mb();
+#endif
+ tmp = *rrgp; //\lnlbl{lock:wtmp1}
+ if ((tmp & RCU_GP_CTR_NEST_MASK) == 0) //\lnlbl{lock:checktmp}
+ tmp = READ_ONCE(rcu_gp_ctr); //\lnlbl{lock:wtmp2}
+ tmp++; //\lnlbl{lock:inctmp}
+ WRITE_ONCE(*rrgp, tmp); //\lnlbl{lock:writegp}
+ smp_mb(); //\lnlbl{lock:mb1}
/*
* A reader could be suspended in between fetching the value of *rrgp
@@ -71,14 +74,15 @@ retry:
* ULONG_MAX. To handle this correctly, we adopt the helper function
* ULONG_CMP_GE.
*/
-
+#ifndef FCV_SNIPPET
if (((tmp & RCU_GP_CTR_NEST_MASK) == 1) &&
ULONG_CMP_GE(READ_ONCE(rcu_gp_ctr), tmp + MAX_GP_ADV_DISTANCE)) {
WRITE_ONCE(*rrgp, *rrgp - 1);
goto retry;
}
+#endif
}
-
+ //\fcvblank
static void rcu_read_unlock(void)
{
/*
@@ -86,11 +90,15 @@ static void rcu_read_unlock(void)
* which had better not initially be zero.
*/
- smp_mb();
+ smp_mb(); //\lnlbl{unlock:mb1}
+#ifndef FCV_SNIPPET
#ifdef DEBUG_EXTREME
BUG_ON((__get_thread_var(rcu_reader_gp) & RCU_GP_CTR_NEST_MASK) != 0);
-#endif /* #ifdef DEBUG_EXTREME */
- __get_thread_var(rcu_reader_gp)--;
+#endif /* #ifdef DEBUG_EXTREME */ //\fcvexclude
+#endif
+ __get_thread_var(rcu_reader_gp)--; //\lnlbl{unlock:decgp}
}
+ //\fcvblank
+//\end{snippet}
extern void synchronize_rcu(void);
diff --git a/appendix/toyrcu/toyrcu.tex b/appendix/toyrcu/toyrcu.tex
index ded754d..0e84b05 100644
--- a/appendix/toyrcu/toyrcu.tex
+++ b/appendix/toyrcu/toyrcu.tex
@@ -1395,60 +1395,15 @@ variables.
\end{listing}
\begin{listing}[tb]
-{ \scriptsize
-\begin{verbbox}
- 1 static void rcu_read_lock(void)
- 2 {
- 3 long tmp;
- 4 long *rrgp;
- 5
- 6 rrgp = &__get_thread_var(rcu_reader_gp);
- 7 tmp = *rrgp;
- 8 if ((tmp & RCU_GP_CTR_NEST_MASK) == 0)
- 9 tmp = ACCESS_ONCE(rcu_gp_ctr);
-10 tmp++;
-11 *rrgp = tmp;
-12 smp_mb();
-13 }
-14
-15 static void rcu_read_unlock(void)
-16 {
-17 long tmp;
-18
-19 smp_mb();
-20 __get_thread_var(rcu_reader_gp)--;
-21 }
-22
-23 void synchronize_rcu(void)
-24 {
-25 int t;
-26
-27 smp_mb();
-28 spin_lock(&rcu_gp_lock);
-29 ACCESS_ONCE(rcu_gp_ctr) +=
-30 RCU_GP_CTR_BOTTOM_BIT;
-31 smp_mb();
-32 for_each_thread(t) {
-33 while (rcu_gp_ongoing(t) &&
-34 ((per_thread(rcu_reader_gp, t) -
-35 rcu_gp_ctr) < 0)) {
-36 poll(NULL, 0, 10);
-37 }
-38 }
-39 spin_unlock(&rcu_gp_lock);
-40 smp_mb();
-41 }
-\end{verbbox}
-}
-\centering
-\theverbbox
+\input{CodeSamples/defer/rcu_nest@read_lock_unlock.fcv}\vspace*{-11pt}\fvset{firstnumber=last}
+\input{CodeSamples/defer/rcu_nest@synchronize.fcv}\fvset{firstnumber=auto}
\caption{Nestable RCU Using a Free-Running Counter}
\label{lst:app:toyrcu:Nestable RCU Using a Free-Running Counter}
\end{listing}
Listing~\ref{lst:app:toyrcu:Nestable RCU Using a Free-Running Counter}
(\path{rcu_nest.h} and \path{rcu_nest.c})
-show an RCU implementation based on a single global free-running counter,
+shows an RCU implementation based on a single global free-running counter,
but that permits nesting of RCU read-side critical sections.
This nestability is accomplished by reserving the low-order bits of the
global \co{rcu_gp_ctr} to count nesting, using the definitions shown in
@@ -1472,23 +1427,28 @@ reserves seven bits, for a maximum RCU read-side critical-section
nesting depth of 127, which should be well in excess of that needed
by most applications.
+\begin{lineref}[ln:defer:rcu_nest:read_lock_unlock:lock]
The resulting \co{rcu_read_lock()} implementation is still reasonably
straightforward.
-Line~6 places a pointer to this thread's instance of \co{rcu_reader_gp}
+Line~\lnref{readgp} places a pointer to
+this thread's instance of \co{rcu_reader_gp}
into the local variable \co{rrgp}, minimizing the number of expensive
calls to the pthreads thread-local-state API.
-Line~7 records the current value of \co{rcu_reader_gp} into another
-local variable \co{tmp}, and line~8 checks to see if the low-order
-bits are zero, which would indicate that this is the outermost
-\co{rcu_read_lock()}.
-If so, line~9 places the global \co{rcu_gp_ctr} into \co{tmp} because
-the current value previously fetched by line~7 is likely to be obsolete.
-In either case, line~10 increments the nesting depth, which you will
-recall is stored in the seven low-order bits of the counter.
-Line~11 stores the updated counter back into this thread's instance
-of \co{rcu_reader_gp}, and, finally, line~12 executes a memory
-barrier to prevent the RCU read-side critical section from bleeding out
+Line~\lnref{wtmp1} records the current value of \co{rcu_reader_gp}
+into another local variable \co{tmp}, and line~\lnref{checktmp} checks
+to see if the low-order bits are zero, which would indicate that
+this is the outermost \co{rcu_read_lock()}.
+If so, line~\lnref{wtmp2} places the global \co{rcu_gp_ctr}
+into \co{tmp} because the current value previously fetched by
+line~\lnref{wtmp1} is likely to be obsolete.
+In either case, line~\lnref{inctmp} increments the nesting depth,
+which you will recall is stored in the seven low-order bits of the counter.
+Line~\lnref{writegp} stores the updated counter back into this thread's
+instance of \co{rcu_reader_gp}, and,
+finally, line~\lnref{mb1} executes a memory barrier
+to prevent the RCU read-side critical section from bleeding out
into the code preceding the call to \co{rcu_read_lock()}.
+\end{lineref}
In other words, this implementation of \co{rcu_read_lock()} picks up a copy
of the global \co{rcu_gp_ctr} unless the current invocation of
@@ -1499,29 +1459,35 @@ Either way, it increments whatever value it fetched in order to record
an additional nesting level, and stores the result in the current
thread's instance of \co{rcu_reader_gp}.
+\begin{lineref}[ln:defer:rcu_nest:read_lock_unlock:unlock]
Interestingly enough, despite their \co{rcu_read_lock()} differences,
the implementation of \co{rcu_read_unlock()}
is broadly similar to that shown in
Section~\ref{sec:app:toyrcu:RCU Based on Free-Running Counter}.
-Line~19 executes a memory barrier in order to prevent the RCU read-side
+Line~\lnref{mb1} executes a memory barrier
+in order to prevent the RCU read-side
critical section from bleeding out into code following the call
to \co{rcu_read_unlock()}, and
-line~20 decrements this thread's instance of \co{rcu_reader_gp},
+line~\lnref{decgp} decrements this thread's instance of \co{rcu_reader_gp},
which has the effect of decrementing the nesting count contained in
\co{rcu_reader_gp}'s low-order bits.
Debugging versions of this primitive would check (before decrementing!)
that these low-order bits were non-zero.
+\end{lineref}
+\begin{lineref}[ln:defer:rcu_nest:synchronize:syn]
The implementation of \co{synchronize_rcu()} is quite similar to
that shown in
Section~\ref{sec:app:toyrcu:RCU Based on Free-Running Counter}.
There are two differences.
-The first is that lines~29 and~30 adds \co{RCU_GP_CTR_BOTTOM_BIT}
-to the global \co{rcu_gp_ctr} instead of adding the constant ``2'',
-and the second is that the comparison on line~33 has been abstracted
-out to a separate function, where it checks the bit indicated
-by \co{RCU_GP_CTR_BOTTOM_BIT} instead of unconditionally checking
-the low-order bit.
+The first is that lines~\lnref{incgp1} and~\lnref{incgp2}
+adds \co{RCU_GP_CTR_BOTTOM_BIT} to the global \co{rcu_gp_ctr}
+instead of adding the constant ``2'',
+and the second is that the comparison on line~\lnref{ongoing}
+has been abstracted out to a separate function,
+where it checks the bit indicated by \co{RCU_GP_CTR_BOTTOM_BIT}
+instead of unconditionally checking the low-order bit.
+\end{lineref}
This approach achieves read-side performance almost equal to that
shown in
@@ -1562,10 +1528,12 @@ overhead.
how could you double the time required to overflow the global
\co{rcu_gp_ctr}?
\QuickQuizAnswer{
+ \begin{lineref}[ln:defer:rcu_nest:synchronize:syn]
One way would be to replace the magnitude comparison on
- lines~33 and 34 with an inequality check of the per-thread
- \co{rcu_reader_gp} variable against
+ lines~\lnref{lt1} and \lnref{lt2} with an inequality check of
+ the per-thread \co{rcu_reader_gp} variable against
\co{rcu_gp_ctr+RCU_GP_CTR_BOTTOM_BIT}.
+ \end{lineref}
} \QuickQuizEnd
\QuickQuiz{}
--
2.7.4
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH] rcu_nest: Update description of rcu_nest.[hc]
2019-05-29 4:47 [PATCH] rcu_nest: Update description of rcu_nest.[hc] Junchang Wang
@ 2019-05-29 13:30 ` Paul E. McKenney
0 siblings, 0 replies; 2+ messages in thread
From: Paul E. McKenney @ 2019-05-29 13:30 UTC (permalink / raw)
To: Junchang Wang; +Cc: akiyks, perfbook
On Wed, May 29, 2019 at 12:47:42PM +0800, Junchang Wang wrote:
> Commit 61e0eb9cdb89 ("rcu_nest: Fix concurrency issues") fixes a few
> concurrency issues in rcu_nest.h and rcu_nest.c. This patch accordingly
> updates the description in Appendix B Toy RCU Implementations. Besides, the
> new scheme, which directly extracts snippet from rcu_nest.[hc], is applied.
>
>
> Signed-off-by: Junchang Wang <junchangwang@gmail.com>
Applied and pushed, thank you, Junchang!
Thanx, Paul
> ---
>
> Thanks again for writing and sharing perfbook. I can always find great
> pleasure in reading through a chapter of this book :-).
>
> CodeSamples/defer/rcu_nest.c | 35 ++++++++------
> CodeSamples/defer/rcu_nest.h | 36 +++++++++------
> appendix/toyrcu/toyrcu.tex | 108 +++++++++++++++----------------------------
> 3 files changed, 81 insertions(+), 98 deletions(-)
>
> diff --git a/CodeSamples/defer/rcu_nest.c b/CodeSamples/defer/rcu_nest.c
> index 362f466..7e36866 100644
> --- a/CodeSamples/defer/rcu_nest.c
> +++ b/CodeSamples/defer/rcu_nest.c
> @@ -21,45 +21,52 @@
> #include "../api.h"
> #include "rcu_nest.h"
>
> -void synchronize_rcu(void)
> +//\begin{snippet}[labelbase=ln:defer:rcu_nest:synchronize,gobbleblank=yes,commandchars=\%\@\$]
> +void synchronize_rcu(void) //\lnlbl{syn:b}
> {
> int t;
> -
> + //\fcvblank
> /* Memory barrier ensures mutation seen before grace period. */
>
> - smp_mb();
> + smp_mb(); //\lnlbl{syn:mb1}
>
> /* Only one synchronize_rcu() at a time. */
>
> - spin_lock(&rcu_gp_lock);
> + spin_lock(&rcu_gp_lock); //\lnlbl{syn:spinlock}
>
> /* Advance to a new grace-period number, enforce ordering. */
>
> - WRITE_ONCE(rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR_BOTTOM_BIT);
> - smp_mb();
> + WRITE_ONCE(rcu_gp_ctr, rcu_gp_ctr + //\lnlbl{syn:incgp1}
> + RCU_GP_CTR_BOTTOM_BIT); //\lnlbl{syn:incgp2}
> + smp_mb(); //\lnlbl{syn:mb2}
>
> /*
> * Wait until all threads are either out of their RCU read-side
> * critical sections or are aware of the new grace period.
> */
>
> - for_each_thread(t) {
> - while (rcu_gp_ongoing(t) &&
> - ((READ_ONCE(per_thread(rcu_reader_gp, t)) -
> - rcu_gp_ctr) < 0)) {
> + for_each_thread(t) { //\lnlbl{syn:scan:b}
> + while (rcu_gp_ongoing(t) && //\lnlbl{syn:ongoing}
> + ((READ_ONCE(per_thread(rcu_reader_gp, t)) -//\lnlbl{syn:lt1}
> + rcu_gp_ctr) < 0)) { //\lnlbl{syn:lt2}
> +#ifndef FCV_SNIPPET
> /*@@@ poll(NULL, 0, 10); */
> barrier();
> +#else
> + poll(NULL, 0, 10); //\lnlbl{syn:poll}
> +#endif
> }
> - }
> + } //\lnlbl{syn:scan:e}
>
> /* Let other synchronize_rcu() instances move ahead. */
>
> - spin_unlock(&rcu_gp_lock);
> + spin_unlock(&rcu_gp_lock); //\lnlbl{syn:spinunlock}
>
> /* Ensure that any subsequent free()s happen -after- above checks. */
>
> - smp_mb();
> -}
> + smp_mb(); //\lnlbl{syn:mb3}
> +} //\lnlbl{syn:e}
> +//\end{snippet}
>
> #ifdef TEST
> #define RCU_READ_NESTABLE
> diff --git a/CodeSamples/defer/rcu_nest.h b/CodeSamples/defer/rcu_nest.h
> index 7e7b7de..19a0956 100644
> --- a/CodeSamples/defer/rcu_nest.h
> +++ b/CodeSamples/defer/rcu_nest.h
> @@ -42,25 +42,28 @@ static void rcu_init(void)
> init_per_thread(rcu_reader_gp, 0);
> }
>
> -static void rcu_read_lock(void)
> +//\begin{snippet}[labelbase=ln:defer:rcu_nest:read_lock_unlock,gobbleblank=yes,commandchars=\%\@\$]
> +static void rcu_read_lock(void) //\lnlbl{lock:b}
> {
> unsigned long tmp;
> unsigned long *rrgp;
> -
> + //\fcvblank
> /*
> * If this is the outermost RCU read-side critical section,
> * copy the global grace-period counter. In either case,
> * increment the nesting count held in the low-order bits.
> */
>
> - rrgp = &__get_thread_var(rcu_reader_gp);
> + rrgp = &__get_thread_var(rcu_reader_gp); //\lnlbl{lock:readgp}
> +#ifndef FCV_SNIPPET
> retry:
> - tmp = *rrgp;
> - if ((tmp & RCU_GP_CTR_NEST_MASK) == 0)
> - tmp = READ_ONCE(rcu_gp_ctr);
> - tmp++;
> - WRITE_ONCE(*rrgp, tmp);
> - smp_mb();
> +#endif
> + tmp = *rrgp; //\lnlbl{lock:wtmp1}
> + if ((tmp & RCU_GP_CTR_NEST_MASK) == 0) //\lnlbl{lock:checktmp}
> + tmp = READ_ONCE(rcu_gp_ctr); //\lnlbl{lock:wtmp2}
> + tmp++; //\lnlbl{lock:inctmp}
> + WRITE_ONCE(*rrgp, tmp); //\lnlbl{lock:writegp}
> + smp_mb(); //\lnlbl{lock:mb1}
>
> /*
> * A reader could be suspended in between fetching the value of *rrgp
> @@ -71,14 +74,15 @@ retry:
> * ULONG_MAX. To handle this correctly, we adopt the helper function
> * ULONG_CMP_GE.
> */
> -
> +#ifndef FCV_SNIPPET
> if (((tmp & RCU_GP_CTR_NEST_MASK) == 1) &&
> ULONG_CMP_GE(READ_ONCE(rcu_gp_ctr), tmp + MAX_GP_ADV_DISTANCE)) {
> WRITE_ONCE(*rrgp, *rrgp - 1);
> goto retry;
> }
> +#endif
> }
> -
> + //\fcvblank
> static void rcu_read_unlock(void)
> {
> /*
> @@ -86,11 +90,15 @@ static void rcu_read_unlock(void)
> * which had better not initially be zero.
> */
>
> - smp_mb();
> + smp_mb(); //\lnlbl{unlock:mb1}
> +#ifndef FCV_SNIPPET
> #ifdef DEBUG_EXTREME
> BUG_ON((__get_thread_var(rcu_reader_gp) & RCU_GP_CTR_NEST_MASK) != 0);
> -#endif /* #ifdef DEBUG_EXTREME */
> - __get_thread_var(rcu_reader_gp)--;
> +#endif /* #ifdef DEBUG_EXTREME */ //\fcvexclude
> +#endif
> + __get_thread_var(rcu_reader_gp)--; //\lnlbl{unlock:decgp}
> }
> + //\fcvblank
> +//\end{snippet}
>
> extern void synchronize_rcu(void);
> diff --git a/appendix/toyrcu/toyrcu.tex b/appendix/toyrcu/toyrcu.tex
> index ded754d..0e84b05 100644
> --- a/appendix/toyrcu/toyrcu.tex
> +++ b/appendix/toyrcu/toyrcu.tex
> @@ -1395,60 +1395,15 @@ variables.
> \end{listing}
>
> \begin{listing}[tb]
> -{ \scriptsize
> -\begin{verbbox}
> - 1 static void rcu_read_lock(void)
> - 2 {
> - 3 long tmp;
> - 4 long *rrgp;
> - 5
> - 6 rrgp = &__get_thread_var(rcu_reader_gp);
> - 7 tmp = *rrgp;
> - 8 if ((tmp & RCU_GP_CTR_NEST_MASK) == 0)
> - 9 tmp = ACCESS_ONCE(rcu_gp_ctr);
> -10 tmp++;
> -11 *rrgp = tmp;
> -12 smp_mb();
> -13 }
> -14
> -15 static void rcu_read_unlock(void)
> -16 {
> -17 long tmp;
> -18
> -19 smp_mb();
> -20 __get_thread_var(rcu_reader_gp)--;
> -21 }
> -22
> -23 void synchronize_rcu(void)
> -24 {
> -25 int t;
> -26
> -27 smp_mb();
> -28 spin_lock(&rcu_gp_lock);
> -29 ACCESS_ONCE(rcu_gp_ctr) +=
> -30 RCU_GP_CTR_BOTTOM_BIT;
> -31 smp_mb();
> -32 for_each_thread(t) {
> -33 while (rcu_gp_ongoing(t) &&
> -34 ((per_thread(rcu_reader_gp, t) -
> -35 rcu_gp_ctr) < 0)) {
> -36 poll(NULL, 0, 10);
> -37 }
> -38 }
> -39 spin_unlock(&rcu_gp_lock);
> -40 smp_mb();
> -41 }
> -\end{verbbox}
> -}
> -\centering
> -\theverbbox
> +\input{CodeSamples/defer/rcu_nest@read_lock_unlock.fcv}\vspace*{-11pt}\fvset{firstnumber=last}
> +\input{CodeSamples/defer/rcu_nest@synchronize.fcv}\fvset{firstnumber=auto}
> \caption{Nestable RCU Using a Free-Running Counter}
> \label{lst:app:toyrcu:Nestable RCU Using a Free-Running Counter}
> \end{listing}
>
> Listing~\ref{lst:app:toyrcu:Nestable RCU Using a Free-Running Counter}
> (\path{rcu_nest.h} and \path{rcu_nest.c})
> -show an RCU implementation based on a single global free-running counter,
> +shows an RCU implementation based on a single global free-running counter,
> but that permits nesting of RCU read-side critical sections.
> This nestability is accomplished by reserving the low-order bits of the
> global \co{rcu_gp_ctr} to count nesting, using the definitions shown in
> @@ -1472,23 +1427,28 @@ reserves seven bits, for a maximum RCU read-side critical-section
> nesting depth of 127, which should be well in excess of that needed
> by most applications.
>
> +\begin{lineref}[ln:defer:rcu_nest:read_lock_unlock:lock]
> The resulting \co{rcu_read_lock()} implementation is still reasonably
> straightforward.
> -Line~6 places a pointer to this thread's instance of \co{rcu_reader_gp}
> +Line~\lnref{readgp} places a pointer to
> +this thread's instance of \co{rcu_reader_gp}
> into the local variable \co{rrgp}, minimizing the number of expensive
> calls to the pthreads thread-local-state API.
> -Line~7 records the current value of \co{rcu_reader_gp} into another
> -local variable \co{tmp}, and line~8 checks to see if the low-order
> -bits are zero, which would indicate that this is the outermost
> -\co{rcu_read_lock()}.
> -If so, line~9 places the global \co{rcu_gp_ctr} into \co{tmp} because
> -the current value previously fetched by line~7 is likely to be obsolete.
> -In either case, line~10 increments the nesting depth, which you will
> -recall is stored in the seven low-order bits of the counter.
> -Line~11 stores the updated counter back into this thread's instance
> -of \co{rcu_reader_gp}, and, finally, line~12 executes a memory
> -barrier to prevent the RCU read-side critical section from bleeding out
> +Line~\lnref{wtmp1} records the current value of \co{rcu_reader_gp}
> +into another local variable \co{tmp}, and line~\lnref{checktmp} checks
> +to see if the low-order bits are zero, which would indicate that
> +this is the outermost \co{rcu_read_lock()}.
> +If so, line~\lnref{wtmp2} places the global \co{rcu_gp_ctr}
> +into \co{tmp} because the current value previously fetched by
> +line~\lnref{wtmp1} is likely to be obsolete.
> +In either case, line~\lnref{inctmp} increments the nesting depth,
> +which you will recall is stored in the seven low-order bits of the counter.
> +Line~\lnref{writegp} stores the updated counter back into this thread's
> +instance of \co{rcu_reader_gp}, and,
> +finally, line~\lnref{mb1} executes a memory barrier
> +to prevent the RCU read-side critical section from bleeding out
> into the code preceding the call to \co{rcu_read_lock()}.
> +\end{lineref}
>
> In other words, this implementation of \co{rcu_read_lock()} picks up a copy
> of the global \co{rcu_gp_ctr} unless the current invocation of
> @@ -1499,29 +1459,35 @@ Either way, it increments whatever value it fetched in order to record
> an additional nesting level, and stores the result in the current
> thread's instance of \co{rcu_reader_gp}.
>
> +\begin{lineref}[ln:defer:rcu_nest:read_lock_unlock:unlock]
> Interestingly enough, despite their \co{rcu_read_lock()} differences,
> the implementation of \co{rcu_read_unlock()}
> is broadly similar to that shown in
> Section~\ref{sec:app:toyrcu:RCU Based on Free-Running Counter}.
> -Line~19 executes a memory barrier in order to prevent the RCU read-side
> +Line~\lnref{mb1} executes a memory barrier
> +in order to prevent the RCU read-side
> critical section from bleeding out into code following the call
> to \co{rcu_read_unlock()}, and
> -line~20 decrements this thread's instance of \co{rcu_reader_gp},
> +line~\lnref{decgp} decrements this thread's instance of \co{rcu_reader_gp},
> which has the effect of decrementing the nesting count contained in
> \co{rcu_reader_gp}'s low-order bits.
> Debugging versions of this primitive would check (before decrementing!)
> that these low-order bits were non-zero.
> +\end{lineref}
>
> +\begin{lineref}[ln:defer:rcu_nest:synchronize:syn]
> The implementation of \co{synchronize_rcu()} is quite similar to
> that shown in
> Section~\ref{sec:app:toyrcu:RCU Based on Free-Running Counter}.
> There are two differences.
> -The first is that lines~29 and~30 adds \co{RCU_GP_CTR_BOTTOM_BIT}
> -to the global \co{rcu_gp_ctr} instead of adding the constant ``2'',
> -and the second is that the comparison on line~33 has been abstracted
> -out to a separate function, where it checks the bit indicated
> -by \co{RCU_GP_CTR_BOTTOM_BIT} instead of unconditionally checking
> -the low-order bit.
> +The first is that lines~\lnref{incgp1} and~\lnref{incgp2}
> +adds \co{RCU_GP_CTR_BOTTOM_BIT} to the global \co{rcu_gp_ctr}
> +instead of adding the constant ``2'',
> +and the second is that the comparison on line~\lnref{ongoing}
> +has been abstracted out to a separate function,
> +where it checks the bit indicated by \co{RCU_GP_CTR_BOTTOM_BIT}
> +instead of unconditionally checking the low-order bit.
> +\end{lineref}
>
> This approach achieves read-side performance almost equal to that
> shown in
> @@ -1562,10 +1528,12 @@ overhead.
> how could you double the time required to overflow the global
> \co{rcu_gp_ctr}?
> \QuickQuizAnswer{
> + \begin{lineref}[ln:defer:rcu_nest:synchronize:syn]
> One way would be to replace the magnitude comparison on
> - lines~33 and 34 with an inequality check of the per-thread
> - \co{rcu_reader_gp} variable against
> + lines~\lnref{lt1} and \lnref{lt2} with an inequality check of
> + the per-thread \co{rcu_reader_gp} variable against
> \co{rcu_gp_ctr+RCU_GP_CTR_BOTTOM_BIT}.
> + \end{lineref}
> } \QuickQuizEnd
>
> \QuickQuiz{}
> --
> 2.7.4
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2019-05-29 13:30 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-05-29 4:47 [PATCH] rcu_nest: Update description of rcu_nest.[hc] Junchang Wang
2019-05-29 13:30 ` Paul E. McKenney
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.