* [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU @ 2014-02-12 19:09 Mike Day 2014-02-13 9:11 ` Alex Bligh 2014-02-15 10:23 ` Alex Bligh 0 siblings, 2 replies; 10+ messages in thread From: Mike Day @ 2014-02-12 19:09 UTC (permalink / raw) To: qemu-devel; +Cc: pbonzini, alex, peter.maydell Allow readers to use RCU when reading Qemu timer lists. Applies to Paolo Bonzini's RCU branch, https://github.com/bonzini/qemu/tree/rcu. This patch is for comment and review only. The rcu branch needs to be rebased on upstream. Signed-off-by: Mike Day <ncmike@ncultra.org> --- include/qemu/timer.h | 9 +++++- qemu-timer.c | 88 +++++++++++++++++++++++++++++++++++----------------- 2 files changed, 67 insertions(+), 30 deletions(-) diff --git a/include/qemu/timer.h b/include/qemu/timer.h index b58903b..5aaa213 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -4,7 +4,13 @@ #include "qemu/typedefs.h" #include "qemu-common.h" #include "qemu/notify.h" - +#ifdef __GNUC__ +#ifndef __ATOMIC_RELEASE +#define __ATOMIC_RELEASE +#endif +#endif +#include "qemu/atomic.h" +#include "qemu/rcu.h" /* timers */ #define SCALE_MS 1000000 @@ -61,6 +67,7 @@ struct QEMUTimer { void *opaque; QEMUTimer *next; int scale; + struct rcu_head rcu; }; extern QEMUTimerListGroup main_loop_tlg; diff --git a/qemu-timer.c b/qemu-timer.c index 6b62e88..27285ca 100644 --- a/qemu-timer.c +++ b/qemu-timer.c @@ -29,6 +29,7 @@ #include "hw/hw.h" #include "qemu/timer.h" +#include "qemu/rcu_queue.h" #ifdef CONFIG_POSIX #include <pthread.h> #endif @@ -49,7 +50,6 @@ typedef struct QEMUClock { NotifierList reset_notifiers; int64_t last; - QEMUClockType type; bool enabled; } QEMUClock; @@ -71,6 +71,7 @@ struct QEMUTimerList { QLIST_ENTRY(QEMUTimerList) list; QEMUTimerListNotifyCB *notify_cb; void *notify_opaque; + struct rcu_head rcu; }; /** @@ -107,6 +108,12 @@ QEMUTimerList *timerlist_new(QEMUClockType type, return timer_list; } +static void reclaim_timer_list(struct rcu_head *rcu) +{ + QEMUTimerList *timer_list = container_of(rcu, QEMUTimerList, rcu); + g_free(timer_list); +} + void timerlist_free(QEMUTimerList *timer_list) { assert(!timerlist_has_timers(timer_list)); @@ -114,7 +121,7 @@ void timerlist_free(QEMUTimerList *timer_list) QLIST_REMOVE(timer_list, list); } qemu_mutex_destroy(&timer_list->active_timers_lock); - g_free(timer_list); + call_rcu1(&timer_list->rcu, reclaim_timer_list); } static void qemu_clock_init(QEMUClockType type) @@ -138,9 +145,11 @@ void qemu_clock_notify(QEMUClockType type) { QEMUTimerList *timer_list; QEMUClock *clock = qemu_clock_ptr(type); - QLIST_FOREACH(timer_list, &clock->timerlists, list) { + rcu_read_lock(); + QLIST_FOREACH_RCU(timer_list, &clock->timerlists, list) { timerlist_notify(timer_list); } + rcu_read_unlock(); } void qemu_clock_enable(QEMUClockType type, bool enabled) @@ -155,7 +164,7 @@ void qemu_clock_enable(QEMUClockType type, bool enabled) bool timerlist_has_timers(QEMUTimerList *timer_list) { - return !!timer_list->active_timers; + return !!atomic_rcu_read(&timer_list->active_timers); } bool qemu_clock_has_timers(QEMUClockType type) @@ -168,15 +177,15 @@ bool timerlist_expired(QEMUTimerList *timer_list) { int64_t expire_time; - qemu_mutex_lock(&timer_list->active_timers_lock); - if (!timer_list->active_timers) { - qemu_mutex_unlock(&timer_list->active_timers_lock); + rcu_read_lock(); + if (atomic_rcu_read(&timer_list->active_timers)) { + rcu_read_unlock(); return false; } - expire_time = timer_list->active_timers->expire_time; - qemu_mutex_unlock(&timer_list->active_timers_lock); + expire_time = timer_list->active_timers->expire_time; return expire_time < qemu_clock_get_ns(timer_list->clock->type); + rcu_read_unlock(); } bool qemu_clock_expired(QEMUClockType type) @@ -194,8 +203,10 @@ int64_t timerlist_deadline_ns(QEMUTimerList *timer_list) { int64_t delta; int64_t expire_time; - - if (!timer_list->clock->enabled) { + bool enabled; + rcu_read_lock(); + enabled = atomic_rcu_read(&timer_list->clock->enabled); + if (!enabled) { return -1; } @@ -203,16 +214,13 @@ int64_t timerlist_deadline_ns(QEMUTimerList *timer_list) * value but ->notify_cb() is called when the deadline changes. Therefore * the caller should notice the change and there is no race condition. */ - qemu_mutex_lock(&timer_list->active_timers_lock); if (!timer_list->active_timers) { - qemu_mutex_unlock(&timer_list->active_timers_lock); + rcu_read_unlock(); return -1; } expire_time = timer_list->active_timers->expire_time; - qemu_mutex_unlock(&timer_list->active_timers_lock); - delta = expire_time - qemu_clock_get_ns(timer_list->clock->type); - + rcu_read_unlock(); if (delta <= 0) { return 0; } @@ -230,16 +238,22 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type) int64_t deadline = -1; QEMUTimerList *timer_list; QEMUClock *clock = qemu_clock_ptr(type); - QLIST_FOREACH(timer_list, &clock->timerlists, list) { + rcu_read_lock(); + QLIST_FOREACH_RCU(timer_list, &clock->timerlists, list) { deadline = qemu_soonest_timeout(deadline, timerlist_deadline_ns(timer_list)); } + rcu_read_unlock(); return deadline; } QEMUClockType timerlist_get_clock(QEMUTimerList *timer_list) { - return timer_list->clock->type; + QEMUClockType type; + rcu_read_lock(); + type = atomic_rcu_read(&timer_list->clock->type); + rcu_read_unlock(); + return type; } QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type) @@ -249,11 +263,13 @@ QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type) void timerlist_notify(QEMUTimerList *timer_list) { - if (timer_list->notify_cb) { + rcu_read_lock(); + if (atomic_rcu_read(&timer_list->notify_cb)) { timer_list->notify_cb(timer_list->notify_opaque); } else { qemu_notify_event(); } + rcu_read_unlock(); } /* Transition function to convert a nanosecond timeout to ms @@ -308,18 +324,24 @@ void timer_init(QEMUTimer *ts, QEMUTimerList *timer_list, int scale, QEMUTimerCB *cb, void *opaque) { - ts->timer_list = timer_list; ts->cb = cb; ts->opaque = opaque; ts->scale = scale; ts->expire_time = -1; + ts->timer_list = timer_list; } -void timer_free(QEMUTimer *ts) +static void reclaim_timer(struct rcu_head *rcu) { + QEMUTimer *ts = container_of(rcu, QEMUTimer, rcu); g_free(ts); } +void timer_free(QEMUTimer *ts) +{ + call_rcu1(&ts->rcu, reclaim_timer); +} + static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts) { QEMUTimer **pt, *t; @@ -331,7 +353,7 @@ static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts) if (!t) break; if (t == ts) { - *pt = t->next; + atomic_rcu_set(pt, t->next); break; } pt = &t->next; @@ -369,15 +391,17 @@ void timer_mod_ns(QEMUTimer *ts, int64_t expire_time) } ts->expire_time = MAX(expire_time, 0); ts->next = *pt; - *pt = ts; + atomic_rcu_set(pt, ts); qemu_mutex_unlock(&timer_list->active_timers_lock); - /* Rearm if necessary */ + rcu_read_unlock(); if (pt == &timer_list->active_timers) { /* Interrupt execution to force deadline recalculation. */ qemu_clock_warp(timer_list->clock->type); timerlist_notify(timer_list); } + rcu_read_unlock(); + } void timer_mod(QEMUTimer *ts, int64_t expire_time) @@ -402,30 +426,35 @@ bool timerlist_run_timers(QEMUTimerList *timer_list) bool progress = false; QEMUTimerCB *cb; void *opaque; + bool enabled; - if (!timer_list->clock->enabled) { + enabled = atomic_rcu_read(&timer_list->clock->enabled); + if (!enabled) { return progress; } - + rcu_read_lock(); current_time = qemu_clock_get_ns(timer_list->clock->type); for(;;) { - qemu_mutex_lock(&timer_list->active_timers_lock); ts = timer_list->active_timers; if (!timer_expired_ns(ts, current_time)) { - qemu_mutex_unlock(&timer_list->active_timers_lock); + rcu_read_unlock(); break; } + rcu_read_unlock(); + qemu_mutex_lock(&timer_list->active_timers_lock); /* remove timer from the list before calling the callback */ - timer_list->active_timers = ts->next; ts->next = NULL; ts->expire_time = -1; cb = ts->cb; opaque = ts->opaque; + timer_list->active_timers = ts->next; qemu_mutex_unlock(&timer_list->active_timers_lock); /* run the callback (the timer list can be modified) */ + rcu_read_lock(); cb(opaque); + rcu_read_unlock(); progress = true; } return progress; @@ -477,6 +506,7 @@ int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg) return deadline; } +/* caller holds an rcu read lock */ int64_t qemu_clock_get_ns(QEMUClockType type) { int64_t now, last; -- Mike Day | "Endurance is a Virtue" ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU 2014-02-12 19:09 [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU Mike Day @ 2014-02-13 9:11 ` Alex Bligh 2014-02-13 9:25 ` Paolo Bonzini 2014-02-15 10:23 ` Alex Bligh 1 sibling, 1 reply; 10+ messages in thread From: Alex Bligh @ 2014-02-13 9:11 UTC (permalink / raw) To: Mike Day; +Cc: pbonzini, qemu-devel, Alex Bligh, peter.maydell Mike, On 12 Feb 2014, at 19:09, Mike Day wrote: > Allow readers to use RCU when reading Qemu timer lists. Applies to > Paolo Bonzini's RCU branch, https://github.com/bonzini/qemu/tree/rcu. > > This patch is for comment and review only. The rcu branch needs to be > rebased on upstream. I'll certainly have a look through this. However before I do, what problem is this trying to solve? Do we think there is possibility of contention on the active timers lock? I used to think this was taken (let alone contented) relatively infrequently, but Rob Herring's recent email suggests to me the list is being modified in some circumstances rather more frequently than I thought. -- Alex Bligh ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU 2014-02-13 9:11 ` Alex Bligh @ 2014-02-13 9:25 ` Paolo Bonzini 2014-02-13 12:06 ` Mike Day 0 siblings, 1 reply; 10+ messages in thread From: Paolo Bonzini @ 2014-02-13 9:25 UTC (permalink / raw) To: Alex Bligh, Mike Day; +Cc: peter.maydell, qemu-devel Il 13/02/2014 10:11, Alex Bligh ha scritto: > Mike, > > On 12 Feb 2014, at 19:09, Mike Day wrote: > >> Allow readers to use RCU when reading Qemu timer lists. Applies to >> Paolo Bonzini's RCU branch, https://github.com/bonzini/qemu/tree/rcu. >> >> This patch is for comment and review only. The rcu branch needs to be >> rebased on upstream. > > I'll certainly have a look through this. However before I do, what > problem is this trying to solve? Do we think there is possibility > of contention on the active timers lock? I used to think this was > taken (let alone contented) relatively infrequently, but Rob Herring's > recent email suggests to me the list is being modified in some > circumstances rather more frequently than I thought. I think that, more than contention, it tries to reduce the cost of synchronization primitives, especially the locking and unlocking of the list around the invocation of timer callbacks. Paolo ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU 2014-02-13 9:25 ` Paolo Bonzini @ 2014-02-13 12:06 ` Mike Day 2014-02-13 12:57 ` Alex Bligh 0 siblings, 1 reply; 10+ messages in thread From: Mike Day @ 2014-02-13 12:06 UTC (permalink / raw) To: Paolo Bonzini; +Cc: Peter Maydell, qemu-devel@nongnu.org, Alex Bligh On Thu, Feb 13, 2014 at 4:25 AM, Paolo Bonzini <pbonzini@redhat.com> wrote: > Il 13/02/2014 10:11, Alex Bligh ha scritto: >> >> I'll certainly have a look through this. However before I do, what >> problem is this trying to solve? Do we think there is possibility >> of contention on the active timers lock? I used to think this was >> taken (let alone contented) relatively infrequently, but Rob Herring's >> recent email suggests to me the list is being modified in some >> circumstances rather more frequently than I thought. > > I think that, more than contention, it tries to reduce the cost of > synchronization primitives, especially the locking and unlocking of the list > around the invocation of timer callbacks. Yes, the assumption is that the active timers are a read-mostly list, so rcu is a win. Mike ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU 2014-02-13 12:06 ` Mike Day @ 2014-02-13 12:57 ` Alex Bligh 0 siblings, 0 replies; 10+ messages in thread From: Alex Bligh @ 2014-02-13 12:57 UTC (permalink / raw) To: Mike Day; +Cc: Paolo Bonzini, qemu-devel@nongnu.org, Alex Bligh, Peter Maydell On 13 Feb 2014, at 12:06, Mike Day wrote: >> I think that, more than contention, it tries to reduce the cost of >> synchronization primitives, especially the locking and unlocking of the list >> around the invocation of timer callbacks. > > Yes, the assumption is that the active timers are a read-mostly list, > so rcu is a win. Thanks - I'll have a look through. -- Alex Bligh ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU 2014-02-12 19:09 [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU Mike Day 2014-02-13 9:11 ` Alex Bligh @ 2014-02-15 10:23 ` Alex Bligh 2014-02-15 20:33 ` Mike Day 2014-02-17 16:13 ` Mike Day 1 sibling, 2 replies; 10+ messages in thread From: Alex Bligh @ 2014-02-15 10:23 UTC (permalink / raw) To: Mike Day; +Cc: pbonzini, qemu-devel, Alex Bligh, peter.maydell Mike, Some comments: 1. You seem to be removing the use of the active_timers_lock and replacing it by rcu (fine). However, you seem to have left the qemu_mutex_destroy in timerlist_free, and left the mutex in QEMUTimerList. Any reason why we need both? 2. You have introduced rcu not only to protect active_timers, the list of active timers within one timerlist, but also to protect (I think) the list of timerlists, as evidenced by the fact you have reclaim_timer_list as well as reclaim_timer. We currently have no locking in the creation/deletion of timerlists as these only happen on init or on when an AioContext is created/destroyed (as these are the only things that create or delete TimerListGroups); both these operations are required to happen within the main thread and are protected by the BQL. Indeed currently nothing every destroys an AioContext. I suspect if there is really a need to destroy timerlists in threads other than the main thread we are going to have more issues than locking of the list of timerlists. I suggest you remove these changes and solely look at protecting the list of active timers (as opposed to the list of timerlists) by RCU. I wrote something describing the new structure here: http://blog.alex.org.uk/2013/08/24/changes-to-qemus-timer-system/ I suspect simplifying this such that you are *only* protecting the active timer list would make the patch far less intrusive. 3. If you are going to protect the list of timerlists as well, do you not need to at least take the rcu read lock in timerlist_new? 4. In timerlist_notify(), you are holding the rcu_read_lock across the callback or qemu_notify_event. Why is this necessary? EG can't you do something like: void timerlist_notify(QEMUTimerList *timer_list) { rcu_read_lock(); if (atomic_rcu_read(&timer_list->notify_cb)) { QEMUTimerListNotifyCB *notify_cb = timer_list->notify_cb; void *notify_opaque = timer_list->notify_opaque; rcu_read_unlock(); notify_cb(notify_opaque); } else { rcu_read_unlock(); qemu_notify_event(); } } as if the callback modifies the timer it will take its own rcu read lock. My concern here is that the qemu_notify_event stuff could in theory be quite long running. Perhaps we don't care about how often reclaim occurs though. 5. Similarly to the above, in qemu_clock_notify, you take the rcu_read_lock, and then do a QLIST_FOREACH_RCU. That will mean the rcu_read_lock is taken anyway in 99% of calls to timerlist_notify. This is going through the list of timerlists, and that list is in practice static (protected by the BQL). Similarly in qemu_clock_deadline_ns_all, timerlist_get_clock, etc. 6. In timerlist_expired, rcu_read_unlock() appears to get called at the end after the return statement (last line of the function). 7. May be my reading of the diff, but in timer_mod_ns it seems to be introducing two rcu_read_unlock()s at the end of the function (after 'Rearm if necessary') - should the first not be a rcu_read_lock()? 8. Again, may be my reading of the diff, but you appear to have introduced a rcu_read_lock() outside the loop, but then unlock it within the loop. If the loop iterates more than once, won't it do more unlocks than locks? Alex On 12 Feb 2014, at 19:09, Mike Day wrote: > > Allow readers to use RCU when reading Qemu timer lists. Applies to > Paolo Bonzini's RCU branch, https://github.com/bonzini/qemu/tree/rcu. > > This patch is for comment and review only. The rcu branch needs to be > rebased on upstream. > > Signed-off-by: Mike Day <ncmike@ncultra.org> > --- > include/qemu/timer.h | 9 +++++- > qemu-timer.c | 88 +++++++++++++++++++++++++++++++++++----------------- > 2 files changed, 67 insertions(+), 30 deletions(-) > > diff --git a/include/qemu/timer.h b/include/qemu/timer.h > index b58903b..5aaa213 100644 > --- a/include/qemu/timer.h > +++ b/include/qemu/timer.h > @@ -4,7 +4,13 @@ > #include "qemu/typedefs.h" > #include "qemu-common.h" > #include "qemu/notify.h" > - > +#ifdef __GNUC__ > +#ifndef __ATOMIC_RELEASE > +#define __ATOMIC_RELEASE > +#endif > +#endif > +#include "qemu/atomic.h" > +#include "qemu/rcu.h" > /* timers */ > > #define SCALE_MS 1000000 > @@ -61,6 +67,7 @@ struct QEMUTimer { > void *opaque; > QEMUTimer *next; > int scale; > + struct rcu_head rcu; > }; > > extern QEMUTimerListGroup main_loop_tlg; > diff --git a/qemu-timer.c b/qemu-timer.c > index 6b62e88..27285ca 100644 > --- a/qemu-timer.c > +++ b/qemu-timer.c > @@ -29,6 +29,7 @@ > #include "hw/hw.h" > > #include "qemu/timer.h" > +#include "qemu/rcu_queue.h" > #ifdef CONFIG_POSIX > #include <pthread.h> > #endif > @@ -49,7 +50,6 @@ typedef struct QEMUClock { > > NotifierList reset_notifiers; > int64_t last; > - > QEMUClockType type; > bool enabled; > } QEMUClock; > @@ -71,6 +71,7 @@ struct QEMUTimerList { > QLIST_ENTRY(QEMUTimerList) list; > QEMUTimerListNotifyCB *notify_cb; > void *notify_opaque; > + struct rcu_head rcu; > }; > > /** > @@ -107,6 +108,12 @@ QEMUTimerList *timerlist_new(QEMUClockType type, > return timer_list; > } > > +static void reclaim_timer_list(struct rcu_head *rcu) > +{ > + QEMUTimerList *timer_list = container_of(rcu, QEMUTimerList, rcu); > + g_free(timer_list); > +} > + > void timerlist_free(QEMUTimerList *timer_list) > { > assert(!timerlist_has_timers(timer_list)); > @@ -114,7 +121,7 @@ void timerlist_free(QEMUTimerList *timer_list) > QLIST_REMOVE(timer_list, list); > } > qemu_mutex_destroy(&timer_list->active_timers_lock); > - g_free(timer_list); > + call_rcu1(&timer_list->rcu, reclaim_timer_list); > } > > static void qemu_clock_init(QEMUClockType type) > @@ -138,9 +145,11 @@ void qemu_clock_notify(QEMUClockType type) > { > QEMUTimerList *timer_list; > QEMUClock *clock = qemu_clock_ptr(type); > - QLIST_FOREACH(timer_list, &clock->timerlists, list) { > + rcu_read_lock(); > + QLIST_FOREACH_RCU(timer_list, &clock->timerlists, list) { > timerlist_notify(timer_list); > } > + rcu_read_unlock(); > } > > void qemu_clock_enable(QEMUClockType type, bool enabled) > @@ -155,7 +164,7 @@ void qemu_clock_enable(QEMUClockType type, bool enabled) > > bool timerlist_has_timers(QEMUTimerList *timer_list) > { > - return !!timer_list->active_timers; > + return !!atomic_rcu_read(&timer_list->active_timers); > } > > bool qemu_clock_has_timers(QEMUClockType type) > @@ -168,15 +177,15 @@ bool timerlist_expired(QEMUTimerList *timer_list) > { > int64_t expire_time; > > - qemu_mutex_lock(&timer_list->active_timers_lock); > - if (!timer_list->active_timers) { > - qemu_mutex_unlock(&timer_list->active_timers_lock); > + rcu_read_lock(); > + if (atomic_rcu_read(&timer_list->active_timers)) { > + rcu_read_unlock(); > return false; > } > - expire_time = timer_list->active_timers->expire_time; > - qemu_mutex_unlock(&timer_list->active_timers_lock); > > + expire_time = timer_list->active_timers->expire_time; > return expire_time < qemu_clock_get_ns(timer_list->clock->type); > + rcu_read_unlock(); > } > > bool qemu_clock_expired(QEMUClockType type) > @@ -194,8 +203,10 @@ int64_t timerlist_deadline_ns(QEMUTimerList *timer_list) > { > int64_t delta; > int64_t expire_time; > - > - if (!timer_list->clock->enabled) { > + bool enabled; > + rcu_read_lock(); > + enabled = atomic_rcu_read(&timer_list->clock->enabled); > + if (!enabled) { > return -1; > } > > @@ -203,16 +214,13 @@ int64_t timerlist_deadline_ns(QEMUTimerList *timer_list) > * value but ->notify_cb() is called when the deadline changes. Therefore > * the caller should notice the change and there is no race condition. > */ > - qemu_mutex_lock(&timer_list->active_timers_lock); > if (!timer_list->active_timers) { > - qemu_mutex_unlock(&timer_list->active_timers_lock); > + rcu_read_unlock(); > return -1; > } > expire_time = timer_list->active_timers->expire_time; > - qemu_mutex_unlock(&timer_list->active_timers_lock); > - > delta = expire_time - qemu_clock_get_ns(timer_list->clock->type); > - > + rcu_read_unlock(); > if (delta <= 0) { > return 0; > } > @@ -230,16 +238,22 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type) > int64_t deadline = -1; > QEMUTimerList *timer_list; > QEMUClock *clock = qemu_clock_ptr(type); > - QLIST_FOREACH(timer_list, &clock->timerlists, list) { > + rcu_read_lock(); > + QLIST_FOREACH_RCU(timer_list, &clock->timerlists, list) { > deadline = qemu_soonest_timeout(deadline, > timerlist_deadline_ns(timer_list)); > } > + rcu_read_unlock(); > return deadline; > } > > QEMUClockType timerlist_get_clock(QEMUTimerList *timer_list) > { > - return timer_list->clock->type; > + QEMUClockType type; > + rcu_read_lock(); > + type = atomic_rcu_read(&timer_list->clock->type); > + rcu_read_unlock(); > + return type; > } > > QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type) > @@ -249,11 +263,13 @@ QEMUTimerList *qemu_clock_get_main_loop_timerlist(QEMUClockType type) > > void timerlist_notify(QEMUTimerList *timer_list) > { > - if (timer_list->notify_cb) { > + rcu_read_lock(); > + if (atomic_rcu_read(&timer_list->notify_cb)) { > timer_list->notify_cb(timer_list->notify_opaque); > } else { > qemu_notify_event(); > } > + rcu_read_unlock(); > } > > /* Transition function to convert a nanosecond timeout to ms > @@ -308,18 +324,24 @@ void timer_init(QEMUTimer *ts, > QEMUTimerList *timer_list, int scale, > QEMUTimerCB *cb, void *opaque) > { > - ts->timer_list = timer_list; > ts->cb = cb; > ts->opaque = opaque; > ts->scale = scale; > ts->expire_time = -1; > + ts->timer_list = timer_list; > } > > -void timer_free(QEMUTimer *ts) > +static void reclaim_timer(struct rcu_head *rcu) > { > + QEMUTimer *ts = container_of(rcu, QEMUTimer, rcu); > g_free(ts); > } > > +void timer_free(QEMUTimer *ts) > +{ > + call_rcu1(&ts->rcu, reclaim_timer); > +} > + > static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts) > { > QEMUTimer **pt, *t; > @@ -331,7 +353,7 @@ static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts) > if (!t) > break; > if (t == ts) { > - *pt = t->next; > + atomic_rcu_set(pt, t->next); > break; > } > pt = &t->next; > @@ -369,15 +391,17 @@ void timer_mod_ns(QEMUTimer *ts, int64_t expire_time) > } > ts->expire_time = MAX(expire_time, 0); > ts->next = *pt; > - *pt = ts; > + atomic_rcu_set(pt, ts); > qemu_mutex_unlock(&timer_list->active_timers_lock); > - > /* Rearm if necessary */ > + rcu_read_unlock(); > if (pt == &timer_list->active_timers) { > /* Interrupt execution to force deadline recalculation. */ > qemu_clock_warp(timer_list->clock->type); > timerlist_notify(timer_list); > } > + rcu_read_unlock(); > + > } > > void timer_mod(QEMUTimer *ts, int64_t expire_time) > @@ -402,30 +426,35 @@ bool timerlist_run_timers(QEMUTimerList *timer_list) > bool progress = false; > QEMUTimerCB *cb; > void *opaque; > + bool enabled; > > - if (!timer_list->clock->enabled) { > + enabled = atomic_rcu_read(&timer_list->clock->enabled); > + if (!enabled) { > return progress; > } > - > + rcu_read_lock(); > current_time = qemu_clock_get_ns(timer_list->clock->type); > for(;;) { > - qemu_mutex_lock(&timer_list->active_timers_lock); > ts = timer_list->active_timers; > if (!timer_expired_ns(ts, current_time)) { > - qemu_mutex_unlock(&timer_list->active_timers_lock); > + rcu_read_unlock(); > break; > } > + rcu_read_unlock(); > > + qemu_mutex_lock(&timer_list->active_timers_lock); > /* remove timer from the list before calling the callback */ > - timer_list->active_timers = ts->next; > ts->next = NULL; > ts->expire_time = -1; > cb = ts->cb; > opaque = ts->opaque; > + timer_list->active_timers = ts->next; > qemu_mutex_unlock(&timer_list->active_timers_lock); > > /* run the callback (the timer list can be modified) */ > + rcu_read_lock(); > cb(opaque); > + rcu_read_unlock(); > progress = true; > } > return progress; > @@ -477,6 +506,7 @@ int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg) > return deadline; > } > > +/* caller holds an rcu read lock */ > int64_t qemu_clock_get_ns(QEMUClockType type) > { > int64_t now, last; > > -- > Mike Day | "Endurance is a Virtue" > > -- Alex Bligh ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU 2014-02-15 10:23 ` Alex Bligh @ 2014-02-15 20:33 ` Mike Day 2014-02-15 22:00 ` Alex Bligh 2014-02-17 16:13 ` Mike Day 1 sibling, 1 reply; 10+ messages in thread From: Mike Day @ 2014-02-15 20:33 UTC (permalink / raw) To: Alex Bligh; +Cc: pbonzini, qemu-devel, peter.maydell Alex Bligh <alex@alex.org.uk> writes: > Some comments: Thanks for the thorough review! > 1. You seem to be removing the use of the active_timers_lock and replacing it by > rcu (fine). However, you seem to have left the qemu_mutex_destroy in > timerlist_free, and left the mutex in QEMUTimerList. Any reason why we need > both? This was an oversight on my part - which I will correct. > 2. You have introduced rcu not only to protect active_timers, the list of > active timers within one timerlist, but also to protect (I think) > the list of timerlists, as evidenced by the fact you have > reclaim_timer_list as well as reclaim_timer. We currently have no > locking in the creation/deletion of timerlists as these only happen on init > or on when an AioContext is created/destroyed (as these are the only things > that create or delete TimerListGroups); both these operations are > required to happen within the main thread and are protected by the > BQL. Indeed currently nothing every destroys an AioContext. I suspect > if there is really a need to destroy timerlists in threads other than > the main thread we are going to have more issues than locking of the list of > timerlists. I suggest you remove these changes and solely look at protecting > the list of active timers (as opposed to the list of timerlists) by RCU. I thought about this a lot and and I'm glad you are bringing up the timerlist here. As you said, I couldn't find anywhere in the code base that manipulates this list. But I also noticed that the timer and clock structures are accessed by unrelated code modules. I don't know enough about the timer usage to predict how timers will be used by other code modules so I decided to overprotect and see what you thought about it. Removing the protection of the timerlist will vastly simplify this patch. Eventually, though, when we remove the BQL, I'm assuming we will need to protect the timerlist somehow. I'm happy now re-factoring toward a much simpler patch that just protects the active timer list. > > 3. If you are going to protect the list of timerlists as well, do you > not need to at least take the rcu read lock in timerlist_new? Thanks for catching this. It probably needs a proper mutex (which is the BQL now as you stated). > 4. In timerlist_notify(), you are holding the rcu_read_lock across > the callback or qemu_notify_event. Why is this necessary? EG can't you > do something like: > > void timerlist_notify(QEMUTimerList *timer_list) > { > rcu_read_lock(); > if (atomic_rcu_read(&timer_list->notify_cb)) { > QEMUTimerListNotifyCB *notify_cb = timer_list->notify_cb; > void *notify_opaque = timer_list->notify_opaque; > rcu_read_unlock(); > notify_cb(notify_opaque); > } else { > rcu_read_unlock(); > qemu_notify_event(); > } > } I thought a lot about this one and went back and forth a couple of times, so I'm glad you are looking at it. My concern is that the timer could be reclaimed during execution ofthe callback. And, the callback may be referincing the timer itself. Even if the callback takes an rcu_read_lock, there is a window between when the timer code leaves the rcu critical section and calls the notification code, and when the notification callback enters the rcu critical section via rcu_read_lock. It may be possible though unlikely that a reclaim could occur during this small window. The downside, as far as I know, is that the notification callback will take a long time to execute. This is not good, but it won't cause an error by itself. > 5. Similarly to the above, in qemu_clock_notify, you take the > rcu_read_lock, and then do a QLIST_FOREACH_RCU. That will mean > the rcu_read_lock is taken anyway in 99% of calls to timerlist_notify. > This is going through the list of timerlists, and that list is in > practice static (protected by the BQL). Similarly in > qemu_clock_deadline_ns_all, timerlist_get_clock, etc. My thinking here is that rcu_read_lock can be called recursively with no bad effect other than possibly taking up cache space. So when you have a function like this that is usually called as an internal function but occasionally called directly, take the rcu read lock in both functions. I think this will be removed anyway with the timerlist change. > 6. In timerlist_expired, rcu_read_unlock() appears to get called > at the end after the return statement (last line of the function). It does indeed, thanks. > 7. May be my reading of the diff, but in timer_mod_ns it seems to be > introducing two rcu_read_unlock()s at the end of the function > (after 'Rearm if necessary') - should the first not be a > rcu_read_lock()? Yes, thank you. > 8. Again, may be my reading of the diff, but you appear to have > introduced a rcu_read_lock() outside the loop, but then > unlock it within the loop. If the loop iterates more than once, > won't it do more unlocks than locks? Yes, I think the right fix is to omit the last rcu_read_unlock. -- Mike Day | "Endurance is a Virtue" ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU 2014-02-15 20:33 ` Mike Day @ 2014-02-15 22:00 ` Alex Bligh 0 siblings, 0 replies; 10+ messages in thread From: Alex Bligh @ 2014-02-15 22:00 UTC (permalink / raw) To: Mike Day; +Cc: pbonzini, qemu-devel, Alex Bligh, peter.maydell Mike, On 15 Feb 2014, at 20:33, Mike Day wrote: >> >> 2. You have introduced rcu not only to protect active_timers, the list of >> active timers within one timerlist, but also to protect (I think) >> the list of timerlists, as evidenced by the fact you have >> reclaim_timer_list as well as reclaim_timer. We currently have no >> locking in the creation/deletion of timerlists as these only happen on init >> or on when an AioContext is created/destroyed (as these are the only things >> that create or delete TimerListGroups); both these operations are >> required to happen within the main thread and are protected by the >> BQL. Indeed currently nothing every destroys an AioContext. I suspect >> if there is really a need to destroy timerlists in threads other than >> the main thread we are going to have more issues than locking of the list of >> timerlists. I suggest you remove these changes and solely look at protecting >> the list of active timers (as opposed to the list of timerlists) by RCU. > > I thought about this a lot and and I'm glad you are bringing up the > timerlist here. As you said, I couldn't find anywhere in the code base > that manipulates this list. But I also noticed that the timer and clock > structures are accessed by unrelated code modules. I don't know enough > about the timer usage to predict how timers will be used by other code > modules so I decided to overprotect and see what you thought about it. > > Removing the protection of the timerlist will vastly simplify this > patch. Eventually, though, when we remove the BQL, I'm assuming we will > need to protect the timerlist somehow. I'm happy now re-factoring toward > a much simpler patch that just protects the active timer list. I think you mean "protect the list of timerlists somehow". I agree. I suppose RCU would ultimately be better than a mutex as it's the archetypal read lots write almost never list. I suppose I somewhat grudgingly see the point! If you want to do that this time round, I'd suggest you break it into two patches. >> 4. In timerlist_notify(), you are holding the rcu_read_lock across >> the callback or qemu_notify_event. Why is this necessary? EG can't you >> do something like: >> >> void timerlist_notify(QEMUTimerList *timer_list) >> { >> rcu_read_lock(); >> if (atomic_rcu_read(&timer_list->notify_cb)) { >> QEMUTimerListNotifyCB *notify_cb = timer_list->notify_cb; >> void *notify_opaque = timer_list->notify_opaque; >> rcu_read_unlock(); >> notify_cb(notify_opaque); >> } else { >> rcu_read_unlock(); >> qemu_notify_event(); >> } >> } > > I thought a lot about this one and went back and forth a couple of > times, so I'm glad you are looking at it. My concern is that the timer > could be reclaimed during execution ofthe callback. And, the callback > may be referincing the timer itself. Even if the callback takes an > rcu_read_lock, there is a window between when the timer code leaves the > rcu critical section and calls the notification code, and when the > notification callback enters the rcu critical section via > rcu_read_lock. It may be possible though unlikely that a reclaim could > occur during this small window. Fair enough. > The downside, as far as I know, is that the notification callback will > take a long time to execute. This is not good, but it won't cause an > error by itself. Agree. >> 5. Similarly to the above, in qemu_clock_notify, you take the >> rcu_read_lock, and then do a QLIST_FOREACH_RCU. That will mean >> the rcu_read_lock is taken anyway in 99% of calls to timerlist_notify. >> This is going through the list of timerlists, and that list is in >> practice static (protected by the BQL). Similarly in >> qemu_clock_deadline_ns_all, timerlist_get_clock, etc. > > My thinking here is that rcu_read_lock can be called recursively with no > bad effect other than possibly taking up cache space. So when you have a > function like this that is usually called as an internal function but > occasionally called directly, take the rcu read lock in both > functions. I think this will be removed anyway with the timerlist > change. It will. But I suppose it isn't a great concern. -- Alex Bligh ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU 2014-02-15 10:23 ` Alex Bligh 2014-02-15 20:33 ` Mike Day @ 2014-02-17 16:13 ` Mike Day 2014-02-17 16:37 ` Alex Bligh 1 sibling, 1 reply; 10+ messages in thread From: Mike Day @ 2014-02-17 16:13 UTC (permalink / raw) To: Alex Bligh; +Cc: Paolo Bonzini, qemu-devel@nongnu.org, Peter Maydell > 1. You seem to be removing the use of the active_timers_lock and replacing it by > rcu (fine). However, you seem to have left the qemu_mutex_destroy in > timerlist_free, and left the mutex in QEMUTimerList. Any reason why we need both? > I responded incorrectly to this yesterday. We still need the mutex here (active_timers_lock) to provide synchronization for list updates. The difference now is that we don't need to hold the mutex for traversing the list. But to update the list we still need to hold the mutex. Mike ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU 2014-02-17 16:13 ` Mike Day @ 2014-02-17 16:37 ` Alex Bligh 0 siblings, 0 replies; 10+ messages in thread From: Alex Bligh @ 2014-02-17 16:37 UTC (permalink / raw) To: Mike Day; +Cc: Paolo Bonzini, qemu-devel@nongnu.org, Alex Bligh, Peter Maydell On 17 Feb 2014, at 16:13, Mike Day wrote: >> 1. You seem to be removing the use of the active_timers_lock and replacing it by >> rcu (fine). However, you seem to have left the qemu_mutex_destroy in >> timerlist_free, and left the mutex in QEMUTimerList. Any reason why we need both? >> > > I responded incorrectly to this yesterday. We still need the mutex > here (active_timers_lock) to provide synchronization for list updates. > The difference now is that we don't need to hold the mutex for > traversing the list. But to update the list we still need to hold the > mutex. Of course, my bad. -- Alex Bligh ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2014-02-17 16:37 UTC | newest] Thread overview: 10+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2014-02-12 19:09 [Qemu-devel] [PATCH] [RFC] Convert Qemu Timer List and Active Timers to RCU Mike Day 2014-02-13 9:11 ` Alex Bligh 2014-02-13 9:25 ` Paolo Bonzini 2014-02-13 12:06 ` Mike Day 2014-02-13 12:57 ` Alex Bligh 2014-02-15 10:23 ` Alex Bligh 2014-02-15 20:33 ` Mike Day 2014-02-15 22:00 ` Alex Bligh 2014-02-17 16:13 ` Mike Day 2014-02-17 16:37 ` Alex Bligh
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).