* [PATCH v2 1/3] ocfs2: Switch osb->disable_recovery to enum
2025-04-22 12:41 [PATCH v2 0/3] ocfs2: Fix deadlocks in quota recovery Jan Kara
@ 2025-04-22 12:41 ` Jan Kara
2025-04-24 0:59 ` Joseph Qi
2025-04-22 12:41 ` [PATCH v2 2/3] ocfs2: Implement handshaking with ocfs2 recovery thread Jan Kara
2025-04-22 12:41 ` [PATCH v2 3/3] ocfs2: Stop quota recovery before disabling quotas Jan Kara
2 siblings, 1 reply; 9+ messages in thread
From: Jan Kara @ 2025-04-22 12:41 UTC (permalink / raw)
To: Joseph Qi; +Cc: ocfs2-devel, Heming Zhao, Murad Masimov, Shichangkuo, Jan Kara
We will need more recovery states than just pure enable / disable to fix
deadlocks with quota recovery. Switch osb->disable_recovery to enum.
Reviewed-by: Heming Zhao <heming.zhao@suse.com>
Tested-by: Heming Zhao <heming.zhao@suse.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
fs/ocfs2/journal.c | 14 ++++++++------
fs/ocfs2/ocfs2.h | 7 ++++++-
2 files changed, 14 insertions(+), 7 deletions(-)
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f1b4b3e611cb..8986e813ab2c 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -174,7 +174,7 @@ int ocfs2_recovery_init(struct ocfs2_super *osb)
struct ocfs2_recovery_map *rm;
mutex_init(&osb->recovery_lock);
- osb->disable_recovery = 0;
+ osb->recovery_state = OCFS2_REC_ENABLED;
osb->recovery_thread_task = NULL;
init_waitqueue_head(&osb->recovery_event);
@@ -206,7 +206,7 @@ void ocfs2_recovery_exit(struct ocfs2_super *osb)
/* disable any new recovery threads and wait for any currently
* running ones to exit. Do this before setting the vol_state. */
mutex_lock(&osb->recovery_lock);
- osb->disable_recovery = 1;
+ osb->recovery_state = OCFS2_REC_DISABLED;
mutex_unlock(&osb->recovery_lock);
wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
@@ -1582,14 +1582,16 @@ static int __ocfs2_recovery_thread(void *arg)
void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
{
+ int was_set = -1;
+
mutex_lock(&osb->recovery_lock);
+ if (osb->recovery_state < OCFS2_REC_DISABLED)
+ was_set = ocfs2_recovery_map_set(osb, node_num);
trace_ocfs2_recovery_thread(node_num, osb->node_num,
- osb->disable_recovery, osb->recovery_thread_task,
- osb->disable_recovery ?
- -1 : ocfs2_recovery_map_set(osb, node_num));
+ osb->recovery_state, osb->recovery_thread_task, was_set);
- if (osb->disable_recovery)
+ if (osb->recovery_state == OCFS2_REC_DISABLED)
goto out;
if (osb->recovery_thread_task)
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 51c52768132d..e713361939f0 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -308,6 +308,11 @@ enum ocfs2_journal_trigger_type {
void ocfs2_initialize_journal_triggers(struct super_block *sb,
struct ocfs2_triggers triggers[]);
+enum ocfs2_recovery_state {
+ OCFS2_REC_ENABLED = 0,
+ OCFS2_REC_DISABLED,
+};
+
struct ocfs2_journal;
struct ocfs2_slot_info;
struct ocfs2_recovery_map;
@@ -370,7 +375,7 @@ struct ocfs2_super
struct ocfs2_recovery_map *recovery_map;
struct ocfs2_replay_map *replay_map;
struct task_struct *recovery_thread_task;
- int disable_recovery;
+ enum ocfs2_recovery_state recovery_state;
wait_queue_head_t checkpoint_event;
struct ocfs2_journal *journal;
unsigned long osb_commit_interval;
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATCH v2 1/3] ocfs2: Switch osb->disable_recovery to enum
2025-04-22 12:41 ` [PATCH v2 1/3] ocfs2: Switch osb->disable_recovery to enum Jan Kara
@ 2025-04-24 0:59 ` Joseph Qi
2025-04-24 13:42 ` Jan Kara
0 siblings, 1 reply; 9+ messages in thread
From: Joseph Qi @ 2025-04-24 0:59 UTC (permalink / raw)
To: Jan Kara, akpm; +Cc: ocfs2-devel, Heming Zhao, Murad Masimov, Shichangkuo
On 2025/4/22 20:41, Jan Kara wrote:
> We will need more recovery states than just pure enable / disable to fix
> deadlocks with quota recovery. Switch osb->disable_recovery to enum.
>
> Reviewed-by: Heming Zhao <heming.zhao@suse.com>
> Tested-by: Heming Zhao <heming.zhao@suse.com>
> Signed-off-by: Jan Kara <jack@suse.cz>
Acked-by: Joseph Qi <joseph.qi@linux.alibaba.com>
> ---
> fs/ocfs2/journal.c | 14 ++++++++------
> fs/ocfs2/ocfs2.h | 7 ++++++-
> 2 files changed, 14 insertions(+), 7 deletions(-)
>
> diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
> index f1b4b3e611cb..8986e813ab2c 100644
> --- a/fs/ocfs2/journal.c
> +++ b/fs/ocfs2/journal.c
> @@ -174,7 +174,7 @@ int ocfs2_recovery_init(struct ocfs2_super *osb)
> struct ocfs2_recovery_map *rm;
>
> mutex_init(&osb->recovery_lock);
> - osb->disable_recovery = 0;
> + osb->recovery_state = OCFS2_REC_ENABLED;
> osb->recovery_thread_task = NULL;
> init_waitqueue_head(&osb->recovery_event);
>
> @@ -206,7 +206,7 @@ void ocfs2_recovery_exit(struct ocfs2_super *osb)
> /* disable any new recovery threads and wait for any currently
> * running ones to exit. Do this before setting the vol_state. */
> mutex_lock(&osb->recovery_lock);
> - osb->disable_recovery = 1;
> + osb->recovery_state = OCFS2_REC_DISABLED;
> mutex_unlock(&osb->recovery_lock);
> wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
>
> @@ -1582,14 +1582,16 @@ static int __ocfs2_recovery_thread(void *arg)
>
> void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
> {
> + int was_set = -1;
> +
> mutex_lock(&osb->recovery_lock);
> + if (osb->recovery_state < OCFS2_REC_DISABLED)
> + was_set = ocfs2_recovery_map_set(osb, node_num);
>
> trace_ocfs2_recovery_thread(node_num, osb->node_num,
> - osb->disable_recovery, osb->recovery_thread_task,
> - osb->disable_recovery ?
> - -1 : ocfs2_recovery_map_set(osb, node_num));
> + osb->recovery_state, osb->recovery_thread_task, was_set);
>
> - if (osb->disable_recovery)
> + if (osb->recovery_state == OCFS2_REC_DISABLED)
> goto out;
>
> if (osb->recovery_thread_task)
> diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
> index 51c52768132d..e713361939f0 100644
> --- a/fs/ocfs2/ocfs2.h
> +++ b/fs/ocfs2/ocfs2.h
> @@ -308,6 +308,11 @@ enum ocfs2_journal_trigger_type {
> void ocfs2_initialize_journal_triggers(struct super_block *sb,
> struct ocfs2_triggers triggers[]);
>
> +enum ocfs2_recovery_state {
> + OCFS2_REC_ENABLED = 0,
> + OCFS2_REC_DISABLED,
> +};
> +
> struct ocfs2_journal;
> struct ocfs2_slot_info;
> struct ocfs2_recovery_map;
> @@ -370,7 +375,7 @@ struct ocfs2_super
> struct ocfs2_recovery_map *recovery_map;
> struct ocfs2_replay_map *replay_map;
> struct task_struct *recovery_thread_task;
> - int disable_recovery;
> + enum ocfs2_recovery_state recovery_state;
> wait_queue_head_t checkpoint_event;
> struct ocfs2_journal *journal;
> unsigned long osb_commit_interval;
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: [PATCH v2 1/3] ocfs2: Switch osb->disable_recovery to enum
2025-04-24 0:59 ` Joseph Qi
@ 2025-04-24 13:42 ` Jan Kara
0 siblings, 0 replies; 9+ messages in thread
From: Jan Kara @ 2025-04-24 13:42 UTC (permalink / raw)
To: Joseph Qi
Cc: Jan Kara, akpm, ocfs2-devel, Heming Zhao, Murad Masimov,
Shichangkuo
On Thu 24-04-25 08:59:59, Joseph Qi wrote:
>
>
> On 2025/4/22 20:41, Jan Kara wrote:
> > We will need more recovery states than just pure enable / disable to fix
> > deadlocks with quota recovery. Switch osb->disable_recovery to enum.
> >
> > Reviewed-by: Heming Zhao <heming.zhao@suse.com>
> > Tested-by: Heming Zhao <heming.zhao@suse.com>
> > Signed-off-by: Jan Kara <jack@suse.cz>
>
> Acked-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Thanks for review Joseph! It seems Andrew is still merging patches for
OCFS2, doesn't he? So I'll send the patches to him.
Honza
> > ---
> > fs/ocfs2/journal.c | 14 ++++++++------
> > fs/ocfs2/ocfs2.h | 7 ++++++-
> > 2 files changed, 14 insertions(+), 7 deletions(-)
> >
> > diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
> > index f1b4b3e611cb..8986e813ab2c 100644
> > --- a/fs/ocfs2/journal.c
> > +++ b/fs/ocfs2/journal.c
> > @@ -174,7 +174,7 @@ int ocfs2_recovery_init(struct ocfs2_super *osb)
> > struct ocfs2_recovery_map *rm;
> >
> > mutex_init(&osb->recovery_lock);
> > - osb->disable_recovery = 0;
> > + osb->recovery_state = OCFS2_REC_ENABLED;
> > osb->recovery_thread_task = NULL;
> > init_waitqueue_head(&osb->recovery_event);
> >
> > @@ -206,7 +206,7 @@ void ocfs2_recovery_exit(struct ocfs2_super *osb)
> > /* disable any new recovery threads and wait for any currently
> > * running ones to exit. Do this before setting the vol_state. */
> > mutex_lock(&osb->recovery_lock);
> > - osb->disable_recovery = 1;
> > + osb->recovery_state = OCFS2_REC_DISABLED;
> > mutex_unlock(&osb->recovery_lock);
> > wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
> >
> > @@ -1582,14 +1582,16 @@ static int __ocfs2_recovery_thread(void *arg)
> >
> > void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
> > {
> > + int was_set = -1;
> > +
> > mutex_lock(&osb->recovery_lock);
> > + if (osb->recovery_state < OCFS2_REC_DISABLED)
> > + was_set = ocfs2_recovery_map_set(osb, node_num);
> >
> > trace_ocfs2_recovery_thread(node_num, osb->node_num,
> > - osb->disable_recovery, osb->recovery_thread_task,
> > - osb->disable_recovery ?
> > - -1 : ocfs2_recovery_map_set(osb, node_num));
> > + osb->recovery_state, osb->recovery_thread_task, was_set);
> >
> > - if (osb->disable_recovery)
> > + if (osb->recovery_state == OCFS2_REC_DISABLED)
> > goto out;
> >
> > if (osb->recovery_thread_task)
> > diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
> > index 51c52768132d..e713361939f0 100644
> > --- a/fs/ocfs2/ocfs2.h
> > +++ b/fs/ocfs2/ocfs2.h
> > @@ -308,6 +308,11 @@ enum ocfs2_journal_trigger_type {
> > void ocfs2_initialize_journal_triggers(struct super_block *sb,
> > struct ocfs2_triggers triggers[]);
> >
> > +enum ocfs2_recovery_state {
> > + OCFS2_REC_ENABLED = 0,
> > + OCFS2_REC_DISABLED,
> > +};
> > +
> > struct ocfs2_journal;
> > struct ocfs2_slot_info;
> > struct ocfs2_recovery_map;
> > @@ -370,7 +375,7 @@ struct ocfs2_super
> > struct ocfs2_recovery_map *recovery_map;
> > struct ocfs2_replay_map *replay_map;
> > struct task_struct *recovery_thread_task;
> > - int disable_recovery;
> > + enum ocfs2_recovery_state recovery_state;
> > wait_queue_head_t checkpoint_event;
> > struct ocfs2_journal *journal;
> > unsigned long osb_commit_interval;
>
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v2 2/3] ocfs2: Implement handshaking with ocfs2 recovery thread
2025-04-22 12:41 [PATCH v2 0/3] ocfs2: Fix deadlocks in quota recovery Jan Kara
2025-04-22 12:41 ` [PATCH v2 1/3] ocfs2: Switch osb->disable_recovery to enum Jan Kara
@ 2025-04-22 12:41 ` Jan Kara
2025-04-22 14:45 ` Heming Zhao
2025-04-22 12:41 ` [PATCH v2 3/3] ocfs2: Stop quota recovery before disabling quotas Jan Kara
2 siblings, 1 reply; 9+ messages in thread
From: Jan Kara @ 2025-04-22 12:41 UTC (permalink / raw)
To: Joseph Qi; +Cc: ocfs2-devel, Heming Zhao, Murad Masimov, Shichangkuo, Jan Kara
We will need ocfs2 recovery thread to acknowledge transitions of
recovery_state when disabling particular types of recovery. This is
similar to what currently happens when disabling recovery completely,
just more general. Implement the handshake and use it for exit from
recovery.
Signed-off-by: Jan Kara <jack@suse.cz>
---
fs/ocfs2/journal.c | 52 +++++++++++++++++++++++++++++++---------------
fs/ocfs2/ocfs2.h | 4 ++++
2 files changed, 39 insertions(+), 17 deletions(-)
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 8986e813ab2c..7511db5a46cd 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -190,31 +190,48 @@ int ocfs2_recovery_init(struct ocfs2_super *osb)
return 0;
}
-/* we can't grab the goofy sem lock from inside wait_event, so we use
- * memory barriers to make sure that we'll see the null task before
- * being woken up */
static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
{
- mb();
return osb->recovery_thread_task != NULL;
}
-void ocfs2_recovery_exit(struct ocfs2_super *osb)
+static void ocfs2_recovery_disable(struct ocfs2_super *osb,
+ enum ocfs2_recovery_state state)
{
- struct ocfs2_recovery_map *rm;
-
- /* disable any new recovery threads and wait for any currently
- * running ones to exit. Do this before setting the vol_state. */
mutex_lock(&osb->recovery_lock);
- osb->recovery_state = OCFS2_REC_DISABLED;
+ /*
+ * If recovery thread is not running, we can directly transition to
+ * final state.
+ */
+ if (!ocfs2_recovery_thread_running(osb)) {
+ osb->recovery_state = state + 1;
+ goto out_lock;
+ }
+ osb->recovery_state = state;
+ /* Wait for recovery thread to acknowledge state transition */
+ wait_event_cmd(osb->recovery_event,
+ !ocfs2_recovery_thread_running(osb) ||
+ osb->recovery_state >= state + 1,
+ mutex_unlock(&osb->recovery_lock),
+ mutex_lock(&osb->recovery_lock));
+out_lock:
mutex_unlock(&osb->recovery_lock);
- wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
- /* At this point, we know that no more recovery threads can be
- * launched, so wait for any recovery completion work to
- * complete. */
+ /*
+ * At this point we know that no more recovery work can be queued so
+ * wait for any recovery completion work to complete.
+ */
if (osb->ocfs2_wq)
flush_workqueue(osb->ocfs2_wq);
+}
+
+void ocfs2_recovery_exit(struct ocfs2_super *osb)
+{
+ struct ocfs2_recovery_map *rm;
+
+ /* disable any new recovery threads and wait for any currently
+ * running ones to exit. Do this before setting the vol_state. */
+ ocfs2_recovery_disable(osb, OCFS2_REC_WANT_DISABLE);
/*
* Now that recovery is shut down, and the osb is about to be
@@ -1569,7 +1586,8 @@ static int __ocfs2_recovery_thread(void *arg)
ocfs2_free_replay_slots(osb);
osb->recovery_thread_task = NULL;
- mb(); /* sync with ocfs2_recovery_thread_running */
+ if (osb->recovery_state == OCFS2_REC_WANT_DISABLE)
+ osb->recovery_state = OCFS2_REC_DISABLED;
wake_up(&osb->recovery_event);
mutex_unlock(&osb->recovery_lock);
@@ -1585,13 +1603,13 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
int was_set = -1;
mutex_lock(&osb->recovery_lock);
- if (osb->recovery_state < OCFS2_REC_DISABLED)
+ if (osb->recovery_state < OCFS2_REC_WANT_DISABLE)
was_set = ocfs2_recovery_map_set(osb, node_num);
trace_ocfs2_recovery_thread(node_num, osb->node_num,
osb->recovery_state, osb->recovery_thread_task, was_set);
- if (osb->recovery_state == OCFS2_REC_DISABLED)
+ if (osb->recovery_state >= OCFS2_REC_WANT_DISABLE)
goto out;
if (osb->recovery_thread_task)
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index e713361939f0..70b2d5c8c228 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -310,6 +310,10 @@ void ocfs2_initialize_journal_triggers(struct super_block *sb,
enum ocfs2_recovery_state {
OCFS2_REC_ENABLED = 0,
+ OCFS2_REC_WANT_DISABLE,
+ /*
+ * Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work
+ */
OCFS2_REC_DISABLED,
};
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATCH v2 2/3] ocfs2: Implement handshaking with ocfs2 recovery thread
2025-04-22 12:41 ` [PATCH v2 2/3] ocfs2: Implement handshaking with ocfs2 recovery thread Jan Kara
@ 2025-04-22 14:45 ` Heming Zhao
2025-04-24 1:03 ` Joseph Qi
0 siblings, 1 reply; 9+ messages in thread
From: Heming Zhao @ 2025-04-22 14:45 UTC (permalink / raw)
To: Jan Kara, Joseph Qi; +Cc: ocfs2-devel, Murad Masimov, Shichangkuo
On 4/22/25 20:41, Jan Kara wrote:
> We will need ocfs2 recovery thread to acknowledge transitions of
> recovery_state when disabling particular types of recovery. This is
> similar to what currently happens when disabling recovery completely,
> just more general. Implement the handshake and use it for exit from
> recovery.
>
> Signed-off-by: Jan Kara <jack@suse.cz>
Thanks for the v2 patch, and I got your code meaning.
I also tested the new patch.
Reviewed-by: Heming Zhao <heming.zhao@suse.com>
Tested-by: Heming Zhao <heming.zhao@suse.com>
> ---
> fs/ocfs2/journal.c | 52 +++++++++++++++++++++++++++++++---------------
> fs/ocfs2/ocfs2.h | 4 ++++
> 2 files changed, 39 insertions(+), 17 deletions(-)
>
> diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
> index 8986e813ab2c..7511db5a46cd 100644
> --- a/fs/ocfs2/journal.c
> +++ b/fs/ocfs2/journal.c
> @@ -190,31 +190,48 @@ int ocfs2_recovery_init(struct ocfs2_super *osb)
> return 0;
> }
>
> -/* we can't grab the goofy sem lock from inside wait_event, so we use
> - * memory barriers to make sure that we'll see the null task before
> - * being woken up */
> static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
> {
> - mb();
> return osb->recovery_thread_task != NULL;
> }
>
> -void ocfs2_recovery_exit(struct ocfs2_super *osb)
> +static void ocfs2_recovery_disable(struct ocfs2_super *osb,
> + enum ocfs2_recovery_state state)
> {
> - struct ocfs2_recovery_map *rm;
> -
> - /* disable any new recovery threads and wait for any currently
> - * running ones to exit. Do this before setting the vol_state. */
> mutex_lock(&osb->recovery_lock);
> - osb->recovery_state = OCFS2_REC_DISABLED;
> + /*
> + * If recovery thread is not running, we can directly transition to
> + * final state.
> + */
> + if (!ocfs2_recovery_thread_running(osb)) {
> + osb->recovery_state = state + 1;
> + goto out_lock;
> + }
> + osb->recovery_state = state;
> + /* Wait for recovery thread to acknowledge state transition */
> + wait_event_cmd(osb->recovery_event,
> + !ocfs2_recovery_thread_running(osb) ||
> + osb->recovery_state >= state + 1,
> + mutex_unlock(&osb->recovery_lock),
> + mutex_lock(&osb->recovery_lock));
> +out_lock:
> mutex_unlock(&osb->recovery_lock);
> - wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
>
> - /* At this point, we know that no more recovery threads can be
> - * launched, so wait for any recovery completion work to
> - * complete. */
> + /*
> + * At this point we know that no more recovery work can be queued so
> + * wait for any recovery completion work to complete.
> + */
> if (osb->ocfs2_wq)
> flush_workqueue(osb->ocfs2_wq);
> +}
> +
> +void ocfs2_recovery_exit(struct ocfs2_super *osb)
> +{
> + struct ocfs2_recovery_map *rm;
> +
> + /* disable any new recovery threads and wait for any currently
> + * running ones to exit. Do this before setting the vol_state. */
> + ocfs2_recovery_disable(osb, OCFS2_REC_WANT_DISABLE);
>
> /*
> * Now that recovery is shut down, and the osb is about to be
> @@ -1569,7 +1586,8 @@ static int __ocfs2_recovery_thread(void *arg)
>
> ocfs2_free_replay_slots(osb);
> osb->recovery_thread_task = NULL;
> - mb(); /* sync with ocfs2_recovery_thread_running */
> + if (osb->recovery_state == OCFS2_REC_WANT_DISABLE)
> + osb->recovery_state = OCFS2_REC_DISABLED;
> wake_up(&osb->recovery_event);
>
> mutex_unlock(&osb->recovery_lock);
> @@ -1585,13 +1603,13 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
> int was_set = -1;
>
> mutex_lock(&osb->recovery_lock);
> - if (osb->recovery_state < OCFS2_REC_DISABLED)
> + if (osb->recovery_state < OCFS2_REC_WANT_DISABLE)
> was_set = ocfs2_recovery_map_set(osb, node_num);
>
> trace_ocfs2_recovery_thread(node_num, osb->node_num,
> osb->recovery_state, osb->recovery_thread_task, was_set);
>
> - if (osb->recovery_state == OCFS2_REC_DISABLED)
> + if (osb->recovery_state >= OCFS2_REC_WANT_DISABLE)
> goto out;
>
> if (osb->recovery_thread_task)
> diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
> index e713361939f0..70b2d5c8c228 100644
> --- a/fs/ocfs2/ocfs2.h
> +++ b/fs/ocfs2/ocfs2.h
> @@ -310,6 +310,10 @@ void ocfs2_initialize_journal_triggers(struct super_block *sb,
>
> enum ocfs2_recovery_state {
> OCFS2_REC_ENABLED = 0,
> + OCFS2_REC_WANT_DISABLE,
> + /*
> + * Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work
> + */
> OCFS2_REC_DISABLED,
> };
>
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: [PATCH v2 2/3] ocfs2: Implement handshaking with ocfs2 recovery thread
2025-04-22 14:45 ` Heming Zhao
@ 2025-04-24 1:03 ` Joseph Qi
0 siblings, 0 replies; 9+ messages in thread
From: Joseph Qi @ 2025-04-24 1:03 UTC (permalink / raw)
To: Heming Zhao, Jan Kara, akpm; +Cc: ocfs2-devel, Murad Masimov, Shichangkuo
On 2025/4/22 22:45, Heming Zhao wrote:
> On 4/22/25 20:41, Jan Kara wrote:
>> We will need ocfs2 recovery thread to acknowledge transitions of
>> recovery_state when disabling particular types of recovery. This is
>> similar to what currently happens when disabling recovery completely,
>> just more general. Implement the handshake and use it for exit from
>> recovery.
>>
>> Signed-off-by: Jan Kara <jack@suse.cz>
>
> Thanks for the v2 patch, and I got your code meaning.
> I also tested the new patch.
>
> Reviewed-by: Heming Zhao <heming.zhao@suse.com>
> Tested-by: Heming Zhao <heming.zhao@suse.com>
Acked-by: Joseph Qi <joseph.qi@linux.alibaba.com>
>
>> ---
>> fs/ocfs2/journal.c | 52 +++++++++++++++++++++++++++++++---------------
>> fs/ocfs2/ocfs2.h | 4 ++++
>> 2 files changed, 39 insertions(+), 17 deletions(-)
>>
>> diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
>> index 8986e813ab2c..7511db5a46cd 100644
>> --- a/fs/ocfs2/journal.c
>> +++ b/fs/ocfs2/journal.c
>> @@ -190,31 +190,48 @@ int ocfs2_recovery_init(struct ocfs2_super *osb)
>> return 0;
>> }
>> -/* we can't grab the goofy sem lock from inside wait_event, so we use
>> - * memory barriers to make sure that we'll see the null task before
>> - * being woken up */
>> static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
>> {
>> - mb();
>> return osb->recovery_thread_task != NULL;
>> }
>> -void ocfs2_recovery_exit(struct ocfs2_super *osb)
>> +static void ocfs2_recovery_disable(struct ocfs2_super *osb,
>> + enum ocfs2_recovery_state state)
>> {
>> - struct ocfs2_recovery_map *rm;
>> -
>> - /* disable any new recovery threads and wait for any currently
>> - * running ones to exit. Do this before setting the vol_state. */
>> mutex_lock(&osb->recovery_lock);
>> - osb->recovery_state = OCFS2_REC_DISABLED;
>> + /*
>> + * If recovery thread is not running, we can directly transition to
>> + * final state.
>> + */
>> + if (!ocfs2_recovery_thread_running(osb)) {
>> + osb->recovery_state = state + 1;
>> + goto out_lock;
>> + }
>> + osb->recovery_state = state;
>> + /* Wait for recovery thread to acknowledge state transition */
>> + wait_event_cmd(osb->recovery_event,
>> + !ocfs2_recovery_thread_running(osb) ||
>> + osb->recovery_state >= state + 1,
>> + mutex_unlock(&osb->recovery_lock),
>> + mutex_lock(&osb->recovery_lock));
>> +out_lock:
>> mutex_unlock(&osb->recovery_lock);
>> - wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
>> - /* At this point, we know that no more recovery threads can be
>> - * launched, so wait for any recovery completion work to
>> - * complete. */
>> + /*
>> + * At this point we know that no more recovery work can be queued so
>> + * wait for any recovery completion work to complete.
>> + */
>> if (osb->ocfs2_wq)
>> flush_workqueue(osb->ocfs2_wq);
>> +}
>> +
>> +void ocfs2_recovery_exit(struct ocfs2_super *osb)
>> +{
>> + struct ocfs2_recovery_map *rm;
>> +
>> + /* disable any new recovery threads and wait for any currently
>> + * running ones to exit. Do this before setting the vol_state. */
>> + ocfs2_recovery_disable(osb, OCFS2_REC_WANT_DISABLE);
>> /*
>> * Now that recovery is shut down, and the osb is about to be
>> @@ -1569,7 +1586,8 @@ static int __ocfs2_recovery_thread(void *arg)
>> ocfs2_free_replay_slots(osb);
>> osb->recovery_thread_task = NULL;
>> - mb(); /* sync with ocfs2_recovery_thread_running */
>> + if (osb->recovery_state == OCFS2_REC_WANT_DISABLE)
>> + osb->recovery_state = OCFS2_REC_DISABLED;
>> wake_up(&osb->recovery_event);
>> mutex_unlock(&osb->recovery_lock);
>> @@ -1585,13 +1603,13 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
>> int was_set = -1;
>> mutex_lock(&osb->recovery_lock);
>> - if (osb->recovery_state < OCFS2_REC_DISABLED)
>> + if (osb->recovery_state < OCFS2_REC_WANT_DISABLE)
>> was_set = ocfs2_recovery_map_set(osb, node_num);
>> trace_ocfs2_recovery_thread(node_num, osb->node_num,
>> osb->recovery_state, osb->recovery_thread_task, was_set);
>> - if (osb->recovery_state == OCFS2_REC_DISABLED)
>> + if (osb->recovery_state >= OCFS2_REC_WANT_DISABLE)
>> goto out;
>> if (osb->recovery_thread_task)
>> diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
>> index e713361939f0..70b2d5c8c228 100644
>> --- a/fs/ocfs2/ocfs2.h
>> +++ b/fs/ocfs2/ocfs2.h
>> @@ -310,6 +310,10 @@ void ocfs2_initialize_journal_triggers(struct super_block *sb,
>> enum ocfs2_recovery_state {
>> OCFS2_REC_ENABLED = 0,
>> + OCFS2_REC_WANT_DISABLE,
>> + /*
>> + * Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work
>> + */
>> OCFS2_REC_DISABLED,
>> };
>>
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v2 3/3] ocfs2: Stop quota recovery before disabling quotas
2025-04-22 12:41 [PATCH v2 0/3] ocfs2: Fix deadlocks in quota recovery Jan Kara
2025-04-22 12:41 ` [PATCH v2 1/3] ocfs2: Switch osb->disable_recovery to enum Jan Kara
2025-04-22 12:41 ` [PATCH v2 2/3] ocfs2: Implement handshaking with ocfs2 recovery thread Jan Kara
@ 2025-04-22 12:41 ` Jan Kara
2025-04-24 1:05 ` Joseph Qi
2 siblings, 1 reply; 9+ messages in thread
From: Jan Kara @ 2025-04-22 12:41 UTC (permalink / raw)
To: Joseph Qi; +Cc: ocfs2-devel, Heming Zhao, Murad Masimov, Shichangkuo, Jan Kara
Currently quota recovery is synchronized with unmount using sb->s_umount
semaphore. That is however prone to deadlocks because
flush_workqueue(osb->ocfs2_wq) called from umount code can wait for
quota recovery to complete while ocfs2_finish_quota_recovery() waits
for sb->s_umount semaphore.
Grabbing of sb->s_umount semaphore in ocfs2_finish_quota_recovery() is
only needed to protect that function from disabling of quotas from
ocfs2_dismount_volume(). Handle this problem by disabling quota recovery
early during unmount in ocfs2_dismount_volume() instead so that we can
drop acquisition of sb->s_umount from ocfs2_finish_quota_recovery().
Reported-by: Shichangkuo <shi.changkuo@h3c.com>
Reported-by: Murad Masimov <m.masimov@mt-integration.ru>
Reviewed-by: Heming Zhao <heming.zhao@suse.com>
Tested-by: Heming Zhao <heming.zhao@suse.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
fs/ocfs2/journal.c | 20 ++++++++++++++++++--
fs/ocfs2/journal.h | 1 +
fs/ocfs2/ocfs2.h | 6 ++++++
fs/ocfs2/quota_local.c | 9 ++-------
fs/ocfs2/super.c | 3 +++
5 files changed, 30 insertions(+), 9 deletions(-)
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 7511db5a46cd..f37831d5f95a 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -225,6 +225,11 @@ static void ocfs2_recovery_disable(struct ocfs2_super *osb,
flush_workqueue(osb->ocfs2_wq);
}
+void ocfs2_recovery_disable_quota(struct ocfs2_super *osb)
+{
+ ocfs2_recovery_disable(osb, OCFS2_REC_QUOTA_WANT_DISABLE);
+}
+
void ocfs2_recovery_exit(struct ocfs2_super *osb)
{
struct ocfs2_recovery_map *rm;
@@ -1489,6 +1494,18 @@ static int __ocfs2_recovery_thread(void *arg)
}
}
restart:
+ if (quota_enabled) {
+ mutex_lock(&osb->recovery_lock);
+ /* Confirm that recovery thread will no longer recover quotas */
+ if (osb->recovery_state == OCFS2_REC_QUOTA_WANT_DISABLE) {
+ osb->recovery_state = OCFS2_REC_QUOTA_DISABLED;
+ wake_up(&osb->recovery_event);
+ }
+ if (osb->recovery_state >= OCFS2_REC_QUOTA_DISABLED)
+ quota_enabled = 0;
+ mutex_unlock(&osb->recovery_lock);
+ }
+
status = ocfs2_super_lock(osb, 1);
if (status < 0) {
mlog_errno(status);
@@ -1592,8 +1609,7 @@ static int __ocfs2_recovery_thread(void *arg)
mutex_unlock(&osb->recovery_lock);
- if (quota_enabled)
- kfree(rm_quota);
+ kfree(rm_quota);
return status;
}
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index e3c3a35dc5e0..6397170f302f 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -148,6 +148,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
int ocfs2_recovery_init(struct ocfs2_super *osb);
void ocfs2_recovery_exit(struct ocfs2_super *osb);
+void ocfs2_recovery_disable_quota(struct ocfs2_super *osb);
int ocfs2_compute_replay_slots(struct ocfs2_super *osb);
void ocfs2_free_replay_slots(struct ocfs2_super *osb);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 70b2d5c8c228..6aaa94c554c1 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -310,6 +310,12 @@ void ocfs2_initialize_journal_triggers(struct super_block *sb,
enum ocfs2_recovery_state {
OCFS2_REC_ENABLED = 0,
+ OCFS2_REC_QUOTA_WANT_DISABLE,
+ /*
+ * Must be OCFS2_REC_QUOTA_WANT_DISABLE + 1 for
+ * ocfs2_recovery_disable_quota() to work.
+ */
+ OCFS2_REC_QUOTA_DISABLED,
OCFS2_REC_WANT_DISABLE,
/*
* Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 2956d888c131..e272429da3db 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -453,8 +453,7 @@ struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery(
/* Sync changes in local quota file into global quota file and
* reinitialize local quota file.
- * The function expects local quota file to be already locked and
- * s_umount locked in shared mode. */
+ * The function expects local quota file to be already locked. */
static int ocfs2_recover_local_quota_file(struct inode *lqinode,
int type,
struct ocfs2_quota_recovery *rec)
@@ -588,7 +587,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
{
unsigned int ino[OCFS2_MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
LOCAL_GROUP_QUOTA_SYSTEM_INODE };
- struct super_block *sb = osb->sb;
struct ocfs2_local_disk_dqinfo *ldinfo;
struct buffer_head *bh;
handle_t *handle;
@@ -600,7 +598,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
printk(KERN_NOTICE "ocfs2: Finishing quota recovery on device (%s) for "
"slot %u\n", osb->dev_str, slot_num);
- down_read(&sb->s_umount);
for (type = 0; type < OCFS2_MAXQUOTAS; type++) {
if (list_empty(&(rec->r_list[type])))
continue;
@@ -677,7 +674,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
break;
}
out:
- up_read(&sb->s_umount);
kfree(rec);
return status;
}
@@ -843,8 +839,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type)
ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
/*
- * s_umount held in exclusive mode protects us against racing with
- * recovery thread...
+ * ocfs2_dismount_volume() has already aborted quota recovery...
*/
if (oinfo->dqi_rec) {
ocfs2_free_quota_recovery(oinfo->dqi_rec);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 8bb5022f3082..3d2533950bae 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1812,6 +1812,9 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
/* Orphan scan should be stopped as early as possible */
ocfs2_orphan_scan_stop(osb);
+ /* Stop quota recovery so that we can disable quotas */
+ ocfs2_recovery_disable_quota(osb);
+
ocfs2_disable_quotas(osb);
/* All dquots should be freed by now */
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATCH v2 3/3] ocfs2: Stop quota recovery before disabling quotas
2025-04-22 12:41 ` [PATCH v2 3/3] ocfs2: Stop quota recovery before disabling quotas Jan Kara
@ 2025-04-24 1:05 ` Joseph Qi
0 siblings, 0 replies; 9+ messages in thread
From: Joseph Qi @ 2025-04-24 1:05 UTC (permalink / raw)
To: Jan Kara, akpm; +Cc: ocfs2-devel, Heming Zhao, Murad Masimov, Shichangkuo
On 2025/4/22 20:41, Jan Kara wrote:
> Currently quota recovery is synchronized with unmount using sb->s_umount
> semaphore. That is however prone to deadlocks because
> flush_workqueue(osb->ocfs2_wq) called from umount code can wait for
> quota recovery to complete while ocfs2_finish_quota_recovery() waits
> for sb->s_umount semaphore.
>
> Grabbing of sb->s_umount semaphore in ocfs2_finish_quota_recovery() is
> only needed to protect that function from disabling of quotas from
> ocfs2_dismount_volume(). Handle this problem by disabling quota recovery
> early during unmount in ocfs2_dismount_volume() instead so that we can
> drop acquisition of sb->s_umount from ocfs2_finish_quota_recovery().
>
> Reported-by: Shichangkuo <shi.changkuo@h3c.com>
> Reported-by: Murad Masimov <m.masimov@mt-integration.ru>
> Reviewed-by: Heming Zhao <heming.zhao@suse.com>
> Tested-by: Heming Zhao <heming.zhao@suse.com>
> Signed-off-by: Jan Kara <jack@suse.cz>
Acked-by: Joseph Qi <joseph.qi@linux.alibaba.com>
> ---
> fs/ocfs2/journal.c | 20 ++++++++++++++++++--
> fs/ocfs2/journal.h | 1 +
> fs/ocfs2/ocfs2.h | 6 ++++++
> fs/ocfs2/quota_local.c | 9 ++-------
> fs/ocfs2/super.c | 3 +++
> 5 files changed, 30 insertions(+), 9 deletions(-)
>
> diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
> index 7511db5a46cd..f37831d5f95a 100644
> --- a/fs/ocfs2/journal.c
> +++ b/fs/ocfs2/journal.c
> @@ -225,6 +225,11 @@ static void ocfs2_recovery_disable(struct ocfs2_super *osb,
> flush_workqueue(osb->ocfs2_wq);
> }
>
> +void ocfs2_recovery_disable_quota(struct ocfs2_super *osb)
> +{
> + ocfs2_recovery_disable(osb, OCFS2_REC_QUOTA_WANT_DISABLE);
> +}
> +
> void ocfs2_recovery_exit(struct ocfs2_super *osb)
> {
> struct ocfs2_recovery_map *rm;
> @@ -1489,6 +1494,18 @@ static int __ocfs2_recovery_thread(void *arg)
> }
> }
> restart:
> + if (quota_enabled) {
> + mutex_lock(&osb->recovery_lock);
> + /* Confirm that recovery thread will no longer recover quotas */
> + if (osb->recovery_state == OCFS2_REC_QUOTA_WANT_DISABLE) {
> + osb->recovery_state = OCFS2_REC_QUOTA_DISABLED;
> + wake_up(&osb->recovery_event);
> + }
> + if (osb->recovery_state >= OCFS2_REC_QUOTA_DISABLED)
> + quota_enabled = 0;
> + mutex_unlock(&osb->recovery_lock);
> + }
> +
> status = ocfs2_super_lock(osb, 1);
> if (status < 0) {
> mlog_errno(status);
> @@ -1592,8 +1609,7 @@ static int __ocfs2_recovery_thread(void *arg)
>
> mutex_unlock(&osb->recovery_lock);
>
> - if (quota_enabled)
> - kfree(rm_quota);
> + kfree(rm_quota);
>
> return status;
> }
> diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
> index e3c3a35dc5e0..6397170f302f 100644
> --- a/fs/ocfs2/journal.h
> +++ b/fs/ocfs2/journal.h
> @@ -148,6 +148,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
>
> int ocfs2_recovery_init(struct ocfs2_super *osb);
> void ocfs2_recovery_exit(struct ocfs2_super *osb);
> +void ocfs2_recovery_disable_quota(struct ocfs2_super *osb);
>
> int ocfs2_compute_replay_slots(struct ocfs2_super *osb);
> void ocfs2_free_replay_slots(struct ocfs2_super *osb);
> diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
> index 70b2d5c8c228..6aaa94c554c1 100644
> --- a/fs/ocfs2/ocfs2.h
> +++ b/fs/ocfs2/ocfs2.h
> @@ -310,6 +310,12 @@ void ocfs2_initialize_journal_triggers(struct super_block *sb,
>
> enum ocfs2_recovery_state {
> OCFS2_REC_ENABLED = 0,
> + OCFS2_REC_QUOTA_WANT_DISABLE,
> + /*
> + * Must be OCFS2_REC_QUOTA_WANT_DISABLE + 1 for
> + * ocfs2_recovery_disable_quota() to work.
> + */
> + OCFS2_REC_QUOTA_DISABLED,
> OCFS2_REC_WANT_DISABLE,
> /*
> * Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work
> diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
> index 2956d888c131..e272429da3db 100644
> --- a/fs/ocfs2/quota_local.c
> +++ b/fs/ocfs2/quota_local.c
> @@ -453,8 +453,7 @@ struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery(
>
> /* Sync changes in local quota file into global quota file and
> * reinitialize local quota file.
> - * The function expects local quota file to be already locked and
> - * s_umount locked in shared mode. */
> + * The function expects local quota file to be already locked. */
> static int ocfs2_recover_local_quota_file(struct inode *lqinode,
> int type,
> struct ocfs2_quota_recovery *rec)
> @@ -588,7 +587,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
> {
> unsigned int ino[OCFS2_MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
> LOCAL_GROUP_QUOTA_SYSTEM_INODE };
> - struct super_block *sb = osb->sb;
> struct ocfs2_local_disk_dqinfo *ldinfo;
> struct buffer_head *bh;
> handle_t *handle;
> @@ -600,7 +598,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
> printk(KERN_NOTICE "ocfs2: Finishing quota recovery on device (%s) for "
> "slot %u\n", osb->dev_str, slot_num);
>
> - down_read(&sb->s_umount);
> for (type = 0; type < OCFS2_MAXQUOTAS; type++) {
> if (list_empty(&(rec->r_list[type])))
> continue;
> @@ -677,7 +674,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
> break;
> }
> out:
> - up_read(&sb->s_umount);
> kfree(rec);
> return status;
> }
> @@ -843,8 +839,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type)
> ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
>
> /*
> - * s_umount held in exclusive mode protects us against racing with
> - * recovery thread...
> + * ocfs2_dismount_volume() has already aborted quota recovery...
> */
> if (oinfo->dqi_rec) {
> ocfs2_free_quota_recovery(oinfo->dqi_rec);
> diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
> index 8bb5022f3082..3d2533950bae 100644
> --- a/fs/ocfs2/super.c
> +++ b/fs/ocfs2/super.c
> @@ -1812,6 +1812,9 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
> /* Orphan scan should be stopped as early as possible */
> ocfs2_orphan_scan_stop(osb);
>
> + /* Stop quota recovery so that we can disable quotas */
> + ocfs2_recovery_disable_quota(osb);
> +
> ocfs2_disable_quotas(osb);
>
> /* All dquots should be freed by now */
^ permalink raw reply [flat|nested] 9+ messages in thread