From: David Howells <dhowells@redhat.com>
To: Marc Dionne <marc.dionne@auristor.com>
Cc: David Howells <dhowells@redhat.com>,
linux-afs@lists.infradead.org, linux-fsdevel@vger.kernel.org,
linux-kernel@vger.kernel.org
Subject: [PATCH 41/41] afs: Fix offline and busy handling
Date: Thu, 9 Nov 2023 15:40:04 +0000 [thread overview]
Message-ID: <20231109154004.3317227-42-dhowells@redhat.com> (raw)
In-Reply-To: <20231109154004.3317227-1-dhowells@redhat.com>
The current code assumes offline and busy volume states apply to all
instances of a volume, not just the one on the server that returned
VOFFLINE or VBUSY.
Fix that by moving the flags recording this to the afs_server_entry struct
that is used to represent a particular instance of a volume on a specific
server.
Further, add a sleep for when we have iterated through all the servers so
that we don't keep poking the servers every few milliseconds.
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
---
fs/afs/internal.h | 7 ++++---
fs/afs/rotate.c | 40 ++++++++++++++++++++++++--------------
include/trace/events/afs.h | 1 +
3 files changed, 30 insertions(+), 18 deletions(-)
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index d1031241d11b..eb59b0487f8b 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -603,6 +603,9 @@ struct afs_server_entry {
struct afs_volume *volume;
struct list_head slink; /* Link in server->volumes */
time64_t cb_expires_at; /* Time at which volume-level callback expires */
+ unsigned long flags;
+#define AFS_SE_VOLUME_OFFLINE 0 /* Set if volume offline notice given */
+#define AFS_SE_VOLUME_BUSY 1 /* Set if volume busy notice given */
};
struct afs_server_list {
@@ -636,9 +639,7 @@ struct afs_volume {
#define AFS_VOLUME_UPDATING 1 /* - T if an update is in progress */
#define AFS_VOLUME_WAIT 2 /* - T if users must wait for update */
#define AFS_VOLUME_DELETED 3 /* - T if volume appears deleted */
-#define AFS_VOLUME_OFFLINE 4 /* - T if volume offline notice given */
-#define AFS_VOLUME_BUSY 5 /* - T if volume busy notice given */
-#define AFS_VOLUME_MAYBE_NO_IBULK 6 /* - T if some servers don't have InlineBulkStatus */
+#define AFS_VOLUME_MAYBE_NO_IBULK 4 /* - T if some servers don't have InlineBulkStatus */
#ifdef CONFIG_AFS_FSCACHE
struct fscache_volume *cache; /* Caching cookie */
#endif
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index 3f656dcb0adf..cb0ab1c2c401 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -111,7 +111,7 @@ static bool afs_start_fs_iteration(struct afs_operation *op,
/*
* Post volume busy note.
*/
-static void afs_busy(struct afs_volume *volume, u32 abort_code)
+static void afs_busy(struct afs_operation *op, u32 abort_code)
{
const char *m;
@@ -122,7 +122,8 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code)
default: m = "busy"; break;
}
- pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m);
+ pr_notice("kAFS: Volume %llu '%s' on server %pU is %s\n",
+ op->volume->vid, op->volume->name, &op->server->uuid, m);
}
/*
@@ -130,6 +131,7 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code)
*/
static bool afs_sleep_and_retry(struct afs_operation *op)
{
+ trace_afs_rotate(op, afs_rotate_trace_busy_sleep, 0);
if (!(op->flags & AFS_OPERATION_UNINTR)) {
msleep_interruptible(1000);
if (signal_pending(current)) {
@@ -180,6 +182,10 @@ bool afs_select_fileserver(struct afs_operation *op)
/* Evaluate the result of the previous operation, if there was one. */
switch (op->call_error) {
case 0:
+ clear_bit(AFS_SE_VOLUME_OFFLINE,
+ &op->server_list->servers[op->server_index].flags);
+ clear_bit(AFS_SE_VOLUME_BUSY,
+ &op->server_list->servers[op->server_index].flags);
op->cumul_error.responded = true;
fallthrough;
default:
@@ -297,18 +303,16 @@ bool afs_select_fileserver(struct afs_operation *op)
* expected to come back but it might take a long time (could be
* days).
*/
- if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &op->volume->flags)) {
- afs_busy(op->volume, abort_code);
- clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
+ if (!test_and_set_bit(AFS_SE_VOLUME_OFFLINE,
+ &op->server_list->servers[op->server_index].flags)) {
+ afs_busy(op, abort_code);
+ clear_bit(AFS_SE_VOLUME_BUSY,
+ &op->server_list->servers[op->server_index].flags);
}
if (op->flags & AFS_OPERATION_NO_VSLEEP) {
afs_op_set_error(op, -EADV);
goto failed;
}
- if (op->flags & AFS_OPERATION_CUR_ONLY) {
- afs_op_set_error(op, -ESTALE);
- goto failed;
- }
goto busy;
case VRESTARTING: /* The fileserver is either shutting down or starting up. */
@@ -329,9 +333,11 @@ bool afs_select_fileserver(struct afs_operation *op)
afs_op_set_error(op, -EBUSY);
goto failed;
}
- if (!test_and_set_bit(AFS_VOLUME_BUSY, &op->volume->flags)) {
- afs_busy(op->volume, abort_code);
- clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
+ if (!test_and_set_bit(AFS_SE_VOLUME_BUSY,
+ &op->server_list->servers[op->server_index].flags)) {
+ afs_busy(op, abort_code);
+ clear_bit(AFS_SE_VOLUME_OFFLINE,
+ &op->server_list->servers[op->server_index].flags);
}
busy:
if (op->flags & AFS_OPERATION_CUR_ONLY) {
@@ -411,8 +417,10 @@ bool afs_select_fileserver(struct afs_operation *op)
default:
afs_op_accumulate_error(op, error, abort_code);
failed_but_online:
- clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
- clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
+ clear_bit(AFS_SE_VOLUME_OFFLINE,
+ &op->server_list->servers[op->server_index].flags);
+ clear_bit(AFS_SE_VOLUME_BUSY,
+ &op->server_list->servers[op->server_index].flags);
goto failed;
}
@@ -640,8 +648,10 @@ bool afs_select_fileserver(struct afs_operation *op)
* of them were busy.
*/
trace_afs_rotate(op, afs_rotate_trace_no_more_servers, 0);
- if (op->flags & AFS_OPERATION_VBUSY)
+ if (op->flags & AFS_OPERATION_VBUSY) {
+ afs_sleep_and_retry(op);
goto restart_from_beginning;
+ }
rcu_read_lock();
for (i = 0; i < op->server_list->nr_servers; i++) {
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 804d9e147314..ac50fa687429 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -446,6 +446,7 @@ enum yfs_cm_operation {
#define afs_rotate_traces \
EM(afs_rotate_trace_aborted, "Abortd") \
+ EM(afs_rotate_trace_busy_sleep, "BsySlp") \
EM(afs_rotate_trace_check_vol_status, "VolStt") \
EM(afs_rotate_trace_failed, "Failed") \
EM(afs_rotate_trace_iter, "Iter ") \
prev parent reply other threads:[~2023-11-09 15:44 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-11-09 15:39 [PATCH 00/41] afs: Fix probe handling, server rotation and RO volume callback handling David Howells
2023-11-09 15:39 ` [PATCH 01/41] rxrpc: Fix RTT determination to use PING ACKs as a source David Howells
2023-11-09 17:16 ` Jeffrey E Altman
2023-11-09 22:06 ` David Howells
2023-11-10 14:15 ` Jeffrey E Altman
2023-11-10 16:12 ` Jeffrey E Altman
2023-11-10 17:25 ` David Howells
2023-11-10 21:54 ` Jeffrey E Altman
2023-11-09 15:39 ` [PATCH 02/41] rxrpc: Fix two connection reaping bugs David Howells
2023-11-09 17:27 ` Jeffrey E Altman
2023-11-09 17:50 ` patchwork-bot+netdevbpf
2023-11-09 15:39 ` [PATCH 03/41] rxrpc: Fix some minor issues with bundle tracing David Howells
2023-11-09 15:39 ` [PATCH 04/41] afs: Fix afs_server_list to be cleaned up with RCU David Howells
2023-11-09 15:39 ` [PATCH 05/41] afs: Make error on cell lookup failure consistent with OpenAFS David Howells
2023-11-09 15:39 ` [PATCH 06/41] afs: Remove whitespace before most ')' from the trace header David Howells
2023-11-09 15:39 ` [PATCH 07/41] afs: Automatically generate trace tag enums David Howells
2023-11-09 15:39 ` [PATCH 08/41] afs: Add comments on abort handling David Howells
2023-11-09 17:41 ` Jeffrey E Altman
2023-11-09 15:39 ` [PATCH 09/41] afs: Turn the afs_addr_list address array into an array of structs David Howells
2023-11-09 15:39 ` [PATCH 10/41] rxrpc, afs: Allow afs to pin rxrpc_peer objects David Howells
2023-11-09 17:48 ` Marc Dionne
2023-11-09 15:39 ` [PATCH 11/41] afs: Don't skip server addresses for which we didn't get an RTT reading David Howells
2023-11-09 15:39 ` [PATCH 12/41] afs: Rename addr_list::failed to probe_failed David Howells
2023-11-09 15:39 ` [PATCH 13/41] afs: Handle the VIO abort explicitly David Howells
2023-11-09 18:12 ` Jeffrey E Altman
2023-11-09 15:39 ` [PATCH 14/41] afs: Use op->nr_iterations=-1 to indicate to begin fileserver iteration David Howells
2023-11-09 15:39 ` [PATCH 15/41] afs: Return ENOENT if no cell DNS record can be found David Howells
2023-11-09 15:39 ` [PATCH 16/41] afs: Wrap most op->error accesses with inline funcs David Howells
2023-11-09 15:39 ` [PATCH 17/41] afs: Don't put afs_call in afs_wait_for_call_to_complete() David Howells
2023-11-09 15:39 ` [PATCH 18/41] afs: Simplify error handling David Howells
2023-11-09 15:39 ` [PATCH 19/41] afs: Add a tracepoint for struct afs_addr_list David Howells
2023-11-09 15:39 ` [PATCH 20/41] afs: Rename some fields David Howells
2023-11-09 15:39 ` [PATCH 21/41] afs: Use peer + service_id as call address David Howells
2023-11-09 15:39 ` [PATCH 22/41] afs: Fold the afs_addr_cursor struct in David Howells
2023-11-09 15:39 ` [PATCH 23/41] rxrpc: Create a procfile to display outstanding client conn bundles David Howells
2023-11-09 18:20 ` Jeffrey E Altman
2023-11-09 15:39 ` [PATCH 24/41] afs: Add some more info to /proc/net/afs/servers David Howells
2023-11-09 15:39 ` [PATCH 25/41] afs: Remove the unimplemented afs_cmp_addr_list() David Howells
2023-11-09 15:39 ` [PATCH 26/41] afs: Provide a way to configure address priorities David Howells
2023-11-09 15:39 ` [PATCH 27/41] afs: Mark address lists with configured priorities David Howells
2023-11-09 15:39 ` [PATCH 28/41] afs: Dispatch fileserver probes in priority order David Howells
2023-11-09 15:39 ` [PATCH 29/41] afs: Dispatch vlserver " David Howells
2023-11-09 15:39 ` [PATCH 30/41] afs: Keep a record of the current fileserver endpoint state David Howells
2023-11-09 15:39 ` [PATCH 31/41] afs: Combine the endpoint state bools into a bitmask David Howells
2023-11-09 15:39 ` [PATCH 32/41] afs: Fix file locking on R/O volumes to operate in local mode David Howells
2023-11-09 15:39 ` [PATCH 33/41] afs: Mark a superblock for an R/O or Backup volume as SB_RDONLY David Howells
2023-11-09 15:39 ` [PATCH 34/41] afs: Make it possible to find the volumes that are using a server David Howells
2023-11-09 15:39 ` [PATCH 35/41] afs: Defer volume record destruction to a workqueue David Howells
2023-11-09 15:39 ` [PATCH 36/41] afs: Move the vnode/volume validity checking code into its own file David Howells
2023-11-09 15:40 ` [PATCH 37/41] afs: Apply server breaks to mmap'd files in the call processor David Howells
2023-11-09 15:40 ` [PATCH 38/41] afs: Parse the VolSync record in the reply of a number of RPC ops David Howells
2023-11-09 15:40 ` [PATCH 39/41] afs: Overhaul invalidation handling to better support RO volumes David Howells
2023-11-09 19:00 ` Jeffrey E Altman
2023-11-13 15:58 ` [PATCH 42/41] afs: Fix the handling of " David Howells
2023-11-09 15:40 ` [PATCH 40/41] afs: Fix fileserver rotation David Howells
2023-11-09 15:40 ` David Howells [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231109154004.3317227-42-dhowells@redhat.com \
--to=dhowells@redhat.com \
--cc=linux-afs@lists.infradead.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=marc.dionne@auristor.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.