public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: Marc Dionne <marc.dionne@auristor.com>
Cc: David Howells <dhowells@redhat.com>,
	linux-afs@lists.infradead.org, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH 41/41] afs: Fix offline and busy handling
Date: Thu,  9 Nov 2023 15:40:04 +0000	[thread overview]
Message-ID: <20231109154004.3317227-42-dhowells@redhat.com> (raw)
In-Reply-To: <20231109154004.3317227-1-dhowells@redhat.com>

The current code assumes offline and busy volume states apply to all
instances of a volume, not just the one on the server that returned
VOFFLINE or VBUSY.

Fix that by moving the flags recording this to the afs_server_entry struct
that is used to represent a particular instance of a volume on a specific
server.

Further, add a sleep for when we have iterated through all the servers so
that we don't keep poking the server every few milliseconds.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
---
 fs/afs/internal.h          |  7 ++++---
 fs/afs/rotate.c            | 40 ++++++++++++++++++++++++--------------
 include/trace/events/afs.h |  1 +
 3 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index d1031241d11b..eb59b0487f8b 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -603,6 +603,9 @@ struct afs_server_entry {
 	struct afs_volume	*volume;
 	struct list_head	slink;		/* Link in server->volumes */
 	time64_t		cb_expires_at;	/* Time at which volume-level callback expires */
+	unsigned long		flags;
+#define AFS_SE_VOLUME_OFFLINE	0		/* Set if volume offline notice given */
+#define AFS_SE_VOLUME_BUSY	1		/* Set if volume busy notice given */
 };
 
 struct afs_server_list {
@@ -636,9 +639,7 @@ struct afs_volume {
 #define AFS_VOLUME_UPDATING	1	/* - T if an update is in progress */
 #define AFS_VOLUME_WAIT		2	/* - T if users must wait for update */
 #define AFS_VOLUME_DELETED	3	/* - T if volume appears deleted */
-#define AFS_VOLUME_OFFLINE	4	/* - T if volume offline notice given */
-#define AFS_VOLUME_BUSY		5	/* - T if volume busy notice given */
-#define AFS_VOLUME_MAYBE_NO_IBULK 6	/* - T if some servers don't have InlineBulkStatus */
+#define AFS_VOLUME_MAYBE_NO_IBULK 4	/* - T if some servers don't have InlineBulkStatus */
 #ifdef CONFIG_AFS_FSCACHE
 	struct fscache_volume	*cache;		/* Caching cookie */
 #endif
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index 3f656dcb0adf..cb0ab1c2c401 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -111,7 +111,7 @@ static bool afs_start_fs_iteration(struct afs_operation *op,
 /*
  * Post volume busy note.
  */
-static void afs_busy(struct afs_volume *volume, u32 abort_code)
+static void afs_busy(struct afs_operation *op, u32 abort_code)
 {
 	const char *m;
 
@@ -122,7 +122,8 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code)
 	default:		m = "busy";		break;
 	}
 
-	pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m);
+	pr_notice("kAFS: Volume %llu '%s' on server %pU is %s\n",
+		  op->volume->vid, op->volume->name, &op->server->uuid, m);
 }
 
 /*
@@ -130,6 +131,7 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code)
  */
 static bool afs_sleep_and_retry(struct afs_operation *op)
 {
+	trace_afs_rotate(op, afs_rotate_trace_busy_sleep, 0);
 	if (!(op->flags & AFS_OPERATION_UNINTR)) {
 		msleep_interruptible(1000);
 		if (signal_pending(current)) {
@@ -180,6 +182,10 @@ bool afs_select_fileserver(struct afs_operation *op)
 	/* Evaluate the result of the previous operation, if there was one. */
 	switch (op->call_error) {
 	case 0:
+		clear_bit(AFS_SE_VOLUME_OFFLINE,
+			  &op->server_list->servers[op->server_index].flags);
+		clear_bit(AFS_SE_VOLUME_BUSY,
+			  &op->server_list->servers[op->server_index].flags);
 		op->cumul_error.responded = true;
 		fallthrough;
 	default:
@@ -297,18 +303,16 @@ bool afs_select_fileserver(struct afs_operation *op)
 			 * expected to come back but it might take a long time (could be
 			 * days).
 			 */
-			if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &op->volume->flags)) {
-				afs_busy(op->volume, abort_code);
-				clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
+			if (!test_and_set_bit(AFS_SE_VOLUME_OFFLINE,
+					      &op->server_list->servers[op->server_index].flags)) {
+				afs_busy(op, abort_code);
+				clear_bit(AFS_SE_VOLUME_BUSY,
+					  &op->server_list->servers[op->server_index].flags);
 			}
 			if (op->flags & AFS_OPERATION_NO_VSLEEP) {
 				afs_op_set_error(op, -EADV);
 				goto failed;
 			}
-			if (op->flags & AFS_OPERATION_CUR_ONLY) {
-				afs_op_set_error(op, -ESTALE);
-				goto failed;
-			}
 			goto busy;
 
 		case VRESTARTING: /* The fileserver is either shutting down or starting up. */
@@ -329,9 +333,11 @@ bool afs_select_fileserver(struct afs_operation *op)
 				afs_op_set_error(op, -EBUSY);
 				goto failed;
 			}
-			if (!test_and_set_bit(AFS_VOLUME_BUSY, &op->volume->flags)) {
-				afs_busy(op->volume, abort_code);
-				clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
+			if (!test_and_set_bit(AFS_SE_VOLUME_BUSY,
+					      &op->server_list->servers[op->server_index].flags)) {
+				afs_busy(op, abort_code);
+				clear_bit(AFS_SE_VOLUME_OFFLINE,
+					  &op->server_list->servers[op->server_index].flags);
 			}
 		busy:
 			if (op->flags & AFS_OPERATION_CUR_ONLY) {
@@ -411,8 +417,10 @@ bool afs_select_fileserver(struct afs_operation *op)
 		default:
 			afs_op_accumulate_error(op, error, abort_code);
 		failed_but_online:
-			clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
-			clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
+			clear_bit(AFS_SE_VOLUME_OFFLINE,
+				  &op->server_list->servers[op->server_index].flags);
+			clear_bit(AFS_SE_VOLUME_BUSY,
+				  &op->server_list->servers[op->server_index].flags);
 			goto failed;
 		}
 
@@ -640,8 +648,10 @@ bool afs_select_fileserver(struct afs_operation *op)
 	 * of them were busy.
 	 */
 	trace_afs_rotate(op, afs_rotate_trace_no_more_servers, 0);
-	if (op->flags & AFS_OPERATION_VBUSY)
+	if (op->flags & AFS_OPERATION_VBUSY) {
+		afs_sleep_and_retry(op);
 		goto restart_from_beginning;
+	}
 
 	rcu_read_lock();
 	for (i = 0; i < op->server_list->nr_servers; i++) {
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 804d9e147314..ac50fa687429 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -446,6 +446,7 @@ enum yfs_cm_operation {
 
 #define afs_rotate_traces						\
 	EM(afs_rotate_trace_aborted,		"Abortd")		\
+	EM(afs_rotate_trace_busy_sleep,		"BsySlp")		\
 	EM(afs_rotate_trace_check_vol_status,	"VolStt")		\
 	EM(afs_rotate_trace_failed,		"Failed")		\
 	EM(afs_rotate_trace_iter,		"Iter  ")		\


      parent reply	other threads:[~2023-11-09 15:44 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-09 15:39 [PATCH 00/41] afs: Fix probe handling, server rotation and RO volume callback handling David Howells
2023-11-09 15:39 ` [PATCH 01/41] rxrpc: Fix RTT determination to use PING ACKs as a source David Howells
2023-11-09 17:16   ` Jeffrey E Altman
2023-11-09 22:06     ` David Howells
2023-11-10 14:15       ` Jeffrey E Altman
2023-11-10 16:12         ` Jeffrey E Altman
2023-11-10 17:25         ` David Howells
2023-11-10 21:54           ` Jeffrey E Altman
2023-11-09 15:39 ` [PATCH 02/41] rxrpc: Fix two connection reaping bugs David Howells
2023-11-09 17:27   ` Jeffrey E Altman
2023-11-09 17:50   ` patchwork-bot+netdevbpf
2023-11-09 15:39 ` [PATCH 03/41] rxrpc: Fix some minor issues with bundle tracing David Howells
2023-11-09 15:39 ` [PATCH 04/41] afs: Fix afs_server_list to be cleaned up with RCU David Howells
2023-11-09 15:39 ` [PATCH 05/41] afs: Make error on cell lookup failure consistent with OpenAFS David Howells
2023-11-09 15:39 ` [PATCH 06/41] afs: Remove whitespace before most ')' from the trace header David Howells
2023-11-09 15:39 ` [PATCH 07/41] afs: Automatically generate trace tag enums David Howells
2023-11-09 15:39 ` [PATCH 08/41] afs: Add comments on abort handling David Howells
2023-11-09 17:41   ` Jeffrey E Altman
2023-11-09 15:39 ` [PATCH 09/41] afs: Turn the afs_addr_list address array into an array of structs David Howells
2023-11-09 15:39 ` [PATCH 10/41] rxrpc, afs: Allow afs to pin rxrpc_peer objects David Howells
2023-11-09 17:48   ` Marc Dionne
2023-11-09 15:39 ` [PATCH 11/41] afs: Don't skip server addresses for which we didn't get an RTT reading David Howells
2023-11-09 15:39 ` [PATCH 12/41] afs: Rename addr_list::failed to probe_failed David Howells
2023-11-09 15:39 ` [PATCH 13/41] afs: Handle the VIO abort explicitly David Howells
2023-11-09 18:12   ` Jeffrey E Altman
2023-11-09 15:39 ` [PATCH 14/41] afs: Use op->nr_iterations=-1 to indicate to begin fileserver iteration David Howells
2023-11-09 15:39 ` [PATCH 15/41] afs: Return ENOENT if no cell DNS record can be found David Howells
2023-11-09 15:39 ` [PATCH 16/41] afs: Wrap most op->error accesses with inline funcs David Howells
2023-11-09 15:39 ` [PATCH 17/41] afs: Don't put afs_call in afs_wait_for_call_to_complete() David Howells
2023-11-09 15:39 ` [PATCH 18/41] afs: Simplify error handling David Howells
2023-11-09 15:39 ` [PATCH 19/41] afs: Add a tracepoint for struct afs_addr_list David Howells
2023-11-09 15:39 ` [PATCH 20/41] afs: Rename some fields David Howells
2023-11-09 15:39 ` [PATCH 21/41] afs: Use peer + service_id as call address David Howells
2023-11-09 15:39 ` [PATCH 22/41] afs: Fold the afs_addr_cursor struct in David Howells
2023-11-09 15:39 ` [PATCH 23/41] rxrpc: Create a procfile to display outstanding clien conn bundles David Howells
2023-11-09 18:20   ` Jeffrey E Altman
2023-11-09 15:39 ` [PATCH 24/41] afs: Add some more info to /proc/net/afs/servers David Howells
2023-11-09 15:39 ` [PATCH 25/41] afs: Remove the unimplemented afs_cmp_addr_list() David Howells
2023-11-09 15:39 ` [PATCH 26/41] afs: Provide a way to configure address priorities David Howells
2023-11-09 15:39 ` [PATCH 27/41] afs: Mark address lists with configured priorities David Howells
2023-11-09 15:39 ` [PATCH 28/41] afs: Dispatch fileserver probes in priority order David Howells
2023-11-09 15:39 ` [PATCH 29/41] afs: Dispatch vlserver " David Howells
2023-11-09 15:39 ` [PATCH 30/41] afs: Keep a record of the current fileserver endpoint state David Howells
2023-11-09 15:39 ` [PATCH 31/41] afs: Combine the endpoint state bools into a bitmask David Howells
2023-11-09 15:39 ` [PATCH 32/41] afs: Fix file locking on R/O volumes to operate in local mode David Howells
2023-11-09 15:39 ` [PATCH 33/41] afs: Mark a superblock for an R/O or Backup volume as SB_RDONLY David Howells
2023-11-09 15:39 ` [PATCH 34/41] afs: Make it possible to find the volumes that are using a server David Howells
2023-11-09 15:39 ` [PATCH 35/41] afs: Defer volume record destruction to a workqueue David Howells
2023-11-09 15:39 ` [PATCH 36/41] afs: Move the vnode/volume validity checking code into its own file David Howells
2023-11-09 15:40 ` [PATCH 37/41] afs: Apply server breaks to mmap'd files in the call processor David Howells
2023-11-09 15:40 ` [PATCH 38/41] afs: Parse the VolSync record in the reply of a number of RPC ops David Howells
2023-11-09 15:40 ` [PATCH 39/41] afs: Overhaul invalidation handling to better support RO volumes David Howells
2023-11-09 19:00   ` Jeffrey E Altman
2023-11-13 15:58     ` [PATCH 42/41] afs: Fix the handling of " David Howells
2023-11-09 15:40 ` [PATCH 40/41] afs: Fix fileserver rotation David Howells
2023-11-09 15:40 ` David Howells [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231109154004.3317227-42-dhowells@redhat.com \
    --to=dhowells@redhat.com \
    --cc=linux-afs@lists.infradead.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=marc.dionne@auristor.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox