* [RFC PATCH 1/1] SUNRPC: increase max timeout for rebind to handle NFS server restart
@ 2023-02-10 8:10 Dai Ngo
2023-02-17 18:22 ` dai.ngo
0 siblings, 1 reply; 3+ messages in thread
From: Dai Ngo @ 2023-02-10 8:10 UTC (permalink / raw)
To: trondmy; +Cc: linux-nfs
Occasionally NLM lock and unlock requests fail with EIO and ENOLCK
respectively. This usually happens when the NFS server is restarted
while an NLM lock test is running.
Currently there is a 9-second limit for retrying the bind operation.
If the server is under load, the port mapper might take more than 9
seconds to become ready after the NFS server is restarted.
This patch increases the timeout for rebind from 9 to 30 seconds,
allowing a bit more time for the port mapper to become ready.
Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
include/linux/sunrpc/clnt.h | 3 +++
include/linux/sunrpc/sched.h | 4 ++--
net/sunrpc/clnt.c | 2 +-
net/sunrpc/sched.c | 3 ++-
4 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 770ef2cb5775..7f2dee56c121 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
#define RPC_CLNT_CREATE_REUSEPORT (1UL << 11)
#define RPC_CLNT_CREATE_CONNECTED (1UL << 12)
+#define RPC_CLNT_REBIND_DELAY 3
+#define RPC_CLNT_REBIND_MAX_TIMEOUT 30
+
struct rpc_clnt *rpc_create(struct rpc_create_args *args);
struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
const struct rpc_program *, u32);
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index b8ca3ecaf8d7..e9dc142f10bb 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -90,8 +90,8 @@ struct rpc_task {
#endif
unsigned char tk_priority : 2,/* Task priority */
tk_garb_retry : 2,
- tk_cred_retry : 2,
- tk_rebind_retry : 2;
+ tk_cred_retry : 2;
+ unsigned char tk_rebind_retry;
};
typedef void (*rpc_action)(struct rpc_task *);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 0b0b9f1eed46..6c89a1fa40bf 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task *task)
if (task->tk_rebind_retry == 0)
break;
task->tk_rebind_retry--;
- rpc_delay(task, 3*HZ);
+ rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
goto retry_timeout;
case -ENOBUFS:
rpc_delay(task, HZ >> 2);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index be587a308e05..5c18a35752aa 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -817,7 +817,8 @@ rpc_init_task_statistics(struct rpc_task *task)
/* Initialize retry counters */
task->tk_garb_retry = 2;
task->tk_cred_retry = 2;
- task->tk_rebind_retry = 2;
+ task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
+ RPC_CLNT_REBIND_DELAY;
/* starting timestamp */
task->tk_start = ktime_get();
--
2.9.5
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [RFC PATCH 1/1] SUNRPC: increase max timeout for rebind to handle NFS server restart
2023-02-10 8:10 [RFC PATCH 1/1] SUNRPC: increase max timeout for rebind to handle NFS server restart Dai Ngo
@ 2023-02-17 18:22 ` dai.ngo
2023-02-23 5:40 ` dai.ngo
0 siblings, 1 reply; 3+ messages in thread
From: dai.ngo @ 2023-02-17 18:22 UTC (permalink / raw)
To: trondmy; +Cc: linux-nfs
Hi Trond,
Could you please let me know your opinion on this patch?
Thanks,
-Dai
On 2/10/23 12:10 AM, Dai Ngo wrote:
> Occasionally NLM lock and unlock request fail with EIO and ENOLCK
> respectively. This usually happens when the NFS server is restarted
> while NLM lock test is running.
>
> Currently there is a 9 seconds limit for retrying the bind operation.
> If the server is under load the port mapper might take more than 9
> seconds to become ready after the NFS server restarted.
>
> This patch increases the timeout for rebind from 9 to 30 seconds
> allowing a bit more time for the port mapper to become ready.
>
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
> include/linux/sunrpc/clnt.h | 3 +++
> include/linux/sunrpc/sched.h | 4 ++--
> net/sunrpc/clnt.c | 2 +-
> net/sunrpc/sched.c | 3 ++-
> 4 files changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
> index 770ef2cb5775..7f2dee56c121 100644
> --- a/include/linux/sunrpc/clnt.h
> +++ b/include/linux/sunrpc/clnt.h
> @@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
> #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11)
> #define RPC_CLNT_CREATE_CONNECTED (1UL << 12)
>
> +#define RPC_CLNT_REBIND_DELAY 3
> +#define RPC_CLNT_REBIND_MAX_TIMEOUT 30
> +
> struct rpc_clnt *rpc_create(struct rpc_create_args *args);
> struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
> const struct rpc_program *, u32);
> diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
> index b8ca3ecaf8d7..e9dc142f10bb 100644
> --- a/include/linux/sunrpc/sched.h
> +++ b/include/linux/sunrpc/sched.h
> @@ -90,8 +90,8 @@ struct rpc_task {
> #endif
> unsigned char tk_priority : 2,/* Task priority */
> tk_garb_retry : 2,
> - tk_cred_retry : 2,
> - tk_rebind_retry : 2;
> + tk_cred_retry : 2;
> + unsigned char tk_rebind_retry;
> };
>
> typedef void (*rpc_action)(struct rpc_task *);
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index 0b0b9f1eed46..6c89a1fa40bf 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task *task)
> if (task->tk_rebind_retry == 0)
> break;
> task->tk_rebind_retry--;
> - rpc_delay(task, 3*HZ);
> + rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
> goto retry_timeout;
> case -ENOBUFS:
> rpc_delay(task, HZ >> 2);
> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
> index be587a308e05..5c18a35752aa 100644
> --- a/net/sunrpc/sched.c
> +++ b/net/sunrpc/sched.c
> @@ -817,7 +817,8 @@ rpc_init_task_statistics(struct rpc_task *task)
> /* Initialize retry counters */
> task->tk_garb_retry = 2;
> task->tk_cred_retry = 2;
> - task->tk_rebind_retry = 2;
> + task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
> + RPC_CLNT_REBIND_DELAY;
>
> /* starting timestamp */
> task->tk_start = ktime_get();
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [RFC PATCH 1/1] SUNRPC: increase max timeout for rebind to handle NFS server restart
2023-02-17 18:22 ` dai.ngo
@ 2023-02-23 5:40 ` dai.ngo
0 siblings, 0 replies; 3+ messages in thread
From: dai.ngo @ 2023-02-23 5:40 UTC (permalink / raw)
To: Anna Schumaker; +Cc: linux-nfs, Trond Myklebust
Hi Anna,
Just a reminder that this patch is still waiting for a review.
Thanks,
-Dai
On 2/17/23 10:22 AM, dai.ngo@oracle.com wrote:
> Hi Trond,
>
> Could you please let me know your opinion on this patch?
>
> Thanks,
> -Dai
>
> On 2/10/23 12:10 AM, Dai Ngo wrote:
>> Occasionally NLM lock and unlock request fail with EIO and ENOLCK
>> respectively. This usually happens when the NFS server is restarted
>> while NLM lock test is running.
>>
>> Currently there is a 9 seconds limit for retrying the bind operation.
>> If the server is under load the port mapper might take more than 9
>> seconds to become ready after the NFS server restarted.
>>
>> This patch increases the timeout for rebind from 9 to 30 seconds
>> allowing a bit more time for the port mapper to become ready.
>>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>> include/linux/sunrpc/clnt.h | 3 +++
>> include/linux/sunrpc/sched.h | 4 ++--
>> net/sunrpc/clnt.c | 2 +-
>> net/sunrpc/sched.c | 3 ++-
>> 4 files changed, 8 insertions(+), 4 deletions(-)
>>
>> diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
>> index 770ef2cb5775..7f2dee56c121 100644
>> --- a/include/linux/sunrpc/clnt.h
>> +++ b/include/linux/sunrpc/clnt.h
>> @@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
>> #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11)
>> #define RPC_CLNT_CREATE_CONNECTED (1UL << 12)
>> +#define RPC_CLNT_REBIND_DELAY 3
>> +#define RPC_CLNT_REBIND_MAX_TIMEOUT 30
>> +
>> struct rpc_clnt *rpc_create(struct rpc_create_args *args);
>> struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
>> const struct rpc_program *, u32);
>> diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
>> index b8ca3ecaf8d7..e9dc142f10bb 100644
>> --- a/include/linux/sunrpc/sched.h
>> +++ b/include/linux/sunrpc/sched.h
>> @@ -90,8 +90,8 @@ struct rpc_task {
>> #endif
>> unsigned char tk_priority : 2,/* Task priority */
>> tk_garb_retry : 2,
>> - tk_cred_retry : 2,
>> - tk_rebind_retry : 2;
>> + tk_cred_retry : 2;
>> + unsigned char tk_rebind_retry;
>> };
>> typedef void (*rpc_action)(struct rpc_task *);
>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
>> index 0b0b9f1eed46..6c89a1fa40bf 100644
>> --- a/net/sunrpc/clnt.c
>> +++ b/net/sunrpc/clnt.c
>> @@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task *task)
>> if (task->tk_rebind_retry == 0)
>> break;
>> task->tk_rebind_retry--;
>> - rpc_delay(task, 3*HZ);
>> + rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
>> goto retry_timeout;
>> case -ENOBUFS:
>> rpc_delay(task, HZ >> 2);
>> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
>> index be587a308e05..5c18a35752aa 100644
>> --- a/net/sunrpc/sched.c
>> +++ b/net/sunrpc/sched.c
>> @@ -817,7 +817,8 @@ rpc_init_task_statistics(struct rpc_task *task)
>> /* Initialize retry counters */
>> task->tk_garb_retry = 2;
>> task->tk_cred_retry = 2;
>> - task->tk_rebind_retry = 2;
>> + task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
>> + RPC_CLNT_REBIND_DELAY;
>> /* starting timestamp */
>> task->tk_start = ktime_get();
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2023-02-23 5:40 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-10 8:10 [RFC PATCH 1/1] SUNRPC: increase max timeout for rebind to handle NFS server restart Dai Ngo
2023-02-17 18:22 ` dai.ngo
2023-02-23 5:40 ` dai.ngo
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox