From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Trond Myklebust <trond.myklebust@hammerspace.com>,
Sasha Levin <sashal@kernel.org>,
linux-nfs@vger.kernel.org, netdev@vger.kernel.org
Subject: [PATCH AUTOSEL 4.19 012/209] SUNRPC: Fix priority queue fairness
Date: Tue, 12 Nov 2019 20:47:08 -0500 [thread overview]
Message-ID: <20191113015025.9685-12-sashal@kernel.org> (raw)
In-Reply-To: <20191113015025.9685-1-sashal@kernel.org>
From: Trond Myklebust <trond.myklebust@hammerspace.com>
[ Upstream commit f42f7c283078ce3c1e8368b140e270755b1ae313 ]
Fix up the priority queue to not batch by owner, but by queue, so that
we allow '1 << priority' elements to be dequeued before switching to
the next priority queue.
The owner field is still used to wake up requests in round robin order
by owner to avoid single processes hogging the RPC layer by loading the
queues.
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
include/linux/sunrpc/sched.h | 2 -
net/sunrpc/sched.c | 109 +++++++++++++++++------------------
2 files changed, 54 insertions(+), 57 deletions(-)
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 592653becd914..ad2e243f3f032 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -188,7 +188,6 @@ struct rpc_timer {
struct rpc_wait_queue {
spinlock_t lock;
struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */
- pid_t owner; /* process id of last task serviced */
unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */
unsigned char priority; /* current priority */
unsigned char nr; /* # tasks remaining for cookie */
@@ -204,7 +203,6 @@ struct rpc_wait_queue {
* from a single cookie. The aim is to improve
* performance of NFS operations such as read/write.
*/
-#define RPC_BATCH_COUNT 16
#define RPC_IS_PRIORITY(q) ((q)->maxpriority > 0)
/*
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 3fe5d60ab0e2c..e2808586c9e61 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -99,64 +99,78 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
}
-static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue)
-{
- struct list_head *q = &queue->tasks[queue->priority];
- struct rpc_task *task;
-
- if (!list_empty(q)) {
- task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
- if (task->tk_owner == queue->owner)
- list_move_tail(&task->u.tk_wait.list, q);
- }
-}
-
static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
{
if (queue->priority != priority) {
- /* Fairness: rotate the list when changing priority */
- rpc_rotate_queue_owner(queue);
queue->priority = priority;
+ queue->nr = 1U << priority;
}
}
-static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
-{
- queue->owner = pid;
- queue->nr = RPC_BATCH_COUNT;
-}
-
static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
{
rpc_set_waitqueue_priority(queue, queue->maxpriority);
- rpc_set_waitqueue_owner(queue, 0);
}
/*
- * Add new request to a priority queue.
+ * Add a request to a queue list
*/
-static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
- struct rpc_task *task,
- unsigned char queue_priority)
+static void
+__rpc_list_enqueue_task(struct list_head *q, struct rpc_task *task)
{
- struct list_head *q;
struct rpc_task *t;
- INIT_LIST_HEAD(&task->u.tk_wait.links);
- if (unlikely(queue_priority > queue->maxpriority))
- queue_priority = queue->maxpriority;
- if (queue_priority > queue->priority)
- rpc_set_waitqueue_priority(queue, queue_priority);
- q = &queue->tasks[queue_priority];
list_for_each_entry(t, q, u.tk_wait.list) {
if (t->tk_owner == task->tk_owner) {
- list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
+ list_add_tail(&task->u.tk_wait.links,
+ &t->u.tk_wait.links);
+ /* Cache the queue head in task->u.tk_wait.list */
+ task->u.tk_wait.list.next = q;
+ task->u.tk_wait.list.prev = NULL;
return;
}
}
+ INIT_LIST_HEAD(&task->u.tk_wait.links);
list_add_tail(&task->u.tk_wait.list, q);
}
+/*
+ * Remove request from a queue list
+ */
+static void
+__rpc_list_dequeue_task(struct rpc_task *task)
+{
+ struct list_head *q;
+ struct rpc_task *t;
+
+ if (task->u.tk_wait.list.prev == NULL) {
+ list_del(&task->u.tk_wait.links);
+ return;
+ }
+ if (!list_empty(&task->u.tk_wait.links)) {
+ t = list_first_entry(&task->u.tk_wait.links,
+ struct rpc_task,
+ u.tk_wait.links);
+ /* Assume __rpc_list_enqueue_task() cached the queue head */
+ q = t->u.tk_wait.list.next;
+ list_add_tail(&t->u.tk_wait.list, q);
+ list_del(&task->u.tk_wait.links);
+ }
+ list_del(&task->u.tk_wait.list);
+}
+
+/*
+ * Add new request to a priority queue.
+ */
+static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
+ struct rpc_task *task,
+ unsigned char queue_priority)
+{
+ if (unlikely(queue_priority > queue->maxpriority))
+ queue_priority = queue->maxpriority;
+ __rpc_list_enqueue_task(&queue->tasks[queue_priority], task);
+}
+
/*
* Add new request to wait queue.
*
@@ -194,13 +208,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
*/
static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
{
- struct rpc_task *t;
-
- if (!list_empty(&task->u.tk_wait.links)) {
- t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list);
- list_move(&t->u.tk_wait.list, &task->u.tk_wait.list);
- list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links);
- }
+ __rpc_list_dequeue_task(task);
}
/*
@@ -212,7 +220,8 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas
__rpc_disable_timer(queue, task);
if (RPC_IS_PRIORITY(queue))
__rpc_remove_wait_queue_priority(task);
- list_del(&task->u.tk_wait.list);
+ else
+ list_del(&task->u.tk_wait.list);
queue->qlen--;
dprintk("RPC: %5u removed from queue %p \"%s\"\n",
task->tk_pid, queue, rpc_qname(queue));
@@ -493,17 +502,9 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
* Service a batch of tasks from a single owner.
*/
q = &queue->tasks[queue->priority];
- if (!list_empty(q)) {
- task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
- if (queue->owner == task->tk_owner) {
- if (--queue->nr)
- goto out;
- list_move_tail(&task->u.tk_wait.list, q);
- }
- /*
- * Check if we need to switch queues.
- */
- goto new_owner;
+ if (!list_empty(q) && --queue->nr) {
+ task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
+ goto out;
}
/*
@@ -515,7 +516,7 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
else
q = q - 1;
if (!list_empty(q)) {
- task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
+ task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
goto new_queue;
}
} while (q != &queue->tasks[queue->priority]);
@@ -525,8 +526,6 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
new_queue:
rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
-new_owner:
- rpc_set_waitqueue_owner(queue, task->tk_owner);
out:
return task;
}
--
2.20.1
next prev parent reply other threads:[~2019-11-13 2:28 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-11-13 1:46 [PATCH AUTOSEL 4.19 001/209] net: ovs: fix return type of ndo_start_xmit function Sasha Levin
2019-11-13 1:46 ` [PATCH AUTOSEL 4.19 002/209] net: xen-netback: " Sasha Levin
2019-11-13 1:47 ` [PATCH AUTOSEL 4.19 005/209] net: hns3: Fix for netdev not up problem when setting mtu Sasha Levin
2019-11-13 1:47 ` [PATCH AUTOSEL 4.19 006/209] net: hns3: Fix loss of coal configuration while doing reset Sasha Levin
2019-11-13 1:47 ` [PATCH AUTOSEL 4.19 010/209] tcp: up initial rmem to 128KB and SYN rwin to around 64KB Sasha Levin
[not found] ` <CAP12E-JHedm+OA9Zaf6PaZBuNw5ddmeMn4RMcSWFFNrH=MpOhA@mail.gmail.com>
2019-12-17 0:00 ` Vishwanath Pai
2019-11-13 1:47 ` [PATCH AUTOSEL 4.19 011/209] openvswitch: Use correct reply values in datapath and vport ops Sasha Levin
2019-11-13 1:47 ` Sasha Levin [this message]
2019-11-13 1:47 ` [PATCH AUTOSEL 4.19 018/209] mt76x2: fix tx power configuration for VHT mcs 9 Sasha Levin
2019-11-13 1:47 ` [PATCH AUTOSEL 4.19 019/209] mt76x2: disable WLAN core before probe Sasha Levin
2019-11-13 1:47 ` [PATCH AUTOSEL 4.19 020/209] mt76: fix handling ps-poll frames Sasha Levin
2019-11-13 1:47 ` [PATCH AUTOSEL 4.19 025/209] ath10k: fix vdev-start timeout on error Sasha Levin
2019-11-13 1:47 ` [PATCH AUTOSEL 4.19 026/209] rtlwifi: btcoex: Use proper enumerated types for Wi-Fi only interface Sasha Levin
2019-11-13 1:47 ` [PATCH AUTOSEL 4.19 032/209] ath9k: fix reporting calculated new FFT upper max Sasha Levin
2019-11-13 1:47 ` [PATCH AUTOSEL 4.19 033/209] selftests/tls: Fix recv(MSG_PEEK) & splice() test cases Sasha Levin
2019-11-13 1:47 ` [PATCH AUTOSEL 4.19 036/209] nl80211: Fix a GET_KEY reply attribute Sasha Levin
2019-11-13 1:47 ` [PATCH AUTOSEL 4.19 050/209] sunrpc: Fix connect metrics Sasha Levin
2019-11-13 1:47 ` [PATCH AUTOSEL 4.19 053/209] cxgb4: Use proper enum in cxgb4_dcb_handle_fw_update Sasha Levin
2019-11-13 1:47 ` [PATCH AUTOSEL 4.19 054/209] cxgb4: Use proper enum in IEEE_FAUX_SYNC Sasha Levin
2019-11-13 1:48 ` [PATCH AUTOSEL 4.19 066/209] ice: Fix forward to queue group logic Sasha Levin
2019-11-13 1:48 ` [PATCH AUTOSEL 4.19 068/209] ixgbe: Fix ixgbe TX hangs with XDP_TX beyond queue limit Sasha Levin
2019-11-13 1:48 ` [PATCH AUTOSEL 4.19 069/209] i40e: Use proper enum in i40e_ndo_set_vf_link_state Sasha Levin
2019-11-13 1:48 ` [PATCH AUTOSEL 4.19 070/209] ixgbe: Fix crash with VFs and flow director on interface flap Sasha Levin
2019-11-13 1:48 ` [PATCH AUTOSEL 4.19 088/209] qed: Avoid implicit enum conversion in qed_ooo_submit_tx_buffers Sasha Levin
2019-11-13 1:48 ` [PATCH AUTOSEL 4.19 094/209] bnxt_en: return proper error when FW returns HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED Sasha Levin
2019-11-13 1:48 ` [PATCH AUTOSEL 4.19 100/209] brcmfmac: reduce timeout for action frame scan Sasha Levin
2019-11-13 1:48 ` [PATCH AUTOSEL 4.19 101/209] brcmfmac: fix full timeout waiting for action frame on-channel tx Sasha Levin
2019-11-13 1:48 ` [PATCH AUTOSEL 4.19 102/209] qtnfmac: request userspace to do OBSS scanning if FW can not Sasha Levin
2019-11-13 1:48 ` [PATCH AUTOSEL 4.19 103/209] qtnfmac: pass sgi rate info flag to wireless core Sasha Levin
2019-11-13 1:48 ` [PATCH AUTOSEL 4.19 104/209] qtnfmac: inform wireless core about supported extended capabilities Sasha Levin
2019-11-13 1:48 ` [PATCH AUTOSEL 4.19 105/209] qtnfmac: drop error reports for out-of-bounds key indexes Sasha Levin
2019-11-13 1:48 ` [PATCH AUTOSEL 4.19 123/209] net: hns3: Fix for rx vlan id handle to support Rev 0x21 hardware Sasha Levin
2019-11-13 1:49 ` [PATCH AUTOSEL 4.19 124/209] tc-testing: fix build of eBPF programs Sasha Levin
2019-11-13 1:49 ` [PATCH AUTOSEL 4.19 132/209] xfrm: use correct size to initialise sp->ovec Sasha Levin
2019-11-13 1:49 ` [PATCH AUTOSEL 4.19 134/209] iwlwifi: mvm: don't send keys when entering D3 Sasha Levin
2019-11-13 1:49 ` [PATCH AUTOSEL 4.19 135/209] xsk: proper AF_XDP socket teardown ordering Sasha Levin
2019-11-13 1:49 ` [PATCH AUTOSEL 4.19 177/209] bpf: btf: Fix a missing check bug Sasha Levin
2019-11-13 1:49 ` [PATCH AUTOSEL 4.19 178/209] net: fix generic XDP to handle if eth header was mangled Sasha Levin
2019-11-13 1:50 ` [PATCH AUTOSEL 4.19 193/209] selftests: forwarding: Have lldpad_app_wait_set() wait for unknown, too Sasha Levin
2019-11-13 1:50 ` [PATCH AUTOSEL 4.19 194/209] net: sched: avoid writing on noop_qdisc Sasha Levin
2019-11-13 1:50 ` [PATCH AUTOSEL 4.19 195/209] netfilter: nft_compat: do not dump private area Sasha Levin
2019-11-13 1:50 ` [PATCH AUTOSEL 4.19 197/209] mac80211: minstrel: fix using short preamble CCK rates on HT clients Sasha Levin
2019-11-13 1:50 ` [PATCH AUTOSEL 4.19 198/209] mac80211: minstrel: fix CCK rate group streams value Sasha Levin
2019-11-13 1:50 ` [PATCH AUTOSEL 4.19 199/209] mac80211: minstrel: fix sampling/reporting of CCK rates in HT mode Sasha Levin
2019-11-13 1:50 ` [PATCH AUTOSEL 4.19 201/209] mlxsw: spectrum_switchdev: Check notification relevance based on upper device Sasha Levin
2019-11-13 1:50 ` [PATCH AUTOSEL 4.19 203/209] tcp: start receiver buffer autotuning sooner Sasha Levin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191113015025.9685-12-sashal@kernel.org \
--to=sashal@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-nfs@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=stable@vger.kernel.org \
--cc=trond.myklebust@hammerspace.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).