public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Trond Myklebust <trond.myklebust@hammerspace.com>,
	Sasha Levin <sashal@kernel.org>,
	linux-nfs@vger.kernel.org, netdev@vger.kernel.org
Subject: [PATCH AUTOSEL 4.9 05/68] SUNRPC: Fix priority queue fairness
Date: Tue, 12 Nov 2019 20:58:29 -0500	[thread overview]
Message-ID: <20191113015932.12655-5-sashal@kernel.org> (raw)
In-Reply-To: <20191113015932.12655-1-sashal@kernel.org>

From: Trond Myklebust <trond.myklebust@hammerspace.com>

[ Upstream commit f42f7c283078ce3c1e8368b140e270755b1ae313 ]

Fix up the priority queue to not batch by owner, but by queue, so that
we allow '1 << priority' elements to be dequeued before switching to
the next priority queue.
The owner field is still used to wake up requests in round robin order
by owner to avoid single processes hogging the RPC layer by loading the
queues.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 include/linux/sunrpc/sched.h |   2 -
 net/sunrpc/sched.c           | 109 +++++++++++++++++------------------
 2 files changed, 54 insertions(+), 57 deletions(-)

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 7ba040c797ec4..da2791b5fe879 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -185,7 +185,6 @@ struct rpc_timer {
 struct rpc_wait_queue {
 	spinlock_t		lock;
 	struct list_head	tasks[RPC_NR_PRIORITY];	/* task queue for each priority level */
-	pid_t			owner;			/* process id of last task serviced */
 	unsigned char		maxpriority;		/* maximum priority (0 if queue is not a priority queue) */
 	unsigned char		priority;		/* current priority */
 	unsigned char		nr;			/* # tasks remaining for cookie */
@@ -201,7 +200,6 @@ struct rpc_wait_queue {
  * from a single cookie.  The aim is to improve
  * performance of NFS operations such as read/write.
  */
-#define RPC_BATCH_COUNT			16
 #define RPC_IS_PRIORITY(q)		((q)->maxpriority > 0)
 
 /*
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 600eacce653ae..0ef65822fdd34 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -99,64 +99,78 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
 	list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
 }
 
-static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue)
-{
-	struct list_head *q = &queue->tasks[queue->priority];
-	struct rpc_task *task;
-
-	if (!list_empty(q)) {
-		task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
-		if (task->tk_owner == queue->owner)
-			list_move_tail(&task->u.tk_wait.list, q);
-	}
-}
-
 static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
 {
 	if (queue->priority != priority) {
-		/* Fairness: rotate the list when changing priority */
-		rpc_rotate_queue_owner(queue);
 		queue->priority = priority;
+		queue->nr = 1U << priority;
 	}
 }
 
-static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
-{
-	queue->owner = pid;
-	queue->nr = RPC_BATCH_COUNT;
-}
-
 static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
 {
 	rpc_set_waitqueue_priority(queue, queue->maxpriority);
-	rpc_set_waitqueue_owner(queue, 0);
 }
 
 /*
- * Add new request to a priority queue.
+ * Add a request to a queue list
  */
-static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
-		struct rpc_task *task,
-		unsigned char queue_priority)
+static void
+__rpc_list_enqueue_task(struct list_head *q, struct rpc_task *task)
 {
-	struct list_head *q;
 	struct rpc_task *t;
 
-	INIT_LIST_HEAD(&task->u.tk_wait.links);
-	if (unlikely(queue_priority > queue->maxpriority))
-		queue_priority = queue->maxpriority;
-	if (queue_priority > queue->priority)
-		rpc_set_waitqueue_priority(queue, queue_priority);
-	q = &queue->tasks[queue_priority];
 	list_for_each_entry(t, q, u.tk_wait.list) {
 		if (t->tk_owner == task->tk_owner) {
-			list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
+			list_add_tail(&task->u.tk_wait.links,
+					&t->u.tk_wait.links);
+			/* Cache the queue head in task->u.tk_wait.list */
+			task->u.tk_wait.list.next = q;
+			task->u.tk_wait.list.prev = NULL;
 			return;
 		}
 	}
+	INIT_LIST_HEAD(&task->u.tk_wait.links);
 	list_add_tail(&task->u.tk_wait.list, q);
 }
 
+/*
+ * Remove request from a queue list
+ */
+static void
+__rpc_list_dequeue_task(struct rpc_task *task)
+{
+	struct list_head *q;
+	struct rpc_task *t;
+
+	if (task->u.tk_wait.list.prev == NULL) {
+		list_del(&task->u.tk_wait.links);
+		return;
+	}
+	if (!list_empty(&task->u.tk_wait.links)) {
+		t = list_first_entry(&task->u.tk_wait.links,
+				struct rpc_task,
+				u.tk_wait.links);
+		/* Assume __rpc_list_enqueue_task() cached the queue head */
+		q = t->u.tk_wait.list.next;
+		list_add_tail(&t->u.tk_wait.list, q);
+		list_del(&task->u.tk_wait.links);
+	}
+	list_del(&task->u.tk_wait.list);
+}
+
+/*
+ * Add new request to a priority queue.
+ */
+static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
+		struct rpc_task *task,
+		unsigned char queue_priority)
+{
+	if (unlikely(queue_priority > queue->maxpriority))
+		queue_priority = queue->maxpriority;
+	__rpc_list_enqueue_task(&queue->tasks[queue_priority], task);
+}
+
 /*
  * Add new request to wait queue.
  *
@@ -194,13 +208,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
  */
 static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
 {
-	struct rpc_task *t;
-
-	if (!list_empty(&task->u.tk_wait.links)) {
-		t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list);
-		list_move(&t->u.tk_wait.list, &task->u.tk_wait.list);
-		list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links);
-	}
+	__rpc_list_dequeue_task(task);
 }
 
 /*
@@ -212,7 +220,8 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas
 	__rpc_disable_timer(queue, task);
 	if (RPC_IS_PRIORITY(queue))
 		__rpc_remove_wait_queue_priority(task);
-	list_del(&task->u.tk_wait.list);
+	else
+		list_del(&task->u.tk_wait.list);
 	queue->qlen--;
 	dprintk("RPC: %5u removed from queue %p \"%s\"\n",
 			task->tk_pid, queue, rpc_qname(queue));
@@ -481,17 +490,9 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
 	 * Service a batch of tasks from a single owner.
 	 */
 	q = &queue->tasks[queue->priority];
-	if (!list_empty(q)) {
-		task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
-		if (queue->owner == task->tk_owner) {
-			if (--queue->nr)
-				goto out;
-			list_move_tail(&task->u.tk_wait.list, q);
-		}
-		/*
-		 * Check if we need to switch queues.
-		 */
-		goto new_owner;
+	if (!list_empty(q) && --queue->nr) {
+		task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
+		goto out;
 	}
 
 	/*
@@ -503,7 +504,7 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
 		else
 			q = q - 1;
 		if (!list_empty(q)) {
-			task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
+			task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
 			goto new_queue;
 		}
 	} while (q != &queue->tasks[queue->priority]);
@@ -513,8 +514,6 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
 
 new_queue:
 	rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
-new_owner:
-	rpc_set_waitqueue_owner(queue, task->tk_owner);
 out:
 	return task;
 }
-- 
2.20.1


  parent reply	other threads:[~2019-11-13  1:59 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-13  1:58 [PATCH AUTOSEL 4.9 01/68] net: ovs: fix return type of ndo_start_xmit function Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 02/68] net: xen-netback: " Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 03/68] ARM: dts: omap5: enable OTG role for DWC3 controller Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 04/68] f2fs: return correct errno in f2fs_gc Sasha Levin
2019-11-13  1:58 ` Sasha Levin [this message]
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 06/68] kvm: arm/arm64: Fix stage2_flush_memslot for 4 level page table Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 07/68] arm64/numa: Report correct memblock range for the dummy node Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 08/68] ath10k: fix vdev-start timeout on error Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 09/68] ata: ahci_brcm: Match BCM63138 compatible strings Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 10/68] ata: ahci_brcm: Allow using driver or DSL SoCs Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 11/68] ath9k: fix reporting calculated new FFT upper max Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 12/68] usb: gadget: udc: fotg210-udc: Fix a sleep-in-atomic-context bug in fotg210_get_status() Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 13/68] nl80211: Fix a GET_KEY reply attribute Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 14/68] dmaengine: ep93xx: Return proper enum in ep93xx_dma_chan_direction Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 15/68] dmaengine: timb_dma: Use proper enum in td_prep_slave_sg Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 16/68] mei: samples: fix a signedness bug in amt_host_if_call() Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 17/68] cxgb4: Use proper enum in cxgb4_dcb_handle_fw_update Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 18/68] cxgb4: Use proper enum in IEEE_FAUX_SYNC Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 19/68] powerpc/pseries: Fix DTL buffer registration Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 20/68] powerpc/pseries: Fix how we iterate over the DTL entries Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 21/68] mtd: rawnand: sh_flctl: Use proper enum for flctl_dma_fifo0_transfer Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 22/68] ixgbe: Fix crash with VFs and flow director on interface flap Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 23/68] IB/mthca: Fix error return code in __mthca_init_one() Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 24/68] IB/mlx4: Avoid implicit enumerated type conversion Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 25/68] ACPICA: Never run _REG on system_memory and system_IO Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 26/68] ata: ep93xx: Use proper enums for directions Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 27/68] media: pxa_camera: Fix check for pdev->dev.of_node Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 28/68] ALSA: hda/sigmatel - Disable automute for Elo VuPoint Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 29/68] KVM: PPC: Book3S PR: Exiting split hack mode needs to fixup both PC and LR Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 30/68] USB: serial: cypress_m8: fix interrupt-out transfer length Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 31/68] mtd: physmap_of: Release resources on error Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 32/68] cpu/SMT: State SMT is disabled even with nosmt and without "=force" Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 33/68] brcmfmac: reduce timeout for action frame scan Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 34/68] brcmfmac: fix full timeout waiting for action frame on-channel tx Sasha Levin
2019-11-13  1:58 ` [PATCH AUTOSEL 4.9 35/68] clk: samsung: Use clk_hw API for calling clk framework from clk notifiers Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 36/68] i2c: brcmstb: Allow enabling the driver on DSL SoCs Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 37/68] NFSv4.x: fix lock recovery during delegation recall Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 38/68] dmaengine: ioat: fix prototype of ioat_enumerate_channels Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 39/68] Input: st1232 - set INPUT_PROP_DIRECT property Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 40/68] Input: silead - try firmware reload after unsuccessful resume Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 41/68] x86/olpc: Fix build error with CONFIG_MFD_CS5535=m Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 42/68] crypto: mxs-dcp - Fix SHA null hashes and output length Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 43/68] crypto: mxs-dcp - Fix AES issues Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 44/68] ACPI / SBS: Fix rare oops when removing modules Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 45/68] iwlwifi: mvm: don't send keys when entering D3 Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 46/68] fbdev: sbuslib: use checked version of put_user() Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 47/68] fbdev: sbuslib: integer overflow in sbusfb_ioctl_helper() Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 48/68] reset: Fix potential use-after-free in __of_reset_control_get() Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 49/68] bcache: recal cached_dev_sectors on detach Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 50/68] s390/kasan: avoid vdso instrumentation Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 51/68] proc/vmcore: Fix i386 build error of missing copy_oldmem_page_encrypted() Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 52/68] backlight: lm3639: Unconditionally call led_classdev_unregister Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 53/68] mfd: ti_am335x_tscadc: Keep ADC interface on if child is wakeup capable Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 54/68] printk: Give error on attempt to set log buffer length to over 2G Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 55/68] media: isif: fix a NULL pointer dereference bug Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 56/68] GFS2: Flush the GFS2 delete workqueue before stopping the kernel threads Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 57/68] media: cx231xx: fix potential sign-extension overflow on large shift Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 58/68] x86/kexec: Correct KEXEC_BACKUP_SRC_END off-by-one error Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 59/68] gpio: syscon: Fix possible NULL ptr usage Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 60/68] spi: spidev: Fix OF tree warning logic Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 61/68] ARM: 8802/1: Call syscall_trace_exit even when system call skipped Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 62/68] orangefs: rate limit the client not running info message Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 63/68] hwmon: (pwm-fan) Silence error on probe deferral Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 64/68] hwmon: (ina3221) Fix INA3221_CONFIG_MODE macros Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 65/68] misc: cxl: Fix possible null pointer dereference Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 66/68] mac80211: minstrel: fix CCK rate group streams value Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 67/68] spi: rockchip: initialize dma_slave_config properly Sasha Levin
2019-11-13  1:59 ` [PATCH AUTOSEL 4.9 68/68] ARM: dts: omap5: Fix dual-role mode on Super-Speed port Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191113015932.12655-5-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    --cc=trond.myklebust@hammerspace.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.