* [PATCH 0/4] nvmet: support polling queue task for bio request
2023-09-13 8:36 [PATCH 0/4] nvmet: support polling queue task for bio request Ping Gan
@ 2023-09-13 8:34 ` Ping Gan
2023-09-13 8:36 ` [PATCH 1/4] nvmet: Add nvme target polling queue task parameters Ping Gan
` (6 subsequent siblings)
7 siblings, 0 replies; 15+ messages in thread
From: Ping Gan @ 2023-09-13 8:34 UTC (permalink / raw)
To: kbusch, axboe, hch, sagi, kch, linux-kernel, linux-nvme
Cc: ping_gan, jacky_gam_2001
Since the nvme target currently does not support submitting a bio to a
polling queue, the bio's completion relies on system interrupts. When the
system is under heavy workload and interrupt contention is high, it makes
sense to add a polling queue task that submits bios to the disk's polling
queue and polls the disk's completion queue.
Ping Gan (4):
nvmet: Add nvme target polling queue task parameters
nvmet: Add polling queue task for nvme target
nvmet: support bio polling queue request
nvme-core: Get lowlevel disk for target polling queue task
drivers/nvme/host/multipath.c | 20 +
drivers/nvme/target/Makefile | 2 +-
drivers/nvme/target/core.c | 55 +-
drivers/nvme/target/io-cmd-bdev.c | 243 ++++++++-
drivers/nvme/target/nvmet.h | 13 +
drivers/nvme/target/polling-queue-thread.c | 594 +++++++++++++++++++++
6 files changed, 895 insertions(+), 32 deletions(-)
create mode 100644 drivers/nvme/target/polling-queue-thread.c
--
2.26.2
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 0/4] nvmet: support polling queue task for bio request
@ 2023-09-13 8:36 Ping Gan
2023-09-13 8:34 ` Ping Gan
` (7 more replies)
0 siblings, 8 replies; 15+ messages in thread
From: Ping Gan @ 2023-09-13 8:36 UTC (permalink / raw)
To: kbusch, axboe, hch, sagi, kch, linux-kernel, linux-nvme
Cc: ping_gan, jacky_gam_2001
Since the nvme target currently does not support submitting a bio to a
polling queue, the bio's completion relies on system interrupts. When the
system is under heavy workload and interrupt contention is high, it makes
sense to add a polling queue task that submits bios to the disk's polling
queue and polls the disk's completion queue.
Ping Gan (4):
nvmet: Add nvme target polling queue task parameters
nvmet: Add polling queue task for nvme target
nvmet: support bio polling queue request
nvme-core: Get lowlevel disk for target polling queue task
drivers/nvme/host/multipath.c | 20 +
drivers/nvme/target/Makefile | 2 +-
drivers/nvme/target/core.c | 55 +-
drivers/nvme/target/io-cmd-bdev.c | 243 ++++++++-
drivers/nvme/target/nvmet.h | 13 +
drivers/nvme/target/polling-queue-thread.c | 594 +++++++++++++++++++++
6 files changed, 895 insertions(+), 32 deletions(-)
create mode 100644 drivers/nvme/target/polling-queue-thread.c
--
2.26.2
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 1/4] nvmet: Add nvme target polling queue task parameters
2023-09-13 8:36 [PATCH 0/4] nvmet: support polling queue task for bio request Ping Gan
2023-09-13 8:34 ` Ping Gan
@ 2023-09-13 8:36 ` Ping Gan
2023-09-13 8:36 ` [PATCH 2/4] nvmet: Add polling queue task for nvme target Ping Gan
` (5 subsequent siblings)
7 siblings, 0 replies; 15+ messages in thread
From: Ping Gan @ 2023-09-13 8:36 UTC (permalink / raw)
To: kbusch, axboe, hch, sagi, kch, linux-kernel, linux-nvme
Cc: ping_gan, jacky_gam_2001
Define the polling task's runtime parameters used when the
nvme target submits a bio to an nvme polling queue.
Signed-off-by: Ping Gan <jacky_gam_2001@163.com>
---
drivers/nvme/target/core.c | 55 ++++++++++++++++++++++++++++++++++++--
1 file changed, 53 insertions(+), 2 deletions(-)
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 3935165048e7..6f49965d5d17 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -17,6 +17,29 @@
#include "nvmet.h"
+/* Define the polling queue thread's affinity cpu core.
+ * */
+static int pqt_affinity_core = -1;
+module_param(pqt_affinity_core, int, 0644);
+MODULE_PARM_DESC(pqt_affinity_core,
+ "nvme polling queue thread's affinity core, -1 for all online cpus");
+
+/* Define a time (in usecs) that polling queue thread shall sample the
+ * * io request ring before determining it to be idle.
+ * */
+static int pqt_idle_usecs;
+module_param(pqt_idle_usecs, int, 0644);
+MODULE_PARM_DESC(pqt_idle_usecs,
+ "polling queue task will poll io request till idle time in usecs");
+
+/* Define the polling queue thread ring's size.
+ * * The ring will be consumed by polling queue thread.
+ * */
+static int pqt_ring_size;
+module_param(pqt_ring_size, int, 0644);
+MODULE_PARM_DESC(pqt_ring_size,
+ "nvme target polling queue thread ring size");
+
struct kmem_cache *nvmet_bvec_cache;
struct workqueue_struct *buffered_io_wq;
struct workqueue_struct *zbd_wq;
@@ -1648,13 +1671,34 @@ static int __init nvmet_init(void)
{
int error = -ENOMEM;
+ if ((pqt_affinity_core >= -1 &&
+ pqt_affinity_core < nr_cpu_ids) ||
+ pqt_idle_usecs > 0 || pqt_ring_size > 0) {
+ if (pqt_idle_usecs == 0)
+ pqt_idle_usecs = 1000; //default 1ms
+ if (pqt_affinity_core < -1 ||
+ pqt_affinity_core >= nr_cpu_ids) {
+ printk(KERN_ERR "bad parameter for affinity core \n");
+ error = -EINVAL;
+ return error;
+ }
+ if (pqt_ring_size == 0)
+ pqt_ring_size = 4096; //default 4k
+ error = nvmet_init_pq_thread(pqt_idle_usecs,
+ pqt_affinity_core, pqt_ring_size);
+ if (error)
+ return error;
+ }
+
nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;
nvmet_bvec_cache = kmem_cache_create("nvmet-bvec",
NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), 0,
SLAB_HWCACHE_ALIGN, NULL);
- if (!nvmet_bvec_cache)
- return -ENOMEM;
+ if (!nvmet_bvec_cache) {
+ error = -ENOMEM;
+ goto out_free_pqt;
+ }
zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM, 0);
if (!zbd_wq)
@@ -1688,6 +1732,8 @@ static int __init nvmet_init(void)
destroy_workqueue(zbd_wq);
out_destroy_bvec_cache:
kmem_cache_destroy(nvmet_bvec_cache);
+out_free_pqt:
+ nvmet_exit_pq_thread();
return error;
}
@@ -1701,6 +1747,11 @@ static void __exit nvmet_exit(void)
destroy_workqueue(zbd_wq);
kmem_cache_destroy(nvmet_bvec_cache);
+ if ((pqt_affinity_core >= -1 &&
+ pqt_affinity_core < nr_cpu_ids) ||
+ pqt_idle_usecs > 0 || pqt_ring_size > 0)
+ nvmet_exit_pq_thread();
+
BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
}
--
2.26.2
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 2/4] nvmet: Add polling queue task for nvme target
2023-09-13 8:36 [PATCH 0/4] nvmet: support polling queue task for bio request Ping Gan
2023-09-13 8:34 ` Ping Gan
2023-09-13 8:36 ` [PATCH 1/4] nvmet: Add nvme target polling queue task parameters Ping Gan
@ 2023-09-13 8:36 ` Ping Gan
2023-09-13 12:19 ` kernel test robot
` (2 more replies)
2023-09-13 8:36 ` [PATCH 3/4] nvmet: support bio polling queue request Ping Gan
` (4 subsequent siblings)
7 siblings, 3 replies; 15+ messages in thread
From: Ping Gan @ 2023-09-13 8:36 UTC (permalink / raw)
To: kbusch, axboe, hch, sagi, kch, linux-kernel, linux-nvme
Cc: ping_gan, jacky_gam_2001
The polling queue task of the nvme target fetches bio
requests from the lossless ring, which is filled by
io-cmd-bdev's read/write path, then submits the requests to
nvme's polling queue, and finally polls to check the completion
status of the requests and completes them.
Signed-off-by: Ping Gan <jacky_gam_2001@163.com>
---
drivers/nvme/target/Makefile | 2 +-
drivers/nvme/target/nvmet.h | 13 +
drivers/nvme/target/polling-queue-thread.c | 594 +++++++++++++++++++++
3 files changed, 608 insertions(+), 1 deletion(-)
create mode 100644 drivers/nvme/target/polling-queue-thread.c
diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
index c66820102493..99272881b63e 100644
--- a/drivers/nvme/target/Makefile
+++ b/drivers/nvme/target/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_NVME_TARGET_FCLOOP) += nvme-fcloop.o
obj-$(CONFIG_NVME_TARGET_TCP) += nvmet-tcp.o
nvmet-y += core.o configfs.o admin-cmd.o fabrics-cmd.o \
- discovery.o io-cmd-file.o io-cmd-bdev.o
+ discovery.o io-cmd-file.o io-cmd-bdev.o polling-queue-thread.o
nvmet-$(CONFIG_NVME_TARGET_PASSTHRU) += passthru.o
nvmet-$(CONFIG_BLK_DEV_ZONED) += zns.o
nvmet-$(CONFIG_NVME_TARGET_AUTH) += fabrics-cmd-auth.o auth.o
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 8cfd60f3b564..b29a45bbdf99 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -395,6 +395,12 @@ struct nvmet_req {
u64 error_slba;
};
+struct nvmet_pqt_bio_req {
+ struct nvmet_req *req;
+ struct bio_list blist;
+ unsigned short io_completed;
+};
+
#define NVMET_MAX_MPOOL_BVEC 16
extern struct kmem_cache *nvmet_bvec_cache;
extern struct workqueue_struct *buffered_io_wq;
@@ -455,6 +461,13 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req);
u16 nvmet_parse_fabrics_admin_cmd(struct nvmet_req *req);
u16 nvmet_parse_fabrics_io_cmd(struct nvmet_req *req);
+//below is for enabling nvmet polling queue task
+int nvmet_init_pq_thread(u32 thread_idle, int affinity_cpu, u32 ring_size);
+void nvmet_exit_pq_thread(void);
+bool nvmet_pqt_enabled(void);
+int nvmet_pqt_ring_enqueue(void *pt);
+void nvmet_wakeup_pq_thread(void);
+
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops);
void nvmet_req_uninit(struct nvmet_req *req);
diff --git a/drivers/nvme/target/polling-queue-thread.c b/drivers/nvme/target/polling-queue-thread.c
new file mode 100644
index 000000000000..2eb107393df9
--- /dev/null
+++ b/drivers/nvme/target/polling-queue-thread.c
@@ -0,0 +1,594 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVMe over Fabrics target POLLING queue thread implementation.
+ * Copyright (c) 2023 Ping Gan.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include <linux/refcount.h>
+#include <linux/bits.h>
+#include <linux/blk-mq.h>
+#include <linux/kthread.h>
+
+#include <linux/sched/signal.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/bvec.h>
+#include <linux/bio.h>
+#include <linux/net.h>
+#include <linux/wait.h>
+#include "nvmet.h"
+
+
+struct nvmet_pqt_sqe {
+ struct bio *bio;
+ struct nvmet_pqt_bio_req *pqt_req;
+ struct list_head list;
+};
+
+enum {
+ NVMET_PQ_THREAD_SHOULD_STOP = 0,
+ NVMET_PQ_THREAD_ENABLE = 1,
+};
+
+struct nvmet_pq_ring_headtail {
+ u32 head ____cacheline_aligned_in_smp;
+ u32 tail ____cacheline_aligned_in_smp;
+};
+
+struct nvmet_pq_ring {
+ struct nvmet_pq_ring_headtail prod, cons;
+ u32 size;
+ u32 mask;
+ u32 capacity;
+ struct bio_list *qe_arry[] ____cacheline_aligned_in_smp;
+};
+
+struct nvmet_pq_thread_data {
+ struct wait_queue_head wait_head;
+ struct nvmet_pq_ring *ring;
+ u32 ring_mem_size;
+ struct list_head submit_list;
+ u32 thread_idle;
+ int affinity_cpu;
+ unsigned long state;
+ pid_t task_pid;
+ pid_t task_tgid;
+ struct task_struct *thread;
+ struct mutex lock;
+ struct delayed_work compl_bio;
+};
+
+struct nvmet_pq_thread_data nvmet_pqt_data;
+
+static inline int
+nvmet_pq_powerof2_enabled(u32 n)
+{
+ return n && !(n & (n - 1));
+}
+
+static inline u32 nvmet_pq_alignpow2(u32 x)
+{
+ x--;
+ x |= x >> 1;
+ x |= x >> 2;
+ x |= x >> 4;
+ x |= x >> 8;
+ x |= x >> 16;
+ return x + 1;
+}
+
+static void nvmet_pq_mem_free(void *ptr, size_t size)
+{
+ struct page *page;
+
+ if (!ptr)
+ return;
+
+ page = virt_to_page(ptr);
+ __free_pages(page, get_order(size));
+}
+
+static void *nvmet_pq_mem_alloc(size_t size)
+{
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN;
+ if (get_order(size) > MAX_ORDER)
+ return NULL;
+ return (void *) __get_free_pages(gfp_flags, get_order(size));
+}
+
+static struct nvmet_pq_ring *nvmet_create_pq_ring(u32 cnt)
+{
+ struct nvmet_pq_ring *pq_ring;
+ u32 ring_size = 0, qe_size = cnt;
+
+ if (!nvmet_pq_powerof2_enabled(cnt))
+ qe_size = nvmet_pq_alignpow2(cnt + 1);
+ ring_size += sizeof(struct nvmet_pq_ring);
+ ring_size += qe_size * sizeof(void **);
+ pq_ring = nvmet_pq_mem_alloc(ring_size);
+ if (likely(pq_ring)) {
+ pq_ring->cons.head = 0;
+ pq_ring->cons.tail = 0;
+ pq_ring->prod.head = 0;
+ pq_ring->prod.tail = 0;
+ pq_ring->size = qe_size;
+ pq_ring->mask = qe_size - 1;
+ pq_ring->capacity = nvmet_pq_powerof2_enabled(cnt)?cnt:(qe_size - 1);
+ }
+ return pq_ring;
+}
+
+//below is derived from FreeBSD's bufring.h
+/* the actual enqueue of pointers on the ring.
+ * Placed here since identical code needed in both
+ * single and multi producer enqueue functions */
+#define ENQUEUE_PTRS(r, ring_start, prod_head, obj_table, n, obj_type) do { \
+ unsigned int i; \
+ const u32 size = (r)->size; \
+ u32 idx = prod_head & (r)->mask; \
+ obj_type *ring = (obj_type *)ring_start; \
+ if (likely(idx + n < size)) { \
+ for (i = 0; i < (n & ((~(unsigned)0x3))); i += 4, idx += 4) { \
+ ring[idx] = obj_table[i]; \
+ ring[idx+1] = obj_table[i+1]; \
+ ring[idx+2] = obj_table[i+2]; \
+ ring[idx+3] = obj_table[i+3]; \
+ } \
+ switch (n & 0x3) { \
+ case 3: \
+ ring[idx++] = obj_table[i++]; /* fallthrough */ \
+ case 2: \
+ ring[idx++] = obj_table[i++]; /* fallthrough */ \
+ case 1: \
+ ring[idx++] = obj_table[i++]; \
+ } \
+ } else { \
+ for (i = 0; idx < size; i++, idx++)\
+ ring[idx] = obj_table[i]; \
+ for (idx = 0; i < n; i++, idx++) \
+ ring[idx] = obj_table[i]; \
+ } \
+} while (0)
+
+/* the actual copy of pointers on the ring to obj_table.
+ * Placed here since identical code needed in both
+ * single and multi consumer dequeue functions */
+#define DEQUEUE_PTRS(r, ring_start, cons_head, obj_table, n, obj_type) do { \
+ unsigned int i; \
+ u32 idx = cons_head & (r)->mask; \
+ const u32 size = (r)->size; \
+ obj_type *ring = (obj_type *)ring_start; \
+ if (likely(idx + n < size)) { \
+ for (i = 0; i < (n & (~(unsigned)0x3)); i += 4, idx += 4) {\
+ obj_table[i] = ring[idx]; \
+ obj_table[i+1] = ring[idx+1]; \
+ obj_table[i+2] = ring[idx+2]; \
+ obj_table[i+3] = ring[idx+3]; \
+ } \
+ switch (n & 0x3) { \
+ case 3: \
+ obj_table[i++] = ring[idx++]; /* fallthrough */ \
+ case 2: \
+ obj_table[i++] = ring[idx++]; /* fallthrough */ \
+ case 1: \
+ obj_table[i++] = ring[idx++]; \
+ } \
+ } else { \
+ for (i = 0; idx < size; i++, idx++) \
+ obj_table[i] = ring[idx]; \
+ for (idx = 0; i < n; i++, idx++) \
+ obj_table[i] = ring[idx]; \
+ } \
+} while (0)
+
+static inline u32
+__nvmet_pq_ring_move_prod_head(struct nvmet_pq_ring *r, u32 n,
+ u32 *old_head, u32 *new_head, u32 *free_entries)
+{
+ const u32 capacity = smp_load_acquire(&r->capacity);
+ u32 ret, success;
+
+ do {
+ *old_head = smp_load_acquire(&r->prod.head);
+
+ /* add rmb barrier to avoid load/load reorder in weak
+ * memory model.
+ */
+ smp_rmb();
+
+ *free_entries = (capacity + smp_load_acquire(&r->cons.tail) - *old_head);
+
+ /* check that we have enough room in ring */
+ if (unlikely(n > *free_entries))
+ return 0;
+
+ *new_head = *old_head + n;
+ ret = cmpxchg(&r->prod.head, *old_head, *new_head);
+ success = (ret == *old_head) ? 1 : 0;
+ } while (unlikely(success == 0));
+ return n;
+}
+
+static inline u32
+__nvmet_pq_ring_move_cons_head(struct nvmet_pq_ring *r, u32 n,
+ u32 *old_head, u32 *new_head, u32 *entries)
+{
+ unsigned int ret, success;
+
+ /* move cons.head atomically */
+ do {
+ *old_head = smp_load_acquire(&r->cons.head);
+
+ /* add rmb barrier to avoid load/load reorder in weak
+ * memory model.
+ */
+ smp_rmb();
+
+ *entries = (smp_load_acquire(&r->prod.tail) - *old_head);
+
+ /* check if we have enough entry to dequeue */
+ if (n > *entries)
+ return 0;
+
+ *new_head = *old_head + n;
+ ret = cmpxchg(&r->cons.head, *old_head, *new_head);
+ success = (ret == *old_head) ? 1 : 0;
+ } while (unlikely(success == 0));
+ return n;
+}
+
+static inline void
+__nvmet_pq_ring_update_tail(struct nvmet_pq_ring_headtail *ht,
+ u32 old_val, u32 new_val, u32 enqueue)
+{
+ if (enqueue)
+ smp_wmb();
+ else
+ smp_rmb();
+
+ while (unlikely(smp_load_acquire(&ht->tail) != old_val))
+ ;
+
+ smp_store_release(&ht->tail, new_val);
+}
+
+static inline u32
+__nvmet_pq_ring_do_enqueue(struct nvmet_pq_ring *r,
+ void **obj_table, u32 n, u32 *free_space)
+{
+ uint32_t prod_head, prod_next;
+ uint32_t free_entries;
+
+ n = __nvmet_pq_ring_move_prod_head(r, n, &prod_head,
+ &prod_next, &free_entries);
+ if (n == 0)
+ goto end;
+
+ ENQUEUE_PTRS(r, &r[1], prod_head, obj_table, n, void *);
+
+ __nvmet_pq_ring_update_tail(&r->prod, prod_head, prod_next, 1);
+end:
+ if (free_space != NULL)
+ *free_space = free_entries - n;
+ return n;
+}
+
+static inline u32
+__nvmet_pq_ring_do_dequeue(struct nvmet_pq_ring *r,
+ void **obj_table, u32 n, u32 *available)
+{
+ uint32_t cons_head, cons_next;
+ uint32_t entries;
+
+ n = __nvmet_pq_ring_move_cons_head(r, n, &cons_head, &cons_next, &entries);
+ if (n == 0)
+ goto end;
+
+ DEQUEUE_PTRS(r, &r[1], cons_head, obj_table, n, void *);
+
+ __nvmet_pq_ring_update_tail(&r->cons, cons_head, cons_next, 0);
+
+end:
+ if (available != NULL)
+ *available = entries - n;
+ return n;
+}
+
+static inline u32
+nvmet_pq_ring_enqueue_bulk(struct nvmet_pq_ring *r,
+ void **obj_table, u32 n, u32 *free_space)
+{
+ return __nvmet_pq_ring_do_enqueue(r, obj_table, n, free_space);
+}
+
+static inline int
+nvmet_pq_ring_enqueue(struct nvmet_pq_ring *r, void **obj)
+{
+ return nvmet_pq_ring_enqueue_bulk(r, obj, 1, NULL) ? 0 : -ENOBUFS;
+}
+
+static inline u32
+nvmet_pq_ring_dequeue_bulk(struct nvmet_pq_ring *r,
+ void **obj_table, u32 n, u32 *available)
+{
+ return __nvmet_pq_ring_do_dequeue(r, obj_table, n, available);
+}
+
+static inline int
+nvmet_pq_ring_dequeue(struct nvmet_pq_ring *r, void **obj_p)
+{
+ return nvmet_pq_ring_dequeue_bulk(r, obj_p, 1, NULL) ? 0 : -ENOENT;
+}
+
+static inline u32
+__nvmet_pq_ring_count(const struct nvmet_pq_ring *r)
+{
+ u32 prod_tail = smp_load_acquire(&r->prod.tail);
+ u32 cons_tail = smp_load_acquire(&r->cons.tail);
+ u32 count = (prod_tail - cons_tail) & r->mask;
+ u32 capacity = smp_load_acquire(&r->capacity);
+ return (count > capacity) ? capacity : count;
+}
+
+/**
+ * Return the number of free entries in a ring.
+ */
+static inline u32
+__nvmet_pq_ring_free_count(const struct nvmet_pq_ring *r)
+{
+ return smp_load_acquire(&r->capacity) - __nvmet_pq_ring_count(r);
+}
+
+/**
+ * Test if a ring is full.
+ */
+static inline int
+__nvmet_pq_ring_full(const struct nvmet_pq_ring *r)
+{
+ return __nvmet_pq_ring_free_count(r) == 0;
+}
+
+/**
+ * Test if a ring is empty.
+ */
+static inline int
+__nvmet_pq_ring_empty(const struct nvmet_pq_ring *r)
+{
+ return __nvmet_pq_ring_count(r) == 0;
+}
+
+/**
+ * Return the size of the ring.
+ */
+static inline u32
+__nvmet_pq_ring_get_size(const struct nvmet_pq_ring *r)
+{
+ return smp_load_acquire(&r->size);
+}
+
+/**
+ * Return the number of elements which can be stored in the ring.
+ */
+static inline u32
+__nvmet_pq_ring_get_capacity(const struct nvmet_pq_ring *r)
+{
+ return smp_load_acquire(&r->capacity);
+}
+
+#define NVMET_PQT_IO_BUDGET 64
+
+static inline void
+__nvmet_pqt_submit_bio(struct nvmet_pq_thread_data *pqtd,
+ struct nvmet_pqt_bio_req *pqt_req, bool cancel_thread)
+{
+ struct bio *bio;
+ struct nvmet_pqt_sqe *sqe;
+
+ while ((bio = bio_list_pop(&pqt_req->blist))) {
+ if (cancel_thread) {
+ nvmet_req_bio_put(pqt_req->req, bio);
+ if (!bio_flagged(bio, BIO_CHAIN)) {
+ kfree(pqt_req);
+ break;
+ }
+ } else {
+ if (bio_flagged(bio, BIO_CHAIN))
+ submit_bio(bio);
+ else {
+ sqe = kmalloc(sizeof(struct nvmet_pqt_sqe), GFP_KERNEL);
+ if (!sqe) {
+ bio_io_error(bio);
+ kfree(pqt_req);
+ } else {
+ sqe->bio = bio;
+ sqe->pqt_req = pqt_req;
+ submit_bio(bio);
+ list_add(&sqe->list, &pqtd->submit_list);
+ }
+ }
+ }
+ }
+}
+
+static int __nvmet_pq_thread(struct nvmet_pq_thread_data *pqtd,
+ bool cancel_thread)
+{
+ int ret = 0, poll_cnt = 0;
+ struct nvmet_pqt_bio_req *req_done;
+ struct nvmet_pqt_sqe *sqe, *tmp;
+ unsigned int poll_flags = BLK_POLL_ONESHOT;
+ DEFINE_IO_COMP_BATCH(iob);
+
+ while (1) {
+ ret = nvmet_pq_ring_dequeue(pqtd->ring, (void **)&req_done);
+ if (ret)
+ break;
+ __nvmet_pqt_submit_bio(pqtd, req_done, cancel_thread);
+ poll_cnt++;
+ if (poll_cnt == NVMET_PQT_IO_BUDGET && !cancel_thread)
+ break;
+ }
+ if (!list_empty(&pqtd->submit_list)) {
+cancel_thread_poll_again:
+ list_for_each_entry_safe(sqe, tmp, &pqtd->submit_list, list) {
+ if (sqe->pqt_req->io_completed == 1) {
+ list_del(&sqe->list);
+ kfree(sqe->pqt_req);
+ kfree(sqe);
+ continue;
+ }
+ ret = bio_poll(sqe->bio, &iob, poll_flags);
+ if (ret < 0) {
+ if (!cancel_thread) {
+ if (!rq_list_empty(iob.req_list))
+ iob.complete(&iob);
+ return 1;
+ }
+ }
+ if (ret > 0 && sqe->pqt_req->io_completed == 1) {
+ list_del(&sqe->list);
+ kfree(sqe->pqt_req);
+ kfree(sqe);
+ }
+ }
+ }
+ if (cancel_thread) {
+ if (!list_empty(&pqtd->submit_list))
+ goto cancel_thread_poll_again;
+ nvmet_pq_mem_free(pqtd->ring, pqtd->ring_mem_size);
+ }
+ if (!rq_list_empty(iob.req_list))
+ iob.complete(&iob);
+ return 0;
+}
+
+void nvmet_wakeup_pq_thread(void)
+{
+ smp_mb();
+ if (waitqueue_active(&nvmet_pqt_data.wait_head))
+ wake_up(&nvmet_pqt_data.wait_head);
+}
+
+int nvmet_pqt_ring_enqueue(void *pt)
+{
+ struct nvmet_pqt_bio_req *pqt_req = pt;
+ return nvmet_pq_ring_enqueue(nvmet_pqt_data.ring, (void **)&pqt_req);
+}
+
+static int nvmet_pq_thread(void *data)
+{
+ struct nvmet_pq_thread_data *pqtd = data;
+ unsigned long timeout = 0;
+ DEFINE_WAIT(wait);
+
+ if (pqtd->affinity_cpu != -1)
+ set_cpus_allowed_ptr(current, cpumask_of(pqtd->affinity_cpu));
+ else
+ set_cpus_allowed_ptr(current, cpu_online_mask);
+ current->flags |= PF_NO_SETAFFINITY;
+ mutex_lock(&pqtd->lock);
+ pqtd->task_pid = current->pid;
+ pqtd->task_tgid = current->tgid;
+
+ while (!kthread_should_stop()) {
+ if (test_bit(NVMET_PQ_THREAD_SHOULD_STOP, &pqtd->state))
+ break;
+
+ int ret = __nvmet_pq_thread(pqtd, false);
+ if (ret > 0 || !time_after(jiffies, timeout)) {
+ cond_resched();
+ if (ret > 0)
+ timeout = jiffies + pqtd->thread_idle;
+ continue;
+ }
+ prepare_to_wait(&pqtd->wait_head, &wait, TASK_INTERRUPTIBLE);
+ mutex_unlock(&pqtd->lock);
+ schedule();
+ mutex_lock(&pqtd->lock);
+ finish_wait(&pqtd->wait_head, &wait);
+ timeout = jiffies + pqtd->thread_idle;
+ }
+ pqtd->thread = NULL;
+ pqtd->task_pid = -1;
+ pqtd->task_tgid = -1;
+ mutex_unlock(&pqtd->lock);
+ kthread_complete_and_exit(NULL, 0);
+}
+
+bool nvmet_pqt_enabled(void)
+{
+ if (!test_bit(NVMET_PQ_THREAD_SHOULD_STOP, &nvmet_pqt_data.state) &&
+ test_bit(NVMET_PQ_THREAD_ENABLE, &nvmet_pqt_data.state))
+ return true;
+ else
+ return false;
+}
+
+static void nvmet_pqt_compl_bio_req_func(struct work_struct *work)
+{
+ struct nvmet_pq_thread_data *pqtd = container_of(work,
+ struct nvmet_pq_thread_data, compl_bio.work);
+ __nvmet_pq_thread(pqtd, true);
+}
+
+int nvmet_init_pq_thread(u32 thread_idle, int affinity_cpu, u32 ring_size)
+{
+ struct task_struct *task;
+ int ret = 0;
+
+ memset(&nvmet_pqt_data, 0, sizeof(struct nvmet_pq_thread_data));
+ init_waitqueue_head(&nvmet_pqt_data.wait_head);
+ mutex_init(&nvmet_pqt_data.lock);
+ nvmet_pqt_data.thread_idle = usecs_to_jiffies(thread_idle);
+ nvmet_pqt_data.affinity_cpu = affinity_cpu;
+ INIT_LIST_HEAD(&nvmet_pqt_data.submit_list);
+ nvmet_pqt_data.ring = nvmet_create_pq_ring(ring_size);
+ if (!nvmet_pqt_data.ring) {
+ printk(KERN_ERR "allocate poll ring failure\n");
+ return -1;
+ }
+ nvmet_pqt_data.ring_mem_size = sizeof(struct nvmet_pq_ring);
+ nvmet_pqt_data.ring_mem_size += nvmet_pqt_data.ring->size * sizeof(void **);
+ task = kthread_create(nvmet_pq_thread, (void *)&nvmet_pqt_data, "nvmet-pqt");
+ if (IS_ERR(task)) {
+ ret = PTR_ERR(task);
+ goto err;
+ }
+
+ set_user_nice(task, -20);
+ mutex_lock(&nvmet_pqt_data.lock);
+ nvmet_pqt_data.thread = task;
+ mutex_unlock(&nvmet_pqt_data.lock);
+ wake_up_process(task);
+ set_bit(NVMET_PQ_THREAD_ENABLE, &nvmet_pqt_data.state);
+ return 0;
+err:
+ nvmet_pq_mem_free(nvmet_pqt_data.ring, nvmet_pqt_data.ring_mem_size);
+ return ret;
+}
+
+void nvmet_exit_pq_thread(void)
+{
+ set_bit(NVMET_PQ_THREAD_SHOULD_STOP, &nvmet_pqt_data.state);
+ clear_bit(NVMET_PQ_THREAD_ENABLE, &nvmet_pqt_data.state);
+ mutex_lock(&nvmet_pqt_data.lock);
+ if (nvmet_pqt_data.thread) {
+ mutex_unlock(&nvmet_pqt_data.lock);
+ kthread_stop(nvmet_pqt_data.thread);
+ } else {
+ mutex_unlock(&nvmet_pqt_data.lock);
+ }
+ INIT_DELAYED_WORK(&nvmet_pqt_data.compl_bio, nvmet_pqt_compl_bio_req_func);
+ schedule_delayed_work(&nvmet_pqt_data.compl_bio, 3);
+}
--
2.26.2
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 3/4] nvmet: support bio polling queue request
2023-09-13 8:36 [PATCH 0/4] nvmet: support polling queue task for bio request Ping Gan
` (2 preceding siblings ...)
2023-09-13 8:36 ` [PATCH 2/4] nvmet: Add polling queue task for nvme target Ping Gan
@ 2023-09-13 8:36 ` Ping Gan
2023-09-13 8:36 ` [PATCH 4/4] nvme-core: Get lowlevel disk for target polling queue task Ping Gan
` (3 subsequent siblings)
7 siblings, 0 replies; 15+ messages in thread
From: Ping Gan @ 2023-09-13 8:36 UTC (permalink / raw)
To: kbusch, axboe, hch, sagi, kch, linux-kernel, linux-nvme
Cc: ping_gan, jacky_gam_2001
If the bio polling queue task is enabled, we will split and chain
the bios if needed, then enqueue the request onto the polling
queue task's lossless ring.
Signed-off-by: Ping Gan <jacky_gam_2001@163.com>
---
drivers/nvme/target/io-cmd-bdev.c | 243 ++++++++++++++++++++++++++----
1 file changed, 214 insertions(+), 29 deletions(-)
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 468833675cc9..6f7d04ae6cb7 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -184,6 +184,16 @@ static void nvmet_bio_done(struct bio *bio)
nvmet_req_bio_put(req, bio);
}
+static void nvmet_pqt_bio_done(struct bio *bio)
+{
+ struct nvmet_pqt_bio_req *req_done = bio->bi_private;
+
+ nvmet_req_complete(req_done->req, blk_to_nvme_status(req_done->req,
+ bio->bi_status));
+ nvmet_req_bio_put(req_done->req, bio);
+ req_done->io_completed = 1;
+}
+
#ifdef CONFIG_BLK_DEV_INTEGRITY
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
struct sg_mapping_iter *miter)
@@ -237,6 +247,38 @@ static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
+#ifdef CONFIG_NVME_MULTIPATH
+extern struct block_device *nvme_mpath_get_bdev(struct block_device *bdev);
+extern const struct block_device_operations nvme_ns_head_ops;
+#endif
+
+static inline int nvmet_chain_par_bio(struct nvmet_req *req, struct bio **bio,
+ struct sg_mapping_iter *prot_miter, struct block_device *bdev,
+ sector_t sector, struct bio_list *blist)
+{
+ struct bio *parent, *child;
+ unsigned int vec_cnt;
+ int rc;
+
+ parent = *bio;
+ vec_cnt = queue_max_segments(bdev->bd_disk->queue);
+ if (req->metadata_len) {
+ rc = nvmet_bdev_alloc_bip(req, parent,
+ prot_miter);
+ if (unlikely(rc))
+ return rc;
+ }
+ child = bio_alloc(bdev, vec_cnt, parent->bi_opf, GFP_KERNEL);
+ child->bi_iter.bi_sector = sector;
+ *bio = child;
+ bio_chain(*bio, parent);
+ parent->bi_opf |= REQ_POLLED;
+ parent->bi_opf |= REQ_NOWAIT;
+ parent->bi_opf |= REQ_NOMERGE;
+ bio_list_add(blist, parent);
+ return 0;
+}
+
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
unsigned int sg_cnt = req->sg_cnt;
@@ -247,8 +289,13 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
blk_opf_t opf;
int i, rc;
struct sg_mapping_iter prot_miter;
- unsigned int iter_flags;
+ unsigned int iter_flags, max_sectors;
+ unsigned short vec_cnt, max_segments;
unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;
+ bool pqt_enabled = nvmet_pqt_enabled();
+ unsigned int sg_len;
+ struct nvmet_pqt_bio_req *req_done = NULL;
+ struct block_device *bdev = req->ns->bdev;
if (!nvmet_check_transfer_len(req, total_len))
return;
@@ -268,6 +315,24 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
iter_flags = SG_MITER_FROM_SG;
}
+#ifdef CONFIG_NVME_MULTIPATH
+ if (pqt_enabled && bdev->bd_disk->fops == &nvme_ns_head_ops) {
+ bdev = nvme_mpath_get_bdev(bdev);
+ if (!bdev) {
+ nvmet_req_complete(req, 0);
+ return;
+ }
+ opf |= REQ_DRV;
+ }
+#endif
+ if (pqt_enabled) {
+ req_done = kmalloc(sizeof(struct nvmet_pqt_bio_req), GFP_KERNEL);
+ if (!req_done) {
+ nvmet_req_complete(req, 0);
+ return;
+ }
+ }
+
if (is_pci_p2pdma_page(sg_page(req->sg)))
opf |= REQ_NOMERGE;
@@ -278,54 +343,174 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
bio_init(bio, req->ns->bdev, req->inline_bvec,
ARRAY_SIZE(req->inline_bvec), opf);
} else {
- bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), opf,
+ vec_cnt = bio_max_segs(sg_cnt);
+ if (pqt_enabled)
+ vec_cnt = queue_max_segments(bdev->bd_disk->queue);
+ bio = bio_alloc(bdev, vec_cnt, opf,
GFP_KERNEL);
}
bio->bi_iter.bi_sector = sector;
- bio->bi_private = req;
- bio->bi_end_io = nvmet_bio_done;
+ if (!pqt_enabled) {
+ bio->bi_private = req;
+ bio->bi_end_io = nvmet_bio_done;
+ } else {
+ req_done->req = req;
+ bio->bi_private = req_done;
+ bio->bi_end_io = nvmet_pqt_bio_done;
+ }
- blk_start_plug(&plug);
+ if (!pqt_enabled)
+ blk_start_plug(&plug);
if (req->metadata_len)
sg_miter_start(&prot_miter, req->metadata_sg,
req->metadata_sg_cnt, iter_flags);
- for_each_sg(req->sg, sg, req->sg_cnt, i) {
- while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
- != sg->length) {
- struct bio *prev = bio;
-
- if (req->metadata_len) {
- rc = nvmet_bdev_alloc_bip(req, bio,
- &prot_miter);
- if (unlikely(rc)) {
- bio_io_error(bio);
- return;
+ if (!pqt_enabled) {
+ for_each_sg(req->sg, sg, req->sg_cnt, i) {
+ while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
+ != sg->length) {
+ struct bio *prev = bio;
+
+ if (req->metadata_len) {
+ rc = nvmet_bdev_alloc_bip(req, bio,
+ &prot_miter);
+ if (unlikely(rc)) {
+ bio_io_error(bio);
+ return;
+ }
}
- }
- bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
- opf, GFP_KERNEL);
- bio->bi_iter.bi_sector = sector;
+ bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
+ opf, GFP_KERNEL);
+ bio->bi_iter.bi_sector = sector;
- bio_chain(bio, prev);
- submit_bio(prev);
- }
+ bio_chain(bio, prev);
+ submit_bio(prev);
+ }
- sector += sg->length >> 9;
- sg_cnt--;
+ sector += sg->length >> 9;
+ sg_cnt--;
+ }
+ } else {
+ bio_list_init(&req_done->blist);
+ if (!test_bit(QUEUE_FLAG_POLL, &bdev->bd_disk->queue->queue_flags))
+ goto err_bio;
+ max_sectors = bdev->bd_disk->queue->limits.max_sectors;
+ max_sectors <<= 9;
+ max_segments = queue_max_segments(bdev->bd_disk->queue);
+ sg_len = 0;
+ unsigned int offset, len, vec_len, i;
+ bool sg_start_pg = true, need_chain_bio = false;
+ struct page *sglist_page, *max_sector_align;
+ sector_t temp_sector;
+
+ /*
+ * for bio's polling mode we will split bio to
+ * avoid low level's bio splitting when submit.
+ */
+ for_each_sg(req->sg, sg, req->sg_cnt, i) {
+ temp_sector = sector;
+ offset = (sg->offset % PAGE_SIZE);
+ if (offset + sg->length > PAGE_SIZE) { // need to split
+ len = sg->length;
+ i = 0;
+ sglist_page = virt_to_page(page_to_virt(sg_page(sg)) + offset);
+ if (offset != 0)
+ sg_start_pg = false;
+ while (len > PAGE_SIZE) {
+ max_sector_align = virt_to_page(page_to_virt(sglist_page) +
+ (PAGE_SIZE*i));
+ vec_len = sg_start_pg?PAGE_SIZE:(PAGE_SIZE - offset);
+ if (bio->bi_vcnt == max_segments - 1 ||
+ sg_len + vec_len > max_sectors)
+ need_chain_bio = true;
+ else {
+ __bio_add_page(bio, max_sector_align,
+ vec_len, sg_start_pg?0:offset);
+ temp_sector += vec_len >> 9;
+ sg_len += vec_len;
+ }
+ if (need_chain_bio) {
+ rc = nvmet_chain_par_bio(req, &bio, &prot_miter,
+ bdev, temp_sector, &req_done->blist);
+ if (unlikely(rc))
+ goto err_bio;
+ __bio_add_page(bio, max_sector_align, vec_len,
+ sg_start_pg?0:(PAGE_SIZE - offset));
+ temp_sector += vec_len >> 9;
+ sg_len = vec_len;
+ need_chain_bio = false;
+ }
+ if (!sg_start_pg) {
+ len -= (PAGE_SIZE - offset);
+ sg_start_pg = true;
+ } else {
+ len -= PAGE_SIZE;
+ }
+ i++;
+ }
+ if (len > 0) {
+ max_sector_align = virt_to_page(page_to_virt(sglist_page) +
+ (i * PAGE_SIZE));
+ if (bio->bi_vcnt == max_segments - 1 ||
+ sg_len + len > max_sectors) {
+ rc = nvmet_chain_par_bio(req, &bio, &prot_miter,
+ bdev, temp_sector, &req_done->blist);
+ if (unlikely(rc))
+ goto err_bio;
+ sg_len = len;
+ } else {
+ sg_len += len;
+ }
+ __bio_add_page(bio, max_sector_align, len, 0);
+ temp_sector += len >> 9;
+ }
+ } else {
+ if (bio->bi_vcnt == max_segments - 1 ||
+ sg_len + sg->length > max_sectors) {
+ rc = nvmet_chain_par_bio(req, &bio, &prot_miter,
+ bdev, temp_sector, &req_done->blist);
+ if (unlikely(rc))
+ goto err_bio;
+ sg_len = sg->length;
+ } else {
+ sg_len += sg->length;
+ }
+ __bio_add_page(bio, sg_page(sg), sg->length, sg->offset);
+ }
+ sector += sg->length >> 9;
+ sg_cnt--;
+ }
}
if (req->metadata_len) {
rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
if (unlikely(rc)) {
- bio_io_error(bio);
- return;
+ goto err_bio;
}
}
- submit_bio(bio);
- blk_finish_plug(&plug);
+ if (pqt_enabled) {
+ bio->bi_opf |= REQ_POLLED;
+ bio->bi_opf |= REQ_NOWAIT;
+ bio->bi_opf |= REQ_NOMERGE;
+ bio_list_add(&req_done->blist, bio);
+ req_done->io_completed = 0;
+ rc = nvmet_pqt_ring_enqueue(req_done);
+ if (rc < 0)
+ goto err_bio;
+ nvmet_wakeup_pq_thread();
+ } else {
+ submit_bio(bio);
+ }
+ if (!pqt_enabled)
+ blk_finish_plug(&plug);
+ return;
+err_bio:
+ bio_io_error(bio);
+ if (pqt_enabled)
+ kfree(req_done);
+ return;
}
static void nvmet_bdev_execute_flush(struct nvmet_req *req)
--
2.26.2
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 4/4] nvme-core: Get lowlevel disk for target polling queue task
2023-09-13 8:36 [PATCH 0/4] nvmet: support polling queue task for bio request Ping Gan
` (3 preceding siblings ...)
2023-09-13 8:36 ` [PATCH 3/4] nvmet: support bio polling queue request Ping Gan
@ 2023-09-13 8:36 ` Ping Gan
2023-09-13 10:17 ` kernel test robot
2023-09-13 18:53 ` [PATCH 0/4] nvmet: support polling queue task for bio request Chaitanya Kulkarni
` (2 subsequent siblings)
7 siblings, 1 reply; 15+ messages in thread
From: Ping Gan @ 2023-09-13 8:36 UTC (permalink / raw)
To: kbusch, axboe, hch, sagi, kch, linux-kernel, linux-nvme
Cc: ping_gan, jacky_gam_2001
When enabling multipath, if the block device of nvmet is
a nvme_ns_head disk, then we should get the lowlevel block
device to do bio split.
Signed-off-by: Ping Gan <jacky_gam_2001@163.com>
---
drivers/nvme/host/multipath.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 0a88d7bdc5e3..f6063600e06e 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -371,6 +371,25 @@ static bool nvme_available_path(struct nvme_ns_head *head)
return false;
}
+//for polling queue task to get lowlevel block device
+struct block_device *nvme_mpath_get_bdev(struct block_device *bdev)
+{
+ struct nvme_ns_head *head = bdev->bd_disk->private_data;
+ int srcu_idx;
+ struct nvme_ns *ns;
+ struct block_device *ret = NULL;
+
+ if (!multipath)
+ return NULL;
+ srcu_idx = srcu_read_lock(&head->srcu);
+ ns = nvme_find_path(head);
+ if (likely(ns))
+ ret = ns->disk->part0;
+ srcu_read_unlock(&head->srcu, srcu_idx);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nvme_mpath_get_bdev);
+
static void nvme_ns_head_submit_bio(struct bio *bio)
{
struct nvme_ns_head *head = bio->bi_bdev->bd_disk->private_data;
@@ -452,6 +471,7 @@ const struct block_device_operations nvme_ns_head_ops = {
.report_zones = nvme_ns_head_report_zones,
.pr_ops = &nvme_pr_ops,
};
+EXPORT_SYMBOL_GPL(nvme_ns_head_ops);
static inline struct nvme_ns_head *cdev_to_ns_head(struct cdev *cdev)
{
--
2.26.2
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 4/4] nvme-core: Get lowlevel disk for target polling queue task
2023-09-13 8:36 ` [PATCH 4/4] nvme-core: Get lowlevel disk for target polling queue task Ping Gan
@ 2023-09-13 10:17 ` kernel test robot
0 siblings, 0 replies; 15+ messages in thread
From: kernel test robot @ 2023-09-13 10:17 UTC (permalink / raw)
To: Ping Gan, kbusch, axboe, hch, sagi, kch, linux-kernel, linux-nvme
Cc: oe-kbuild-all, ping_gan, jacky_gam_2001
Hi Ping,
kernel test robot noticed the following build warnings:
[auto build test WARNING on v6.6-rc1]
[also build test WARNING on linus/master next-20230913]
[cannot apply to hch-configfs/for-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Ping-Gan/nvmet-Add-nvme-target-polling-queue-task-parameters/20230913-164112
base: v6.6-rc1
patch link: https://lore.kernel.org/r/006b6aefe94d73ee64931c769af4a908616439ad.1694592708.git.jacky_gam_2001%40163.com
patch subject: [PATCH 4/4] nvme-core: Get lowlevel disk for target polling queue task
config: m68k-allyesconfig (https://download.01.org/0day-ci/archive/20230913/202309131858.YUV19V9E-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20230913/202309131858.YUV19V9E-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202309131858.YUV19V9E-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> drivers/nvme/host/multipath.c:375:22: warning: no previous prototype for 'nvme_mpath_get_bdev' [-Wmissing-prototypes]
375 | struct block_device *nvme_mpath_get_bdev(struct block_device *bdev)
| ^~~~~~~~~~~~~~~~~~~
vim +/nvme_mpath_get_bdev +375 drivers/nvme/host/multipath.c
373
374 //for polling queue task to get lowlevel block device
> 375 struct block_device *nvme_mpath_get_bdev(struct block_device *bdev)
376 {
377 struct nvme_ns_head *head = bdev->bd_disk->private_data;
378 int srcu_idx;
379 struct nvme_ns *ns;
380 struct block_device *ret = NULL;
381
382 if (!multipath)
383 return NULL;
384 srcu_idx = srcu_read_lock(&head->srcu);
385 ns = nvme_find_path(head);
386 if (likely(ns))
387 ret = ns->disk->part0;
388 srcu_read_unlock(&head->srcu, srcu_idx);
389 return ret;
390 }
391 EXPORT_SYMBOL_GPL(nvme_mpath_get_bdev);
392
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 2/4] nvmet: Add polling queue task for nvme target
2023-09-13 8:36 ` [PATCH 2/4] nvmet: Add polling queue task for nvme target Ping Gan
@ 2023-09-13 12:19 ` kernel test robot
2023-09-13 13:13 ` kernel test robot
2023-09-13 16:03 ` kernel test robot
2 siblings, 0 replies; 15+ messages in thread
From: kernel test robot @ 2023-09-13 12:19 UTC (permalink / raw)
To: Ping Gan, kbusch, axboe, hch, sagi, kch, linux-kernel, linux-nvme
Cc: oe-kbuild-all, ping_gan, jacky_gam_2001
Hi Ping,
kernel test robot noticed the following build warnings:
[auto build test WARNING on v6.6-rc1]
[also build test WARNING on linus/master next-20230913]
[cannot apply to hch-configfs/for-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Ping-Gan/nvmet-Add-nvme-target-polling-queue-task-parameters/20230913-164112
base: v6.6-rc1
patch link: https://lore.kernel.org/r/84d7f188e892b5b0ba251a4601455d7a137075f3.1694592708.git.jacky_gam_2001%40163.com
patch subject: [PATCH 2/4] nvmet: Add polling queue task for nvme target
config: arc-randconfig-r004-20230913 (https://download.01.org/0day-ci/archive/20230913/202309131959.1zH6sjmk-lkp@intel.com/config)
compiler: arc-elf-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20230913/202309131959.1zH6sjmk-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202309131959.1zH6sjmk-lkp@intel.com/
All warnings (new ones prefixed by >>):
drivers/nvme/target/polling-queue-thread.c: In function '__nvmet_pq_ring_do_enqueue':
>> drivers/nvme/target/polling-queue-thread.c:150:37: warning: this statement may fall through [-Wimplicit-fallthrough=]
150 | ring[idx++] = obj_table[i++]; /* fallthrough */ \
| ~~~~~~~~~~~~^~~~~~~~~~~~~~~~
drivers/nvme/target/polling-queue-thread.c:278:9: note: in expansion of macro 'ENQUEUE_PTRS'
278 | ENQUEUE_PTRS(r, &r[1], prod_head, obj_table, n, void *);
| ^~~~~~~~~~~~
drivers/nvme/target/polling-queue-thread.c:151:17: note: here
151 | case 2: \
| ^~~~
drivers/nvme/target/polling-queue-thread.c:278:9: note: in expansion of macro 'ENQUEUE_PTRS'
278 | ENQUEUE_PTRS(r, &r[1], prod_head, obj_table, n, void *);
| ^~~~~~~~~~~~
drivers/nvme/target/polling-queue-thread.c:152:37: warning: this statement may fall through [-Wimplicit-fallthrough=]
152 | ring[idx++] = obj_table[i++]; /* fallthrough */ \
| ~~~~~~~~~~~~^~~~~~~~~~~~~~~~
drivers/nvme/target/polling-queue-thread.c:278:9: note: in expansion of macro 'ENQUEUE_PTRS'
278 | ENQUEUE_PTRS(r, &r[1], prod_head, obj_table, n, void *);
| ^~~~~~~~~~~~
drivers/nvme/target/polling-queue-thread.c:153:17: note: here
153 | case 1: \
| ^~~~
drivers/nvme/target/polling-queue-thread.c:278:9: note: in expansion of macro 'ENQUEUE_PTRS'
278 | ENQUEUE_PTRS(r, &r[1], prod_head, obj_table, n, void *);
| ^~~~~~~~~~~~
drivers/nvme/target/polling-queue-thread.c: In function '__nvmet_pq_ring_do_dequeue':
drivers/nvme/target/polling-queue-thread.c:181:40: warning: this statement may fall through [-Wimplicit-fallthrough=]
181 | obj_table[i++] = ring[idx++]; /* fallthrough */ \
| ^
drivers/nvme/target/polling-queue-thread.c:298:9: note: in expansion of macro 'DEQUEUE_PTRS'
298 | DEQUEUE_PTRS(r, &r[1], cons_head, obj_table, n, void *);
| ^~~~~~~~~~~~
drivers/nvme/target/polling-queue-thread.c:182:17: note: here
182 | case 2: \
| ^~~~
drivers/nvme/target/polling-queue-thread.c:298:9: note: in expansion of macro 'DEQUEUE_PTRS'
298 | DEQUEUE_PTRS(r, &r[1], cons_head, obj_table, n, void *);
| ^~~~~~~~~~~~
drivers/nvme/target/polling-queue-thread.c:183:40: warning: this statement may fall through [-Wimplicit-fallthrough=]
183 | obj_table[i++] = ring[idx++]; /* fallthrough */ \
| ^
drivers/nvme/target/polling-queue-thread.c:298:9: note: in expansion of macro 'DEQUEUE_PTRS'
298 | DEQUEUE_PTRS(r, &r[1], cons_head, obj_table, n, void *);
| ^~~~~~~~~~~~
drivers/nvme/target/polling-queue-thread.c:184:17: note: here
184 | case 1: \
| ^~~~
drivers/nvme/target/polling-queue-thread.c:298:9: note: in expansion of macro 'DEQUEUE_PTRS'
298 | DEQUEUE_PTRS(r, &r[1], cons_head, obj_table, n, void *);
| ^~~~~~~~~~~~
vim +150 drivers/nvme/target/polling-queue-thread.c
131
132 //below is derived from FreeBSD's bufring.h
133 /* the actual enqueue of pointers on the ring.
134 * Placed here since identical code needed in both
135 * single and multi producer enqueue functions */
136 #define ENQUEUE_PTRS(r, ring_start, prod_head, obj_table, n, obj_type) do { \
137 unsigned int i; \
138 const u32 size = (r)->size; \
139 u32 idx = prod_head & (r)->mask; \
140 obj_type *ring = (obj_type *)ring_start; \
141 if (likely(idx + n < size)) { \
142 for (i = 0; i < (n & ((~(unsigned)0x3))); i += 4, idx += 4) { \
143 ring[idx] = obj_table[i]; \
144 ring[idx+1] = obj_table[i+1]; \
145 ring[idx+2] = obj_table[i+2]; \
146 ring[idx+3] = obj_table[i+3]; \
147 } \
148 switch (n & 0x3) { \
149 case 3: \
> 150 ring[idx++] = obj_table[i++]; /* fallthrough */ \
151 case 2: \
152 ring[idx++] = obj_table[i++]; /* fallthrough */ \
153 case 1: \
154 ring[idx++] = obj_table[i++]; \
155 } \
156 } else { \
157 for (i = 0; idx < size; i++, idx++)\
158 ring[idx] = obj_table[i]; \
159 for (idx = 0; i < n; i++, idx++) \
160 ring[idx] = obj_table[i]; \
161 } \
162 } while (0)
163
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 2/4] nvmet: Add polling queue task for nvme target
2023-09-13 8:36 ` [PATCH 2/4] nvmet: Add polling queue task for nvme target Ping Gan
2023-09-13 12:19 ` kernel test robot
@ 2023-09-13 13:13 ` kernel test robot
2023-09-13 16:03 ` kernel test robot
2 siblings, 0 replies; 15+ messages in thread
From: kernel test robot @ 2023-09-13 13:13 UTC (permalink / raw)
To: Ping Gan, kbusch, axboe, hch, sagi, kch, linux-kernel, linux-nvme
Cc: llvm, oe-kbuild-all, ping_gan, jacky_gam_2001
Hi Ping,
kernel test robot noticed the following build warnings:
[auto build test WARNING on v6.6-rc1]
[also build test WARNING on linus/master next-20230913]
[cannot apply to hch-configfs/for-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Ping-Gan/nvmet-Add-nvme-target-polling-queue-task-parameters/20230913-164112
base: v6.6-rc1
patch link: https://lore.kernel.org/r/84d7f188e892b5b0ba251a4601455d7a137075f3.1694592708.git.jacky_gam_2001%40163.com
patch subject: [PATCH 2/4] nvmet: Add polling queue task for nvme target
config: i386-randconfig-013-20230913 (https://download.01.org/0day-ci/archive/20230913/202309132115.8j2dhGX4-lkp@intel.com/config)
compiler: clang version 16.0.4 (https://github.com/llvm/llvm-project.git ae42196bc493ffe877a7e3dff8be32035dea4d07)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20230913/202309132115.8j2dhGX4-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202309132115.8j2dhGX4-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> drivers/nvme/target/polling-queue-thread.c:278:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
ENQUEUE_PTRS(r, &r[1], prod_head, obj_table, n, void *);
^
drivers/nvme/target/polling-queue-thread.c:151:3: note: expanded from macro 'ENQUEUE_PTRS'
case 2: \
^
>> drivers/nvme/target/polling-queue-thread.c:278:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
drivers/nvme/target/polling-queue-thread.c:153:3: note: expanded from macro 'ENQUEUE_PTRS'
case 1: \
^
drivers/nvme/target/polling-queue-thread.c:298:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
DEQUEUE_PTRS(r, &r[1], cons_head, obj_table, n, void *);
^
drivers/nvme/target/polling-queue-thread.c:182:3: note: expanded from macro 'DEQUEUE_PTRS'
case 2: \
^
drivers/nvme/target/polling-queue-thread.c:298:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
drivers/nvme/target/polling-queue-thread.c:184:3: note: expanded from macro 'DEQUEUE_PTRS'
case 1: \
^
drivers/nvme/target/polling-queue-thread.c:357:1: warning: unused function '__nvmet_pq_ring_full' [-Wunused-function]
__nvmet_pq_ring_full(const struct nvmet_pq_ring *r)
^
drivers/nvme/target/polling-queue-thread.c:366:1: warning: unused function '__nvmet_pq_ring_empty' [-Wunused-function]
__nvmet_pq_ring_empty(const struct nvmet_pq_ring *r)
^
drivers/nvme/target/polling-queue-thread.c:375:1: warning: unused function '__nvmet_pq_ring_get_size' [-Wunused-function]
__nvmet_pq_ring_get_size(const struct nvmet_pq_ring *r)
^
drivers/nvme/target/polling-queue-thread.c:384:1: warning: unused function '__nvmet_pq_ring_get_capacity' [-Wunused-function]
__nvmet_pq_ring_get_capacity(const struct nvmet_pq_ring *r)
^
8 warnings generated.
--
>> drivers/nvme/target/polling-queue-thread.c:345: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
* Return the number of free entries in a ring.
drivers/nvme/target/polling-queue-thread.c:354: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
* Test if a ring is full.
drivers/nvme/target/polling-queue-thread.c:363: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
* Test if a ring is empty.
drivers/nvme/target/polling-queue-thread.c:372: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
* Return the size of the ring.
drivers/nvme/target/polling-queue-thread.c:381: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
* Return the number of elements which can be stored in the ring.
vim +278 drivers/nvme/target/polling-queue-thread.c
265
266 static inline u32
267 __nvmet_pq_ring_do_enqueue(struct nvmet_pq_ring *r,
268 void **obj_table, u32 n, u32 *free_space)
269 {
270 uint32_t prod_head, prod_next;
271 uint32_t free_entries;
272
273 n = __nvmet_pq_ring_move_prod_head(r, n, &prod_head,
274 &prod_next, &free_entries);
275 if (n == 0)
276 goto end;
277
> 278 ENQUEUE_PTRS(r, &r[1], prod_head, obj_table, n, void *);
279
280 __nvmet_pq_ring_update_tail(&r->prod, prod_head, prod_next, 1);
281 end:
282 if (free_space != NULL)
283 *free_space = free_entries - n;
284 return n;
285 }
286
287 static inline u32
288 __nvmet_pq_ring_do_dequeue(struct nvmet_pq_ring *r,
289 void **obj_table, u32 n, u32 *available)
290 {
291 uint32_t cons_head, cons_next;
292 uint32_t entries;
293
294 n = __nvmet_pq_ring_move_cons_head(r, n, &cons_head, &cons_next, &entries);
295 if (n == 0)
296 goto end;
297
298 DEQUEUE_PTRS(r, &r[1], cons_head, obj_table, n, void *);
299
300 __nvmet_pq_ring_update_tail(&r->cons, cons_head, cons_next, 0);
301
302 end:
303 if (available != NULL)
304 *available = entries - n;
305 return n;
306 }
307
308 static inline u32
309 nvmet_pq_ring_enqueue_bulk(struct nvmet_pq_ring *r,
310 void **obj_table, u32 n, u32 *free_space)
311 {
312 return __nvmet_pq_ring_do_enqueue(r, obj_table, n, free_space);
313 }
314
315 static inline int
316 nvmet_pq_ring_enqueue(struct nvmet_pq_ring *r, void **obj)
317 {
318 return nvmet_pq_ring_enqueue_bulk(r, obj, 1, NULL) ? 0 : -ENOBUFS;
319 }
320
321 static inline u32
322 nvmet_pq_ring_dequeue_bulk(struct nvmet_pq_ring *r,
323 void **obj_table, u32 n, u32 *available)
324 {
325 return __nvmet_pq_ring_do_dequeue(r, obj_table, n, available);
326 }
327
328 static inline int
329 nvmet_pq_ring_dequeue(struct nvmet_pq_ring *r, void **obj_p)
330 {
331 return nvmet_pq_ring_dequeue_bulk(r, obj_p, 1, NULL) ? 0 : -ENOENT;
332 }
333
334 static inline u32
335 __nvmet_pq_ring_count(const struct nvmet_pq_ring *r)
336 {
337 u32 prod_tail = smp_load_acquire(&r->prod.tail);
338 u32 cons_tail = smp_load_acquire(&r->cons.tail);
339 u32 count = (prod_tail - cons_tail) & r->mask;
340 u32 capacity = smp_load_acquire(&r->capacity);
341 return (count > capacity) ? capacity : count;
342 }
343
344 /**
> 345 * Return the number of free entries in a ring.
346 */
347 static inline u32
348 __nvmet_pq_ring_free_count(const struct nvmet_pq_ring *r)
349 {
350 return smp_load_acquire(&r->capacity) - __nvmet_pq_ring_count(r);
351 }
352
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 2/4] nvmet: Add polling queue task for nvme target
2023-09-13 8:36 ` [PATCH 2/4] nvmet: Add polling queue task for nvme target Ping Gan
2023-09-13 12:19 ` kernel test robot
2023-09-13 13:13 ` kernel test robot
@ 2023-09-13 16:03 ` kernel test robot
2 siblings, 0 replies; 15+ messages in thread
From: kernel test robot @ 2023-09-13 16:03 UTC (permalink / raw)
To: Ping Gan, kbusch, axboe, hch, sagi, kch, linux-kernel, linux-nvme
Cc: oe-kbuild-all, ping_gan, jacky_gam_2001
Hi Ping,
kernel test robot noticed the following build warnings:
[auto build test WARNING on v6.6-rc1]
[also build test WARNING on linus/master next-20230913]
[cannot apply to hch-configfs/for-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Ping-Gan/nvmet-Add-nvme-target-polling-queue-task-parameters/20230913-164112
base: v6.6-rc1
patch link: https://lore.kernel.org/r/84d7f188e892b5b0ba251a4601455d7a137075f3.1694592708.git.jacky_gam_2001%40163.com
patch subject: [PATCH 2/4] nvmet: Add polling queue task for nvme target
config: x86_64-randconfig-121-20230913 (https://download.01.org/0day-ci/archive/20230913/202309132330.9Vr4qPji-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20230913/202309132330.9Vr4qPji-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202309132330.9Vr4qPji-lkp@intel.com/
sparse warnings: (new ones prefixed by >>)
>> drivers/nvme/target/polling-queue-thread.c:72:29: sparse: sparse: symbol 'nvmet_pqt_data' was not declared. Should it be static?
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 0/4] nvmet: support polling queue task for bio request
2023-09-13 8:36 [PATCH 0/4] nvmet: support polling queue task for bio request Ping Gan
` (4 preceding siblings ...)
2023-09-13 8:36 ` [PATCH 4/4] nvme-core: Get lowlevel disk for target polling queue task Ping Gan
@ 2023-09-13 18:53 ` Chaitanya Kulkarni
2023-09-15 9:37 ` Ping Gan
2023-09-19 3:25 ` [PATCH 0/4] nvmet: support polling queue task for bio Ping Gan
2023-09-15 9:37 ` [PATCH 0/4] nvmet: support polling queue task for bio request Ping Gan
2023-09-19 3:25 ` Ping Gan
7 siblings, 2 replies; 15+ messages in thread
From: Chaitanya Kulkarni @ 2023-09-13 18:53 UTC (permalink / raw)
To: Ping Gan
Cc: ping_gan@dell.com, kbusch@kernel.org,
linux-nvme@lists.infradead.org, linux-kernel@vger.kernel.org,
Chaitanya Kulkarni, hch@lst.de, sagi@grimberg.me, axboe@kernel.dk
On 9/13/2023 1:34 AM, Ping Gan wrote:
> Since nvme target currently does not support to submit bio to a polling
> queue, the bio's completion relies on system interrupt. But when there
> is high workload in system and the competition is very high, so it makes
> sense to add polling queue task to submit bio to disk's polling queue
> and poll the completion queue of disk.
>
>
I did some work in the past for nvmet polling and saw good
performance improvement.
Can you please share performance numbers for this series ?
-ck
^ permalink raw reply [flat|nested] 15+ messages in thread
* (no subject)
2023-09-13 18:53 ` [PATCH 0/4] nvmet: support polling queue task for bio request Chaitanya Kulkarni
@ 2023-09-15 9:37 ` Ping Gan
2023-09-19 3:25 ` [PATCH 0/4] nvmet: support polling queue task for bio Ping Gan
1 sibling, 0 replies; 15+ messages in thread
From: Ping Gan @ 2023-09-15 9:37 UTC (permalink / raw)
To: chaitanyak
Cc: ping_gan, kbusch, linux-nvme, linux-kernel, hch, sagi, axboe,
jacky_gam_2001
> On 9/13/2023 1:34 AM, Ping Gan wrote:
> > Since nvme target currently does not support to submit bio to a
> > polling
> > queue, the bio's completion relies on system interrupt. But when there
> > is high workload in system and the competition is very high, so it
> > makes
> > sense to add polling queue task to submit bio to disk's polling queue
> > and poll the completion queue of disk.
> >
> >
>
> I did some work in the past for nvmet polling and saw good
> performance improvement.
>
> Can you please share performance numbers for this series ?
>
> -ck
hi,
I have verified this patch on two testbeds one for host and the other
for target. I used tcp as transport protocol, spdk perf as initiator.
I did two group tests. The IO size of the first is 4K, and the other is 2M.
Both include randrw, randwrite and randread. Both have the same prerequisites.
At the initiator side I used 1 qp, 32 queue depth,and 1 spdk perf
application, and for target side I bound tcp queue to 1 target core.
And I get below results.
iosize_4k polling queue interrupt
randrw NIC_rx:338M/s NIC_tx:335M/s NIC_rx:260M/s
NIC_tx:258M/s
randwrite NIC_rx:587M/s NIC_rx:431M/s
randread NIC_tx:873M/s NIC_tx:654M/s
iosize_2M polling queue interrupt
randrw NIC_rx:738M/s NIC_tx:741M/s NIC_rx:674M/s
NIC_tx:674M/s
randwrite NIC_rx:1199M/s NIC_rx:1146M/s
randread NIC_tx:2226M/s NIC_tx:2119M/s
For iosize 4k the NIC's bandwidth of the polling queue is more than 30% higher than the
bandwidth of interrupt. But for iosize 2M the improvement is not obvious,
the randrw of polling queue is about 9% more than interrupt; randwrite
and randread of polling queue is about 5% more than interrupt.
Thanks,
Ping
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 0/4] nvmet: support polling queue task for bio request
2023-09-13 8:36 [PATCH 0/4] nvmet: support polling queue task for bio request Ping Gan
` (5 preceding siblings ...)
2023-09-13 18:53 ` [PATCH 0/4] nvmet: support polling queue task for bio request Chaitanya Kulkarni
@ 2023-09-15 9:37 ` Ping Gan
2023-09-19 3:25 ` Ping Gan
7 siblings, 0 replies; 15+ messages in thread
From: Ping Gan @ 2023-09-15 9:37 UTC (permalink / raw)
To: chaitanyak
Cc: ping_gan, kbusch, linux-nvme, linux-kernel, hch, sagi, axboe,
jacky_gam_2001
Since nvme target currently does not support to submit bio to a polling
queue, the bio's completion relies on system interrupt. But when there
is high workload in system and the competition is very high, so it makes
sense to add polling queue task to submit bio to disk's polling queue
and poll the completion queue of disk.
Ping Gan (4):
nvmet: Add nvme target polling queue task parameters
nvmet: Add polling queue task for nvme target
nvmet: support bio polling queue request
nvme-core: Get lowlevel disk for target polling queue task
drivers/nvme/host/multipath.c | 20 +
drivers/nvme/target/Makefile | 2 +-
drivers/nvme/target/core.c | 55 +-
drivers/nvme/target/io-cmd-bdev.c | 243 ++++++++-
drivers/nvme/target/nvmet.h | 13 +
drivers/nvme/target/polling-queue-thread.c | 594 +++++++++++++++++++++
6 files changed, 895 insertions(+), 32 deletions(-)
create mode 100644 drivers/nvme/target/polling-queue-thread.c
--
2.26.2
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 0/4] nvmet: support polling queue task for bio
2023-09-13 18:53 ` [PATCH 0/4] nvmet: support polling queue task for bio request Chaitanya Kulkarni
2023-09-15 9:37 ` Ping Gan
@ 2023-09-19 3:25 ` Ping Gan
1 sibling, 0 replies; 15+ messages in thread
From: Ping Gan @ 2023-09-19 3:25 UTC (permalink / raw)
To: chaitanyak
Cc: ping_gan, kbusch, linux-nvme, linux-kernel, hch, sagi, axboe,
jacky_gam_2001
> On 9/13/2023 1:34 AM, Ping Gan wrote:
> > Since nvme target currently does not support to submit bio to a
> > polling
> > queue, the bio's completion relies on system interrupt. But when
> > there
> > is high workload in system and the competition is very high, so it
> > makes
> > sense to add polling queue task to submit bio to disk's polling
> > queue
> > and poll the completion queue of disk.
> >
> >
>
> I did some work in the past for nvmet polling and saw good
> performance improvement.
>
> Can you please share performance numbers for this series ?
>
> -ck
hi,
I have verified this patch on two testbeds one for host and the other
for target. I used tcp as transport protocol, spdk perf as initiator.
I did two group tests. The IO size of the first is 4K, and the other is 2M.
Both include randrw, randwrite and randread. Both also have the same prerequisites.
At the initiator side I used 1 qp, 32 queue depth,and 1 spdk perf
application, and for target side I bound tcp queue to 1 target core.
And I get below results.
iosize_4k polling queue interrupt
randrw NIC_rx:338M/s NIC_tx:335M/s NIC_rx:260M/s
NIC_tx:258M/s
randwrite NIC_rx:587M/s NIC_rx:431M/s
randread NIC_tx:873M/s NIC_tx:654M/s
iosize_2M polling queue interrupt
randrw NIC_rx:738M/s NIC_tx:741M/s NIC_rx:674M/s
NIC_tx:674M/s
randwrite NIC_rx:1199M/s NIC_rx:1146M/s
randread NIC_tx:2226M/s NIC_tx:2119M/s
For iosize 4k the NIC's bandwidth of the polling queue is more than 30% higher than the
bandwidth of interrupt. But for iosize 2M the improvement is not
obvious,
the randrw of polling queue is about 9% more than interrupt; randwrite
and randread of polling queue is about 5% more than interrupt.
Thanks,
Ping
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 0/4] nvmet: support polling queue task for bio request
2023-09-13 8:36 [PATCH 0/4] nvmet: support polling queue task for bio request Ping Gan
` (6 preceding siblings ...)
2023-09-15 9:37 ` [PATCH 0/4] nvmet: support polling queue task for bio request Ping Gan
@ 2023-09-19 3:25 ` Ping Gan
7 siblings, 0 replies; 15+ messages in thread
From: Ping Gan @ 2023-09-19 3:25 UTC (permalink / raw)
To: chaitanyak
Cc: ping_gan, kbusch, linux-nvme, linux-kernel, hch, sagi, axboe,
jacky_gam_2001
Since nvme target currently does not support to submit bio to a polling
queue, the bio's completion relies on system interrupt. But when there
is high workload in system and the competition is very high, so it makes
sense to add polling queue task to submit bio to disk's polling queue
and poll the completion queue of disk.
Ping Gan (4):
nvmet: Add nvme target polling queue task parameters
nvmet: Add polling queue task for nvme target
nvmet: support bio polling queue request
nvme-core: Get lowlevel disk for target polling queue task
drivers/nvme/host/multipath.c | 20 +
drivers/nvme/target/Makefile | 2 +-
drivers/nvme/target/core.c | 55 +-
drivers/nvme/target/io-cmd-bdev.c | 243 ++++++++-
drivers/nvme/target/nvmet.h | 13 +
drivers/nvme/target/polling-queue-thread.c | 594 +++++++++++++++++++++
6 files changed, 895 insertions(+), 32 deletions(-)
create mode 100644 drivers/nvme/target/polling-queue-thread.c
--
2.26.2
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2023-09-19 3:27 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-09-13 8:36 [PATCH 0/4] nvmet: support polling queue task for bio request Ping Gan
2023-09-13 8:34 ` Ping Gan
2023-09-13 8:36 ` [PATCH 1/4] nvmet: Add nvme target polling queue task parameters Ping Gan
2023-09-13 8:36 ` [PATCH 2/4] nvmet: Add polling queue task for nvme target Ping Gan
2023-09-13 12:19 ` kernel test robot
2023-09-13 13:13 ` kernel test robot
2023-09-13 16:03 ` kernel test robot
2023-09-13 8:36 ` [PATCH 3/4] nvmet: support bio polling queue request Ping Gan
2023-09-13 8:36 ` [PATCH 4/4] nvme-core: Get lowlevel disk for target polling queue task Ping Gan
2023-09-13 10:17 ` kernel test robot
2023-09-13 18:53 ` [PATCH 0/4] nvmet: support polling queue task for bio request Chaitanya Kulkarni
2023-09-15 9:37 ` Ping Gan
2023-09-19 3:25 ` [PATCH 0/4] nvmet: support polling queue task for bio Ping Gan
2023-09-15 9:37 ` [PATCH 0/4] nvmet: support polling queue task for bio request Ping Gan
2023-09-19 3:25 ` Ping Gan
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox