From mboxrd@z Thu Jan  1 00:00:00 1970
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <m@bjorling.me>
Subject: [PATCH v8 4/5] null_nvm: LightNVM test driver
Date: Mon, 21 Sep 2015 18:05:49 +0200
Message-ID: <1442851550-25249-5-git-send-email-m@bjorling.me>
References: <1442851550-25249-1-git-send-email-m@bjorling.me>
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: QUOTED-PRINTABLE
Cc: jg@lightnvm.io, Stephen.Bates@pmcs.com, keith.busch@intel.com,
	=?UTF-8?q?Matias=20Bj=C3=B8rling?= <m@bjorling.me>
To: hch@infradead.org, axboe@fb.com, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-nvme@lists.infradead.org
Return-path: <linux-fsdevel-owner@vger.kernel.org>
Received: from mail-wi0-f180.google.com ([209.85.212.180]:37776 "EHLO
	mail-wi0-f180.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S1756997AbbIUQGD (ORCPT
	<rfc822;linux-fsdevel@vger.kernel.org>);
	Mon, 21 Sep 2015 12:06:03 -0400
Received: by wicfx3 with SMTP id fx3so118591509wic.0
        for <linux-fsdevel@vger.kernel.org>; Mon, 21 Sep 2015 09:06:02 -0700 (PDT)
In-Reply-To: <1442851550-25249-1-git-send-email-m@bjorling.me>
Sender: linux-fsdevel-owner@vger.kernel.org
List-ID: <linux-fsdevel.vger.kernel.org>

This driver implements the I/O flow for a LightNVM device driver. It
does no transfers. It can be used to test setup/teardown of devices and
to evaluate the performance of block managers and targets.

The framework of the driver is derived from the null_blk module.

Signed-off-by: Matias Bj=C3=B8rling <m@bjorling.me>
---
 drivers/lightnvm/Kconfig    |   6 +
 drivers/lightnvm/Makefile   |   1 +
 drivers/lightnvm/null_nvm.c | 468 ++++++++++++++++++++++++++++++++++++=
++++++++
 3 files changed, 475 insertions(+)
 create mode 100644 drivers/lightnvm/null_nvm.c

diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig
index 21a271e..d34ba6a 100644
--- a/drivers/lightnvm/Kconfig
+++ b/drivers/lightnvm/Kconfig
@@ -39,4 +39,10 @@ config NVM_RRPC
 	host. The target is implemented using a linear mapping table and
 	cost-based garbage collection. It is optimized for 4K IO sizes.
=20
+config NVM_NULL_NVM
+	tristate "Null test LightNVM driver"
+	---help---
+	The null test driver can be used to evaluate targets without an
+	underlying device.
+
 endif # NVM
diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile
index 1b7c00b..145ee5d 100644
--- a/drivers/lightnvm/Makefile
+++ b/drivers/lightnvm/Makefile
@@ -5,3 +5,4 @@
 obj-$(CONFIG_NVM)		:=3D core.o
 obj-$(CONFIG_NVM_BM_HB) 	+=3D bm_hb.o
 obj-$(CONFIG_NVM_RRPC)		+=3D rrpc.o
+obj-$(CONFIG_NVM_NULL_NVM)	+=3D null_nvm.o
diff --git a/drivers/lightnvm/null_nvm.c b/drivers/lightnvm/null_nvm.c
new file mode 100644
index 0000000..6fb80dd
--- /dev/null
+++ b/drivers/lightnvm/null_nvm.c
@@ -0,0 +1,468 @@
+/*
+ * derived from Jens Axboe's block/null_blk.c
+ */
+
+#include <linux/module.h>
+
+#include <linux/moduleparam.h>
+#include <linux/sched.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/blk-mq.h>
+#include <linux/hrtimer.h>
+#include <linux/lightnvm.h>
+
+static struct kmem_cache *ppa_cache;	/* set by null_create_dma_pool() */
+struct nulln_cmd {
+	struct llist_node ll_list;	/* link on a completion_queue list */
+	struct request *rq;		/* set in null_queue_rq() */
+};
+
+struct nulln {
+	struct list_head list;		/* entry on nulln_list */
+	unsigned int index;		/* N in the "nulln%d" name */
+	struct request_queue *q;	/* from blk_mq_init_queue() */
+	struct blk_mq_tag_set tag_set;	/* filled in by null_add_dev() */
+	struct hrtimer timer;		/* unused in this patch */
+	char disk_name[DISK_NAME_LEN];	/* name given to nvm_register() */
+};
+
+static LIST_HEAD(nulln_list);	/* devices, see null_add_dev() */
+static struct mutex nulln_lock;	/* guards nulln_list, nulln_indexes */
+static int nulln_indexes;	/* next index to hand out */
+
+struct completion_queue {
+	struct llist_head list;	/* commands waiting for the timer */
+	struct hrtimer timer;	/* runs null_cmd_timer_expired() */
+};
+
+/*
+ * One completion queue per CPU, used when irqmode is 2 (timer). There
+ * is no complete_queues parameter yet to configure or remap them.
+ */
+static DEFINE_PER_CPU(struct completion_queue, completion_queues);
+
+enum {
+	NULL_IRQ_NONE		=3D 0,	/* same path as softirq here */
+	NULL_IRQ_SOFTIRQ	=3D 1,	/* blk_mq_complete_request() */
+	NULL_IRQ_TIMER		=3D 2,	/* per-CPU hrtimer, completion_nsec */
+};
+
+static int submit_queues;	/* adjusted in null_init() */
+module_param(submit_queues, int, S_IRUGO);
+MODULE_PARM_DESC(submit_queues, "Number of submission queues");
+
+static int home_node =3D NUMA_NO_NODE;	/* for kzalloc_node(), tag set */
+module_param(home_node, int, S_IRUGO);
+MODULE_PARM_DESC(home_node, "Home node for the device");
+
+static int null_param_store_val(const char *str, int *val, int min, in=
t max)
+{
+	int parsed;
+
+	/* A parse failure is reported as -EINVAL, like a range error. */
+	if (kstrtoint(str, 10, &parsed))
+		return -EINVAL;
+
+	if (parsed < min || parsed > max)
+		return -EINVAL;
+
+	*val =3D parsed;
+	return 0;
+}
+
+static int gb =3D 250;	/* capacity reported by null_id() */
+module_param(gb, int, S_IRUGO);
+MODULE_PARM_DESC(gb, "Size in GB");
+
+static int bs =3D 4096;	/* null_init() caps this at PAGE_SIZE */
+module_param(bs, int, S_IRUGO);
+MODULE_PARM_DESC(bs, "Block size (in bytes)");
+
+static int nr_devices =3D 1;	/* null_add_dev() calls in null_init() */
+module_param(nr_devices, int, S_IRUGO);
+MODULE_PARM_DESC(nr_devices, "Number of devices to register");
+
+static int irqmode =3D NULL_IRQ_SOFTIRQ;	/* one of the NULL_IRQ_* modes */
+
+static int null_set_irqmode(const char *str, const struct kernel_param=
 *kp)
+{
+	return null_param_store_val(str, &irqmode, NULL_IRQ_NONE,
+					NULL_IRQ_TIMER);
+}
+
+static const struct kernel_param_ops null_irqmode_param_ops =3D {
+	.set	=3D null_set_irqmode,	/* accepts 0..2, else -EINVAL */
+	.get	=3D param_get_int,
+};
+
+device_param_cb(irqmode, &null_irqmode_param_ops, &irqmode, S_IRUGO);
+MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, =
2-timer");
+
+static int completion_nsec =3D 10000;	/* delay in null_cmd_end_timer() */
+module_param(completion_nsec, int, S_IRUGO);
+MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in=
 hardware. Default: 10,000ns");
+
+static int hw_queue_depth =3D 64;	/* tag set depth, chnl queue_size */
+module_param(hw_queue_depth, int, S_IRUGO);
+MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue.=
 Default: 64");
+
+static bool use_per_node_hctx;	/* forces submit_queues to node count */
+module_param(use_per_node_hctx, bool, S_IRUGO);
+MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardw=
are context queues. Default: false");
+
+static int num_channels =3D 1;	/* channel count in null_id() */
+module_param(num_channels, int, S_IRUGO);
+MODULE_PARM_DESC(num_channels, "Number of channels to be exposed. Defa=
ult: 1");
+
+/*
+ * hrtimer callback for irqmode 2: complete every request queued on the
+ * completion_queue that embeds @timer.
+ *
+ * A command lives in its request's PDU, so neither may be touched once
+ * blk_mq_end_request() has run; the next list link is read first. The
+ * queue is always blk-mq here, so no legacy queue restart is needed.
+ *
+ * Returns HRTIMER_NORESTART; null_cmd_end_timer() re-arms the timer.
+ */
+static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *tim=
er)
+{
+	struct completion_queue *cq;
+	struct llist_node *entry;
+	struct nulln_cmd *cmd;
+
+	cq =3D container_of(timer, struct completion_queue, timer);
+
+	while ((entry =3D llist_del_all(&cq->list)) !=3D NULL) {
+		/* llist_add() pushes at the head: restore submission order */
+		entry =3D llist_reverse_order(entry);
+		do {
+			cmd =3D container_of(entry, struct nulln_cmd, ll_list);
+			entry =3D entry->next;
+			blk_mq_end_request(cmd->rq, 0);
+		} while (entry);
+	}
+
+	return HRTIMER_NORESTART;
+}
+
+static void null_cmd_end_timer(struct nulln_cmd *cmd)
+{
+	struct completion_queue *cq;
+
+	/* get_cpu() keeps us on this CPU until put_cpu() below */
+	cq =3D &per_cpu(completion_queues, get_cpu());
+	cmd->ll_list.next =3D NULL;
+	/* llist_add() is true only for the first entry: arm the timer */
+	if (llist_add(&cmd->ll_list, &cq->list))
+		hrtimer_start(&cq->timer, ktime_set(0, completion_nsec),
+			      HRTIMER_MODE_REL_PINNED);
+	put_cpu();
+}
+
+static void null_softirq_done_fn(struct request *rq)
+{
+	blk_mq_end_request(rq, 0);	/* always completes with error 0 */
+}
+
+static inline void null_handle_cmd(struct nulln_cmd *cmd)
+{
+	/* irqmode 0 and 1 both use blk_mq_complete_request(); 2 is timed */
+	switch (irqmode) {
+	case NULL_IRQ_SOFTIRQ:
+	case NULL_IRQ_NONE:
+		blk_mq_complete_request(cmd->rq);
+		break;
+	case NULL_IRQ_TIMER:
+		null_cmd_end_timer(cmd);
+		break;
+	}
+}
+
+/* Report gb of capacity split evenly over num_channels; 0 or -errno. */
+static int null_id(struct request_queue *q, struct nvm_id *id)
+{
+	sector_t size =3D gb * 1024ULL * 1024 * 1024;	/* no int overflow */
+	unsigned long per_chnl_size;
+	struct nvm_id_chnl *chnl;
+	int i;
+
+	if (bs <=3D 0 || num_channels <=3D 0)	/* both are divisors below */
+		return -EINVAL;
+	per_chnl_size =3D size / bs / num_channels;
+
+	id->ver_id =3D 0x1;
+	id->nvm_type =3D NVM_NVMT_BLK;
+	id->nchannels =3D num_channels;
+	id->chnls =3D kmalloc_array(id->nchannels, sizeof(*chnl), GFP_KERNEL);
+	if (!id->chnls)
+		return -ENOMEM;
+
+	for (i =3D 0; i < id->nchannels; i++) {
+		chnl =3D &id->chnls[i];
+		chnl->queue_size =3D hw_queue_depth;
+		chnl->gran_read =3D chnl->gran_write =3D bs;
+		chnl->gran_erase =3D bs * 256;
+		chnl->oob_size =3D 0;
+		chnl->t_r =3D chnl->t_sqr =3D 25000; /* 25us */
+		chnl->t_w =3D chnl->t_sqw =3D 500000; /* 500us */
+		chnl->t_e =3D 1500000; /* 1.5ms */
+		chnl->io_sched =3D NVM_IOSCHED_CHANNEL;
+		chnl->laddr_begin =3D per_chnl_size * i;
+		chnl->laddr_end =3D per_chnl_size * (i + 1) - 1;
+	}
+	return 0;
+}
+
+static int null_get_features(struct request_queue *q,
+						struct nvm_get_features *gf)
+{
+	gf->rsp =3D NVM_RSP_L2P;	/* the only flag reported; q is unused */
+	gf->ext =3D 0;
+
+	return 0;
+}
+
+static void null_end_io(struct request *rq, int error)
+{
+	struct nvm_rq *rqd =3D rq->end_io_data;
+
+	/* Hand the outcome to the target that owns rqd ... */
+	rqd->ins->tt->end_io(rqd, error);
+	/* ... then give the request back to the block layer. */
+	blk_put_request(rq);
+}
+
+static int null_submit_io(struct request_queue *q, struct nvm_rq *rqd)
+{
+	struct request *rq;
+	struct bio *bio =3D rqd->bio;
+
+	rq =3D blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0);
+	if (IS_ERR(rq))
+		return -ENOMEM;	/* whatever ERR_PTR came back */
+
+	rq->cmd_type =3D REQ_TYPE_DRV_PRIV;
+	rq->__sector =3D bio->bi_iter.bi_sector;
+	rq->ioprio =3D bio_prio(bio);
+
+	if (bio_has_data(bio))
+		rq->nr_phys_segments =3D bio_phys_segments(q, bio);
+
+	rq->__data_len =3D bio->bi_iter.bi_size;
+	rq->bio =3D rq->biotail =3D bio;	/* attach the bio by hand */
+
+	rq->end_io_data =3D rqd;	/* read back in null_end_io() */
+
+	blk_execute_rq_nowait(q, NULL, rq, 0, null_end_io);
+
+	return 0;
+}
+
+/* Back the "DMA" pool with a slab mempool; returns the pool or NULL. */
+static void *null_create_dma_pool(struct request_queue *q, char *name)
+{
+	mempool_t *virtmem_pool;
+
+	ppa_cache =3D kmem_cache_create(name, PAGE_SIZE, 0, 0, NULL);
+	if (!ppa_cache) {
+		pr_err("null_nvm: Unable to create kmem cache\n");
+		return NULL;
+	}
+	virtmem_pool =3D mempool_create_slab_pool(64, ppa_cache);
+	if (!virtmem_pool) {
+		pr_err("null_nvm: Unable to create virtual memory pool\n");
+		kmem_cache_destroy(ppa_cache);	/* do not leak the cache */
+		ppa_cache =3D NULL;
+	}
+	return virtmem_pool;
+}
+
+static void null_destroy_dma_pool(void *pool)
+{
+	mempool_t *virtmem_pool =3D pool;
+
+	mempool_destroy(virtmem_pool);	/* NOTE: ppa_cache is not destroyed */
+}
+
+static void *null_dev_dma_alloc(struct request_queue *q, void *pool,
+				gfp_t mem_flags, dma_addr_t *dma_handler)
+{
+	return mempool_alloc(pool, mem_flags);	/* dma_handler stays unset */
+}
+
+static void null_dev_dma_free(void *pool, void *entry, dma_addr_t dma_=
handler)
+{
+	mempool_free(entry, pool);	/* dma_handler is ignored */
+}
+
+static struct nvm_dev_ops nulln_dev_ops =3D {
+	.identify		=3D null_id,
+
+	.get_features		=3D null_get_features,
+
+	.submit_io		=3D null_submit_io,
+
+	.create_dma_pool	=3D null_create_dma_pool,	/* slab mempool, no DMA */
+	.destroy_dma_pool	=3D null_destroy_dma_pool,
+	.dev_dma_alloc		=3D null_dev_dma_alloc,
+	.dev_dma_free		=3D null_dev_dma_free,
+
+	/* Emulate nvme protocol */
+	.max_phys_sect		=3D 64,
+};
+
+static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
+			 const struct blk_mq_queue_data *bd)
+{
+	struct nulln_cmd *cmd =3D blk_mq_rq_to_pdu(bd->rq);
+
+	cmd->rq =3D bd->rq;	/* back-pointer used at completion */
+
+	blk_mq_start_request(bd->rq);
+
+	null_handle_cmd(cmd);
+	return BLK_MQ_RQ_QUEUE_OK;	/* hctx is unused */
+}
+
+static struct blk_mq_ops null_mq_ops =3D {
+	.queue_rq	=3D null_queue_rq,
+	.map_queue	=3D blk_mq_map_queue,
+	.complete	=3D null_softirq_done_fn,	/* irqmode 0 and 1 */
+};
+
+static void null_del_dev(struct nulln *nulln)
+{
+	list_del_init(&nulln->list);	/* null_exit() holds nulln_lock */
+
+	nvm_unregister(nulln->disk_name);
+
+	blk_cleanup_queue(nulln->q);
+	blk_mq_free_tag_set(&nulln->tag_set);
+	kfree(nulln);	/* nulln must not be used after this */
+}
+
+/* Create and register one device; it joins nulln_list only on success. */
+static int null_add_dev(void)
+{
+	struct nulln *nulln;
+	int rv;
+
+	nulln =3D kzalloc_node(sizeof(*nulln), GFP_KERNEL, home_node);
+	if (!nulln)
+		return -ENOMEM;
+
+	if (use_per_node_hctx)
+		submit_queues =3D nr_online_nodes;
+	nulln->tag_set.ops =3D &null_mq_ops;
+	nulln->tag_set.nr_hw_queues =3D submit_queues;
+	nulln->tag_set.queue_depth =3D hw_queue_depth;
+	nulln->tag_set.numa_node =3D home_node;
+	nulln->tag_set.cmd_size =3D sizeof(struct nulln_cmd);
+	nulln->tag_set.driver_data =3D nulln;
+
+	rv =3D blk_mq_alloc_tag_set(&nulln->tag_set);
+	if (rv)
+		goto out_free_nulln;
+
+	nulln->q =3D blk_mq_init_queue(&nulln->tag_set);
+	if (IS_ERR(nulln->q)) {
+		rv =3D -ENOMEM;
+		goto out_cleanup_tags;
+	}
+
+	nulln->q->queuedata =3D nulln;
+	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nulln->q);
+	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nulln->q);
+	blk_queue_logical_block_size(nulln->q, bs);
+	blk_queue_physical_block_size(nulln->q, bs);
+
+	mutex_lock(&nulln_lock);
+	nulln->index =3D nulln_indexes++;
+	mutex_unlock(&nulln_lock);
+	sprintf(nulln->disk_name, "nulln%d", nulln->index);
+
+	rv =3D nvm_register(nulln->q, nulln->disk_name, &nulln_dev_ops);
+	if (rv)
+		goto out_cleanup_blk_queue;
+
+	mutex_lock(&nulln_lock);
+	list_add_tail(&nulln->list, &nulln_list);
+	mutex_unlock(&nulln_lock);
+	return 0;
+
+out_cleanup_blk_queue:
+	blk_cleanup_queue(nulln->q);
+out_cleanup_tags:
+	blk_mq_free_tag_set(&nulln->tag_set);
+out_free_nulln:
+	kfree(nulln);
+	return rv;
+}
+
+/* Sanitise parameters, then add nr_devices devices; unwinds on error. */
+static int __init null_init(void)
+{
+	int i, rv;
+
+	if (bs > PAGE_SIZE) {
+		pr_warn("null_nvm: invalid block size\n");
+		pr_warn("null_nvm: defaults block size to %lu\n", PAGE_SIZE);
+		bs =3D PAGE_SIZE;
+	}
+
+	if (use_per_node_hctx) {
+		if (submit_queues < nr_online_nodes) {
+			pr_warn("null_nvm: submit_queues param is set to %u.\n",
+							nr_online_nodes);
+			submit_queues =3D nr_online_nodes;
+		}
+	} else if (submit_queues > nr_cpu_ids)
+		submit_queues =3D nr_cpu_ids;
+	else if (submit_queues < 1)
+		submit_queues =3D 1;
+
+	mutex_init(&nulln_lock);
+	/* One lock-less list per CPU; the timer only matters for irqmode 2 */
+	for_each_possible_cpu(i) {
+		struct completion_queue *cq =3D &per_cpu(completion_queues, i);
+
+		init_llist_head(&cq->list);
+		if (irqmode !=3D NULL_IRQ_TIMER)
+			continue;
+		hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		cq->timer.function =3D null_cmd_timer_expired;
+	}
+
+	for (i =3D 0; i < nr_devices; i++) {
+		rv =3D null_add_dev();
+		if (rv)
+			goto out_del_devs;
+	}
+	pr_info("null_nvm: module loaded\n");
+	return 0;
+
+out_del_devs:
+	while (!list_empty(&nulln_list))
+		null_del_dev(list_entry(nulln_list.next, struct nulln, list));
+	return rv;
+}
+
+static void __exit null_exit(void)
+{
+	struct nulln *nulln;
+
+	mutex_lock(&nulln_lock);
+	while (!list_empty(&nulln_list)) {
+		nulln =3D list_entry(nulln_list.next, struct nulln, list);
+		null_del_dev(nulln);	/* unlinks and frees nulln */
+	}
+	mutex_unlock(&nulln_lock);
+}
+
+module_init(null_init);
+module_exit(null_exit);
+
+MODULE_AUTHOR("Matias Bjorling <mb@lightnvm.io>");
+MODULE_LICENSE("GPL");
--=20
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel=
" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html