From mboxrd@z Thu Jan 1 00:00:00 1970 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Subject: [PATCH v8 4/5] null_nvm: LightNVM test driver Date: Mon, 21 Sep 2015 18:05:49 +0200 Message-ID: <1442851550-25249-5-git-send-email-m@bjorling.me> References: <1442851550-25249-1-git-send-email-m@bjorling.me> Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: QUOTED-PRINTABLE Cc: jg@lightnvm.io, Stephen.Bates@pmcs.com, keith.busch@intel.com, =?UTF-8?q?Matias=20Bj=C3=B8rling?= To: hch@infradead.org, axboe@fb.com, linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-nvme@lists.infradead.org Return-path: Received: from mail-wi0-f180.google.com ([209.85.212.180]:37776 "EHLO mail-wi0-f180.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1756997AbbIUQGD (ORCPT ); Mon, 21 Sep 2015 12:06:03 -0400 Received: by wicfx3 with SMTP id fx3so118591509wic.0 for ; Mon, 21 Sep 2015 09:06:02 -0700 (PDT) In-Reply-To: <1442851550-25249-1-git-send-email-m@bjorling.me> Sender: linux-fsdevel-owner@vger.kernel.org List-ID: This driver implements the I/O flow for a LightNVM device driver. It does no transfers. It can be used to test setup/teardown of devices and to evaluate the performance of block managers and targets. The framework of the driver is derived from the null_blk module. Signed-off-by: Matias Bj=C3=B8rling --- drivers/lightnvm/Kconfig | 6 + drivers/lightnvm/Makefile | 1 + drivers/lightnvm/null_nvm.c | 468 ++++++++++++++++++++++++++++++++++++= ++++++++ 3 files changed, 475 insertions(+) create mode 100644 drivers/lightnvm/null_nvm.c diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig index 21a271e..d34ba6a 100644 --- a/drivers/lightnvm/Kconfig +++ b/drivers/lightnvm/Kconfig @@ -39,4 +39,10 @@ config NVM_RRPC host. The target is implemented using a linear mapping table and cost-based garbage collection. It is optimized for 4K IO sizes. 
=20 +config NVM_NULL_NVM + tristate "Null test LightNVM driver" + ---help--- + The null test driver can be used to evaluate targets without an + underlying device. + endif # NVM diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile index 1b7c00b..145ee5d 100644 --- a/drivers/lightnvm/Makefile +++ b/drivers/lightnvm/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_NVM) :=3D core.o obj-$(CONFIG_NVM_BM_HB) +=3D bm_hb.o obj-$(CONFIG_NVM_RRPC) +=3D rrpc.o +obj-$(CONFIG_NVM_NULL_NVM) +=3D null_nvm.o diff --git a/drivers/lightnvm/null_nvm.c b/drivers/lightnvm/null_nvm.c new file mode 100644 index 0000000..6fb80dd --- /dev/null +++ b/drivers/lightnvm/null_nvm.c @@ -0,0 +1,468 @@ +/* + * derived from Jens Axboe's block/null_blk.c + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +static struct kmem_cache *ppa_cache; +struct nulln_cmd { + struct llist_node ll_list; + struct request *rq; +}; + +struct nulln { + struct list_head list; + unsigned int index; + struct request_queue *q; + struct blk_mq_tag_set tag_set; + struct hrtimer timer; + char disk_name[DISK_NAME_LEN]; +}; + +static LIST_HEAD(nulln_list); +static struct mutex nulln_lock; +static int nulln_indexes; + +struct completion_queue { + struct llist_head list; + struct hrtimer timer; +}; + +/* + * These are per-cpu for now, they will need to be configured by the + * complete_queues parameter and appropriately mapped. 
+ */ +static DEFINE_PER_CPU(struct completion_queue, completion_queues); + +enum { + NULL_IRQ_NONE =3D 0, + NULL_IRQ_SOFTIRQ =3D 1, + NULL_IRQ_TIMER =3D 2, +}; + +static int submit_queues; +module_param(submit_queues, int, S_IRUGO); +MODULE_PARM_DESC(submit_queues, "Number of submission queues"); + +static int home_node =3D NUMA_NO_NODE; +module_param(home_node, int, S_IRUGO); +MODULE_PARM_DESC(home_node, "Home node for the device"); + +static int null_param_store_val(const char *str, int *val, int min, in= t max) +{ + int ret, new_val; + + ret =3D kstrtoint(str, 10, &new_val); + if (ret) + return -EINVAL; + + if (new_val < min || new_val > max) + return -EINVAL; + + *val =3D new_val; + return 0; +} + +static int gb =3D 250; +module_param(gb, int, S_IRUGO); +MODULE_PARM_DESC(gb, "Size in GB"); + +static int bs =3D 4096; +module_param(bs, int, S_IRUGO); +MODULE_PARM_DESC(bs, "Block size (in bytes)"); + +static int nr_devices =3D 1; +module_param(nr_devices, int, S_IRUGO); +MODULE_PARM_DESC(nr_devices, "Number of devices to register"); + +static int irqmode =3D NULL_IRQ_SOFTIRQ; + +static int null_set_irqmode(const char *str, const struct kernel_param= *kp) +{ + return null_param_store_val(str, &irqmode, NULL_IRQ_NONE, + NULL_IRQ_TIMER); +} + +static const struct kernel_param_ops null_irqmode_param_ops =3D { + .set =3D null_set_irqmode, + .get =3D param_get_int, +}; + +device_param_cb(irqmode, &null_irqmode_param_ops, &irqmode, S_IRUGO); +MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, = 2-timer"); + +static int completion_nsec =3D 10000; +module_param(completion_nsec, int, S_IRUGO); +MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in= hardware. 
Default: 10,000ns"); + +static int hw_queue_depth =3D 64; +module_param(hw_queue_depth, int, S_IRUGO); +MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue.= Default: 64"); + +static bool use_per_node_hctx; +module_param(use_per_node_hctx, bool, S_IRUGO); +MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardw= are context queues. Default: false"); + +static int num_channels =3D 1; +module_param(num_channels, int, S_IRUGO); +MODULE_PARM_DESC(num_channels, "Number of channels to be exposed. Defa= ult: 1"); + +static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *tim= er) +{ + struct completion_queue *cq; + struct llist_node *entry; + struct nulln_cmd *cmd; + + cq =3D &per_cpu(completion_queues, smp_processor_id()); + + while ((entry =3D llist_del_all(&cq->list)) !=3D NULL) { + entry =3D llist_reverse_order(entry); + do { + cmd =3D container_of(entry, struct nulln_cmd, ll_list); + entry =3D entry->next; + blk_mq_end_request(cmd->rq, 0); + + if (cmd->rq) { + struct request_queue *q =3D cmd->rq->q; + + if (!q->mq_ops && blk_queue_stopped(q)) { + spin_lock(q->queue_lock); + if (blk_queue_stopped(q)) + blk_start_queue(q); + spin_unlock(q->queue_lock); + } + } + } while (entry); + } + + return HRTIMER_NORESTART; +} + +static void null_cmd_end_timer(struct nulln_cmd *cmd) +{ + struct completion_queue *cq =3D &per_cpu(completion_queues, get_cpu()= ); + + cmd->ll_list.next =3D NULL; + if (llist_add(&cmd->ll_list, &cq->list)) { + ktime_t kt =3D ktime_set(0, completion_nsec); + + hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL_PINNED); + } + + put_cpu(); +} + +static void null_softirq_done_fn(struct request *rq) +{ + blk_mq_end_request(rq, 0); +} + +static inline void null_handle_cmd(struct nulln_cmd *cmd) +{ + /* Complete IO by inline, softirq or timer */ + switch (irqmode) { + case NULL_IRQ_SOFTIRQ: + case NULL_IRQ_NONE: + blk_mq_complete_request(cmd->rq); + break; + case NULL_IRQ_TIMER: + null_cmd_end_timer(cmd); + break; + } +} 
+ +static int null_id(struct request_queue *q, struct nvm_id *id) +{ + sector_t size =3D gb * 1024 * 1024 * 1024ULL; + unsigned long per_chnl_size =3D + size / bs / num_channels; + struct nvm_id_chnl *chnl; + int i; + + id->ver_id =3D 0x1; + id->nvm_type =3D NVM_NVMT_BLK; + id->nchannels =3D num_channels; + + id->chnls =3D kmalloc_array(id->nchannels, sizeof(struct nvm_id_chnl)= , + GFP_KERNEL); + if (!id->chnls) + return -ENOMEM; + + for (i =3D 0; i < id->nchannels; i++) { + chnl =3D &id->chnls[i]; + chnl->queue_size =3D hw_queue_depth; + chnl->gran_read =3D bs; + chnl->gran_write =3D bs; + chnl->gran_erase =3D bs * 256; + chnl->oob_size =3D 0; + chnl->t_r =3D chnl->t_sqr =3D 25000; /* 25us */ + chnl->t_w =3D chnl->t_sqw =3D 500000; /* 500us */ + chnl->t_e =3D 1500000; /* 1.500us */ + chnl->io_sched =3D NVM_IOSCHED_CHANNEL; + chnl->laddr_begin =3D per_chnl_size * i; + chnl->laddr_end =3D per_chnl_size * (i + 1) - 1; + } + + return 0; +} + +static int null_get_features(struct request_queue *q, + struct nvm_get_features *gf) +{ + gf->rsp =3D NVM_RSP_L2P; + gf->ext =3D 0; + + return 0; +} + +static void null_end_io(struct request *rq, int error) +{ + struct nvm_rq *rqd =3D rq->end_io_data; + struct nvm_tgt_instance *ins =3D rqd->ins; + + ins->tt->end_io(rq->end_io_data, error); + + blk_put_request(rq); +} + +static int null_submit_io(struct request_queue *q, struct nvm_rq *rqd) +{ + struct request *rq; + struct bio *bio =3D rqd->bio; + + rq =3D blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0); + if (IS_ERR(rq)) + return -ENOMEM; + + rq->cmd_type =3D REQ_TYPE_DRV_PRIV; + rq->__sector =3D bio->bi_iter.bi_sector; + rq->ioprio =3D bio_prio(bio); + + if (bio_has_data(bio)) + rq->nr_phys_segments =3D bio_phys_segments(q, bio); + + rq->__data_len =3D bio->bi_iter.bi_size; + rq->bio =3D rq->biotail =3D bio; + + rq->end_io_data =3D rqd; + + blk_execute_rq_nowait(q, NULL, rq, 0, null_end_io); + + return 0; +} + +static void *null_create_dma_pool(struct request_queue *q, 
char *name) +{ + mempool_t *virtmem_pool; + + ppa_cache =3D kmem_cache_create(name, PAGE_SIZE, 0, 0, NULL); + if (!ppa_cache) { + pr_err("null_nvm: Unable to create kmem cache\n"); + return NULL; + } + + virtmem_pool =3D mempool_create_slab_pool(64, ppa_cache); + if (!virtmem_pool) { + pr_err("null_nvm: Unable to create virtual memory pool\n"); + return NULL; + } + + return virtmem_pool; +} + +static void null_destroy_dma_pool(void *pool) +{ + mempool_t *virtmem_pool =3D pool; + + mempool_destroy(virtmem_pool); +} + +static void *null_dev_dma_alloc(struct request_queue *q, void *pool, + gfp_t mem_flags, dma_addr_t *dma_handler) +{ + return mempool_alloc(pool, mem_flags); +} + +static void null_dev_dma_free(void *pool, void *entry, dma_addr_t dma_= handler) +{ + mempool_free(entry, pool); +} + +static struct nvm_dev_ops nulln_dev_ops =3D { + .identify =3D null_id, + + .get_features =3D null_get_features, + + .submit_io =3D null_submit_io, + + .create_dma_pool =3D null_create_dma_pool, + .destroy_dma_pool =3D null_destroy_dma_pool, + .dev_dma_alloc =3D null_dev_dma_alloc, + .dev_dma_free =3D null_dev_dma_free, + + /* Emulate nvme protocol */ + .max_phys_sect =3D 64, +}; + +static int null_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct nulln_cmd *cmd =3D blk_mq_rq_to_pdu(bd->rq); + + cmd->rq =3D bd->rq; + + blk_mq_start_request(bd->rq); + + null_handle_cmd(cmd); + return BLK_MQ_RQ_QUEUE_OK; +} + +static struct blk_mq_ops null_mq_ops =3D { + .queue_rq =3D null_queue_rq, + .map_queue =3D blk_mq_map_queue, + .complete =3D null_softirq_done_fn, +}; + +static void null_del_dev(struct nulln *nulln) +{ + list_del_init(&nulln->list); + + nvm_unregister(nulln->disk_name); + + blk_cleanup_queue(nulln->q); + blk_mq_free_tag_set(&nulln->tag_set); + kfree(nulln); +} + +static int null_add_dev(void) +{ + struct nulln *nulln; + int rv; + + nulln =3D kzalloc_node(sizeof(*nulln), GFP_KERNEL, home_node); + if (!nulln) { + rv =3D -ENOMEM; + goto 
out; + } + + if (use_per_node_hctx) + submit_queues =3D nr_online_nodes; + + nulln->tag_set.ops =3D &null_mq_ops; + nulln->tag_set.nr_hw_queues =3D submit_queues; + nulln->tag_set.queue_depth =3D hw_queue_depth; + nulln->tag_set.numa_node =3D home_node; + nulln->tag_set.cmd_size =3D sizeof(struct nulln_cmd); + nulln->tag_set.driver_data =3D nulln; + + rv =3D blk_mq_alloc_tag_set(&nulln->tag_set); + if (rv) + goto out_free_nulln; + + nulln->q =3D blk_mq_init_queue(&nulln->tag_set); + if (IS_ERR(nulln->q)) { + rv =3D -ENOMEM; + goto out_cleanup_tags; + } + + nulln->q->queuedata =3D nulln; + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nulln->q); + queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nulln->q); + + mutex_lock(&nulln_lock); + list_add_tail(&nulln->list, &nulln_list); + nulln->index =3D nulln_indexes++; + mutex_unlock(&nulln_lock); + + blk_queue_logical_block_size(nulln->q, bs); + blk_queue_physical_block_size(nulln->q, bs); + + sprintf(nulln->disk_name, "nulln%d", nulln->index); + + rv =3D nvm_register(nulln->q, nulln->disk_name, &nulln_dev_ops); + if (rv) + goto out_cleanup_blk_queue; + + return 0; + +out_cleanup_blk_queue: + blk_cleanup_queue(nulln->q); +out_cleanup_tags: + blk_mq_free_tag_set(&nulln->tag_set); +out_free_nulln: + kfree(nulln); +out: + return rv; +} + +static int __init null_init(void) +{ + unsigned int i; + + if (bs > PAGE_SIZE) { + pr_warn("null_nvm: invalid block size\n"); + pr_warn("null_nvm: defaults block size to %lu\n", PAGE_SIZE); + bs =3D PAGE_SIZE; + } + + if (use_per_node_hctx) { + if (submit_queues < nr_online_nodes) { + pr_warn("null_nvm: submit_queues param is set to %u.", + nr_online_nodes); + submit_queues =3D nr_online_nodes; + } + } else if (submit_queues > nr_cpu_ids) + submit_queues =3D nr_cpu_ids; + else if (!submit_queues) + submit_queues =3D 1; + + mutex_init(&nulln_lock); + + /* Initialize a separate list for each CPU for issuing softirqs */ + for_each_possible_cpu(i) { + struct completion_queue *cq =3D 
&per_cpu(completion_queues, i); + + init_llist_head(&cq->list); + + if (irqmode !=3D NULL_IRQ_TIMER) + continue; + + hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cq->timer.function =3D null_cmd_timer_expired; + } + + for (i =3D 0; i < nr_devices; i++) { + if (null_add_dev()) + return -EINVAL; + } + + pr_info("null_nvm: module loaded\n"); + return 0; +} + +static void __exit null_exit(void) +{ + struct nulln *nulln; + + mutex_lock(&nulln_lock); + while (!list_empty(&nulln_list)) { + nulln =3D list_entry(nulln_list.next, struct nulln, list); + null_del_dev(nulln); + } + mutex_unlock(&nulln_lock); +} + +module_init(null_init); +module_exit(null_exit); + +MODULE_AUTHOR("Matias Bjorling "); +MODULE_LICENSE("GPL"); --=20 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel= " in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html