From: "Matias Bjørling" <mb@lightnvm.io>
To: Wenwei Tao <ww.tao0320@gmail.com>, jg@lightnvm.io
Cc: linux-kernel@vger.kernel.org, linux-block@vger.kernel.org
Subject: Re: [PATCH v2] lightnvm: divide global reverse translation map into per lun
Date: Mon, 21 Mar 2016 11:25:47 +0100 [thread overview]
Message-ID: <56EFCC2B.5070809@lightnvm.io> (raw)
In-Reply-To: <1458453092-2184-1-git-send-email-ww.tao0320@gmail.com>
On 03/20/2016 06:51 AM, Wenwei Tao wrote:
> Divide the target's global reverse translation map into per lun,
> to prepare support for the non-continuous lun target creation.
>
> Signed-off-by: Wenwei Tao <ww.tao0320@gmail.com>
> ---
>
> Changes since v1
> -fix merge/rebase mistake in rrpc_block_map_update().
> -remove variables poffset and lun_offset in rrpc structure
> since no one use them now.
>
> drivers/lightnvm/rrpc.c | 181 +++++++++++++++++++++++++++--------------------
> drivers/lightnvm/rrpc.h | 9 ++-
> include/linux/lightnvm.h | 1 +
> 3 files changed, 111 insertions(+), 80 deletions(-)
>
> diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
> index 3ab6495..88e2ce5 100644
> --- a/drivers/lightnvm/rrpc.c
> +++ b/drivers/lightnvm/rrpc.c
> @@ -23,28 +23,35 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
> struct nvm_rq *rqd, unsigned long flags);
>
> #define rrpc_for_each_lun(rrpc, rlun, i) \
> - for ((i) = 0, rlun = &(rrpc)->luns[0]; \
> - (i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)])
> + for ((i) = 0, rlun = &(rrpc)->luns[0]; \
> + (i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)])
> +
> +static inline u64 lun_poffset(struct nvm_lun *lun, struct nvm_dev *dev)
> +{
> + return lun->id * dev->sec_per_lun;
> +}
>
> static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a)
> {
> struct rrpc_block *rblk = a->rblk;
> - unsigned int pg_offset;
> + struct rrpc_lun *rlun = rblk->rlun;
> + u64 pg_offset;
>
> - lockdep_assert_held(&rrpc->rev_lock);
> + lockdep_assert_held(&rlun->rev_lock);
>
> if (a->addr == ADDR_EMPTY || !rblk)
> return;
>
> spin_lock(&rblk->lock);
>
> - div_u64_rem(a->addr, rrpc->dev->sec_per_blk, &pg_offset);
> + div_u64_rem(a->addr, rrpc->dev->sec_per_blk, (u32 *)&pg_offset);
> WARN_ON(test_and_set_bit(pg_offset, rblk->invalid_pages));
> rblk->nr_invalid_pages++;
>
> spin_unlock(&rblk->lock);
>
> - rrpc->rev_trans_map[a->addr - rrpc->poffset].addr = ADDR_EMPTY;
> + pg_offset = lun_poffset(rlun->parent, rrpc->dev);
> + rlun->rev_trans_map[a->addr - pg_offset].addr = ADDR_EMPTY;
> }
>
> static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba,
> @@ -52,14 +59,15 @@ static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba,
> {
> sector_t i;
>
> - spin_lock(&rrpc->rev_lock);
> for (i = slba; i < slba + len; i++) {
> struct rrpc_addr *gp = &rrpc->trans_map[i];
> + struct rrpc_lun *rlun = gp->rblk->rlun;
>
> + spin_lock(&rlun->rev_lock);
> rrpc_page_invalidate(rrpc, gp);
> + spin_unlock(&rlun->rev_lock);
> gp->rblk = NULL;
> }
> - spin_unlock(&rrpc->rev_lock);
> }
>
> static struct nvm_rq *rrpc_inflight_laddr_acquire(struct rrpc *rrpc,
> @@ -116,15 +124,6 @@ static int block_is_full(struct rrpc *rrpc, struct rrpc_block *rblk)
> return (rblk->next_page == rrpc->dev->sec_per_blk);
> }
>
> -/* Calculate relative addr for the given block, considering instantiated LUNs */
> -static u64 block_to_rel_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
> -{
> - struct nvm_block *blk = rblk->parent;
> - int lun_blk = blk->id % (rrpc->dev->blks_per_lun * rrpc->nr_luns);
> -
> - return lun_blk * rrpc->dev->sec_per_blk;
> -}
> -
> /* Calculate global addr for the given block */
> static u64 block_to_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
> {
> @@ -291,13 +290,14 @@ static void rrpc_end_sync_bio(struct bio *bio)
> static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
> {
> struct request_queue *q = rrpc->dev->q;
> + struct rrpc_lun *rlun = rblk->rlun;
> struct rrpc_rev_addr *rev;
> struct nvm_rq *rqd;
> struct bio *bio;
> struct page *page;
> int slot;
> int nr_sec_per_blk = rrpc->dev->sec_per_blk;
> - u64 phys_addr;
> + u64 phys_addr, poffset;
> DECLARE_COMPLETION_ONSTACK(wait);
>
> if (bitmap_full(rblk->invalid_pages, nr_sec_per_blk))
> @@ -315,6 +315,7 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
> return -ENOMEM;
> }
>
> + poffset = lun_poffset(rlun->parent, rrpc->dev);
> while ((slot = find_first_zero_bit(rblk->invalid_pages,
> nr_sec_per_blk)) < nr_sec_per_blk) {
>
> @@ -322,23 +323,23 @@ static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
> phys_addr = rblk->parent->id * nr_sec_per_blk + slot;
>
> try:
> - spin_lock(&rrpc->rev_lock);
> + spin_lock(&rlun->rev_lock);
> /* Get logical address from physical to logical table */
> - rev = &rrpc->rev_trans_map[phys_addr - rrpc->poffset];
> + rev = &rlun->rev_trans_map[phys_addr - poffset];
> /* already updated by previous regular write */
> if (rev->addr == ADDR_EMPTY) {
> - spin_unlock(&rrpc->rev_lock);
> + spin_unlock(&rlun->rev_lock);
> continue;
> }
>
> rqd = rrpc_inflight_laddr_acquire(rrpc, rev->addr, 1);
> if (IS_ERR_OR_NULL(rqd)) {
> - spin_unlock(&rrpc->rev_lock);
> + spin_unlock(&rlun->rev_lock);
> schedule();
> goto try;
> }
>
> - spin_unlock(&rrpc->rev_lock);
> + spin_unlock(&rlun->rev_lock);
>
> /* Perform read to do GC */
> bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr);
> @@ -407,7 +408,7 @@ static void rrpc_block_gc(struct work_struct *work)
> struct rrpc_block *rblk = gcb->rblk;
> struct nvm_dev *dev = rrpc->dev;
> struct nvm_lun *lun = rblk->parent->lun;
> - struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
> + struct rrpc_lun *rlun = lun->private;
>
> mempool_free(gcb, rrpc->gcb_pool);
> pr_debug("nvm: block '%lu' being reclaimed\n", rblk->parent->id);
> @@ -510,7 +511,7 @@ static void rrpc_gc_queue(struct work_struct *work)
> struct rrpc_block *rblk = gcb->rblk;
> struct nvm_lun *lun = rblk->parent->lun;
> struct nvm_block *blk = rblk->parent;
> - struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
> + struct rrpc_lun *rlun = lun->private;
>
> spin_lock(&rlun->lock);
> list_add_tail(&rblk->prio, &rlun->prio_list);
> @@ -561,22 +562,25 @@ static struct rrpc_lun *rrpc_get_lun_rr(struct rrpc *rrpc, int is_gc)
> static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t laddr,
> struct rrpc_block *rblk, u64 paddr)
> {
> + struct rrpc_lun *rlun = rblk->rlun;
> struct rrpc_addr *gp;
> struct rrpc_rev_addr *rev;
> + u64 poffset = lun_poffset(rlun->parent, rrpc->dev);
>
> BUG_ON(laddr >= rrpc->nr_sects);
>
> gp = &rrpc->trans_map[laddr];
> - spin_lock(&rrpc->rev_lock);
> + spin_lock(&rlun->rev_lock);
> +
> if (gp->rblk)
> rrpc_page_invalidate(rrpc, gp);
>
> gp->addr = paddr;
> gp->rblk = rblk;
>
> - rev = &rrpc->rev_trans_map[gp->addr - rrpc->poffset];
> + rev = &rlun->rev_trans_map[gp->addr - poffset];
> rev->addr = laddr;
> - spin_unlock(&rrpc->rev_lock);
> + spin_unlock(&rlun->rev_lock);
>
> return gp;
> }
> @@ -990,7 +994,6 @@ static int rrpc_gc_init(struct rrpc *rrpc)
>
> static void rrpc_map_free(struct rrpc *rrpc)
> {
> - vfree(rrpc->rev_trans_map);
> vfree(rrpc->trans_map);
> }
>
> @@ -998,8 +1001,8 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
> {
> struct rrpc *rrpc = (struct rrpc *)private;
> struct nvm_dev *dev = rrpc->dev;
> - struct rrpc_addr *addr = rrpc->trans_map + slba;
> - struct rrpc_rev_addr *raddr = rrpc->rev_trans_map;
> + struct rrpc_addr *addr;
> + struct rrpc_rev_addr *raddr;
> u64 elba = slba + nlb;
> u64 i;
>
> @@ -1008,9 +1011,15 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
> return -EINVAL;
> }
>
> + slba -= rrpc->soffset >> (ilog2(dev->sec_size) - 9);
> + addr = rrpc->trans_map + slba;
> for (i = 0; i < nlb; i++) {
> + struct rrpc_lun *rlun;
> + struct nvm_lun *lun;
> u64 pba = le64_to_cpu(entries[i]);
> - unsigned int mod;
> + u64 poffset;
> + int lunid;
> +
> /* LNVM treats address-spaces as silos, LBA and PBA are
> * equally large and zero-indexed.
> */
> @@ -1026,10 +1035,18 @@ static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
> if (!pba)
> continue;
>
> - div_u64_rem(pba, rrpc->nr_sects, &mod);
> + lunid = div_u64(pba, dev->sec_per_lun);
> + lun = dev->mt->get_lun(dev, lunid);
> +
> + if (unlikely(!lun))
> + return -EINVAL;
> +
> + rlun = lun->private;
> + raddr = rlun->rev_trans_map;
> + poffset = lun_poffset(lun, dev);
>
> addr[i].addr = pba;
> - raddr[mod].addr = slba + i;
> + raddr[pba - poffset].addr = slba + i;
> }
>
> return 0;
> @@ -1048,17 +1065,10 @@ static int rrpc_map_init(struct rrpc *rrpc)
> if (!rrpc->trans_map)
> return -ENOMEM;
>
> - rrpc->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr)
> - * rrpc->nr_sects);
> - if (!rrpc->rev_trans_map)
> - return -ENOMEM;
> -
> for (i = 0; i < rrpc->nr_sects; i++) {
> struct rrpc_addr *p = &rrpc->trans_map[i];
> - struct rrpc_rev_addr *r = &rrpc->rev_trans_map[i];
>
> p->addr = ADDR_EMPTY;
> - r->addr = ADDR_EMPTY;
> }
>
> if (!dev->ops->get_l2p_tbl)
> @@ -1143,25 +1153,69 @@ static void rrpc_luns_free(struct rrpc *rrpc)
> if (!lun)
> break;
> dev->mt->release_lun(dev, lun->id);
> + vfree(rlun->rev_trans_map);
> vfree(rlun->blocks);
> }
>
> kfree(rrpc->luns);
> + rrpc->luns = NULL;
> +}
> +
> +static int rrpc_lun_init(struct rrpc *rrpc, struct rrpc_lun *rlun,
> + struct nvm_lun *lun)
> +{
> + struct nvm_dev *dev = rrpc->dev;
> + int i;
> +
> + rlun->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr) *
> + dev->sec_per_lun);
> + if (!rlun->rev_trans_map)
> + return -ENOMEM;
> +
> + for (i = 0; i < dev->sec_per_lun; i++) {
> + struct rrpc_rev_addr *r = &rlun->rev_trans_map[i];
> +
> + r->addr = ADDR_EMPTY;
> + }
> + rlun->blocks = vzalloc(sizeof(struct rrpc_block) * dev->blks_per_lun);
> + if (!rlun->blocks) {
> + vfree(rlun->rev_trans_map);
> + return -ENOMEM;
> + }
> +
> + for (i = 0; i < dev->blks_per_lun; i++) {
> + struct rrpc_block *rblk = &rlun->blocks[i];
> + struct nvm_block *blk = &lun->blocks[i];
> +
> + rblk->parent = blk;
> + rblk->rlun = rlun;
> + INIT_LIST_HEAD(&rblk->prio);
> + spin_lock_init(&rblk->lock);
> + }
> +
> + rlun->rrpc = rrpc;
> + lun->private = rlun;
> + INIT_LIST_HEAD(&rlun->prio_list);
> + INIT_LIST_HEAD(&rlun->open_list);
> + INIT_LIST_HEAD(&rlun->closed_list);
> + INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
> + spin_lock_init(&rlun->lock);
> + spin_lock_init(&rlun->rev_lock);
> +
> + return 0;
> }
>
> static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
> {
> struct nvm_dev *dev = rrpc->dev;
> struct rrpc_lun *rlun;
> - int i, j, ret = -EINVAL;
> + int i, ret = -EINVAL;
>
> if (dev->sec_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
> pr_err("rrpc: number of pages per block too high.");
> return -EINVAL;
> }
>
> - spin_lock_init(&rrpc->rev_lock);
> -
> rrpc->luns = kcalloc(rrpc->nr_luns, sizeof(struct rrpc_lun),
> GFP_KERNEL);
> if (!rrpc->luns)
> @@ -1183,31 +1237,9 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
>
> rlun = &rrpc->luns[i];
> rlun->parent = lun;
> - rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
> - rrpc->dev->blks_per_lun);
> - if (!rlun->blocks) {
> - ret = -ENOMEM;
> + ret = rrpc_lun_init(rrpc, rlun, lun);
> + if (ret)
> goto err;
> - }
> -
> - for (j = 0; j < rrpc->dev->blks_per_lun; j++) {
> - struct rrpc_block *rblk = &rlun->blocks[j];
> - struct nvm_block *blk = &lun->blocks[j];
> -
> - rblk->parent = blk;
> - rblk->rlun = rlun;
> - INIT_LIST_HEAD(&rblk->prio);
> - spin_lock_init(&rblk->lock);
> - }
> -
> - rlun->rrpc = rrpc;
> - INIT_LIST_HEAD(&rlun->prio_list);
> - INIT_LIST_HEAD(&rlun->open_list);
> - INIT_LIST_HEAD(&rlun->closed_list);
> -
> - INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
> - spin_lock_init(&rlun->lock);
> -
> rrpc->total_blocks += dev->blks_per_lun;
> rrpc->nr_sects += dev->sec_per_lun;
>
> @@ -1215,6 +1247,7 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
>
> return 0;
> err:
> + rrpc_luns_free(rrpc);
> return ret;
> }
>
> @@ -1288,15 +1321,16 @@ static sector_t rrpc_capacity(void *private)
> static void rrpc_block_map_update(struct rrpc *rrpc, struct rrpc_block *rblk)
> {
> struct nvm_dev *dev = rrpc->dev;
> + struct rrpc_lun *rlun = rblk->rlun;
> int offset;
> struct rrpc_addr *laddr;
> - u64 bpaddr, paddr, pladdr;
> + u64 paddr, pladdr, poffset;
>
> - bpaddr = block_to_rel_addr(rrpc, rblk);
> + poffset = lun_poffset(rlun->parent, dev);
> for (offset = 0; offset < dev->sec_per_blk; offset++) {
> - paddr = bpaddr + offset;
> + paddr = block_to_addr(rrpc, rblk) + offset;
>
> - pladdr = rrpc->rev_trans_map[paddr].addr;
> + pladdr = rlun->rev_trans_map[paddr - poffset].addr;
> if (pladdr == ADDR_EMPTY)
> continue;
>
> @@ -1405,9 +1439,6 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
> goto err;
> }
>
> - rrpc->poffset = dev->sec_per_lun * lun_begin;
> - rrpc->lun_offset = lun_begin;
> -
> ret = rrpc_core_init(rrpc);
> if (ret) {
> pr_err("nvm: rrpc: could not initialize core\n");
> diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h
> index 2653484..590f1b7 100644
> --- a/drivers/lightnvm/rrpc.h
> +++ b/drivers/lightnvm/rrpc.h
> @@ -87,6 +87,10 @@ struct rrpc_lun {
>
> struct work_struct ws_gc;
>
> + /* store a reverse map for garbage collection */
> + struct rrpc_rev_addr *rev_trans_map;
> + spinlock_t rev_lock;
> +
> spinlock_t lock;
> };
>
> @@ -98,8 +102,6 @@ struct rrpc {
> struct gendisk *disk;
>
> sector_t soffset; /* logical sector offset */
> - u64 poffset; /* physical page offset */
> - int lun_offset;
>
> int nr_luns;
> struct rrpc_lun *luns;
> @@ -124,9 +126,6 @@ struct rrpc {
> * addresses are used when writing to the disk block device.
> */
> struct rrpc_addr *trans_map;
> - /* also store a reverse map for garbage collection */
> - struct rrpc_rev_addr *rev_trans_map;
> - spinlock_t rev_lock;
>
> struct rrpc_inflight inflights;
>
> diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
> index cdcb2cc..419a3db 100644
> --- a/include/linux/lightnvm.h
> +++ b/include/linux/lightnvm.h
> @@ -276,6 +276,7 @@ struct nvm_lun {
> spinlock_t lock;
>
> struct nvm_block *blocks;
> + void *private;
> };
>
> enum {
>
It seems like there is a race-condition when GC hits. It panics on
page_invalidation. You can test with the following fio script:
[global]
bs=4k
ioengine=libaio
iodepth=1
size=1G
direct=1
rw=randwrite
time_based
runtime=300
[p1]
numjobs=1
filename=/dev/test2
next prev parent reply other threads:[~2016-03-21 10:25 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-03-20 5:51 [PATCH v2] lightnvm: divide global reverse translation map into per lun Wenwei Tao
2016-03-21 10:25 ` Matias Bjørling [this message]
2016-03-22 10:17 ` Wenwei Tao
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=56EFCC2B.5070809@lightnvm.io \
--to=mb@lightnvm.io \
--cc=jg@lightnvm.io \
--cc=linux-block@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=ww.tao0320@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.