From: Jan Kara <jack@suse.cz>
To: Dan Williams <dan.j.williams@intel.com>
Cc: akpm@linux-foundation.org, jack@suse.cz,
Matthew Wilcox <mawilcox@microsoft.com>,
linux-nvdimm@lists.01.org, linux-xfs@vger.kernel.org,
Jeff Moyer <jmoyer@redhat.com>,
linux-fsdevel@vger.kernel.org, ross.zwisler@linux.intel.com,
hch@lst.de
Subject: Re: [PATCH v4 07/18] dax: store pfns in the radix
Date: Wed, 3 Jan 2018 16:39:48 +0100 [thread overview]
Message-ID: <20180103153948.GK4911@quack2.suse.cz> (raw)
In-Reply-To: <151407699792.38751.2859846645388380112.stgit@dwillia2-desk3.amr.corp.intel.com>
On Sat 23-12-17 16:56:38, Dan Williams wrote:
> In preparation for examining the busy state of dax pages in the truncate
> path, switch from sectors to pfns in the radix.
>
> Cc: Jan Kara <jack@suse.cz>
> Cc: Jeff Moyer <jmoyer@redhat.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Matthew Wilcox <mawilcox@microsoft.com>
> Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Looks good to me once the comments are fixed as Ross asked. You can add:
Reviewed-by: Jan Kara <jack@suse.cz>
Honza
> ---
> drivers/dax/super.c | 15 ++++++++--
> fs/dax.c | 75 ++++++++++++++++++---------------------------------
> 2 files changed, 39 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/dax/super.c b/drivers/dax/super.c
> index 473af694ad1c..516124ae1ccf 100644
> --- a/drivers/dax/super.c
> +++ b/drivers/dax/super.c
> @@ -124,10 +124,19 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
> return len < 0 ? len : -EIO;
> }
>
> - if ((IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn))
> - || pfn_t_devmap(pfn))
> + if (IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) {
> + /*
> + * An arch that has enabled the pmem api should also
> + * have its drivers support pfn_t_devmap()
> + *
> + * This is a developer warning and should not trigger in
> + * production. dax_flush() will crash since it depends
> + * on being able to do (page_address(pfn_to_page())).
> + */
> + WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
> + } else if (pfn_t_devmap(pfn)) {
> /* pass */;
> - else {
> + } else {
> pr_debug("VFS (%s): error: dax support not enabled\n",
> sb->s_id);
> return -EOPNOTSUPP;
> diff --git a/fs/dax.c b/fs/dax.c
> index 78b72c48374e..54071cd27e8c 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -72,16 +72,15 @@ fs_initcall(init_dax_wait_table);
> #define RADIX_DAX_ZERO_PAGE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
> #define RADIX_DAX_EMPTY (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))
>
> -static unsigned long dax_radix_sector(void *entry)
> +static unsigned long dax_radix_pfn(void *entry)
> {
> return (unsigned long)entry >> RADIX_DAX_SHIFT;
> }
>
> -static void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
> +static void *dax_radix_locked_entry(unsigned long pfn, unsigned long flags)
> {
> return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
> - ((unsigned long)sector << RADIX_DAX_SHIFT) |
> - RADIX_DAX_ENTRY_LOCK);
> + (pfn << RADIX_DAX_SHIFT) | RADIX_DAX_ENTRY_LOCK);
> }
>
> static unsigned int dax_radix_order(void *entry)
> @@ -525,12 +524,13 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
> */
> static void *dax_insert_mapping_entry(struct address_space *mapping,
> struct vm_fault *vmf,
> - void *entry, sector_t sector,
> + void *entry, pfn_t pfn_t,
> unsigned long flags, bool dirty)
> {
> struct radix_tree_root *page_tree = &mapping->page_tree;
> - void *new_entry;
> + unsigned long pfn = pfn_t_to_pfn(pfn_t);
> pgoff_t index = vmf->pgoff;
> + void *new_entry;
>
> if (dirty)
> __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
> @@ -547,7 +547,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
> }
>
> spin_lock_irq(&mapping->tree_lock);
> - new_entry = dax_radix_locked_entry(sector, flags);
> + new_entry = dax_radix_locked_entry(pfn, flags);
>
> if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
> /*
> @@ -659,17 +659,14 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
> i_mmap_unlock_read(mapping);
> }
>
> -static int dax_writeback_one(struct block_device *bdev,
> - struct dax_device *dax_dev, struct address_space *mapping,
> - pgoff_t index, void *entry)
> +static int dax_writeback_one(struct dax_device *dax_dev,
> + struct address_space *mapping, pgoff_t index, void *entry)
> {
> struct radix_tree_root *page_tree = &mapping->page_tree;
> - void *entry2, **slot, *kaddr;
> - long ret = 0, id;
> - sector_t sector;
> - pgoff_t pgoff;
> + void *entry2, **slot;
> + unsigned long pfn;
> + long ret = 0;
> size_t size;
> - pfn_t pfn;
>
> /*
> * A page got tagged dirty in DAX mapping? Something is seriously
> @@ -688,7 +685,7 @@ static int dax_writeback_one(struct block_device *bdev,
> * compare sectors as we must not bail out due to difference in lockbit
> * or entry type.
> */
> - if (dax_radix_sector(entry2) != dax_radix_sector(entry))
> + if (dax_radix_pfn(entry2) != dax_radix_pfn(entry))
> goto put_unlocked;
> if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
> dax_is_zero_entry(entry))) {
> @@ -718,29 +715,11 @@ static int dax_writeback_one(struct block_device *bdev,
> * 'entry'. This allows us to flush for PMD_SIZE and not have to
> * worry about partial PMD writebacks.
> */
> - sector = dax_radix_sector(entry);
> + pfn = dax_radix_pfn(entry);
> size = PAGE_SIZE << dax_radix_order(entry);
>
> - id = dax_read_lock();
> - ret = bdev_dax_pgoff(bdev, sector, size, &pgoff);
> - if (ret)
> - goto dax_unlock;
> -
> - /*
> - * dax_direct_access() may sleep, so cannot hold tree_lock over
> - * its invocation.
> - */
> - ret = dax_direct_access(dax_dev, pgoff, size / PAGE_SIZE, &kaddr, &pfn);
> - if (ret < 0)
> - goto dax_unlock;
> -
> - if (WARN_ON_ONCE(ret < size / PAGE_SIZE)) {
> - ret = -EIO;
> - goto dax_unlock;
> - }
> -
> - dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(pfn));
> - dax_flush(dax_dev, kaddr, size);
> + dax_mapping_entry_mkclean(mapping, index, pfn);
> + dax_flush(dax_dev, page_address(pfn_to_page(pfn)), size);
> /*
> * After we have flushed the cache, we can clear the dirty tag. There
> * cannot be new dirty data in the pfn after the flush has completed as
> @@ -751,8 +730,6 @@ static int dax_writeback_one(struct block_device *bdev,
> radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_DIRTY);
> spin_unlock_irq(&mapping->tree_lock);
> trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT);
> - dax_unlock:
> - dax_read_unlock(id);
> put_locked_mapping_entry(mapping, index);
> return ret;
>
> @@ -810,8 +787,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
> break;
> }
>
> - ret = dax_writeback_one(bdev, dax_dev, mapping,
> - indices[i], pvec.pages[i]);
> + ret = dax_writeback_one(dax_dev, mapping, indices[i],
> + pvec.pages[i]);
> if (ret < 0) {
> mapping_set_error(mapping, ret);
> goto out;
> @@ -879,6 +856,7 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
> int ret = VM_FAULT_NOPAGE;
> struct page *zero_page;
> void *entry2;
> + pfn_t pfn;
>
> zero_page = ZERO_PAGE(0);
> if (unlikely(!zero_page)) {
> @@ -886,14 +864,15 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
> goto out;
> }
>
> - entry2 = dax_insert_mapping_entry(mapping, vmf, entry, 0,
> + pfn = page_to_pfn_t(zero_page);
> + entry2 = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
> RADIX_DAX_ZERO_PAGE, false);
> if (IS_ERR(entry2)) {
> ret = VM_FAULT_SIGBUS;
> goto out;
> }
>
> - vm_insert_mixed(vmf->vma, vaddr, page_to_pfn_t(zero_page));
> + vm_insert_mixed(vmf->vma, vaddr, pfn);
> out:
> trace_dax_load_hole(inode, vmf, ret);
> return ret;
> @@ -1200,8 +1179,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
> if (error < 0)
> goto error_finish_iomap;
>
> - entry = dax_insert_mapping_entry(mapping, vmf, entry,
> - dax_iomap_sector(&iomap, pos),
> + entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
> 0, write && !sync);
> if (IS_ERR(entry)) {
> error = PTR_ERR(entry);
> @@ -1286,13 +1264,15 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
> void *ret = NULL;
> spinlock_t *ptl;
> pmd_t pmd_entry;
> + pfn_t pfn;
>
> zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);
>
> if (unlikely(!zero_page))
> goto fallback;
>
> - ret = dax_insert_mapping_entry(mapping, vmf, entry, 0,
> + pfn = page_to_pfn_t(zero_page);
> + ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
> RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
> if (IS_ERR(ret))
> goto fallback;
> @@ -1415,8 +1395,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
> if (error < 0)
> goto finish_iomap;
>
> - entry = dax_insert_mapping_entry(mapping, vmf, entry,
> - dax_iomap_sector(&iomap, pos),
> + entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
> RADIX_DAX_PMD, write && !sync);
> if (IS_ERR(entry))
> goto finish_iomap;
>
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
next prev parent reply other threads:[~2018-01-03 15:39 UTC|newest]
Thread overview: 66+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-12-24 0:56 [PATCH v4 00/18] dax: fix dma vs truncate/hole-punch Dan Williams
2017-12-24 0:56 ` [PATCH v4 01/18] mm, dax: introduce pfn_t_special() Dan Williams
2018-01-04 8:16 ` Christoph Hellwig
2017-12-24 0:56 ` [PATCH v4 02/18] ext4: auto disable dax instead of failing mount Dan Williams
2018-01-03 14:20 ` Jan Kara
2017-12-24 0:56 ` [PATCH v4 03/18] ext2: " Dan Williams
2018-01-03 14:21 ` Jan Kara
2017-12-24 0:56 ` [PATCH v4 04/18] dax: require 'struct page' by default for filesystem dax Dan Williams
2018-01-03 15:29 ` Jan Kara
2018-01-04 8:16 ` Christoph Hellwig
2018-01-08 11:58 ` Gerald Schaefer
2017-12-24 0:56 ` [PATCH v4 05/18] dax: stop using VM_MIXEDMAP for dax Dan Williams
2018-01-03 15:27 ` Jan Kara
2017-12-24 0:56 ` [PATCH v4 06/18] dax: stop using VM_HUGEPAGE " Dan Williams
2017-12-24 0:56 ` [PATCH v4 07/18] dax: store pfns in the radix Dan Williams
2017-12-27 0:17 ` Ross Zwisler
2018-01-02 20:15 ` Dan Williams
2018-01-03 15:39 ` Jan Kara [this message]
2017-12-24 0:56 ` [PATCH v4 08/18] tools/testing/nvdimm: add 'bio_delay' mechanism Dan Williams
2017-12-27 18:08 ` Ross Zwisler
2018-01-02 20:35 ` Dan Williams
2018-01-02 21:44 ` Dave Chinner
2018-01-02 21:51 ` Dan Williams
2018-01-03 15:46 ` Jan Kara
2018-01-03 20:37 ` Jeff Moyer
2017-12-24 0:56 ` [PATCH v4 09/18] mm, dax: enable filesystems to trigger dev_pagemap ->page_free callbacks Dan Williams
2018-01-04 8:20 ` Christoph Hellwig
2017-12-24 0:56 ` [PATCH v4 10/18] mm, dev_pagemap: introduce CONFIG_DEV_PAGEMAP_OPS Dan Williams
2018-01-04 8:25 ` Christoph Hellwig
2017-12-24 0:56 ` [PATCH v4 11/18] fs, dax: introduce DEFINE_FSDAX_AOPS Dan Williams
2017-12-27 5:29 ` Matthew Wilcox
2018-01-02 20:21 ` Dan Williams
2018-01-03 16:05 ` Jan Kara
2018-01-04 8:27 ` Christoph Hellwig
2018-01-02 21:41 ` Dave Chinner
2017-12-24 0:57 ` [PATCH v4 12/18] xfs: use DEFINE_FSDAX_AOPS Dan Williams
2018-01-02 21:15 ` Darrick J. Wong
2018-01-02 21:40 ` Dan Williams
2018-01-03 16:09 ` Jan Kara
2018-01-04 8:28 ` Christoph Hellwig
2017-12-24 0:57 ` [PATCH v4 13/18] ext4: " Dan Williams
2018-01-04 8:29 ` Christoph Hellwig
2017-12-24 0:57 ` [PATCH v4 14/18] ext2: " Dan Williams
2018-01-04 8:29 ` Christoph Hellwig
2017-12-24 0:57 ` [PATCH v4 15/18] mm, fs, dax: use page->mapping to warn if dma collides with truncate Dan Williams
2018-01-04 8:30 ` Christoph Hellwig
2018-01-04 9:39 ` Jan Kara
2017-12-24 0:57 ` [PATCH v4 16/18] wait_bit: introduce {wait_on,wake_up}_atomic_one Dan Williams
2018-01-04 8:30 ` Christoph Hellwig
2017-12-24 0:57 ` [PATCH v4 17/18] mm, fs, dax: dax_flush_dma, handle dma vs block-map-change collisions Dan Williams
2018-01-04 8:31 ` Christoph Hellwig
2018-01-04 11:12 ` Jan Kara
2018-01-07 21:58 ` Dan Williams
2018-01-08 13:50 ` Jan Kara
2018-03-08 17:02 ` Dan Williams
2018-03-09 12:56 ` Jan Kara
2018-03-09 16:15 ` Dan Williams
2018-03-09 17:26 ` Dan Williams
2017-12-24 0:57 ` [PATCH v4 18/18] xfs, dax: wire up dax_flush_dma support via a new xfs_sync_dma helper Dan Williams
2018-01-02 21:07 ` Darrick J. Wong
2018-01-02 23:00 ` Dave Chinner
2018-01-03 2:21 ` Dan Williams
2018-01-03 7:51 ` Dave Chinner
2018-01-04 8:34 ` Christoph Hellwig
2018-01-04 8:33 ` Christoph Hellwig
2018-01-04 8:17 ` [PATCH v4 00/18] dax: fix dma vs truncate/hole-punch Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180103153948.GK4911@quack2.suse.cz \
--to=jack@suse.cz \
--cc=akpm@linux-foundation.org \
--cc=dan.j.williams@intel.com \
--cc=hch@lst.de \
--cc=jmoyer@redhat.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-nvdimm@lists.01.org \
--cc=linux-xfs@vger.kernel.org \
--cc=mawilcox@microsoft.com \
--cc=ross.zwisler@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).