From: Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>
To: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
Cc: Theodore Ts'o <tytso-3s7WtUTddSA@public.gmane.org>,
Matthew Wilcox <mawilcox-0li6OtcxBFHby3iVrkZq2A@public.gmane.org>,
Dave Chinner <david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org>,
linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw@public.gmane.org,
linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
linux-xfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org,
Andreas Dilger
<adilger.kernel-m1MBpc4rdrD3fQ9qLvQP4Q@public.gmane.org>,
Alexander Viro
<viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn@public.gmane.org>,
Jan Kara <jack-IBi9RG/b67k@public.gmane.org>,
linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
Andrew Morton
<akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>,
linux-ext4-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Subject: Re: [PATCH v5 15/17] dax: add struct iomap based DAX PMD support
Date: Tue, 11 Oct 2016 10:31:52 +0200 [thread overview]
Message-ID: <20161011083152.GG6952@quack2.suse.cz> (raw)
In-Reply-To: <1475874544-24842-16-git-send-email-ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
On Fri 07-10-16 15:09:02, Ross Zwisler wrote:
> diff --git a/fs/dax.c b/fs/dax.c
> index ac3cd05..e51d51f 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -281,7 +281,7 @@ static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
> * queue to the start of that PMD. This ensures that all offsets in
> * the range covered by the PMD map to the same bit lock.
> */
> - if (RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
> + if ((unsigned long)entry & RADIX_DAX_PMD)
> index &= ~((1UL << (PMD_SHIFT - PAGE_SHIFT)) - 1);
I agree with Christoph - helper for masking type bits would make this
nicer.
...
> -static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index)
> +static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
> + unsigned long size_flag)
> {
> + bool pmd_downgrade = false; /* splitting 2MiB entry into 4k entries? */
> void *entry, **slot;
>
> restart:
> spin_lock_irq(&mapping->tree_lock);
> entry = get_unlocked_mapping_entry(mapping, index, &slot);
> +
> + if (entry) {
> + if (size_flag & RADIX_DAX_PMD) {
> + if (!radix_tree_exceptional_entry(entry) ||
> + !((unsigned long)entry & RADIX_DAX_PMD)) {
> + entry = ERR_PTR(-EEXIST);
> + goto out_unlock;
You need to call put_unlocked_mapping_entry() if you use
get_unlocked_mapping_entry() and then decide not to lock it. The reason is
that the waitqueues we use are exclusive (we wake up only a single waiter
waiting for the lock) and so there can be some waiters for the entry lock
although we have not locked the entry ourselves.
> + }
> + } else { /* trying to grab a PTE entry */
> + if (radix_tree_exceptional_entry(entry) &&
> + ((unsigned long)entry & RADIX_DAX_PMD) &&
> + ((unsigned long)entry &
> + (RADIX_DAX_HZP|RADIX_DAX_EMPTY))) {
> + pmd_downgrade = true;
> + }
> + }
> + }
> +
> /* No entry for given index? Make sure radix tree is big enough. */
> - if (!entry) {
> + if (!entry || pmd_downgrade) {
> int err;
>
> + if (pmd_downgrade) {
> + /*
> + * Make sure 'entry' remains valid while we drop
> + * mapping->tree_lock.
> + */
> + entry = lock_slot(mapping, slot);
> + }
> +
> spin_unlock_irq(&mapping->tree_lock);
> err = radix_tree_preload(
> mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM);
> if (err)
> return ERR_PTR(err);
You need to unlock the locked entry before you return here...
> - entry = (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
> - RADIX_DAX_ENTRY_LOCK);
> +
> + /*
> + * Besides huge zero pages the only other thing that gets
> + * downgraded are empty entries which don't need to be
> + * unmapped.
> + */
> + if (pmd_downgrade && ((unsigned long)entry & RADIX_DAX_HZP))
> + unmap_mapping_range(mapping,
> + (index << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0);
> +
> spin_lock_irq(&mapping->tree_lock);
> - err = radix_tree_insert(&mapping->page_tree, index, entry);
> +
> + if (pmd_downgrade) {
> + radix_tree_delete(&mapping->page_tree, index);
> + mapping->nrexceptional--;
> + dax_wake_mapping_entry_waiter(mapping, index, entry,
> + false);
You need to set 'wake_all' argument here to true. Otherwise there could be
waiters waiting for non-existent entry forever...
> + }
> +
> + entry = dax_radix_entry(0, size_flag | RADIX_DAX_EMPTY);
> +
> + err = __radix_tree_insert(&mapping->page_tree, index,
> + dax_radix_order(entry), entry);
> radix_tree_preload_end();
> if (err) {
> spin_unlock_irq(&mapping->tree_lock);
> - /* Someone already created the entry? */
> - if (err == -EEXIST)
> + /*
> + * Someone already created the entry? This is a
> + * normal failure when inserting PMDs in a range
> + * that already contains PTEs. In that case we want
> + * to return -EEXIST immediately.
> + */
> + if (err == -EEXIST && !(size_flag & RADIX_DAX_PMD))
> goto restart;
Add a comment here that we can get here only if there was no radix tree
entry at 'index' and thus there can be no waiters to wake.
> return ERR_PTR(err);
> }
> @@ -441,6 +509,7 @@ restart:
> return page;
> }
> entry = lock_slot(mapping, slot);
> + out_unlock:
> spin_unlock_irq(&mapping->tree_lock);
> return entry;
> }
> @@ -581,11 +650,17 @@ static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size
> return 0;
> }
>
> -#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_SHIFT))
> -
> +/*
> + * By this point grab_mapping_entry() has ensured that we have a locked entry
> + * of the appropriate size so we don't have to worry about downgrading PMDs to
> + * PTEs. If we happen to be trying to insert a PTE and there is a PMD
> + * already in the tree, we will skip the insertion and just dirty the PMD as
> + * appropriate.
> + */
> static void *dax_insert_mapping_entry(struct address_space *mapping,
> struct vm_fault *vmf,
> - void *entry, sector_t sector)
> + void *entry, sector_t sector,
> + unsigned long flags)
> {
> struct radix_tree_root *page_tree = &mapping->page_tree;
> int error = 0;
> @@ -608,22 +683,28 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
> error = radix_tree_preload(vmf->gfp_mask & ~__GFP_HIGHMEM);
> if (error)
> return ERR_PTR(error);
> + } else if (((unsigned long)entry & RADIX_DAX_HZP) &&
> + !(flags & RADIX_DAX_HZP)) {
> + /* replacing huge zero page with PMD block mapping */
> + unmap_mapping_range(mapping,
> + (vmf->pgoff << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0);
> }
>
> spin_lock_irq(&mapping->tree_lock);
> - new_entry = (void *)((unsigned long)RADIX_DAX_ENTRY(sector, false) |
> - RADIX_DAX_ENTRY_LOCK);
> + new_entry = dax_radix_entry(sector, flags);
> +
You've lost the RADIX_DAX_ENTRY_LOCK flag here?
> if (hole_fill) {
> __delete_from_page_cache(entry, NULL);
> /* Drop pagecache reference */
> put_page(entry);
> - error = radix_tree_insert(page_tree, index, new_entry);
> + error = __radix_tree_insert(page_tree, index,
> + dax_radix_order(new_entry), new_entry);
> if (error) {
> new_entry = ERR_PTR(error);
> goto unlock;
> }
> mapping->nrexceptional++;
> - } else {
> + } else if ((unsigned long)entry & (RADIX_DAX_HZP|RADIX_DAX_EMPTY)) {
> void **slot;
> void *ret;
Uh, why this condition need to change? Is it some protection so that we
don't replace a mapped PMD entry with PTE one?
<snip>
> @@ -1261,4 +1338,186 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> return VM_FAULT_NOPAGE | major;
> }
> EXPORT_SYMBOL_GPL(dax_iomap_fault);
> +
> +#ifdef CONFIG_FS_DAX_PMD
> +/*
> + * The 'colour' (ie low bits) within a PMD of a page offset. This comes up
> + * more often than one might expect in the below functions.
> + */
> +#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
Just out of curiosity: Why the british spelling of 'colour'?
> +
> +static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd,
> + struct vm_fault *vmf, unsigned long address,
> + struct iomap *iomap, loff_t pos, bool write, void **entryp)
> +{
> + struct address_space *mapping = vma->vm_file->f_mapping;
> + struct block_device *bdev = iomap->bdev;
> + struct blk_dax_ctl dax = {
> + .sector = dax_iomap_sector(iomap, pos),
> + .size = PMD_SIZE,
> + };
> + long length = dax_map_atomic(bdev, &dax);
> + void *ret;
> +
> + if (length < 0) /* dax_map_atomic() failed */
> + return VM_FAULT_FALLBACK;
> + if (length < PMD_SIZE)
> + goto unmap_fallback;
> + if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR)
> + goto unmap_fallback;
> + if (!pfn_t_devmap(dax.pfn))
> + goto unmap_fallback;
> +
> + dax_unmap_atomic(bdev, &dax);
> +
> + ret = dax_insert_mapping_entry(mapping, vmf, *entryp, dax.sector,
> + RADIX_DAX_PMD);
> + if (IS_ERR(ret))
> + return VM_FAULT_FALLBACK;
> + *entryp = ret;
> +
> + return vmf_insert_pfn_pmd(vma, address, pmd, dax.pfn, write);
> +
> + unmap_fallback:
> + dax_unmap_atomic(bdev, &dax);
> + return VM_FAULT_FALLBACK;
> +}
> +
> +static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd,
> + struct vm_fault *vmf, unsigned long address,
> + struct iomap *iomap, void **entryp)
> +{
> + struct address_space *mapping = vma->vm_file->f_mapping;
> + unsigned long pmd_addr = address & PMD_MASK;
> + struct page *zero_page;
> + spinlock_t *ptl;
> + pmd_t pmd_entry;
> + void *ret;
> +
> + zero_page = get_huge_zero_page();
> +
> + if (unlikely(!zero_page))
> + return VM_FAULT_FALLBACK;
> +
> + ret = dax_insert_mapping_entry(mapping, vmf, *entryp, 0,
> + RADIX_DAX_PMD | RADIX_DAX_HZP);
> + if (IS_ERR(ret))
> + return VM_FAULT_FALLBACK;
> + *entryp = ret;
> +
> + ptl = pmd_lock(vma->vm_mm, pmd);
> + if (!pmd_none(*pmd)) {
> + spin_unlock(ptl);
> + return VM_FAULT_FALLBACK;
> + }
> +
> + pmd_entry = mk_pmd(zero_page, vma->vm_page_prot);
> + pmd_entry = pmd_mkhuge(pmd_entry);
> + set_pmd_at(vma->vm_mm, pmd_addr, pmd, pmd_entry);
> + spin_unlock(ptl);
> + return VM_FAULT_NOPAGE;
> +}
> +
> +int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
> + pmd_t *pmd, unsigned int flags, struct iomap_ops *ops)
> +{
> + struct address_space *mapping = vma->vm_file->f_mapping;
> + unsigned long pmd_addr = address & PMD_MASK;
> + bool write = flags & FAULT_FLAG_WRITE;
> + unsigned int iomap_flags = write ? IOMAP_WRITE : 0;
> + struct inode *inode = mapping->host;
> + int result = VM_FAULT_FALLBACK;
> + struct iomap iomap = { 0 };
Why the 0 here? Just empty braces are enough to initialize the structure to
zeros.
> + pgoff_t size, pgoff;
> + struct vm_fault vmf;
> + void *entry;
> + loff_t pos;
> + int error;
> +
> + /* Fall back to PTEs if we're going to COW */
> + if (write && !(vma->vm_flags & VM_SHARED)) {
> + split_huge_pmd(vma, pmd, address);
> + goto fallback;
> + }
> +
> + /* If the PMD would extend outside the VMA */
> + if (pmd_addr < vma->vm_start)
> + goto fallback;
> + if ((pmd_addr + PMD_SIZE) > vma->vm_end)
> + goto fallback;
> +
> + /*
> + * Check whether offset isn't beyond end of file now. Caller is
> + * supposed to hold locks serializing us with truncate / punch hole so
> + * this is a reliable test.
> + */
> + pgoff = linear_page_index(vma, pmd_addr);
> + size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
Nitpick - 'size' does not express that this is in pages and rounded up.
Maybe we could have:
max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
and then use strict inequalities below?
Honza
--
Jan Kara <jack-IBi9RG/b67k@public.gmane.org>
SUSE Labs, CR
WARNING: multiple messages have this Message-ID (diff)
From: Jan Kara <jack@suse.cz>
To: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Theodore Ts'o <tytso@mit.edu>,
Matthew Wilcox <mawilcox@microsoft.com>,
Dave Chinner <david@fromorbit.com>,
linux-nvdimm@lists.01.org, linux-kernel@vger.kernel.org,
linux-xfs@vger.kernel.org, linux-mm@kvack.org,
Andreas Dilger <adilger.kernel@dilger.ca>,
Alexander Viro <viro@zeniv.linux.org.uk>,
Jan Kara <jack@suse.com>,
linux-fsdevel@vger.kernel.org,
Andrew Morton <akpm@linux-foundation.org>,
linux-ext4@vger.kernel.org, Christoph Hellwig <hch@lst.de>
Subject: Re: [PATCH v5 15/17] dax: add struct iomap based DAX PMD support
Date: Tue, 11 Oct 2016 10:31:52 +0200 [thread overview]
Message-ID: <20161011083152.GG6952@quack2.suse.cz> (raw)
In-Reply-To: <1475874544-24842-16-git-send-email-ross.zwisler@linux.intel.com>
On Fri 07-10-16 15:09:02, Ross Zwisler wrote:
> diff --git a/fs/dax.c b/fs/dax.c
> index ac3cd05..e51d51f 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -281,7 +281,7 @@ static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
> * queue to the start of that PMD. This ensures that all offsets in
> * the range covered by the PMD map to the same bit lock.
> */
> - if (RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
> + if ((unsigned long)entry & RADIX_DAX_PMD)
> index &= ~((1UL << (PMD_SHIFT - PAGE_SHIFT)) - 1);
I agree with Christoph - helper for masking type bits would make this
nicer.
...
> -static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index)
> +static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
> + unsigned long size_flag)
> {
> + bool pmd_downgrade = false; /* splitting 2MiB entry into 4k entries? */
> void *entry, **slot;
>
> restart:
> spin_lock_irq(&mapping->tree_lock);
> entry = get_unlocked_mapping_entry(mapping, index, &slot);
> +
> + if (entry) {
> + if (size_flag & RADIX_DAX_PMD) {
> + if (!radix_tree_exceptional_entry(entry) ||
> + !((unsigned long)entry & RADIX_DAX_PMD)) {
> + entry = ERR_PTR(-EEXIST);
> + goto out_unlock;
You need to call put_unlocked_mapping_entry() if you use
get_unlocked_mapping_entry() and then decide not to lock it. The reason is
that the waitqueues we use are exclusive (we wake up only a single waiter
waiting for the lock) and so there can be some waiters for the entry lock
although we have not locked the entry ourselves.
> + }
> + } else { /* trying to grab a PTE entry */
> + if (radix_tree_exceptional_entry(entry) &&
> + ((unsigned long)entry & RADIX_DAX_PMD) &&
> + ((unsigned long)entry &
> + (RADIX_DAX_HZP|RADIX_DAX_EMPTY))) {
> + pmd_downgrade = true;
> + }
> + }
> + }
> +
> /* No entry for given index? Make sure radix tree is big enough. */
> - if (!entry) {
> + if (!entry || pmd_downgrade) {
> int err;
>
> + if (pmd_downgrade) {
> + /*
> + * Make sure 'entry' remains valid while we drop
> + * mapping->tree_lock.
> + */
> + entry = lock_slot(mapping, slot);
> + }
> +
> spin_unlock_irq(&mapping->tree_lock);
> err = radix_tree_preload(
> mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM);
> if (err)
> return ERR_PTR(err);
You need to unlock the locked entry before you return here...
> - entry = (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
> - RADIX_DAX_ENTRY_LOCK);
> +
> + /*
> + * Besides huge zero pages the only other thing that gets
> + * downgraded are empty entries which don't need to be
> + * unmapped.
> + */
> + if (pmd_downgrade && ((unsigned long)entry & RADIX_DAX_HZP))
> + unmap_mapping_range(mapping,
> + (index << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0);
> +
> spin_lock_irq(&mapping->tree_lock);
> - err = radix_tree_insert(&mapping->page_tree, index, entry);
> +
> + if (pmd_downgrade) {
> + radix_tree_delete(&mapping->page_tree, index);
> + mapping->nrexceptional--;
> + dax_wake_mapping_entry_waiter(mapping, index, entry,
> + false);
You need to set 'wake_all' argument here to true. Otherwise there could be
waiters waiting for non-existent entry forever...
> + }
> +
> + entry = dax_radix_entry(0, size_flag | RADIX_DAX_EMPTY);
> +
> + err = __radix_tree_insert(&mapping->page_tree, index,
> + dax_radix_order(entry), entry);
> radix_tree_preload_end();
> if (err) {
> spin_unlock_irq(&mapping->tree_lock);
> - /* Someone already created the entry? */
> - if (err == -EEXIST)
> + /*
> + * Someone already created the entry? This is a
> + * normal failure when inserting PMDs in a range
> + * that already contains PTEs. In that case we want
> + * to return -EEXIST immediately.
> + */
> + if (err == -EEXIST && !(size_flag & RADIX_DAX_PMD))
> goto restart;
Add a comment here that we can get here only if there was no radix tree
entry at 'index' and thus there can be no waiters to wake.
> return ERR_PTR(err);
> }
> @@ -441,6 +509,7 @@ restart:
> return page;
> }
> entry = lock_slot(mapping, slot);
> + out_unlock:
> spin_unlock_irq(&mapping->tree_lock);
> return entry;
> }
> @@ -581,11 +650,17 @@ static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size
> return 0;
> }
>
> -#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_SHIFT))
> -
> +/*
> + * By this point grab_mapping_entry() has ensured that we have a locked entry
> + * of the appropriate size so we don't have to worry about downgrading PMDs to
> + * PTEs. If we happen to be trying to insert a PTE and there is a PMD
> + * already in the tree, we will skip the insertion and just dirty the PMD as
> + * appropriate.
> + */
> static void *dax_insert_mapping_entry(struct address_space *mapping,
> struct vm_fault *vmf,
> - void *entry, sector_t sector)
> + void *entry, sector_t sector,
> + unsigned long flags)
> {
> struct radix_tree_root *page_tree = &mapping->page_tree;
> int error = 0;
> @@ -608,22 +683,28 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
> error = radix_tree_preload(vmf->gfp_mask & ~__GFP_HIGHMEM);
> if (error)
> return ERR_PTR(error);
> + } else if (((unsigned long)entry & RADIX_DAX_HZP) &&
> + !(flags & RADIX_DAX_HZP)) {
> + /* replacing huge zero page with PMD block mapping */
> + unmap_mapping_range(mapping,
> + (vmf->pgoff << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0);
> }
>
> spin_lock_irq(&mapping->tree_lock);
> - new_entry = (void *)((unsigned long)RADIX_DAX_ENTRY(sector, false) |
> - RADIX_DAX_ENTRY_LOCK);
> + new_entry = dax_radix_entry(sector, flags);
> +
You've lost the RADIX_DAX_ENTRY_LOCK flag here?
> if (hole_fill) {
> __delete_from_page_cache(entry, NULL);
> /* Drop pagecache reference */
> put_page(entry);
> - error = radix_tree_insert(page_tree, index, new_entry);
> + error = __radix_tree_insert(page_tree, index,
> + dax_radix_order(new_entry), new_entry);
> if (error) {
> new_entry = ERR_PTR(error);
> goto unlock;
> }
> mapping->nrexceptional++;
> - } else {
> + } else if ((unsigned long)entry & (RADIX_DAX_HZP|RADIX_DAX_EMPTY)) {
> void **slot;
> void *ret;
Uh, why this condition need to change? Is it some protection so that we
don't replace a mapped PMD entry with PTE one?
<snip>
> @@ -1261,4 +1338,186 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> return VM_FAULT_NOPAGE | major;
> }
> EXPORT_SYMBOL_GPL(dax_iomap_fault);
> +
> +#ifdef CONFIG_FS_DAX_PMD
> +/*
> + * The 'colour' (ie low bits) within a PMD of a page offset. This comes up
> + * more often than one might expect in the below functions.
> + */
> +#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
Just out of curiosity: Why the british spelling of 'colour'?
> +
> +static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd,
> + struct vm_fault *vmf, unsigned long address,
> + struct iomap *iomap, loff_t pos, bool write, void **entryp)
> +{
> + struct address_space *mapping = vma->vm_file->f_mapping;
> + struct block_device *bdev = iomap->bdev;
> + struct blk_dax_ctl dax = {
> + .sector = dax_iomap_sector(iomap, pos),
> + .size = PMD_SIZE,
> + };
> + long length = dax_map_atomic(bdev, &dax);
> + void *ret;
> +
> + if (length < 0) /* dax_map_atomic() failed */
> + return VM_FAULT_FALLBACK;
> + if (length < PMD_SIZE)
> + goto unmap_fallback;
> + if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR)
> + goto unmap_fallback;
> + if (!pfn_t_devmap(dax.pfn))
> + goto unmap_fallback;
> +
> + dax_unmap_atomic(bdev, &dax);
> +
> + ret = dax_insert_mapping_entry(mapping, vmf, *entryp, dax.sector,
> + RADIX_DAX_PMD);
> + if (IS_ERR(ret))
> + return VM_FAULT_FALLBACK;
> + *entryp = ret;
> +
> + return vmf_insert_pfn_pmd(vma, address, pmd, dax.pfn, write);
> +
> + unmap_fallback:
> + dax_unmap_atomic(bdev, &dax);
> + return VM_FAULT_FALLBACK;
> +}
> +
> +static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd,
> + struct vm_fault *vmf, unsigned long address,
> + struct iomap *iomap, void **entryp)
> +{
> + struct address_space *mapping = vma->vm_file->f_mapping;
> + unsigned long pmd_addr = address & PMD_MASK;
> + struct page *zero_page;
> + spinlock_t *ptl;
> + pmd_t pmd_entry;
> + void *ret;
> +
> + zero_page = get_huge_zero_page();
> +
> + if (unlikely(!zero_page))
> + return VM_FAULT_FALLBACK;
> +
> + ret = dax_insert_mapping_entry(mapping, vmf, *entryp, 0,
> + RADIX_DAX_PMD | RADIX_DAX_HZP);
> + if (IS_ERR(ret))
> + return VM_FAULT_FALLBACK;
> + *entryp = ret;
> +
> + ptl = pmd_lock(vma->vm_mm, pmd);
> + if (!pmd_none(*pmd)) {
> + spin_unlock(ptl);
> + return VM_FAULT_FALLBACK;
> + }
> +
> + pmd_entry = mk_pmd(zero_page, vma->vm_page_prot);
> + pmd_entry = pmd_mkhuge(pmd_entry);
> + set_pmd_at(vma->vm_mm, pmd_addr, pmd, pmd_entry);
> + spin_unlock(ptl);
> + return VM_FAULT_NOPAGE;
> +}
> +
> +int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
> + pmd_t *pmd, unsigned int flags, struct iomap_ops *ops)
> +{
> + struct address_space *mapping = vma->vm_file->f_mapping;
> + unsigned long pmd_addr = address & PMD_MASK;
> + bool write = flags & FAULT_FLAG_WRITE;
> + unsigned int iomap_flags = write ? IOMAP_WRITE : 0;
> + struct inode *inode = mapping->host;
> + int result = VM_FAULT_FALLBACK;
> + struct iomap iomap = { 0 };
Why the 0 here? Just empty braces are enough to initialize the structure to
zeros.
> + pgoff_t size, pgoff;
> + struct vm_fault vmf;
> + void *entry;
> + loff_t pos;
> + int error;
> +
> + /* Fall back to PTEs if we're going to COW */
> + if (write && !(vma->vm_flags & VM_SHARED)) {
> + split_huge_pmd(vma, pmd, address);
> + goto fallback;
> + }
> +
> + /* If the PMD would extend outside the VMA */
> + if (pmd_addr < vma->vm_start)
> + goto fallback;
> + if ((pmd_addr + PMD_SIZE) > vma->vm_end)
> + goto fallback;
> +
> + /*
> + * Check whether offset isn't beyond end of file now. Caller is
> + * supposed to hold locks serializing us with truncate / punch hole so
> + * this is a reliable test.
> + */
> + pgoff = linear_page_index(vma, pmd_addr);
> + size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
Nitpick - 'size' does not express that this is in pages and rounded up.
Maybe we could have:
max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
and then use strict inequalities below?
Honza
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm
WARNING: multiple messages have this Message-ID (diff)
From: Jan Kara <jack@suse.cz>
To: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: linux-kernel@vger.kernel.org, Theodore Ts'o <tytso@mit.edu>,
Alexander Viro <viro@zeniv.linux.org.uk>,
Andreas Dilger <adilger.kernel@dilger.ca>,
Andrew Morton <akpm@linux-foundation.org>,
Christoph Hellwig <hch@lst.de>,
Dan Williams <dan.j.williams@intel.com>,
Dave Chinner <david@fromorbit.com>, Jan Kara <jack@suse.com>,
Matthew Wilcox <mawilcox@microsoft.com>,
linux-ext4@vger.kernel.org, linux-fsdevel@vger.kernel.org,
linux-mm@kvack.org, linux-nvdimm@lists.01.org,
linux-xfs@vger.kernel.org
Subject: Re: [PATCH v5 15/17] dax: add struct iomap based DAX PMD support
Date: Tue, 11 Oct 2016 10:31:52 +0200 [thread overview]
Message-ID: <20161011083152.GG6952@quack2.suse.cz> (raw)
In-Reply-To: <1475874544-24842-16-git-send-email-ross.zwisler@linux.intel.com>
On Fri 07-10-16 15:09:02, Ross Zwisler wrote:
> diff --git a/fs/dax.c b/fs/dax.c
> index ac3cd05..e51d51f 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -281,7 +281,7 @@ static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
> * queue to the start of that PMD. This ensures that all offsets in
> * the range covered by the PMD map to the same bit lock.
> */
> - if (RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
> + if ((unsigned long)entry & RADIX_DAX_PMD)
> index &= ~((1UL << (PMD_SHIFT - PAGE_SHIFT)) - 1);
I agree with Christoph - helper for masking type bits would make this
nicer.
...
> -static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index)
> +static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
> + unsigned long size_flag)
> {
> + bool pmd_downgrade = false; /* splitting 2MiB entry into 4k entries? */
> void *entry, **slot;
>
> restart:
> spin_lock_irq(&mapping->tree_lock);
> entry = get_unlocked_mapping_entry(mapping, index, &slot);
> +
> + if (entry) {
> + if (size_flag & RADIX_DAX_PMD) {
> + if (!radix_tree_exceptional_entry(entry) ||
> + !((unsigned long)entry & RADIX_DAX_PMD)) {
> + entry = ERR_PTR(-EEXIST);
> + goto out_unlock;
You need to call put_unlocked_mapping_entry() if you use
get_unlocked_mapping_entry() and then decide not to lock it. The reason is
that the waitqueues we use are exclusive (we wake up only a single waiter
waiting for the lock) and so there can be some waiters for the entry lock
although we have not locked the entry ourselves.
> + }
> + } else { /* trying to grab a PTE entry */
> + if (radix_tree_exceptional_entry(entry) &&
> + ((unsigned long)entry & RADIX_DAX_PMD) &&
> + ((unsigned long)entry &
> + (RADIX_DAX_HZP|RADIX_DAX_EMPTY))) {
> + pmd_downgrade = true;
> + }
> + }
> + }
> +
> /* No entry for given index? Make sure radix tree is big enough. */
> - if (!entry) {
> + if (!entry || pmd_downgrade) {
> int err;
>
> + if (pmd_downgrade) {
> + /*
> + * Make sure 'entry' remains valid while we drop
> + * mapping->tree_lock.
> + */
> + entry = lock_slot(mapping, slot);
> + }
> +
> spin_unlock_irq(&mapping->tree_lock);
> err = radix_tree_preload(
> mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM);
> if (err)
> return ERR_PTR(err);
You need to unlock the locked entry before you return here...
> - entry = (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
> - RADIX_DAX_ENTRY_LOCK);
> +
> + /*
> + * Besides huge zero pages the only other thing that gets
> + * downgraded are empty entries which don't need to be
> + * unmapped.
> + */
> + if (pmd_downgrade && ((unsigned long)entry & RADIX_DAX_HZP))
> + unmap_mapping_range(mapping,
> + (index << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0);
> +
> spin_lock_irq(&mapping->tree_lock);
> - err = radix_tree_insert(&mapping->page_tree, index, entry);
> +
> + if (pmd_downgrade) {
> + radix_tree_delete(&mapping->page_tree, index);
> + mapping->nrexceptional--;
> + dax_wake_mapping_entry_waiter(mapping, index, entry,
> + false);
You need to set 'wake_all' argument here to true. Otherwise there could be
waiters waiting for non-existent entry forever...
> + }
> +
> + entry = dax_radix_entry(0, size_flag | RADIX_DAX_EMPTY);
> +
> + err = __radix_tree_insert(&mapping->page_tree, index,
> + dax_radix_order(entry), entry);
> radix_tree_preload_end();
> if (err) {
> spin_unlock_irq(&mapping->tree_lock);
> - /* Someone already created the entry? */
> - if (err == -EEXIST)
> + /*
> + * Someone already created the entry? This is a
> + * normal failure when inserting PMDs in a range
> + * that already contains PTEs. In that case we want
> + * to return -EEXIST immediately.
> + */
> + if (err == -EEXIST && !(size_flag & RADIX_DAX_PMD))
> goto restart;
Add a comment here that we can get here only if there was no radix tree
entry at 'index' and thus there can be no waiters to wake.
> return ERR_PTR(err);
> }
> @@ -441,6 +509,7 @@ restart:
> return page;
> }
> entry = lock_slot(mapping, slot);
> + out_unlock:
> spin_unlock_irq(&mapping->tree_lock);
> return entry;
> }
> @@ -581,11 +650,17 @@ static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size
> return 0;
> }
>
> -#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_SHIFT))
> -
> +/*
> + * By this point grab_mapping_entry() has ensured that we have a locked entry
> + * of the appropriate size so we don't have to worry about downgrading PMDs to
> + * PTEs. If we happen to be trying to insert a PTE and there is a PMD
> + * already in the tree, we will skip the insertion and just dirty the PMD as
> + * appropriate.
> + */
> static void *dax_insert_mapping_entry(struct address_space *mapping,
> struct vm_fault *vmf,
> - void *entry, sector_t sector)
> + void *entry, sector_t sector,
> + unsigned long flags)
> {
> struct radix_tree_root *page_tree = &mapping->page_tree;
> int error = 0;
> @@ -608,22 +683,28 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
> error = radix_tree_preload(vmf->gfp_mask & ~__GFP_HIGHMEM);
> if (error)
> return ERR_PTR(error);
> + } else if (((unsigned long)entry & RADIX_DAX_HZP) &&
> + !(flags & RADIX_DAX_HZP)) {
> + /* replacing huge zero page with PMD block mapping */
> + unmap_mapping_range(mapping,
> + (vmf->pgoff << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0);
> }
>
> spin_lock_irq(&mapping->tree_lock);
> - new_entry = (void *)((unsigned long)RADIX_DAX_ENTRY(sector, false) |
> - RADIX_DAX_ENTRY_LOCK);
> + new_entry = dax_radix_entry(sector, flags);
> +
You've lost the RADIX_DAX_ENTRY_LOCK flag here?
> if (hole_fill) {
> __delete_from_page_cache(entry, NULL);
> /* Drop pagecache reference */
> put_page(entry);
> - error = radix_tree_insert(page_tree, index, new_entry);
> + error = __radix_tree_insert(page_tree, index,
> + dax_radix_order(new_entry), new_entry);
> if (error) {
> new_entry = ERR_PTR(error);
> goto unlock;
> }
> mapping->nrexceptional++;
> - } else {
> + } else if ((unsigned long)entry & (RADIX_DAX_HZP|RADIX_DAX_EMPTY)) {
> void **slot;
> void *ret;
Uh, why this condition need to change? Is it some protection so that we
don't replace a mapped PMD entry with PTE one?
<snip>
> @@ -1261,4 +1338,186 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> return VM_FAULT_NOPAGE | major;
> }
> EXPORT_SYMBOL_GPL(dax_iomap_fault);
> +
> +#ifdef CONFIG_FS_DAX_PMD
> +/*
> + * The 'colour' (ie low bits) within a PMD of a page offset. This comes up
> + * more often than one might expect in the below functions.
> + */
> +#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
Just out of curiosity: Why the british spelling of 'colour'?
> +
> +static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd,
> + struct vm_fault *vmf, unsigned long address,
> + struct iomap *iomap, loff_t pos, bool write, void **entryp)
> +{
> + struct address_space *mapping = vma->vm_file->f_mapping;
> + struct block_device *bdev = iomap->bdev;
> + struct blk_dax_ctl dax = {
> + .sector = dax_iomap_sector(iomap, pos),
> + .size = PMD_SIZE,
> + };
> + long length = dax_map_atomic(bdev, &dax);
> + void *ret;
> +
> + if (length < 0) /* dax_map_atomic() failed */
> + return VM_FAULT_FALLBACK;
> + if (length < PMD_SIZE)
> + goto unmap_fallback;
> + if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR)
> + goto unmap_fallback;
> + if (!pfn_t_devmap(dax.pfn))
> + goto unmap_fallback;
> +
> + dax_unmap_atomic(bdev, &dax);
> +
> + ret = dax_insert_mapping_entry(mapping, vmf, *entryp, dax.sector,
> + RADIX_DAX_PMD);
> + if (IS_ERR(ret))
> + return VM_FAULT_FALLBACK;
> + *entryp = ret;
> +
> + return vmf_insert_pfn_pmd(vma, address, pmd, dax.pfn, write);
> +
> + unmap_fallback:
> + dax_unmap_atomic(bdev, &dax);
> + return VM_FAULT_FALLBACK;
> +}
> +
> +static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd,
> + struct vm_fault *vmf, unsigned long address,
> + struct iomap *iomap, void **entryp)
> +{
> + struct address_space *mapping = vma->vm_file->f_mapping;
> + unsigned long pmd_addr = address & PMD_MASK;
> + struct page *zero_page;
> + spinlock_t *ptl;
> + pmd_t pmd_entry;
> + void *ret;
> +
> + zero_page = get_huge_zero_page();
> +
> + if (unlikely(!zero_page))
> + return VM_FAULT_FALLBACK;
> +
> + ret = dax_insert_mapping_entry(mapping, vmf, *entryp, 0,
> + RADIX_DAX_PMD | RADIX_DAX_HZP);
> + if (IS_ERR(ret))
> + return VM_FAULT_FALLBACK;
> + *entryp = ret;
> +
> + ptl = pmd_lock(vma->vm_mm, pmd);
> + if (!pmd_none(*pmd)) {
> + spin_unlock(ptl);
> + return VM_FAULT_FALLBACK;
> + }
> +
> + pmd_entry = mk_pmd(zero_page, vma->vm_page_prot);
> + pmd_entry = pmd_mkhuge(pmd_entry);
> + set_pmd_at(vma->vm_mm, pmd_addr, pmd, pmd_entry);
> + spin_unlock(ptl);
> + return VM_FAULT_NOPAGE;
> +}
> +
> +int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
> + pmd_t *pmd, unsigned int flags, struct iomap_ops *ops)
> +{
> + struct address_space *mapping = vma->vm_file->f_mapping;
> + unsigned long pmd_addr = address & PMD_MASK;
> + bool write = flags & FAULT_FLAG_WRITE;
> + unsigned int iomap_flags = write ? IOMAP_WRITE : 0;
> + struct inode *inode = mapping->host;
> + int result = VM_FAULT_FALLBACK;
> + struct iomap iomap = { 0 };
Why the 0 here? Just empty braces are enough to initialize the structure to
zeros.
> + pgoff_t size, pgoff;
> + struct vm_fault vmf;
> + void *entry;
> + loff_t pos;
> + int error;
> +
> + /* Fall back to PTEs if we're going to COW */
> + if (write && !(vma->vm_flags & VM_SHARED)) {
> + split_huge_pmd(vma, pmd, address);
> + goto fallback;
> + }
> +
> + /* If the PMD would extend outside the VMA */
> + if (pmd_addr < vma->vm_start)
> + goto fallback;
> + if ((pmd_addr + PMD_SIZE) > vma->vm_end)
> + goto fallback;
> +
> + /*
> + * Check whether offset isn't beyond end of file now. Caller is
> + * supposed to hold locks serializing us with truncate / punch hole so
> + * this is a reliable test.
> + */
> + pgoff = linear_page_index(vma, pmd_addr);
> + size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
Nitpick - 'size' does not express that this is in pages and rounded up.
Maybe we could have:
max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
and then use strict inequalities below?
Honza
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
WARNING: multiple messages have this Message-ID (diff)
From: Jan Kara <jack@suse.cz>
To: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: linux-kernel@vger.kernel.org, Theodore Ts'o <tytso@mit.edu>,
Alexander Viro <viro@zeniv.linux.org.uk>,
Andreas Dilger <adilger.kernel@dilger.ca>,
Andrew Morton <akpm@linux-foundation.org>,
Christoph Hellwig <hch@lst.de>,
Dan Williams <dan.j.williams@intel.com>,
Dave Chinner <david@fromorbit.com>, Jan Kara <jack@suse.com>,
Matthew Wilcox <mawilcox@microsoft.com>,
linux-ext4@vger.kernel.org, linux-fsdevel@vger.kernel.org,
linux-mm@kvack.org, linux-nvdimm@lists.01.org,
linux-xfs@vger.kernel.org
Subject: Re: [PATCH v5 15/17] dax: add struct iomap based DAX PMD support
Date: Tue, 11 Oct 2016 10:31:52 +0200 [thread overview]
Message-ID: <20161011083152.GG6952@quack2.suse.cz> (raw)
In-Reply-To: <1475874544-24842-16-git-send-email-ross.zwisler@linux.intel.com>
On Fri 07-10-16 15:09:02, Ross Zwisler wrote:
> diff --git a/fs/dax.c b/fs/dax.c
> index ac3cd05..e51d51f 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -281,7 +281,7 @@ static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
> * queue to the start of that PMD. This ensures that all offsets in
> * the range covered by the PMD map to the same bit lock.
> */
> - if (RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
> + if ((unsigned long)entry & RADIX_DAX_PMD)
> index &= ~((1UL << (PMD_SHIFT - PAGE_SHIFT)) - 1);
I agree with Christoph - helper for masking type bits would make this
nicer.
...
> -static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index)
> +static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
> + unsigned long size_flag)
> {
> + bool pmd_downgrade = false; /* splitting 2MiB entry into 4k entries? */
> void *entry, **slot;
>
> restart:
> spin_lock_irq(&mapping->tree_lock);
> entry = get_unlocked_mapping_entry(mapping, index, &slot);
> +
> + if (entry) {
> + if (size_flag & RADIX_DAX_PMD) {
> + if (!radix_tree_exceptional_entry(entry) ||
> + !((unsigned long)entry & RADIX_DAX_PMD)) {
> + entry = ERR_PTR(-EEXIST);
> + goto out_unlock;
You need to call put_unlocked_mapping_entry() if you use
get_unlocked_mapping_entry() and then decide not to lock it. The reason is
that the waitqueues we use are exclusive (we wake up only a single waiter
waiting for the lock) and so there can be some waiters for the entry lock
although we have not locked the entry ourselves.
> + }
> + } else { /* trying to grab a PTE entry */
> + if (radix_tree_exceptional_entry(entry) &&
> + ((unsigned long)entry & RADIX_DAX_PMD) &&
> + ((unsigned long)entry &
> + (RADIX_DAX_HZP|RADIX_DAX_EMPTY))) {
> + pmd_downgrade = true;
> + }
> + }
> + }
> +
> /* No entry for given index? Make sure radix tree is big enough. */
> - if (!entry) {
> + if (!entry || pmd_downgrade) {
> int err;
>
> + if (pmd_downgrade) {
> + /*
> + * Make sure 'entry' remains valid while we drop
> + * mapping->tree_lock.
> + */
> + entry = lock_slot(mapping, slot);
> + }
> +
> spin_unlock_irq(&mapping->tree_lock);
> err = radix_tree_preload(
> mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM);
> if (err)
> return ERR_PTR(err);
You need to unlock the locked entry before you return here...
> - entry = (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
> - RADIX_DAX_ENTRY_LOCK);
> +
> + /*
> + * Besides huge zero pages the only other thing that gets
> + * downgraded are empty entries which don't need to be
> + * unmapped.
> + */
> + if (pmd_downgrade && ((unsigned long)entry & RADIX_DAX_HZP))
> + unmap_mapping_range(mapping,
> + (index << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0);
> +
> spin_lock_irq(&mapping->tree_lock);
> - err = radix_tree_insert(&mapping->page_tree, index, entry);
> +
> + if (pmd_downgrade) {
> + radix_tree_delete(&mapping->page_tree, index);
> + mapping->nrexceptional--;
> + dax_wake_mapping_entry_waiter(mapping, index, entry,
> + false);
You need to set 'wake_all' argument here to true. Otherwise there could be
waiters waiting for non-existent entry forever...
> + }
> +
> + entry = dax_radix_entry(0, size_flag | RADIX_DAX_EMPTY);
> +
> + err = __radix_tree_insert(&mapping->page_tree, index,
> + dax_radix_order(entry), entry);
> radix_tree_preload_end();
> if (err) {
> spin_unlock_irq(&mapping->tree_lock);
> - /* Someone already created the entry? */
> - if (err == -EEXIST)
> + /*
> + * Someone already created the entry? This is a
> + * normal failure when inserting PMDs in a range
> + * that already contains PTEs. In that case we want
> + * to return -EEXIST immediately.
> + */
> + if (err == -EEXIST && !(size_flag & RADIX_DAX_PMD))
> goto restart;
Add a comment here that we can get here only if there was no radix tree
entry at 'index' and thus there can be no waiters to wake.
> return ERR_PTR(err);
> }
> @@ -441,6 +509,7 @@ restart:
> return page;
> }
> entry = lock_slot(mapping, slot);
> + out_unlock:
> spin_unlock_irq(&mapping->tree_lock);
> return entry;
> }
> @@ -581,11 +650,17 @@ static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size
> return 0;
> }
>
> -#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_SHIFT))
> -
> +/*
> + * By this point grab_mapping_entry() has ensured that we have a locked entry
> + * of the appropriate size so we don't have to worry about downgrading PMDs to
> + * PTEs. If we happen to be trying to insert a PTE and there is a PMD
> + * already in the tree, we will skip the insertion and just dirty the PMD as
> + * appropriate.
> + */
> static void *dax_insert_mapping_entry(struct address_space *mapping,
> struct vm_fault *vmf,
> - void *entry, sector_t sector)
> + void *entry, sector_t sector,
> + unsigned long flags)
> {
> struct radix_tree_root *page_tree = &mapping->page_tree;
> int error = 0;
> @@ -608,22 +683,28 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
> error = radix_tree_preload(vmf->gfp_mask & ~__GFP_HIGHMEM);
> if (error)
> return ERR_PTR(error);
> + } else if (((unsigned long)entry & RADIX_DAX_HZP) &&
> + !(flags & RADIX_DAX_HZP)) {
> + /* replacing huge zero page with PMD block mapping */
> + unmap_mapping_range(mapping,
> + (vmf->pgoff << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0);
> }
>
> spin_lock_irq(&mapping->tree_lock);
> - new_entry = (void *)((unsigned long)RADIX_DAX_ENTRY(sector, false) |
> - RADIX_DAX_ENTRY_LOCK);
> + new_entry = dax_radix_entry(sector, flags);
> +
You've lost the RADIX_DAX_ENTRY_LOCK flag here?
> if (hole_fill) {
> __delete_from_page_cache(entry, NULL);
> /* Drop pagecache reference */
> put_page(entry);
> - error = radix_tree_insert(page_tree, index, new_entry);
> + error = __radix_tree_insert(page_tree, index,
> + dax_radix_order(new_entry), new_entry);
> if (error) {
> new_entry = ERR_PTR(error);
> goto unlock;
> }
> mapping->nrexceptional++;
> - } else {
> + } else if ((unsigned long)entry & (RADIX_DAX_HZP|RADIX_DAX_EMPTY)) {
> void **slot;
> void *ret;
Uh, why this condition need to change? Is it some protection so that we
don't replace a mapped PMD entry with PTE one?
<snip>
> @@ -1261,4 +1338,186 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> return VM_FAULT_NOPAGE | major;
> }
> EXPORT_SYMBOL_GPL(dax_iomap_fault);
> +
> +#ifdef CONFIG_FS_DAX_PMD
> +/*
> + * The 'colour' (ie low bits) within a PMD of a page offset. This comes up
> + * more often than one might expect in the below functions.
> + */
> +#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
Just out of curiosity: Why the british spelling of 'colour'?
> +
> +static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd,
> + struct vm_fault *vmf, unsigned long address,
> + struct iomap *iomap, loff_t pos, bool write, void **entryp)
> +{
> + struct address_space *mapping = vma->vm_file->f_mapping;
> + struct block_device *bdev = iomap->bdev;
> + struct blk_dax_ctl dax = {
> + .sector = dax_iomap_sector(iomap, pos),
> + .size = PMD_SIZE,
> + };
> + long length = dax_map_atomic(bdev, &dax);
> + void *ret;
> +
> + if (length < 0) /* dax_map_atomic() failed */
> + return VM_FAULT_FALLBACK;
> + if (length < PMD_SIZE)
> + goto unmap_fallback;
> + if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR)
> + goto unmap_fallback;
> + if (!pfn_t_devmap(dax.pfn))
> + goto unmap_fallback;
> +
> + dax_unmap_atomic(bdev, &dax);
> +
> + ret = dax_insert_mapping_entry(mapping, vmf, *entryp, dax.sector,
> + RADIX_DAX_PMD);
> + if (IS_ERR(ret))
> + return VM_FAULT_FALLBACK;
> + *entryp = ret;
> +
> + return vmf_insert_pfn_pmd(vma, address, pmd, dax.pfn, write);
> +
> + unmap_fallback:
> + dax_unmap_atomic(bdev, &dax);
> + return VM_FAULT_FALLBACK;
> +}
> +
> +static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd,
> + struct vm_fault *vmf, unsigned long address,
> + struct iomap *iomap, void **entryp)
> +{
> + struct address_space *mapping = vma->vm_file->f_mapping;
> + unsigned long pmd_addr = address & PMD_MASK;
> + struct page *zero_page;
> + spinlock_t *ptl;
> + pmd_t pmd_entry;
> + void *ret;
> +
> + zero_page = get_huge_zero_page();
> +
> + if (unlikely(!zero_page))
> + return VM_FAULT_FALLBACK;
> +
> + ret = dax_insert_mapping_entry(mapping, vmf, *entryp, 0,
> + RADIX_DAX_PMD | RADIX_DAX_HZP);
> + if (IS_ERR(ret))
> + return VM_FAULT_FALLBACK;
> + *entryp = ret;
> +
> + ptl = pmd_lock(vma->vm_mm, pmd);
> + if (!pmd_none(*pmd)) {
> + spin_unlock(ptl);
> + return VM_FAULT_FALLBACK;
> + }
> +
> + pmd_entry = mk_pmd(zero_page, vma->vm_page_prot);
> + pmd_entry = pmd_mkhuge(pmd_entry);
> + set_pmd_at(vma->vm_mm, pmd_addr, pmd, pmd_entry);
> + spin_unlock(ptl);
> + return VM_FAULT_NOPAGE;
> +}
> +
> +int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
> + pmd_t *pmd, unsigned int flags, struct iomap_ops *ops)
> +{
> + struct address_space *mapping = vma->vm_file->f_mapping;
> + unsigned long pmd_addr = address & PMD_MASK;
> + bool write = flags & FAULT_FLAG_WRITE;
> + unsigned int iomap_flags = write ? IOMAP_WRITE : 0;
> + struct inode *inode = mapping->host;
> + int result = VM_FAULT_FALLBACK;
> + struct iomap iomap = { 0 };
Why the 0 here? Just empty braces are enough to initialize the structure to
zeros.
> + pgoff_t size, pgoff;
> + struct vm_fault vmf;
> + void *entry;
> + loff_t pos;
> + int error;
> +
> + /* Fall back to PTEs if we're going to COW */
> + if (write && !(vma->vm_flags & VM_SHARED)) {
> + split_huge_pmd(vma, pmd, address);
> + goto fallback;
> + }
> +
> + /* If the PMD would extend outside the VMA */
> + if (pmd_addr < vma->vm_start)
> + goto fallback;
> + if ((pmd_addr + PMD_SIZE) > vma->vm_end)
> + goto fallback;
> +
> + /*
> + * Check whether offset isn't beyond end of file now. Caller is
> + * supposed to hold locks serializing us with truncate / punch hole so
> + * this is a reliable test.
> + */
> + pgoff = linear_page_index(vma, pmd_addr);
> + size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
Nitpick - 'size' does not express that this is in pages and rounded up.
Maybe we could have:
max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
and then use strict inequalities below?
Honza
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
WARNING: multiple messages have this Message-ID (diff)
From: Jan Kara <jack@suse.cz>
To: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: linux-kernel@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>,
Alexander Viro <viro@zeniv.linux.org.uk>,
Andreas Dilger <adilger.kernel@dilger.ca>,
Andrew Morton <akpm@linux-foundation.org>,
Christoph Hellwig <hch@lst.de>,
Dan Williams <dan.j.williams@intel.com>,
Dave Chinner <david@fromorbit.com>, Jan Kara <jack@suse.com>,
Matthew Wilcox <mawilcox@microsoft.com>,
linux-ext4@vger.kernel.org, linux-fsdevel@vger.kernel.org,
linux-mm@kvack.org, linux-nvdimm@ml01.01.org,
linux-xfs@vger.kernel.org
Subject: Re: [PATCH v5 15/17] dax: add struct iomap based DAX PMD support
Date: Tue, 11 Oct 2016 10:31:52 +0200 [thread overview]
Message-ID: <20161011083152.GG6952@quack2.suse.cz> (raw)
In-Reply-To: <1475874544-24842-16-git-send-email-ross.zwisler@linux.intel.com>
On Fri 07-10-16 15:09:02, Ross Zwisler wrote:
> diff --git a/fs/dax.c b/fs/dax.c
> index ac3cd05..e51d51f 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -281,7 +281,7 @@ static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping,
> * queue to the start of that PMD. This ensures that all offsets in
> * the range covered by the PMD map to the same bit lock.
> */
> - if (RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
> + if ((unsigned long)entry & RADIX_DAX_PMD)
> index &= ~((1UL << (PMD_SHIFT - PAGE_SHIFT)) - 1);
I agree with Christoph - helper for masking type bits would make this
nicer.
...
> -static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index)
> +static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
> + unsigned long size_flag)
> {
> + bool pmd_downgrade = false; /* splitting 2MiB entry into 4k entries? */
> void *entry, **slot;
>
> restart:
> spin_lock_irq(&mapping->tree_lock);
> entry = get_unlocked_mapping_entry(mapping, index, &slot);
> +
> + if (entry) {
> + if (size_flag & RADIX_DAX_PMD) {
> + if (!radix_tree_exceptional_entry(entry) ||
> + !((unsigned long)entry & RADIX_DAX_PMD)) {
> + entry = ERR_PTR(-EEXIST);
> + goto out_unlock;
You need to call put_unlocked_mapping_entry() if you use
get_unlocked_mapping_entry() and then decide not to lock it. The reason is
that the waitqueues we use are exclusive (we wake up only a single waiter
waiting for the lock) and so there can be some waiters for the entry lock
although we have not locked the entry ourselves.
> + }
> + } else { /* trying to grab a PTE entry */
> + if (radix_tree_exceptional_entry(entry) &&
> + ((unsigned long)entry & RADIX_DAX_PMD) &&
> + ((unsigned long)entry &
> + (RADIX_DAX_HZP|RADIX_DAX_EMPTY))) {
> + pmd_downgrade = true;
> + }
> + }
> + }
> +
> /* No entry for given index? Make sure radix tree is big enough. */
> - if (!entry) {
> + if (!entry || pmd_downgrade) {
> int err;
>
> + if (pmd_downgrade) {
> + /*
> + * Make sure 'entry' remains valid while we drop
> + * mapping->tree_lock.
> + */
> + entry = lock_slot(mapping, slot);
> + }
> +
> spin_unlock_irq(&mapping->tree_lock);
> err = radix_tree_preload(
> mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM);
> if (err)
> return ERR_PTR(err);
You need to unlock the locked entry before you return here...
> - entry = (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
> - RADIX_DAX_ENTRY_LOCK);
> +
> + /*
> + * Besides huge zero pages the only other thing that gets
> + * downgraded are empty entries which don't need to be
> + * unmapped.
> + */
> + if (pmd_downgrade && ((unsigned long)entry & RADIX_DAX_HZP))
> + unmap_mapping_range(mapping,
> + (index << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0);
> +
> spin_lock_irq(&mapping->tree_lock);
> - err = radix_tree_insert(&mapping->page_tree, index, entry);
> +
> + if (pmd_downgrade) {
> + radix_tree_delete(&mapping->page_tree, index);
> + mapping->nrexceptional--;
> + dax_wake_mapping_entry_waiter(mapping, index, entry,
> + false);
You need to set 'wake_all' argument here to true. Otherwise there could be
waiters waiting for non-existent entry forever...
> + }
> +
> + entry = dax_radix_entry(0, size_flag | RADIX_DAX_EMPTY);
> +
> + err = __radix_tree_insert(&mapping->page_tree, index,
> + dax_radix_order(entry), entry);
> radix_tree_preload_end();
> if (err) {
> spin_unlock_irq(&mapping->tree_lock);
> - /* Someone already created the entry? */
> - if (err == -EEXIST)
> + /*
> + * Someone already created the entry? This is a
> + * normal failure when inserting PMDs in a range
> + * that already contains PTEs. In that case we want
> + * to return -EEXIST immediately.
> + */
> + if (err == -EEXIST && !(size_flag & RADIX_DAX_PMD))
> goto restart;
Add a comment here that we can get here only if there was no radix tree
entry at 'index' and thus there can be no waiters to wake.
> return ERR_PTR(err);
> }
> @@ -441,6 +509,7 @@ restart:
> return page;
> }
> entry = lock_slot(mapping, slot);
> + out_unlock:
> spin_unlock_irq(&mapping->tree_lock);
> return entry;
> }
> @@ -581,11 +650,17 @@ static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size
> return 0;
> }
>
> -#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_SHIFT))
> -
> +/*
> + * By this point grab_mapping_entry() has ensured that we have a locked entry
> + * of the appropriate size so we don't have to worry about downgrading PMDs to
> + * PTEs. If we happen to be trying to insert a PTE and there is a PMD
> + * already in the tree, we will skip the insertion and just dirty the PMD as
> + * appropriate.
> + */
> static void *dax_insert_mapping_entry(struct address_space *mapping,
> struct vm_fault *vmf,
> - void *entry, sector_t sector)
> + void *entry, sector_t sector,
> + unsigned long flags)
> {
> struct radix_tree_root *page_tree = &mapping->page_tree;
> int error = 0;
> @@ -608,22 +683,28 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
> error = radix_tree_preload(vmf->gfp_mask & ~__GFP_HIGHMEM);
> if (error)
> return ERR_PTR(error);
> + } else if (((unsigned long)entry & RADIX_DAX_HZP) &&
> + !(flags & RADIX_DAX_HZP)) {
> + /* replacing huge zero page with PMD block mapping */
> + unmap_mapping_range(mapping,
> + (vmf->pgoff << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0);
> }
>
> spin_lock_irq(&mapping->tree_lock);
> - new_entry = (void *)((unsigned long)RADIX_DAX_ENTRY(sector, false) |
> - RADIX_DAX_ENTRY_LOCK);
> + new_entry = dax_radix_entry(sector, flags);
> +
You've lost the RADIX_DAX_ENTRY_LOCK flag here?
> if (hole_fill) {
> __delete_from_page_cache(entry, NULL);
> /* Drop pagecache reference */
> put_page(entry);
> - error = radix_tree_insert(page_tree, index, new_entry);
> + error = __radix_tree_insert(page_tree, index,
> + dax_radix_order(new_entry), new_entry);
> if (error) {
> new_entry = ERR_PTR(error);
> goto unlock;
> }
> mapping->nrexceptional++;
> - } else {
> + } else if ((unsigned long)entry & (RADIX_DAX_HZP|RADIX_DAX_EMPTY)) {
> void **slot;
> void *ret;
Uh, why this condition need to change? Is it some protection so that we
don't replace a mapped PMD entry with PTE one?
<snip>
> @@ -1261,4 +1338,186 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> return VM_FAULT_NOPAGE | major;
> }
> EXPORT_SYMBOL_GPL(dax_iomap_fault);
> +
> +#ifdef CONFIG_FS_DAX_PMD
> +/*
> + * The 'colour' (ie low bits) within a PMD of a page offset. This comes up
> + * more often than one might expect in the below functions.
> + */
> +#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
Just out of curiosity: Why the british spelling of 'colour'?
> +
> +static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd,
> + struct vm_fault *vmf, unsigned long address,
> + struct iomap *iomap, loff_t pos, bool write, void **entryp)
> +{
> + struct address_space *mapping = vma->vm_file->f_mapping;
> + struct block_device *bdev = iomap->bdev;
> + struct blk_dax_ctl dax = {
> + .sector = dax_iomap_sector(iomap, pos),
> + .size = PMD_SIZE,
> + };
> + long length = dax_map_atomic(bdev, &dax);
> + void *ret;
> +
> + if (length < 0) /* dax_map_atomic() failed */
> + return VM_FAULT_FALLBACK;
> + if (length < PMD_SIZE)
> + goto unmap_fallback;
> + if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR)
> + goto unmap_fallback;
> + if (!pfn_t_devmap(dax.pfn))
> + goto unmap_fallback;
> +
> + dax_unmap_atomic(bdev, &dax);
> +
> + ret = dax_insert_mapping_entry(mapping, vmf, *entryp, dax.sector,
> + RADIX_DAX_PMD);
> + if (IS_ERR(ret))
> + return VM_FAULT_FALLBACK;
> + *entryp = ret;
> +
> + return vmf_insert_pfn_pmd(vma, address, pmd, dax.pfn, write);
> +
> + unmap_fallback:
> + dax_unmap_atomic(bdev, &dax);
> + return VM_FAULT_FALLBACK;
> +}
> +
> +static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd,
> + struct vm_fault *vmf, unsigned long address,
> + struct iomap *iomap, void **entryp)
> +{
> + struct address_space *mapping = vma->vm_file->f_mapping;
> + unsigned long pmd_addr = address & PMD_MASK;
> + struct page *zero_page;
> + spinlock_t *ptl;
> + pmd_t pmd_entry;
> + void *ret;
> +
> + zero_page = get_huge_zero_page();
> +
> + if (unlikely(!zero_page))
> + return VM_FAULT_FALLBACK;
> +
> + ret = dax_insert_mapping_entry(mapping, vmf, *entryp, 0,
> + RADIX_DAX_PMD | RADIX_DAX_HZP);
> + if (IS_ERR(ret))
> + return VM_FAULT_FALLBACK;
> + *entryp = ret;
> +
> + ptl = pmd_lock(vma->vm_mm, pmd);
> + if (!pmd_none(*pmd)) {
> + spin_unlock(ptl);
> + return VM_FAULT_FALLBACK;
> + }
> +
> + pmd_entry = mk_pmd(zero_page, vma->vm_page_prot);
> + pmd_entry = pmd_mkhuge(pmd_entry);
> + set_pmd_at(vma->vm_mm, pmd_addr, pmd, pmd_entry);
> + spin_unlock(ptl);
> + return VM_FAULT_NOPAGE;
> +}
> +
> +int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
> + pmd_t *pmd, unsigned int flags, struct iomap_ops *ops)
> +{
> + struct address_space *mapping = vma->vm_file->f_mapping;
> + unsigned long pmd_addr = address & PMD_MASK;
> + bool write = flags & FAULT_FLAG_WRITE;
> + unsigned int iomap_flags = write ? IOMAP_WRITE : 0;
> + struct inode *inode = mapping->host;
> + int result = VM_FAULT_FALLBACK;
> + struct iomap iomap = { 0 };
Why the 0 here? Just empty braces are enough to initialize the structure to
zeros.
> + pgoff_t size, pgoff;
> + struct vm_fault vmf;
> + void *entry;
> + loff_t pos;
> + int error;
> +
> + /* Fall back to PTEs if we're going to COW */
> + if (write && !(vma->vm_flags & VM_SHARED)) {
> + split_huge_pmd(vma, pmd, address);
> + goto fallback;
> + }
> +
> + /* If the PMD would extend outside the VMA */
> + if (pmd_addr < vma->vm_start)
> + goto fallback;
> + if ((pmd_addr + PMD_SIZE) > vma->vm_end)
> + goto fallback;
> +
> + /*
> + * Check whether offset isn't beyond end of file now. Caller is
> + * supposed to hold locks serializing us with truncate / punch hole so
> + * this is a reliable test.
> + */
> + pgoff = linear_page_index(vma, pmd_addr);
> + size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
Nitpick - 'size' does not express that this is in pages and rounded up.
Maybe we could have:
max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
and then use strict inequalities below?
Honza
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
next prev parent reply other threads:[~2016-10-11 8:31 UTC|newest]
Thread overview: 182+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-10-07 21:08 [PATCH v5 00/17] re-enable DAX PMD support Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` [PATCH v5 04/17] ext2: remove support for DAX PMD faults Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
[not found] ` <1475874544-24842-1-git-send-email-ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2016-10-07 21:08 ` [PATCH v5 01/17] ext4: allow DAX writeback for hole punch Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` [PATCH v5 02/17] ext4: tell DAX the size of allocation holes Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` [PATCH v5 03/17] dax: remove buffer_size_valid() Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` [PATCH v5 05/17] ext2: return -EIO on ext2_iomap_end() failure Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-11 6:48 ` Jan Kara
2016-10-11 6:48 ` Jan Kara
2016-10-11 6:48 ` Jan Kara
2016-10-11 6:48 ` Jan Kara
2016-10-07 21:08 ` [PATCH v5 06/17] dax: make 'wait_table' global variable static Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` [PATCH v5 07/17] dax: remove the last BUG_ON() from fs/dax.c Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
[not found] ` <1475874544-24842-8-git-send-email-ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2016-10-10 15:45 ` Christoph Hellwig
2016-10-10 15:45 ` Christoph Hellwig
2016-10-10 15:45 ` Christoph Hellwig
2016-10-10 15:45 ` Christoph Hellwig
2016-10-10 15:45 ` Christoph Hellwig
2016-10-11 6:50 ` Jan Kara
2016-10-11 6:50 ` Jan Kara
2016-10-11 6:50 ` Jan Kara
2016-10-11 6:50 ` Jan Kara
2016-10-11 6:50 ` Jan Kara
2016-10-07 21:08 ` [PATCH v5 08/17] dax: consistent variable naming for DAX entries Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` [PATCH v5 10/17] dax: remove dax_pmd_fault() Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` [PATCH v5 11/17] dax: correct dax iomap code namespace Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
[not found] ` <1475874544-24842-12-git-send-email-ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2016-10-09 15:28 ` Christoph Hellwig
2016-10-09 15:28 ` Christoph Hellwig
2016-10-09 15:28 ` Christoph Hellwig
2016-10-09 15:28 ` Christoph Hellwig
[not found] ` <20161009152803.GA20111-jcswGhMUV9g@public.gmane.org>
2016-10-10 19:04 ` [PATCH] " Ross Zwisler
2016-10-10 19:04 ` Ross Zwisler
2016-10-10 19:04 ` Ross Zwisler
2016-10-10 19:04 ` Ross Zwisler
[not found] ` <1476126244-27673-1-git-send-email-ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2016-10-10 20:19 ` Dave Chinner
2016-10-10 20:19 ` Dave Chinner
2016-10-10 20:19 ` Dave Chinner
2016-10-10 20:19 ` Dave Chinner
2016-10-07 21:08 ` [PATCH v5 12/17] dax: add dax_iomap_sector() helper function Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
[not found] ` <1475874544-24842-13-git-send-email-ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2016-10-10 15:46 ` Christoph Hellwig
2016-10-10 15:46 ` Christoph Hellwig
2016-10-10 15:46 ` Christoph Hellwig
2016-10-10 15:46 ` Christoph Hellwig
2016-10-10 15:46 ` Christoph Hellwig
2016-10-11 7:06 ` Jan Kara
2016-10-11 7:06 ` Jan Kara
2016-10-11 7:06 ` Jan Kara
2016-10-11 7:06 ` Jan Kara
2016-10-07 21:09 ` [PATCH v5 15/17] dax: add struct iomap based DAX PMD support Ross Zwisler
2016-10-07 21:09 ` Ross Zwisler
2016-10-07 21:09 ` Ross Zwisler
2016-10-07 21:09 ` Ross Zwisler
2016-10-07 21:09 ` Ross Zwisler
[not found] ` <1475874544-24842-16-git-send-email-ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2016-10-10 15:59 ` Christoph Hellwig
2016-10-10 15:59 ` Christoph Hellwig
2016-10-10 15:59 ` Christoph Hellwig
2016-10-10 15:59 ` Christoph Hellwig
[not found] ` <20161010155917.GA19978-jcswGhMUV9g@public.gmane.org>
2016-10-10 22:06 ` Ross Zwisler
2016-10-10 22:06 ` Ross Zwisler
2016-10-10 22:06 ` Ross Zwisler
2016-10-10 22:06 ` Ross Zwisler
2016-10-10 22:06 ` Ross Zwisler
2016-10-11 21:48 ` Ross Zwisler
2016-10-11 21:48 ` Ross Zwisler
2016-10-11 21:48 ` Ross Zwisler
2016-10-11 21:48 ` Ross Zwisler
2016-10-11 21:48 ` Ross Zwisler
2016-10-11 8:31 ` Jan Kara [this message]
2016-10-11 8:31 ` Jan Kara
2016-10-11 8:31 ` Jan Kara
2016-10-11 8:31 ` Jan Kara
2016-10-11 8:31 ` Jan Kara
[not found] ` <20161011083152.GG6952-4I4JzKEfoa/jFM9bn6wA6Q@public.gmane.org>
2016-10-11 22:51 ` Ross Zwisler
2016-10-11 22:51 ` Ross Zwisler
2016-10-11 22:51 ` Ross Zwisler
2016-10-11 22:51 ` Ross Zwisler
2016-10-11 22:51 ` Ross Zwisler
2016-10-12 7:45 ` Jan Kara
2016-10-12 7:45 ` Jan Kara
2016-10-12 7:45 ` Jan Kara
2016-10-07 21:09 ` [PATCH v5 16/17] xfs: use struct iomap based DAX PMD fault path Ross Zwisler
2016-10-07 21:09 ` Ross Zwisler
2016-10-07 21:09 ` Ross Zwisler
2016-10-07 21:09 ` Ross Zwisler
2016-10-11 8:34 ` Jan Kara
2016-10-11 8:34 ` Jan Kara
2016-10-11 8:34 ` Jan Kara
2016-10-11 8:34 ` Jan Kara
2016-10-07 21:09 ` [PATCH v5 17/17] dax: remove "depends on BROKEN" from FS_DAX_PMD Ross Zwisler
2016-10-07 21:09 ` Ross Zwisler
2016-10-07 21:09 ` Ross Zwisler
2016-10-07 21:09 ` Ross Zwisler
2016-10-07 21:09 ` Ross Zwisler
2016-10-07 21:08 ` [PATCH v5 09/17] dax: coordinate locking for offsets in PMD range Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-07 21:08 ` Ross Zwisler
2016-10-10 15:46 ` Christoph Hellwig
2016-10-10 15:46 ` Christoph Hellwig
2016-10-10 15:46 ` Christoph Hellwig
2016-10-11 7:04 ` Jan Kara
2016-10-11 7:04 ` Jan Kara
2016-10-11 7:04 ` Jan Kara
2016-10-11 7:04 ` Jan Kara
2016-10-11 21:18 ` Ross Zwisler
2016-10-11 21:18 ` Ross Zwisler
2016-10-11 21:18 ` Ross Zwisler
2016-10-07 21:09 ` [PATCH v5 13/17] dax: dax_iomap_fault() needs to call iomap_end() Ross Zwisler
2016-10-07 21:09 ` Ross Zwisler
2016-10-07 21:09 ` Ross Zwisler
2016-10-07 21:09 ` Ross Zwisler
2016-10-10 15:50 ` Christoph Hellwig
2016-10-10 15:50 ` Christoph Hellwig
2016-10-10 15:50 ` Christoph Hellwig
2016-10-10 15:50 ` Christoph Hellwig
[not found] ` <20161010155004.GD19343-jcswGhMUV9g@public.gmane.org>
2016-10-10 22:05 ` Ross Zwisler
2016-10-10 22:05 ` Ross Zwisler
2016-10-10 22:05 ` Ross Zwisler
2016-10-10 22:05 ` Ross Zwisler
[not found] ` <1475874544-24842-14-git-send-email-ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2016-10-11 7:21 ` Jan Kara
2016-10-11 7:21 ` Jan Kara
2016-10-11 7:21 ` Jan Kara
2016-10-11 7:21 ` Jan Kara
2016-10-11 7:21 ` Jan Kara
2016-10-07 21:09 ` [PATCH v5 14/17] dax: move RADIX_DAX_* defines to dax.h Ross Zwisler
2016-10-07 21:09 ` Ross Zwisler
2016-10-07 21:09 ` Ross Zwisler
2016-10-07 21:09 ` Ross Zwisler
[not found] ` <1475874544-24842-15-git-send-email-ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2016-10-10 15:50 ` Christoph Hellwig
2016-10-10 15:50 ` Christoph Hellwig
2016-10-10 15:50 ` Christoph Hellwig
2016-10-10 15:50 ` Christoph Hellwig
2016-10-10 15:50 ` Christoph Hellwig
2016-10-11 7:23 ` Jan Kara
2016-10-11 7:23 ` Jan Kara
2016-10-11 7:23 ` Jan Kara
2016-10-11 7:23 ` Jan Kara
2016-10-11 7:23 ` Jan Kara
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20161011083152.GG6952@quack2.suse.cz \
--to=jack-alswssmvlrq@public.gmane.org \
--cc=adilger.kernel-m1MBpc4rdrD3fQ9qLvQP4Q@public.gmane.org \
--cc=akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org \
--cc=david-FqsqvQoI3Ljby3iVrkZq2A@public.gmane.org \
--cc=hch-jcswGhMUV9g@public.gmane.org \
--cc=jack-IBi9RG/b67k@public.gmane.org \
--cc=linux-ext4-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org \
--cc=linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw@public.gmane.org \
--cc=linux-xfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=mawilcox-0li6OtcxBFHby3iVrkZq2A@public.gmane.org \
--cc=ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org \
--cc=tytso-3s7WtUTddSA@public.gmane.org \
--cc=viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.