From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org, Brian Foster <bfoster@redhat.com>,
Christoph Hellwig <hch@lst.de>,
"Darrick J. Wong" <darrick.wong@oracle.com>
Subject: [PATCH 4.9 05/72] xfs: pull up iolock from xfs_free_eofblocks()
Date: Thu, 6 Apr 2017 10:37:52 +0200 [thread overview]
Message-ID: <20170406083620.023320177@linuxfoundation.org> (raw)
In-Reply-To: <20170406083619.775985942@linuxfoundation.org>
4.9-stable review patch. If anyone has any objections, please let me know.
------------------
From: Brian Foster <bfoster@redhat.com>
commit a36b926180cda375ac2ec89e1748b47137cfc51c upstream.
xfs_free_eofblocks() requires the IOLOCK_EXCL lock, but is called from
different contexts where the lock may or may not be held. The
need_iolock parameter exists for this reason, to indicate whether
xfs_free_eofblocks() must acquire the iolock itself before it can
proceed.
This is ugly and confusing. Simplify the semantics of
xfs_free_eofblocks() to require the caller to acquire the iolock
appropriately and kill the need_iolock parameter. While here, remove the
mp param as well, since the xfs_mount is accessible from the xfs_inode
structure. This patch does not change behavior.
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
fs/xfs/xfs_bmap_util.c | 41 +++++++++++++++------------------------
fs/xfs/xfs_bmap_util.h | 3 --
fs/xfs/xfs_icache.c | 24 ++++++++++++++---------
fs/xfs/xfs_inode.c | 51 ++++++++++++++++++++++++++-----------------------
4 files changed, 60 insertions(+), 59 deletions(-)
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -917,17 +917,18 @@ xfs_can_free_eofblocks(struct xfs_inode
*/
int
xfs_free_eofblocks(
- xfs_mount_t *mp,
- xfs_inode_t *ip,
- bool need_iolock)
+ struct xfs_inode *ip)
{
- xfs_trans_t *tp;
- int error;
- xfs_fileoff_t end_fsb;
- xfs_fileoff_t last_fsb;
- xfs_filblks_t map_len;
- int nimaps;
- xfs_bmbt_irec_t imap;
+ struct xfs_trans *tp;
+ int error;
+ xfs_fileoff_t end_fsb;
+ xfs_fileoff_t last_fsb;
+ xfs_filblks_t map_len;
+ int nimaps;
+ struct xfs_bmbt_irec imap;
+ struct xfs_mount *mp = ip->i_mount;
+
+ ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
/*
* Figure out if there are any blocks beyond the end
@@ -944,6 +945,10 @@ xfs_free_eofblocks(
error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ /*
+ * If there are blocks after the end of file, truncate the file to its
+ * current size to free them up.
+ */
if (!error && (nimaps != 0) &&
(imap.br_startblock != HOLESTARTBLOCK ||
ip->i_delayed_blks)) {
@@ -954,22 +959,10 @@ xfs_free_eofblocks(
if (error)
return error;
- /*
- * There are blocks after the end of file.
- * Free them up now by truncating the file to
- * its current size.
- */
- if (need_iolock) {
- if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
- return -EAGAIN;
- }
-
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0,
&tp);
if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
- if (need_iolock)
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return error;
}
@@ -997,8 +990,6 @@ xfs_free_eofblocks(
}
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- if (need_iolock)
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
}
return error;
}
@@ -1415,7 +1406,7 @@ xfs_shift_file_space(
* into the accessible region of the file.
*/
if (xfs_can_free_eofblocks(ip, true)) {
- error = xfs_free_eofblocks(mp, ip, false);
+ error = xfs_free_eofblocks(ip);
if (error)
return error;
}
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -63,8 +63,7 @@ int xfs_insert_file_space(struct xfs_ino
/* EOF block manipulation functions */
bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
-int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
- bool need_iolock);
+int xfs_free_eofblocks(struct xfs_inode *ip);
int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
struct xfs_swapext *sx);
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1324,7 +1324,7 @@ xfs_inode_free_eofblocks(
int flags,
void *args)
{
- int ret;
+ int ret = 0;
struct xfs_eofblocks *eofb = args;
bool need_iolock = true;
int match;
@@ -1360,19 +1360,25 @@ xfs_inode_free_eofblocks(
return 0;
/*
- * A scan owner implies we already hold the iolock. Skip it in
- * xfs_free_eofblocks() to avoid deadlock. This also eliminates
- * the possibility of EAGAIN being returned.
+ * A scan owner implies we already hold the iolock. Skip it here
+ * to avoid deadlock.
*/
if (eofb->eof_scan_owner == ip->i_ino)
need_iolock = false;
}
- ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock);
-
- /* don't revisit the inode if we're not waiting */
- if (ret == -EAGAIN && !(flags & SYNC_WAIT))
- ret = 0;
+ /*
+ * If the caller is waiting, return -EAGAIN to keep the background
+ * scanner moving and revisit the inode in a subsequent pass.
+ */
+ if (need_iolock && !xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
+ if (flags & SYNC_WAIT)
+ ret = -EAGAIN;
+ return ret;
+ }
+ ret = xfs_free_eofblocks(ip);
+ if (need_iolock)
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1701,32 +1701,34 @@ xfs_release(
if (xfs_can_free_eofblocks(ip, false)) {
/*
+ * Check if the inode is being opened, written and closed
+ * frequently and we have delayed allocation blocks outstanding
+ * (e.g. streaming writes from the NFS server), truncating the
+ * blocks past EOF will cause fragmentation to occur.
+ *
+ * In this case don't do the truncation, but we have to be
+ * careful how we detect this case. Blocks beyond EOF show up as
+ * i_delayed_blks even when the inode is clean, so we need to
+ * truncate them away first before checking for a dirty release.
+ * Hence on the first dirty close we will still remove the
+ * speculative allocation, but after that we will leave it in
+ * place.
+ */
+ if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
+ return 0;
+ /*
* If we can't get the iolock just skip truncating the blocks
* past EOF because we could deadlock with the mmap_sem
- * otherwise. We'll get another chance to drop them once the
+ * otherwise. We'll get another chance to drop them once the
* last reference to the inode is dropped, so we'll never leak
* blocks permanently.
- *
- * Further, check if the inode is being opened, written and
- * closed frequently and we have delayed allocation blocks
- * outstanding (e.g. streaming writes from the NFS server),
- * truncating the blocks past EOF will cause fragmentation to
- * occur.
- *
- * In this case don't do the truncation, either, but we have to
- * be careful how we detect this case. Blocks beyond EOF show
- * up as i_delayed_blks even when the inode is clean, so we
- * need to truncate them away first before checking for a dirty
- * release. Hence on the first dirty close we will still remove
- * the speculative allocation, but after that we will leave it
- * in place.
*/
- if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
- return 0;
-
- error = xfs_free_eofblocks(mp, ip, true);
- if (error && error != -EAGAIN)
- return error;
+ if (xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
+ error = xfs_free_eofblocks(ip);
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ if (error)
+ return error;
+ }
/* delalloc blocks after truncation means it really is dirty */
if (ip->i_delayed_blks)
@@ -1913,8 +1915,11 @@ xfs_inactive(
* cache. Post-eof blocks must be freed, lest we end up with
* broken free space accounting.
*/
- if (xfs_can_free_eofblocks(ip, true))
- xfs_free_eofblocks(mp, ip, false);
+ if (xfs_can_free_eofblocks(ip, true)) {
+ xfs_ilock(ip, XFS_IOLOCK_EXCL);
+ xfs_free_eofblocks(ip);
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ }
return;
}
next prev parent reply other threads:[~2017-04-06 8:40 UTC|newest]
Thread overview: 68+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-04-06 8:37 [PATCH 4.9 00/72] 4.9.21-stable review Greg Kroah-Hartman
2017-04-06 8:37 ` [PATCH 4.9 01/72] libceph: force GFP_NOIO for socket allocations Greg Kroah-Hartman
2017-04-06 8:37 ` [PATCH 4.9 02/72] xen/setup: Dont relocate p2m over existing one Greg Kroah-Hartman
2017-04-06 8:37 ` [PATCH 4.9 03/72] xfs: only update mount/resv fields on success in __xfs_ag_resv_init Greg Kroah-Hartman
2017-04-06 8:37 ` [PATCH 4.9 04/72] xfs: use per-AG reservations for the finobt Greg Kroah-Hartman
2017-04-06 8:37 ` Greg Kroah-Hartman [this message]
2017-04-06 8:37 ` [PATCH 4.9 06/72] xfs: sync eofblocks scans under iolock are livelock prone Greg Kroah-Hartman
2017-04-06 8:37 ` [PATCH 4.9 07/72] xfs: fix eofblocks race with file extending async dio writes Greg Kroah-Hartman
2017-04-06 8:37 ` [PATCH 4.9 08/72] xfs: fix toctou race when locking an inode to access the data map Greg Kroah-Hartman
2017-04-06 8:37 ` [PATCH 4.9 09/72] xfs: fail _dir_open when readahead fails Greg Kroah-Hartman
2017-04-06 8:37 ` [PATCH 4.9 10/72] xfs: filter out obviously bad btree pointers Greg Kroah-Hartman
2017-04-06 8:37 ` [PATCH 4.9 11/72] xfs: check for obviously bad level values in the bmbt root Greg Kroah-Hartman
2017-04-06 8:37 ` [PATCH 4.9 12/72] xfs: verify free block header fields Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 13/72] xfs: allow unwritten extents in the CoW fork Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 14/72] xfs: mark speculative prealloc CoW fork extents unwritten Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 15/72] xfs: reset b_first_retry_time when clear the retry status of xfs_buf_t Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 16/72] xfs: update ctime and mtime on clone destinatation inodes Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 17/72] xfs: reject all unaligned direct writes to reflinked files Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 18/72] xfs: dont fail xfs_extent_busy allocation Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 19/72] xfs: handle indlen shortage on delalloc extent merge Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 20/72] xfs: split indlen reservations fairly when under reserved Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 21/72] xfs: fix uninitialized variable in _reflink_convert_cow Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 22/72] xfs: dont reserve blocks for right shift transactions Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 23/72] xfs: Use xfs_icluster_size_fsb() to calculate inode chunk alignment Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 24/72] xfs: tune down agno asserts in the bmap code Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 25/72] xfs: only reclaim unwritten COW extents periodically Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 26/72] xfs: fix and streamline error handling in xfs_end_io Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 27/72] xfs: Use xfs_icluster_size_fsb() to calculate inode alignment mask Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 28/72] xfs: use iomap new flag for newly allocated delalloc blocks Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 29/72] xfs: try any AG when allocating the first btree block when reflinking Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 30/72] scsi: sg: check length passed to SG_NEXT_CMD_LEN Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 31/72] scsi: libsas: fix ata xfer length Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 32/72] scsi: scsi_dh_alua: Check scsi_device_get() return value Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 33/72] scsi: scsi_dh_alua: Ensure that alua_activate() calls the completion function Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 35/72] ALSA: seq: Fix race during FIFO resize Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 36/72] ALSA: hda - fix a problem for lineout on a Dell AIO machine Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 37/72] ASoC: atmel-classd: fix audio clock rate Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 38/72] ASoC: Intel: Skylake: fix invalid memory access due to wrong reference of pointer Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 39/72] HID: wacom: Dont add ghost interface as shared data Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 40/72] mmc: sdhci: Disable runtime pm when the sdio_irq is enabled Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 41/72] mmc: sdhci-of-at91: fix MMC_DDR_52 timing selection Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 42/72] NFSv4.1 fix infinite loop on IO BAD_STATEID error Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 43/72] nfsd: map the ENOKEY to nfserr_perm for avoiding warning Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 44/72] parisc: Clean up fixup routines for get_user()/put_user() Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 45/72] parisc: Avoid stalled CPU warnings after system shutdown Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 46/72] parisc: Fix access fault handling in pa_memcpy() Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 47/72] ACPI: Fix incompatibility with mcount-based function graph tracing Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 48/72] ACPI: Do not create a platform_device for IOAPIC/IOxAPIC Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 49/72] tty/serial: atmel: fix race condition (TX+DMA) Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 50/72] tty/serial: atmel: fix TX path in atmel_console_write() Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 51/72] USB: fix linked-list corruption in rh_call_control() Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 54/72] KVM: kvm_io_bus_unregister_dev() should never fail Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 56/72] drm/vc4: Allocate the right amount of space for boot-time CRTC state Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 57/72] drm/etnaviv: (re-)protect fence allocation with GPU mutex Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 58/72] x86/mm/KASLR: Exclude EFI region from KASLR VA space randomization Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 59/72] x86/mce: Fix copy/paste error in exception table entries Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 61/72] mm: rmap: fix huge file mmap accounting in the memcg stats Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 62/72] mm, hugetlb: use pte_present() instead of pmd_present() in follow_huge_pmd() Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 64/72] qla2xxx: Allow vref count to timeout on vport delete Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 66/72] MIPS: Lantiq: Fix cascaded IRQ setup Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 67/72] mm: workingset: fix premature shadow node shrinking with cgroups Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 68/72] blk: improve order of bio handling in generic_make_request() Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 69/72] blk: Ensure users for current->bio_list can see the full list Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 70/72] padata: avoid race in reordering Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 71/72] nvme/core: Fix race kicking freed request_queue Greg Kroah-Hartman
2017-04-06 8:38 ` [PATCH 4.9 72/72] nvme/pci: Disable on removal when disconnected Greg Kroah-Hartman
2017-04-06 17:46 ` [PATCH 4.9 00/72] 4.9.21-stable review Shuah Khan
2017-04-06 21:52 ` Guenter Roeck
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170406083620.023320177@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=bfoster@redhat.com \
--cc=darrick.wong@oracle.com \
--cc=hch@lst.de \
--cc=linux-kernel@vger.kernel.org \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).