From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
alan@lxorguk.ukuu.org.uk, Peng Tao <tao.peng@emc.com>,
Trond Myklebust <Trond.Myklebust@netapp.com>
Subject: [ 02/76] pnfsblock: fix partial page buffer write
Date: Thu, 18 Oct 2012 19:46:26 -0700 [thread overview]
Message-ID: <20121019024350.466211922@linuxfoundation.org> (raw)
In-Reply-To: <20121019024350.087156547@linuxfoundation.org>
3.6-stable review patch. If anyone has any objections, please let me know.
------------------
From: Peng Tao <bergwolf@gmail.com>
commit fe6e1e8d9fad86873eb74a26e80a8f91f9e870b5 upstream.
If an application uses flock to protect its write range, generic NFS
will not do a read-modify-write cycle at the page cache level. Therefore
the layout driver (LD) must know how to handle non-sector-aligned writes.
Otherwise there will be data corruption.
Signed-off-by: Peng Tao <tao.peng@emc.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
fs/nfs/blocklayout/blocklayout.c | 177 ++++++++++++++++++++++++++++++++++++---
fs/nfs/blocklayout/blocklayout.h | 1
2 files changed, 166 insertions(+), 12 deletions(-)
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -162,25 +162,39 @@ static struct bio *bl_alloc_init_bio(int
return bio;
}
-static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
+static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw,
sector_t isect, struct page *page,
struct pnfs_block_extent *be,
void (*end_io)(struct bio *, int err),
- struct parallel_io *par)
+ struct parallel_io *par,
+ unsigned int offset, int len)
{
+ isect = isect + (offset >> SECTOR_SHIFT);
+ dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__,
+ npg, rw, (unsigned long long)isect, offset, len);
retry:
if (!bio) {
bio = bl_alloc_init_bio(npg, isect, be, end_io, par);
if (!bio)
return ERR_PTR(-ENOMEM);
}
- if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
+ if (bio_add_page(bio, page, len, offset) < len) {
bio = bl_submit_bio(rw, bio);
goto retry;
}
return bio;
}
+/* Whole-page variant of do_add_page_to_bio(). */
+static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
+		sector_t isect, struct page *page, struct pnfs_block_extent *be,
+		void (*end_io)(struct bio *, int err), struct parallel_io *par)
+{
+	/* A full page starts at offset 0 and spans PAGE_CACHE_SIZE bytes. */
+	return do_add_page_to_bio(bio, npg, rw, isect, page, be, end_io, par,
+				  0, PAGE_CACHE_SIZE);
+}
+
/* This is basically copied from mpage_end_io_read */
static void bl_end_io_read(struct bio *bio, int err)
{
@@ -461,6 +475,106 @@ map_block(struct buffer_head *bh, sector
return;
}
+/* bio end_io callback: unlock the page in the bio's last (only) vector. */
+static void
+bl_read_single_end_io(struct bio *bio, int error)
+{
+	struct bio_vec *last_vec = &bio->bi_io_vec[bio->bi_vcnt - 1];
+
+	/* @error is not inspected here. */
+	unlock_page(last_vec->bv_page);
+}
+
+/* Read the sector holding byte @offset of @page from @be into a scratch page
+ * and copy @len bytes at @offset into @page; returns 0, -ENOMEM or -EIO.
+ * NOTE(review): only SECTOR_SIZE bytes are read; confirm callers' @len fits.
+ */
+static int
+bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be,
+		    unsigned int offset, unsigned int len)
+{
+	struct bio *bio;
+	struct page *shadow_page;
+	sector_t isect;
+	char *kaddr, *kshadow_addr;
+	int ret = -ENOMEM;
+
+	dprintk("%s: offset %u len %u\n", __func__, offset, len);
+	shadow_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+	if (shadow_page == NULL)
+		return ret;
+	bio = bio_alloc(GFP_NOIO, 1);
+	if (bio == NULL)
+		goto out_free_page;	/* don't leak shadow_page */
+
+	isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) + offset / SECTOR_SIZE;
+	bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
+	bio->bi_bdev = be->be_mdev;
+	bio->bi_end_io = bl_read_single_end_io;
+
+	ret = -EIO;
+	lock_page(shadow_page);	/* unlocked again by the bio's end_io */
+	if (bio_add_page(bio, shadow_page,
+			 SECTOR_SIZE, round_down(offset, SECTOR_SIZE)) == 0) {
+		unlock_page(shadow_page);
+		goto out_put_bio;
+	}
+
+	submit_bio(READ, bio);
+	wait_on_page_locked(shadow_page);
+	if (likely(test_bit(BIO_UPTODATE, &bio->bi_flags))) {
+		kaddr = kmap_atomic(page);
+		kshadow_addr = kmap_atomic(shadow_page);
+		memcpy(kaddr + offset, kshadow_addr + offset, len);
+		kunmap_atomic(kshadow_addr);
+		kunmap_atomic(kaddr);
+		ret = 0;
+	}
+out_put_bio:
+	bio_put(bio);
+out_free_page:
+	__free_page(shadow_page);
+	return ret;
+}
+
+/* Prepare the parts of @page surrounding the dirty range (the whole page if
+ * @full_page, otherwise out to the enclosing sector boundaries): zero them
+ * when @be is NULL, else fetch them with bl_do_readpage_sync().
+ */
+static int
+bl_read_partial_page_sync(struct page *page, struct pnfs_block_extent *be,
+			  unsigned int dirty_offset, unsigned int dirty_len,
+			  bool full_page)
+{
+	unsigned int dirty_end = dirty_offset + dirty_len;
+	unsigned int start = 0, end = PAGE_CACHE_SIZE;
+	int ret = 0;
+
+	if (!full_page) {
+		start = round_down(dirty_offset, SECTOR_SIZE);
+		end = round_up(dirty_end, SECTOR_SIZE);
+	}
+
+	dprintk("%s: offset %u len %d\n", __func__, dirty_offset, dirty_len);
+	if (!be) {
+		zero_user_segments(page, start, dirty_offset, dirty_end, end);
+		if (start == 0 && end == PAGE_CACHE_SIZE &&
+		    trylock_page(page)) {
+			SetPageUptodate(page);
+			unlock_page(page);
+		}
+		return 0;
+	}
+
+	if (start != dirty_offset)
+		ret = bl_do_readpage_sync(page, be, start, dirty_offset - start);
+
+	if (ret == 0 && dirty_end < end)
+		ret = bl_do_readpage_sync(page, be, dirty_end, end - dirty_end);
+
+	return ret;
+}
+
/* Given an unmapped page, zero it or read in page for COW, page is locked
* by caller.
*/
@@ -494,7 +608,6 @@ init_page_for_write(struct page *page, s
SetPageUptodate(page);
cleanup:
- bl_put_extent(cow_read);
if (bh)
free_buffer_head(bh);
if (ret) {
@@ -566,6 +679,7 @@ bl_write_pagelist(struct nfs_write_data
struct parallel_io *par = NULL;
loff_t offset = wdata->args.offset;
size_t count = wdata->args.count;
+ unsigned int pg_offset, pg_len, saved_len;
struct page **pages = wdata->args.pages;
struct page *page;
pgoff_t index;
@@ -674,10 +788,11 @@ next_page:
if (!extent_length) {
/* We've used up the previous extent */
bl_put_extent(be);
+ bl_put_extent(cow_read);
bio = bl_submit_bio(WRITE, bio);
/* Get the next one */
be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
- isect, NULL);
+ isect, &cow_read);
if (!be || !is_writable(be, isect)) {
header->pnfs_error = -EINVAL;
goto out;
@@ -694,7 +809,26 @@ next_page:
extent_length = be->be_length -
(isect - be->be_f_offset);
}
- if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+
+ dprintk("%s offset %lld count %Zu\n", __func__, offset, count);
+ pg_offset = offset & ~PAGE_CACHE_MASK;
+ if (pg_offset + count > PAGE_CACHE_SIZE)
+ pg_len = PAGE_CACHE_SIZE - pg_offset;
+ else
+ pg_len = count;
+
+ saved_len = pg_len;
+ if (be->be_state == PNFS_BLOCK_INVALID_DATA &&
+ !bl_is_sector_init(be->be_inval, isect)) {
+ ret = bl_read_partial_page_sync(pages[i], cow_read,
+ pg_offset, pg_len, true);
+ if (ret) {
+ dprintk("%s bl_read_partial_page_sync fail %d\n",
+ __func__, ret);
+ header->pnfs_error = ret;
+ goto out;
+ }
+
ret = bl_mark_sectors_init(be->be_inval, isect,
PAGE_CACHE_SECTORS);
if (unlikely(ret)) {
@@ -703,15 +837,35 @@ next_page:
header->pnfs_error = ret;
goto out;
}
+
+ /* Expand to full page write */
+ pg_offset = 0;
+ pg_len = PAGE_CACHE_SIZE;
+ } else if ((pg_offset & (SECTOR_SIZE - 1)) ||
+ (pg_len & (SECTOR_SIZE - 1))){
+ /* ahh, nasty case. We have to do sync full sector
+ * read-modify-write cycles.
+ */
+ unsigned int saved_offset = pg_offset;
+ ret = bl_read_partial_page_sync(pages[i], be, pg_offset,
+ pg_len, false);
+ pg_offset = round_down(pg_offset, SECTOR_SIZE);
+ pg_len = round_up(saved_offset + pg_len, SECTOR_SIZE)
+ - pg_offset;
}
- bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
+
+
+ bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
isect, pages[i], be,
- bl_end_io_write, par);
+ bl_end_io_write, par,
+ pg_offset, pg_len);
if (IS_ERR(bio)) {
header->pnfs_error = PTR_ERR(bio);
bio = NULL;
goto out;
}
+ offset += saved_len;
+ count -= saved_len;
isect += PAGE_CACHE_SECTORS;
last_isect = isect;
extent_length -= PAGE_CACHE_SECTORS;
@@ -729,17 +883,16 @@ next_page:
}
write_done:
- wdata->res.count = (last_isect << SECTOR_SHIFT) - (offset);
- if (count < wdata->res.count) {
- wdata->res.count = count;
- }
+ wdata->res.count = wdata->args.count;
out:
bl_put_extent(be);
+ bl_put_extent(cow_read);
bl_submit_bio(WRITE, bio);
put_parallel(par);
return PNFS_ATTEMPTED;
out_mds:
bl_put_extent(be);
+ bl_put_extent(cow_read);
kfree(par);
return PNFS_NOT_ATTEMPTED;
}
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -41,6 +41,7 @@
#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT)
#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT)
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
struct block_mount_id {
spinlock_t bm_lock; /* protects list */
next prev parent reply other threads:[~2012-10-19 3:09 UTC|newest]
Thread overview: 83+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-10-19 2:46 [ 00/76] 3.6.3-stable review Greg Kroah-Hartman
2012-10-19 2:46 ` [ 01/76] ARM: vfp: fix saving d16-d31 vfp registers on v6+ kernels Greg Kroah-Hartman
2012-10-19 2:46 ` Greg Kroah-Hartman [this message]
2012-10-19 2:46 ` [ 03/76] NFS41: fix error of setting blocklayoutdriver Greg Kroah-Hartman
2012-10-19 2:46 ` [ 04/76] NFS: Remove bad delegations during open recovery Greg Kroah-Hartman
2012-10-19 2:46 ` [ 05/76] nfsd4: dont pin clientids to pseudoflavors Greg Kroah-Hartman
2012-10-19 2:46 ` [ 06/76] nfsd4: fix nfs4 stateid leak Greg Kroah-Hartman
2012-10-19 2:46 ` [ 07/76] NFSD: pass null terminated buf to kstrtouint() Greg Kroah-Hartman
2012-10-19 2:46 ` [ 08/76] lockd: per-net NSM client creation and destruction helpers introduced Greg Kroah-Hartman
2012-10-19 2:46 ` [ 09/76] lockd: use rpc clients cl_nodename for id encoding Greg Kroah-Hartman
2012-10-19 22:10 ` Ben Hutchings
2012-10-19 22:13 ` Ben Hutchings
2012-10-19 2:46 ` [ 10/76] lockd: create and use per-net NSM RPC clients on MON/UNMON requests Greg Kroah-Hartman
2012-10-19 2:46 ` [ 11/76] ACPI: EC: Make the GPE storm threshold a module parameter Greg Kroah-Hartman
2012-10-19 2:46 ` [ 12/76] ACPI: EC: Add a quirk for CLEVO M720T/M730T laptop Greg Kroah-Hartman
2012-10-19 2:46 ` [ 13/76] ALSA: hda - Add missing hda_gen_spec to struct via_spec Greg Kroah-Hartman
2012-10-19 2:46 ` [ 14/76] ALSA: hda - do not detect jack on internal speakers for Realtek Greg Kroah-Hartman
2012-10-19 2:46 ` [ 15/76] ALSA: hda - Fix memory leaks at error path in patch_cirrus.c Greg Kroah-Hartman
2012-10-19 2:46 ` [ 16/76] mips,kgdb: fix recursive page fault with CONFIG_KPROBES Greg Kroah-Hartman
2012-10-19 2:46 ` [ 17/76] tmpfs,ceph,gfs2,isofs,reiserfs,xfs: fix fh_len checking Greg Kroah-Hartman
2012-10-19 2:46 ` [ 18/76] iscsi-target: Correctly set 0xffffffff field within ISCSI_OP_REJECT PDU Greg Kroah-Hartman
2012-10-19 2:46 ` [ 19/76] iscsit: remove incorrect unlock in iscsit_build_sendtargets_resp Greg Kroah-Hartman
2012-10-19 2:46 ` [ 20/76] iscsi-target: Add explicit set of cache_dynamic_acls=1 for TPG demo-mode Greg Kroah-Hartman
2012-10-19 2:46 ` [ 21/76] iscsi-target: Bump defaults for nopin_timeout + nopin_response_timeout values Greg Kroah-Hartman
2012-10-19 2:46 ` [ 22/76] SCSI: storvsc: Account for in-transit packets in the RESET path Greg Kroah-Hartman
2012-10-19 2:46 ` [ 23/76] SCSI: scsi_debug: Fix off-by-one bug when unmapping region Greg Kroah-Hartman
2012-10-19 2:46 ` [ 24/76] SCSI: virtio-scsi: initialize scatterlist structure Greg Kroah-Hartman
2012-10-19 2:46 ` [ 25/76] ARM: 7541/1: Add ARM ERRATA 775420 workaround Greg Kroah-Hartman
2012-10-19 2:46 ` [ 26/76] ARM: OMAP: counter: add locking to read_persistent_clock Greg Kroah-Hartman
2012-10-19 2:46 ` [ 27/76] firewire: cdev: fix user memory corruption (i386 userland on amd64 kernel) Greg Kroah-Hartman
2012-10-19 2:46 ` [ 28/76] SUNRPC: Ensure that the TCP socket is closed when in CLOSE_WAIT Greg Kroah-Hartman
2012-10-19 2:46 ` [ 29/76] target: support zero allocation length in INQUIRY Greg Kroah-Hartman
2012-10-19 2:46 ` [ 30/76] target: fix truncation of mode data, support zero allocation length Greg Kroah-Hartman
2012-10-19 2:46 ` [ 31/76] target: fix return code in target_core_init_configfs error path Greg Kroah-Hartman
2012-10-19 2:46 ` [ 32/76] target/file: Re-enable optional fd_buffered_io=1 operation Greg Kroah-Hartman
2012-10-19 2:46 ` [ 33/76] qla2xxx: Fix endianness of task management response code Greg Kroah-Hartman
2012-10-19 2:46 ` [ 34/76] vfio: Move PCI INTx eventfd setting earlier Greg Kroah-Hartman
2012-10-19 2:46 ` [ 35/76] vfio: Fix PCI INTx disable consistency Greg Kroah-Hartman
2012-10-19 2:47 ` [ 36/76] xen/pv-on-hvm kexec: add quirk for Xen 3.4 and shutdown watches Greg Kroah-Hartman
2012-10-19 2:47 ` [ 37/76] xen/bootup: allow {read|write}_cr8 pvops call Greg Kroah-Hartman
2012-10-19 2:47 ` [ 38/76] xen/bootup: allow read_tscp call for Xen PV guests Greg Kroah-Hartman
2012-10-19 2:47 ` [ 39/76] block: fix request_queue->flags initialization Greg Kroah-Hartman
2012-10-19 2:47 ` [ 40/76] autofs4 - fix reset pending flag on mount fail Greg Kroah-Hartman
2012-10-19 2:47 ` [ 41/76] module: taint kernel when lve module is loaded Greg Kroah-Hartman
2012-10-19 2:47 ` [ 42/76] video/udlfb: fix line counting in fb_write Greg Kroah-Hartman
2012-10-19 2:47 ` [ 43/76] viafb: dont touch clock state on OLPC XO-1.5 Greg Kroah-Hartman
2012-10-19 2:47 ` [ 44/76] timekeeping: Cast raw_interval to u64 to avoid shift overflow Greg Kroah-Hartman
2012-10-19 2:47 ` [ 45/76] timers: Fix endless looping between cascade() and internal_add_timer() Greg Kroah-Hartman
2012-10-19 2:47 ` [ 46/76] nohz: Fix one jiffy count too far in idle cputime Greg Kroah-Hartman
2012-10-19 2:47 ` [ 47/76] ath9k: use ieee80211_free_txskb Greg Kroah-Hartman
2012-10-19 2:47 ` [ 48/76] mac80211: use ieee80211_free_txskb to fix possible skb leaks Greg Kroah-Hartman
2012-10-19 2:47 ` [ 49/76] md/raid10: use correct limit variable Greg Kroah-Hartman
2012-10-19 2:47 ` [ 50/76] kdb,vt_console: Fix missed data due to pager overruns Greg Kroah-Hartman
2012-10-19 2:47 ` [ 51/76] pktgen: fix crash when generating IPv6 packets Greg Kroah-Hartman
2012-10-19 2:47 ` [ 52/76] MIPS: ath79: Fix CPU/DDR frequency calculation for SRIF PLLs Greg Kroah-Hartman
2012-10-19 2:47 ` [ 53/76] kbuild: Fix accidental revert in commit fe04ddf Greg Kroah-Hartman
2012-10-19 2:47 ` [ 54/76] Add CDC-ACM support for the CX93010-2x UCMxx USB Modem Greg Kroah-Hartman
2012-10-19 2:47 ` [ 55/76] fs: handle failed audit_log_start properly Greg Kroah-Hartman
2012-10-19 2:47 ` [ 56/76] fs: prevent use after free in auditing when symlink following was denied Greg Kroah-Hartman
2012-10-19 2:47 ` [ 57/76] drm/radeon: Dont destroy I2C Bus Rec in radeon_ext_tmds_enc_destroy() Greg Kroah-Hartman
2012-10-19 2:47 ` [ 58/76] drm/i915: remove useless BUG_ON which caused a regression in 3.5 Greg Kroah-Hartman
2012-10-19 2:47 ` [ 59/76] drm/i915: Set guardband clipping workaround bit in the right register Greg Kroah-Hartman
2012-10-19 2:47 ` [ 60/76] drm/nouveau/bios: fix shadowing of ACPI ROMs larger than 64KiB Greg Kroah-Hartman
2012-10-19 16:01 ` Heinz Diehl
2012-10-19 17:48 ` Greg Kroah-Hartman
2012-10-19 19:11 ` Heinz Diehl
2012-10-21 16:31 ` Greg Kroah-Hartman
2012-10-19 2:47 ` [ 61/76] drm/i915: use adjusted_mode instead of mode for checking the 6bpc force flag Greg Kroah-Hartman
2012-10-19 2:47 ` [ 62/76] mcs7830: Fix link state detection Greg Kroah-Hartman
2012-10-19 2:47 ` [ 63/76] jbd: Fix assertion failure in commit code due to lacking transaction credits Greg Kroah-Hartman
2012-10-19 2:47 ` [ 64/76] mtd: nand: allow NAND_NO_SUBPAGE_WRITE to be set from driver Greg Kroah-Hartman
2012-10-19 2:47 ` [ 65/76] e1000e: Change wthresh to 1 to avoid possible Tx stalls Greg Kroah-Hartman
2012-10-19 2:47 ` [ 66/76] tpm: Propagate error from tpm_transmit to fix a timeout hang Greg Kroah-Hartman
2012-10-19 2:47 ` [ 67/76] usb: gadget: at91_udc: fix dt support Greg Kroah-Hartman
2012-10-19 2:47 ` [ 68/76] ALSA: hda - Fix registration race of VGA switcheroo Greg Kroah-Hartman
2012-10-19 2:47 ` [ 69/76] ALSA: hda - Stop LPIB delay counting on broken hardware Greg Kroah-Hartman
2012-10-19 2:47 ` [ 70/76] ALSA: hda - Always check array bounds in alc_get_line_out_pfx Greg Kroah-Hartman
2012-10-19 2:47 ` [ 71/76] ASoC: fsi: dont reschedule DMA from an atomic context Greg Kroah-Hartman
2012-10-19 2:47 ` [ 72/76] ASoC: wm2200: Use rev A register patches on rev B Greg Kroah-Hartman
2012-10-19 2:47 ` [ 73/76] ASoC: wm2200: Fix non-inverted OUT2 mute control Greg Kroah-Hartman
2012-10-19 2:47 ` [ 74/76] ASoC: omap-abe-twl6040: Fix typo of Vibrator Greg Kroah-Hartman
2012-10-19 2:47 ` [ 75/76] ALSA: ac97 - Fix missing NULL check in snd_ac97_cvol_new() Greg Kroah-Hartman
2012-10-19 2:47 ` [ 76/76] ALSA: emu10k1: add chip details for E-mu 1010 PCIe card Greg Kroah-Hartman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20121019024350.466211922@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=Trond.Myklebust@netapp.com \
--cc=alan@lxorguk.ukuu.org.uk \
--cc=linux-kernel@vger.kernel.org \
--cc=stable@vger.kernel.org \
--cc=tao.peng@emc.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.