stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg KH <gregkh@suse.de>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: torvalds@linux-foundation.org, akpm@linux-foundation.org,
	alan@lxorguk.ukuu.org.uk, Boaz Harrosh <bharrosh@panasas.com>
Subject: [21/49] ore: Must support none-PAGE-aligned IO
Date: Tue, 10 Jan 2012 13:55:24 -0800	[thread overview]
Message-ID: <20120110215603.362094983@clark.kroah.org> (raw)
In-Reply-To: <20120110215609.GA22505@kroah.com>

3.2-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Boaz Harrosh <bharrosh@panasas.com>

commit 724577ca355795b0a25c93ccbeee927871ca1a77 upstream.

NFS might send us offsets that are not PAGE aligned. So
we must read in the remainder of the first/last pages, in cases
we need it for Parity calculations.

We only add an sg segment to read the partial page. But
we don't mark it as read=true because it is a lock-for-write
page.

TODO: In some cases (IO spans a single unit) we can just
adjust the raid_unit offset/length, but this is left for
later Kernels.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

---
 fs/exofs/ore_raid.c |   72 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 60 insertions(+), 12 deletions(-)

--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -328,8 +328,8 @@ static int _alloc_read_4_write(struct or
 /* @si contains info of the to-be-inserted page. Update of @si should be
  * maintained by caller. Specificaly si->dev, si->obj_offset, ...
  */
-static int _add_to_read_4_write(struct ore_io_state *ios,
-				struct ore_striping_info *si, struct page *page)
+static int _add_to_r4w(struct ore_io_state *ios, struct ore_striping_info *si,
+		       struct page *page, unsigned pg_len)
 {
 	struct request_queue *q;
 	struct ore_per_dev_state *per_dev;
@@ -366,17 +366,60 @@ static int _add_to_read_4_write(struct o
 		_ore_add_sg_seg(per_dev, gap, true);
 	}
 	q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev));
-	added_len = bio_add_pc_page(q, per_dev->bio, page, PAGE_SIZE, 0);
-	if (unlikely(added_len != PAGE_SIZE)) {
+	added_len = bio_add_pc_page(q, per_dev->bio, page, pg_len,
+				    si->obj_offset % PAGE_SIZE);
+	if (unlikely(added_len != pg_len)) {
 		ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n",
 			      per_dev->bio->bi_vcnt);
 		return -ENOMEM;
 	}
 
-	per_dev->length += PAGE_SIZE;
+	per_dev->length += pg_len;
 	return 0;
 }
 
+/* read the beginning of an unaligned first page */
+static int _add_to_r4w_first_page(struct ore_io_state *ios, struct page *page)
+{
+	struct ore_striping_info si;
+	unsigned pg_len;
+
+	ore_calc_stripe_info(ios->layout, ios->offset, 0, &si);
+
+	pg_len = si.obj_offset % PAGE_SIZE;
+	si.obj_offset -= pg_len;
+
+	ORE_DBGMSG("offset=0x%llx len=0x%x index=0x%lx dev=%x\n",
+		   _LLU(si.obj_offset), pg_len, page->index, si.dev);
+
+	return _add_to_r4w(ios, &si, page, pg_len);
+}
+
+/* read the end of an incomplete last page */
+static int _add_to_r4w_last_page(struct ore_io_state *ios, u64 *offset)
+{
+	struct ore_striping_info si;
+	struct page *page;
+	unsigned pg_len, p, c;
+
+	ore_calc_stripe_info(ios->layout, *offset, 0, &si);
+
+	p = si.unit_off / PAGE_SIZE;
+	c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
+		       ios->layout->mirrors_p1, si.par_dev, si.dev);
+	page = ios->sp2d->_1p_stripes[p].pages[c];
+
+	pg_len = PAGE_SIZE - (si.unit_off % PAGE_SIZE);
+	*offset += pg_len;
+
+	ORE_DBGMSG("p=%d, c=%d next-offset=0x%llx len=0x%x dev=%x par_dev=%d\n",
+		   p, c, _LLU(*offset), pg_len, si.dev, si.par_dev);
+
+	BUG_ON(!page);
+
+	return _add_to_r4w(ios, &si, page, pg_len);
+}
+
 static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
 {
 	struct bio_vec *bv;
@@ -444,9 +487,13 @@ static int _read_4_write(struct ore_io_s
 			struct page **pp = &_1ps->pages[c];
 			bool uptodate;
 
-			if (*pp)
+			if (*pp) {
+				if (ios->offset % PAGE_SIZE)
+					/* Read the remainder of the page */
+					_add_to_r4w_first_page(ios, *pp);
 				/* to-be-written pages start here */
 				goto read_last_stripe;
+			}
 
 			*pp = ios->r4w->get_page(ios->private, offset,
 						 &uptodate);
@@ -454,7 +501,7 @@ static int _read_4_write(struct ore_io_s
 				return -ENOMEM;
 
 			if (!uptodate)
-				_add_to_read_4_write(ios, &read_si, *pp);
+				_add_to_r4w(ios, &read_si, *pp, PAGE_SIZE);
 
 			/* Mark read-pages to be cache_released */
 			_1ps->page_is_read[c] = true;
@@ -465,8 +512,11 @@ static int _read_4_write(struct ore_io_s
 	}
 
 read_last_stripe:
-	offset = ios->offset + (ios->length + PAGE_SIZE - 1) /
-				PAGE_SIZE * PAGE_SIZE;
+	offset = ios->offset + ios->length;
+	if (offset % PAGE_SIZE)
+		_add_to_r4w_last_page(ios, &offset);
+		/* offset will be aligned to next page */
+
 	last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe)
 				 * bytes_in_stripe;
 	if (offset == last_stripe_end) /* Optimize for the aligned case */
@@ -503,7 +553,7 @@ read_last_stripe:
 			/* Mark read-pages to be cache_released */
 			_1ps->page_is_read[c] = true;
 			if (!uptodate)
-				_add_to_read_4_write(ios, &read_si, page);
+				_add_to_r4w(ios, &read_si, page, PAGE_SIZE);
 		}
 
 		offset += PAGE_SIZE;
@@ -616,8 +666,6 @@ int _ore_post_alloc_raid_stuff(struct or
 			return -ENOMEM;
 		}
 
-		BUG_ON(ios->offset % PAGE_SIZE);
-
 		/* Round io down to last full strip */
 		first_stripe = div_u64(ios->offset, stripe_size);
 		last_stripe = div_u64(ios->offset + ios->length, stripe_size);



  parent reply	other threads:[~2012-01-10 21:55 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-01-10 21:56 [00/49] 3.2.1-stable review Greg KH
2012-01-10 21:55 ` [01/49] MAINTAINERS: stable: Update address Greg KH
2012-01-10 21:55 ` [02/49] Documentation: Update stable address Greg KH
2012-01-10 21:55 ` [03/49] firmware: Fix an oops on reading fw_priv->fw in sysfs loading file Greg KH
2012-01-10 21:55 ` [04/49] rt2800usb: Move ID out of unknown Greg KH
2012-01-10 21:55 ` [05/49] offb: Fix setting of the pseudo-palette for >8bpp Greg KH
2012-01-10 21:55 ` [06/49] offb: Fix bug in calculating requested vram size Greg KH
2012-01-10 21:55 ` [07/49] libertas: clean up scan thread handling Greg KH
2012-01-10 21:55 ` [08/49] bcma: support for suspend and resume Greg KH
2012-01-10 21:55 ` [09/49] wl12xx: Validate FEM index from ini file and FW Greg KH
2012-01-10 21:55 ` [10/49] wl12xx: Check buffer bound when processing nvs data Greg KH
2012-01-10 21:55 ` [11/49] wl12xx: Restore testmode ABI Greg KH
2012-01-10 21:55 ` [12/49] powerpc/time: Handle wrapping of decrementer Greg KH
2012-01-10 21:55 ` [13/49] powerpc: Fix unpaired probe_hcall_entry and probe_hcall_exit Greg KH
2012-01-10 21:55 ` [14/49] IB/qib: Fix a possible data corruption when receiving packets Greg KH
2012-01-10 21:55 ` [15/49] IB/uverbs: Protect QP multicast list Greg KH
2012-01-10 21:55 ` [16/49] iwlagn: fix TID use bug Greg KH
2012-01-10 21:55 ` [17/49] iwlagn: fix (remove) use of PAGE_SIZE Greg KH
2012-01-10 21:55 ` [18/49] perf: Fix parsing of __print_flags() in TP_printk() Greg KH
2012-01-10 21:55 ` [19/49] ore: Fix crash in case of an IO error Greg KH
2012-01-10 21:55 ` [20/49] ore: fix BUG_ON, too few sgs when reading Greg KH
2012-01-10 21:55 ` Greg KH [this message]
2012-01-10 21:55 ` [22/49] ore: FIX breakage when MISC_FILESYSTEMS is not set Greg KH
2012-01-10 21:55 ` [23/49] reiserfs: Fix quota mount option parsing Greg KH
2012-01-10 21:55 ` [24/49] reiserfs: Force inode evictions before umount to avoid crash Greg KH
2012-01-10 21:55 ` [25/49] ext3: Dont warn from writepage when readonly inode is spotted after error Greg KH
2012-01-10 21:55 ` [26/49] drivers: hv: Dont OOPS when you cannot init vmbus Greg KH
2012-01-10 21:55 ` [27/49] Drivers:hv: Fix a bug in vmbus_driver_unregister() Greg KH
2012-01-10 21:55 ` [28/49] USB: update documentation for usbmon Greg KH
2012-01-10 21:55 ` [29/49] usbfs: Fix oops related to user namespace conversion Greg KH
2012-01-10 21:55 ` [30/49] atmel_serial: fix spinlock lockup in RS485 code Greg KH
2012-01-10 21:55 ` [31/49] cgroup: fix to allow mounting a hierarchy by name Greg KH
2012-01-10 21:55 ` [32/49] udf: Fix deadlock when converting file from in-ICB one to normal one Greg KH
2012-01-10 21:55 ` [33/49] drivers/usb/class/cdc-acm.c: clear dangling pointer Greg KH
2012-01-10 21:55 ` [34/49] USB: isight: fix kernel bug when loading firmware Greg KH
2012-01-10 21:55 ` [35/49] usb: usb-storage doesnt support dynamic id currently, the patch disables the feature to fix an oops Greg KH
2012-01-10 21:55 ` [36/49] USB: pxa168: Fix compilation error Greg KH
2012-01-10 21:55 ` [37/49] USB: add quirk for another camera Greg KH
2012-01-10 21:55 ` [38/49] usb: musb: fix pm_runtime mismatch Greg KH
2012-01-10 21:55 ` [39/49] USB: omninet: fix write_room Greg KH
2012-01-10 21:55 ` [40/49] usb: option: add ZD Incorporated HSPA modem Greg KH
2012-01-10 21:55 ` [41/49] USB: Add USB-ID for Multiplex RC serial adapter to cp210x.c Greg KH
2012-01-10 21:55 ` [42/49] usb: fix number of mapped SG DMA entries Greg KH
2012-01-10 21:55 ` [43/49] xhci: Properly handle COMP_2ND_BW_ERR Greg KH
2012-01-10 21:55 ` [44/49] usb: ch9: fix up MaxStreams helper Greg KH
2012-01-10 21:55 ` [45/49] igmp: Avoid zero delay when receiving odd mixture of IGMP queries Greg KH
2012-01-10 21:55 ` [46/49] asix: fix infinite loop in rx_fixup() Greg KH
2012-01-10 21:55 ` [47/49] bonding: fix error handling if slave is busy (v2) Greg KH
2012-01-10 21:55 ` [48/49] usb: cdc-acm: Fix acm_tty_hangup() vs. acm_tty_close() race Greg KH
2012-01-10 21:55 ` [49/49] xfs: fix acl count validation in xfs_acl_from_disk() Greg KH

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120110215603.362094983@clark.kroah.org \
    --to=gregkh@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=bharrosh@panasas.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).