linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: James Simmons <jsimmons@infradead.org>
To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	devel@driverdev.osuosl.org,
	Andreas Dilger <andreas.dilger@intel.com>,
	Oleg Drokin <oleg.drokin@intel.com>
Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Lustre Development List <lustre-devel@lists.lustre.org>,
	Jinshan Xiong <jinshan.xiong@intel.com>,
	James Simmons <jsimmons@infradead.org>
Subject: [PATCH 08/22] staging: lustre: clio: revise read ahead algorithm
Date: Fri,  2 Dec 2016 19:53:15 -0500	[thread overview]
Message-ID: <1480726409-20350-9-git-send-email-jsimmons@infradead.org> (raw)
In-Reply-To: <1480726409-20350-1-git-send-email-jsimmons@infradead.org>

From: Jinshan Xiong <jinshan.xiong@intel.com>

ras_window_len should only be updated in ras_update(), based on the
detected read pattern; it must not be adjusted in ll_readahead() at
all. ras_consecutive_pages is used to detect the read pattern from
mmap, and it is now used to increase the read-ahead window length
gradually.

Signed-off-by: Jinshan Xiong <jinshan.xiong@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-5505
Reviewed-on: http://review.whamcloud.com/11528
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: Bobi Jam <bobijam@hotmail.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
---
 .../staging/lustre/lustre/llite/llite_internal.h   |    5 +-
 drivers/staging/lustre/lustre/llite/rw.c           |   71 +++++++++++---------
 2 files changed, 43 insertions(+), 33 deletions(-)

diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index ae0bb09..e37ba1f 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -1005,8 +1005,11 @@ int ll_xattr_list(struct inode *inode, const char *name, int type,
  */
 int cl_sb_init(struct super_block *sb);
 int cl_sb_fini(struct super_block *sb);
-void ll_io_init(struct cl_io *io, const struct file *file, int write);
 
+enum ras_update_flags {
+	LL_RAS_HIT  = 0x1,
+	LL_RAS_MMAP = 0x2
+};
 void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len);
 void ll_ra_stats_inc(struct inode *inode, enum ra_stat which);
 
diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
index e34017d..e2d5e75 100644
--- a/drivers/staging/lustre/lustre/llite/rw.c
+++ b/drivers/staging/lustre/lustre/llite/rw.c
@@ -457,30 +457,25 @@ static int ll_readahead(const struct lu_env *env, struct cl_io *io,
 
 	spin_lock(&ras->ras_lock);
 
-	/* Enlarge the RA window to encompass the full read */
-	if (vio->vui_ra_valid &&
-	    ras->ras_window_start + ras->ras_window_len <
-	    vio->vui_ra_start + vio->vui_ra_count) {
-		ras->ras_window_len = vio->vui_ra_start + vio->vui_ra_count -
-				      ras->ras_window_start;
-	}
+	/**
+	 * Note: other thread might rollback the ras_next_readahead,
+	 * if it can not get the full size of prepared pages, see the
+	 * end of this function. For stride read ahead, it needs to
+	 * make sure the offset is no less than ras_stride_offset,
+	 * so that stride read ahead can work correctly.
+	 */
+	if (stride_io_mode(ras))
+		start = max(ras->ras_next_readahead, ras->ras_stride_offset);
+	else
+		start = ras->ras_next_readahead;
 
-	/* Reserve a part of the read-ahead window that we'll be issuing */
-	if (ras->ras_window_len > 0) {
-		/*
-		 * Note: other thread might rollback the ras_next_readahead,
-		 * if it can not get the full size of prepared pages, see the
-		 * end of this function. For stride read ahead, it needs to
-		 * make sure the offset is no less than ras_stride_offset,
-		 * so that stride read ahead can work correctly.
-		 */
-		if (stride_io_mode(ras))
-			start = max(ras->ras_next_readahead,
-				    ras->ras_stride_offset);
-		else
-			start = ras->ras_next_readahead;
+	if (ras->ras_window_len > 0)
 		end = ras->ras_window_start + ras->ras_window_len - 1;
-	}
+
+	/* Enlarge the RA window to encompass the full read */
+	if (vio->vui_ra_valid &&
+	    end < vio->vui_ra_start + vio->vui_ra_count - 1)
+		end = vio->vui_ra_start + vio->vui_ra_count - 1;
 
 	if (end != 0) {
 		unsigned long rpc_boundary;
@@ -602,7 +597,7 @@ static void ras_reset(struct inode *inode, struct ll_readahead_state *ras,
 	ras->ras_consecutive_pages = 0;
 	ras->ras_window_len = 0;
 	ras_set_start(inode, ras, index);
-	ras->ras_next_readahead = max(ras->ras_window_start, index);
+	ras->ras_next_readahead = max(ras->ras_window_start, index + 1);
 
 	RAS_CDEBUG(ras);
 }
@@ -733,10 +728,11 @@ static void ras_increase_window(struct inode *inode,
 
 static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
 		       struct ll_readahead_state *ras, unsigned long index,
-		       unsigned int hit)
+		       enum ras_update_flags flags)
 {
 	struct ll_ra_info *ra = &sbi->ll_ra_info;
 	int zero = 0, stride_detect = 0, ra_miss = 0;
+	bool hit = flags & LL_RAS_HIT;
 
 	spin_lock(&ras->ras_lock);
 
@@ -766,7 +762,7 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
 	 * to for subsequent IO.  The mmap case does not increment
 	 * ras_requests and thus can never trigger this behavior.
 	 */
-	if (ras->ras_requests == 2 && !ras->ras_request_index) {
+	if (ras->ras_requests >= 2 && !ras->ras_request_index) {
 		__u64 kms_pages;
 
 		kms_pages = (i_size_read(inode) + PAGE_SIZE - 1) >>
@@ -778,8 +774,7 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
 		if (kms_pages &&
 		    kms_pages <= ra->ra_max_read_ahead_whole_pages) {
 			ras->ras_window_start = 0;
-			ras->ras_last_readpage = 0;
-			ras->ras_next_readahead = 0;
+			ras->ras_next_readahead = index + 1;
 			ras->ras_window_len = min(ra->ra_max_pages_per_file,
 				ra->ra_max_read_ahead_whole_pages);
 			goto out_unlock;
@@ -867,8 +862,13 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
 	/* Trigger RA in the mmap case where ras_consecutive_requests
 	 * is not incremented and thus can't be used to trigger RA
 	 */
-	if (!ras->ras_window_len && ras->ras_consecutive_pages == 4) {
-		ras->ras_window_len = RAS_INCREASE_STEP(inode);
+	if (ras->ras_consecutive_pages >= 4 && flags & LL_RAS_MMAP) {
+		ras_increase_window(inode, ras, ra);
+		/*
+		 * reset consecutive pages so that the readahead window can
+		 * grow gradually.
+		 */
+		ras->ras_consecutive_pages = 0;
 		goto out_unlock;
 	}
 
@@ -1101,9 +1101,16 @@ static int ll_io_read_page(const struct lu_env *env, struct cl_io *io,
 
 	vpg = cl2vvp_page(cl_object_page_slice(page->cp_obj, page));
 	if (sbi->ll_ra_info.ra_max_pages_per_file > 0 &&
-	    sbi->ll_ra_info.ra_max_pages > 0)
-		ras_update(sbi, inode, ras, vvp_index(vpg),
-			   vpg->vpg_defer_uptodate);
+	    sbi->ll_ra_info.ra_max_pages > 0) {
+		struct vvp_io *vio = vvp_env_io(env);
+		enum ras_update_flags flags = 0;
+
+		if (vpg->vpg_defer_uptodate)
+			flags |= LL_RAS_HIT;
+		if (!vio->vui_ra_valid)
+			flags |= LL_RAS_MMAP;
+		ras_update(sbi, inode, ras, vvp_index(vpg), flags);
+	}
 
 	if (vpg->vpg_defer_uptodate) {
 		vpg->vpg_ra_used = 1;
-- 
1.7.1

  parent reply	other threads:[~2016-12-03  0:56 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-12-03  0:53 [PATCH 00/22] Next batch of missing work for upstream client James Simmons
2016-12-03  0:53 ` [PATCH 01/22] staging: lustre: llite: clear LLIF_DATA_MODIFIED in atomic James Simmons
2016-12-03  0:53 ` [PATCH 02/22] staging: lustre: osc: fix debug log message formatting James Simmons
2016-12-03  0:53 ` [PATCH 03/22] staging: lustre: mdt: race between open and migrate James Simmons
2016-12-03  0:53 ` [PATCH 04/22] staging: lustre: osc: handle osc eviction correctly James Simmons
2016-12-05 20:55   ` Dan Carpenter
2016-12-05 23:03     ` Oleg Drokin
2016-12-07 23:16       ` James Simmons
2016-12-03  0:53 ` [PATCH 05/22] staging: lustre: lmv: remove nlink check in lmv_revalidate_slaves James Simmons
2016-12-05 20:57   ` Dan Carpenter
2016-12-03  0:53 ` [PATCH 06/22] staging: lustre: llog: reset llog bitmap James Simmons
2016-12-03  0:53 ` [PATCH 07/22] staging: lustre: obdclass: lu_site_purge() to handle purge-all James Simmons
2016-12-03  0:53 ` James Simmons [this message]
2016-12-03  0:53 ` [PATCH 09/22] staging: lustre: llite: Add client mount opt to ignore suppress_pings James Simmons
2016-12-03  0:53 ` [PATCH 10/22] staging: lustre: obdclass: limit lu_site hash table size on clients James Simmons
2016-12-03  0:53 ` [PATCH 11/22] staging: lustre: mdt: fail FMODE_WRITE open if the client is read only James Simmons
2016-12-03  0:53 ` [PATCH 12/22] staging: lustre: libcfs: report hnode value for cfs_hash_putref James Simmons
2016-12-03  0:53 ` [PATCH 13/22] staging: lustre: statahead: set sai_index_wait with lli_sa_lock held James Simmons
2016-12-03  0:53 ` [PATCH 14/22] staging: lustre: obd: add callback for llog_cat_process_or_fork James Simmons
2016-12-06  9:59   ` Greg Kroah-Hartman
2016-12-03  0:53 ` [PATCH 15/22] staging: lustre: rpc: increase bulk size James Simmons
2016-12-03  0:53 ` [PATCH 16/22] staging: lustre: llite: Invoke file_update_time in page_mkwrite James Simmons
2016-12-03  0:53 ` [PATCH 17/22] staging: lustre: clio: remove mtime check in vvp_io_fault_start() James Simmons
2016-12-03  0:53 ` [PATCH 18/22] staging: lustre: import: don't reconnect during connect interpret James Simmons
2016-12-03  0:53 ` [PATCH 19/22] staging: lustre: llite: ll_dir_ioctl cleanup of redundant comparisons James Simmons
2016-12-03  0:53 ` [PATCH 20/22] staging: lustre: osc: set lock data for readahead lock James Simmons
2016-12-03  0:53 ` [PATCH 21/22] staging: lustre: remove set but unused variables James Simmons
2016-12-03  0:53 ` [PATCH 22/22] staging: lustre: libcfs: remove lnet upcall code James Simmons
2016-12-06 10:00 ` [PATCH 00/22] Next batch of missing work for upstream client Greg Kroah-Hartman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1480726409-20350-9-git-send-email-jsimmons@infradead.org \
    --to=jsimmons@infradead.org \
    --cc=andreas.dilger@intel.com \
    --cc=devel@driverdev.osuosl.org \
    --cc=gregkh@linuxfoundation.org \
    --cc=jinshan.xiong@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lustre-devel@lists.lustre.org \
    --cc=oleg.drokin@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).