All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jens Axboe <jens.axboe@oracle.com>
To: linux-kernel@vger.kernel.org
Cc: zach.brown@oracle.com, Trond.Myklebust@netapp.com
Subject: Re: [PATCH 1/3] direct-io: make O_DIRECT IO path be page based
Date: Mon, 17 Aug 2009 14:11:38 +0200	[thread overview]
Message-ID: <20090817121137.GW12579@kernel.dk> (raw)
In-Reply-To: <1250505274-17108-2-git-send-email-jens.axboe@oracle.com>

On Mon, Aug 17 2009, Jens Axboe wrote:
> Switch the aops->direct_IO() and below code to use page arrays
> instead, so that it doesn't make any assumptions about who the pages
> belong to. This works directly for all users but NFS, which just
> uses the same helper that the generic mapping read/write functions
> also call.

A quick NFS test reveals a simple problem - it wants to free the
pagevec on IO completion, but for O_DIRECT it should not do that anymore
since it'll simply inherit the given page map.

There's a flag structure in the nfs_{read,write}_data structure, but I
was not able to find out which flags (if any?!) it actually uses. So I
just stuck an int in there. Trond?

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index aff6169..2913caf 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -302,7 +302,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		bytes = min(rsize,count);
 
 		result = -ENOMEM;
-		data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes));
+		data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes), 0);
 		if (unlikely(!data))
 			break;
 
@@ -699,7 +699,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 		bytes = min(wsize,count);
 
 		result = -ENOMEM;
-		data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes));
+		data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes),0);
 		if (unlikely(!data))
 			break;
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 12c9e66..86f96a4 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -38,7 +38,8 @@ static mempool_t *nfs_rdata_mempool;
 
 #define MIN_POOL_READ	(32)
 
-struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount,
+					 int need_pagevec)
 {
 	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS);
 
@@ -47,9 +48,11 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
 		INIT_LIST_HEAD(&p->pages);
 		p->npages = pagecount;
 		p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
-		if (pagecount <= ARRAY_SIZE(p->page_array))
+		if (!need_pagevec || pagecount <= ARRAY_SIZE(p->page_array)) {
 			p->pagevec = p->page_array;
-		else {
+			p->free_pagevec = 0;
+		} else {
+			p->free_pagevec = 1;
 			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
 			if (!p->pagevec) {
 				mempool_free(p, nfs_rdata_mempool);
@@ -62,7 +65,7 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
 
 void nfs_readdata_free(struct nfs_read_data *p)
 {
-	if (p && (p->pagevec != &p->page_array[0]))
+	if (p && p->free_pagevec)
 		kfree(p->pagevec);
 	mempool_free(p, nfs_rdata_mempool);
 }
@@ -256,7 +259,7 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
 	do {
 		size_t len = min(nbytes,rsize);
 
-		data = nfs_readdata_alloc(1);
+		data = nfs_readdata_alloc(1, 1);
 		if (!data)
 			goto out_bad;
 		list_add(&data->pages, &list);
@@ -306,7 +309,7 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned
 	struct nfs_read_data	*data;
 	int ret = -ENOMEM;
 
-	data = nfs_readdata_alloc(npages);
+	data = nfs_readdata_alloc(npages, 1);
 	if (!data)
 		goto out_bad;
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index a34fae2..defb266 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -60,12 +60,13 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
 
 void nfs_commit_free(struct nfs_write_data *p)
 {
-	if (p && (p->pagevec != &p->page_array[0]))
+	if (p && p->free_pagevec)
 		kfree(p->pagevec);
 	mempool_free(p, nfs_commit_mempool);
 }
 
-struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount,
+					   int need_pagevec)
 {
 	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
 
@@ -74,9 +75,11 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
 		INIT_LIST_HEAD(&p->pages);
 		p->npages = pagecount;
 		p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
-		if (pagecount <= ARRAY_SIZE(p->page_array))
+		if (!need_pagevec || pagecount <= ARRAY_SIZE(p->page_array)) {
 			p->pagevec = p->page_array;
-		else {
+			p->free_pagevec = 0;
+		} else {
+			p->free_pagevec = 1;
 			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
 			if (!p->pagevec) {
 				mempool_free(p, nfs_wdata_mempool);
@@ -89,7 +92,7 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
 
 void nfs_writedata_free(struct nfs_write_data *p)
 {
-	if (p && (p->pagevec != &p->page_array[0]))
+	if (p && p->free_pagevec)
 		kfree(p->pagevec);
 	mempool_free(p, nfs_wdata_mempool);
 }
@@ -904,7 +907,7 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
 	do {
 		size_t len = min(nbytes, wsize);
 
-		data = nfs_writedata_alloc(1);
+		data = nfs_writedata_alloc(1, 1);
 		if (!data)
 			goto out_bad;
 		list_add(&data->pages, &list);
@@ -960,7 +963,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
 	struct page		**pages;
 	struct nfs_write_data	*data;
 
-	data = nfs_writedata_alloc(npages);
+	data = nfs_writedata_alloc(npages, 1);
 	if (!data)
 		goto out_bad;
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index dab9bf9..946317c 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -498,7 +498,7 @@ nfs_have_writebacks(struct inode *inode)
 /*
  * Allocate nfs_write_data structures
  */
-extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages);
+extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages, int);
 extern void nfs_writedata_free(struct nfs_write_data *);
 
 /*
@@ -514,7 +514,7 @@ extern int  nfs_readpage_async(struct nfs_open_context *, struct inode *,
 /*
  * Allocate nfs_read_data structures
  */
-extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages);
+extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages, int);
 extern void nfs_readdata_free(struct nfs_read_data *);
 
 /*
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 62f63fb..45dd955 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -954,6 +954,7 @@ struct nfs_read_data {
 	struct nfs_page		*req;	/* multi ops per nfs_page */
 	struct page		**pagevec;
 	unsigned int		npages;	/* Max length of pagevec */
+	int			free_pagevec;	/* free ->pagevec */
 	struct nfs_readargs args;
 	struct nfs_readres  res;
 #ifdef CONFIG_NFS_V4
@@ -973,6 +974,7 @@ struct nfs_write_data {
 	struct nfs_page		*req;		/* multi ops per nfs_page */
 	struct page		**pagevec;
 	unsigned int		npages;		/* Max length of pagevec */
+	int			free_pagevec;	/* free ->pagevec */
 	struct nfs_writeargs	args;		/* argument struct */
 	struct nfs_writeres	res;		/* result struct */
 #ifdef CONFIG_NFS_V4

-- 
Jens Axboe


  parent reply	other threads:[~2009-08-17 12:11 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-08-17 10:34 [PATCH 0/3] Page based O_DIRECT and O_DIRECT loop Jens Axboe
2009-08-17 10:34 ` [PATCH 1/3] direct-io: make O_DIRECT IO path be page based Jens Axboe
2009-08-17 10:34   ` [PATCH 2/3] direct-io: add a "IO for kernel" flag to kiocb Jens Axboe
2009-08-17 10:34     ` [PATCH 3/3] loop: support O_DIRECT transfer mode Jens Axboe
2009-08-17 12:11   ` Jens Axboe [this message]
2009-08-20 13:08     ` [PATCH 1/3] direct-io: make O_DIRECT IO path be page based Trond Myklebust
2009-08-17 22:00 ` [PATCH 0/3] Page based O_DIRECT and O_DIRECT loop Christoph Hellwig
2009-08-18  6:40   ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090817121137.GW12579@kernel.dk \
    --to=jens.axboe@oracle.com \
    --cc=Trond.Myklebust@netapp.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=zach.brown@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.