* [PATCH] Retry Large Buffer Allocations
@ 2011-04-05 18:34 Grant Erickson
2011-04-05 19:31 ` Artem Bityutskiy
0 siblings, 1 reply; 4+ messages in thread
From: Grant Erickson @ 2011-04-05 18:34 UTC (permalink / raw)
To: linux-mtd; +Cc: Jarkko Lavinen, Artem Bityutskiy
When handling user space read or write requests via mtd_{read,write}
or JFFS2 medium scan requests, exponentially back off on the size of
the requested kernel transfer buffer until it succeeds or until the
requested transfer buffer size falls below the page size.
This helps ensure the operation can succeed under low-memory,
highly-fragmented situations albeit somewhat more slowly.
---
drivers/mtd/mtdchar.c | 50 +++++++++++++++++++++-------------------------
drivers/mtd/mtdcore.c | 31 +++++++++++++++++++++++++++++
fs/jffs2/scan.c | 11 +++++----
include/linux/mtd/mtd.h | 2 +
4 files changed, 62 insertions(+), 32 deletions(-)
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 145b3d0d..fd05a78 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -166,10 +166,23 @@ static int mtd_close(struct inode *inode, struct file *file)
return 0;
} /* mtd_close */
-/* FIXME: This _really_ needs to die. In 2.5, we should lock the
- userspace buffer down and use it directly with readv/writev.
-*/
-#define MAX_KMALLOC_SIZE 0x20000
+/* Back in April 2005, Linus wrote:
+ *
+ * FIXME: This _really_ needs to die. In 2.5, we should lock the
+ * userspace buffer down and use it directly with readv/writev.
+ *
+ * The implementation below, using mtd_alloc_up_to, mitigates
+ * allocation failures when the system is under low-memory situations
+ * or if memory is highly fragmented at the cost of reducing the
+ * performance of the requested transfer due to a smaller buffer size.
+ *
+ * A more complex but more memory-efficient implementation based on
+ * get_user_pages and iovecs to cover extents of those pages is a
+ * longer-term goal, as intimated by Linus above. However, for the
+ * write case, this requires yet more complex head and tail transfer
+ * handling when those head and tail offsets and sizes are such that
+ * alignment requirements are not met in the NAND subdriver.
+ */
static ssize_t mtd_read(struct file *file, char __user *buf, size_t count,loff_t *ppos)
{
@@ -179,6 +192,7 @@ static ssize_t mtd_read(struct file *file, char __user *buf, size_t count,loff_t
size_t total_retlen=0;
int ret=0;
int len;
+ size_t size = count;
char *kbuf;
DEBUG(MTD_DEBUG_LEVEL0,"MTD_read\n");
@@ -189,23 +203,12 @@ static ssize_t mtd_read(struct file *file, char __user *buf, size_t count,loff_t
if (!count)
return 0;
- /* FIXME: Use kiovec in 2.5 to lock down the user's buffers
- and pass them directly to the MTD functions */
-
- if (count > MAX_KMALLOC_SIZE)
- kbuf=kmalloc(MAX_KMALLOC_SIZE, GFP_KERNEL);
- else
- kbuf=kmalloc(count, GFP_KERNEL);
-
+ kbuf = mtd_alloc_up_to(&size);
if (!kbuf)
return -ENOMEM;
while (count) {
-
- if (count > MAX_KMALLOC_SIZE)
- len = MAX_KMALLOC_SIZE;
- else
- len = count;
+ len = min_t(size_t, count, size);
switch (mfi->mode) {
case MTD_MODE_OTP_FACTORY:
@@ -268,6 +271,7 @@ static ssize_t mtd_write(struct file *file, const char __user *buf, size_t count
{
struct mtd_file_info *mfi = file->private_data;
struct mtd_info *mtd = mfi->mtd;
+ size_t size = count;
char *kbuf;
size_t retlen;
size_t total_retlen=0;
@@ -285,20 +289,12 @@ static ssize_t mtd_write(struct file *file, const char __user *buf, size_t count
if (!count)
return 0;
- if (count > MAX_KMALLOC_SIZE)
- kbuf=kmalloc(MAX_KMALLOC_SIZE, GFP_KERNEL);
- else
- kbuf=kmalloc(count, GFP_KERNEL);
-
+ kbuf = mtd_alloc_up_to(&size);
if (!kbuf)
return -ENOMEM;
while (count) {
-
- if (count > MAX_KMALLOC_SIZE)
- len = MAX_KMALLOC_SIZE;
- else
- len = count;
+ len = min_t(size_t, count, size);
if (copy_from_user(kbuf, buf, len)) {
kfree(kbuf);
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index da69bc8..d102b96 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -638,6 +638,36 @@ int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
return ret;
}
+/* mtd_alloc_up_to - called, for example by mtd_{read,write} and
+ * jffs2_scan_medium, to handle smaller (i.e. degraded) buffer
+ * allocations under low- or fragmented-memory situations where
+ * such reduced allocations, from a requested ideal, are allowed.
+ */
+
+#define MAX_KMALLOC_SIZE 0x20000
+
+void *mtd_alloc_up_to(size_t *size)
+{
+ gfp_t flags;
+ size_t try;
+ void *kbuf;
+
+ try = min_t(size_t, *size, MAX_KMALLOC_SIZE);
+
+ do {
+ flags = ((size == PAGE_SIZE) ?
+ GFP_KERNEL :
+ (__GFP_NOWARN | __GFP_WAIT |
+ __GFP_NORETRY | __GFP_NO_KSWAPD));
+
+ kbuf = kmalloc(try, flags);
+ } while (!kbuf && ((try >>= 1) >= PAGE_SIZE));
+
+ *size = try;
+
+ return kbuf;
+}
+
EXPORT_SYMBOL_GPL(add_mtd_device);
EXPORT_SYMBOL_GPL(del_mtd_device);
EXPORT_SYMBOL_GPL(get_mtd_device);
@@ -648,6 +678,7 @@ EXPORT_SYMBOL_GPL(__put_mtd_device);
EXPORT_SYMBOL_GPL(register_mtd_user);
EXPORT_SYMBOL_GPL(unregister_mtd_user);
EXPORT_SYMBOL_GPL(default_mtd_writev);
+EXPORT_SYMBOL_GPL(mtd_alloc_up_to);
#ifdef CONFIG_PROC_FS
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index b632ddd..42a28b1 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -117,14 +117,15 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
else
buf_size = PAGE_SIZE;
- /* Respect kmalloc limitations */
- if (buf_size > 128*1024)
- buf_size = 128*1024;
+ D1(printk(KERN_DEBUG "Trying to allocate readbuf of %d "
+ "bytes\n", buf_size));
- D1(printk(KERN_DEBUG "Allocating readbuf of %d bytes\n", buf_size));
- flashbuf = kmalloc(buf_size, GFP_KERNEL);
+ flashbuf = mtd_alloc_up_to(&buf_size);
if (!flashbuf)
return -ENOMEM;
+
+ D1(printk(KERN_DEBUG "Allocated readbuf of %d bytes\n",
+ buf_size));
}
if (jffs2_sum_active()) {
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 9d5306b..44c4c45 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -348,7 +348,8 @@ int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
int default_mtd_readv(struct mtd_info *mtd, struct kvec *vecs,
unsigned long count, loff_t from, size_t *retlen);
+void *mtd_alloc_up_to(size_t *size);
+
#ifdef CONFIG_MTD_PARTITIONS
void mtd_erase_callback(struct erase_info *instr);
#else
--
1.7.4.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] Retry Large Buffer Allocations
2011-04-05 18:34 [PATCH] Retry Large Buffer Allocations Grant Erickson
@ 2011-04-05 19:31 ` Artem Bityutskiy
0 siblings, 0 replies; 4+ messages in thread
From: Artem Bityutskiy @ 2011-04-05 19:31 UTC (permalink / raw)
To: Grant Erickson; +Cc: Jarkko Lavinen, linux-mtd
Just partial review before I go to bed, to keep you busy :-)))
On Tue, 2011-04-05 at 11:34 -0700, Grant Erickson wrote:
> kfree(kbuf);
> diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
> index da69bc8..d102b96 100644
> --- a/drivers/mtd/mtdcore.c
> +++ b/drivers/mtd/mtdcore.c
> @@ -638,6 +638,36 @@ int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
> return ret;
> }
>
> +/* mtd_alloc_up_to - called, for example by mtd_{read,write} and
> + * jffs2_scan_medium, to handle smaller (i.e. degraded) buffer
> + * allocations under low- or fragmented-memory situations where
> + * such reduced allocations, from a requested ideal, are allowed.
> + */
Please, make the comment to be of proper style. See the CodingStyle
file. Or even better - make it proper kerneldoc comment, if you can. I'm
sure you'll find the rules in the "Documentation" directory, but this is
optional. Also, please, put the comment just above the function.
> +#define MAX_KMALLOC_SIZE 0x20000
> +
> +void *mtd_alloc_up_to(size_t *size)
> +{
> + gfp_t flags;
> + size_t try;
> + void *kbuf;
> +
> + try = min_t(size_t, *size, MAX_KMALLOC_SIZE);
> +
> + do {
> + flags = ((size == PAGE_SIZE) ?
s/size/try/
> + GFP_KERNEL :
> + (__GFP_NOWARN | __GFP_WAIT |
> + __GFP_NORETRY | __GFP_NO_KSWAPD));
Sorry, but this construct is not beautiful and difficult to read. I'd
call it abuse of ?: operators :-) I think it is much more readable to
simply:
1. initialize gfp_t flags to (__GFP_NOWARN .... ) when you initialize it
or just before the loop.
2. Do
if (try == PAGE_SIZE)
flags = GFP_KERNEL;
Simple and readable.
And do not forget to check the code with checkpatch.pl before sending
out :-)
Good night!
--
Best Regards,
Artem Bityutskiy (Битюцкий Артём)
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH] Retry Large Buffer Allocations
@ 2011-04-05 20:05 Grant Erickson
2011-04-06 8:47 ` Artem Bityutskiy
0 siblings, 1 reply; 4+ messages in thread
From: Grant Erickson @ 2011-04-05 20:05 UTC (permalink / raw)
To: linux-mtd; +Cc: Jarkko Lavinen, Artem Bityutskiy
When handling user space read or write requests via mtd_{read,write}
or JFFS2 medium scan requests, exponentially back off on the size of
the requested kernel transfer buffer until it succeeds or until the
requested transfer buffer size falls below the page size.
This helps ensure the operation can succeed under low-memory,
highly-fragmented situations albeit somewhat more slowly.
v2: Incorporated coding style and comment feedback from Artem.
Signed-off-by: Grant Erickson <marathon96@gmail.com>
---
drivers/mtd/mtdchar.c fs/jffs2/scan.c
drivers/mtd/mtdchar.c | 50 +++++++++++++++++++++-------------------------
drivers/mtd/mtdcore.c | 41 ++++++++++++++++++++++++++++++++++++++
fs/jffs2/scan.c | 11 +++++----
include/linux/mtd/mtd.h | 2 +
4 files changed, 72 insertions(+), 32 deletions(-)
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 145b3d0d..fd05a78 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -166,10 +166,23 @@ static int mtd_close(struct inode *inode, struct file *file)
return 0;
} /* mtd_close */
-/* FIXME: This _really_ needs to die. In 2.5, we should lock the
- userspace buffer down and use it directly with readv/writev.
-*/
-#define MAX_KMALLOC_SIZE 0x20000
+/* Back in April 2005, Linus wrote:
+ *
+ * FIXME: This _really_ needs to die. In 2.5, we should lock the
+ * userspace buffer down and use it directly with readv/writev.
+ *
+ * The implementation below, using mtd_alloc_up_to, mitigates
+ * allocation failures when the system is under low-memory situations
+ * or if memory is highly fragmented at the cost of reducing the
+ * performance of the requested transfer due to a smaller buffer size.
+ *
+ * A more complex but more memory-efficient implementation based on
+ * get_user_pages and iovecs to cover extents of those pages is a
+ * longer-term goal, as intimated by Linus above. However, for the
+ * write case, this requires yet more complex head and tail transfer
+ * handling when those head and tail offsets and sizes are such that
+ * alignment requirements are not met in the NAND subdriver.
+ */
static ssize_t mtd_read(struct file *file, char __user *buf, size_t count,loff_t *ppos)
{
@@ -179,6 +192,7 @@ static ssize_t mtd_read(struct file *file, char __user *buf, size_t count,loff_t
size_t total_retlen=0;
int ret=0;
int len;
+ size_t size = count;
char *kbuf;
DEBUG(MTD_DEBUG_LEVEL0,"MTD_read\n");
@@ -189,23 +203,12 @@ static ssize_t mtd_read(struct file *file, char __user *buf, size_t count,loff_t
if (!count)
return 0;
- /* FIXME: Use kiovec in 2.5 to lock down the user's buffers
- and pass them directly to the MTD functions */
-
- if (count > MAX_KMALLOC_SIZE)
- kbuf=kmalloc(MAX_KMALLOC_SIZE, GFP_KERNEL);
- else
- kbuf=kmalloc(count, GFP_KERNEL);
-
+ kbuf = mtd_alloc_up_to(&size);
if (!kbuf)
return -ENOMEM;
while (count) {
-
- if (count > MAX_KMALLOC_SIZE)
- len = MAX_KMALLOC_SIZE;
- else
- len = count;
+ len = min_t(size_t, count, size);
switch (mfi->mode) {
case MTD_MODE_OTP_FACTORY:
@@ -268,6 +271,7 @@ static ssize_t mtd_write(struct file *file, const char __user *buf, size_t count
{
struct mtd_file_info *mfi = file->private_data;
struct mtd_info *mtd = mfi->mtd;
+ size_t size = count;
char *kbuf;
size_t retlen;
size_t total_retlen=0;
@@ -285,20 +289,12 @@ static ssize_t mtd_write(struct file *file, const char __user *buf, size_t count
if (!count)
return 0;
- if (count > MAX_KMALLOC_SIZE)
- kbuf=kmalloc(MAX_KMALLOC_SIZE, GFP_KERNEL);
- else
- kbuf=kmalloc(count, GFP_KERNEL);
-
+ kbuf = mtd_alloc_up_to(&size);
if (!kbuf)
return -ENOMEM;
while (count) {
-
- if (count > MAX_KMALLOC_SIZE)
- len = MAX_KMALLOC_SIZE;
- else
- len = count;
+ len = min_t(size_t, count, size);
if (copy_from_user(kbuf, buf, len)) {
kfree(kbuf);
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index da69bc8..54d9fcc 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -638,6 +638,46 @@ int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
return ret;
}
+#define MAX_KMALLOC_SIZE 0x20000
+
+/**
+ * mtd_alloc_up_to - allocate a contiguous buffer up to the specified size
+ * @size: A pointer to the ideal or maximum size of the allocation. Points
+ * to the actual allocation size on success.
+ *
+ * This routine attempts to allocate a contiguous kernel buffer up to
+ * the specified size, backing off the size of the request exponentially
+ * until the request succeeds or until the allocation size falls below
+ * the system page size.
+ *
+ * This is called, for example by mtd_{read,write} and jffs2_scan_medium,
+ * to handle smaller (i.e. degraded) buffer allocations under low- or
+ * fragmented-memory situations where such reduced allocations, from a
+ * requested ideal, are allowed.
+ *
+ * Returns a pointer to the allocated buffer on success; otherwise, NULL.
+ */
+void *mtd_alloc_up_to(size_t *size)
+{
+ gfp_t flags = (__GFP_NOWARN | __GFP_WAIT |
+ __GFP_NORETRY | __GFP_NO_KSWAPD);
+ size_t try;
+ void *kbuf;
+
+ try = min_t(size_t, *size, MAX_KMALLOC_SIZE);
+
+ do {
+ if (try == PAGE_SIZE)
+ flags = GFP_KERNEL;
+
+ kbuf = kmalloc(try, flags);
+ } while (!kbuf && ((try >>= 1) >= PAGE_SIZE));
+
+ *size = try;
+
+ return kbuf;
+}
+
EXPORT_SYMBOL_GPL(add_mtd_device);
EXPORT_SYMBOL_GPL(del_mtd_device);
EXPORT_SYMBOL_GPL(get_mtd_device);
@@ -648,6 +688,7 @@ EXPORT_SYMBOL_GPL(__put_mtd_device);
EXPORT_SYMBOL_GPL(register_mtd_user);
EXPORT_SYMBOL_GPL(unregister_mtd_user);
EXPORT_SYMBOL_GPL(default_mtd_writev);
+EXPORT_SYMBOL_GPL(mtd_alloc_up_to);
#ifdef CONFIG_PROC_FS
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index b632ddd..42a28b1 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -117,14 +117,15 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
else
buf_size = PAGE_SIZE;
- /* Respect kmalloc limitations */
- if (buf_size > 128*1024)
- buf_size = 128*1024;
+ D1(printk(KERN_DEBUG "Trying to allocate readbuf of %d "
+ "bytes\n", buf_size));
- D1(printk(KERN_DEBUG "Allocating readbuf of %d bytes\n", buf_size));
- flashbuf = kmalloc(buf_size, GFP_KERNEL);
+ flashbuf = mtd_alloc_up_to(&buf_size);
if (!flashbuf)
return -ENOMEM;
+
+ D1(printk(KERN_DEBUG "Allocated readbuf of %d bytes\n",
+ buf_size));
}
if (jffs2_sum_active()) {
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 9d5306b..44c4c45 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -348,7 +348,8 @@ int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs,
int default_mtd_readv(struct mtd_info *mtd, struct kvec *vecs,
unsigned long count, loff_t from, size_t *retlen);
+void *mtd_alloc_up_to(size_t *size);
+
#ifdef CONFIG_MTD_PARTITIONS
void mtd_erase_callback(struct erase_info *instr);
#else
--
1.7.4.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] Retry Large Buffer Allocations
2011-04-05 20:05 Grant Erickson
@ 2011-04-06 8:47 ` Artem Bityutskiy
0 siblings, 0 replies; 4+ messages in thread
From: Artem Bityutskiy @ 2011-04-06 8:47 UTC (permalink / raw)
To: Grant Erickson; +Cc: Jarkko Lavinen, linux-mtd
Grant, few more requests ;-)
On Tue, 2011-04-05 at 13:05 -0700, Grant Erickson wrote:
> When handling user space read or write requests via mtd_{read,write}
> or JFFS2 medium scan requests, exponentially back off on the size of
> the requested kernel transfer buffer until it succeeds or until the
> requested transfer buffer size falls below the page size.
>
> This helps ensure the operation can succeed under low-memory,
> highly-fragmented situations albeit somewhat more slowly.
>
> v2: Incorporated coding style and comment feedback from Artem.
>
> Signed-off-by: Grant Erickson <marathon96@gmail.com>
> ---
> drivers/mtd/mtdchar.c fs/jffs2/scan.c
> drivers/mtd/mtdchar.c | 50 +++++++++++++++++++++-------------------------
> drivers/mtd/mtdcore.c | 41 ++++++++++++++++++++++++++++++++++++++
> fs/jffs2/scan.c | 11 +++++----
> include/linux/mtd/mtd.h | 2 +
> 4 files changed, 72 insertions(+), 32 deletions(-)
This patch does not apply to l2-mtd-2.6.git, which is very up-to-date.
The conflict is trivial, but would be nice if you sent a patch against
l2-mtd-2.6.git:
git://git.infradead.org/users/dedekind/l2-mtd-2.6.git
> +#define MAX_KMALLOC_SIZE 0x20000
> +
> +/**
> + * mtd_alloc_up_to - allocate a contiguous buffer up to the specified size
> + * @size: A pointer to the ideal or maximum size of the allocation. Points
> + * to the actual allocation size on success.
> + *
> + * This routine attempts to allocate a contiguous kernel buffer up to
> + * the specified size, backing off the size of the request exponentially
> + * until the request succeeds or until the allocation size falls below
> + * the system page size.
> + *
> + * This is called, for example by mtd_{read,write} and jffs2_scan_medium,
> + * to handle smaller (i.e. degraded) buffer allocations under low- or
> + * fragmented-memory situations where such reduced allocations, from a
> + * requested ideal, are allowed.
Also, I think we need to provide a comment which describes why we picked
these kmalloc flags. I suggest you to add something like this:
This function tries to make sure it does not impact the performance
severely, so when allocating more than one page we ask the memory
allocator to avoid re-trying, swapping, write-back and I/O.
> + *
> + * Returns a pointer to the allocated buffer on success; otherwise, NULL.
> + */
> +void *mtd_alloc_up_to(size_t *size)
Can we please re-name it to mtd_kmalloc_up_to to reflect that we use
kmalloc and not vmalloc.
> +{
> + gfp_t flags = (__GFP_NOWARN | __GFP_WAIT |
> + __GFP_NORETRY | __GFP_NO_KSWAPD);
Brackets are not really needed :-)
> + size_t try;
> + void *kbuf;
> +
> + try = min_t(size_t, *size, MAX_KMALLOC_SIZE);
> +
Since we are trying to make this to be a "generic" function, let's use
KMALLOC_MAX_SIZE as the maximum. This constant is defined in
linux/slab.h, so you need to also add
#include <linux/slab.h>
> + do {
> + if (try == PAGE_SIZE)
Hmm, I think
if (try <= PAGE_SIZE)
because the input *size may be small in theory, in which case we should
basically fall-back to kmalloc(*size, GFP_KERNEL);
> + flags = GFP_KERNEL;
> +
> + kbuf = kmalloc(try, flags);
> + } while (!kbuf && ((try >>= 1) >= PAGE_SIZE));
> +
> + *size = try;
> +
> + return kbuf;
Matter of taste, but I'd remove this extra new line. For me too sparse
code is less readable. I think both of these lines are about "returning
the results", so it makes sense to have them as "one block".
> +}
--
Best Regards,
Artem Bityutskiy (Артём Битюцкий)
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2011-04-06 8:50 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-04-05 18:34 [PATCH] Retry Large Buffer Allocations Grant Erickson
2011-04-05 19:31 ` Artem Bityutskiy
-- strict thread matches above, loose matches on Subject: below --
2011-04-05 20:05 Grant Erickson
2011-04-06 8:47 ` Artem Bityutskiy
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).