From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
To: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>,
"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
Andy Lutomirski <luto@amacapital.net>,
Dave Watson <davejwatson@fb.com>,
linux-kernel <linux-kernel@vger.kernel.org>,
linux-api <linux-api@vger.kernel.org>,
Paul Turner <pjt@google.com>,
Andrew Morton <akpm@linux-foundation.org>,
Russell King <linux@arm.linux.org.uk>,
Thomas Gleixner <tglx@linutronix.de>,
Ingo Molnar <mingo@redhat.com>, "H. Peter Anvin" <hpa@zytor.com>,
Andrew Hunter <ahh@google.com>, Andi Kleen <andi@firstfloor.org>,
Chris Lameter <cl@linux.com>, Ben Maurer <bmaurer@fb.com>,
rostedt <rostedt@goodmis.org>,
Josh Triplett <josh@joshtriplett.org>,
Linus Torvalds <torvalds@linux-foundation.org>,
Catalin Marinas <catalin.marinas@arm.com>,
Will Deacon <will.deacon@arm.com>,
Michael Kerrisk <mtk.manpages@gmail.com>
Subject: Re: [RFC PATCH v2 for 4.15 08/14] Provide cpu_opv system call
Date: Tue, 7 Nov 2017 02:40:37 +0000 (UTC) [thread overview]
Message-ID: <444885121.6172.1510022437259.JavaMail.zimbra@efficios.com> (raw)
In-Reply-To: <20171107020711.GA6095@tardis>
[-- Attachment #1: Type: text/plain, Size: 1052 bytes --]
----- On Nov 6, 2017, at 9:07 PM, Boqun Feng boqun.feng@gmail.com wrote:
> On Mon, Nov 06, 2017 at 03:56:38PM -0500, Mathieu Desnoyers wrote:
> [...]
>> +static int cpu_op_pin_pages(unsigned long addr, unsigned long len,
>> + struct page ***pinned_pages_ptr, size_t *nr_pinned,
>> + int write)
>> +{
>> + struct page *pages[2];
>> + int ret, nr_pages;
>> +
>> + if (!len)
>> + return 0;
>> + nr_pages = cpu_op_range_nr_pages(addr, len);
>> + BUG_ON(nr_pages > 2);
>> + if (*nr_pinned + nr_pages > NR_PINNED_PAGES_ON_STACK) {
>
> Is this a bug? It seems you will kzalloc() every time if *nr_pinned is
> bigger than NR_PINNED_PAGES_ON_STACK, which will result in a memory
> leak.
>
> I think the logic here is complex enough for us to introduce a
> structure, like:
>
> struct cpu_opv_page_pinner {
> int nr_pinned;
> bool is_kmalloc;
> struct page **pinned_pages;
> };
>
> Thoughts?
Good catch!
How about the attached diff? I'll fold it into the rseq/dev tree.
Thanks,
Mathieu
--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: fix-cpu-opv-leak.patch --]
[-- Type: text/x-patch; name=fix-cpu-opv-leak.patch, Size: 5793 bytes --]
diff --git a/kernel/cpu_opv.c b/kernel/cpu_opv.c
index 09754bbe6a4f..3d8fd66416a0 100644
--- a/kernel/cpu_opv.c
+++ b/kernel/cpu_opv.c
@@ -46,6 +46,12 @@ union op_fn_data {
#endif
};
+struct cpu_opv_pinned_pages {
+ struct page **pages;
+ size_t nr;
+ bool is_kmalloc;
+};
+
typedef int (*op_fn_t)(union op_fn_data *data, uint64_t v, uint32_t len);
static DEFINE_MUTEX(cpu_opv_offline_lock);
@@ -217,8 +223,7 @@ static int cpu_op_check_pages(struct page **pages,
}
static int cpu_op_pin_pages(unsigned long addr, unsigned long len,
- struct page ***pinned_pages_ptr, size_t *nr_pinned,
- int write)
+ struct cpu_opv_pinned_pages *pin_pages, int write)
{
struct page *pages[2];
int ret, nr_pages;
@@ -227,15 +232,17 @@ static int cpu_op_pin_pages(unsigned long addr, unsigned long len,
return 0;
nr_pages = cpu_op_range_nr_pages(addr, len);
BUG_ON(nr_pages > 2);
- if (*nr_pinned + nr_pages > NR_PINNED_PAGES_ON_STACK) {
+ if (!pin_pages->is_kmalloc && pin_pages->nr + nr_pages
+ > NR_PINNED_PAGES_ON_STACK) {
struct page **pinned_pages =
kzalloc(CPU_OP_VEC_LEN_MAX * CPU_OP_MAX_PAGES
* sizeof(struct page *), GFP_KERNEL);
if (!pinned_pages)
return -ENOMEM;
- memcpy(pinned_pages, *pinned_pages_ptr,
- *nr_pinned * sizeof(struct page *));
- *pinned_pages_ptr = pinned_pages;
+ memcpy(pinned_pages, pin_pages->pages,
+ pin_pages->nr * sizeof(struct page *));
+ pin_pages->pages = pinned_pages;
+ pin_pages->is_kmalloc = true;
}
again:
ret = get_user_pages_fast(addr, nr_pages, write, pages);
@@ -257,9 +264,9 @@ static int cpu_op_pin_pages(unsigned long addr, unsigned long len,
}
if (ret)
goto error;
- (*pinned_pages_ptr)[(*nr_pinned)++] = pages[0];
+ pin_pages->pages[pin_pages->nr++] = pages[0];
if (nr_pages > 1)
- (*pinned_pages_ptr)[(*nr_pinned)++] = pages[1];
+ pin_pages->pages[pin_pages->nr++] = pages[1];
return 0;
error:
@@ -270,7 +277,7 @@ static int cpu_op_pin_pages(unsigned long addr, unsigned long len,
}
static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
- struct page ***pinned_pages_ptr, size_t *nr_pinned)
+ struct cpu_opv_pinned_pages *pin_pages)
{
int ret, i;
bool expect_fault = false;
@@ -289,7 +296,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
goto error;
ret = cpu_op_pin_pages(
(unsigned long)op->u.compare_op.a,
- op->len, pinned_pages_ptr, nr_pinned, 0);
+ op->len, pin_pages, 0);
if (ret)
goto error;
ret = -EFAULT;
@@ -299,7 +306,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
goto error;
ret = cpu_op_pin_pages(
(unsigned long)op->u.compare_op.b,
- op->len, pinned_pages_ptr, nr_pinned, 0);
+ op->len, pin_pages, 0);
if (ret)
goto error;
break;
@@ -311,7 +318,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
goto error;
ret = cpu_op_pin_pages(
(unsigned long)op->u.memcpy_op.dst,
- op->len, pinned_pages_ptr, nr_pinned, 1);
+ op->len, pin_pages, 1);
if (ret)
goto error;
ret = -EFAULT;
@@ -321,7 +328,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
goto error;
ret = cpu_op_pin_pages(
(unsigned long)op->u.memcpy_op.src,
- op->len, pinned_pages_ptr, nr_pinned, 0);
+ op->len, pin_pages, 0);
if (ret)
goto error;
break;
@@ -333,7 +340,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
goto error;
ret = cpu_op_pin_pages(
(unsigned long)op->u.arithmetic_op.p,
- op->len, pinned_pages_ptr, nr_pinned, 1);
+ op->len, pin_pages, 1);
if (ret)
goto error;
break;
@@ -347,7 +354,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
goto error;
ret = cpu_op_pin_pages(
(unsigned long)op->u.bitwise_op.p,
- op->len, pinned_pages_ptr, nr_pinned, 1);
+ op->len, pin_pages, 1);
if (ret)
goto error;
break;
@@ -360,7 +367,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
goto error;
ret = cpu_op_pin_pages(
(unsigned long)op->u.shift_op.p,
- op->len, pinned_pages_ptr, nr_pinned, 1);
+ op->len, pin_pages, 1);
if (ret)
goto error;
break;
@@ -373,9 +380,9 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt,
return 0;
error:
- for (i = 0; i < *nr_pinned; i++)
- put_page((*pinned_pages_ptr)[i]);
- *nr_pinned = 0;
+ for (i = 0; i < pin_pages->nr; i++)
+ put_page(pin_pages->pages[i]);
+ pin_pages->nr = 0;
/*
* If faulting access is expected, return EAGAIN to user-space.
* It allows user-space to distinguish between a fault caused by
@@ -923,9 +930,12 @@ SYSCALL_DEFINE4(cpu_opv, struct cpu_op __user *, ucpuopv, int, cpuopcnt,
{
struct cpu_op cpuopv[CPU_OP_VEC_LEN_MAX];
struct page *pinned_pages_on_stack[NR_PINNED_PAGES_ON_STACK];
- struct page **pinned_pages = pinned_pages_on_stack;
+ struct cpu_opv_pinned_pages pin_pages = {
+ .pages = pinned_pages_on_stack,
+ .nr = 0,
+ .is_kmalloc = false,
+ };
int ret, i;
- size_t nr_pinned = 0;
if (unlikely(flags))
return -EINVAL;
@@ -938,15 +948,14 @@ SYSCALL_DEFINE4(cpu_opv, struct cpu_op __user *, ucpuopv, int, cpuopcnt,
ret = cpu_opv_check(cpuopv, cpuopcnt);
if (ret)
return ret;
- ret = cpu_opv_pin_pages(cpuopv, cpuopcnt,
- &pinned_pages, &nr_pinned);
+ ret = cpu_opv_pin_pages(cpuopv, cpuopcnt, &pin_pages);
if (ret)
goto end;
ret = do_cpu_opv(cpuopv, cpuopcnt, cpu);
- for (i = 0; i < nr_pinned; i++)
- put_page(pinned_pages[i]);
+ for (i = 0; i < pin_pages.nr; i++)
+ put_page(pin_pages.pages[i]);
end:
- if (pinned_pages != pinned_pages_on_stack)
- kfree(pinned_pages);
+ if (pin_pages.is_kmalloc)
+ kfree(pin_pages.pages);
return ret;
}
next prev parent reply other threads:[~2017-11-07 2:40 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-11-06 20:56 [RFC PATCH for 4.15 00/14] Restartable sequences and CPU op vector v10 Mathieu Desnoyers
2017-11-06 20:56 ` [RFC PATCH v10 for 4.15 01/14] Restartable sequences system call Mathieu Desnoyers
2017-11-07 1:24 ` Boqun Feng
2017-11-07 2:20 ` Mathieu Desnoyers
2017-11-06 20:56 ` [RFC PATCH for 4.15 02/14] Restartable sequences: ARM 32 architecture support Mathieu Desnoyers
2017-11-06 20:56 ` [RFC PATCH for 4.15 03/14] Restartable sequences: wire up ARM 32 system call Mathieu Desnoyers
2017-11-06 20:56 ` [RFC PATCH for 4.15 04/14] Restartable sequences: x86 32/64 architecture support Mathieu Desnoyers
2017-11-06 20:56 ` [RFC PATCH for 4.15 05/14] Restartable sequences: wire up x86 32/64 system call Mathieu Desnoyers
2017-11-06 20:56 ` [RFC PATCH for 4.15 06/14] Restartable sequences: powerpc architecture support Mathieu Desnoyers
2017-11-06 20:56 ` [RFC PATCH for 4.15 07/14] Restartable sequences: Wire up powerpc system call Mathieu Desnoyers
2017-11-06 20:56 ` [RFC PATCH v2 for 4.15 08/14] Provide cpu_opv " Mathieu Desnoyers
2017-11-07 2:07 ` Boqun Feng
2017-11-07 2:40 ` Mathieu Desnoyers [this message]
2017-11-07 3:03 ` Boqun Feng
2017-11-06 20:56 ` [RFC PATCH for 4.15 09/14] cpu_opv: Wire up x86 32/64 " Mathieu Desnoyers
2017-11-06 20:56 ` [RFC PATCH for 4.15 10/14] cpu_opv: Wire up powerpc " Mathieu Desnoyers
2017-11-07 0:37 ` Nicholas Piggin
2017-11-07 0:47 ` Mathieu Desnoyers
2017-11-07 1:21 ` Nicholas Piggin
2017-11-06 20:56 ` [RFC PATCH for 4.15 11/14] cpu_opv: Wire up ARM32 " Mathieu Desnoyers
2017-11-06 20:56 ` [RFC PATCH v2 for 4.15 12/14] cpu_opv: Implement selftests Mathieu Desnoyers
2017-11-06 20:56 ` [RFC PATCH v2 for 4.15 13/14] Restartable sequences: Provide self-tests Mathieu Desnoyers
2017-11-06 20:56 ` [RFC PATCH for 4.15 14/14] Restartable sequences selftests: arm: workaround gcc asm size guess Mathieu Desnoyers
-- strict thread matches above, loose matches on Subject: below --
2017-11-06 9:22 [PATCH] mm, sparse: do not swamp log with huge vmemmap allocation failures Michal Hocko
2017-11-06 17:35 ` Johannes Weiner
2017-11-06 17:57 ` Joe Perches
2017-11-06 18:14 ` Khalid Aziz
2017-11-06 18:18 ` Michal Hocko
2017-11-06 20:17 ` Khalid Aziz
2017-11-07 9:06 ` Michal Hocko
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=444885121.6172.1510022437259.JavaMail.zimbra@efficios.com \
--to=mathieu.desnoyers@efficios.com \
--cc=ahh@google.com \
--cc=akpm@linux-foundation.org \
--cc=andi@firstfloor.org \
--cc=bmaurer@fb.com \
--cc=boqun.feng@gmail.com \
--cc=catalin.marinas@arm.com \
--cc=cl@linux.com \
--cc=davejwatson@fb.com \
--cc=hpa@zytor.com \
--cc=josh@joshtriplett.org \
--cc=linux-api@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux@arm.linux.org.uk \
--cc=luto@amacapital.net \
--cc=mingo@redhat.com \
--cc=mtk.manpages@gmail.com \
--cc=paulmck@linux.vnet.ibm.com \
--cc=peterz@infradead.org \
--cc=pjt@google.com \
--cc=rostedt@goodmis.org \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
--cc=will.deacon@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox