* realloc function
@ 2011-01-02 17:37 Vasileios Karakasis
2011-01-02 23:42 ` Andi Kleen
0 siblings, 1 reply; 11+ messages in thread
From: Vasileios Karakasis @ 2011-01-02 17:37 UTC (permalink / raw)
To: linux-numa; +Cc: 'Kornilios Kourtis'
[-- Attachment #1.1: Type: text/plain, Size: 718 bytes --]
Hi,
I am submitting a patch for a realloc function that might be useful. The
proposed numa_realloc() is merely a wrapper to mremap(), which it calls
with the flag MREMAP_MAYMOVE. The policy of the vm area is copied by the
kernel in case of moving. I am also submitting a test program that
keeps expanding an initial allocation until a limit is reached and
checks the mempolicy of the expanded area in every iteration.
My use case is a dynamic array implementation which uses realloc() to
dynamically expand the array and I want to convert it to a numa-aware
implementation.
PS: I could also submit a brief manpage entry for the new function, if
you agree.
Best regards,
--
Vasileios Karakasis
[-- Attachment #1.2: numactl-2.0.6-rc4-realloc-patch --]
[-- Type: text/plain, Size: 5795 bytes --]
diff -urN numactl-2.0.6-rc4-orig/libnuma.c numactl-2.0.6-rc4/libnuma.c
--- numactl-2.0.6-rc4-orig/libnuma.c 2011-01-02 16:01:07.000000000 +0200
+++ numactl-2.0.6-rc4/libnuma.c 2011-01-02 16:01:14.000000000 +0200
@@ -871,6 +871,16 @@
return mem;
}
+void *numa_realloc(void *old_addr, size_t old_size, size_t new_size)
+{
+ char *mem;
+ mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE);
+ if (mem == (char *)-1)
+ return NULL;
+ /* Policy and binding of the vm segment are preserved by the kernel */
+ return mem;
+}
+
void *numa_alloc_interleaved_subset_v1(size_t size, const nodemask_t *mask)
{
char *mem;
diff -urN numactl-2.0.6-rc4-orig/Makefile numactl-2.0.6-rc4/Makefile
--- numactl-2.0.6-rc4-orig/Makefile 2010-12-22 13:29:50.000000000 +0200
+++ numactl-2.0.6-rc4/Makefile 2011-01-02 13:32:36.000000000 +0200
@@ -31,7 +31,7 @@
test/after test/before threadtest test_move_pages \
test/mbind_mig_pages test/migrate_pages \
migratepages migspeed migspeed.o libnuma.a \
- test/move_pages
+ test/move_pages test/realloc_test
SOURCES := bitops.c libnuma.c distance.c memhog.c numactl.c numademo.c \
numamon.c shm.c stream_lib.c stream_main.c syscall.c util.c mt.c \
clearcache.c test/*.c
@@ -43,7 +43,7 @@
all: numactl migratepages migspeed libnuma.so numademo numamon memhog \
test/tshared stream test/mynode test/pagesize test/ftok test/prefered \
test/randmap test/nodemap test/distance test/tbitmap test/move_pages \
- test/mbind_mig_pages test/migrate_pages libnuma.a
+ test/mbind_mig_pages test/migrate_pages test/realloc_test libnuma.a
numactl: numactl.o util.o shm.o bitops.o libnuma.so
@@ -123,6 +123,8 @@
test/migrate_pages: test/migrate_pages.c libnuma.so
+test/realloc_test: test/realloc_test.c libnuma.so
+
.PHONY: install all clean html depend
MANPAGES := numa.3 numactl.8 numastat.8 migratepages.8 migspeed.8
diff -urN numactl-2.0.6-rc4-orig/numa.h numactl-2.0.6-rc4/numa.h
--- numactl-2.0.6-rc4-orig/numa.h 2010-12-22 13:29:50.000000000 +0200
+++ numactl-2.0.6-rc4/numa.h 2010-12-22 13:45:42.000000000 +0200
@@ -212,6 +212,8 @@
void *numa_alloc_local(size_t size);
/* Allocation with current policy */
void *numa_alloc(size_t size);
+/* Realloc memory, binding properties are preserved. */
+void *numa_realloc(void *old_addr, size_t old_size, size_t new_size);
/* Free memory allocated by the functions above */
void numa_free(void *mem, size_t size);
diff -urN numactl-2.0.6-rc4-orig/test/realloc_test.c numactl-2.0.6-rc4/test/realloc_test.c
--- numactl-2.0.6-rc4-orig/test/realloc_test.c 1970-01-01 02:00:00.000000000 +0200
+++ numactl-2.0.6-rc4/test/realloc_test.c 2011-01-02 13:25:13.000000000 +0200
@@ -0,0 +1,109 @@
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include "numa.h"
+#include "numaif.h"
+
+#define DEFAULT_NR_PAGES 1024
+
+static int parse_int(const char *str)
+{
+ char *endptr;
+ long ret = strtol(str, &endptr, 0);
+ if (*endptr != '\0') {
+ fprintf(stderr, "[error] strtol() failed: parse error: %s\n", endptr);
+ exit(1);
+ }
+
+ if (errno == ERANGE)
+ fprintf(stderr, "[warning] strtol() out of range\n");
+
+ if (ret > INT_MAX || ret < INT_MIN) {
+ fprintf(stderr, "[warning] parse_int() out of range\n");
+ ret = (ret > 0) ? INT_MAX : INT_MIN;
+ }
+
+ return (int) ret;
+}
+
+int main(int argc, char **argv)
+{
+ char *mem;
+ int page_size = numa_pagesize();
+ int node = 0;
+ int nr_pages = DEFAULT_NR_PAGES;
+
+ if (numa_available() < 0) {
+ fprintf(stderr, "numa is not available");
+ exit(1);
+ }
+
+ if (argc > 1)
+ node = parse_int(argv[1]);
+ if (argc > 2)
+ nr_pages = parse_int(argv[2]);
+
+ mem = numa_alloc_onnode(page_size, node);
+
+ /* Store the policy of the newly allocated area */
+ unsigned long nodemask;
+ int mode;
+ int nr_nodes = numa_num_possible_nodes();
+ if (get_mempolicy(&mode, &nodemask, nr_nodes, mem,
+ MPOL_F_NODE | MPOL_F_ADDR) < 0) {
+ perror("get_mempolicy() failed");
+ exit(1);
+ }
+
+ /* Print some info */
+ printf("Page size: %d\n", page_size);
+ printf("Pages realloc'ed: %d\n", nr_pages);
+ printf("Allocate data in node: %d\n", node);
+
+ /* Request pages until a realloc moves the data */
+ int i;
+ int nr_inplace = 0;
+ int nr_moved = 0;
+ for (i = 0; i < nr_pages; i++) {
+ /* Enlarge mem with one more page */
+ char *new_mem = numa_realloc(mem, (i+1)*page_size, (i+2)*page_size);
+ if (!new_mem) {
+ perror("numa_realloc() failed");
+ exit(1);
+ }
+
+ if (new_mem == mem)
+ ++nr_inplace;
+ else
+ ++nr_moved;
+ mem = new_mem;
+
+ /* Check the policy of the realloc'ed area */
+ unsigned long realloc_nodemask;
+ int realloc_mode;
+ if (get_mempolicy(&realloc_mode, &realloc_nodemask,
+ nr_nodes, mem, MPOL_F_NODE | MPOL_F_ADDR) < 0) {
+ perror("get_mempolicy() failed");
+ exit(1);
+ }
+
+ assert(realloc_nodemask == nodemask &&
+ realloc_mode == mode && "policy changed");
+ }
+
+ /* Shrink to the original size */
+ mem = numa_realloc(mem, (nr_pages + 1)*page_size, page_size);
+ if (!mem) {
+ perror("numa_realloc() failed");
+ exit(1);
+ }
+
+ numa_free(mem, page_size);
+ printf("In-place reallocs: %d\n", nr_inplace);
+ printf("Moved reallocs: %d\n", nr_moved);
+ return 0;
+}
diff -urN numactl-2.0.6-rc4-orig/versions.ldscript numactl-2.0.6-rc4/versions.ldscript
--- numactl-2.0.6-rc4-orig/versions.ldscript 2010-12-22 13:29:50.000000000 +0200
+++ numactl-2.0.6-rc4/versions.ldscript 2010-12-22 20:41:22.000000000 +0200
@@ -87,6 +87,7 @@
numa_alloc_interleaved_subset;
numa_alloc_local;
numa_alloc_onnode;
+ numa_realloc;
numa_allocate_cpumask;
numa_allocate_nodemask;
numa_available;
[-- Attachment #1.3: 0x17A67A9C.asc --]
[-- Type: application/pgp-keys, Size: 2858 bytes --]
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 197 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: realloc function
2011-01-02 17:37 realloc function Vasileios Karakasis
@ 2011-01-02 23:42 ` Andi Kleen
2011-01-03 21:56 ` Vasileios Karakasis
0 siblings, 1 reply; 11+ messages in thread
From: Andi Kleen @ 2011-01-02 23:42 UTC (permalink / raw)
To: Vasileios Karakasis; +Cc: linux-numa, 'Kornilios Kourtis'
> I am submitting a patch for a realloc function that might be useful. The
> proposed numa_realloc() is merely a wrapper to mremap(), which it calls
> with the flag MREMAP_MAYMOVE. The policy of the vm area is copied by the
> kernel in case of moving. I am also submitting a test program, that
> keeps expanding an initial allocation until a limit is reached and
> checks the mempolicy of the expanded area in every iteration.
>
> My use case is a dynamic array implementation which uses realloc() to
> dynamically expand the array and I want to convert it to a numa-aware
> implementation.
You need to call numa_police_memory_int() in the function, otherwise
the policy won't be actually preserved.
-Andi
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: realloc function
2011-01-02 23:42 ` Andi Kleen
@ 2011-01-03 21:56 ` Vasileios Karakasis
2011-01-03 22:44 ` Cliff Wickman
2011-01-04 22:20 ` Andi Kleen
0 siblings, 2 replies; 11+ messages in thread
From: Vasileios Karakasis @ 2011-01-03 21:56 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-numa, 'Kornilios Kourtis'
[-- Attachment #1.1: Type: text/plain, Size: 1428 bytes --]
Hi,
I am sending you the updated patch (against the latest 2.0.6 version). I
call numa_police_memory_int() only for the newly allocated pages, when
the area is expanded. I also added a numa_realloc_onnode() function in
the same fashion as numa_alloc_onnode(), which sets a
specific memory binding. I pass the MPOL_MF_MOVE flag to mbind(), but I
am not sure if this is worth it, since the call becomes too slow even
in the case of no page migration. Without the MPOL_MF_MOVE flag, of
course, if the policy changes between realloc's, previously allocated
pages won't be affected.
Regards,
On 01/03/2011 01:42 AM, Andi Kleen wrote:
>
>> I am submitting a patch for a realloc function that might be useful. The
>> proposed numa_realloc() is merely a wrapper to mremap(), which it calls
>> with the flag MREMAP_MAYMOVE. The policy of the vm area is copied by the
>> kernel in case of moving. I am also submitting a test program, that
>> keeps expanding an initial allocation until a limit is reached and
>> checks the mempolicy of the expanded area in every iteration.
>>
>> My use case is a dynamic array implementation which uses realloc() to
>> dynamically expand the array and I want to convert it to a numa-aware
>> implementation.
>
> You need to call numa_police_memory_int() in the function, otherwise
> the policy won't be actually preserved.
>
> -Andi
>
>
--
V.K.
[-- Attachment #1.2: numactl-2.0.6-realloc-patch --]
[-- Type: text/plain, Size: 7049 bytes --]
diff -urN numactl-2.0.6-orig/libnuma.c numactl-2.0.6/libnuma.c
--- numactl-2.0.6-orig/libnuma.c 2011-01-03 15:09:23.000000000 +0200
+++ numactl-2.0.6/libnuma.c 2011-01-03 23:15:14.000000000 +0200
@@ -871,6 +871,17 @@
return mem;
}
+void *numa_realloc(void *old_addr, size_t old_size, size_t new_size)
+{
+ char *mem;
+ mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE);
+ if (mem == (char *)-1)
+ return NULL;
+ if (new_size > old_size)
+ numa_police_memory_int(mem + old_size, new_size - old_size);
+ return mem;
+}
+
void *numa_alloc_interleaved_subset_v1(size_t size, const nodemask_t *mask)
{
char *mem;
@@ -997,6 +1008,28 @@
return mem;
}
+void *numa_realloc_onnode(void *old_addr, size_t old_size, size_t new_size,
+ int node)
+{
+ char *mem;
+ struct bitmask *bmp;
+
+ bmp = numa_allocate_nodemask();
+ numa_bitmask_setbit(bmp, node);
+ mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE);
+ if (mem == (char *)-1) {
+ mem = NULL;
+ } else {
+ unsigned int mbind_flags_save = mbind_flags;
+ mbind_flags |= MPOL_MF_MOVE;
+ dombind(mem, new_size, bind_policy, bmp);
+ mbind_flags = mbind_flags_save;
+ }
+
+ numa_bitmask_free(bmp);
+ return mem;
+}
+
void *numa_alloc_local(size_t size)
{
char *mem;
diff -urN numactl-2.0.6-orig/Makefile numactl-2.0.6/Makefile
--- numactl-2.0.6-orig/Makefile 2011-01-03 15:09:23.000000000 +0200
+++ numactl-2.0.6/Makefile 2011-01-03 23:22:57.000000000 +0200
@@ -31,7 +31,7 @@
test/after test/before threadtest test_move_pages \
test/mbind_mig_pages test/migrate_pages \
migratepages migspeed migspeed.o libnuma.a \
- test/move_pages
+ test/move_pages test/realloc_test
SOURCES := bitops.c libnuma.c distance.c memhog.c numactl.c numademo.c \
numamon.c shm.c stream_lib.c stream_main.c syscall.c util.c mt.c \
clearcache.c test/*.c
@@ -43,7 +43,7 @@
all: numactl migratepages migspeed libnuma.so numademo numamon memhog \
test/tshared stream test/mynode test/pagesize test/ftok test/prefered \
test/randmap test/nodemap test/distance test/tbitmap test/move_pages \
- test/mbind_mig_pages test/migrate_pages libnuma.a
+ test/mbind_mig_pages test/migrate_pages test/realloc_test libnuma.a
numactl: numactl.o util.o shm.o bitops.o libnuma.so
@@ -123,6 +123,8 @@
test/migrate_pages: test/migrate_pages.c libnuma.so
+test/realloc_test: test/realloc_test.c libnuma.so
+
.PHONY: install all clean html depend
MANPAGES := numa.3 numactl.8 numastat.8 migratepages.8 migspeed.8
diff -urN numactl-2.0.6-orig/numa.h numactl-2.0.6/numa.h
--- numactl-2.0.6-orig/numa.h 2011-01-03 15:09:23.000000000 +0200
+++ numactl-2.0.6/numa.h 2011-01-03 21:06:08.000000000 +0200
@@ -208,10 +208,14 @@
void *numa_alloc_interleaved(size_t size);
/* Alloc memory located on node */
void *numa_alloc_onnode(size_t size, int node);
+/* Realloc memory located on node */
+void *numa_realloc_onnode(void *old_addr, size_t old_size, size_t new_size, int node);
/* Alloc memory on local node */
void *numa_alloc_local(size_t size);
/* Allocation with current policy */
void *numa_alloc(size_t size);
+/* Realloc memory, binding properties are preserved. */
+void *numa_realloc(void *old_addr, size_t old_size, size_t new_size);
/* Free memory allocated by the functions above */
void numa_free(void *mem, size_t size);
Binary files numactl-2.0.6-orig/test/move_pages and numactl-2.0.6/test/move_pages differ
diff -urN numactl-2.0.6-orig/test/realloc_test.c numactl-2.0.6/test/realloc_test.c
--- numactl-2.0.6-orig/test/realloc_test.c 1970-01-01 02:00:00.000000000 +0200
+++ numactl-2.0.6/test/realloc_test.c 2011-01-03 23:19:31.000000000 +0200
@@ -0,0 +1,119 @@
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include "numa.h"
+#include "numaif.h"
+
+#define DEFAULT_NR_PAGES 1024
+
+static int parse_int(const char *str)
+{
+ char *endptr;
+ long ret = strtol(str, &endptr, 0);
+ if (*endptr != '\0') {
+ fprintf(stderr, "[error] strtol() failed: parse error: %s\n", endptr);
+ exit(1);
+ }
+
+ if (errno == ERANGE)
+ fprintf(stderr, "[warning] strtol() out of range\n");
+
+ if (ret > INT_MAX || ret < INT_MIN) {
+ fprintf(stderr, "[warning] parse_int() out of range\n");
+ ret = (ret > 0) ? INT_MAX : INT_MIN;
+ }
+
+ return (int) ret;
+}
+
+int main(int argc, char **argv)
+{
+ char *mem;
+ int page_size = numa_pagesize();
+ int node = 0;
+ int nr_pages = DEFAULT_NR_PAGES;
+
+ if (numa_available() < 0) {
+ fprintf(stderr, "numa is not available");
+ exit(1);
+ }
+
+ if (argc > 1)
+ node = parse_int(argv[1]);
+ if (argc > 2)
+ nr_pages = parse_int(argv[2]);
+
+ mem = numa_alloc_onnode(page_size, node);
+
+ /* Store the policy of the newly allocated area */
+ unsigned long nodemask;
+ int mode;
+ int nr_nodes = numa_num_possible_nodes();
+ if (get_mempolicy(&mode, &nodemask, nr_nodes, mem,
+ MPOL_F_NODE | MPOL_F_ADDR) < 0) {
+ perror("get_mempolicy() failed");
+ exit(1);
+ }
+
+ /* Print some info */
+ printf("Page size: %d\n", page_size);
+ printf("Pages realloc'ed: %d\n", nr_pages);
+ printf("Allocate data in node: %d\n", node);
+
+ /* Request pages until a realloc moves the data */
+ int i;
+ int nr_inplace = 0;
+ int nr_moved = 0;
+ for (i = 0; i < nr_pages; i++) {
+ /* Enlarge mem with one more page */
+#ifdef TEST_REALLOC_ONNODE
+ char *new_mem = numa_realloc_onnode(mem,
+ (i+1)*page_size,
+ (i+2)*page_size, node);
+#else
+ char *new_mem = numa_realloc(mem, (i+1)*page_size, (i+2)*page_size);
+#endif
+ if (!new_mem) {
+ perror("numa_realloc() failed");
+ exit(1);
+ }
+
+ if (new_mem == mem)
+ ++nr_inplace;
+ else
+ ++nr_moved;
+ mem = new_mem;
+
+ /* Check the policy of the realloc'ed area */
+ unsigned long realloc_nodemask;
+ int realloc_mode;
+ if (get_mempolicy(&realloc_mode, &realloc_nodemask,
+ nr_nodes, mem, MPOL_F_NODE | MPOL_F_ADDR) < 0) {
+ perror("get_mempolicy() failed");
+ exit(1);
+ }
+
+ assert(realloc_nodemask == nodemask &&
+ realloc_mode == mode && "policy changed");
+ }
+
+ /* Shrink to the original size */
+#ifdef TEST_REALLOC_ONNODE
+ mem = numa_realloc_onnode(mem, (nr_pages + 1)*page_size, page_size, node);
+#else
+ mem = numa_realloc(mem, (nr_pages + 1)*page_size, page_size);
+#endif
+ if (!mem) {
+ perror("numa_realloc() failed");
+ exit(1);
+ }
+
+ numa_free(mem, page_size);
+ printf("In-place reallocs: %d\n", nr_inplace);
+ printf("Moved reallocs: %d\n", nr_moved);
+ return 0;
+}
diff -urN numactl-2.0.6-orig/versions.ldscript numactl-2.0.6/versions.ldscript
--- numactl-2.0.6-orig/versions.ldscript 2011-01-03 15:09:23.000000000 +0200
+++ numactl-2.0.6/versions.ldscript 2011-01-03 21:09:13.000000000 +0200
@@ -87,6 +87,8 @@
numa_alloc_interleaved_subset;
numa_alloc_local;
numa_alloc_onnode;
+ numa_realloc_onnode;
+ numa_realloc;
numa_allocate_cpumask;
numa_allocate_nodemask;
numa_available;
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 197 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: realloc function
2011-01-03 21:56 ` Vasileios Karakasis
@ 2011-01-03 22:44 ` Cliff Wickman
2011-01-04 22:20 ` Andi Kleen
1 sibling, 0 replies; 11+ messages in thread
From: Cliff Wickman @ 2011-01-03 22:44 UTC (permalink / raw)
To: Vasileios Karakasis; +Cc: Andi Kleen, linux-numa, 'Kornilios Kourtis'
Hi Vasileios,
Thanks for the patch.
I'm putting it on my to-do list for 2.0.7.
I'll let you know when it's in a release candidate tarball.
-Cliff
On Mon, Jan 03, 2011 at 11:56:42PM +0200, Vasileios Karakasis wrote:
> Hi,
>
> I am sending you the updated patch (against the latest 2.0.6 version). I
> call numa_police_memory_int() only for the newly allocated pages, when
> the area is expanded. I also added a numa_realloc_onnode() function in
> the same fashion as that of the numa_alloc_onnode(), which sets a
> specific memory binding. I pass the MPOL_MF_MOVE flag to mbind(), but I
> am not sure if this is worth it, since the call becomes too slow even
> in the case of no page migration. Without the MPOL_MF_MOVE flag, of
> course, if the policy changes between realloc's, previously allocated
> pages won't be affected.
>
> Regards,
>
> On 01/03/2011 01:42 AM, Andi Kleen wrote:
> >
> >> I am submitting a patch for a realloc function that might be useful. The
> >> proposed numa_realloc() is merely a wrapper to mremap(), which it calls
> >> with the flag MREMAP_MAYMOVE. The policy of the vm area is copied by the
> >> kernel in case of moving. I am also submitting a test program, that
> >> keeps expanding an initial allocation until a limit is reached and
> >> checks the mempolicy of the expanded area in every iteration.
> >>
> >> My use case is a dynamic array implementation which uses realloc() to
> >> dynamically expand the array and I want to convert it to a numa-aware
> >> implementation.
> >
> > You need to call numa_police_memory_int() in the function, otherwise
> > the policy won't be actually preserved.
> >
> > -Andi
> >
> >
>
> --
> V.K.
> diff -urN numactl-2.0.6-orig/libnuma.c numactl-2.0.6/libnuma.c
> --- numactl-2.0.6-orig/libnuma.c 2011-01-03 15:09:23.000000000 +0200
> +++ numactl-2.0.6/libnuma.c 2011-01-03 23:15:14.000000000 +0200
> @@ -871,6 +871,17 @@
> return mem;
> }
>
> +void *numa_realloc(void *old_addr, size_t old_size, size_t new_size)
> +{
> + char *mem;
> + mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE);
> + if (mem == (char *)-1)
> + return NULL;
> + if (new_size > old_size)
> + numa_police_memory_int(mem + old_size, new_size - old_size);
> + return mem;
> +}
> +
> void *numa_alloc_interleaved_subset_v1(size_t size, const nodemask_t *mask)
> {
> char *mem;
> @@ -997,6 +1008,28 @@
> return mem;
> }
>
> +void *numa_realloc_onnode(void *old_addr, size_t old_size, size_t new_size,
> + int node)
> +{
> + char *mem;
> + struct bitmask *bmp;
> +
> + bmp = numa_allocate_nodemask();
> + numa_bitmask_setbit(bmp, node);
> + mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE);
> + if (mem == (char *)-1) {
> + mem = NULL;
> + } else {
> + unsigned int mbind_flags_save = mbind_flags;
> + mbind_flags |= MPOL_MF_MOVE;
> + dombind(mem, new_size, bind_policy, bmp);
> + mbind_flags = mbind_flags_save;
> + }
> +
> + numa_bitmask_free(bmp);
> + return mem;
> +}
> +
> void *numa_alloc_local(size_t size)
> {
> char *mem;
> diff -urN numactl-2.0.6-orig/Makefile numactl-2.0.6/Makefile
> --- numactl-2.0.6-orig/Makefile 2011-01-03 15:09:23.000000000 +0200
> +++ numactl-2.0.6/Makefile 2011-01-03 23:22:57.000000000 +0200
> @@ -31,7 +31,7 @@
> test/after test/before threadtest test_move_pages \
> test/mbind_mig_pages test/migrate_pages \
> migratepages migspeed migspeed.o libnuma.a \
> - test/move_pages
> + test/move_pages test/realloc_test
> SOURCES := bitops.c libnuma.c distance.c memhog.c numactl.c numademo.c \
> numamon.c shm.c stream_lib.c stream_main.c syscall.c util.c mt.c \
> clearcache.c test/*.c
> @@ -43,7 +43,7 @@
> all: numactl migratepages migspeed libnuma.so numademo numamon memhog \
> test/tshared stream test/mynode test/pagesize test/ftok test/prefered \
> test/randmap test/nodemap test/distance test/tbitmap test/move_pages \
> - test/mbind_mig_pages test/migrate_pages libnuma.a
> + test/mbind_mig_pages test/migrate_pages test/realloc_test libnuma.a
>
> numactl: numactl.o util.o shm.o bitops.o libnuma.so
>
> @@ -123,6 +123,8 @@
>
> test/migrate_pages: test/migrate_pages.c libnuma.so
>
> +test/realloc_test: test/realloc_test.c libnuma.so
> +
> .PHONY: install all clean html depend
>
> MANPAGES := numa.3 numactl.8 numastat.8 migratepages.8 migspeed.8
> diff -urN numactl-2.0.6-orig/numa.h numactl-2.0.6/numa.h
> --- numactl-2.0.6-orig/numa.h 2011-01-03 15:09:23.000000000 +0200
> +++ numactl-2.0.6/numa.h 2011-01-03 21:06:08.000000000 +0200
> @@ -208,10 +208,14 @@
> void *numa_alloc_interleaved(size_t size);
> /* Alloc memory located on node */
> void *numa_alloc_onnode(size_t size, int node);
> +/* Realloc memory located on node */
> +void *numa_realloc_onnode(void *old_addr, size_t old_size, size_t new_size, int node);
> /* Alloc memory on local node */
> void *numa_alloc_local(size_t size);
> /* Allocation with current policy */
> void *numa_alloc(size_t size);
> +/* Realloc memory, binding properties are preserved. */
> +void *numa_realloc(void *old_addr, size_t old_size, size_t new_size);
> /* Free memory allocated by the functions above */
> void numa_free(void *mem, size_t size);
>
> Binary files numactl-2.0.6-orig/test/move_pages and numactl-2.0.6/test/move_pages differ
> diff -urN numactl-2.0.6-orig/test/realloc_test.c numactl-2.0.6/test/realloc_test.c
> --- numactl-2.0.6-orig/test/realloc_test.c 1970-01-01 02:00:00.000000000 +0200
> +++ numactl-2.0.6/test/realloc_test.c 2011-01-03 23:19:31.000000000 +0200
> @@ -0,0 +1,119 @@
> +#include <assert.h>
> +#include <errno.h>
> +#include <limits.h>
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <sys/mman.h>
> +#include "numa.h"
> +#include "numaif.h"
> +
> +#define DEFAULT_NR_PAGES 1024
> +
> +static int parse_int(const char *str)
> +{
> + char *endptr;
> + long ret = strtol(str, &endptr, 0);
> + if (*endptr != '\0') {
> + fprintf(stderr, "[error] strtol() failed: parse error: %s\n", endptr);
> + exit(1);
> + }
> +
> + if (errno == ERANGE)
> + fprintf(stderr, "[warning] strtol() out of range\n");
> +
> + if (ret > INT_MAX || ret < INT_MIN) {
> + fprintf(stderr, "[warning] parse_int() out of range\n");
> + ret = (ret > 0) ? INT_MAX : INT_MIN;
> + }
> +
> + return (int) ret;
> +}
> +
> +int main(int argc, char **argv)
> +{
> + char *mem;
> + int page_size = numa_pagesize();
> + int node = 0;
> + int nr_pages = DEFAULT_NR_PAGES;
> +
> + if (numa_available() < 0) {
> + fprintf(stderr, "numa is not available");
> + exit(1);
> + }
> +
> + if (argc > 1)
> + node = parse_int(argv[1]);
> + if (argc > 2)
> + nr_pages = parse_int(argv[2]);
> +
> + mem = numa_alloc_onnode(page_size, node);
> +
> + /* Store the policy of the newly allocated area */
> + unsigned long nodemask;
> + int mode;
> + int nr_nodes = numa_num_possible_nodes();
> + if (get_mempolicy(&mode, &nodemask, nr_nodes, mem,
> + MPOL_F_NODE | MPOL_F_ADDR) < 0) {
> + perror("get_mempolicy() failed");
> + exit(1);
> + }
> +
> + /* Print some info */
> + printf("Page size: %d\n", page_size);
> + printf("Pages realloc'ed: %d\n", nr_pages);
> + printf("Allocate data in node: %d\n", node);
> +
> + /* Request pages until a realloc moves the data */
> + int i;
> + int nr_inplace = 0;
> + int nr_moved = 0;
> + for (i = 0; i < nr_pages; i++) {
> + /* Enlarge mem with one more page */
> +#ifdef TEST_REALLOC_ONNODE
> + char *new_mem = numa_realloc_onnode(mem,
> + (i+1)*page_size,
> + (i+2)*page_size, node);
> +#else
> + char *new_mem = numa_realloc(mem, (i+1)*page_size, (i+2)*page_size);
> +#endif
> + if (!new_mem) {
> + perror("numa_realloc() failed");
> + exit(1);
> + }
> +
> + if (new_mem == mem)
> + ++nr_inplace;
> + else
> + ++nr_moved;
> + mem = new_mem;
> +
> + /* Check the policy of the realloc'ed area */
> + unsigned long realloc_nodemask;
> + int realloc_mode;
> + if (get_mempolicy(&realloc_mode, &realloc_nodemask,
> + nr_nodes, mem, MPOL_F_NODE | MPOL_F_ADDR) < 0) {
> + perror("get_mempolicy() failed");
> + exit(1);
> + }
> +
> + assert(realloc_nodemask == nodemask &&
> + realloc_mode == mode && "policy changed");
> + }
> +
> + /* Shrink to the original size */
> +#ifdef TEST_REALLOC_ONNODE
> + mem = numa_realloc_onnode(mem, (nr_pages + 1)*page_size, page_size, node);
> +#else
> + mem = numa_realloc(mem, (nr_pages + 1)*page_size, page_size);
> +#endif
> + if (!mem) {
> + perror("numa_realloc() failed");
> + exit(1);
> + }
> +
> + numa_free(mem, page_size);
> + printf("In-place reallocs: %d\n", nr_inplace);
> + printf("Moved reallocs: %d\n", nr_moved);
> + return 0;
> +}
> diff -urN numactl-2.0.6-orig/versions.ldscript numactl-2.0.6/versions.ldscript
> --- numactl-2.0.6-orig/versions.ldscript 2011-01-03 15:09:23.000000000 +0200
> +++ numactl-2.0.6/versions.ldscript 2011-01-03 21:09:13.000000000 +0200
> @@ -87,6 +87,8 @@
> numa_alloc_interleaved_subset;
> numa_alloc_local;
> numa_alloc_onnode;
> + numa_realloc_onnode;
> + numa_realloc;
> numa_allocate_cpumask;
> numa_allocate_nodemask;
> numa_available;
--
Cliff Wickman
SGI
cpw@sgi.com
(651) 683-3824
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: realloc function
2011-01-03 21:56 ` Vasileios Karakasis
2011-01-03 22:44 ` Cliff Wickman
@ 2011-01-04 22:20 ` Andi Kleen
2011-01-05 12:11 ` Vasileios Karakasis
1 sibling, 1 reply; 11+ messages in thread
From: Andi Kleen @ 2011-01-04 22:20 UTC (permalink / raw)
To: Vasileios Karakasis; +Cc: Andi Kleen, linux-numa, 'Kornilios Kourtis'
> Hi,
>
> I am sending you the updated patch (against the latest 2.0.6 version). I
> call numa_police_memory_int() only for the newly allocated pages, when
> the area is expanded. I also added a numa_realloc_onnode() function in
> the same fashion as that of the numa_alloc_onnode(), which sets a
> specific memory binding. I pass the MPOL_MF_MOVE flag to mbind(), but I
> am not sure if this is worth it, since the call becomes too slow even
> in the case of no page migration. Without the MPOL_MF_MOVE flag, of
> course, if the policy changes between realloc's, previously allocated
> pages won't be affected.
Thinking about it more, police_* is likely still the wrong semantics.
That will always set the current policy.
But the user more likely wants the same policy the original
mapping had, right?
This could be implemented by calling get_mempolicy() on the old
mapping with MPOL_F_ADDR and setting it on the new pages in
the new mapping.
-Andi
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: realloc function
2011-01-04 22:20 ` Andi Kleen
@ 2011-01-05 12:11 ` Vasileios Karakasis
2011-01-05 15:00 ` Vasileios Karakasis
0 siblings, 1 reply; 11+ messages in thread
From: Vasileios Karakasis @ 2011-01-05 12:11 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-numa, 'Kornilios Kourtis'
[-- Attachment #1: Type: text/plain, Size: 1543 bytes --]
On 01/05/2011 12:20 AM, Andi Kleen wrote:
>> Hi,
>>
>> I am sending you the updated patch (against the latest 2.0.6 version). I
>> call numa_police_memory_int() only for the newly allocated pages, when
>> the area is expanded. I also added a numa_realloc_onnode() function in
>> the same fashion as that of the numa_alloc_onnode(), which sets a
>> specific memory binding. I pass the MPOL_MF_MOVE flag to mbind(), but I
>> am not sure if this is worth it, since the call becomes too slow even
>> in the case of no page migration. Without the MPOL_MF_MOVE flag, of
>> course, if the policy changes between realloc's, previously allocated
>> pages won't be affected.
>
> Thinking about it more police_* is likely still the wrong semantics.
> That will always set the current policy.
>
> But the user more likely wants the same policy the original
> mapping had, right?
I agree with that. In my use case at least, I start with an
alloc_on_node() and keep realloc'ing assuming all new pages will be
allocated on the node I specified. Of course, this further calls into
question the need for a realloc_onnode() function, since its functionality
overlaps with that of migrating/moving pages. So adopting these
semantics, I think we can drop the numa_realloc_onnode().
>
> This could be implemented by calling get_mempolicy() on the old
> mapping with MPOL_F_ADDR and setting it on the new pages in
> the new mapping.
>
I will come up with a patch in the next few days.
> -Andi
>
>
Regards,
--
V.K.
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 197 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: realloc function
2011-01-05 12:11 ` Vasileios Karakasis
@ 2011-01-05 15:00 ` Vasileios Karakasis
2011-01-05 19:25 ` Andi Kleen
0 siblings, 1 reply; 11+ messages in thread
From: Vasileios Karakasis @ 2011-01-05 15:00 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-numa, 'Kornilios Kourtis'
[-- Attachment #1: Type: text/plain, Size: 2730 bytes --]
On 01/05/2011 02:11 PM, Vasileios Karakasis wrote:
>
>
> On 01/05/2011 12:20 AM, Andi Kleen wrote:
>>> Hi,
>>>
>>> I am sending you the updated patch (against the latest 2.0.6 version). I
>>> call numa_police_memory_int() only for the newly allocated pages, when
>>> the area is expanded. I also added a numa_realloc_onnode() function in
>>> the same fashion as that of the numa_alloc_onnode(), which sets a
>>> specific memory binding. I pass the MPOL_MF_MOVE flag to mbind(), but I
>>> am not sure if this is worth it, since the call becomes too slow even
>>> in the case of no page migration. Without the MPOL_MF_MOVE flag, of
>>> course, if the policy changes between realloc's, previously allocated
>>> pages won't be affected.
>>
>> Thinking about it more police_* is likely still the wrong semantics.
>> That will always set the current policy.
>>
>> But the user more likely wants the same policy the original
>> mapping had, right?
>
> I agree with that. In my use case at least, I start with an
> alloc_on_node() and keep realloc'ing assuming all new pages will be
> allocated on the node I specified. Of course, this questions more the
> existence of a realloc_onnode() function, since its functionality
> overlaps with that of migrating/moving pages. So adopting these
> semantics, I think we can drop the numa_realloc_onnode().
>
>>
>> This could be implemented by calling get_mempolicy() on the old
>> mapping with MPOL_F_ADDR and setting it on the new pages in
>> the new mapping.
>>
>
> I will come up with a patch in the next few days.
Peeking inside the mremap() source, I can see that the kernel already
does this, i.e., mremap() preserves the policy of the original vm area.
The problem is when the user has not specified a binding for the
original mapping (default policy), in which case explicitly copying the
policy from the old to the new pages won't work either; the new pages
will still have MPOL_DEFAULT. So realloc() cannot guarantee that the new
pages will be allocated on the same node as the preceding alloc(),
unless there is a way to obtain the actual node that the pages of the
original allocation were allocated on. In my opinion, this isn't a real
problem, because even the simple numa_alloc() using the default policy
cannot guarantee that the pages will be allocated on the node of the
calling cpu: what if the task is migrated to a different cpu on a
different node, while touching (i.e., allocating) the pages with the
police_memory_int()?
However, if the user calls one of the functions that call mbind(), e.g.,
alloc_onnode(), then just mremap() will work fine.
>
>> -Andi
>>
>>
>
> Regards,
--
V.K.
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 197 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: realloc function
2011-01-05 15:00 ` Vasileios Karakasis
@ 2011-01-05 19:25 ` Andi Kleen
2011-01-10 22:12 ` Vasileios Karakasis
0 siblings, 1 reply; 11+ messages in thread
From: Andi Kleen @ 2011-01-05 19:25 UTC (permalink / raw)
To: Vasileios Karakasis; +Cc: Andi Kleen, linux-numa, 'Kornilios Kourtis'
On Wed, Jan 05, 2011 at 05:00:43PM +0200, Vasileios Karakasis wrote:
> Peeking inside the mremap() source, I can see that the kernel already
> does this, i.e., mremap() preserves the policy of the original vm area.
That is true.
>
> The problem is when the user has not specified a binding for the
> original mapping (default policy), in which case copying explicitly the
> policy from the old to the new pages won't work either; the new pages
> will still have MPOL_DEFAULT. So realloc() cannot guarantee that the new
It would be possible to do
get_mempolicy MPOL_F_ADDR
if policy == MPOL_DEFAULT:
get_mempolicy MPOL_F_NODE|MPOL_F_ADDR, &node
mbind MPOL_PREFERRED, node
But then you end up with preferred instead of default. It should
usually be the same, but may not be in some corner cases.
I guess you're right and that case is too obscure to care about.
I guess your original patch without anything was good enough.
It may be worth it to add some comments on this rationale though.
> pages will be allocated on the same node as the preceding alloc(),
> unless there is a way to obtain the actual node that the pages of the
> original allocation were allocated on. In my opinion, this isn't a real
> problem, because even the simple numa_alloc() using the default policy,
> cannot guarantee that the pages will be allocated on the node of the
> calling cpu: what if the task is migrated to a different cpu on a
> different node, while touching (i.e., allocating) the pages with the
> police_memory_int()?
process policy and MPOL_DEFAULT are always just heuristics; such races
can always occur. They usually should not because the scheduler
does not migrate too frequently.
-Andi
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: realloc function
2011-01-05 19:25 ` Andi Kleen
@ 2011-01-10 22:12 ` Vasileios Karakasis
2011-01-10 22:17 ` Andi Kleen
2011-01-11 16:29 ` Cliff Wickman
0 siblings, 2 replies; 11+ messages in thread
From: Vasileios Karakasis @ 2011-01-10 22:12 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-numa, 'Kornilios Kourtis'
[-- Attachment #1.1: Type: text/plain, Size: 2067 bytes --]
Hi,
I am submitting the final patch. Essentially, it is my original enhanced
with some comments about the rationale as we discussed it here and an
entry + brief description in the man page.
Regards,
On 01/05/2011 09:25 PM, Andi Kleen wrote:
> On Wed, Jan 05, 2011 at 05:00:43PM +0200, Vasileios Karakasis wrote:
>> Peeking inside the mremap() source, I can see that the kernel already
>> does this, i.e., mremap() preserves the policy of the original vm area.
>
> That is true.
>>
>> The problem is when the user has not specified a binding for the
>> original mapping (default policy), in which case copying explicitly the
>> policy from the old to the new pages won't work either; the new pages
>> will still have MPOL_DEFAULT. So realloc() cannot guarantee that the new
>
>
> It would be possible to do
>
> get_mempolicy MPOL_F_ADDR
> if policy == MPOL_DEFAULT:
> get_mempolicy MPOL_F_NODE|MPOL_F_ADDR, &node
> mbind MPOL_PREFERRED, node
>
> But then you end up with preferred instead of default. It should
> be usually the same, but may not in some corner cases.
>
> I guess you're right and that case is too obscure to care about.
> I guess your original patch without anything was good enough.
> It may be worth it to add some comments on this rationale though.
>
>
>> pages will be allocated on the same node as the preceding alloc(),
>> unless there is a way to obtain the actual node that the pages of the
>> original allocation were allocated on. In my opinion, this isn't a real
>> problem, because even the simple numa_alloc() using the default policy,
>> cannot guarantee that the pages will be allocated on the node of the
>> calling cpu: what if the task is migrated to a different cpu on a
>> different node, while touching (i.e., allocating) the pages with the
>> police_memory_int()?
>
> process policy and MPOL_DEFAULT are always just heuristics; such races
> can always occur. They usually should not because the scheduler
> does not migrate too frequently.
>
> -Andi
--
V.K.
[-- Attachment #1.2: numactl-2.0.6-realloc-patch --]
[-- Type: text/plain, Size: 7884 bytes --]
diff -urN numactl-2.0.6-orig/libnuma.c numactl-2.0.6/libnuma.c
--- numactl-2.0.6-orig/libnuma.c 2011-01-03 15:09:23.000000000 +0200
+++ numactl-2.0.6/libnuma.c 2011-01-10 23:49:58.000000000 +0200
@@ -871,6 +871,23 @@
return mem;
}
+void *numa_realloc(void *old_addr, size_t old_size, size_t new_size)
+{
+ char *mem;
+ mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE);
+ if (mem == (char *)-1)
+ return NULL;
+ /*
+ * The memory policy of the allocated pages is preserved by mremap(), so
+ * there is no need to (re)set it here. If the policy of the original
+ * allocation is not set, the new pages will be allocated according to the
+ * process' mempolicy. Trying to allocate explicitly the new pages on the
+ * same node as the original ones would require changing the policy of the
+ * newly allocated pages, which violates the numa_realloc() semantics.
+ */
+ return mem;
+}
+
void *numa_alloc_interleaved_subset_v1(size_t size, const nodemask_t *mask)
{
char *mem;
diff -urN numactl-2.0.6-orig/Makefile numactl-2.0.6/Makefile
--- numactl-2.0.6-orig/Makefile 2011-01-03 15:09:23.000000000 +0200
+++ numactl-2.0.6/Makefile 2011-01-03 23:22:57.000000000 +0200
@@ -31,7 +31,7 @@
test/after test/before threadtest test_move_pages \
test/mbind_mig_pages test/migrate_pages \
migratepages migspeed migspeed.o libnuma.a \
- test/move_pages
+ test/move_pages test/realloc_test
SOURCES := bitops.c libnuma.c distance.c memhog.c numactl.c numademo.c \
numamon.c shm.c stream_lib.c stream_main.c syscall.c util.c mt.c \
clearcache.c test/*.c
@@ -43,7 +43,7 @@
all: numactl migratepages migspeed libnuma.so numademo numamon memhog \
test/tshared stream test/mynode test/pagesize test/ftok test/prefered \
test/randmap test/nodemap test/distance test/tbitmap test/move_pages \
- test/mbind_mig_pages test/migrate_pages libnuma.a
+ test/mbind_mig_pages test/migrate_pages test/realloc_test libnuma.a
numactl: numactl.o util.o shm.o bitops.o libnuma.so
@@ -123,6 +123,8 @@
test/migrate_pages: test/migrate_pages.c libnuma.so
+test/realloc_test: test/realloc_test.c libnuma.so
+
.PHONY: install all clean html depend
MANPAGES := numa.3 numactl.8 numastat.8 migratepages.8 migspeed.8
diff -urN numactl-2.0.6-orig/numa.3 numactl-2.0.6/numa.3
--- numactl-2.0.6-orig/numa.3 2011-01-03 15:09:23.000000000 +0200
+++ numactl-2.0.6/numa.3 2011-01-10 23:39:02.000000000 +0200
@@ -87,6 +87,8 @@
.BI "void *numa_alloc_interleaved_subset(size_t " size ", struct bitmask *" nodemask );
.BI "void *numa_alloc(size_t " size );
.br
+.BI "void *numa_realloc(void *"old_addr ", size_t " old_size ", size_t " new_size );
+.br
.BI "void numa_free(void *" start ", size_t " size );
.sp
.BI "int numa_run_on_node(int " node );
@@ -599,6 +601,39 @@
.BR numa_free ().
On errors NULL is returned.
+.BR numa_realloc ()
+changes the size of the memory area pointed to by
+.I old_addr
+from
+.I old_size
+to
+.I new_size.
+The memory area pointed to by
+.I old_addr
+must have been allocated with one of the
+.BR numa_alloc*
+functions.
+The
+.I new_size
+will be rounded up to a multiple of the system page size. The contents of the
+memory area will be unchanged to the minimum of the old and new sizes; newly
+allocated memory will be uninitialized. The memory policy (and node bindings)
+associated with the original memory area will be preserved in the resized
+area. For example, if the initial area was allocated with a call to
+.BR numa_alloc_onnode(),
+then the new pages (if the area is enlarged) will be allocated on the same node.
+However, if no memory policy was set for the original area, then
+.BR numa_realloc ()
+cannot guarantee that the new pages will be allocated on the same node. On
+success, the address of the resized area is returned (which might be different
+from that of the initial area), otherwise NULL is returned and
+.I errno
+is set to indicate the error. The pointer returned by
+.BR numa_realloc ()
+is suitable for passing to
+.BR numa_free ().
+
+
.BR numa_free ()
frees
.I size
diff -urN numactl-2.0.6-orig/numa.h numactl-2.0.6/numa.h
--- numactl-2.0.6-orig/numa.h 2011-01-03 15:09:23.000000000 +0200
+++ numactl-2.0.6/numa.h 2011-01-11 00:06:12.000000000 +0200
@@ -212,6 +212,8 @@
void *numa_alloc_local(size_t size);
/* Allocation with current policy */
void *numa_alloc(size_t size);
+/* Change the size of a memory area preserving the memory policy */
+void *numa_realloc(void *old_addr, size_t old_size, size_t new_size);
/* Free memory allocated by the functions above */
void numa_free(void *mem, size_t size);
diff -urN numactl-2.0.6-orig/test/realloc_test.c numactl-2.0.6/test/realloc_test.c
--- numactl-2.0.6-orig/test/realloc_test.c 1970-01-01 02:00:00.000000000 +0200
+++ numactl-2.0.6/test/realloc_test.c 2011-01-10 23:55:37.000000000 +0200
@@ -0,0 +1,108 @@
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include "numa.h"
+#include "numaif.h"
+
+#define DEFAULT_NR_PAGES 1024
+
+static int parse_int(const char *str)
+{
+ char *endptr;
+ long ret = strtol(str, &endptr, 0);
+ if (*endptr != '\0') {
+ fprintf(stderr, "[error] strtol() failed: parse error: %s\n", endptr);
+ exit(1);
+ }
+
+ if (errno == ERANGE)
+ fprintf(stderr, "[warning] strtol() out of range\n");
+
+ if (ret > INT_MAX || ret < INT_MIN) {
+ fprintf(stderr, "[warning] parse_int() out of range\n");
+ ret = (ret > 0) ? INT_MAX : INT_MIN;
+ }
+
+ return (int) ret;
+}
+
+int main(int argc, char **argv)
+{
+ char *mem;
+ int page_size = numa_pagesize();
+ int node = 0;
+ int nr_pages = DEFAULT_NR_PAGES;
+
+ if (numa_available() < 0) {
+ fprintf(stderr, "numa is not available");
+ exit(1);
+ }
+
+ if (argc > 1)
+ node = parse_int(argv[1]);
+ if (argc > 2)
+ nr_pages = parse_int(argv[2]);
+
+ mem = numa_alloc_onnode(page_size, node);
+
+ /* Store the policy of the newly allocated area */
+ unsigned long nodemask;
+ int mode;
+ int nr_nodes = numa_num_possible_nodes();
+ if (get_mempolicy(&mode, &nodemask, nr_nodes, mem,
+ MPOL_F_NODE | MPOL_F_ADDR) < 0) {
+ perror("get_mempolicy() failed");
+ exit(1);
+ }
+
+ /* Print some info */
+ printf("Page size: %d\n", page_size);
+ printf("Pages realloc'ed: %d\n", nr_pages);
+ printf("Allocate data in node: %d\n", node);
+
+ int i;
+ int nr_inplace = 0;
+ int nr_moved = 0;
+ for (i = 0; i < nr_pages; i++) {
+ /* Enlarge mem with one more page */
+ char *new_mem = numa_realloc(mem, (i+1)*page_size, (i+2)*page_size);
+ if (!new_mem) {
+ perror("numa_realloc() failed");
+ exit(1);
+ }
+
+ if (new_mem == mem)
+ ++nr_inplace;
+ else
+ ++nr_moved;
+ mem = new_mem;
+
+ /* Check the policy of the realloc'ed area */
+ unsigned long realloc_nodemask;
+ int realloc_mode;
+ if (get_mempolicy(&realloc_mode, &realloc_nodemask,
+ nr_nodes, mem, MPOL_F_NODE | MPOL_F_ADDR) < 0) {
+ perror("get_mempolicy() failed");
+ exit(1);
+ }
+
+ assert(realloc_nodemask == nodemask &&
+ realloc_mode == mode && "policy changed");
+ }
+
+ /* Shrink to the original size */
+ mem = numa_realloc(mem, (nr_pages + 1)*page_size, page_size);
+ if (!mem) {
+ perror("numa_realloc() failed");
+ exit(1);
+ }
+
+ numa_free(mem, page_size);
+ printf("In-place reallocs: %d\n", nr_inplace);
+ printf("Moved reallocs: %d\n", nr_moved);
+ return 0;
+}
diff -urN numactl-2.0.6-orig/versions.ldscript numactl-2.0.6/versions.ldscript
--- numactl-2.0.6-orig/versions.ldscript 2011-01-03 15:09:23.000000000 +0200
+++ numactl-2.0.6/versions.ldscript 2011-01-10 18:36:37.000000000 +0200
@@ -87,6 +87,7 @@
numa_alloc_interleaved_subset;
numa_alloc_local;
numa_alloc_onnode;
+ numa_realloc;
numa_allocate_cpumask;
numa_allocate_nodemask;
numa_available;
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 197 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: realloc function
2011-01-10 22:12 ` Vasileios Karakasis
@ 2011-01-10 22:17 ` Andi Kleen
2011-01-11 16:29 ` Cliff Wickman
1 sibling, 0 replies; 11+ messages in thread
From: Andi Kleen @ 2011-01-10 22:17 UTC (permalink / raw)
To: Vasileios Karakasis; +Cc: Andi Kleen, linux-numa, 'Kornilios Kourtis'
On Tue, Jan 11, 2011 at 12:12:36AM +0200, Vasileios Karakasis wrote:
> Hi,
>
> I am submitting the final patch. Essentially, it is my original enhanced
> with some comments about the rationale as we discussed it here and an
> entry + brief description in the man page.
Thanks. Looks good to me now.
-Andi
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: realloc function
2011-01-10 22:12 ` Vasileios Karakasis
2011-01-10 22:17 ` Andi Kleen
@ 2011-01-11 16:29 ` Cliff Wickman
1 sibling, 0 replies; 11+ messages in thread
From: Cliff Wickman @ 2011-01-11 16:29 UTC (permalink / raw)
To: Vasileios Karakasis; +Cc: linux-numa
Hi Vasileios,
Thanks.
And thanks to Andi for the review. I'll put your patch into 2.0.7-rc1.
I'd like to solve an unrelated regression before I put it on
the download page.
-Cliff
On Tue, Jan 11, 2011 at 12:12:36AM +0200, Vasileios Karakasis wrote:
> Hi,
>
> I am submitting the final patch. Essentially, it is my original enhanced
> with some comments about the rationale as we discussed it here and an
> entry + brief description in the man page.
>
> Regards,
>
> On 01/05/2011 09:25 PM, Andi Kleen wrote:
> > On Wed, Jan 05, 2011 at 05:00:43PM +0200, Vasileios Karakasis wrote:
> >> Peeking inside the mremap() source, I can see that the kernel already
> >> does this, i.e., mremap() preserves the policy of the original vm area.
> >
> > That is true.
> >>
> >> The problem is when the user has not specified a binding for the
> >> original mapping (default policy), in which case copying explicitly the
> >> policy from the old to the new pages won't work either; the new pages
> >> will still have MPOL_DEFAULT. So realloc() cannot guarantee that the new
> >
> >
> > It would be possible to do
> >
> > get_mempolicy MPOL_F_ADDR
> > if policy == MPOL_DEFAULT:
> > get_mempolicy MPOL_F_NODE|MPOL_F_ADDR, &node
> > mbind MPOL_PREFERRED, node
> >
> > But then you end up with preferred instead of default. It should
> > be usually the same, but may not in some corner cases.
> >
> > I guess you're right and that case is too obscure to care about.
> > I guess your original patch without anything was good enough.
> > It may be worth it to add some comments on this rationale though.
> >
> >
> >> pages will be allocated on the same node as the preceding alloc(),
> >> unless there is a way to obtain the actual node that the pages of the
> >> original allocation were allocated on. In my opinion, this isn't a real
> >> problem, because even the simple numa_alloc() using the default policy,
> >> cannot guarantee that the pages will be allocated on the node of the
> >> calling cpu: what if the task is migrated to a different cpu on a
> >> different node, while touching (i.e., allocating) the pages with the
> >> police_memory_int()?
> >
> > process policy and MPOL_DEFAULT are always just heuristics; such races
> > can always occur. They usually should not because the scheduler
> > does not migrate too frequently.
> >
> > -Andi
>
> --
> V.K.
> diff -urN numactl-2.0.6-orig/libnuma.c numactl-2.0.6/libnuma.c
> --- numactl-2.0.6-orig/libnuma.c 2011-01-03 15:09:23.000000000 +0200
> +++ numactl-2.0.6/libnuma.c 2011-01-10 23:49:58.000000000 +0200
> @@ -871,6 +871,23 @@
> return mem;
> }
>
> +void *numa_realloc(void *old_addr, size_t old_size, size_t new_size)
> +{
> + char *mem;
> + mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE);
> + if (mem == (char *)-1)
> + return NULL;
> + /*
> + * The memory policy of the allocated pages is preserved by mremap(), so
> + * there is no need to (re)set it here. If the policy of the original
> + * allocation is not set, the new pages will be allocated according to the
> + * process' mempolicy. Trying to allocate explicitly the new pages on the
> + * same node as the original ones would require changing the policy of the
> + * newly allocated pages, which violates the numa_realloc() semantics.
> + */
> + return mem;
> +}
> +
> void *numa_alloc_interleaved_subset_v1(size_t size, const nodemask_t *mask)
> {
> char *mem;
> diff -urN numactl-2.0.6-orig/Makefile numactl-2.0.6/Makefile
> --- numactl-2.0.6-orig/Makefile 2011-01-03 15:09:23.000000000 +0200
> +++ numactl-2.0.6/Makefile 2011-01-03 23:22:57.000000000 +0200
> @@ -31,7 +31,7 @@
> test/after test/before threadtest test_move_pages \
> test/mbind_mig_pages test/migrate_pages \
> migratepages migspeed migspeed.o libnuma.a \
> - test/move_pages
> + test/move_pages test/realloc_test
> SOURCES := bitops.c libnuma.c distance.c memhog.c numactl.c numademo.c \
> numamon.c shm.c stream_lib.c stream_main.c syscall.c util.c mt.c \
> clearcache.c test/*.c
> @@ -43,7 +43,7 @@
> all: numactl migratepages migspeed libnuma.so numademo numamon memhog \
> test/tshared stream test/mynode test/pagesize test/ftok test/prefered \
> test/randmap test/nodemap test/distance test/tbitmap test/move_pages \
> - test/mbind_mig_pages test/migrate_pages libnuma.a
> + test/mbind_mig_pages test/migrate_pages test/realloc_test libnuma.a
>
> numactl: numactl.o util.o shm.o bitops.o libnuma.so
>
> @@ -123,6 +123,8 @@
>
> test/migrate_pages: test/migrate_pages.c libnuma.so
>
> +test/realloc_test: test/realloc_test.c libnuma.so
> +
> .PHONY: install all clean html depend
>
> MANPAGES := numa.3 numactl.8 numastat.8 migratepages.8 migspeed.8
> diff -urN numactl-2.0.6-orig/numa.3 numactl-2.0.6/numa.3
> --- numactl-2.0.6-orig/numa.3 2011-01-03 15:09:23.000000000 +0200
> +++ numactl-2.0.6/numa.3 2011-01-10 23:39:02.000000000 +0200
> @@ -87,6 +87,8 @@
> .BI "void *numa_alloc_interleaved_subset(size_t " size ", struct bitmask *" nodemask );
> .BI "void *numa_alloc(size_t " size );
> .br
> +.BI "void *numa_realloc(void *"old_addr ", size_t " old_size ", size_t " new_size );
> +.br
> .BI "void numa_free(void *" start ", size_t " size );
> .sp
> .BI "int numa_run_on_node(int " node );
> @@ -599,6 +601,39 @@
> .BR numa_free ().
> On errors NULL is returned.
>
> +.BR numa_realloc ()
> +changes the size of the memory area pointed to by
> +.I old_addr
> +from
> +.I old_size
> +to
> +.I new_size.
> +The memory area pointed to by
> +.I old_addr
> +must have been allocated with one of the
> +.BR numa_alloc*
> +functions.
> +The
> +.I new_size
> +will be rounded up to a multiple of the system page size. The contents of the
> +memory area will be unchanged to the minimum of the old and new sizes; newly
> +allocated memory will be uninitialized. The memory policy (and node bindings)
> +associated with the original memory area will be preserved in the resized
> +area. For example, if the initial area was allocated with a call to
> +.BR numa_alloc_onnode(),
> +then the new pages (if the area is enlarged) will be allocated on the same node.
> +However, if no memory policy was set for the original area, then
> +.BR numa_realloc ()
> +cannot guarantee that the new pages will be allocated on the same node. On
> +success, the address of the resized area is returned (which might be different
> +from that of the initial area), otherwise NULL is returned and
> +.I errno
> +is set to indicate the error. The pointer returned by
> +.BR numa_realloc ()
> +is suitable for passing to
> +.BR numa_free ().
> +
> +
> .BR numa_free ()
> frees
> .I size
> diff -urN numactl-2.0.6-orig/numa.h numactl-2.0.6/numa.h
> --- numactl-2.0.6-orig/numa.h 2011-01-03 15:09:23.000000000 +0200
> +++ numactl-2.0.6/numa.h 2011-01-11 00:06:12.000000000 +0200
> @@ -212,6 +212,8 @@
> void *numa_alloc_local(size_t size);
> /* Allocation with current policy */
> void *numa_alloc(size_t size);
> +/* Change the size of a memory area preserving the memory policy */
> +void *numa_realloc(void *old_addr, size_t old_size, size_t new_size);
> /* Free memory allocated by the functions above */
> void numa_free(void *mem, size_t size);
>
> diff -urN numactl-2.0.6-orig/test/realloc_test.c numactl-2.0.6/test/realloc_test.c
> --- numactl-2.0.6-orig/test/realloc_test.c 1970-01-01 02:00:00.000000000 +0200
> +++ numactl-2.0.6/test/realloc_test.c 2011-01-10 23:55:37.000000000 +0200
> @@ -0,0 +1,108 @@
> +#include <assert.h>
> +#include <errno.h>
> +#include <limits.h>
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <sys/mman.h>
> +#include "numa.h"
> +#include "numaif.h"
> +
> +#define DEFAULT_NR_PAGES 1024
> +
> +static int parse_int(const char *str)
> +{
> + char *endptr;
> + long ret = strtol(str, &endptr, 0);
> + if (*endptr != '\0') {
> + fprintf(stderr, "[error] strtol() failed: parse error: %s\n", endptr);
> + exit(1);
> + }
> +
> + if (errno == ERANGE)
> + fprintf(stderr, "[warning] strtol() out of range\n");
> +
> + if (ret > INT_MAX || ret < INT_MIN) {
> + fprintf(stderr, "[warning] parse_int() out of range\n");
> + ret = (ret > 0) ? INT_MAX : INT_MIN;
> + }
> +
> + return (int) ret;
> +}
> +
> +int main(int argc, char **argv)
> +{
> + char *mem;
> + int page_size = numa_pagesize();
> + int node = 0;
> + int nr_pages = DEFAULT_NR_PAGES;
> +
> + if (numa_available() < 0) {
> + fprintf(stderr, "numa is not available");
> + exit(1);
> + }
> +
> + if (argc > 1)
> + node = parse_int(argv[1]);
> + if (argc > 2)
> + nr_pages = parse_int(argv[2]);
> +
> + mem = numa_alloc_onnode(page_size, node);
> +
> + /* Store the policy of the newly allocated area */
> + unsigned long nodemask;
> + int mode;
> + int nr_nodes = numa_num_possible_nodes();
> + if (get_mempolicy(&mode, &nodemask, nr_nodes, mem,
> + MPOL_F_NODE | MPOL_F_ADDR) < 0) {
> + perror("get_mempolicy() failed");
> + exit(1);
> + }
> +
> + /* Print some info */
> + printf("Page size: %d\n", page_size);
> + printf("Pages realloc'ed: %d\n", nr_pages);
> + printf("Allocate data in node: %d\n", node);
> +
> + int i;
> + int nr_inplace = 0;
> + int nr_moved = 0;
> + for (i = 0; i < nr_pages; i++) {
> + /* Enlarge mem with one more page */
> + char *new_mem = numa_realloc(mem, (i+1)*page_size, (i+2)*page_size);
> + if (!new_mem) {
> + perror("numa_realloc() failed");
> + exit(1);
> + }
> +
> + if (new_mem == mem)
> + ++nr_inplace;
> + else
> + ++nr_moved;
> + mem = new_mem;
> +
> + /* Check the policy of the realloc'ed area */
> + unsigned long realloc_nodemask;
> + int realloc_mode;
> + if (get_mempolicy(&realloc_mode, &realloc_nodemask,
> + nr_nodes, mem, MPOL_F_NODE | MPOL_F_ADDR) < 0) {
> + perror("get_mempolicy() failed");
> + exit(1);
> + }
> +
> + assert(realloc_nodemask == nodemask &&
> + realloc_mode == mode && "policy changed");
> + }
> +
> + /* Shrink to the original size */
> + mem = numa_realloc(mem, (nr_pages + 1)*page_size, page_size);
> + if (!mem) {
> + perror("numa_realloc() failed");
> + exit(1);
> + }
> +
> + numa_free(mem, page_size);
> + printf("In-place reallocs: %d\n", nr_inplace);
> + printf("Moved reallocs: %d\n", nr_moved);
> + return 0;
> +}
> diff -urN numactl-2.0.6-orig/versions.ldscript numactl-2.0.6/versions.ldscript
> --- numactl-2.0.6-orig/versions.ldscript 2011-01-03 15:09:23.000000000 +0200
> +++ numactl-2.0.6/versions.ldscript 2011-01-10 18:36:37.000000000 +0200
> @@ -87,6 +87,7 @@
> numa_alloc_interleaved_subset;
> numa_alloc_local;
> numa_alloc_onnode;
> + numa_realloc;
> numa_allocate_cpumask;
> numa_allocate_nodemask;
> numa_available;
--
Cliff Wickman
SGI
cpw@sgi.com
(651) 683-3824
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2011-01-11 16:29 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2011-01-02 17:37 realloc function Vasileios Karakasis
2011-01-02 23:42 ` Andi Kleen
2011-01-03 21:56 ` Vasileios Karakasis
2011-01-03 22:44 ` Cliff Wickman
2011-01-04 22:20 ` Andi Kleen
2011-01-05 12:11 ` Vasileios Karakasis
2011-01-05 15:00 ` Vasileios Karakasis
2011-01-05 19:25 ` Andi Kleen
2011-01-10 22:12 ` Vasileios Karakasis
2011-01-10 22:17 ` Andi Kleen
2011-01-11 16:29 ` Cliff Wickman
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).