From: "Yinghai Lu" <yhlu.kernel@gmail.com>
To: Andi Kleen <andi@firstfloor.org>,
Andrew Morton <akpm@linux-foundation.org>,
Ingo Molnar <mingo@elte.hu>, Christoph Lameter <clameter@sgi.com>
Cc: linux-kernel@vger.kernel.org, pj@sgi.com, linux-mm@kvack.org,
nickpiggin@yahoo.com.au
Subject: Re: [PATCH] [11/18] Fix alignment bug in bootmem allocator
Date: Mon, 17 Mar 2008 14:27:21 -0700 [thread overview]
Message-ID: <86802c440803171427y7c9b2a54nacb0603916713033@mail.gmail.com> (raw)
In-Reply-To: <86802c440803171152y379560dfp1296aefb0b86b54b@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 1159 bytes --]
On Mon, Mar 17, 2008 at 11:52 AM, Yinghai Lu <yhlu.kernel@gmail.com> wrote:
> On Mon, Mar 17, 2008 at 1:56 AM, Andi Kleen <andi@firstfloor.org> wrote:
> > > only happen when align is large than alignment of node_boot_start.
> >
> > Here's an updated version of the patch with this addressed.
> > Please review. The patch is somewhat more complicated, but
> > actually makes the code a little cleaner now.
> >
> > -Andi
> >
> >
> > Fix alignment bug in bootmem allocator
> >
> >
> > Without this fix bootmem can return unaligned addresses when the start of a
> > node is not aligned to the align value. Needed for reliably allocating
> > gigabyte pages.
> >
> > I removed the offset variable because all tests should align themself correctly
> > now. Slight drawback might be that the bootmem allocator will spend
> > some more time skipping bits in the bitmap initially, but that shouldn't
> > be a big issue.
> >
> >
> > Signed-off-by: Andi Kleen <ak@suse.de>
> >
> how about create local node_boot_start and node_bootmem_map that make
> sure node_boot_start has bigger alignment than align input.
please check it
YH
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: offset_alloc_bootmem_v2.patch --]
[-- Type: text/x-patch; name=offset_alloc_bootmem_v2.patch, Size: 4834 bytes --]
[PATCH] mm: offset align in alloc_bootmem v2
need offset alignment when node_boot_start's alignment is less than
align required
v2: use local node_boot_start to match align.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/mm/bootmem.c
===================================================================
--- linux-2.6.orig/mm/bootmem.c
+++ linux-2.6/mm/bootmem.c
@@ -206,9 +206,11 @@ void * __init
__alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
unsigned long align, unsigned long goal, unsigned long limit)
{
- unsigned long offset, remaining_size, areasize, preferred;
+ unsigned long areasize, preferred;
unsigned long i, start = 0, incr, eidx, end_pfn;
void *ret;
+ unsigned long node_boot_start;
+ void *node_bootmem_map;
if (!size) {
printk("__alloc_bootmem_core(): zero-sized request\n");
@@ -216,23 +218,29 @@ __alloc_bootmem_core(struct bootmem_data
}
BUG_ON(align & (align-1));
- if (limit && bdata->node_boot_start >= limit)
- return NULL;
-
/* on nodes without memory - bootmem_map is NULL */
if (!bdata->node_bootmem_map)
return NULL;
+ /* bdata->node_boot_start is supposed to be (12+6)bits alignment ? */
+ if (align && ffs(align) > ffs(bdata->node_boot_start)) {
+ node_boot_start = ALIGN(bdata->node_boot_start, align);
+ node_bootmem_map = (unsigned char *)bdata->node_bootmem_map +
+ PFN_DOWN(node_boot_start - bdata->node_boot_start)/BITS_PER_LONG;
+ } else {
+ node_boot_start = bdata->node_boot_start;
+ node_bootmem_map = bdata->node_bootmem_map;
+ }
+
+ if (limit && node_boot_start >= limit)
+ return NULL;
+
end_pfn = bdata->node_low_pfn;
limit = PFN_DOWN(limit);
if (limit && end_pfn > limit)
end_pfn = limit;
- eidx = end_pfn - PFN_DOWN(bdata->node_boot_start);
- offset = 0;
- if (align && (bdata->node_boot_start & (align - 1UL)) != 0)
- offset = align - (bdata->node_boot_start & (align - 1UL));
- offset = PFN_DOWN(offset);
+ eidx = end_pfn - PFN_DOWN(node_boot_start);
/*
* We try to allocate bootmem pages above 'goal'
@@ -240,15 +248,15 @@ __alloc_bootmem_core(struct bootmem_data
*/
preferred = 0;
if (goal && PFN_DOWN(goal) < end_pfn) {
- if (goal > bdata->node_boot_start)
- preferred = goal - bdata->node_boot_start;
+ if (goal > node_boot_start)
+ preferred = goal - node_boot_start;
if (bdata->last_success >= preferred)
if (!limit || (limit && limit > bdata->last_success))
preferred = bdata->last_success;
}
- preferred = PFN_DOWN(ALIGN(preferred, align)) + offset;
+ preferred = PFN_DOWN(ALIGN(preferred, align));
areasize = (size + PAGE_SIZE-1) / PAGE_SIZE;
incr = align >> PAGE_SHIFT ? : 1;
@@ -256,18 +264,18 @@ restart_scan:
for (i = preferred; i < eidx;) {
unsigned long j;
- i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
+ i = find_next_zero_bit(node_bootmem_map, eidx, i);
i = ALIGN(i, incr);
if (i >= eidx)
break;
- if (test_bit(i, bdata->node_bootmem_map)) {
+ if (test_bit(i, node_bootmem_map)) {
i += incr;
continue;
}
for (j = i + 1; j < i + areasize; ++j) {
if (j >= eidx)
goto fail_block;
- if (test_bit(j, bdata->node_bootmem_map))
+ if (test_bit(j, node_bootmem_map))
goto fail_block;
}
start = i;
@@ -278,8 +286,8 @@ restart_scan:
i += incr;
}
- if (preferred > offset) {
- preferred = offset;
+ if (preferred > 0) {
+ preferred = 0;
goto restart_scan;
}
return NULL;
@@ -295,6 +303,7 @@ found:
*/
if (align < PAGE_SIZE &&
bdata->last_offset && bdata->last_pos+1 == start) {
+ unsigned long offset, remaining_size;
offset = ALIGN(bdata->last_offset, align);
BUG_ON(offset > PAGE_SIZE);
remaining_size = PAGE_SIZE - offset;
@@ -303,14 +312,12 @@ found:
/* last_pos unchanged */
bdata->last_offset = offset + size;
ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
- offset +
- bdata->node_boot_start);
+ offset + node_boot_start);
} else {
remaining_size = size - remaining_size;
areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE;
ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
- offset +
- bdata->node_boot_start);
+ offset + node_boot_start);
bdata->last_pos = start + areasize - 1;
bdata->last_offset = remaining_size;
}
@@ -318,14 +325,14 @@ found:
} else {
bdata->last_pos = start + areasize - 1;
bdata->last_offset = size & ~PAGE_MASK;
- ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
+ ret = phys_to_virt(start * PAGE_SIZE + node_boot_start);
}
/*
* Reserve the area now:
*/
for (i = start; i < start + areasize; i++)
- if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
+ if (unlikely(test_and_set_bit(i, node_bootmem_map)))
BUG();
memset(ret, 0, size);
return ret;
next prev parent reply other threads:[~2008-03-17 21:27 UTC|newest]
Thread overview: 76+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-03-17 1:58 [PATCH] [0/18] GB pages hugetlb support Andi Kleen
2008-03-17 1:58 ` [PATCH] [1/18] Convert hugeltlb.c over to pass global state around in a structure Andi Kleen
2008-03-17 20:15 ` Adam Litke
2008-03-18 12:05 ` Mel Gorman
2008-03-17 1:58 ` [PATCH] [2/18] Add basic support for more than one hstate in hugetlbfs Andi Kleen
2008-03-17 20:22 ` Adam Litke
2008-03-17 20:44 ` Andi Kleen
2008-03-18 12:23 ` Mel Gorman
2008-03-23 10:38 ` KOSAKI Motohiro
2008-03-23 11:28 ` Andi Kleen
2008-03-23 11:30 ` KOSAKI Motohiro
2008-03-17 1:58 ` [PATCH] [3/18] Convert /proc output code over to report multiple hstates Andi Kleen
2008-03-18 12:28 ` Mel Gorman
2008-03-17 1:58 ` [PATCH] [4/18] Add basic support for more than one hstate in hugetlbfs Andi Kleen
2008-03-17 8:09 ` Paul Jackson
2008-03-17 8:15 ` Andi Kleen
2008-03-17 20:28 ` Adam Litke
2008-03-18 14:11 ` Mel Gorman
2008-03-17 1:58 ` [PATCH] [5/18] Expand the hugetlbfs sysctls to handle arrays for all hstates Andi Kleen
2008-03-18 14:34 ` Mel Gorman
2008-03-18 16:49 ` Andi Kleen
2008-03-18 17:01 ` Mel Gorman
2008-03-17 1:58 ` [PATCH] [6/18] Add support to have individual hstates for each hugetlbfs mount Andi Kleen
2008-03-18 14:10 ` Adam Litke
2008-03-18 15:02 ` Mel Gorman
2008-03-17 1:58 ` [PATCH] [7/18] Abstract out the NUMA node round robin code into a separate function Andi Kleen
2008-03-18 15:42 ` Mel Gorman
2008-03-18 15:47 ` Andi Kleen
2008-03-18 16:04 ` Mel Gorman
2008-03-17 1:58 ` [PATCH] [8/18] Add a __alloc_bootmem_node_nopanic Andi Kleen
2008-03-18 15:54 ` Mel Gorman
2008-03-17 1:58 ` [PATCH] [9/18] Export prep_compound_page to the hugetlb allocator Andi Kleen
2008-03-17 1:58 ` [PATCH] [10/18] Factor out new huge page preparation code into separate function Andi Kleen
2008-03-17 20:31 ` Adam Litke
2008-03-18 16:02 ` Mel Gorman
2008-03-17 1:58 ` [PATCH] [11/18] Fix alignment bug in bootmem allocator Andi Kleen
2008-03-17 2:19 ` Yinghai Lu
2008-03-17 7:02 ` Andi Kleen
2008-03-17 7:17 ` Yinghai Lu
2008-03-17 7:31 ` Yinghai Lu
2008-03-17 7:41 ` Andi Kleen
2008-03-17 7:53 ` Yinghai Lu
2008-03-17 8:10 ` Yinghai Lu
2008-03-17 8:17 ` Andi Kleen
2008-03-17 8:56 ` Andi Kleen
2008-03-17 18:52 ` Yinghai Lu
2008-03-17 21:27 ` Yinghai Lu [this message]
2008-03-18 2:06 ` Yinghai Lu
2008-03-18 16:18 ` Mel Gorman
2008-03-17 1:58 ` [PATCH] [12/18] Add support to allocate hugetlb pages that are larger than MAX_ORDER Andi Kleen
2008-03-18 16:27 ` Mel Gorman
2008-04-09 16:05 ` Andrew Hastings
2008-04-09 17:56 ` Andi Kleen
2008-03-17 1:58 ` [PATCH] [13/18] Add support to allocate hugepages of different size with hugepages= Andi Kleen
2008-03-18 16:32 ` Mel Gorman
2008-03-18 16:45 ` Andi Kleen
2008-03-18 16:46 ` Mel Gorman
2008-03-17 1:58 ` [PATCH] [14/18] Clean up hugetlb boot time printk Andi Kleen
2008-03-18 16:37 ` Mel Gorman
2008-03-17 1:58 ` [PATCH] [15/18] Add support to x86-64 to allocate and lookup GB pages in hugetlb Andi Kleen
2008-03-17 1:58 ` [PATCH] [16/18] Add huge pud support to hugetlbfs Andi Kleen
2008-03-17 1:58 ` [PATCH] [17/18] Add huge pud support to mm/memory.c Andi Kleen
2008-03-17 1:58 ` [PATCH] [18/18] Implement hugepagesz= option for x86-64 Andi Kleen
2008-03-17 9:29 ` Paul Jackson
2008-03-17 9:59 ` Andi Kleen
2008-03-17 10:02 ` Paul Jackson
2008-03-17 3:11 ` [PATCH] [0/18] GB pages hugetlb support Paul Jackson
2008-03-17 7:00 ` Andi Kleen
2008-03-17 7:00 ` Paul Jackson
2008-03-17 7:29 ` Andi Kleen
2008-03-17 5:35 ` Paul Jackson
2008-03-17 6:58 ` Andi Kleen
2008-03-17 9:26 ` Paul Jackson
2008-03-17 15:05 ` Adam Litke
2008-03-17 15:33 ` Andi Kleen
2008-03-17 15:59 ` Adam Litke
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=86802c440803171427y7c9b2a54nacb0603916713033@mail.gmail.com \
--to=yhlu.kernel@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=andi@firstfloor.org \
--cc=clameter@sgi.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mingo@elte.hu \
--cc=nickpiggin@yahoo.com.au \
--cc=pj@sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).