* [VM PATCH 2.6.6-rc3-bk5] Dirty balancing in the presence of mapped pages
@ 2004-05-05 0:20 Shantanu Goel
2004-05-05 1:03 ` Andrew Morton
0 siblings, 1 reply; 10+ messages in thread
From: Shantanu Goel @ 2004-05-05 0:20 UTC (permalink / raw)
To: Kernel
[-- Attachment #1: Type: text/plain, Size: 544 bytes --]
Hi,
Presently the kernel does not collect information
about the percentage of memory that processes have
dirtied via mmap until reclamation. Nothing analogous
to balance_dirty_pages() is being done for mmap'ed
pages. The attached patch adds collection of dirty
page information during kswapd() scans and initiation
of background writeback by waking up bdflush.
Thanks,
Shantanu
__________________________________
Do you Yahoo!?
Win a $20,000 Career Makeover at Yahoo! HotJobs
http://hotjobs.sweepstakes.yahoo.com/careermakeover
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: vm-bdflush.patch --]
[-- Type: text/x-patch; name="vm-bdflush.patch", Size: 6487 bytes --]
--- .orig/include/linux/page-flags.h 2004-05-04 20:03:07.000000000 -0400
+++ 2.6.6-rc3-bk5-sg-x86_64/include/linux/page-flags.h 2004-05-03 21:00:54.000000000 -0400
@@ -133,6 +133,7 @@
unsigned long allocstall; /* direct reclaim calls */
unsigned long pgrotated; /* pages rotated to tail of the LRU */
+ unsigned long bdflush; /* bdflush wakeups due to dirty mapped pages */
} ____cacheline_aligned;
DECLARE_PER_CPU(struct page_state, page_states);
--- .orig/include/linux/rmap.h 2004-05-04 20:03:07.000000000 -0400
+++ 2.6.6-rc3-bk5-sg-x86_64/include/linux/rmap.h 2004-05-03 18:41:00.000000000 -0400
@@ -28,6 +28,7 @@
struct pte_chain * fastcall
page_add_rmap(struct page *, pte_t *, struct pte_chain *);
void fastcall page_remove_rmap(struct page *, pte_t *);
+int fastcall page_is_dirty(struct page *);
/*
* Called from mm/vmscan.c to handle paging out
--- .orig/mm/page_alloc.c 2004-05-04 20:03:07.000000000 -0400
+++ 2.6.6-rc3-bk5-sg-x86_64/mm/page_alloc.c 2004-05-03 21:03:57.000000000 -0400
@@ -1681,6 +1681,7 @@
"allocstall",
"pgrotated",
+ "bdflush",
};
static void *vmstat_start(struct seq_file *m, loff_t *pos)
--- .orig/mm/rmap.c 2004-05-04 20:03:07.000000000 -0400
+++ 2.6.6-rc3-bk5-sg-x86_64/mm/rmap.c 2004-05-03 18:44:00.000000000 -0400
@@ -170,6 +170,43 @@
}
/**
+ * page_is_dirty - test if the page was modified
+ * @page: the page to test
+ *
+ * Quick test_and_clear_dirty for all mappings to a page,
+ * returns the number of processes which modified the page.
+ * Caller needs to hold the rmap lock.
+ */
+int fastcall page_is_dirty(struct page * page)
+{
+ struct pte_chain *pc;
+ int dirty = 0;
+
+ if (PageDirect(page)) {
+ pte_t *pte = rmap_ptep_map(page->pte.direct);
+ if (ptep_test_and_clear_dirty(pte))
+ dirty++;
+ rmap_ptep_unmap(pte);
+ } else {
+ /* Check all the page tables mapping this page. */
+ for (pc = page->pte.chain; pc; pc = pte_chain_next(pc)) {
+ int i;
+
+ for (i = pte_chain_idx(pc); i < NRPTE; i++) {
+ pte_addr_t pte_paddr = pc->ptes[i];
+ pte_t *p;
+
+ p = rmap_ptep_map(pte_paddr);
+ if (ptep_test_and_clear_dirty(p))
+ dirty++;
+ rmap_ptep_unmap(p);
+ }
+ }
+ }
+ return dirty;
+}
+
+/**
* page_add_rmap - add reverse mapping entry to a page
* @page: the page to add the mapping to
* @ptep: the page table entry mapping this page
--- .orig/mm/vmscan.c 2004-05-04 20:03:07.000000000 -0400
+++ 2.6.6-rc3-bk5-sg-x86_64/mm/vmscan.c 2004-05-04 18:30:05.000000000 -0400
@@ -242,7 +242,7 @@
*/
static int
shrink_list(struct list_head *page_list, unsigned int gfp_mask,
- int *nr_scanned, int do_writepage)
+ int *nr_scanned, int do_writepage, int *nr_dirty)
{
struct address_space *mapping;
LIST_HEAD(ret_pages);
@@ -277,6 +277,10 @@
referenced = page_referenced(page);
if (referenced && page_mapping_inuse(page)) {
/* In active use or really unfreeable. Activate it. */
+ if (!PageDirty(page) && page_mapped(page) && page_mapping(page) && page_is_dirty(page)) {
+ set_page_dirty(page);
+ (*nr_dirty)++;
+ }
rmap_unlock(page);
goto activate_locked;
}
@@ -472,7 +476,7 @@
*/
static int
shrink_cache(struct zone *zone, unsigned int gfp_mask,
- int max_scan, int *total_scanned, int do_writepage)
+ int max_scan, int *total_scanned, int do_writepage, int *nr_dirty)
{
LIST_HEAD(page_list);
struct pagevec pvec;
@@ -521,7 +525,7 @@
else
mod_page_state_zone(zone, pgscan_direct, nr_scan);
nr_freed = shrink_list(&page_list, gfp_mask,
- total_scanned, do_writepage);
+ total_scanned, do_writepage, nr_dirty);
*total_scanned += nr_taken;
if (current_is_kswapd())
mod_page_state(kswapd_steal, nr_freed);
@@ -576,7 +580,7 @@
*/
static void
refill_inactive_zone(struct zone *zone, const int nr_pages_in,
- struct page_state *ps)
+ struct page_state *ps, int *nr_dirty)
{
int pgmoved;
int pgdeactivate = 0;
@@ -649,12 +653,15 @@
page = lru_to_page(&l_hold);
list_del(&page->lru);
if (page_mapped(page)) {
- if (!reclaim_mapped) {
- list_add(&page->lru, &l_active);
- continue;
- }
rmap_lock(page);
- if (page_referenced(page)) {
+ if (!PageDirty(page) && page_mapping(page) && !TestSetPageLocked(page)) {
+ if (page_is_dirty(page)) {
+ set_page_dirty(page);
+ (*nr_dirty)++;
+ }
+ unlock_page(page);
+ }
+ if (!reclaim_mapped || page_referenced(page)) {
rmap_unlock(page);
list_add(&page->lru, &l_active);
continue;
@@ -734,7 +741,7 @@
*/
static int
shrink_zone(struct zone *zone, int max_scan, unsigned int gfp_mask,
- int *total_scanned, struct page_state *ps, int do_writepage)
+ int *total_scanned, struct page_state *ps, int do_writepage, int *nr_dirty)
{
unsigned long ratio;
int count;
@@ -756,7 +763,7 @@
count = atomic_read(&zone->nr_scan_active);
if (count >= SWAP_CLUSTER_MAX) {
atomic_set(&zone->nr_scan_active, 0);
- refill_inactive_zone(zone, count, ps);
+ refill_inactive_zone(zone, count, ps, nr_dirty);
}
atomic_add(max_scan, &zone->nr_scan_inactive);
@@ -764,7 +771,7 @@
if (count >= SWAP_CLUSTER_MAX) {
atomic_set(&zone->nr_scan_inactive, 0);
return shrink_cache(zone, gfp_mask, count,
- total_scanned, do_writepage);
+ total_scanned, do_writepage, nr_dirty);
}
return 0;
}
@@ -794,7 +801,7 @@
for (i = 0; zones[i] != NULL; i++) {
struct zone *zone = zones[i];
- int max_scan;
+ int max_scan, nr_dirty = 0;
if (zone->free_pages < zone->pages_high)
zone->temp_priority = priority;
@@ -804,7 +811,7 @@
max_scan = zone->nr_inactive >> priority;
ret += shrink_zone(zone, max_scan, gfp_mask,
- total_scanned, ps, do_writepage);
+ total_scanned, ps, do_writepage, &nr_dirty);
}
return ret;
}
@@ -919,6 +926,7 @@
unsigned long total_scanned = 0;
unsigned long total_reclaimed = 0;
int do_writepage = 0;
+ int nr_dirty = 0;
inc_page_state(pageoutrun);
@@ -980,7 +988,7 @@
zone->temp_priority = priority;
max_scan = zone->nr_inactive >> priority;
reclaimed = shrink_zone(zone, max_scan, GFP_KERNEL,
- &scanned, ps, do_writepage);
+ &scanned, ps, do_writepage, &nr_dirty);
total_scanned += scanned;
reclaim_state->reclaimed_slab = 0;
shrink_slab(scanned, GFP_KERNEL);
@@ -1017,6 +1025,10 @@
zone->prev_priority = zone->temp_priority;
}
+ if (!laptop_mode && nr_dirty >= SWAP_CLUSTER_MAX) {
+ wakeup_bdflush(-1);
+ inc_page_state(bdflush);
+ }
return total_reclaimed;
}
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [VM PATCH 2.6.6-rc3-bk5] Dirty balancing in the presence of mapped pages
2004-05-05 0:20 [VM PATCH 2.6.6-rc3-bk5] Dirty balancing in the presence of mapped pages Shantanu Goel
@ 2004-05-05 1:03 ` Andrew Morton
2004-05-05 2:16 ` Nick Piggin
0 siblings, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2004-05-05 1:03 UTC (permalink / raw)
To: Shantanu Goel; +Cc: linux-kernel
Shantanu Goel <sgoel01@yahoo.com> wrote:
>
> Presently the kernel does not collection information
> about the percentage of memory that processes have
> dirtied via mmap until reclamation. Nothing analogous
> to balance_dirty_pages() is being done for mmap'ed
> pages. The attached patch adds collection of dirty
> page information during kswapd() scans and initiation
> of background writeback by waking up bdflush.
And what were the effects of this patch?
Did you consider propagating the pte dirtiness into the pageframe within
page_referenced(), while we're there?
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [VM PATCH 2.6.6-rc3-bk5] Dirty balancing in the presence of mapped pages
2004-05-05 1:03 ` Andrew Morton
@ 2004-05-05 2:16 ` Nick Piggin
2004-05-05 2:57 ` Andrew Morton
0 siblings, 1 reply; 10+ messages in thread
From: Nick Piggin @ 2004-05-05 2:16 UTC (permalink / raw)
To: Andrew Morton; +Cc: Shantanu Goel, linux-kernel
Andrew Morton wrote:
> Shantanu Goel <sgoel01@yahoo.com> wrote:
>
>>Presently the kernel does not collection information
>>about the percentage of memory that processes have
>>dirtied via mmap until reclamation. Nothing analogous
>>to balance_dirty_pages() is being done for mmap'ed
>>pages. The attached patch adds collection of dirty
>>page information during kswapd() scans and initiation
>>of background writeback by waking up bdflush.
>
>
> And what were the effects of this patch?
>
I have a modified patch from Nikita that does the
if (ptep_test_and_clear_dirty) set_page_dirty from
page_referenced, under the page_table_lock.
So it also picks up pages coming off the active list.
It doesn't do the wakeup_bdflush thing, but that sounds
like a good idea. What does wakeup_bdflush(-1) mean?
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [VM PATCH 2.6.6-rc3-bk5] Dirty balancing in the presence of mapped pages
2004-05-05 2:16 ` Nick Piggin
@ 2004-05-05 2:57 ` Andrew Morton
2004-05-05 3:16 ` Nick Piggin
` (2 more replies)
0 siblings, 3 replies; 10+ messages in thread
From: Andrew Morton @ 2004-05-05 2:57 UTC (permalink / raw)
To: Nick Piggin; +Cc: sgoel01, linux-kernel
Nick Piggin <nickpiggin@yahoo.com.au> wrote:
>
> Andrew Morton wrote:
> > Shantanu Goel <sgoel01@yahoo.com> wrote:
> >
> >>Presently the kernel does not collection information
> >>about the percentage of memory that processes have
> >>dirtied via mmap until reclamation. Nothing analogous
> >>to balance_dirty_pages() is being done for mmap'ed
> >>pages. The attached patch adds collection of dirty
> >>page information during kswapd() scans and initiation
> >>of background writeback by waking up bdflush.
> >
> >
> > And what were the effects of this patch?
> >
>
> I havea modified patch from Nikita that does the
> if (ptep_test_and_clear_dirty) set_page_dirty from
> page_referenced, under the page_table_lock.
Dude. I have lots of patches too. The question is: what use are they?
In this case, given that we have an actively mapped MAP_SHARED pagecache
page, marking it dirty will cause it to be written by pdflush. Even though
we're not about to reclaim it, and even though the process which is mapping
the page may well modify it again. This patch will cause additional I/O.
So we need to understand why it was written, and what effects were
observed, with what workload, and all that good stuff.
> It doesn't do the wakeup_bdflush thing, but that sounds
> like a good idea. What does wakeup_bdflush(-1) mean?
It appears that it will cause pdflush to write out down to
dirty_background_ratio.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [VM PATCH 2.6.6-rc3-bk5] Dirty balancing in the presence of mapped pages
2004-05-05 2:57 ` Andrew Morton
@ 2004-05-05 3:16 ` Nick Piggin
2004-05-05 3:24 ` Nick Piggin
2004-05-09 17:24 ` Bill Davidsen
2004-05-05 4:31 ` Shantanu Goel
2004-05-05 16:56 ` Nikita Danilov
2 siblings, 2 replies; 10+ messages in thread
From: Nick Piggin @ 2004-05-05 3:16 UTC (permalink / raw)
To: Andrew Morton; +Cc: sgoel01, linux-kernel
Andrew Morton wrote:
> Nick Piggin <nickpiggin@yahoo.com.au> wrote:
>
>>Andrew Morton wrote:
>>
>>>Shantanu Goel <sgoel01@yahoo.com> wrote:
>>>
>>>
>>>>Presently the kernel does not collection information
>>>>about the percentage of memory that processes have
>>>>dirtied via mmap until reclamation. Nothing analogous
>>>>to balance_dirty_pages() is being done for mmap'ed
>>>>pages. The attached patch adds collection of dirty
>>>>page information during kswapd() scans and initiation
>>>>of background writeback by waking up bdflush.
>>>
>>>
>>>And what were the effects of this patch?
>>>
>>
>>I havea modified patch from Nikita that does the
>>if (ptep_test_and_clear_dirty) set_page_dirty from
>>page_referenced, under the page_table_lock.
>
>
> Dude. I have lots of patches too. The question is: what use are they?
>
> In this case, given that we have an actively mapped MAP_SHARED pagecache
> page, marking it dirty will cause it to be written by pdflush. Even though
> we're not about to reclaim it, and even though the process which is mapping
> the page may well modify it again. This patch will cause additional I/O.
>
> So we need to understand why it was written, and what effects were
> observed, with what workload, and all that good stuff.
>
I guess it is an attempt to do somewhat better at dirty page accounting
for mmap'ed pages. The balance_dirty_pages_ratelimited writeout path
also has the same problem as you describe. Maybe usage patterns means
this is less of an issue here?
But I suppose it wouldn't be nice to change without seeing some
improvement somewhere.
>
>>It doesn't do the wakeup_bdflush thing, but that sounds
>>like a good idea. What does wakeup_bdflush(-1) mean?
>
>
> It appears that it will cause pdflush to write out down to
> dirty_background_ratio.
>
Yeah. So wakeup_bdflush(0) would be more consistent?
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [VM PATCH 2.6.6-rc3-bk5] Dirty balancing in the presence of mapped pages
2004-05-05 3:16 ` Nick Piggin
@ 2004-05-05 3:24 ` Nick Piggin
2004-05-09 17:24 ` Bill Davidsen
1 sibling, 0 replies; 10+ messages in thread
From: Nick Piggin @ 2004-05-05 3:24 UTC (permalink / raw)
To: Nick Piggin; +Cc: Andrew Morton, sgoel01, linux-kernel
Nick Piggin wrote:
> Andrew Morton wrote:
>
>> Nick Piggin <nickpiggin@yahoo.com.au> wrote:
>>
>>> It doesn't do the wakeup_bdflush thing, but that sounds
>>> like a good idea. What does wakeup_bdflush(-1) mean?
>>
>>
>>
>> It appears that it will cause pdflush to write out down to
>> dirty_background_ratio.
>>
>
> Yeah. So wakeup_bdflush(0) would be more consistent?
>
No. Sorry, next time I'll actually read the code :|
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [VM PATCH 2.6.6-rc3-bk5] Dirty balancing in the presence of mapped pages
2004-05-05 2:57 ` Andrew Morton
2004-05-05 3:16 ` Nick Piggin
@ 2004-05-05 4:31 ` Shantanu Goel
2004-05-05 5:25 ` Andrew Morton
2004-05-05 16:56 ` Nikita Danilov
2 siblings, 1 reply; 10+ messages in thread
From: Shantanu Goel @ 2004-05-05 4:31 UTC (permalink / raw)
To: Andrew Morton, Nick Piggin; +Cc: sgoel01, linux-kernel
--- Andrew Morton <akpm@osdl.org> wrote:
> > > And what were the effects of this patch?
Below are some results of an iozone run on
ext3 with ordered data mode. The machine is 2xXeon
with HT and 1.25GB of memory and a 15000rpm SCSI disk.
iozone was run with the following parameters:
iozone -c -B -R -i 0 -r <record length> -s 1944978
The file size is 50% more than the amount of RAM.
2.6.6-rc3-bk5 stock (all KBytes):
record write re-write
4 110752 19143
8 109818 17726
16 112165 17053
32 109824 17096
2.6.6-rc3-bk5 patched (all KBytes):
record write re-write
4 114284 17467
8 117902 17149
16 117835 18742
32 118102 18961
Difference from stock (%):
record write re-write
4 +3.0 -8.7
8 +7.3 -3.2
16 +5.0 +9.9
32 +7.5 +10.9
It seems this patch helps writes a bit but hurts
re-writes for smaller record sizes. My guess is the
larger block size enables this patch to reduce the #
I/O requests. I'll investigate this further and also
run the random write test when I get a chance.
> In this case, given that we have an actively mapped
> MAP_SHARED pagecache
> page, marking it dirty will cause it to be written
> by pdflush. Even though
> we're not about to reclaim it, and even though the
> process which is mapping
> the page may well modify it again. This patch will
> cause additional I/O.
>
True, but is that really very different from normal
file I/O where we actively balance # dirty pages?
Also, the I/O will only happen if the dirty thresholds
are exceeded. It probably makes sense though to skip
SwapCache pages to more closely mimic file I/O
behaviour.
> So we need to understand why it was written, and
> what effects were
> observed, with what workload, and all that good
> stuff.
>
My motivation was the NFS/WRITEPAGE_ACTIVATE
discussion and gobs of mmap'ed sequential writes. If
we can detect dirty pages before they need to be
reclaimed and submit them for writeback, the NFS layer
will be hopefully be able to combine them into bigger
requests thereby reducing # RPCs. This works well in
the file I/O case so I figured it might work equally
well in the mmap case. The results are still pending
though. I posted the patch to get feedback on whether
people see any fundamental flaw in this approach.
> > It doesn't do the wakeup_bdflush thing, but that
> sounds
> > like a good idea. What does wakeup_bdflush(-1)
> mean?
>
> It appears that it will cause pdflush to write out
> down to
> dirty_background_ratio.
Yup, the idea is to mimic the balance_dirty_pages()
behaviour but not to force writes unless required by
the dirty ratios.
Thanks,
Shantanu
__________________________________
Do you Yahoo!?
Win a $20,000 Career Makeover at Yahoo! HotJobs
http://hotjobs.sweepstakes.yahoo.com/careermakeover
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [VM PATCH 2.6.6-rc3-bk5] Dirty balancing in the presence of mapped pages
2004-05-05 4:31 ` Shantanu Goel
@ 2004-05-05 5:25 ` Andrew Morton
0 siblings, 0 replies; 10+ messages in thread
From: Andrew Morton @ 2004-05-05 5:25 UTC (permalink / raw)
To: Shantanu Goel; +Cc: nickpiggin, linux-kernel
Shantanu Goel <sgoel01@yahoo.com> wrote:
>
> > In this case, given that we have an actively mapped
> > MAP_SHARED pagecache
> > page, marking it dirty will cause it to be written
> > by pdflush. Even though
> > we're not about to reclaim it, and even though the
> > process which is mapping
> > the page may well modify it again. This patch will
> > cause additional I/O.
> >
>
> True, but is that really very different from normal
> file I/O where we actively balance # dirty pages?
> Also, the I/O will only happen if the dirty thresholds
> are exceeded. It probably makes sense though to skip
> SwapCache pages to more closely mimic file I/O
> behaviour.
We need to think about why real applications (as opposed to benchmarks) use
MAP_SHARED. I suspect many of them will modify pages again and again and
again. We really want to avoid writing these pages out until the
application has truly finished with them.
I think it is probably the case that pages which were dirtied with write(2)
are much less likely to be redirtied than pages which were dirtied via
MAP_SHARED.
One thing you might like to look at is to give these pages another trip
around the LRU after they have been unmapped from pagetables, and to give
pdflush a poke. Add instrumentation to record how many pages end up
getting written via vmscan's writepage versus via pdflush (use
current_is_pdflush()).
diff -puN mm/vmscan.c~a mm/vmscan.c
--- 25/mm/vmscan.c~a 2004-05-04 22:21:41.613856240 -0700
+++ 25-akpm/mm/vmscan.c 2004-05-04 22:23:16.016504856 -0700
@@ -318,7 +318,9 @@ shrink_list(struct list_head *page_list,
rmap_unlock(page);
goto keep_locked;
case SWAP_SUCCESS:
- ; /* try to free the page below */
+ if (PageDirty(page))
+ goto keep_locked;
+ break;
}
}
rmap_unlock(page);
_
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [VM PATCH 2.6.6-rc3-bk5] Dirty balancing in the presence of mapped pages
2004-05-05 2:57 ` Andrew Morton
2004-05-05 3:16 ` Nick Piggin
2004-05-05 4:31 ` Shantanu Goel
@ 2004-05-05 16:56 ` Nikita Danilov
2 siblings, 0 replies; 10+ messages in thread
From: Nikita Danilov @ 2004-05-05 16:56 UTC (permalink / raw)
To: Andrew Morton; +Cc: Nick Piggin, sgoel01, linux-kernel
Andrew Morton writes:
> Nick Piggin <nickpiggin@yahoo.com.au> wrote:
> >
> > Andrew Morton wrote:
> > > Shantanu Goel <sgoel01@yahoo.com> wrote:
> > >
> > >>Presently the kernel does not collection information
> > >>about the percentage of memory that processes have
> > >>dirtied via mmap until reclamation. Nothing analogous
> > >>to balance_dirty_pages() is being done for mmap'ed
> > >>pages. The attached patch adds collection of dirty
> > >>page information during kswapd() scans and initiation
> > >>of background writeback by waking up bdflush.
> > >
> > >
> > > And what were the effects of this patch?
> > >
> >
> > I havea modified patch from Nikita that does the
> > if (ptep_test_and_clear_dirty) set_page_dirty from
> > page_referenced, under the page_table_lock.
>
> Dude. I have lots of patches too. The question is: what use are they?
Learning patch-scripts? :)
>
> In this case, given that we have an actively mapped MAP_SHARED pagecache
> page, marking it dirty will cause it to be written by pdflush. Even though
> we're not about to reclaim it, and even though the process which is mapping
> the page may well modify it again. This patch will cause additional I/O.
Dirty bit is transferred to the struct page when page is moved to the
inactive list, where pages are not supposedly referenced/dirtied
frequently. Besides, additional IO, if any, will be done through
->writepages() which is much more efficient than single-page pageout
from tail of the inactive list.
>
> So we need to understand why it was written, and what effects were
> observed, with what workload, and all that good stuff.
Another possible scenario where early transfer of dirty bit could be
useful: huge file consisting of single hole is mmapped, and user level
starts dirtying all pages. Current VM thinks happily that all memory is
clean, ->writepages() is not invoked. VM scanning starts, shrink_list()
dirties page-at-a-time
(shrink_list()->try_to_unmap()->set_page_dirty()), and calls
->writepage() that has to insert meta-data for the hole page (extent,
indirect pointer, whatever), and to submit IO. As order of pages at the
inactive list corresponds to the order of page faults (i.e., random),
this will result in the horrible fragmentation.
Nikita.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [VM PATCH 2.6.6-rc3-bk5] Dirty balancing in the presence of mapped pages
2004-05-05 3:16 ` Nick Piggin
2004-05-05 3:24 ` Nick Piggin
@ 2004-05-09 17:24 ` Bill Davidsen
1 sibling, 0 replies; 10+ messages in thread
From: Bill Davidsen @ 2004-05-09 17:24 UTC (permalink / raw)
To: linux-kernel
Nick Piggin wrote:
>> So we need to understand why it was written, and what effects were
>> observed, with what workload, and all that good stuff.
>>
>
> I guess it is an attempt to do somewhat better at dirty page accounting
> for mmap'ed pages. The balance_dirty_pages_ratelimited writeout path
> also has the same problem as you describe. Maybe usage patterns means
> this is less of an issue here?
>
> But I suppose it wouldn't be nice to change without seeing some
> improvement somewhere.
Lots of issues here, writing in random blocks can lead to fragmentation
if the data is newly allocated, but won't change fragmentation if the page
mapped is already allocated on the disk.
Is it practical or desirable to be writing mapped pages of already
allocated files back more readily, since it avoids all the allocation
issues? But you still need to limit dirty pages, so at some point it
will be necessary to do the allocation, preferably in an optimal way.
>
>>
>>> It doesn't do the wakeup_bdflush thing, but that sounds
>>> like a good idea. What does wakeup_bdflush(-1) mean?
>>
>>
>>
>> It appears that it will cause pdflush to write out down to
>> dirty_background_ratio.
>>
>
> Yeah. So wakeup_bdflush(0) would be more consistent?
--
bill davidsen <davidsen@tmr.com>
CTO TMR Associates, Inc
Doing interesting things with small computers since 1979
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2004-05-09 17:18 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-05-05 0:20 [VM PATCH 2.6.6-rc3-bk5] Dirty balancing in the presence of mapped pages Shantanu Goel
2004-05-05 1:03 ` Andrew Morton
2004-05-05 2:16 ` Nick Piggin
2004-05-05 2:57 ` Andrew Morton
2004-05-05 3:16 ` Nick Piggin
2004-05-05 3:24 ` Nick Piggin
2004-05-09 17:24 ` Bill Davidsen
2004-05-05 4:31 ` Shantanu Goel
2004-05-05 5:25 ` Andrew Morton
2004-05-05 16:56 ` Nikita Danilov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox