From: Levent Serinol <lserinol@gmail.com>
To: linux-mm <linux-mm@kvack.org>
Cc: marcelo.tosatti@cyclades.com
Subject: [RFC][PATCH] tunable zone watermarks
Date: Mon, 28 Mar 2005 09:43:54 +0300 [thread overview]
Message-ID: <2c1942a70503272243c351eee@mail.gmail.com> (raw)
===========================================================
--- linux-2.6.11.4/include/linux/sysctl.h.org 2005-03-16 02:09:07.000000000 +0200
+++ linux-2.6.11.4/include/linux/sysctl.h 2005-03-27 20:33:17.000000000 +0300
@@ -169,6 +169,7 @@ enum
VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */
VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual
address space layout */
VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */
+ VM_ZONE_WATERMARKS=29, /* zone watermarks */
};
--- linux-2.6.11.4/include/linux/mmzone.h.org 2005-03-16 02:09:07.000000000 +0200
+++ linux-2.6.11.4/include/linux/mmzone.h 2005-03-27 20:33:17.000000000 +0300
@@ -27,6 +27,12 @@ struct free_area {
struct pglist_data;
+typedef struct zone_watermarks_vals {	/* one zone's sysctl watermarks, in kB */
+	unsigned long pages_min;
+	unsigned long pages_low;
+	unsigned long pages_high;
+} zone_watermarks_vals_t;
+
/*
* zone->lock and zone->lru_lock are two of the hottest locks in the kernel.
* So add a wild amount of padding here to ensure that they fall into separate
@@ -364,6 +370,8 @@ struct ctl_table;
struct file;
int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *,
void __user *, size_t *, loff_t *);
+int zone_watermarks_sysctl_handler(struct ctl_table *, int, struct file *,
+ void __user *, size_t *, loff_t *);
extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
void __user *, size_t *, loff_t *);
--- linux-2.6.11.4/mm/page_alloc.c.org 2005-03-16 02:09:27.000000000 +0200
+++ linux-2.6.11.4/mm/page_alloc.c 2005-03-27 20:33:53.000000000 +0300
@@ -66,7 +66,11 @@ EXPORT_SYMBOL(zone_table);
static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
int min_free_kbytes = 1024;
-
+/*
+ * Sized with the compile-time bound MAX_NUMNODES: an array with static
+ * storage cannot be dimensioned by the runtime value num_online_nodes().
+ */
+zone_watermarks_vals_t zone_watermarks_sysctl[MAX_NUMNODES * MAX_NR_ZONES];
unsigned long __initdata nr_kernel_pages;
unsigned long __initdata nr_all_pages;
@@ -1911,6 +1915,24 @@ void __init page_alloc_init(void)
hotcpu_notifier(page_alloc_cpu_notify, 0);
}
+/* Refresh zone_watermarks_sysctl[]: one K()-scaled entry per zone. */
+static void setup_zone_watermarks_vals(void)
+{
+	zone_watermarks_vals_t *slot = zone_watermarks_sysctl;
+	pg_data_t *pgdat;
+
+	for_each_pgdat(pgdat) {
+		struct zone *zone = pgdat->node_zones;
+		struct zone *end = zone + MAX_NR_ZONES;
+
+		for (; zone < end; zone++, slot++) {
+			slot->pages_min = K(zone->pages_min);
+			slot->pages_low = K(zone->pages_low);
+			slot->pages_high = K(zone->pages_high);
+		}
+	}
+}
+
/*
* setup_per_zone_lowmem_reserve - called whenever
* sysctl_lower_zone_reserve_ratio changes. Ensures that each zone
@@ -1990,6 +2012,7 @@ static void setup_per_zone_pages_min(voi
zone->pages_high = (zone->pages_min * 6) / 4;
spin_unlock_irqrestore(&zone->lru_lock, flags);
}
+ setup_zone_watermarks_vals();
}
/*
@@ -2029,6 +2052,7 @@ static int __init init_per_zone_pages_mi
min_free_kbytes = 65536;
setup_per_zone_pages_min();
setup_per_zone_lowmem_reserve();
+ setup_zone_watermarks_vals();
return 0;
}
module_init(init_per_zone_pages_min)
@@ -2046,6 +2070,66 @@ int min_free_kbytes_sysctl_handler(ctl_t
return 0;
}
+/* Convert a watermark given in kilobytes to pages, capped at @limit. */
+static unsigned long zone_watermark_pages(unsigned long kb,
+					  unsigned long limit)
+{
+	unsigned long pages = kb >> (PAGE_SHIFT - 10);
+
+	return pages < limit ? pages : limit;
+}
+
+/*
+ * zone_watermarks_sysctl_handler - proc handler for vm.zone_watermarks.
+ *
+ * zone_watermarks_sysctl[] holds one {pages_min, pages_low, pages_high}
+ * triple in kilobytes per zone, ordered by pgdat and then by zone index.
+ * After a successful write every triple is converted to pages and stored
+ * in its zone under zone->lru_lock.  pages_low and pages_high are capped
+ * at the zone's present_pages.  pages_min is raised to SWAP_CLUSTER_MAX
+ * and capped at present_pages for highmem zones only; for other zones it
+ * is taken as written.
+ * The table holds unsigned longs, so it is parsed with
+ * proc_doulongvec_minmax() rather than proc_dointvec().
+ * Returns 0 on success, or the negative errno reported by the parser.
+ */
+int zone_watermarks_sysctl_handler(ctl_table *table, int write,
+	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+	pg_data_t *pgdat;
+	unsigned int i, j = 0;
+	int err;
+
+	err = proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+	if (err < 0 || !write)
+		return err;
+
+	for_each_pgdat(pgdat) {
+		for (i = 0; i < MAX_NR_ZONES; i++, j++) {
+			struct zone *zone = pgdat->node_zones + i;
+			zone_watermarks_vals_t *kb = &zone_watermarks_sysctl[j];
+			unsigned long pages_min, flags;
+
+			spin_lock_irqsave(&zone->lru_lock, flags);
+			pages_min = kb->pages_min >> (PAGE_SHIFT - 10);
+			if (is_highmem(zone)) {
+				if (pages_min < SWAP_CLUSTER_MAX)
+					pages_min = SWAP_CLUSTER_MAX;
+				if (pages_min > zone->present_pages)
+					pages_min = zone->present_pages;
+			}
+			zone->pages_min = pages_min;
+			zone->pages_low = zone_watermark_pages(kb->pages_low,
+							zone->present_pages);
+			zone->pages_high = zone_watermark_pages(kb->pages_high,
+							zone->present_pages);
+			spin_unlock_irqrestore(&zone->lru_lock, flags);
+		}
+	}
+
+	return 0;
+}
+
/*
* lowmem_reserve_ratio_sysctl_handler - just a wrapper around
* proc_dointvec() so that we can call setup_per_zone_lowmem_reserve()
--- linux-2.6.11.4/kernel/sysctl.c.org 2005-03-16 02:09:00.000000000 +0200
+++ linux-2.6.11.4/kernel/sysctl.c 2005-03-27 20:33:17.000000000 +0300
@@ -62,6 +62,11 @@ extern char core_pattern[];
extern int cad_pid;
extern int pid_max;
extern int min_free_kbytes;
+/*
+ * Declared with the compile-time bound MAX_NUMNODES: a file-scope array
+ * cannot be dimensioned by the runtime value num_online_nodes().
+ */
+extern zone_watermarks_vals_t zone_watermarks_sysctl[MAX_NUMNODES * MAX_NR_ZONES];
extern int printk_ratelimit_jiffies;
extern int printk_ratelimit_burst;
extern int pid_max_min, pid_max_max;
@@ -825,6 +830,15 @@ static ctl_table vm_table[] = {
.strategy = &sysctl_jiffies,
},
#endif
+ {
+ .ctl_name = VM_ZONE_WATERMARKS,
+ .procname = "zone_watermarks",
+ .data = &zone_watermarks_sysctl,
+ .maxlen = sizeof(zone_watermarks_sysctl),
+ .mode = 0644,
+ .proc_handler = &zone_watermarks_sysctl_handler,
+ .strategy = &sysctl_intvec,
+ },
{ .ctl_name = 0 }
};
===========================================================
--
Stay out of the road, if you want to grow old.
~ Pink Floyd ~.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>
next reply other threads:[~2005-03-28 6:43 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-03-28 6:43 Levent Serinol [this message]
2005-03-28 19:30 ` [RFC][PATCH] tunable zone watermarks Martin J. Bligh
2005-03-28 19:51 ` Marcelo Tosatti
2005-03-29 0:45 ` Martin J. Bligh
2005-03-29 2:27 ` Nick Piggin
2005-03-29 9:10 ` Levent Serinol
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=2c1942a70503272243c351eee@mail.gmail.com \
--to=lserinol@gmail.com \
--cc=linux-mm@kvack.org \
--cc=marcelo.tosatti@cyclades.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.