All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Linus Torvalds <torvalds@linux-foundation.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	netdev@vger.kernel.org, trond.myklebust@fys.uio.no
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH 10/28] mm: memory reserve management
Date: Wed, 20 Feb 2008 15:46:20 +0100	[thread overview]
Message-ID: <20080220150306.548965000@chello.nl> (raw)
In-Reply-To: 20080220144610.548202000@chello.nl

[-- Attachment #1: mm-reserve.patch --]
[-- Type: text/plain, Size: 14372 bytes --]

Generic reserve management code. 

It provides methods to reserve and charge. Upon this, generic alloc/free style
reserve pools could be build, which could fully replace mempool_t
functionality.

It should also allow for a Banker's algorithm replacement of __GFP_NOFAIL.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/reserve.h |   54 ++++++
 mm/Makefile             |    2 
 mm/reserve.c            |  429 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 484 insertions(+), 1 deletion(-)

Index: linux-2.6/include/linux/reserve.h
===================================================================
--- /dev/null
+++ linux-2.6/include/linux/reserve.h
@@ -0,0 +1,54 @@
+/*
+ * Memory reserve management.
+ *
+ *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * This file contains the public data structure and API definitions.
+ */
+
+#ifndef _LINUX_RESERVE_H
+#define _LINUX_RESERVE_H
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+struct mem_reserve {
+	struct mem_reserve *parent;
+	struct list_head children;
+	struct list_head siblings;
+
+	const char *name;
+
+	long pages;
+	long limit;
+	long usage;
+	spinlock_t lock;	/* protects limit and usage */
+};
+
+extern struct mem_reserve mem_reserve_root;
+
+void mem_reserve_init(struct mem_reserve *res, const char *name,
+		      struct mem_reserve *parent);
+int mem_reserve_connect(struct mem_reserve *new_child,
+			struct mem_reserve *node);
+int mem_reserve_disconnect(struct mem_reserve *node);
+
+int mem_reserve_pages_set(struct mem_reserve *res, long pages);
+int mem_reserve_pages_add(struct mem_reserve *res, long pages);
+int mem_reserve_pages_charge(struct mem_reserve *res, long pages,
+			     int overcommit);
+
+int mem_reserve_kmalloc_set(struct mem_reserve *res, long bytes);
+int mem_reserve_kmalloc_charge(struct mem_reserve *res, long bytes,
+			       int overcommit);
+
+struct kmem_cache;
+
+int mem_reserve_kmem_cache_set(struct mem_reserve *res,
+			       struct kmem_cache *s,
+			       int objects);
+int mem_reserve_kmem_cache_charge(struct mem_reserve *res,
+				  long objs,
+				  int overcommit);
+
+#endif /* _LINUX_RESERVE_H */
Index: linux-2.6/mm/Makefile
===================================================================
--- linux-2.6.orig/mm/Makefile
+++ linux-2.6/mm/Makefile
@@ -11,7 +11,7 @@ obj-y			:= bootmem.o filemap.o mempool.o
 			   page_alloc.o page-writeback.o pdflush.o \
 			   readahead.o swap.o truncate.o vmscan.o \
 			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
-			   page_isolation.o $(mmu-y)
+			   page_isolation.o reserve.o $(mmu-y)
 
 obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
 obj-$(CONFIG_BOUNCE)	+= bounce.o
Index: linux-2.6/mm/reserve.c
===================================================================
--- /dev/null
+++ linux-2.6/mm/reserve.c
@@ -0,0 +1,429 @@
+/*
+ * Memory reserve management.
+ *
+ *  Copyright (C) 2007, Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * Description:
+ *
+ * Manage a set of memory reserves.
+ *
+ * A memory reserve is a reserve for a specified number of object of specified
+ * size. Since memory is managed in pages, this reserve demand is then
+ * translated into a page unit.
+ *
+ * So each reserve has a specified object limit, an object usage count and a
+ * number of pages required to back these objects.
+ *
+ * Usage is charged against a reserve, if the charge fails, the resource must
+ * not be allocated/used.
+ *
+ * The reserves are managed in a tree, and the resource demands (pages and
+ * limit) are propagated up the tree. Obviously the object limit will be
+ * meaningless as soon as the unit starts mixing, but the required page reserve
+ * (being of one unit) is still valid at the root.
+ *
+ * It is the page demand of the root node that is used to set the global
+ * reserve (adjust_memalloc_reserve() which sets zone->pages_emerg).
+ *
+ * As long as a subtree has the same usage unit, an aggregate node can be used
+ * to charge against, instead of the leaf nodes. However, do be consistent with
+ * who is charged, resource usage is not propagated up the tree (for
+ * performance reasons).
+ */
+
+#include <linux/reserve.h>
+#include <linux/mutex.h>
+#include <linux/mmzone.h>
+#include <linux/log2.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+static DEFINE_MUTEX(mem_reserve_mutex);
+
+/**
+ * @mem_reserve_root - the global reserve root
+ *
+ * The global reserve is empty, and has no limit unit, it merely
+ * acts as an aggregation point for reserves and an interface to
+ * adjust_memalloc_reserve().
+ */
+struct mem_reserve mem_reserve_root = {
+	.children = LIST_HEAD_INIT(mem_reserve_root.children),
+	.siblings = LIST_HEAD_INIT(mem_reserve_root.siblings),
+	.name = "total reserve",
+	.lock = __SPIN_LOCK_UNLOCKED(mem_reserve_root.lock),
+};
+EXPORT_SYMBOL_GPL(mem_reserve_root);
+
+/**
+ * mem_reserve_init - initialize a memory reserve object
+ * @res - the new reserve object
+ * @name - a name for this reserve
+ */
+void mem_reserve_init(struct mem_reserve *res, const char *name,
+		      struct mem_reserve *parent)
+{
+	memset(res, 0, sizeof(*res));
+	INIT_LIST_HEAD(&res->children);
+	INIT_LIST_HEAD(&res->siblings);
+	res->name = name;
+	spin_lock_init(&res->lock);
+
+	if (parent)
+		mem_reserve_connect(res, parent);
+}
+EXPORT_SYMBOL_GPL(mem_reserve_init);
+
+/*
+ * propagate the pages and limit changes up the tree.
+ */
+static void __calc_reserve(struct mem_reserve *res, long pages, long limit)
+{
+	unsigned long flags;
+
+	for ( ; res; res = res->parent) {
+		res->pages += pages;
+
+		if (limit) {
+			spin_lock_irqsave(&res->lock, flags);
+			res->limit += limit;
+			spin_unlock_irqrestore(&res->lock, flags);
+		}
+	}
+}
+
+/**
+ * __mem_reserve_add - primitive to change the size of a reserve
+ * @res - reserve to change
+ * @pages - page delta
+ * @limit - usage limit delta
+ *
+ * Returns -ENOMEM when a size increase is not possible atm.
+ */
+static int __mem_reserve_add(struct mem_reserve *res, long pages, long limit)
+{
+	int ret = 0;
+	long reserve;
+
+	reserve = mem_reserve_root.pages;
+	__calc_reserve(res, pages, 0);
+	reserve = mem_reserve_root.pages - reserve;
+
+	if (reserve) {
+		ret = adjust_memalloc_reserve(reserve);
+		if (ret)
+			__calc_reserve(res, -pages, 0);
+	}
+
+	if (!ret)
+		__calc_reserve(res, 0, limit);
+
+	return ret;
+}
+
+/**
+ * __mem_reserve_charge - primitive to charge object usage to a reserve
+ * @res - reserve to charge
+ * @charge - size of the charge
+ * @overcommit - allow despite of limit (use with caution!)
+ *
+ * Returns non-zero on success, zero on failure.
+ */
+static
+int __mem_reserve_charge(struct mem_reserve *res, long charge, int overcommit)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&res->lock, flags);
+	if (charge < 0 || res->usage + charge < res->limit || overcommit) {
+		res->usage += charge;
+		if (unlikely(res->usage < 0))
+			res->usage = 0;
+		ret = 1;
+	}
+	spin_unlock_irqrestore(&res->lock, flags);
+
+	return ret;
+}
+
+/**
+ * mem_reserve_connect - connect a reserve to another in a child-parent relation
+ * @new_child - the reserve node to connect (child)
+ * @node - the reserve node to connect to (parent)
+ *
+ * Returns -ENOMEM when the new connection would increase the reserve (parent
+ * is connected to mem_reserve_root) and there is no memory to do so.
+ *
+ * The child is _NOT_ connected on error.
+ */
+int mem_reserve_connect(struct mem_reserve *new_child, struct mem_reserve *node)
+{
+	int ret;
+
+	WARN_ON(!new_child->name);
+
+	mutex_lock(&mem_reserve_mutex);
+	new_child->parent = node;
+	list_add(&new_child->siblings, &node->children);
+	ret = __mem_reserve_add(node, new_child->pages, new_child->limit);
+	if (ret) {
+		new_child->parent = NULL;
+		list_del_init(&new_child->siblings);
+	}
+	mutex_unlock(&mem_reserve_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mem_reserve_connect);
+
+/**
+ * mem_reserve_disconnect - sever a nodes connection to the reserve tree
+ * @node - the node to disconnect
+ *
+ * Could, in theory, return -ENOMEM, but since disconnecting a node _should_
+ * only decrease the reserves that _should_ not happen.
+ */
+int mem_reserve_disconnect(struct mem_reserve *node)
+{
+	int ret;
+
+	BUG_ON(!node->parent);
+
+	mutex_lock(&mem_reserve_mutex);
+	ret = __mem_reserve_add(node->parent, -node->pages, -node->limit);
+	if (!ret) {
+		node->parent = NULL;
+		list_del_init(&node->siblings);
+	}
+	mutex_unlock(&mem_reserve_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mem_reserve_disconnect);
+
+#ifdef CONFIG_PROC_FS
+
+/*
+ * Simple output of the reserve tree in: /proc/reserve_info
+ * Example:
+ *
+ * localhost ~ # cat /proc/reserve_info
+ * total reserve                  8156K (0/544817)
+ *   total network reserve          8156K (0/544817)
+ *     network TX reserve             196K (0/49)
+ *       protocol TX pages              196K (0/49)
+ *     network RX reserve             7960K (0/544768)
+ *       IPv6 route cache               1372K (0/4096)
+ *       IPv4 route cache               5468K (0/16384)
+ *       SKB data reserve               1120K (0/524288)
+ *         IPv6 fragment cache            560K (0/262144)
+ *         IPv4 fragment cache            560K (0/262144)
+ */
+
+static void mem_reserve_show_item(struct seq_file *m, struct mem_reserve *res,
+				  int nesting)
+{
+	int i;
+	struct mem_reserve *child;
+
+	for (i = 0; i < nesting; i++)
+		seq_puts(m, "  ");
+
+	seq_printf(m, "%-30s %ldK (%ld/%ld)\n",
+		   res->name, res->pages << (PAGE_SHIFT - 10),
+		   res->usage, res->limit);
+
+	list_for_each_entry(child, &res->children, siblings)
+		mem_reserve_show_item(m, child, nesting+1);
+}
+
+static int mem_reserve_show(struct seq_file *m, void *v)
+{
+	mutex_lock(&mem_reserve_mutex);
+	mem_reserve_show_item(m, &mem_reserve_root, 0);
+	mutex_unlock(&mem_reserve_mutex);
+
+	return 0;
+}
+
+static int mem_reserve_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mem_reserve_show, NULL);
+}
+
+static const struct file_operations mem_reserve_opterations = {
+	.open = mem_reserve_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static __init int mem_reserve_proc_init(void)
+{
+	struct proc_dir_entry *entry;
+
+	entry = create_proc_entry("reserve_info", S_IRUSR, NULL);
+	if (entry)
+		entry->proc_fops = &mem_reserve_opterations;
+
+	return 0;
+}
+
+__initcall(mem_reserve_proc_init);
+
+#endif
+
+/*
+ * alloc_page helpers
+ */
+
+/**
+ * mem_reserve_pages_set - set reserves size in pages
+ * @res - reserve to set
+ * @pages - size in pages to set it to
+ *
+ * Returns -ENOMEM when it fails to set the reserve. On failure the old size
+ * is preserved.
+ */
+int mem_reserve_pages_set(struct mem_reserve *res, long pages)
+{
+	int ret;
+
+	mutex_lock(&mem_reserve_mutex);
+	pages -= res->pages;
+	ret = __mem_reserve_add(res, pages, pages);
+	mutex_unlock(&mem_reserve_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mem_reserve_pages_set);
+
+/**
+ * mem_reserve_pages_add - change the size in a relative way
+ * @res - reserve to change
+ * @pages - number of pages to add (or subtract when negative)
+ *
+ * Similar to mem_reserve_pages_set, except that the argument is relative
+ * instead of absolute.
+ *
+ * Returns -ENOMEM when it fails to increase.
+ */
+int mem_reserve_pages_add(struct mem_reserve *res, long pages)
+{
+	int ret;
+
+	mutex_lock(&mem_reserve_mutex);
+	ret = __mem_reserve_add(res, pages, pages);
+	mutex_unlock(&mem_reserve_mutex);
+
+	return ret;
+}
+
+/**
+ * mem_reserve_pages_charge - charge page usage to a reserve
+ * @res - reserve to charge
+ * @pages - size to charge
+ * @overcommit - disregard the usage limit (use with caution!)
+ *
+ * Returns non-zero on success.
+ */
+int mem_reserve_pages_charge(struct mem_reserve *res,
+			     long pages, int overcommit)
+{
+	return __mem_reserve_charge(res, pages, overcommit);
+}
+EXPORT_SYMBOL_GPL(mem_reserve_pages_charge);
+
+/*
+ * kmalloc helpers
+ */
+
+/**
+ * mem_reserve_kmalloc_set - set this reserve to bytes worth of kmalloc
+ * @res - reserve to change
+ * @bytes - size in bytes to reserve
+ *
+ * Returns -ENOMEM on failure.
+ */
+int mem_reserve_kmalloc_set(struct mem_reserve *res, long bytes)
+{
+	int ret;
+	long pages;
+
+	mutex_lock(&mem_reserve_mutex);
+	pages = kestimate(GFP_ATOMIC, bytes);
+	pages -= res->pages;
+	bytes -= res->limit;
+	ret = __mem_reserve_add(res, pages, bytes);
+	mutex_unlock(&mem_reserve_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mem_reserve_kmalloc_set);
+
+/**
+ * mem_reserve_kmalloc_charge - charge bytes to a reserve
+ * @res - reserve to charge
+ * @bytes - bytes to charge
+ * @overcommit - disregard the usage limit (use with caution!)
+ *
+ * Returns non-zero on success.
+ */
+int mem_reserve_kmalloc_charge(struct mem_reserve *res, long bytes,
+			       int overcommit)
+{
+	if (bytes < 0)
+		bytes = -roundup_pow_of_two(-bytes);
+	else
+		bytes = roundup_pow_of_two(bytes);
+
+	return __mem_reserve_charge(res, bytes, overcommit);
+}
+EXPORT_SYMBOL_GPL(mem_reserve_kmalloc_charge);
+
+/*
+ * kmem_cache helpers
+ */
+
+/**
+ * mem_reserve_kmem_cache_set - set reserve to @objects worth of kmem_cache_alloc of @s
+ * @res - reserve to set
+ * @s - kmem_cache to reserve from
+ * @objects - number of objects to reserve
+ *
+ * Returns -ENOMEM on failure.
+ */
+int mem_reserve_kmem_cache_set(struct mem_reserve *res, struct kmem_cache *s,
+			       int objects)
+{
+	int ret;
+	long pages;
+
+	mutex_lock(&mem_reserve_mutex);
+	pages = kmem_estimate_pages(s, GFP_ATOMIC, objects);
+	pages -= res->pages;
+	objects -= res->limit;
+	ret = __mem_reserve_add(res, pages, objects);
+	mutex_unlock(&mem_reserve_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mem_reserve_kmem_cache_set);
+
+/**
+ * mem_reserve_kmem_cache_charge - charge (or uncharge) usage of objs
+ * @res - reserve to charge
+ * @objs - objects to charge for
+ * @overcommit - disregard the usage limit (use with caution!)
+ *
+ * Returns non-zero on success.
+ */
+int mem_reserve_kmem_cache_charge(struct mem_reserve *res, long objs,
+				  int overcommit)
+{
+	return __mem_reserve_charge(res, objs, overcommit);
+}
+EXPORT_SYMBOL_GPL(mem_reserve_kmem_cache_charge);

--


WARNING: multiple messages have this Message-ID (diff)
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: Linus Torvalds <torvalds@linux-foundation.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	netdev@vger.kernel.org, trond.myklebust@fys.uio.no
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH 10/28] mm: memory reserve management
Date: Wed, 20 Feb 2008 15:46:20 +0100	[thread overview]
Message-ID: <20080220150306.548965000@chello.nl> (raw)
In-Reply-To: 20080220144610.548202000@chello.nl

[-- Attachment #1: mm-reserve.patch --]
[-- Type: text/plain, Size: 14597 bytes --]

Generic reserve management code. 

It provides methods to reserve and charge. Upon this, generic alloc/free style
reserve pools could be build, which could fully replace mempool_t
functionality.

It should also allow for a Banker's algorithm replacement of __GFP_NOFAIL.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/reserve.h |   54 ++++++
 mm/Makefile             |    2 
 mm/reserve.c            |  429 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 484 insertions(+), 1 deletion(-)

Index: linux-2.6/include/linux/reserve.h
===================================================================
--- /dev/null
+++ linux-2.6/include/linux/reserve.h
@@ -0,0 +1,54 @@
+/*
+ * Memory reserve management.
+ *
+ *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * This file contains the public data structure and API definitions.
+ */
+
+#ifndef _LINUX_RESERVE_H
+#define _LINUX_RESERVE_H
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+struct mem_reserve {
+	struct mem_reserve *parent;
+	struct list_head children;
+	struct list_head siblings;
+
+	const char *name;
+
+	long pages;
+	long limit;
+	long usage;
+	spinlock_t lock;	/* protects limit and usage */
+};
+
+extern struct mem_reserve mem_reserve_root;
+
+void mem_reserve_init(struct mem_reserve *res, const char *name,
+		      struct mem_reserve *parent);
+int mem_reserve_connect(struct mem_reserve *new_child,
+			struct mem_reserve *node);
+int mem_reserve_disconnect(struct mem_reserve *node);
+
+int mem_reserve_pages_set(struct mem_reserve *res, long pages);
+int mem_reserve_pages_add(struct mem_reserve *res, long pages);
+int mem_reserve_pages_charge(struct mem_reserve *res, long pages,
+			     int overcommit);
+
+int mem_reserve_kmalloc_set(struct mem_reserve *res, long bytes);
+int mem_reserve_kmalloc_charge(struct mem_reserve *res, long bytes,
+			       int overcommit);
+
+struct kmem_cache;
+
+int mem_reserve_kmem_cache_set(struct mem_reserve *res,
+			       struct kmem_cache *s,
+			       int objects);
+int mem_reserve_kmem_cache_charge(struct mem_reserve *res,
+				  long objs,
+				  int overcommit);
+
+#endif /* _LINUX_RESERVE_H */
Index: linux-2.6/mm/Makefile
===================================================================
--- linux-2.6.orig/mm/Makefile
+++ linux-2.6/mm/Makefile
@@ -11,7 +11,7 @@ obj-y			:= bootmem.o filemap.o mempool.o
 			   page_alloc.o page-writeback.o pdflush.o \
 			   readahead.o swap.o truncate.o vmscan.o \
 			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
-			   page_isolation.o $(mmu-y)
+			   page_isolation.o reserve.o $(mmu-y)
 
 obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
 obj-$(CONFIG_BOUNCE)	+= bounce.o
Index: linux-2.6/mm/reserve.c
===================================================================
--- /dev/null
+++ linux-2.6/mm/reserve.c
@@ -0,0 +1,429 @@
+/*
+ * Memory reserve management.
+ *
+ *  Copyright (C) 2007, Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * Description:
+ *
+ * Manage a set of memory reserves.
+ *
+ * A memory reserve is a reserve for a specified number of object of specified
+ * size. Since memory is managed in pages, this reserve demand is then
+ * translated into a page unit.
+ *
+ * So each reserve has a specified object limit, an object usage count and a
+ * number of pages required to back these objects.
+ *
+ * Usage is charged against a reserve, if the charge fails, the resource must
+ * not be allocated/used.
+ *
+ * The reserves are managed in a tree, and the resource demands (pages and
+ * limit) are propagated up the tree. Obviously the object limit will be
+ * meaningless as soon as the unit starts mixing, but the required page reserve
+ * (being of one unit) is still valid at the root.
+ *
+ * It is the page demand of the root node that is used to set the global
+ * reserve (adjust_memalloc_reserve() which sets zone->pages_emerg).
+ *
+ * As long as a subtree has the same usage unit, an aggregate node can be used
+ * to charge against, instead of the leaf nodes. However, do be consistent with
+ * who is charged, resource usage is not propagated up the tree (for
+ * performance reasons).
+ */
+
+#include <linux/reserve.h>
+#include <linux/mutex.h>
+#include <linux/mmzone.h>
+#include <linux/log2.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+static DEFINE_MUTEX(mem_reserve_mutex);
+
+/**
+ * @mem_reserve_root - the global reserve root
+ *
+ * The global reserve is empty, and has no limit unit, it merely
+ * acts as an aggregation point for reserves and an interface to
+ * adjust_memalloc_reserve().
+ */
+struct mem_reserve mem_reserve_root = {
+	.children = LIST_HEAD_INIT(mem_reserve_root.children),
+	.siblings = LIST_HEAD_INIT(mem_reserve_root.siblings),
+	.name = "total reserve",
+	.lock = __SPIN_LOCK_UNLOCKED(mem_reserve_root.lock),
+};
+EXPORT_SYMBOL_GPL(mem_reserve_root);
+
+/**
+ * mem_reserve_init - initialize a memory reserve object
+ * @res - the new reserve object
+ * @name - a name for this reserve
+ */
+void mem_reserve_init(struct mem_reserve *res, const char *name,
+		      struct mem_reserve *parent)
+{
+	memset(res, 0, sizeof(*res));
+	INIT_LIST_HEAD(&res->children);
+	INIT_LIST_HEAD(&res->siblings);
+	res->name = name;
+	spin_lock_init(&res->lock);
+
+	if (parent)
+		mem_reserve_connect(res, parent);
+}
+EXPORT_SYMBOL_GPL(mem_reserve_init);
+
+/*
+ * propagate the pages and limit changes up the tree.
+ */
+static void __calc_reserve(struct mem_reserve *res, long pages, long limit)
+{
+	unsigned long flags;
+
+	for ( ; res; res = res->parent) {
+		res->pages += pages;
+
+		if (limit) {
+			spin_lock_irqsave(&res->lock, flags);
+			res->limit += limit;
+			spin_unlock_irqrestore(&res->lock, flags);
+		}
+	}
+}
+
+/**
+ * __mem_reserve_add - primitive to change the size of a reserve
+ * @res - reserve to change
+ * @pages - page delta
+ * @limit - usage limit delta
+ *
+ * Returns -ENOMEM when a size increase is not possible atm.
+ */
+static int __mem_reserve_add(struct mem_reserve *res, long pages, long limit)
+{
+	int ret = 0;
+	long reserve;
+
+	reserve = mem_reserve_root.pages;
+	__calc_reserve(res, pages, 0);
+	reserve = mem_reserve_root.pages - reserve;
+
+	if (reserve) {
+		ret = adjust_memalloc_reserve(reserve);
+		if (ret)
+			__calc_reserve(res, -pages, 0);
+	}
+
+	if (!ret)
+		__calc_reserve(res, 0, limit);
+
+	return ret;
+}
+
+/**
+ * __mem_reserve_charge - primitive to charge object usage to a reserve
+ * @res - reserve to charge
+ * @charge - size of the charge
+ * @overcommit - allow despite of limit (use with caution!)
+ *
+ * Returns non-zero on success, zero on failure.
+ */
+static
+int __mem_reserve_charge(struct mem_reserve *res, long charge, int overcommit)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&res->lock, flags);
+	if (charge < 0 || res->usage + charge < res->limit || overcommit) {
+		res->usage += charge;
+		if (unlikely(res->usage < 0))
+			res->usage = 0;
+		ret = 1;
+	}
+	spin_unlock_irqrestore(&res->lock, flags);
+
+	return ret;
+}
+
+/**
+ * mem_reserve_connect - connect a reserve to another in a child-parent relation
+ * @new_child - the reserve node to connect (child)
+ * @node - the reserve node to connect to (parent)
+ *
+ * Returns -ENOMEM when the new connection would increase the reserve (parent
+ * is connected to mem_reserve_root) and there is no memory to do so.
+ *
+ * The child is _NOT_ connected on error.
+ */
+int mem_reserve_connect(struct mem_reserve *new_child, struct mem_reserve *node)
+{
+	int ret;
+
+	WARN_ON(!new_child->name);
+
+	mutex_lock(&mem_reserve_mutex);
+	new_child->parent = node;
+	list_add(&new_child->siblings, &node->children);
+	ret = __mem_reserve_add(node, new_child->pages, new_child->limit);
+	if (ret) {
+		new_child->parent = NULL;
+		list_del_init(&new_child->siblings);
+	}
+	mutex_unlock(&mem_reserve_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mem_reserve_connect);
+
+/**
+ * mem_reserve_disconnect - sever a nodes connection to the reserve tree
+ * @node - the node to disconnect
+ *
+ * Could, in theory, return -ENOMEM, but since disconnecting a node _should_
+ * only decrease the reserves that _should_ not happen.
+ */
+int mem_reserve_disconnect(struct mem_reserve *node)
+{
+	int ret;
+
+	BUG_ON(!node->parent);
+
+	mutex_lock(&mem_reserve_mutex);
+	ret = __mem_reserve_add(node->parent, -node->pages, -node->limit);
+	if (!ret) {
+		node->parent = NULL;
+		list_del_init(&node->siblings);
+	}
+	mutex_unlock(&mem_reserve_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mem_reserve_disconnect);
+
+#ifdef CONFIG_PROC_FS
+
+/*
+ * Simple output of the reserve tree in: /proc/reserve_info
+ * Example:
+ *
+ * localhost ~ # cat /proc/reserve_info
+ * total reserve                  8156K (0/544817)
+ *   total network reserve          8156K (0/544817)
+ *     network TX reserve             196K (0/49)
+ *       protocol TX pages              196K (0/49)
+ *     network RX reserve             7960K (0/544768)
+ *       IPv6 route cache               1372K (0/4096)
+ *       IPv4 route cache               5468K (0/16384)
+ *       SKB data reserve               1120K (0/524288)
+ *         IPv6 fragment cache            560K (0/262144)
+ *         IPv4 fragment cache            560K (0/262144)
+ */
+
+static void mem_reserve_show_item(struct seq_file *m, struct mem_reserve *res,
+				  int nesting)
+{
+	int i;
+	struct mem_reserve *child;
+
+	for (i = 0; i < nesting; i++)
+		seq_puts(m, "  ");
+
+	seq_printf(m, "%-30s %ldK (%ld/%ld)\n",
+		   res->name, res->pages << (PAGE_SHIFT - 10),
+		   res->usage, res->limit);
+
+	list_for_each_entry(child, &res->children, siblings)
+		mem_reserve_show_item(m, child, nesting+1);
+}
+
+static int mem_reserve_show(struct seq_file *m, void *v)
+{
+	mutex_lock(&mem_reserve_mutex);
+	mem_reserve_show_item(m, &mem_reserve_root, 0);
+	mutex_unlock(&mem_reserve_mutex);
+
+	return 0;
+}
+
+static int mem_reserve_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mem_reserve_show, NULL);
+}
+
+static const struct file_operations mem_reserve_opterations = {
+	.open = mem_reserve_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static __init int mem_reserve_proc_init(void)
+{
+	struct proc_dir_entry *entry;
+
+	entry = create_proc_entry("reserve_info", S_IRUSR, NULL);
+	if (entry)
+		entry->proc_fops = &mem_reserve_opterations;
+
+	return 0;
+}
+
+__initcall(mem_reserve_proc_init);
+
+#endif
+
+/*
+ * alloc_page helpers
+ */
+
+/**
+ * mem_reserve_pages_set - set reserves size in pages
+ * @res - reserve to set
+ * @pages - size in pages to set it to
+ *
+ * Returns -ENOMEM when it fails to set the reserve. On failure the old size
+ * is preserved.
+ */
+int mem_reserve_pages_set(struct mem_reserve *res, long pages)
+{
+	int ret;
+
+	mutex_lock(&mem_reserve_mutex);
+	pages -= res->pages;
+	ret = __mem_reserve_add(res, pages, pages);
+	mutex_unlock(&mem_reserve_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mem_reserve_pages_set);
+
+/**
+ * mem_reserve_pages_add - change the size in a relative way
+ * @res - reserve to change
+ * @pages - number of pages to add (or subtract when negative)
+ *
+ * Similar to mem_reserve_pages_set, except that the argument is relative
+ * instead of absolute.
+ *
+ * Returns -ENOMEM when it fails to increase.
+ */
+int mem_reserve_pages_add(struct mem_reserve *res, long pages)
+{
+	int ret;
+
+	mutex_lock(&mem_reserve_mutex);
+	ret = __mem_reserve_add(res, pages, pages);
+	mutex_unlock(&mem_reserve_mutex);
+
+	return ret;
+}
+
+/**
+ * mem_reserve_pages_charge - charge page usage to a reserve
+ * @res - reserve to charge
+ * @pages - size to charge
+ * @overcommit - disregard the usage limit (use with caution!)
+ *
+ * Returns non-zero on success.
+ */
+int mem_reserve_pages_charge(struct mem_reserve *res,
+			     long pages, int overcommit)
+{
+	return __mem_reserve_charge(res, pages, overcommit);
+}
+EXPORT_SYMBOL_GPL(mem_reserve_pages_charge);
+
+/*
+ * kmalloc helpers
+ */
+
+/**
+ * mem_reserve_kmalloc_set - set this reserve to bytes worth of kmalloc
+ * @res - reserve to change
+ * @bytes - size in bytes to reserve
+ *
+ * Returns -ENOMEM on failure.
+ */
+int mem_reserve_kmalloc_set(struct mem_reserve *res, long bytes)
+{
+	int ret;
+	long pages;
+
+	mutex_lock(&mem_reserve_mutex);
+	pages = kestimate(GFP_ATOMIC, bytes);
+	pages -= res->pages;
+	bytes -= res->limit;
+	ret = __mem_reserve_add(res, pages, bytes);
+	mutex_unlock(&mem_reserve_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mem_reserve_kmalloc_set);
+
+/**
+ * mem_reserve_kmalloc_charge - charge bytes to a reserve
+ * @res - reserve to charge
+ * @bytes - bytes to charge
+ * @overcommit - disregard the usage limit (use with caution!)
+ *
+ * Returns non-zero on success.
+ */
+int mem_reserve_kmalloc_charge(struct mem_reserve *res, long bytes,
+			       int overcommit)
+{
+	if (bytes < 0)
+		bytes = -roundup_pow_of_two(-bytes);
+	else
+		bytes = roundup_pow_of_two(bytes);
+
+	return __mem_reserve_charge(res, bytes, overcommit);
+}
+EXPORT_SYMBOL_GPL(mem_reserve_kmalloc_charge);
+
+/*
+ * kmem_cache helpers
+ */
+
+/**
+ * mem_reserve_kmem_cache_set - set reserve to @objects worth of kmem_cache_alloc of @s
+ * @res - reserve to set
+ * @s - kmem_cache to reserve from
+ * @objects - number of objects to reserve
+ *
+ * Returns -ENOMEM on failure.
+ */
+int mem_reserve_kmem_cache_set(struct mem_reserve *res, struct kmem_cache *s,
+			       int objects)
+{
+	int ret;
+	long pages;
+
+	mutex_lock(&mem_reserve_mutex);
+	pages = kmem_estimate_pages(s, GFP_ATOMIC, objects);
+	pages -= res->pages;
+	objects -= res->limit;
+	ret = __mem_reserve_add(res, pages, objects);
+	mutex_unlock(&mem_reserve_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mem_reserve_kmem_cache_set);
+
+/**
+ * mem_reserve_kmem_cache_charge - charge (or uncharge) usage of objs
+ * @res - reserve to charge
+ * @objs - objects to charge for
+ * @overcommit - disregard the usage limit (use with caution!)
+ *
+ * Returns non-zero on success.
+ */
+int mem_reserve_kmem_cache_charge(struct mem_reserve *res, long objs,
+				  int overcommit)
+{
+	return __mem_reserve_charge(res, objs, overcommit);
+}
+EXPORT_SYMBOL_GPL(mem_reserve_kmem_cache_charge);

--

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2008-02-20 15:21 UTC|newest]

Thread overview: 146+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-02-20 14:46 [PATCH 00/28] Swap over NFS -v16 Peter Zijlstra
2008-02-20 14:46 ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 01/28] mm: gfp_to_alloc_flags() Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 02/28] mm: tag reseve pages Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 03/28] mm: slb: add knowledge of reserve pages Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 04/28] mm: kmem_estimate_pages() Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-23  8:05   ` Andrew Morton
2008-02-23  8:05     ` Andrew Morton
2008-02-20 14:46 ` [PATCH 05/28] mm: allow PF_MEMALLOC from softirq context Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-23  8:05   ` Andrew Morton
2008-02-23  8:05     ` Andrew Morton
2008-02-20 14:46 ` [PATCH 06/28] mm: serialize access to min_free_kbytes Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 07/28] mm: emergency pool Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-23  8:05   ` Andrew Morton
2008-02-23  8:05     ` Andrew Morton
2008-02-20 14:46 ` [PATCH 08/28] mm: system wide ALLOC_NO_WATERMARK Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-23  8:05   ` Andrew Morton
2008-02-23  8:05     ` Andrew Morton
2008-02-20 14:46 ` [PATCH 09/28] mm: __GFP_MEMALLOC Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-23  8:06   ` Andrew Morton
2008-02-23  8:06     ` Andrew Morton
2008-02-20 14:46 ` Peter Zijlstra [this message]
2008-02-20 14:46   ` [PATCH 10/28] mm: memory reserve management Peter Zijlstra
2008-02-23  8:06   ` Andrew Morton
2008-02-23  8:06     ` Andrew Morton
2008-02-20 14:46 ` [PATCH 11/28] selinux: tag avc cache alloc as non-critical Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 12/28] net: wrap sk->sk_backlog_rcv() Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 13/28] net: packet split receive api Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 14/28] net: sk_allocation() - concentrate socket related allocations Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 15/28] netvm: network reserve infrastructure Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-23  8:06   ` Andrew Morton
2008-02-23  8:06     ` Andrew Morton
2008-02-24  6:52   ` Mike Snitzer
2008-02-24  6:52     ` Mike Snitzer
2008-02-20 14:46 ` [PATCH 16/28] netvm: INET reserves Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 17/28] netvm: hook skb allocation to reserves Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-23  8:06   ` Andrew Morton
2008-02-23  8:06     ` Andrew Morton
2008-02-20 14:46 ` [PATCH 18/28] netvm: filter emergency skbs Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 19/28] netvm: prevent a stream specific deadlock Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 20/28] netfilter: NF_QUEUE vs emergency skbs Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 21/28] netvm: skb processing Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 22/28] mm: add support for non block device backed swap files Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 16:30   ` Randy Dunlap
2008-02-20 16:30     ` Randy Dunlap
2008-02-20 16:46     ` Peter Zijlstra
2008-02-20 16:46       ` Peter Zijlstra
2008-02-26 12:45   ` Miklos Szeredi
2008-02-26 12:45     ` Miklos Szeredi
2008-02-26 12:58     ` Peter Zijlstra
2008-02-26 12:58       ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 23/28] mm: methods for teaching filesystems about PG_swapcache pages Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 24/28] nfs: remove mempools Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 25/28] nfs: teach the NFS client how to treat PG_swapcache pages Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 26/28] nfs: disable data cache revalidation for swapfiles Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 27/28] nfs: enable swap on NFS Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-20 14:46 ` [PATCH 28/28] nfs: fix various memory recursions possible with swap over NFS Peter Zijlstra
2008-02-20 14:46   ` Peter Zijlstra
2008-02-23  8:06 ` [PATCH 00/28] Swap over NFS -v16 Andrew Morton
2008-02-23  8:06   ` Andrew Morton
2008-02-26  6:03   ` Neil Brown
2008-02-26  6:03     ` Neil Brown
2008-02-26 10:50     ` Peter Zijlstra
2008-02-26 10:50       ` Peter Zijlstra
2008-02-26 12:00       ` Peter Zijlstra
2008-02-26 12:00         ` Peter Zijlstra
2008-02-26 15:29       ` Miklos Szeredi
2008-02-26 15:29         ` Miklos Szeredi
2008-02-26 15:41         ` Peter Zijlstra
2008-02-26 15:41           ` Peter Zijlstra
2008-02-26 15:43         ` Peter Zijlstra
2008-02-26 15:43           ` Peter Zijlstra
2008-02-26 15:47           ` Miklos Szeredi
2008-02-26 15:47             ` Miklos Szeredi
2008-02-26 17:56       ` Andrew Morton
2008-02-26 17:56         ` Andrew Morton
2008-02-27  5:51       ` Neil Brown
2008-02-27  5:51         ` Neil Brown
2008-02-27  7:58         ` Peter Zijlstra
2008-02-27  7:58           ` Peter Zijlstra
2008-02-27  8:05           ` Pekka Enberg
2008-02-27  8:05             ` Pekka Enberg
2008-02-27  8:14             ` Peter Zijlstra
2008-02-27  8:14               ` Peter Zijlstra
2008-02-27  8:33               ` Peter Zijlstra
2008-02-27  8:33                 ` Peter Zijlstra
2008-02-27  8:43                 ` Pekka J Enberg
2008-02-27  8:43                   ` Pekka J Enberg
2008-02-29 11:51             ` Peter Zijlstra
2008-02-29 11:51               ` Peter Zijlstra
2008-02-29 11:58               ` Pekka Enberg
2008-02-29 11:58                 ` Pekka Enberg
2008-02-29 12:18                 ` Peter Zijlstra
2008-02-29 12:18                   ` Peter Zijlstra
2008-02-29 12:29                   ` Pekka Enberg
2008-02-29 12:29                     ` Pekka Enberg
2008-02-29  1:29           ` Neil Brown
2008-02-29  1:29             ` Neil Brown
2008-02-29 10:21             ` Peter Zijlstra
2008-02-29 10:21               ` Peter Zijlstra
2008-03-02 22:18               ` Neil Brown
2008-03-02 22:18                 ` Neil Brown
2008-03-02 23:33                 ` Peter Zijlstra
2008-03-02 23:33                   ` Peter Zijlstra
2008-03-03 23:41                   ` Neil Brown
2008-03-03 23:41                     ` Neil Brown
2008-03-04 10:28                     ` Peter Zijlstra
2008-03-04 10:28                       ` Peter Zijlstra
     [not found]           ` <1837 <1204626509.6241.39.camel@lappy>
2008-03-07  3:33             ` Neil Brown
2008-03-07  3:33               ` Neil Brown
2008-03-07 11:17               ` Peter Zijlstra
2008-03-07 11:17                 ` Peter Zijlstra
2008-03-07 11:55                 ` Peter Zijlstra
2008-03-07 11:55                   ` Peter Zijlstra
2008-03-10  5:15                 ` Neil Brown
2008-03-10  5:15                   ` Neil Brown
2008-03-10  9:17                   ` Peter Zijlstra
2008-03-10  9:17                     ` Peter Zijlstra
2008-03-14  5:22                     ` Neil Brown
2008-03-14  5:22                       ` Neil Brown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080220150306.548965000@chello.nl \
    --to=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=netdev@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=trond.myklebust@fys.uio.no \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.