All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kirill Korotaev <dev@sw.ru>
To: Andrew Morton <akpm@osdl.org>
Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Alan Cox <alan@lxorguk.ukuu.org.uk>,
	Christoph Hellwig <hch@infradead.org>,
	Pavel Emelianov <xemul@openvz.org>, Andrey Savochkin <saw@sw.ru>,
	devel@openvz.org, Rik van Riel <riel@redhat.com>,
	Andi Kleen <ak@suse.de>, Oleg Nesterov <oleg@tv-sign.ru>,
	Alexey Dobriyan <adobriyan@mail.ru>,
	Matt Helsley <matthltc@us.ibm.com>,
	CKRM-Tech <ckrm-tech@lists.sourceforge.net>,
	Hugh Dickins <hugh@veritas.com>
Subject: [PATCH 12/13] BC: vmrss (core)
Date: Tue, 05 Sep 2006 19:32:20 +0400	[thread overview]
Message-ID: <44FD9884.9090801@sw.ru> (raw)
In-Reply-To: <44FD918A.7050501@sw.ru>

This is the core of vmrss accounting.

The main introduced object is page_beancounter.
It ties together page and BCs which use the page.
This allows fractions of memory shared between BCs to be
accounted correctly (http://wiki.openvz.org/RSS_fractions_accounting)

Accounting API:
1. bc_alloc_rss_counter() allocates a tie between page and BC
2. bc_free_rss_counter frees it.

(1) and (2) must be done each time a page is about
to be added to someone's rss.

3. When page is touched by BC (i.e. by any task whose mm belongs to BC)
   page is bc_vmrss_page_add()-ed to that BC. Touching page leads
   to subtracting it from unused_privvmpages and adding to held_pages.
4. When page is unmapped from BC it is bc_vmrss_page_del()-ed from it.

5. When task forks all its mapped pages must be bc_vmrss_page_dup()-ed.
   i.e. page beancounter reference counter must be increased.
   
6. Some pages (former PGReserved) must be added to rss, but without
   having a reference on it. These pages are bc_vmrss_page_add_noref()-ed.

Signed-Off-By: Pavel Emelianov <xemul@sw.ru>
Signed-Off-By: Kirill Korotaev <dev@sw.ru>

---

 include/bc/beancounter.h |    3 
 include/bc/vmpages.h     |    4 
 include/bc/vmrss.h       |   72 ++++++
 include/linux/mm.h       |    6 
 include/linux/shmem_fs.h |    2 
 init/main.c              |    2 
 kernel/bc/Kconfig        |    9 
 kernel/bc/Makefile       |    1 
 kernel/bc/beancounter.c  |    9 
 kernel/bc/vmpages.c      |    7 
 kernel/bc/vmrss.c        |  508 +++++++++++++++++++++++++++++++++++++++++++++++
 mm/shmem.c               |    6 
 12 files changed, 627 insertions(+), 2 deletions(-)

--- ./include/bc/beancounter.h.bcrsscore	2006-09-05 13:44:33.000000000 +0400
+++ ./include/bc/beancounter.h	2006-09-05 13:50:29.000000000 +0400
@@ -68,6 +68,9 @@ struct beancounter {
 	struct hlist_node	hash;
 
 	unsigned long		unused_privvmpages;
+#ifdef CONFIG_BEANCOUNTERS_RSS
+	unsigned long long	rss_pages;
+#endif
 	/* resources statistics and settings */
 	struct bc_resource_parm	bc_parms[BC_RESOURCES];
 };
--- ./include/bc/vmpages.h.bcrsscore	2006-09-05 13:40:21.000000000 +0400
+++ ./include/bc/vmpages.h	2006-09-05 13:46:35.000000000 +0400
@@ -77,6 +77,8 @@ void bc_locked_shm_uncharge(struct shmem
 		put_beancounter((info)->shm_bc);			\
 	} while (0)
 
+#define mm_same_bc(mm1, mm2)	((mm1)->mm_bc == (mm2)->mm_bc)
+
 void bc_update_privvmpages(struct beancounter *bc);
 
 #else /* CONFIG_BEANCOUNTERS */
@@ -136,6 +138,8 @@ static inline void bc_locked_shm_uncharg
 #define shmi_init_bc(info)	do { } while (0)
 #define shmi_free_bc(info)	do { } while (0)
 
+#define mm_same_bc(mm1, mm2)	(1)
+
 #endif /* CONFIG_BEANCOUNTERS */
 #endif
 
--- /dev/null	2006-07-18 14:52:43.075228448 +0400
+++ ./include/bc/vmrss.h	2006-09-05 13:50:25.000000000 +0400
@@ -0,0 +1,72 @@
+/*
+ *  include/bc/vmrss.h
+ *
+ *  Copyright (C) 2006 OpenVZ. SWsoft Inc
+ *
+ */
+
+#ifndef __BC_VMRSS_H_
+#define __BC_VMRSS_H_
+
+struct page_beancounter;
+
+struct page;
+struct mm_struct;
+struct vm_area_struct;
+
+/* values that represent page's 'weight' in bc rss accounting */
+#define PB_PAGE_WEIGHT_SHIFT 24
+#define PB_PAGE_WEIGHT (1 << PB_PAGE_WEIGHT_SHIFT)
+/* page obtains one more reference within beancounter */
+#define PB_COPY_SAME	((struct page_beancounter *)-1)
+
+#ifdef CONFIG_BEANCOUNTERS_RSS
+
+struct page_beancounter * __must_check bc_alloc_rss_counter(void);
+struct page_beancounter * __must_check bc_alloc_rss_counter_list(long num,
+		struct page_beancounter *list);
+
+void bc_free_rss_counter(struct page_beancounter *rc);
+
+void bc_vmrss_page_add(struct page *pg, struct mm_struct *mm,
+		struct vm_area_struct *vma, struct page_beancounter **ppb);
+void bc_vmrss_page_del(struct page *pg, struct mm_struct *mm,
+		struct vm_area_struct *vma);
+void bc_vmrss_page_dup(struct page *pg, struct mm_struct *mm,
+		struct vm_area_struct *vma, struct page_beancounter **ppb);
+void bc_vmrss_page_add_noref(struct page *pg, struct mm_struct *mm,
+		struct vm_area_struct *vma);
+
+unsigned long mm_rss_pages(struct mm_struct *mm, unsigned long start,
+		unsigned long end);
+
+void bc_init_rss(void);
+
+#else /* CONFIG_BEANCOUNTERS_RSS */
+
+static inline struct page_beancounter * __must_check bc_alloc_rss_counter(void)
+{
+	return NULL;
+}
+
+static inline struct page_beancounter * __must_check bc_alloc_rss_counter_list(
+		long num, struct page_beancounter *list)
+{
+	return NULL;
+}
+
+static inline void bc_free_rss_counter(struct page_beancounter *rc)
+{
+}
+
+#define bc_vmrss_page_add(pg, mm, vma, pb)	do { } while (0)
+#define bc_vmrss_page_del(pg, mm, vma)		do { } while (0)
+#define bc_vmrss_page_dup(pg, mm, vma, pb)	do { } while (0)
+#define bc_vmrss_page_add_noref(pg, mm, vma)	do { } while (0)
+#define mm_rss_pages(mm, start, end)	(0)
+
+#define bc_init_rss()			do { } while (0)
+
+#endif /* CONFIG_BEANCOUNTERS_RSS */
+
+#endif
--- ./include/linux/mm.h.bcrsscore	2006-09-05 13:06:37.000000000 +0400
+++ ./include/linux/mm.h	2006-09-05 13:47:12.000000000 +0400
@@ -275,11 +275,15 @@ struct page {
 	unsigned long trace[8];
 #endif
 #ifdef CONFIG_BEANCOUNTERS
-	struct beancounter	*page_bc;
+	union {
+		struct beancounter	*page_bc;
+		struct page_beancounter	*page_pb;
+	};
 #endif
 };
 
 #define page_bc(page)			((page)->page_bc)
+#define page_pb(page)			((page)->page_pb)
 #define page_private(page)		((page)->private)
 #define set_page_private(page, v)	((page)->private = (v))
 
--- ./include/linux/shmem_fs.h.bcrsscore	2006-09-05 12:59:27.000000000 +0400
+++ ./include/linux/shmem_fs.h	2006-09-05 13:50:19.000000000 +0400
@@ -41,4 +41,6 @@ static inline struct shmem_inode_info *S
 	return container_of(inode, struct shmem_inode_info, vfs_inode);
 }
 
+int is_shmem_mapping(struct address_space *mapping);
+
 #endif
--- ./init/main.c.bcrsscore	2006-09-05 12:54:17.000000000 +0400
+++ ./init/main.c	2006-09-05 13:46:35.000000000 +0400
@@ -51,6 +51,7 @@
 #include <linux/lockdep.h>
 
 #include <bc/beancounter.h>
+#include <bc/vmrss.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -608,6 +609,7 @@ asmlinkage void __init start_kernel(void
 	check_bugs();
 
 	acpi_early_init(); /* before LAPIC and SMP init */
+	bc_init_rss();
 
 	/* Do the rest non-__init'ed, we're now alive */
 	rest_init();
--- ./kernel/bc/Kconfig.bcrsscore	2006-09-05 12:54:14.000000000 +0400
+++ ./kernel/bc/Kconfig	2006-09-05 13:50:35.000000000 +0400
@@ -22,4 +22,13 @@ config BEANCOUNTERS
           per-process basis.  Per-process accounting doesn't prevent malicious
           users from spawning a lot of resource-consuming processes.
 
+config BEANCOUNTERS_RSS
+	bool "Account physical memory usage"
+	default y
+	depends on BEANCOUNTERS
+	help
+	  This allows estimating per-beancounter physical memory usage.
+	  The implemented algorithm accounts shared pages of memory as well,
+	  dividing them by the number of beancounters which use the page.
+
 endmenu
--- ./kernel/bc/Makefile.bcrsscore	2006-09-05 12:59:37.000000000 +0400
+++ ./kernel/bc/Makefile	2006-09-05 13:50:48.000000000 +0400
@@ -9,3 +9,4 @@ obj-y += misc.o
 obj-y += sys.o
 obj-y += kmem.o
 obj-y += vmpages.o
+obj-$(CONFIG_BEANCOUNTERS_RSS) += vmrss.o
--- ./kernel/bc/beancounter.c.bcrsscore	2006-09-05 13:44:53.000000000 +0400
+++ ./kernel/bc/beancounter.c	2006-09-05 13:49:38.000000000 +0400
@@ -11,6 +11,7 @@
 #include <linux/hash.h>
 
 #include <bc/beancounter.h>
+#include <bc/vmrss.h>
 
 static kmem_cache_t *bc_cachep;
 static struct beancounter default_beancounter;
@@ -112,6 +113,14 @@ void put_beancounter(struct beancounter 
 			printk("BC: %d has %lu of %s held on put", bc->bc_id,
 				bc->bc_parms[i].held, bc_rnames[i]);
 
+	if (bc->unused_privvmpages != 0)
+		printk("BC: %d has %lu of unused pages held on put", bc->bc_id,
+			bc->unused_privvmpages);
+#ifdef CONFIG_BEANCOUNTERS_RSS
+	if (bc->rss_pages != 0)
+		printk("BC: %d hash %llu of rss pages held on put", bc->bc_id,
+			bc->rss_pages);
+#endif
 	hlist_del(&bc->hash);
 	nr_beancounters--;
 	spin_unlock_irqrestore(&bc_hash_lock, flags);
--- ./kernel/bc/vmpages.c.bcrsscore	2006-09-05 13:45:34.000000000 +0400
+++ ./kernel/bc/vmpages.c	2006-09-05 13:48:50.000000000 +0400
@@ -11,12 +11,17 @@
 
 #include <bc/beancounter.h>
 #include <bc/vmpages.h>
+#include <bc/vmrss.h>
 
 #include <asm/page.h>
 
 void bc_update_privvmpages(struct beancounter *bc)
 {
-	bc->bc_parms[BC_PRIVVMPAGES].held = bc->unused_privvmpages;
+	bc->bc_parms[BC_PRIVVMPAGES].held = bc->unused_privvmpages
+#ifdef CONFIG_BEANCOUNTERS_RSS
+		+ (bc->rss_pages >> PB_PAGE_WEIGHT_SHIFT)
+#endif
+		;
 	bc_adjust_minheld(bc, BC_PRIVVMPAGES);
 	bc_adjust_maxheld(bc, BC_PRIVVMPAGES);
 }
--- /dev/null	2006-07-18 14:52:43.075228448 +0400
+++ ./kernel/bc/vmrss.c	2006-09-05 13:51:21.000000000 +0400
@@ -0,0 +1,508 @@
+/*
+ *  kernel/bc/vmrss.c
+ *
+ *  Copyright (C) 2006 OpenVZ. SWsoft Inc
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/shmem_fs.h>
+#include <linux/highmem.h>
+
+#include <bc/beancounter.h>
+#include <bc/vmpages.h>
+#include <bc/vmrss.h>
+
+#include <asm/pgtable.h>
+
+/*
+ * Core object of accounting.
+ * page_beancounter (or rss_counter) ties together page and bc.
+ * Page has associated circular list of such pbs. When page is
+ * shared between bcs then its size is split between all of
+ * them in 2^n-s parts.
+ *
+ * E.g. three bcs will share page like 1/2:1/4:1/4
+ * adding one more reference would produce such a change:
+ * 1/2(bc1) : 1/4(bc2) : 1/4(bc3) ->
+ * (1/4(bc1) + 1/4(bc1)) : 1/4(bc2) : 1/4(bc3) ->
+ * 1/4(bc2) : 1/4(bc3) : 1/4(bc4) : 1/4(bc1)
+ */
+
+#define PB_MAGIC	0x62700001UL
+
+struct page_beancounter {
+	unsigned long magic;
+	struct page *page;
+	struct beancounter *bc;
+	struct page_beancounter *next_hash;
+	unsigned refcount;
+	struct list_head page_list;
+};
+
+#define PB_REFC_BITS 24
+
+#define pb_shift(p)	((p)->refcount >> PB_REFC_BITS)
+#define pb_shift_inc(p)	do { ((p)->refcount += (1 << PB_REFC_BITS)); } while (0)
+#define pb_shift_dec(p)	do { ((p)->refcount -= (1 << PB_REFC_BITS)); } while (0)
+
+#define pb_count(p)	((p)->refcount & ((1 << PB_REFC_BITS) - 1))
+#define pb_get(p)	do { ((p)->refcount++); } while (0)
+#define pb_put(p)	do { ((p)->refcount--); } while (0)
+
+#define pb_refcount_init(p, shift) do {					\
+		(p)->refcount = ((shift) << PB_REFC_BITS) + (1);	\
+	} while (0)
+
+static spinlock_t pb_lock = SPIN_LOCK_UNLOCKED;
+static struct page_beancounter **pb_hash_table;
+static unsigned int pb_hash_mask;
+
+static inline int pb_hash(struct beancounter *bc, struct page *page)
+{
+	return (page_to_pfn(page) + (bc->bc_id << 10)) & pb_hash_mask;
+}
+
+static kmem_cache_t *pb_cachep;
+#define alloc_pb()	kmem_cache_alloc(pb_cachep, GFP_KERNEL)
+#define free_pb(p)	kmem_cache_free(pb_cachep, p)
+
+#define next_page_pb(p) list_entry(p->page_list.next,	\
+		struct page_beancounter, page_list);
+#define prev_page_pb(p) list_entry(p->page_list.prev,	\
+		struct page_beancounter, page_list);
+
+/*
+ * Allocates a new page_beancounter struct and
+ * initialises required fields.
+ * pb->next_hash is set to NULL as this field is used
+ * in two ways:
+ * 1. When pb is in hash - it points to the next one in
+ *    the current hash chain;
+ * 2. When pb is not in hash yet - it points to the next pb
+ *    in list just allocated.
+ */
+struct page_beancounter *bc_alloc_rss_counter(void)
+{
+	struct page_beancounter *pb;
+
+	pb = alloc_pb();
+	if (pb == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	pb->magic = PB_MAGIC;
+	pb->next_hash = NULL;
+	return pb;
+}
+
+/*
+ * This function ensures that @list has at least @num elements.
+ * Otherwise needed elements are allocated and new list is
+ * returned. On error old list is freed.
+ *
+ * num == BC_ALLOC_ALL means that list must contain as many
+ * elements as there are BCs in hash now.
+ */
+struct page_beancounter *bc_alloc_rss_counter_list(long num,
+		struct page_beancounter *list)
+{
+	struct page_beancounter *pb;
+
+	for (pb = list; pb != NULL && num != 0; pb = pb->next_hash, num--);
+
+	/* need to allocate num more elements */
+	while (num > 0) {
+		pb = alloc_pb();
+		if (pb == NULL)
+			goto err;
+
+		pb->magic = PB_MAGIC;
+		pb->next_hash = list;
+		list = pb;
+		num--;
+	}
+
+	return list;
+
+err:
+	bc_free_rss_counter(list);
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Free the list of page_beancounter-s
+ */
+void bc_free_rss_counter(struct page_beancounter *pb)
+{
+	struct page_beancounter *tmp;
+
+	while (pb) {
+		tmp = pb->next_hash;
+		free_pb(pb);
+		pb = tmp;
+	}
+}
+
+/*
+ * Helpers to update rss_pages and unused_privvmpages on BC
+ */
+static void mod_rss_pages(struct beancounter *bc, int val,
+		struct vm_area_struct *vma, int unused)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&bc->bc_lock, flags);
+	if (vma && BC_VM_PRIVATE(vma->vm_flags, vma->vm_file)) {
+		if (unused < 0 && unlikely(bc->unused_privvmpages < -unused)) {
+			printk("BC: overuncharging %d unused pages: "
+					"val %i, held %lu\n",
+					bc->bc_id, unused,
+					bc->unused_privvmpages);
+			unused = -bc->unused_privvmpages;
+		}
+		bc->unused_privvmpages += unused;
+	}
+	bc->rss_pages += val;
+	bc_update_privvmpages(bc);
+	spin_unlock_irqrestore(&bc->bc_lock, flags);
+}
+
+#define __inc_rss_pages(bc, val)	mod_rss_pages(bc, val, NULL, 0)
+#define __dec_rss_pages(bc, val)	mod_rss_pages(bc, -(val), NULL, 0)
+#define inc_rss_pages(bc, val, vma)	mod_rss_pages(bc, val, vma, -1)
+#define dec_rss_pages(bc, val, vma)	mod_rss_pages(bc, -(val), vma, 1)
+
+/*
+ * Routines to manipulate page-to-bc references (page_beancounter)
+ * Reference may be added, removed or duplicated (see descriptions below)
+ */
+
+static int __pb_dup_ref(struct page *pg, struct beancounter *bc, int hash)
+{
+	struct page_beancounter *p;
+
+	for (p = pb_hash_table[hash];
+			p != NULL && (p->page != pg || p->bc != bc);
+			p = p->next_hash);
+	if (p == NULL)
+		return -1;
+
+	pb_get(p);
+	return 0;
+}
+
+static int __pb_add_ref(struct page *pg, struct beancounter *bc,
+		int hash, struct page_beancounter **ppb)
+{
+	struct page_beancounter *head, *p;
+	int shift, ret;
+
+	p = *ppb;
+	*ppb = p->next_hash;
+
+	p->page = pg;
+	p->bc = get_beancounter(bc);
+	p->next_hash = pb_hash_table[hash];
+	pb_hash_table[hash] = p;
+
+	head = page_pb(pg);
+	if (head != NULL) {
+		BUG_ON(head->magic != PB_MAGIC);
+		/* 
+		 * Move the first element to the end of the list.
+		 * List head (pb_head) is set to the next entry.
+		 * Note that this code works even if head is the only element
+		 * on the list (because it's cyclic).
+		 */
+		page_pb(pg) = next_page_pb(head);
+		pb_shift_inc(head);
+		shift = pb_shift(head);
+		/*
+		 * Update user beancounter, the share of head has been changed.
+		 * Note that the shift counter is taken after increment.
+		 */
+		__dec_rss_pages(head->bc, PB_PAGE_WEIGHT >> shift);
+		/*
+		 * Add the new page beancounter to the end of the list.
+		 */
+		list_add_tail(&p->page_list, &page_pb(pg)->page_list);
+	} else {
+		page_pb(pg) = p;
+		shift = 0;
+		INIT_LIST_HEAD(&p->page_list);
+	}
+
+	pb_refcount_init(p, shift);
+	ret = PB_PAGE_WEIGHT >> shift;
+	return ret;
+}
+
+static int __pb_remove_ref(struct page *page, struct beancounter *bc)
+{
+	int hash, ret;
+	struct page_beancounter *p, **q;
+	int shift, shiftt;
+
+	ret = 0;
+
+	hash = pb_hash(bc, page);
+
+	BUG_ON(page_pb(page) != NULL && page_pb(page)->magic != PB_MAGIC);
+	for (q = pb_hash_table + hash, p = *q;
+			p != NULL && (p->page != page || p->bc != bc);
+			q = &p->next_hash, p = *q);
+	if (p == NULL)
+		goto out;
+
+	pb_put(p);
+	if (pb_count(p) > 0)
+		goto out;
+
+	/* remove from the hash list */
+	*q = p->next_hash;
+
+	shift = pb_shift(p);
+	ret = PB_PAGE_WEIGHT >> shift;
+
+	if (page_pb(page) == p) {
+		if (list_empty(&p->page_list)) {
+			page_pb(page) = NULL;
+			put_beancounter(bc);
+			free_pb(p);
+			goto out;
+		}
+		page_pb(page) = next_page_pb(p);
+	}
+
+	list_del(&p->page_list);
+	put_beancounter(bc);
+	free_pb(p);
+
+	/*
+	 * Now balance the list.
+	 * Move the tail and adjust its shift counter.
+	 */
+	p = prev_page_pb(page_pb(page));
+	shiftt = pb_shift(p);
+	pb_shift_dec(p);
+	page_pb(page) = p;
+	__inc_rss_pages(p->bc, PB_PAGE_WEIGHT >> shiftt);
+
+	/*
+	 * If the shift counter of the moved beancounter is different from the
+	 * removed one's, repeat the procedure for one more tail beancounter 
+	 */
+	if (shiftt > shift) {
+		p = prev_page_pb(page_pb(page));
+		pb_shift_dec(p);
+		page_pb(page) = p;
+		__inc_rss_pages(p->bc, PB_PAGE_WEIGHT >> shiftt);
+	}
+out:
+	return ret;
+}
+
+/*
+ * bc_vmrss_page_add: Called when page is added to resident set
+ *   of any mm. In this case page is subtracted from unused_privvmpages
+ *   (if it is BC_VM_PRIVATE one) and a reference to BC must be set
+ *   with page_beancounter.
+ *
+ * bc_vmrss_page_del: The reverse operation - page is removed from
+ *   resident set and must become unused.
+ *
+ * bc_vmrss_page_dup: This is called on dup_mmap() when all pages
+ *   become shared between two mm structs. This case has one feature:
+ *   some pages (see below) may lack a reference to BC, so setting
+ *   new reference is not needed, but update of unused_privvmpages
+ *   is required.
+ *
+ * bc_vmrss_page_add_noref: This is called for (former) reserved pages
+ *   like ZERO_PAGE() or some pages set up with insert_page(). These
+ *   pages must not have reference to any BC, but must be accounted in
+ *   rss.
+ */
+
+void bc_vmrss_page_add(struct page *pg, struct mm_struct *mm,
+		struct vm_area_struct *vma, struct page_beancounter **ppb)
+{
+	struct beancounter *bc;
+	int hash, ret;
+
+	if (!PageAnon(pg) && is_shmem_mapping(pg->mapping))
+		return;
+
+	bc = mm->mm_bc;
+	hash = pb_hash(bc, pg);
+
+	ret = 0;
+	spin_lock(&pb_lock);
+	if (__pb_dup_ref(pg, bc, hash))
+		ret = __pb_add_ref(pg, bc, hash, ppb);
+	spin_unlock(&pb_lock);
+
+	inc_rss_pages(bc, ret, vma);
+}
+
+void bc_vmrss_page_del(struct page *pg, struct mm_struct *mm,
+		struct vm_area_struct *vma)
+{
+	struct beancounter *bc;
+	int ret;
+
+	if (!PageAnon(pg) && is_shmem_mapping(pg->mapping))
+		return;
+
+	bc = mm->mm_bc;
+
+	spin_lock(&pb_lock);
+	ret = __pb_remove_ref(pg, bc);
+	spin_unlock(&pb_lock);
+
+	dec_rss_pages(bc, ret, vma);
+}
+
+void bc_vmrss_page_dup(struct page *pg, struct mm_struct *mm,
+		struct vm_area_struct *vma, struct page_beancounter **ppb)
+{
+	struct beancounter *bc;
+	int hash, ret;
+
+	if (!PageAnon(pg) && is_shmem_mapping(pg->mapping))
+		return;
+
+	bc = mm->mm_bc;
+	hash = pb_hash(bc, pg);
+
+	ret = 0;
+	spin_lock(&pb_lock);
+	if (page_pb(pg) == NULL)
+		/*
+		 * pages like ZERO_PAGE must not be accounted in pbc
+		 * so on fork we just skip them
+		 */
+		goto out_unlock;
+
+	if (*ppb == PB_COPY_SAME) {
+		if (__pb_dup_ref(pg, bc, hash))
+			WARN_ON(1);
+	} else
+		ret = __pb_add_ref(pg, bc, hash, ppb);
+out_unlock:
+	spin_unlock(&pb_lock);
+
+	inc_rss_pages(bc, ret, vma);
+}
+
+void bc_vmrss_page_add_noref(struct page *pg, struct mm_struct *mm,
+		struct vm_area_struct *vma)
+{
+	inc_rss_pages(mm->mm_bc, 0, vma);
+}
+
+/*
+ * Calculate the number of currently resident pages for
+ * given mm_struct in a given range (addr - end).
+ * This is needed for mprotect_fixup() as by the time
+ * it is called some pages can be resident and thus
+ * not accounted in bc->unused_privvmpages. Such pages
+ * must not be uncharged (as they already are).
+ */
+
+static unsigned long pages_in_pte_range(struct mm_struct *mm, pmd_t *pmd,
+				unsigned long addr, unsigned long end,
+				unsigned long *pages)
+{
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	do {
+		pte_t ptent = *pte;
+		if (!pte_none(ptent) && pte_present(ptent))
+			(*pages)++;
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+	pte_unmap_unlock(pte - 1, ptl);
+	return addr;
+}
+
+static inline unsigned long pages_in_pmd_range(struct mm_struct *mm, pud_t *pud,
+				unsigned long addr, unsigned long end,
+				unsigned long *pages)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none_or_clear_bad(pmd))
+			continue;
+
+		next = pages_in_pte_range(mm, pmd, addr, next, pages);
+	} while (pmd++, addr = next, addr != end);
+	return addr;
+}
+
+static inline unsigned long pages_in_pud_range(struct mm_struct *mm, pgd_t *pgd,
+				unsigned long addr, unsigned long end,
+				unsigned long *pages)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud))
+			continue;
+
+		next = pages_in_pmd_range(mm, pud, addr, next, pages);
+	} while (pud++, addr = next, addr != end);
+	return addr;
+}
+
+unsigned long mm_rss_pages(struct mm_struct *mm,
+		unsigned long addr, unsigned long end)
+{
+	pgd_t *pgd;
+	unsigned long next;
+	unsigned long pages;
+
+	BUG_ON(addr >= end);
+
+	pages = 0;
+	pgd = pgd_offset(mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+
+		next = pages_in_pud_range(mm, pgd, addr, next, &pages);
+	} while (pgd++, addr = next, addr != end);
+	return pages;
+}
+
+void __init bc_init_rss(void)
+{
+	unsigned long hash_size;
+
+	pb_cachep = kmem_cache_create("page_beancounter",
+			sizeof(struct page_beancounter), 0,
+			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
+
+	hash_size = num_physpages >> 2;
+	for (pb_hash_mask = 1;
+			(hash_size & pb_hash_mask) != hash_size;
+			pb_hash_mask = (pb_hash_mask << 1) + 1);
+
+	hash_size = pb_hash_mask + 1;
+	printk(KERN_INFO "BC: Page beancounter hash is %lu entries.\n",
+			hash_size);
+	pb_hash_table = vmalloc(hash_size * sizeof(struct page_beancounter *));
+	memset(pb_hash_table, 0, hash_size * sizeof(struct page_beancounter *));
+}
--- ./mm/shmem.c.bcrsscore	2006-09-05 13:39:26.000000000 +0400
+++ ./mm/shmem.c	2006-09-05 13:46:35.000000000 +0400
@@ -2236,6 +2236,12 @@ static struct vm_operations_struct shmem
 #endif
 };
 
+#ifdef CONFIG_BEANCOUNTERS_RSS
+int is_shmem_mapping(struct address_space *mapping)
+{
+	return (mapping != NULL && mapping->a_ops == &shmem_aops);
+}
+#endif
 
 static int shmem_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data, struct vfsmount *mnt)

  parent reply	other threads:[~2006-09-05 15:28 UTC|newest]

Thread overview: 144+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-09-05 15:02 [PATCH] BC: resource beancounters (v4) (added user memory) Kirill Korotaev
2006-09-05 15:19 ` [PATCH 1/13] BC: introduce atomic_dec_and_lock_irqsave() Kirill Korotaev
2006-09-05 15:20 ` [PATCH 2/13] BC: kconfig Kirill Korotaev
2006-09-05 15:21 ` [PATCH 3/17] BC: beancounters core (API) Kirill Korotaev
2006-09-05 15:23 ` [PATCH 4/13] BC: context inheriting and changing Kirill Korotaev
2006-09-05 15:24 ` [PATCH 5/13] BC: user interface (syscalls) Kirill Korotaev
2006-09-05 16:04   ` [ckrm-tech] " Balbir Singh
2006-09-06  8:29     ` Pavel Emelianov
2006-09-06  8:57       ` Balbir Singh
2006-09-06 10:42         ` Pavel Emelianov
2006-09-06 13:23           ` Balbir Singh
2006-09-06 13:45   ` Balbir Singh
2006-09-06 14:23     ` Kirill Korotaev
2006-09-05 15:25 ` [PATCH 6/13] BC: kernel memory (core) Kirill Korotaev
2006-09-05 15:26 ` [PATCH 7/13] BC: kernel memory (marks) Kirill Korotaev
2006-09-06 14:19   ` Cedric Le Goater
2006-09-05 15:27 ` [PATCH 8/13] BC: locked pages (core) Kirill Korotaev
2006-09-05 15:29 ` [PATCH 9/13] BC: locked pages (charge hooks) Kirill Korotaev
2006-09-06  3:43   ` Nick Piggin
2006-09-06  8:45     ` Pavel Emelianov
2006-09-06  9:41       ` Nick Piggin
2006-09-06 14:16     ` [ckrm-tech] " Kirill Korotaev
2006-09-05 15:30 ` [PATCH 10/13] BC: privvm pages Kirill Korotaev
2006-09-05 15:31 ` [PATCH 11/13] BC: vmrss (preparations) Kirill Korotaev
2006-09-05 22:09   ` Cedric Le Goater
2006-09-06 13:59     ` Kirill Korotaev
2006-09-07 16:28   ` [ckrm-tech] " Balbir Singh
2006-09-05 15:32 ` Kirill Korotaev [this message]
2006-09-05 15:33 ` [PATCH 13/13] BC: vmrss (charges) Kirill Korotaev
2006-09-05 16:53 ` [ckrm-tech] [PATCH] BC: resource beancounters (v4) (added user memory) Balbir Singh
2006-09-06  8:34   ` Pavel Emelianov
2006-09-06 13:06   ` Kirill Korotaev
2006-09-06 19:17     ` Balbir Singh
2006-09-06 22:06       ` Chandra Seetharaman
2006-09-07  3:08         ` Balbir Singh
2006-09-08  7:33         ` Pavel Emelianov
2006-09-08 15:43           ` Dave Hansen
2006-09-08 18:26             ` Balbir Singh
2006-09-11  6:56               ` Pavel Emelianov
2006-09-11  7:54                 ` Balbir Singh
2006-09-11  8:13                   ` Pavel Emelianov
2006-09-11  8:19                     ` Balbir Singh
2006-09-12 10:39                       ` Pavel Emelianov
2006-09-12 10:40                       ` Pavel Emelianov
2006-09-12 12:01                         ` Balbir Singh
2006-09-13 13:39                           ` Pavel Emelianov
2006-09-11 10:21                     ` Srivatsa Vaddagiri
2006-09-12 10:35                       ` Pavel Emelianov
2006-09-11 18:49                     ` Chandra Seetharaman
2006-09-12 10:48                       ` Pavel Emelianov
2006-09-12 23:58                         ` Chandra Seetharaman
2006-09-13  8:06                           ` Pavel Emelianov
2006-09-13 12:15                             ` Srivatsa Vaddagiri
2006-09-13 13:35                               ` Pavel Emelianov
2006-09-13 22:31                             ` Chandra Seetharaman
2006-09-14  7:53                               ` Pavel Emelianov
2006-09-14  8:06                                 ` Balbir Singh
2006-09-14 13:02                                   ` Pavel Emelianov
2006-09-15  0:02                                     ` Chandra Seetharaman
2006-09-15  7:21                                       ` Pavel Emelianov
2006-09-15  8:49                                       ` Kirill Korotaev
2006-09-18 23:51                                         ` Chandra Seetharaman
2006-09-14 23:42                                 ` Chandra Seetharaman
2006-09-15  7:15                                   ` Pavel Emelianov
2006-09-15  8:55                                     ` Kirill Korotaev
2006-09-15 11:15                                       ` Pavel Emelianov
2006-09-18  8:25                                         ` Balbir Singh
2006-09-18  8:56                                           ` Pavel Emelianov
2006-09-18 11:20                                             ` Balbir Singh
2006-09-18 11:32                                               ` Pavel Emelianov
2006-09-19  0:05                                             ` Chandra Seetharaman
2006-09-19  8:04                                               ` Pavel Emelianov
2006-09-18 11:27                                         ` Balbir Singh
2006-09-18 12:37                                           ` Pavel Emelianov
2006-09-19  0:08                                             ` Chandra Seetharaman
2006-09-19  8:06                                               ` Pavel Emelianov
2006-09-18 23:48                                     ` Chandra Seetharaman
2006-09-11 18:44                 ` Chandra Seetharaman
2006-09-15  8:57                   ` Kirill Korotaev
2006-09-18 23:57                     ` Chandra Seetharaman
2006-09-08 19:23           ` Chandra Seetharaman
2006-09-08 21:43             ` Rohit Seth
2006-09-11 18:25               ` Chandra Seetharaman
2006-09-11 19:10                 ` Rohit Seth
2006-09-11 19:42                   ` Chandra Seetharaman
2006-09-11 23:58                     ` Rohit Seth
2006-09-12  9:53                       ` Balbir Singh
2006-09-12 23:54                       ` Chandra Seetharaman
2006-09-13  0:39                         ` Rohit Seth
2006-09-13  1:10                           ` Chandra Seetharaman
2006-09-13  1:25                             ` Rohit Seth
2006-09-13  4:41                               ` Srivatsa Vaddagiri
2006-09-13 22:20                               ` Chandra Seetharaman
2006-09-14  1:22                                 ` Rohit Seth
2006-09-14 23:13                                   ` Chandra Seetharaman
2006-09-11 19:48                   ` [Devel] " Kir Kolyshkin
2006-09-12  0:28                     ` Rohit Seth
2006-09-12 10:44                   ` Srivatsa Vaddagiri
2006-09-12 17:22                     ` Rohit Seth
2006-09-12 17:40                       ` Srivatsa Vaddagiri
2006-09-12 18:02                         ` Rohit Seth
2006-09-13  0:02                       ` Chandra Seetharaman
2006-09-13  0:43                         ` Rohit Seth
2006-09-13  1:13                           ` Chandra Seetharaman
2006-09-13  1:33                             ` Rohit Seth
2006-09-13 22:24                               ` Chandra Seetharaman
2006-09-14  1:27                                 ` Rohit Seth
2006-09-14 23:28                                   ` Chandra Seetharaman
2006-09-15  9:26                                 ` Kirill Korotaev
2006-09-15 16:52                                   ` Rohit Seth
2006-09-15 21:21                                     ` [Devel] " Kir Kolyshkin
2006-09-15 21:58                                       ` Rohit Seth
2006-09-19  0:02                                       ` [ckrm-tech] [Devel] " Chandra Seetharaman
2006-09-18 23:59                                   ` [ckrm-tech] " Chandra Seetharaman
2006-09-06 21:47     ` Chandra Seetharaman
2006-09-08 15:33     ` Dave Hansen
2006-09-08 15:57     ` [RFC] Add tgid aggregation to beancounters (was Re: [ckrm-tech] [PATCH] BC: resource beancounters (v4) (added user memory)) Balbir Singh
2006-09-11 21:24       ` V2: " Balbir Singh
2006-09-15 16:40         ` [ckrm-tech] V2: Add tgid aggregation to beancounters (was " Kirill Korotaev
2006-10-09  8:23           ` Balbir Singh
2006-09-05 17:46 ` [ckrm-tech] [PATCH] BC: resource beancounters (v4) (added user memory) Dave Hansen
2006-09-05 18:28   ` Balbir Singh
2006-09-06  0:17   ` Rohit Seth
2006-09-08 15:30     ` Dave Hansen
2006-09-08 17:10       ` Rohit Seth
2006-09-08 17:26         ` Shailabh Nagar
2006-09-08 21:15           ` Rohit Seth
2006-09-08 21:28             ` Shailabh Nagar
2006-09-06 13:57   ` Kirill Korotaev
2006-09-06 21:54     ` Chandra Seetharaman
2006-09-07  7:29       ` Pavel Emelianov
2006-09-07 19:16         ` Chandra Seetharaman
2006-09-08  7:22           ` Pavel Emelianov
2006-09-08 19:07             ` Chandra Seetharaman
2006-09-11  7:02               ` Pavel Emelianov
2006-09-11 13:04                 ` Srivatsa Vaddagiri
2006-09-12 10:24                   ` Pavel Emelianov
2006-09-12 10:29                     ` Srivatsa Vaddagiri
2006-09-12 11:06                       ` Pavel Emelianov
2006-09-12 12:04                         ` Srivatsa Vaddagiri
2006-09-11 18:47                 ` Chandra Seetharaman
2006-09-07 19:29         ` Chandra Seetharaman
2006-09-08  7:26           ` Pavel Emelianov
2006-09-08 19:10             ` Chandra Seetharaman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=44FD9884.9090801@sw.ru \
    --to=dev@sw.ru \
    --cc=adobriyan@mail.ru \
    --cc=ak@suse.de \
    --cc=akpm@osdl.org \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=ckrm-tech@lists.sourceforge.net \
    --cc=devel@openvz.org \
    --cc=hch@infradead.org \
    --cc=hugh@veritas.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matthltc@us.ibm.com \
    --cc=oleg@tv-sign.ru \
    --cc=riel@redhat.com \
    --cc=saw@sw.ru \
    --cc=xemul@openvz.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.