All of lore.kernel.org
 help / color / mirror / Atom feed
From: Fengguang Wu <fengguang.wu@intel.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Greg Thelen <gthelen@google.com>, Jan Kara <jack@suse.cz>,
	Ying Han <yinghan@google.com>,
	"hannes@cmpxchg.org" <hannes@cmpxchg.org>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	Rik van Riel <riel@redhat.com>,
	Fengguang Wu <fengguang.wu@intel.com>,
	Linux Memory Management List <linux-mm@kvack.org>,
	LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH 4/9] memcg: dirty page accounting support routines
Date: Tue, 28 Feb 2012 22:00:26 +0800	[thread overview]
Message-ID: <20120228144747.124608935@intel.com> (raw)
In-Reply-To: 20120228140022.614718843@intel.com

[-- Attachment #1: memcg-dirty-page-accounting-support-routines.patch --]
[-- Type: text/plain, Size: 5047 bytes --]

From: Greg Thelen <gthelen@google.com>

Added memcg dirty page accounting support routines.  These routines are
used by later changes to provide memcg aware writeback and dirty page
limiting.  A mem_cgroup_dirty_info() tracepoint is also included to
allow for easier understanding of memcg writeback operation.

Signed-off-by: Greg Thelen <gthelen@google.com>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
---
Changelog since v8:
- Use 'memcg' rather than 'mem' for local variables and parameters.
  This is consistent with other memory controller code.

 include/linux/memcontrol.h |    5 +
 mm/memcontrol.c            |  112 +++++++++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+)

--- linux.orig/include/linux/memcontrol.h	2012-02-25 20:48:34.337580646 +0800
+++ linux/include/linux/memcontrol.h	2012-02-25 20:48:34.361580646 +0800
@@ -36,8 +36,13 @@ enum mem_cgroup_page_stat_item {
 	MEMCG_NR_FILE_DIRTY, /* # of dirty pages in page cache */
 	MEMCG_NR_FILE_WRITEBACK, /* # of pages under writeback */
 	MEMCG_NR_FILE_UNSTABLE_NFS, /* # of NFS unstable pages */
+	MEMCG_NR_DIRTYABLE_PAGES, /* # of pages that could be dirty */
 };
 
+unsigned long mem_cgroup_page_stat(struct mem_cgroup *memcg,
+				   enum mem_cgroup_page_stat_item item);
+unsigned long mem_cgroup_dirty_pages(struct mem_cgroup *memcg);
+
 struct mem_cgroup_reclaim_cookie {
 	struct zone *zone;
 	int priority;
--- linux.orig/mm/memcontrol.c	2012-02-25 20:48:34.337580646 +0800
+++ linux/mm/memcontrol.c	2012-02-25 21:09:54.073554384 +0800
@@ -1255,6 +1255,118 @@ int mem_cgroup_swappiness(struct mem_cgr
 	return memcg->swappiness;
 }
 
+static inline bool mem_cgroup_can_swap(struct mem_cgroup *memcg)
+{	/* Used to decide whether anon LRU pages count as dirtyable. */
+	if (nr_swap_pages == 0)		/* no swap pages: cannot swap */
+		return false;
+	if (!do_swap_account)		/* memsw checks below do not apply */
+		return true;
+	if (memcg->memsw_is_minimum)
+		return false;
+	if (res_counter_margin(&memcg->memsw) == 0)	/* no memsw margin */
+		return false;
+	return true;
+}
+
+static s64 mem_cgroup_local_page_stat(struct mem_cgroup *memcg,
+				      enum mem_cgroup_page_stat_item item)
+{	/* Read @item for @memcg; mem_cgroup_page_stat() sums the results. */
+	s64 ret;
+
+	switch (item) {
+	case MEMCG_NR_FILE_DIRTY:
+		ret = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_FILE_DIRTY);
+		break;
+	case MEMCG_NR_FILE_WRITEBACK:
+		ret = mem_cgroup_read_stat(memcg,
+					   MEM_CGROUP_STAT_FILE_WRITEBACK);
+		break;
+	case MEMCG_NR_FILE_UNSTABLE_NFS:
+		ret = mem_cgroup_read_stat(memcg,
+					   MEM_CGROUP_STAT_FILE_UNSTABLE_NFS);
+		break;
+	case MEMCG_NR_DIRTYABLE_PAGES:	/* file LRU, and anon LRU if can_swap */
+		ret = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE)) +
+			mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE));
+		if (mem_cgroup_can_swap(memcg))
+			ret += mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON)) +
+				mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON));
+		break;
+	default:
+		BUG();	/* unhandled stat item */
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Return the number of additional pages that the @memcg cgroup could allocate:
+ * the smallest margin of @memcg and, if use_hierarchy is set, of its parent
+ * cgroups, capped by the global count of free pages (NR_FREE_PAGES).
+ */
+static unsigned long
+mem_cgroup_hierarchical_free_pages(struct mem_cgroup *memcg)
+{
+	u64 free;
+	unsigned long min_free;
+
+	min_free = global_page_state(NR_FREE_PAGES);	/* global upper bound */
+
+	while (memcg) {
+		free = mem_cgroup_margin(memcg);
+		min_free = min_t(u64, min_free, free);
+		memcg = parent_mem_cgroup(memcg);	/* NULL ends the walk */
+	}
+
+	return min_free;
+}
+
+/**
+ * mem_cgroup_page_stat() - get a page statistic summed over @memcg's tree
+ * @memcg:     memory cgroup to query
+ * @item:      memory statistic item exported to the kernel
+ *
+ * Return: the summed value of @item, clamped to zero if the sum is negative.
+ */
+unsigned long mem_cgroup_page_stat(struct mem_cgroup *memcg,
+				   enum mem_cgroup_page_stat_item item)
+{
+	struct mem_cgroup *iter;
+	s64 value;
+
+	/*
+	 * Dirtyable pages also include pages that could still be allocated, so
+	 * start from the free-page headroom of @memcg and its parents.
+	 */
+	if (item == MEMCG_NR_DIRTYABLE_PAGES)
+		value = mem_cgroup_hierarchical_free_pages(memcg);
+	else
+		value = 0;
+
+	/*
+	 * Add up the local value of @item for every cgroup that
+	 * for_each_mem_cgroup_tree() visits starting from @memcg.
+	 */
+	for_each_mem_cgroup_tree(iter, memcg)
+		value += mem_cgroup_local_page_stat(iter, item);
+
+	/*
+	 * Summing of unlocked per-cpu counters is racy and may yield a slightly
+	 * negative value.  Zero is the only sensible value in such cases.
+	 */
+	if (unlikely(value < 0))
+		value = 0;
+
+	return value;
+}
+
+unsigned long mem_cgroup_dirty_pages(struct mem_cgroup *memcg)
+{	/* Total of dirty, writeback and unstable-NFS pages for @memcg. */
+	return mem_cgroup_page_stat(memcg, MEMCG_NR_FILE_DIRTY) +
+		mem_cgroup_page_stat(memcg, MEMCG_NR_FILE_WRITEBACK) +
+		mem_cgroup_page_stat(memcg, MEMCG_NR_FILE_UNSTABLE_NFS);
+}
+
 static void mem_cgroup_start_move(struct mem_cgroup *memcg)
 {
 	int cpu;


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

WARNING: multiple messages have this Message-ID (diff)
From: Fengguang Wu <fengguang.wu@intel.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Greg Thelen <gthelen@google.com>, Jan Kara <jack@suse.cz>,
	Ying Han <yinghan@google.com>,
	"hannes@cmpxchg.org" <hannes@cmpxchg.org>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	Rik van Riel <riel@redhat.com>,
	Fengguang Wu <fengguang.wu@intel.com>
Cc: Linux Memory Management List <linux-mm@kvack.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH 4/9] memcg: dirty page accounting support routines
Date: Tue, 28 Feb 2012 22:00:26 +0800	[thread overview]
Message-ID: <20120228144747.124608935@intel.com> (raw)
In-Reply-To: 20120228140022.614718843@intel.com

[-- Attachment #1: memcg-dirty-page-accounting-support-routines.patch --]
[-- Type: text/plain, Size: 4744 bytes --]

From: Greg Thelen <gthelen@google.com>

Added memcg dirty page accounting support routines.  These routines are
used by later changes to provide memcg aware writeback and dirty page
limiting.  A mem_cgroup_dirty_info() tracepoint is also included to
allow for easier understanding of memcg writeback operation.

Signed-off-by: Greg Thelen <gthelen@google.com>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
---
Changelog since v8:
- Use 'memcg' rather than 'mem' for local variables and parameters.
  This is consistent with other memory controller code.

 include/linux/memcontrol.h |    5 +
 mm/memcontrol.c            |  112 +++++++++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+)

--- linux.orig/include/linux/memcontrol.h	2012-02-25 20:48:34.337580646 +0800
+++ linux/include/linux/memcontrol.h	2012-02-25 20:48:34.361580646 +0800
@@ -36,8 +36,13 @@ enum mem_cgroup_page_stat_item {
 	MEMCG_NR_FILE_DIRTY, /* # of dirty pages in page cache */
 	MEMCG_NR_FILE_WRITEBACK, /* # of pages under writeback */
 	MEMCG_NR_FILE_UNSTABLE_NFS, /* # of NFS unstable pages */
+	MEMCG_NR_DIRTYABLE_PAGES, /* # of pages that could be dirty */
 };
 
+unsigned long mem_cgroup_page_stat(struct mem_cgroup *memcg,
+				   enum mem_cgroup_page_stat_item item);
+unsigned long mem_cgroup_dirty_pages(struct mem_cgroup *memcg);
+
 struct mem_cgroup_reclaim_cookie {
 	struct zone *zone;
 	int priority;
--- linux.orig/mm/memcontrol.c	2012-02-25 20:48:34.337580646 +0800
+++ linux/mm/memcontrol.c	2012-02-25 21:09:54.073554384 +0800
@@ -1255,6 +1255,118 @@ int mem_cgroup_swappiness(struct mem_cgr
 	return memcg->swappiness;
 }
 
+static inline bool mem_cgroup_can_swap(struct mem_cgroup *memcg)
+{	/* Used to decide whether anon LRU pages count as dirtyable. */
+	if (nr_swap_pages == 0)		/* no swap pages: cannot swap */
+		return false;
+	if (!do_swap_account)		/* memsw checks below do not apply */
+		return true;
+	if (memcg->memsw_is_minimum)
+		return false;
+	if (res_counter_margin(&memcg->memsw) == 0)	/* no memsw margin */
+		return false;
+	return true;
+}
+
+static s64 mem_cgroup_local_page_stat(struct mem_cgroup *memcg,
+				      enum mem_cgroup_page_stat_item item)
+{	/* Read @item for @memcg; mem_cgroup_page_stat() sums the results. */
+	s64 ret;
+
+	switch (item) {
+	case MEMCG_NR_FILE_DIRTY:
+		ret = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_FILE_DIRTY);
+		break;
+	case MEMCG_NR_FILE_WRITEBACK:
+		ret = mem_cgroup_read_stat(memcg,
+					   MEM_CGROUP_STAT_FILE_WRITEBACK);
+		break;
+	case MEMCG_NR_FILE_UNSTABLE_NFS:
+		ret = mem_cgroup_read_stat(memcg,
+					   MEM_CGROUP_STAT_FILE_UNSTABLE_NFS);
+		break;
+	case MEMCG_NR_DIRTYABLE_PAGES:	/* file LRU, and anon LRU if can_swap */
+		ret = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE)) +
+			mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE));
+		if (mem_cgroup_can_swap(memcg))
+			ret += mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON)) +
+				mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON));
+		break;
+	default:
+		BUG();	/* unhandled stat item */
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Return the number of additional pages that the @memcg cgroup could allocate:
+ * the smallest margin of @memcg and, if use_hierarchy is set, of its parent
+ * cgroups, capped by the global count of free pages (NR_FREE_PAGES).
+ */
+static unsigned long
+mem_cgroup_hierarchical_free_pages(struct mem_cgroup *memcg)
+{
+	u64 free;
+	unsigned long min_free;
+
+	min_free = global_page_state(NR_FREE_PAGES);	/* global upper bound */
+
+	while (memcg) {
+		free = mem_cgroup_margin(memcg);
+		min_free = min_t(u64, min_free, free);
+		memcg = parent_mem_cgroup(memcg);	/* NULL ends the walk */
+	}
+
+	return min_free;
+}
+
+/**
+ * mem_cgroup_page_stat() - get a page statistic summed over @memcg's tree
+ * @memcg:     memory cgroup to query
+ * @item:      memory statistic item exported to the kernel
+ *
+ * Return: the summed value of @item, clamped to zero if the sum is negative.
+ */
+unsigned long mem_cgroup_page_stat(struct mem_cgroup *memcg,
+				   enum mem_cgroup_page_stat_item item)
+{
+	struct mem_cgroup *iter;
+	s64 value;
+
+	/*
+	 * Dirtyable pages also include pages that could still be allocated, so
+	 * start from the free-page headroom of @memcg and its parents.
+	 */
+	if (item == MEMCG_NR_DIRTYABLE_PAGES)
+		value = mem_cgroup_hierarchical_free_pages(memcg);
+	else
+		value = 0;
+
+	/*
+	 * Add up the local value of @item for every cgroup that
+	 * for_each_mem_cgroup_tree() visits starting from @memcg.
+	 */
+	for_each_mem_cgroup_tree(iter, memcg)
+		value += mem_cgroup_local_page_stat(iter, item);
+
+	/*
+	 * Summing of unlocked per-cpu counters is racy and may yield a slightly
+	 * negative value.  Zero is the only sensible value in such cases.
+	 */
+	if (unlikely(value < 0))
+		value = 0;
+
+	return value;
+}
+
+unsigned long mem_cgroup_dirty_pages(struct mem_cgroup *memcg)
+{	/* Total of dirty, writeback and unstable-NFS pages for @memcg. */
+	return mem_cgroup_page_stat(memcg, MEMCG_NR_FILE_DIRTY) +
+		mem_cgroup_page_stat(memcg, MEMCG_NR_FILE_WRITEBACK) +
+		mem_cgroup_page_stat(memcg, MEMCG_NR_FILE_UNSTABLE_NFS);
+}
+
 static void mem_cgroup_start_move(struct mem_cgroup *memcg)
 {
 	int cpu;



  parent reply	other threads:[~2012-02-28 14:56 UTC|newest]

Thread overview: 116+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-02-28 14:00 [PATCH 0/9] [RFC] pageout work and dirty reclaim throttling Fengguang Wu
2012-02-28 14:00 ` Fengguang Wu
2012-02-28 14:00 ` [PATCH 1/9] memcg: add page_cgroup flags for dirty page tracking Fengguang Wu
2012-02-28 14:00   ` Fengguang Wu
2012-02-29  0:50   ` KAMEZAWA Hiroyuki
2012-02-29  0:50     ` KAMEZAWA Hiroyuki
2012-03-04  1:29     ` Fengguang Wu
2012-03-04  1:29       ` Fengguang Wu
2012-02-28 14:00 ` [PATCH 2/9] memcg: add dirty page accounting infrastructure Fengguang Wu
2012-02-28 14:00   ` Fengguang Wu
2012-02-28 22:37   ` Andrew Morton
2012-02-28 22:37     ` Andrew Morton
2012-02-29  0:27     ` Fengguang Wu
2012-02-29  0:27       ` Fengguang Wu
2012-02-28 14:00 ` [PATCH 3/9] memcg: add kernel calls for memcg dirty page stats Fengguang Wu
2012-02-28 14:00   ` Fengguang Wu
2012-02-29  1:10   ` KAMEZAWA Hiroyuki
2012-02-29  1:10     ` KAMEZAWA Hiroyuki
2012-02-28 14:00 ` Fengguang Wu [this message]
2012-02-28 14:00   ` [PATCH 4/9] memcg: dirty page accounting support routines Fengguang Wu
2012-02-28 15:15   ` Fengguang Wu
2012-02-28 15:15     ` Fengguang Wu
2012-02-28 22:45   ` Andrew Morton
2012-02-28 22:45     ` Andrew Morton
2012-02-29  1:15     ` KAMEZAWA Hiroyuki
2012-02-29  1:15       ` KAMEZAWA Hiroyuki
2012-02-28 14:00 ` [PATCH 5/9] writeback: introduce the pageout work Fengguang Wu
2012-02-28 14:00   ` Fengguang Wu
2012-02-29  0:04   ` Andrew Morton
2012-02-29  0:04     ` Andrew Morton
2012-02-29  2:31     ` Fengguang Wu
2012-02-29  2:31       ` Fengguang Wu
2012-02-29 13:28     ` Fengguang Wu
2012-02-29 13:28       ` Fengguang Wu
2012-03-01 11:04     ` Jan Kara
2012-03-01 11:04       ` Jan Kara
2012-03-01 11:41       ` Fengguang Wu
2012-03-01 11:41         ` Fengguang Wu
2012-03-01 16:50         ` Jan Kara
2012-03-01 16:50           ` Jan Kara
2012-03-01 19:46         ` Andrew Morton
2012-03-01 19:46           ` Andrew Morton
2012-03-03 13:25           ` Fengguang Wu
2012-03-03 13:25             ` Fengguang Wu
2012-03-07  0:37             ` Andrew Morton
2012-03-07  0:37               ` Andrew Morton
2012-03-07  5:40               ` Fengguang Wu
2012-03-07  5:40                 ` Fengguang Wu
2012-03-01 19:42       ` Andrew Morton
2012-03-01 19:42         ` Andrew Morton
2012-03-01 21:15         ` Jan Kara
2012-03-01 21:15           ` Jan Kara
2012-03-01 21:22           ` Andrew Morton
2012-03-01 21:22             ` Andrew Morton
2012-03-01 12:36     ` Fengguang Wu
2012-03-01 12:36       ` Fengguang Wu
2012-03-01 16:38       ` Jan Kara
2012-03-01 16:38         ` Jan Kara
2012-03-02  4:48         ` Fengguang Wu
2012-03-02  4:48           ` Fengguang Wu
2012-03-02  9:59           ` Jan Kara
2012-03-02  9:59             ` Jan Kara
2012-03-02 10:39             ` Fengguang Wu
2012-03-02 10:39               ` Fengguang Wu
2012-03-02 19:57               ` Andrew Morton
2012-03-02 19:57                 ` Andrew Morton
2012-03-03 13:55                 ` Fengguang Wu
2012-03-03 13:55                   ` Fengguang Wu
2012-03-03 14:27                   ` Fengguang Wu
2012-03-03 14:27                     ` Fengguang Wu
2012-03-04 11:13                     ` Fengguang Wu
2012-03-04 11:13                       ` Fengguang Wu
2012-03-07 15:48                   ` Artem Bityutskiy
2012-03-07 15:48                     ` Artem Bityutskiy
2012-03-09  7:31                     ` Fengguang Wu
2012-03-09  7:31                       ` Fengguang Wu
2012-03-09  9:51                       ` Jan Kara
2012-03-09  9:51                         ` Jan Kara
2012-03-09 10:24                         ` Artem Bityutskiy
2012-03-09 10:24                           ` Artem Bityutskiy
2012-03-09 16:10                         ` Artem Bityutskiy
2012-03-09 16:10                           ` Artem Bityutskiy
2012-03-09 21:11                           ` Jan Kara
2012-03-09 21:11                             ` Jan Kara
2012-03-12 12:36                             ` Artem Bityutskiy
2012-03-12 12:36                               ` Artem Bityutskiy
2012-03-12 14:02                               ` Jan Kara
2012-03-12 14:02                                 ` Jan Kara
2012-03-12 14:21                                 ` Artem Bityutskiy
2012-03-12 14:21                                   ` Artem Bityutskiy
2012-03-09 10:15                   ` Jan Kara
2012-03-09 10:15                     ` Jan Kara
2012-03-09 15:10                     ` Fengguang Wu
2012-03-09 15:10                       ` Fengguang Wu
2012-02-29 13:51   ` [PATCH v2 " Fengguang Wu
2012-02-29 13:51     ` Fengguang Wu
2012-03-01 13:35     ` Fengguang Wu
2012-03-01 13:35       ` Fengguang Wu
2012-03-02  6:22       ` [PATCH v3 " Fengguang Wu
2012-03-02  6:22         ` Fengguang Wu
2012-02-28 14:00 ` [PATCH 6/9] vmscan: dirty reclaim throttling Fengguang Wu
2012-02-28 14:00   ` Fengguang Wu
2012-02-28 14:00 ` [PATCH 7/9] mm: pass __GFP_WRITE to memcg charge and reclaim routines Fengguang Wu
2012-02-28 14:00   ` Fengguang Wu
2012-02-28 14:00 ` [PATCH 8/9] mm: dont set __GFP_WRITE on ramfs/sysfs writes Fengguang Wu
2012-02-28 14:00   ` Fengguang Wu
2012-03-01 10:13   ` Johannes Weiner
2012-03-01 10:13     ` Johannes Weiner
2012-03-01 10:30     ` Fengguang Wu
2012-03-01 10:30       ` Fengguang Wu
2012-02-28 14:00 ` [PATCH 9/9] mm: debug vmscan waits Fengguang Wu
2012-02-28 14:00   ` Fengguang Wu
2012-03-02  6:59   ` [RFC PATCH] mm: don't treat anonymous pages as dirtyable pages Fengguang Wu
2012-03-02  6:59     ` Fengguang Wu
2012-03-02  7:18     ` Fengguang Wu
2012-03-02  7:18       ` Fengguang Wu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120228144747.124608935@intel.com \
    --to=fengguang.wu@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=gthelen@google.com \
    --cc=hannes@cmpxchg.org \
    --cc=jack@suse.cz \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=riel@redhat.com \
    --cc=yinghan@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.