public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Andi Kleen <ak@suse.de>
To: Andi Kleen <ak@suse.de>
Cc: linux-kernel@vger.kernel.org, akpm@osdl.org
Subject: [PATCH] NUMA API for Linux 7/ Add statistics
Date: Tue, 6 Apr 2004 15:38:12 +0200	[thread overview]
Message-ID: <20040406153812.38553277.ak@suse.de> (raw)
In-Reply-To: <20040406153322.5d6e986e.ak@suse.de>

Add NUMA hit/miss statistics to page allocation and display them
in sysfs.

This is not strictly required for the NUMA API, but without it, it is very
difficult to make sure the NUMA API works properly.

The overhead is quite low because all counters are per CPU, and the accounting
only happens when CONFIG_NUMA is defined.

diff -u linux-2.6.5-numa/include/linux/mmzone.h-o linux-2.6.5-numa/include/linux/mmzone.h
--- linux-2.6.5-numa/include/linux/mmzone.h-o	2004-04-06 13:12:23.000000000 +0200
+++ linux-2.6.5-numa/include/linux/mmzone.h	2004-04-06 13:36:12.000000000 +0200
@@ -52,6 +52,14 @@
 
 struct per_cpu_pageset {
 	struct per_cpu_pages pcp[2];	/* 0: hot.  1: cold */
+#ifdef CONFIG_NUMA
+	unsigned long numa_hit;		/* allocated in intended node */
+	unsigned long numa_miss;	/* allocated in non intended node */
+	unsigned long numa_foreign;	/* was intended here, hit elsewhere */
+	unsigned long interleave_hit; 	/* interleaver preferred this zone */
+	unsigned long local_node;	/* allocation from local node */
+	unsigned long other_node;	/* allocation from other node */
+#endif
 } ____cacheline_aligned_in_smp;
 
 /*
diff -u linux-2.6.5-numa/mm/page_alloc.c-o linux-2.6.5-numa/mm/page_alloc.c
--- linux-2.6.5-numa/mm/page_alloc.c-o	2004-04-06 13:12:24.000000000 +0200
+++ linux-2.6.5-numa/mm/page_alloc.c	2004-04-06 13:49:54.000000000 +0200
@@ -447,6 +447,31 @@
 }
 #endif /* CONFIG_PM */
 
+static void zone_statistics(struct zonelist *zonelist, struct zone *z) 
+{ 
+#ifdef CONFIG_NUMA
+	unsigned long flags;
+	int cpu; 
+	pg_data_t *pg = z->zone_pgdat,
+		*orig = zonelist->zones[0]->zone_pgdat;
+	struct per_cpu_pageset *p;
+	local_irq_save(flags); 
+	cpu = smp_processor_id();
+	p = &z->pageset[cpu];
+	if (pg == orig) {
+		z->pageset[cpu].numa_hit++;
+	} else { 
+		p->numa_miss++;
+		zonelist->zones[0]->pageset[cpu].numa_foreign++;
+	}
+	if (pg == NODE_DATA(numa_node_id()))
+		p->local_node++;
+	else
+		p->other_node++;	
+	local_irq_restore(flags);
+#endif
+} 
+
 /*
  * Free a 0-order page
  */
@@ -582,8 +607,10 @@
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
 			page = buffered_rmqueue(z, order, cold);
-			if (page)
+			if (page) { 
+					zone_statistics(zonelist, z); 
 		       		goto got_pg;
+			}
 		}
 		min += z->pages_low * sysctl_lower_zone_protection;
 	}
@@ -607,8 +634,10 @@
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
 			page = buffered_rmqueue(z, order, cold);
-			if (page)
+			if (page) {
+				zone_statistics(zonelist, z); 
 				goto got_pg;
+			}
 		}
 		min += local_min * sysctl_lower_zone_protection;
 	}
@@ -622,8 +651,10 @@
 			struct zone *z = zones[i];
 
 			page = buffered_rmqueue(z, order, cold);
-			if (page)
+			if (page) {
+				zone_statistics(zonelist, z); 
 				goto got_pg;
+			}
 		}
 		goto nopage;
 	}
@@ -650,8 +681,10 @@
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
 			page = buffered_rmqueue(z, order, cold);
-			if (page)
+			if (page) {
+				zone_statistics(zonelist, z); 
 				goto got_pg;
+			}
 		}
 		min += z->pages_low * sysctl_lower_zone_protection;
 	}
diff -u linux-2.6.5-numa/drivers/base/node.c-o linux-2.6.5-numa/drivers/base/node.c
--- linux-2.6.5-numa/drivers/base/node.c-o	2004-03-17 12:17:46.000000000 +0100
+++ linux-2.6.5-numa/drivers/base/node.c	2004-04-06 13:36:12.000000000 +0200
@@ -30,13 +30,20 @@
 
 static SYSDEV_ATTR(cpumap,S_IRUGO,node_read_cpumap,NULL);
 
+/* Can be overridden by architecture-specific code. */
+int __attribute__((weak)) hugetlb_report_node_meminfo(int node, char *buf)
+{
+	return 0;
+}
+
 #define K(x) ((x) << (PAGE_SHIFT - 10))
 static ssize_t node_read_meminfo(struct sys_device * dev, char * buf)
 {
+	int n;
 	int nid = dev->id;
 	struct sysinfo i;
 	si_meminfo_node(&i, nid);
-	return sprintf(buf, "\n"
+	n = sprintf(buf, "\n"
 		       "Node %d MemTotal:     %8lu kB\n"
 		       "Node %d MemFree:      %8lu kB\n"
 		       "Node %d MemUsed:      %8lu kB\n"
@@ -51,10 +58,52 @@
 		       nid, K(i.freehigh),
 		       nid, K(i.totalram-i.totalhigh),
 		       nid, K(i.freeram-i.freehigh));
+	n += hugetlb_report_node_meminfo(nid, buf + n);
+	return n;
 }
+
 #undef K 
 static SYSDEV_ATTR(meminfo,S_IRUGO,node_read_meminfo,NULL);
 
+static ssize_t node_read_numastat(struct sys_device * dev, char * buf)
+{ 
+	unsigned long numa_hit, numa_miss, interleave_hit, numa_foreign;
+	unsigned long local_node, other_node;
+	int i, cpu;
+	pg_data_t *pg = NODE_DATA(dev->id);
+	numa_hit = 0; 
+	numa_miss = 0; 
+	interleave_hit = 0; 
+	numa_foreign = 0; 
+	local_node = 0;
+	other_node = 0;
+	for (i = 0; i < MAX_NR_ZONES; i++) { 
+		struct zone *z = &pg->node_zones[i]; 
+		for (cpu = 0; cpu < NR_CPUS; cpu++) { 
+			struct per_cpu_pageset *ps = &z->pageset[cpu]; 
+			numa_hit += ps->numa_hit; 
+			numa_miss += ps->numa_miss;
+			numa_foreign += ps->numa_foreign;
+			interleave_hit += ps->interleave_hit;
+			local_node += ps->local_node;
+			other_node += ps->other_node;
+		} 
+	} 
+	return sprintf(buf, 
+		       "numa_hit %lu\n"
+		       "numa_miss %lu\n"
+		       "numa_foreign %lu\n"
+		       "interleave_hit %lu\n"
+		       "local_node %lu\n"
+		       "other_node %lu\n", 
+		       numa_hit,
+		       numa_miss,
+		       numa_foreign,
+		       interleave_hit,
+		       local_node, 
+		       other_node); 
+} 
+static SYSDEV_ATTR(numastat,S_IRUGO,node_read_numastat,NULL);
 
 /*
  * register_node - Setup a driverfs device for a node.
@@ -74,6 +123,7 @@
 	if (!error){
 		sysdev_create_file(&node->sysdev, &attr_cpumap);
 		sysdev_create_file(&node->sysdev, &attr_meminfo);
+		sysdev_create_file(&node->sysdev, &attr_numastat); 
 	}
 	return error;
 }

  parent reply	other threads:[~2004-04-06 13:45 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-04-06 13:33 NUMA API for Linux Andi Kleen
2004-04-06 13:34 ` [PATCH] NUMA API for Linux 1/ Core NUMA API code Andi Kleen
2004-04-06 13:35 ` NUMA API for Linux 2/ Add x86-64 support Andi Kleen
2004-04-06 13:35 ` [PATCH] NUMA API for Linux 3/ Add i386 support Andi Kleen
2004-04-06 23:23   ` Andrew Morton
2004-04-06 13:36 ` [PATCH] NUMA API for Linux 4/ Add IA64 support Andi Kleen
2004-04-06 13:37 ` [PATCH] NUMA API for Linux 5/ Add VMA hooks for policy Andi Kleen
2004-05-05 16:05   ` Paul Jackson
2004-05-05 16:39     ` Andi Kleen
2004-05-05 16:47       ` Paul Jackson
2004-05-06  6:00         ` Andi Kleen
2004-04-06 13:37 ` [PATCH] NUMA API for Linux 6/ Add shared memory support Andi Kleen
2004-04-06 13:38 ` Andi Kleen [this message]
2004-04-06 13:39 ` [PATCH] NUMA API for Linux 8/ Add policy support to anonymous memory Andi Kleen
2004-04-06 13:40 ` [PATCH] NUMA API for Linux 9/ Add simple lazy i386/x86-64 hugetlbfs policy support Andi Kleen
2004-04-06 13:40 ` [PATCH] NUMA API for Linux 10/ Bitmap bugfix Andi Kleen
2004-04-06 23:35 ` NUMA API for Linux Paul Jackson
2004-04-08 20:12 ` Pavel Machek

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20040406153812.38553277.ak@suse.de \
    --to=ak@suse.de \
    --cc=akpm@osdl.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox